| 1 |
<?php |
<?php |
| 2 |
// $Id: weblinks.module,v 1.86.2.41 2008/08/30 16:11:14 nancyw Exp $ |
// $Id: weblinks.module,v 1.86.2.42 2008/09/01 00:20:05 nancyw Exp $ |
| 3 |
|
|
| 4 |
/** |
/** |
| 5 |
* @file |
* @file |
| 141 |
} |
} |
| 142 |
|
|
| 143 |
$url = trim($node->url); |
$url = trim($node->url); |
| 144 |
if (!valid_url($url, TRUE)) { |
if (!weblinks_valid_url($url)) { |
| 145 |
form_set_error('url', t('@url does not look like a valid URL.', array('@url' => check_plain($url)))); |
form_set_error('url', t('@url does not look like a valid URL.', array('@url' => check_plain($url)))); |
| 146 |
} |
} |
| 147 |
|
|
| 1653 |
|
|
| 1654 |
return $output; |
return $output; |
| 1655 |
} |
} |
| 1656 |
|
|
| 1657 |
|
/**
 * Verify the syntax of the given URL. http://drupal.org/node/295021
 *
 * This function should only be used on actual URLs. It should not be used for
 * Drupal menu paths, which can contain arbitrary characters.
 *
 * The URL is split into scheme, userinfo, host, port, path, query and
 * fragment, and each part is then checked separately. The function performs
 * no I/O and produces no output.
 *
 * @param $url
 *   The URL to verify.
 * @param $options
 *   An associative array of additional options, with the following keys:
 *   - 'allow_relative' (default FALSE)
 *       Whether relative paths (URLs without a scheme) are allowed.
 *   - 'allow_ipv6' (default FALSE)
 *       Whether the host may be a bracket-enclosed IPv6 address,
 *       e.g. '[2001:0db8::1428:57ab]'. The address itself is not verified.
 *   - 'allow_idna' (default FALSE)
 *       Whether the host may be an IDNA hostname, i.e. contain non-ASCII
 *       characters. When TRUE the hostname is not verified at all.
 *   - 'allowed_schemes' (default array('%drupal'))
 *       Either an array of allowed schemes (in lowercase),
 *       e.g. array('http', 'https'), or FALSE to allow any scheme.
 *       If the array contains one or more of these pseudo-schemes (beginning
 *       with "%"), they are expanded to the following:
 *         '%drupal' => ('http', 'https')
 *           - protocols supported by drupal_http_request()
 *         '%browser' => ('http', 'https', 'gopher', 'ftp', 'data')
 *           - protocols natively supported by most browsers
 *         '%media' => ('mms', 'rtsp')
 *           - stream media protocols
 *         '%external' => ('news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh',
 *                         'sftp', 'feed', 'webcal')
 *           - protocols supported by non-browser applications
 *   - 'forbidden_ports' (default - see the source code)
 *       Ports that are forbidden due to security issues.
 *
 * @return
 *   TRUE if the URL is in a valid format, FALSE otherwise.
 */
function weblinks_valid_url($url, array $options = array()) {
  static $default_options = array(
    'allow_relative' => FALSE,
    'allow_ipv6' => FALSE,
    'allow_idna' => FALSE,
    'allowed_schemes' => array('%drupal'),
    // See http://www.mozilla.org/projects/netlib/PortBanning.html
    'forbidden_ports' => array(1, 7, 9, 11, 13, 15, 17, 19, 20, 22, 23, 25,
                               37, 42, 43, 53, 77, 79, 87, 95, 101, 102, 103,
                               104, 109, 110, 111, 113, 115, 117, 119, 123,
                               135, 139, 143, 179, 389, 465, 512, 513, 514,
                               515, 526, 530, 531, 532, 540, 556, 563, 587,
                               601, 636, 993, 995, 2049, 4045, 6000),
  );
  static $shortcuts = array(
    '%drupal' => array('http', 'https'),
    '%browser' => array('http', 'https', 'gopher', 'ftp', 'data'),
    '%media' => array('mms', 'rtsp'),
    '%external' => array('news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'feed', 'webcal'),
  );
  $options = array_merge($default_options, $options);

  // Split the URL into its components. The userinfo part must not run across
  // "/", "?" or "#"; otherwise an "@" later in the path would be mistaken for
  // the userinfo delimiter and the wrong host would be validated.
  $matched = preg_match('`^
    (?:([^:/?\#]+):)?              # scheme
    (?://
      ([^@/?\#]*@)?                # userinfo "@"
      (\[[^/?\#\]]*\]              # host (IPv6address/IPvFuture)
       |[^/?\#:]*)                 # host (IPv4address/reg-name)
      (:[^/?\#:]*)?                # port
    )?
    ([^?\#]*)                      # path
    (\?[^\#]*)?                    # "?" query
    (\#.*)?                        # "#" fragment
    `xs', $url, $reg);
  if (!$matched) {
    return FALSE;
  }
  // preg_match() drops trailing groups that did not participate in the match;
  // fill them in so every component can be read without notices.
  $reg += array_fill(0, 8, '');

  $scheme = $reg[1];
  // Strip the trailing "@" / leading ":", "?" and "#" delimiters.
  $userinfo = ($reg[2] === '') ? '' : (string) substr($reg[2], 0, -1);
  $host = $reg[3];
  $port = ($reg[4] === '') ? '' : (string) substr($reg[4], 1);
  $path = $reg[5];
  $query = ($reg[6] === '') ? '' : (string) substr($reg[6], 1);
  $fragment = ($reg[7] === '') ? '' : (string) substr($reg[7], 1);

  if ($scheme === '' && !$options['allow_relative']) {
    return FALSE;
  }

  if (substr($host, 0, 1) == '[') {
    // Bracket-enclosed IP literal.
    if (!$options['allow_ipv6']) {
      return FALSE;
    }
    // TODO: Verify IPv6address - for now we just allow anything
  }
  elseif (!$options['allow_idna']) {
    // Look for invalid characters, hyphens or periods first or last, hyphens
    // next to periods, or consecutive periods.
    if (preg_match('`[^0-9a-z.-]|^[-.]|[-.]$|\.-|-\.|\.\.`i', $host)) {
      return FALSE;
    }
  }
  // TODO: Verify IDNA hostname - for now we just allow anything

  if ($options['allowed_schemes']) {
    // Expand the "%..." pseudo-schemes into the real schemes they stand for.
    $allowed_schemes = $options['allowed_schemes'];
    foreach ($shortcuts as $name => $shortcut) {
      if (in_array($name, $options['allowed_schemes'], TRUE)) {
        $allowed_schemes = array_merge($allowed_schemes, $shortcut);
      }
    }
    if ($scheme !== '' && !in_array(strtolower($scheme), $allowed_schemes, TRUE)) {
      return FALSE;
    }
  }

  // Look for invalid characters.
  //
  // According to RFC 3986 appendix A, the following characters are allowed:
  // * unreserved:   ALPHA / DIGIT / "-" / "." / "_" / "~"
  // * pct-encoded:  "%" HEXDIG HEXDIG
  // * sub-delims:   "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
  // *               ":" / "@" / "/" / "?"
  //
  // Not all the latter are allowed in all the mentioned parts, but the
  // invalid ones (e.g. "?" in $path) are handled when the URL was initially
  // parsed above.
  //
  // We also allow the following that are commonly used: "[" "]" "^"
  foreach (array($userinfo, $path, $query, $fragment) as $part) {
    if (preg_match('`[^a-z0-9\-._~%^!$&\'()*+,;=/?:@[\]]`i', $part)) {
      return FALSE;
    }
  }

  // Look for invalid percent encoding: every "%" must be followed by exactly
  // two hex digits, including at the very end of the part.
  foreach (array($userinfo, $host, $path) as $part) {
    if (preg_match('`%(?![0-9a-f]{2})`i', $part)) {
      return FALSE;
    }
  }

  // Make sure port is an integer or empty, and not forbidden. The D modifier
  // keeps "$" from matching before a trailing newline.
  if (!preg_match('`^\d*$`D', $port)) {
    return FALSE;
  }
  if ($port !== '' && in_array((int) $port, $options['forbidden_ports'])) {
    return FALSE;
  }

  return TRUE;
}