| 1 |
<?php |
<?php |
| 2 |
// $Id: weblinks.module,v 1.1.4.58.2.83 2008/08/31 18:16:11 rmiddle Exp $ |
// $Id: weblinks.module,v 1.1.4.58.2.84 2008/09/01 00:20:21 nancyw Exp $ |
| 3 |
|
|
| 4 |
/** |
/** |
| 5 |
* @file |
* @file |
| 891 |
} |
} |
| 892 |
|
|
| 893 |
$url = trim($node->url); |
$url = trim($node->url); |
| 894 |
if (!valid_url($url, TRUE)) { |
if (!weblinks_valid_url($url)) { |
| 895 |
form_set_error('url', t('@url does not look like a valid URL.', array('@url' => check_plain($url)))); |
form_set_error('url', t('@url does not look like a valid URL.', array('@url' => check_plain($url)))); |
| 896 |
} |
} |
| 897 |
|
|
| 2352 |
return '<div class="weblinks-linkview">'. decode_entities($link) .'</div>'. $status; |
return '<div class="weblinks-linkview">'. decode_entities($link) .'</div>'. $status; |
| 2353 |
} |
} |
| 2354 |
} |
} |
| 2355 |
|
|
| 2356 |
|
/**
 * Verify the syntax of the given URL. http://drupal.org/node/295021
 *
 * This function should only be used on actual URLs. It should not be used for
 * Drupal menu paths, which can contain arbitrary characters.
 *
 * The URL is first split into scheme, userinfo, host, port, path, query and
 * fragment (following the generic syntax of RFC 3986), then each component is
 * checked on its own. An empty host is accepted, because schemes such as
 * "mailto:" legitimately have no authority part.
 *
 * @param $url
 *   The URL to verify.
 * @param $options
 *   An associative array of additional options, with the following keys:
 *   - 'allow_relative' (default FALSE)
 *       Whether relative paths (URLs without a scheme) are allowed.
 *   - 'allow_ipv6' (default FALSE)
 *       Whether the host may be a bracket-enclosed IPv6 address,
 *       e.g. '[2001:0db8::1428:57ab]'. The address itself is not verified yet.
 *   - 'allow_idna' (default FALSE)
 *       Whether the host may be an IDNA hostname, i.e. contain non-ASCII
 *       characters. When TRUE the host name syntax is not verified yet.
 *   - 'allowed_schemes' (default array('%drupal'))
 *       Either an array of allowed schemes (in lowercase), e.g.
 *       array('http', 'https'), or FALSE to allow any scheme.
 *       If the array contains one or more of these pseudo-schemes (beginning
 *       with "%"), they are expanded to the following:
 *         '%drupal' => ('http', 'https') - protocols supported by
 *           drupal_http_request()
 *         '%browser' => ('http', 'https', 'gopher', 'ftp', 'data') - protocols
 *           natively supported by most browsers
 *         '%media' => ('mms', 'rtsp') - stream media protocols
 *         '%external' => ('news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh',
 *           'sftp', 'feed', 'webcal') - protocols supported by non-browser
 *           applications
 *       The pseudo-scheme names themselves are never accepted as schemes.
 *   - 'forbidden_ports' (default - see the source code)
 *       Ports that are forbidden due to security issues.
 *
 * @return
 *   TRUE if the URL is in a valid format, FALSE otherwise.
 */
function weblinks_valid_url($url, array $options = array()) {
  static $default_options = array(
    'allow_relative' => FALSE,
    'allow_ipv6' => FALSE,
    'allow_idna' => FALSE,
    'allowed_schemes' => array('%drupal'),
    // See http://www.mozilla.org/projects/netlib/PortBanning.html
    'forbidden_ports' => array(1, 7, 9, 11, 13, 15, 17, 19, 20, 22, 23, 25,
                               37, 42, 43, 53, 77, 79, 87, 95, 101, 102, 103,
                               104, 109, 110, 111, 113, 115, 117, 119, 123,
                               135, 139, 143, 179, 389, 465, 512, 513, 514,
                               515, 526, 530, 531, 532, 540, 556, 563, 587,
                               601, 636, 993, 995, 2049, 4045, 6000),
  );
  static $shortcuts = array(
    '%drupal' => array('http', 'https'),
    '%browser' => array('http', 'https', 'gopher', 'ftp', 'data'),
    '%media' => array('mms', 'rtsp'),
    '%external' => array('news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'feed', 'webcal'),
  );

  $options = array_merge($default_options, $options);
  $url = (string) $url;

  // Split the URL into its components. The userinfo part must not contain
  // "/", "?" or "#": otherwise an "@" later in the path, query or fragment
  // would turn the real host and port into "userinfo" and let them escape the
  // host and port checks below.
  $matched = preg_match('`^
    (?:([^:/?\#]+):)?            # scheme
    (?://
      ([^@/?\#]*@)?              # userinfo "@"
      (\[[^/?\#\]]*\]            # host (IPv6address/IPvFuture)
        |[^/?\#:]*)              # host (IPv4address/reg-name)
      (:[^/?\#:]*)?              # port
    )?
    ([^?\#]*)                    # path
    (\?[^\#]*)?                  # "?" query
    (\#.*)?                      # "#" fragment
    `xs', $url, $reg);
  if (!$matched) {
    // Only happens on a PCRE failure; treat the URL as invalid.
    return FALSE;
  }
  // Trailing groups that did not participate in the match are absent from
  // $reg; pad them so every component can be read unconditionally.
  $reg += array_fill(0, 8, '');

  $scheme = $reg[1];
  // Strip the delimiters ("@", ":", "?", "#") that the pattern captures
  // together with the component.
  $userinfo = ($reg[2] === '') ? '' : (string) substr($reg[2], 0, -1);
  $host = $reg[3];
  $port = ($reg[4] === '') ? '' : (string) substr($reg[4], 1);
  $path = $reg[5];
  $query = ($reg[6] === '') ? '' : (string) substr($reg[6], 1);
  $fragment = ($reg[7] === '') ? '' : (string) substr($reg[7], 1);

  if ($scheme === '' && !$options['allow_relative']) {
    return FALSE;
  }

  if (substr($host, 0, 1) == '[') {
    // Bracket-enclosed IP literal.
    if (!$options['allow_ipv6']) {
      return FALSE;
    }
    // TODO: Verify IPv6address - for now we just allow anything
  }
  elseif (!$options['allow_idna']) {
    // Look for invalid characters, hyphens or periods first or last, hyphens
    // next to periods, or consecutive periods.
    if (preg_match('`[^0-9a-z.-]|^[-.]|[-.]$|\.-|-\.|\.\.`i', $host)) {
      return FALSE;
    }
  }
  // TODO: Verify IDNA hostname - for now we just allow anything

  if ($scheme !== '' && $options['allowed_schemes']) {
    // Replace each pseudo-scheme by the real schemes it stands for, so that
    // a literal "%drupal:" etc. in the URL is never accepted.
    $allowed_schemes = array();
    foreach ((array) $options['allowed_schemes'] as $name) {
      if (is_string($name) && isset($shortcuts[$name])) {
        $allowed_schemes = array_merge($allowed_schemes, $shortcuts[$name]);
      }
      else {
        $allowed_schemes[] = $name;
      }
    }
    if (!in_array(strtolower($scheme), $allowed_schemes, TRUE)) {
      return FALSE;
    }
  }

  // Look for invalid characters.
  //
  // According to RFC 3986, the following characters are allowed:
  // * unreserved:  ALPHA / DIGIT / "-" / "." / "_" / "~"
  // * pct-encoded: "%" HEXDIG HEXDIG
  // * sub-delims:  "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
  // * ":" / "@" / "/" / "?"
  //
  // Not all the latter are allowed in all the mentioned parts, but the
  // invalid ones (e.g. "?" in $path) were already handled when the URL was
  // parsed above.
  //
  // We also allow the following that are commonly used: "[" "]". The
  // character class additionally tolerates a literal "^".
  foreach (array($userinfo, $path, $query, $fragment) as $part) {
    if (preg_match('`[^a-z0-9\-._~%^!$&\'()*+,;=/?:@[\]]`i', $part)) {
      return FALSE;
    }
  }

  // Look for invalid percent encoding: every "%" must be followed by exactly
  // two hexadecimal digits, including at the very end of the part.
  foreach (array($userinfo, $host, $path) as $part) {
    if (preg_match('`%(?![0-9a-f]{2})`i', $part)) {
      return FALSE;
    }
  }

  // Make sure the port is an integer or empty, and not forbidden.
  if (!preg_match('`^\d*\z`', $port)) {
    return FALSE;
  }
  if ($port !== '' && in_array((int) $port, $options['forbidden_ports'])) {
    return FALSE;
  }

  return TRUE;
}