| 1 |
<?php |
<?php |
| 2 |
// $Id: htmlcorrector.module,v 1.9 2004/08/21 03:52:47 tdobes Exp $ |
// $Id$ |
| 3 |
|
|
| 4 |
function htmlcorrector_help($type) { |
function htmlcorrector_help($section = 'admin/help#htmlcorrector') { |
| 5 |
switch ($type) { |
switch ($section) { |
| 6 |
case 'admin/modules#description': |
case 'admin/modules#description': |
| 7 |
return t('This module will validate and correct the HTML inside users\' submissions when necessary.'); |
return t('Corrects faulty and chopped off HTML in postings.'); |
| 8 |
} |
} |
| 9 |
} |
} |
| 10 |
|
|
| 12 |
switch ($op) { |
switch ($op) { |
| 13 |
case 'list': |
case 'list': |
| 14 |
return array(0 => t('HTML Corrector')); |
return array(0 => t('HTML Corrector')); |
| 15 |
|
|
| 16 |
case 'description': |
case 'description': |
| 17 |
return htmlcorrector_help('admin/modules#description'); |
return t('Corrects faulty and chopped off HTML in postings.'); |
| 18 |
|
|
| 19 |
case 'process': |
case 'process': |
| 20 |
return _htmlcorrector_process($text, $format); |
return _htmlcorrector_process($text); |
| 21 |
case 'settings': |
|
|
return _htmlcorrector_config($format); |
|
| 22 |
default: |
default: |
| 23 |
return $text; |
return $text; |
| 24 |
} |
} |
| 25 |
} |
} |
| 26 |
|
|
| 27 |
function htmlcorrector_filter_tips($delta, $format, $long = false) { |
function _htmlcorrector_process($text) { |
| 28 |
return t('HTML inside your submission will be validated and corrected if necessary.'); |
// Tags which cannot be nested but are typically left unclosed. |
| 29 |
} |
$nonesting = array('li', 'p'); |
| 30 |
|
|
| 31 |
// check an array of arrays for a match amongst one of the sub-elements |
// Single use tags in HTML4 |
|
function _htmlcorrector_in_array_el($haystack, $needle, $el) { |
|
|
foreach ($haystack as $value) { |
|
|
if ($value[$el] == $needle) |
|
|
return 1; |
|
|
} |
|
|
return 0; |
|
|
} |
|
|
|
|
|
function _htmlcorrector_config($format) { |
|
|
$output = t('The HTML inside users\' submissions will be validated and corrected when necessary.'); |
|
|
$output .= form_select(t('Smart-close'), 'htmlcorrector_smartclose_'. $format, variable_get('htmlcorrector_smartclose_'. $format, 1), array(t('Disabled'), t('Enabled')), t('When enabled, it\'s not allowed to nest identical tags. This is useful for correcting incorrect <p>, <option>, <li>, ... tags that are not closed.')); |
|
|
$output .= form_select(t('XHTMLify'), 'htmlcorrector_xhtmlify_'. $format, variable_get('htmlcorrector_xhtmlify_'. $format, 0), array(t('Disabled'), t('Enabled')), t('When enabled, single-use tags such as <br> will receive a forward slash at the end to comply with the XHTML-specs.')); |
|
|
$output .= form_select(t('Fix entities inside values'), 'htmlcorrector_valueentities_'. $format, variable_get('htmlcorrector_valueentities_'. $format, 0), array(t('Disabled'), t('Enabled')), t('When enabled, entities inside HTML attribute-values will be checked as well. It\'s recommended to leave this off, as most people still don\'t escape them.<br />(e.g. <a href="http://site.com/index.php?foo=bar&bar=foo" instead of <a href="http://site.com/index.php?foo=bar&amp;bar=foo")')); |
|
|
return form_group(t('HTML Corrector'), $output); |
|
|
} |
|
|
|
|
|
function _htmlcorrector_process($text, $format) { |
|
|
$smartclose = variable_get('htmlcorrector_smartclose_'. $format, 1); |
|
|
$xhtmlify = variable_get('htmlcorrector_xhtmlify_'. $format, 0); |
|
|
$valueentities = variable_get('htmlcorrector_valueentities_'. $format, 0); |
|
|
|
|
|
// single-use tags, as defined by the HTML4 standard |
|
| 32 |
$singleuse = array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'); |
$singleuse = array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'); |
|
// tags that are commonly nested and should not be 'smartclosed' |
|
|
$notsmart = array('ul', 'div', 'ol', 'font'); |
|
|
|
|
|
$len = strlen($text); |
|
|
$mode = array('text'); |
|
|
$opentags = array(); |
|
|
$output = ''; |
|
| 33 |
|
|
| 34 |
// Search the text character by character, skipping certain blocks at once. |
// Properly entify angles |
| 35 |
// We use a 'mode' system which defines what kind of situation we're in: |
$text = preg_replace('!<([^a-zA-Z/])!', '<\1', $text); |
|
// text - regular case, nothing special |
|
|
// entity - activated when a & is found (e.g. &nbsp;) |
|
|
// tagname - activated after a <, switches to 'tag' after the initial tagname is known |
|
|
// tag - inside an html tag, after the initial tagname |
|
|
// attribute - inside an html attribute-value, inside the (optional) quotes |
|
|
// |
|
|
// The modes are stored in a stack-system, to allow entities inside attribute values, inside tags, etc. |
|
|
// |
|
|
// The tags are validated using a stack as well... all open tags are stored in $opentags as array($tagname, $smart) |
|
|
// $tagname is the tagname of course, and $smart defines whether this tag has already been closed by the smartcloser |
|
|
// Smartclosed tags are still added to the stack to remove optional end-tags that have been made obsolete. |
|
|
|
|
|
for ($i = 0; $i < $len; $i++) { |
|
|
$c = $text[$i]; |
|
|
switch ($mode[0]) { |
|
|
case 'text': |
|
|
if ($c == '<') { |
|
|
array_unshift($mode, 'tagname'); |
|
|
$tagname = ''; |
|
|
$tag = ''; |
|
|
$closing = 0; |
|
|
$smarty = 0; |
|
|
} else if ($c == '&') { |
|
|
array_unshift($mode, 'entity'); |
|
|
$output .= '&'; |
|
|
$entitylength = 0; |
|
|
} else { |
|
|
$block = strcspn(substr($text, $i), '<&'); |
|
|
if ($block) { |
|
|
$output .= drupal_specialchars(substr($text, $i, $block)); |
|
|
$i += $block - 1; |
|
|
} |
|
|
} |
|
|
break; |
|
| 36 |
|
|
| 37 |
case 'entity': |
// Splits tags from text |
| 38 |
// Depending on where we are, we should put the output in |
$split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE); |
| 39 |
// a different buffer. Since tags are only outputted when they're done, |
// Note: PHP ensures the array consists of alternating delimiters and literals |
| 40 |
// we should put it in $tag if we're inside an html attribute-value. |
// and begins and ends with a literal (inserting $null as required). |
|
if ((count($mode) > 1) && ($mode[1] == 'attribute')) |
|
|
$buffer = &$tag; |
|
|
else |
|
|
$buffer = &$output; |
|
|
if (!eregi('[A-Z0-9#]', $c)) { |
|
|
array_shift($mode); |
|
| 41 |
|
|
| 42 |
if ($c == ';') |
$tag = false; // Odd/even counter. Tag or no tag. |
| 43 |
$buffer .= $c; |
$stack = array(); |
| 44 |
else if ($entitylength) { |
$output = ''; |
| 45 |
// common error: forgotten the end semi-colon |
foreach ($split as $value) { |
| 46 |
if ($valueentities || !($mode[0] == 'attribute')) { |
// HTML tag |
| 47 |
$buffer .= ';'; |
if ($tag) { |
| 48 |
$i--; |
list($tagname) = explode(' ', strtolower($value), 2); |
| 49 |
} |
// Closing tag |
| 50 |
else { |
if ($tagname{0} == '/') { |
| 51 |
$buffer .= $c; |
$tagname = substr($tagname, 1); |
| 52 |
} |
if (!in_array($tagname, $singleuse)) { |
| 53 |
} |
// See if we have other tags lingering first, and close them |
| 54 |
else { |
while (($stack[0] != $tagname) && count($stack)) { |
| 55 |
// non-attribute ampersand... convert to &amp; |
$output .= '</'. array_shift($stack) .'>'; |
| 56 |
$buffer .= 'amp;'; |
} |
| 57 |
$i--; |
// If the tag was not found, just leave it out; |
| 58 |
|
if (count($stack)) { |
| 59 |
|
$output .= '</'. array_shift($stack) .'>'; |
| 60 |
} |
} |
|
} else { |
|
|
$buffer .= $c; |
|
|
$entitylength++; |
|
| 61 |
} |
} |
| 62 |
break; |
} |
| 63 |
|
// Opening tag |
| 64 |
case 'tagname': |
else { |
| 65 |
if (($c == '/') && ($tagname == '')) { |
// See if we have an identical tag already open and close it if desired. |
| 66 |
// This is a closing tag |
if (count($stack) && ($stack[0] == $tagname) && in_array($stack[0], $nonesting)) { |
| 67 |
$closing = 1; |
$output .= '</'. array_shift($stack) .'>'; |
|
} else { |
|
|
if (eregi('[A-Z!-]', $c)) { |
|
|
$tagname .= $c; |
|
|
if ($tagname == '!--') // This is a comment-tag, so we can switch to 'tag' mode |
|
|
$mode[0] = 'tag'; |
|
|
} else { |
|
|
// We've extracted the total tagname |
|
|
if ($tagname == '') { |
|
|
// This is not a tag, but a single, unescaped '<'. Back to the previous mode |
|
|
$output .= '<'; |
|
|
array_shift($mode); |
|
|
$i--; |
|
|
continue; |
|
|
} |
|
|
$mode[0] = 'tag'; |
|
|
if (!$closing) { |
|
|
$smarty = 0; |
|
|
$tn = strtolower($tagname); |
|
|
// When smart-close is enabled, it's not allowed to nest identical tags... |
|
|
// This helps fix errors such as using '<p>' but not '</p>' and '<option>' but not '</option>'. |
|
|
// The $notsmart-array lists tags that are commonly nested and are therefore exempt from smart-closing |
|
|
if ($smartclose && count($opentags) && ($tagname[0] != '!') && (!in_array($tn, $notsmart))) { |
|
|
if (($tn == $opentags[0][0]) && (!in_array($tn, $singleuse))) { |
|
|
$output .= '</' . $tagname . '>'; |
|
|
$smarty = 1; |
|
|
} |
|
|
} |
|
|
// When a tag has been smart-closed, make sure that, if there is an end-tag later on, |
|
|
// we know it is obsolete. We set the currently open-tag's smartstatus to 1. |
|
|
if (count($opentags)) |
|
|
$opentags[0][1] = $smarty; |
|
|
// Add a new tag to the list. |
|
|
array_unshift($opentags, array($tn, 0)); |
|
|
} |
|
|
} |
|
| 68 |
} |
} |
| 69 |
case 'tag': |
// Push non-single-use tags onto the stack |
| 70 |
if ($tagname == '!--') { |
if (!in_array($tagname, $singleuse)) { |
| 71 |
// This is a comment-tag |
array_unshift($stack, $tagname); |
|
if ($c == '-') { |
|
|
// Check for the end of this comment-tag |
|
|
if (($text[$i + 1] == '-') && ($text[$i + 2] == '>')) { |
|
|
array_shift($mode); |
|
|
$output .= '<' . $tag . '-->'; |
|
|
$i+=2; |
|
|
} else { |
|
|
// This is a single dash/hyphen. |
|
|
$tag .= $c; |
|
|
} |
|
|
} else { |
|
|
// Skip to the next dash/hyphen. That might be the end of this comment-tag. |
|
|
$block = strpos(substr($text, $i), '-'); |
|
|
if ($block) { |
|
|
$tag .= drupal_specialchars(substr($text, $i, $block)); |
|
|
$i += $block - 1; |
|
|
} |
|
|
} |
|
|
} else { |
|
|
if ($c == '>') { // We're at the end of the tag. |
|
|
$skiptag = 0; |
|
|
if ($closing) { |
|
|
// This is a closing tag. Let's see if we can find a matching opening tag. |
|
|
if (_htmlcorrector_in_array_el($opentags, strtolower($tagname), 0)) { |
|
|
// Check if some unclosed tags are waiting to be closed before this one. |
|
|
while (count($opentags) && ($opentags[0][0] != strtolower($tagname))) { |
|
|
list($opentag, $skiptag) = array_shift($opentags); |
|
|
if (!in_array($opentag, $singleuse) && (!$skiptag) && ($opentag[0] != '!')) { |
|
|
$output .= '</' . $opentag . '>'; |
|
|
} |
|
|
} |
|
|
// Remove the current tag from the opentags-list and get the smartclose-status. |
|
|
list(,$skiptag) = array_shift($opentags); |
|
|
} else { |
|
|
// We couldn't find a matching opening tag. This is probably an incorrect closing-tag. |
|
|
// It should be removed. |
|
|
$skiptag = 1; |
|
|
} |
|
|
} else { |
|
|
if (count($opentags)) { |
|
|
if ($text[$i - 1] == '/') { |
|
|
// This is a single-use XHTML-tag ending with a slash. We can remove it from $opentags. |
|
|
array_shift($opentags); |
|
|
} else if ($xhtmlify) { |
|
|
if (in_array($tagname, $singleuse)) { |
|
|
// This is a single-use HTML-tag. We need to XHTML-ify it and remove an optional end-tag. |
|
|
$tag .= ($text[$i - 1] == ' ') ? '/' : ' /'; |
|
|
$opentags[0][1] = 1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
array_shift($mode); |
|
|
if (!$skiptag) { |
|
|
$output .= '<' . $tag . '>'; |
|
|
} |
|
|
} else if ($c == '=') { |
|
|
// We've hit an attribute-value. |
|
|
array_unshift($mode, 'attribute'); |
|
|
$tag .= $c; |
|
|
|
|
|
// Common-error: didn't use single or double quote to enclose value. |
|
|
// We'll assume it stops at the next space or at the end of the tag. |
|
|
$quote = (($text[++$i] == '\'') || ($text[$i] == '"')) ? array($text[$i]) : array(' ', '>'); |
|
|
if (in_array(' ', $quote)) { |
|
|
$tag .= $text[$i]; |
|
|
} else { |
|
|
$tag .= $text[$i]; |
|
|
} |
|
|
} else { |
|
|
$tag .= $c; |
|
|
} |
|
| 72 |
} |
} |
| 73 |
break; |
// Add trailing slash to single-use tags as per X(HT)ML. |
| 74 |
|
else { |
| 75 |
case 'attribute': |
$value = rtrim($value, ' /') . ' /'; |
|
if (in_array($c, $quote)) { |
|
|
// We've hit the end of the value |
|
|
array_shift($mode); |
|
|
|
|
|
// If the value was not enclosed in quotes, then a '>' also means the end of the tag. |
|
|
// We should skip back one character in order to activate the tag-closer in the next iteration. |
|
|
if ($c == '>') |
|
|
$i--; |
|
|
else |
|
|
$tag .= $c; |
|
|
} else if (($c == '\\') && !in_array(' ', $quote) && (in_array($text[$i + 1], $quote) || ($text[$i + 1] == '\\'))) { |
|
|
// This is an escaped quote, double-quote or backslash. |
|
|
$tag .= $c . $text[++$i]; |
|
|
} else if ($c == '&') { |
|
|
// This is an entity inside a value. |
|
|
array_unshift($mode, 'entity'); |
|
|
$tag .= '&'; |
|
|
} else { |
|
|
$tag .= $c; |
|
| 76 |
} |
} |
| 77 |
break; |
$output .= '<'. $value .'>'; |
| 78 |
|
} |
| 79 |
} |
} |
| 80 |
} |
else { |
| 81 |
// Check for remaining tags on the stack and close them. |
// Passthrough |
| 82 |
while (count($opentags)) { |
$output .= $value; |
|
list($tagname, $skiptag) = array_shift($opentags); |
|
|
if ((!$skiptag) && !in_array($tagname, $singleuse) && ($tagname[0] != '!')) { |
|
|
$output .= '</' . $tagname . '>'; |
|
| 83 |
} |
} |
| 84 |
|
$tag = !$tag; |
| 85 |
|
} |
| 86 |
|
// Close remaining tags |
| 87 |
|
while (count($stack) > 0) { |
| 88 |
|
$output .= '</'. array_shift($stack) .'>'; |
| 89 |
} |
} |
|
|
|
| 90 |
return $output; |
return $output; |
| 91 |
} |
} |
|
|
|
|
?> |
|