/[drupal]/contributions/modules/htmlcorrector/htmlcorrector.module
ViewVC logotype

Diff of /contributions/modules/htmlcorrector/htmlcorrector.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph | View Patch Patch

revision 1.10, Sat Aug 21 18:31:14 2004 UTC revision 1.11, Sat Oct 16 02:03:56 2004 UTC
# Line 1  Line 1 
1  <?php  <?php
2  // $Id: htmlcorrector.module,v 1.9 2004/08/21 03:52:47 tdobes Exp $  // $Id$
3    
4  function htmlcorrector_help($type) {  function htmlcorrector_help($section = 'admin/help#htmlcorrector') {
5    switch ($type) {    switch ($section) {
6      case 'admin/modules#description':      case 'admin/modules#description':
7        return t('This module will validate and correct the HTML inside users\' submissions when necessary.');        return t('Corrects faulty and chopped off HTML in postings.');
8    }    }
9  }  }
10    
# Line 12  function htmlcorrector_filter($op, $delt Line 12  function htmlcorrector_filter($op, $delt
12    switch ($op) {    switch ($op) {
13      case 'list':      case 'list':
14        return array(0 => t('HTML Corrector'));        return array(0 => t('HTML Corrector'));
15    
16      case 'description':      case 'description':
17        return htmlcorrector_help('admin/modules#description');        return t('Corrects faulty and chopped off HTML in postings.');
18    
19      case 'process':      case 'process':
20        return _htmlcorrector_process($text, $format);        return _htmlcorrector_process($text);
21      case 'settings':  
       return _htmlcorrector_config($format);  
22      default:      default:
23        return $text;        return $text;
24    }    }
25  }  }
26    
27  function htmlcorrector_filter_tips($delta, $format, $long = false) {  function _htmlcorrector_process($text) {
28    return t('HTML inside your submission will be validated and corrected if necessary.');    // Tags which cannot be nested but are typically left unclosed.
29  }    $nonesting = array('li', 'p');
30    
31  // check an array of arrays for a match amongst one of the sub-elements    // Single use tags in HTML4
 function _htmlcorrector_in_array_el($haystack, $needle, $el) {  
   foreach ($haystack as $value) {  
     if ($value[$el] == $needle)  
       return 1;  
   }  
   return 0;  
 }  
   
 function _htmlcorrector_config($format) {  
   $output = t('The HTML inside users\' submissions will be validated and corrected when necessary.');  
   $output .= form_select(t('Smart-close'), 'htmlcorrector_smartclose_'. $format, variable_get('htmlcorrector_smartclose_'. $format, 1), array(t('Disabled'), t('Enabled')), t('When enabled, it\'s not allowed to nest identical tags. This is useful for correcting incorrect &lt;p&gt;, &lt;option&gt;, &lt;li&gt;, ... tags that are not closed.'));  
   $output .= form_select(t('XHTMLify'), 'htmlcorrector_xhtmlify_'. $format, variable_get('htmlcorrector_xhtmlify_'. $format, 0), array(t('Disabled'), t('Enabled')), t('When enabled, single-use tags such as &lt;br&gt; will receive a forward slash at the end to comply with the XHTML-specs.'));  
   $output .= form_select(t('Fix entities inside values'), 'htmlcorrector_valueentities_'. $format, variable_get('htmlcorrector_valueentities_'. $format, 0), array(t('Disabled'), t('Enabled')), t('When enabled, entities inside HTML attribute-values will be checked as well. It\'s recommended to leave this off, as most people still don\'t escape them.<br />(e.g. &lt;a href=&quot;http://site.com/index.php?foo=bar&amp;bar=foo&quot; instead of &lt;a href=&quot;http://site.com/index.php?foo=bar&amp;amp;bar=foo&quot;)'));  
   return form_group(t('HTML Corrector'), $output);  
 }  
   
 function _htmlcorrector_process($text, $format) {  
   $smartclose = variable_get('htmlcorrector_smartclose_'. $format, 1);  
   $xhtmlify = variable_get('htmlcorrector_xhtmlify_'. $format, 0);  
   $valueentities = variable_get('htmlcorrector_valueentities_'. $format, 0);  
   
   // single-use tags, as defined by the HTML4 standard  
32    $singleuse = array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame');    $singleuse = array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame');
   // tags that are commonly nested and should not be 'smartclosed'  
   $notsmart = array('ul', 'div', 'ol', 'font');  
   
   $len = strlen($text);  
   $mode = array('text');  
   $opentags = array();  
   $output = '';  
33    
34    // Search the text character by character, skipping certain blocks at once.    // Properly entify angles
35    // We use a 'mode' system which defines what kind of situation we're in:    $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);
   //   text - regular case, nothing special  
   //   entity - activated when a & is found (e.g. &nbsp;)  
   //   tagname - activated after a <, switches to 'tag' after the initial tagname is known  
   //   tag - inside an html tag, after the initial tagname  
   //   attribute - inside an html attribute-value, inside the (optional) quotes  
   //  
   // The modes are stored in a stack-system, to allow entities inside attribute values, inside tags, etc.  
   //  
   // The tags are validated using a stack as well... all open tags are stored in $opentags as array($tagname, $smart)  
   // $tagname is the tagname of course, and $smart defines whether this tag has been closed already by the smartcloser  
   // Smartclosed tags are still added to the stack to remove optional end-tags that have been made obsolete.  
   
   for ($i = 0; $i < $len; $i++) {  
     $c = $text[$i];  
     switch ($mode[0]) {  
       case 'text':  
         if ($c == '<') {  
           array_unshift($mode, 'tagname');  
           $tagname = '';  
           $tag = '';  
           $closing = 0;  
           $smarty = 0;  
         } else if ($c == '&') {  
           array_unshift($mode, 'entity');  
           $output .= '&';  
           $entitylength = 0;  
         } else {  
           $block = strcspn(substr($text, $i), '<&');  
           if ($block) {  
             $output .= drupal_specialchars(substr($text, $i, $block));  
             $i += $block - 1;  
           }  
         }  
         break;  
36    
37        case 'entity':    // Splits tags from text
38          // Depending on where we are, we should put the output in    $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
39          // a different buffer. Since tags are only outputted when they're done,    // Note: PHP ensures the array consists of alternating delimiters and literals
40          // we should put it in $tag if we're inside an html attribute-value.    // and begins and ends with a literal (inserting $null as required).
         if ((count($mode) > 1) && ($mode[1] == 'attribute'))  
           $buffer = &$tag;  
         else  
           $buffer = &$output;  
         if (!eregi('[A-Z0-9#]', $c)) {  
           array_shift($mode);  
41    
42            if ($c == ';')    $tag = false; // Odd/even counter. Tag or no tag.
43              $buffer .= $c;    $stack = array();
44            else if ($entitylength) {    $output = '';
45              // common error: forgotten the end semi-colon    foreach ($split as $value) {
46              if ($valueentities || !($mode[0] == 'attribute')) {      // HTML tag
47                $buffer .= ';';      if ($tag) {
48                $i--;        list($tagname) = explode(' ', strtolower($value), 2);
49              }        // Closing tag
50              else {        if ($tagname{0} == '/') {
51                $buffer .= $c;          $tagname = substr($tagname, 1);
52              }          if (!in_array($tagname, $singleuse)) {
53            }            // See if we have other tags lingering first, and close them
54            else {            while (($stack[0] != $tagname) && count($stack)) {
55              // non-attribute ampersand... convert to &amp;              $output .= '</'. array_shift($stack) .'>';
56              $buffer .= 'amp;';            }
57              $i--;            // If the tag was not found, just leave it out;
58              if (count($stack)) {
59                $output .= '</'. array_shift($stack) .'>';
60            }            }
         } else {  
           $buffer .= $c;  
           $entitylength++;  
61          }          }
62          break;        }
63          // Opening tag
64        case 'tagname':        else {
65          if (($c == '/') && ($tagname == '')) {          // See if we have an identical tag already open and close it if desired.
66            // This is a closing tag          if (count($stack) && ($stack[0] == $tagname) && in_array($stack[0], $nonesting)) {
67            $closing = 1;            $output .= '</'. array_shift($stack) .'>';
         } else {  
           if (eregi('[A-Z!-]', $c)) {  
             $tagname .= $c;  
             if ($tagname == '!--') // This is a comment-tag, so we can switch to 'tag' mode  
               $mode[0] = 'tag';  
           } else {  
             // We've extracted the total tagname  
             if ($tagname == '') {  
               // This is not a tag, but a single, unescaped '<'. Back to the previous mode  
               $output .= '&lt;';  
               array_shift($mode);  
               $i--;  
               continue;  
             }  
             $mode[0] = 'tag';  
             if (!$closing) {  
               $smarty = 0;  
               $tn = strtolower($tagname);  
               // When smart-close is enabled, it's not allowed to nest identical tags...  
               // This helps fix errors such as using '<p>' but not '</p>' and '<option>' but not '</option>'.  
               // The $notsmart-array contains a list of tags that are commonly nested and must therefore not be smartclosed  
               if ($smartclose && count($opentags) && ($tagname[0] != '!') && (!in_array($tn, $notsmart))) {  
                 if (($tn == $opentags[0][0]) && (!in_array($tn, $singleuse))) {  
                   $output .= '</' . $tagname . '>';  
                   $smarty = 1;  
                 }  
               }  
               // When a tag has been smart-closed, make sure that, if there is an end-tag later on,  
               // we know it is obsolete. We set the currently open-tag's smartstatus to 1.  
               if (count($opentags))  
                 $opentags[0][1] = $smarty;  
               // Add a new tag to the list.  
               array_unshift($opentags, array($tn, 0));  
             }  
           }  
68          }          }
69        case 'tag':          // Push non-single-use tags onto the stack
70          if ($tagname == '!--') {          if (!in_array($tagname, $singleuse)) {
71            // This is a comment-tag            array_unshift($stack, $tagname);
           if ($c == '-') {  
             // Check for the end of this comment-tag  
             if (($text[$i + 1] == '-') && ($text[$i + 2] == '>')) {  
               array_shift($mode);  
               $output .= '<' . $tag . '-->';  
               $i+=2;  
             } else {  
               // This is a single dash/hyphen.  
               $tag .= $c;  
             }  
           } else {  
             // Skip to the next dash/hyphen. That might be the end of this comment-tag.  
             $block = strpos(substr($text, $i), '-');  
             if ($block) {  
               $tag .= drupal_specialchars(substr($text, $i, $block));  
               $i += $block - 1;  
             }  
           }  
         } else {  
           if ($c == '>') { // We're at the end of the tag.  
             $skiptag = 0;  
             if ($closing) {  
               // This is a closing tag. Let's see if we can find a matching opening tag.  
               if (_htmlcorrector_in_array_el($opentags, strtolower($tagname), 0)) {  
                 // Check if some unclosed tags are waiting to be closed before this one.  
                 while (count($opentags) && ($opentags[0][0] != strtolower($tagname))) {  
                   list($opentag, $skiptag) = array_shift($opentags);  
                   if (!in_array($opentag, $singleuse) && (!$skiptag) && ($opentag[0] != '!')) {  
                     $output .= '</' . $opentag . '>';  
                   }  
                 }  
                 // Remove the current tag from the opentags-list and get the smartclose-status.  
                 list(,$skiptag) = array_shift($opentags);  
               } else {  
                 // We couldn't find a matching opening tag. This is probably an incorrect closing-tag.  
                 // It should be removed.  
                 $skiptag = 1;  
               }  
             } else {  
               if (count($opentags)) {  
                 if ($text[$i - 1] == '/') {  
                   // This is a single-use XHTML-tag ending with a slash. We can remove it from $opentags.  
                   array_shift($opentags);  
                 } else if ($xhtmlify) {  
                   if (in_array($tagname, $singleuse)) {  
                     // This is a single-use HTML-tag. We need to XHTML-ify it and remove an optional end-tag.  
                     $tag .= ($text[$i - 1] == ' ') ? '/' : ' /';  
                     $opentags[0][1] = 1;  
                   }  
                 }  
               }  
             }  
             array_shift($mode);  
             if (!$skiptag) {  
               $output .= '<' . $tag . '>';  
             }  
           } else if ($c == '=') {  
             // We've hit an attribute-value.  
             array_unshift($mode, 'attribute');  
             $tag .= $c;  
   
             // Common-error: didn't use single or double quote to enclose value.  
             // We'll assume it stops at the next space or at the end of the tag.  
             $quote = (($text[++$i] == '\'') || ($text[$i] == '"')) ? array($text[$i]) : array(' ', '>');  
             if (in_array(' ', $quote)) {  
               $tag .= $text[$i];  
             } else {  
               $tag .= $text[$i];  
             }  
           } else {  
             $tag .= $c;  
           }  
72          }          }
73          break;          // Add trailing slash to single-use tags as per X(HT)ML.
74            else {
75        case 'attribute':            $value = rtrim($value, ' /') . ' /';
         if (in_array($c, $quote)) {  
           // We've hit the end of the value  
           array_shift($mode);  
   
           // If the value was not enclosed in quotes, then a '>' also means the end of the tag.  
           // We should skip back one character in order to activate the tag-closer in the next iteration.  
           if ($c == '>')  
             $i--;  
           else  
             $tag .= $c;  
         } else if (($c == '\\') && !in_array(' ', $quote) && (in_array($text[$i + 1], $quote) || ($text[$i + 1] == '\\'))) {  
           // This is an escaped quote, double-quote or backslash.  
           $tag .= $c . $text[++$i];  
         } else if ($c == '&') {  
           // This is an entity inside a value.  
           array_unshift($mode, 'entity');  
           $tag .= '&';  
         } else {  
           $tag .= $c;  
76          }          }
77          break;          $output .= '<'. $value .'>';
78          }
79      }      }
80    }      else {
81    // Check for remaining tags on the stack and close them.        // Passthrough
82    while (count($opentags)) {        $output .= $value;
     list($tagname, $skiptag) = array_shift($opentags);  
     if ((!$skiptag) && !in_array($tagname, $singleuse) && ($tagname[0] != '!')) {  
       $output .= '</' . $tagname . '>';  
83      }      }
84        $tag = !$tag;
85      }
86      // Close remaining tags
87      while (count($stack) > 0) {
88        $output .= '</'. array_shift($stack) .'>';
89    }    }
   
90    return $output;    return $output;
91  }  }
   
 ?>  

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.11

  ViewVC Help
Powered by ViewVC 1.1.2