/[drupal]/contributions/modules/beautify/beautify.module
ViewVC logotype

Contents of /contributions/modules/beautify/beautify.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.4 - (show annotations) (download) (as text)
Tue Jan 6 17:47:30 2009 UTC (10 months, 2 weeks ago) by psynaptic
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +85 -51 lines
File MIME type: text/x-php
Code style improvements and modifying comments.
1 <?php
2 // $Id$
3
4 /**
5 * @file
6 * HTML output processor for beautification, compacting and general processing.
7 *
8 * This module buffers the HTML output of Drupal and processes it through
9 * HTML Tidy, htmLawed or a simple built-in function to beautify, flatten or
10 * compact the HTML source code.
11 *
12 * It works with the cache system in that it sets the processed HTML to the
13 * page cache.
14 */
15
/**
 * Implementation of hook_menu().
 *
 * Declares the single administration page of the module, which renders the
 * beautify_settings() form through drupal_get_form().
 *
 * @return
 *   An array of menu items keyed by path.
 */
function beautify_menu() {
  return array(
    'admin/settings/beautify' => array(
      'title' => 'Beautify',
      'description' => 'Configure settings for the processing of HTML output to the browser.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('beautify_settings'),
      'access arguments' => array('administer beautify'),
    ),
  );
}
31
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function beautify_perm() {
  $permissions = array();
  // Access to the settings page.
  $permissions[] = 'administer beautify';
  // Checked in the init/exit hooks before output is buffered and processed.
  $permissions[] = 'receive processed html';
  // Checked before HTML Tidy errors are appended to the page.
  $permissions[] = 'use beautify debug mode';

  return $permissions;
}
42
/**
 * Implementation of hook_theme().
 *
 * Registers the theme hook backed by theme_beautify_htmltidy_errors(). The
 * hook name is the theme function name without its "theme_" prefix, and the
 * function's parameters are declared under the 'arguments' key as required
 * by the Drupal 6 theme registry.
 *
 * @return
 *   An array of theme hook definitions keyed by hook name.
 */
function beautify_theme($existing, $type, $theme, $path) {
  return array(
    'beautify_htmltidy_errors' => array(
      'arguments' => array('errors' => NULL),
    ),
  );
}
49
/**
 * Form builder for the Beautify admin settings form.
 *
 * Shows the global on/off switch, the processing method and mode, and a
 * collapsed "Advanced options" fieldset whose contents depend on the method
 * that is currently active. Every element key is the name of the variable it
 * is stored in, because system_settings_form() saves values by element key.
 *
 * @return
 *   The form array wrapped by system_settings_form().
 */
function beautify_settings() {
  $form = array();

  // Fetch an object with the available methods/modes and the active values.
  $methods = beautify_methods();

  $form['beautify_enabled'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable output processing'),
    '#default_value' => variable_get('beautify_enabled', 1),
  );
  $form['beautify_method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#options' => $methods->methods,
    '#default_value' => $methods->default,
  );
  $form['beautify_mode'] = array(
    '#type' => 'radios',
    '#title' => t('Mode'),
    '#options' => $methods->options,
    '#default_value' => $methods->mode,
  );

  // Advanced options.
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  switch ($methods->default) {
    case 'htmltidy':
      $form['options'] += _beautify_settings_htmltidy();
      break;

    case 'htmlawed':
      $form['options'] += _beautify_settings_htmlawed();
      break;
  }

  return system_settings_form($form);
}

/**
 * Builds the advanced option elements for the HTML Tidy method.
 *
 * @return
 *   An array of form elements keyed by variable name.
 */
function _beautify_settings_htmltidy() {
  $elements = array();

  $elements['beautify_htmltidy_valid_xhtml'] = array(
    '#type' => 'checkbox',
    '#title' => t('Output valid XHTML'),
    '#default_value' => variable_get('beautify_htmltidy_valid_xhtml', 1),
  );
  $elements['beautify_htmltidy_warnings'] = array(
    '#type' => 'checkbox',
    '#title' => t('Append errors'),
    '#default_value' => variable_get('beautify_htmltidy_warnings', 0),
  );
  $elements['beautify_htmltidy_doctype'] = array(
    '#type' => 'radios',
    '#title' => t('DOCTYPE'),
    '#description' => t('This option specifies the DOCTYPE declaration generated by Tidy.'),
    '#options' => array(
      'omit' => t("<strong>Omit</strong>: the output won't contain a DOCTYPE declaration"),
      'auto' => t('<strong>Auto</strong>: Use an educated guess based upon the contents of the document'),
      'strict' => t('<strong>Strict</strong>: set the DOCTYPE to the strict DTD'),
      'transitional' => t('<strong>Transitional</strong>: set the DOCTYPE to the transitional (loose) DTD'),
      'theme' => t('<strong>Theme</strong>: Use the existing DOCTYPE from the theme (page.tpl.php file)'),
    ),
    '#default_value' => variable_get('beautify_htmltidy_doctype', 'theme'),
  );

  $elements['msword'] = array(
    '#type' => 'fieldset',
    '#title' => t('Clean MS Word'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  // The key must be 'beautify_htmltidy_word_bare': that is the variable the
  // default below reads and the one passed to Tidy as --bare.
  $elements['msword']['beautify_htmltidy_word_bare'] = array(
    '#type' => 'checkbox',
    '#title' => t('Convert non-breaking spaces to spaces'),
    '#description' => t('This option specifies if Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input.'),
    '#default_value' => variable_get('beautify_htmltidy_word_bare', 0),
  );
  $elements['msword']['beautify_htmltidy_word_2000'] = array(
    '#type' => 'checkbox',
    '#title' => t('Strip MS Word 2000 HTML'),
    '#description' => t('This option specifies if Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". Doesn\'t handle embedded images or VML. You should consider using Word\'s "Save As: Web Page, Filtered".'),
    '#default_value' => variable_get('beautify_htmltidy_word_2000', 0),
  );
  $elements['msword']['beautify_htmltidy_word_attributes'] = array(
    '#type' => 'checkbox',
    '#title' => t('Strip proprietary attributes'),
    '#description' => t('This option specifies if Tidy should strip out proprietary attributes, such as MS data binding attributes.'),
    '#default_value' => variable_get('beautify_htmltidy_word_attributes', 0),
  );

  return $elements;
}

/**
 * Builds the advanced option elements for the htmLawed method.
 *
 * @return
 *   An array of form elements keyed by variable name.
 */
function _beautify_settings_htmlawed() {
  $elements = array();

  $elements['beautify_htmlawed_valid_xhtml'] = array(
    '#type' => 'checkbox',
    '#title' => t('Output valid XHTML'),
    '#description' => t('Output the most valid XHTML possible.'),
    '#default_value' => variable_get('beautify_htmlawed_valid_xhtml', 1),
  );
  $elements['beautify_htmlawed_balance_tags'] = array(
    '#type' => 'checkbox',
    '#title' => t('Balance tags'),
    '#description' => t('Balance tags for well-formedness and proper nesting.'),
    '#default_value' => variable_get('beautify_htmlawed_balance_tags', 1),
  );
  $elements['beautify_htmlawed_css_expressions'] = array(
    '#type' => 'checkbox',
    '#title' => t('Allow CSS expressions'),
    '#description' => t('Allow dynamic CSS expression by not removing the expression from CSS property values in style attributes. CSS expressions only work in Internet Explorer 5, 6 and 7.'),
    '#default_value' => variable_get('beautify_htmlawed_css_expressions', 0),
  );
  $elements['beautify_htmlawed_keep_bad'] = array(
    '#type' => 'radios',
    '#title' => t('Bad tags'),
    '#description' => t('Neutralize bad tags by converting < and > to entities, or remove them.'),
    '#options' => array(
      0 => t('Remove'),
      1 => t('Neutralize both tags and element content'),
      2 => t('Remove tags but neutralize element content'),
      3 => t('Neutralize both tags and element content but remove if text is invalid in parent element'),
      4 => t('Remove tags but neutralize element content but remove if text is invalid in parent element'),
      5 => t('Neutralize both tags and element content but line-breaks, tabs and spaces are left'),
      6 => t('Remove tags but neutralize element content but line-breaks, tabs and spaces are left'),
    ),
    '#default_value' => variable_get('beautify_htmlawed_keep_bad', 2),
  );
  $elements['beautify_htmlawed_strict_tags'] = array(
    '#type' => 'radios',
    '#title' => t('Strict tags'),
    '#description' => t('Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: &lt;applet&gt;, &lt;center&gt;, &lt;dir&gt;, &lt;embed&gt;, &lt;font&gt;, &lt;isindex&gt;, &lt;menu&gt;, &lt;s&gt;, &lt;strike&gt;, &lt;u&gt;.'),
    '#options' => array(
      0 => t('No'),
      1 => t("Yes, but leave applet, embed and isindex elements that currently can't be transformed"),
      2 => t('yes, removing applet, embed and isindex elements and their contents (nested elements remain)'),
    ),
    '#default_value' => variable_get('beautify_htmlawed_strict_tags', 1),
  );
  $elements['beautify_htmlawed_clean_msword'] = array(
    '#type' => 'radios',
    '#title' => t('Clean MS Word'),
    '#description' => t('Replace discouraged characters introduced by Microsoft Word, etc.'),
    '#options' => array(
      0 => t('No'),
      1 => t('Yes'),
      2 => t('Yes, plus replace special single & double quotes with ordinary ones'),
    ),
    '#default_value' => variable_get('beautify_htmlawed_clean_msword', 0),
  );
  $elements['beautify_htmlawed_comments'] = array(
    '#type' => 'radios',
    '#title' => t('Comment handling'),
    '#options' => array(
      0 => t("Don't consider comments as markup and proceed as if plain text"),
      1 => t('Remove'),
      2 => t('Allow, but neutralize any &lt;, &gt;, and &amp; inside by converting to named entities'),
      3 => t('Allow'),
    ),
    '#default_value' => variable_get('beautify_htmlawed_comments', 3),
  );

  return $elements;
}
205
/**
 * Calculate the available methods and the active method/mode.
 *
 * Note that this function has a side effect: when the stored mode is not
 * offered by the active method, the 'beautify_mode' variable is rewritten to
 * the first mode that is available.
 *
 * @return
 *   An object with the properties:
 *   - methods: array of available processing methods, keyed by machine name.
 *   - default: machine name of the stored (active) method.
 *   - options: array of modes offered by the active method.
 *   - mode: key of the active mode within options.
 */
function beautify_methods() {
  // Create the container explicitly instead of relying on PHP turning an
  // undefined variable into an object on first property assignment.
  $methods = new stdClass();

  // Always add the built-in method since this should always be available.
  $methods->methods = array('builtin' => 'Built-in');
  // If HTML Tidy is found, add that method.
  if (beautify_htmltidy_test()) {
    $methods->methods['htmltidy'] = 'HTMLTidy';
  }
  // If htmLawed is found in the module directory, add that method.
  $path = drupal_get_path('module', 'beautify');
  if (file_exists($path .'/htmLawed.php')) {
    $methods->methods['htmlawed'] = 'htmLawed';
  }

  // Stash the current active method.
  $methods->default = variable_get('beautify_method', 'builtin');
  // Get the available options for the active method.
  $methods->options = beautify_get_options();
  // Get the active option for this method.
  $methods->mode = $mode = variable_get('beautify_mode', 2);

  // If the stored mode is not one of the available options, fall back to the
  // first available one and persist that choice.
  if (!isset($methods->options[$mode])) {
    $available = array_keys($methods->options);
    if ($available) {
      $methods->mode = $available[0];
      variable_set('beautify_mode', $available[0]);
    }
  }

  return $methods;
}
239
/**
 * Returns the modes available to the active method.
 *
 * @return
 *   An array of mode labels keyed by mode id (1 = Beautify, -1 = Compact,
 *   2 = Flatten) with the modes the active method does not offer removed.
 */
function beautify_get_options() {
  $modes = array(
    1 => 'Beautify',
    -1 => 'Compact',
    2 => 'Flatten',
  );

  // Method name => mode id that method does not offer: the built-in method
  // has no Beautify mode and htmLawed has no Flatten mode.
  $unsupported = array(
    'builtin' => 1,
    'htmlawed' => 2,
  );

  $active = variable_get('beautify_method', 'builtin');
  foreach ($unsupported as $method => $mode_id) {
    if ($active == $method) {
      unset($modes[$mode_id]);
    }
  }

  return $modes;
}
260
/**
 * Implementation of hook_init().
 *
 * Opens an output buffer so the page HTML can be captured and processed when
 * the request ends.
 */
function beautify_init() {
  // Only users with the permission get processed output.
  if (!user_access('receive processed html')) {
    return;
  }
  // Processing can be switched off globally on the settings page.
  if (!variable_get('beautify_enabled', 1)) {
    return;
  }
  // Ensure we are not serving a cached page.
  if (function_exists('drupal_set_content')) {
    ob_start();
  }
}
272
/**
 * Implementation of hook_exit().
 *
 * Collects the buffered page output, discards the buffer and hands the HTML
 * to beautify_router() for processing and printing.
 *
 * @param $destination
 *   When not NULL the buffer is left untouched and nothing is processed.
 */
function beautify_exit($destination = NULL) {
  if (!user_access('receive processed html') || !variable_get('beautify_enabled', 1)) {
    return;
  }
  // Ensure we are not serving a cached page.
  if (!function_exists('drupal_set_content') || $destination != NULL) {
    return;
  }

  $buffered = ob_get_contents();
  ob_end_clean();

  // Send the output to the router for processing.
  beautify_router($buffered);
}
292
/**
 * Route the buffered output to the active processing routine.
 *
 * The output of the active method is handed to beautify_set_cache() and then
 * printed. If the stored method is unknown, or the processor returns nothing,
 * the unprocessed input is used so the visitor never receives a blank page.
 *
 * @param $input
 *   The buffered output to be processed.
 */
function beautify_router($input) {
  $processor = variable_get('beautify_method', 'builtin');
  $mode = variable_get('beautify_mode', 2);

  switch ($processor) {
    case 'builtin':
      $output = beautify_process($input, $mode);
      break;

    case 'htmlawed':
      $output = beautify_htmlawed_process($input);
      break;

    case 'htmltidy':
      $output = beautify_htmltidy_process($input);
      break;

    default:
      // Unknown method stored in the variable: pass the page through as is.
      $output = $input;
      break;
  }

  // A processor that failed must not wipe out the page.
  if (!is_string($output) || $output === '') {
    $output = $input;
  }

  beautify_set_cache($output);
  print $output;
}
322
/**
 * Set the processed HTML to the page cache.
 *
 * Only anonymous GET requests for which page_get_cache(TRUE) succeeds are
 * stored. When page compression is enabled and gzencode() exists, the data is
 * gzip-encoded here unless zlib output compression is already active.
 *
 * @param $input
 *   The processed HTML to store.
 */
function beautify_set_cache($input) {
  global $user, $base_root;

  // This will fail in some cases, see page_get_cache() for the explanation.
  $cacheable = !$user->uid && $_SERVER['REQUEST_METHOD'] == 'GET' && page_get_cache(TRUE);
  if (!$cacheable) {
    return;
  }

  $store = TRUE;
  if (variable_get('page_compression', TRUE) && function_exists('gzencode')) {
    $coding = zlib_get_coding_type();
    if ($coding == 'deflate') {
      // We do not store the data in case the zlib mode is deflate.
      $store = FALSE;
    }
    elseif ($coding == FALSE) {
      $input = gzencode($input, 9, FORCE_GZIP);
    }
    // The remaining case is 'gzip': the data is already compressed and there
    // is nothing left to do but to store it.
  }

  if ($store && $input) {
    cache_set($base_root . request_uri(), $input, 'cache_page', CACHE_TEMPORARY, drupal_get_headers());
  }
}
352
/**
 * Builds the HTML Tidy argument list, runs Tidy and post-processes the result.
 *
 * Errors and warnings are returned through the reference parameters; both are
 * always arrays when this function returns. If Tidy is missing or fails, the
 * unmodified input is returned so the page is never emptied.
 *
 * @param $input
 *   HTML string to be tidied.
 * @param $errors
 *   An array to be filled with error info.
 * @param $warnings
 *   An array to be filled with warning info.
 * @return
 *   The tidied string, or $input unchanged when Tidy could not process it.
 */
function beautify_htmltidy_command($input, &$errors, &$warnings) {
  $errors = array();
  $warnings = array();

  $path = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($path)) {
    // watchdog() translates the message itself, so pass the raw string and
    // the placeholders separately, with the severity as fourth argument.
    watchdog('beautify', "Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path), WATCHDOG_WARNING);
    $errors[] = t("Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path));
    return $input;
  }

  $mode = variable_get('beautify_mode', 2);
  $args = array();

  /*
   * Do not pass the parameters their default values as defined in the
   * documentation for tidy (http://www.w3.org/People/Raggett/tidy/), or weird
   * stuff starts to happen.
   */
  // Output valid XHTML.
  $args[] = '--output-xhtml '. (variable_get('beautify_htmltidy_valid_xhtml', 1) ? 1 : 0);

  // Choose DOCTYPE method. The default matches the settings form ('theme').
  // For the "Theme" option Tidy omits the DOCTYPE and the one found in the
  // input is put back in front of the tidied output.
  $doctype = '';
  $doctype_mode = variable_get('beautify_htmltidy_doctype', 'theme');
  if ($doctype_mode == 'theme') {
    $args[] = '--doctype omit';
    $document = beautify_parse_html($input);
    if (!empty($document->dtd)) {
      $doctype = $document->dtd ."\n";
    }
  }
  else {
    $args[] = '--doctype '. escapeshellarg($doctype_mode);
  }

  // If mode is set to Beautify we need to set indentation to auto. Yes is not
  // recommended in the Tidy reference.
  if ($mode == 1) {
    $args[] = '--indent auto';
  }

  // TODO: Add these options to the settings page.
  if (!variable_get('beautify_htmltidy_verbose', 0)) {
    $args[] = '-q';
  }
  if (!variable_get('beautify_htmltidy_wrapphp', 0)) {
    $args[] = '--wrap-php no';
  }
  if (variable_get('beautify_htmltidy_clean', 0)) {
    $args[] = '--clean yes';
  }
  if (variable_get('beautify_htmltidy_enclosetext', 0)) {
    $args[] = '--enclose-text yes';
  }
  if (variable_get('beautify_htmltidy_encloseblocktext', 0)) {
    $args[] = '--enclose-block-text yes';
  }

  // Clean MS Word.
  $args[] = '--bare '. (variable_get('beautify_htmltidy_word_bare', 0) ? 1 : 0);
  $args[] = '--word-2000 '. (variable_get('beautify_htmltidy_word_2000', 0) ? 1 : 0);
  $args[] = '--drop-proprietary-attributes '. (variable_get('beautify_htmltidy_word_attributes', 0) ? 1 : 0);

  // User specified configuration file. Tidy reads it with the single-dash
  // "-config" switch; a double dash would be parsed as a config option name.
  $conf = variable_get('htmltidy_confpath', '');
  if ($conf && file_exists($conf)) {
    $args[] = '-config '. escapeshellarg($conf);
  }

  // Don't add a meta tag with the Tidy info.
  $args[] = '--tidy-mark no';
  // Don't wrap output - this looks terrible so we always set this to off.
  $args[] = '-wrap 0';
  // Output only UTF-8.
  $args[] = '-utf8';
  // Modify the input file instead of outputting to stdout.
  $args[] = '-modify';

  // Run the processing with the specified arguments. Real variables are
  // passed so the by-reference results actually reach our caller.
  $output = '';
  $status = beautify_htmltidy_run($input, $args, $output, $errors, $warnings);

  // Anything other than success (0) or warnings (1) means Tidy did not touch
  // the document; return the input as is, without prepending the DOCTYPE a
  // second time.
  if (($status !== 0 && $status !== 1) || !is_string($output) || $output === '') {
    return $input;
  }

  // TODO: Restore the debug output (optionally running Tidy a second time so
  // the reported line numbers match the delivered document).

  // Both post-processing variants start by removing the newline from empty
  // script tags.
  if ($mode == 1 || $mode == 2) {
    $output = preg_replace("@(<script[^>]*>)\n(<\/script>)@", '$1$2', $output);
  }

  // Additional processing for beautified output.
  if ($mode == 1) {
    // Add correct indentation for comments inside script tags.
    $output = preg_replace("@(<script[^>]*>)\n(<!--)@", "$1\n $2", $output);
    // Add newline and correct indentation for comments immediately following
    // a closing script tag.
    $output = preg_replace("@(<\/script>)(<!--)@", "$1\n $2", $output);
    // Add newline and correct indentation to opening tags immediately
    // following a closing div tag.
    $output = preg_replace("@([ ]*)<\/div>(<)@", "$1</div>\n$1$2", $output);
  }

  // Additional processing for flattened output.
  if ($mode == 2) {
    // Add newline before comments immediately following a closing script tag.
    $output = preg_replace("@(<\/script>)(<!--)@", "$1\n$2", $output);
    // Remove all leading spaces.
    $output = preg_replace('@(\n[ ]*<)@', "\n<", $output);
  }

  return $doctype . $output;
}
461
/**
 * Runs the HTML Tidy binary on a string via temporary files.
 *
 * The input is written to a temporary file which Tidy modifies in place;
 * Tidy's messages are collected from a second temporary file. Both files are
 * removed before returning.
 *
 * @param $input
 *   The HTML to process.
 * @param $args
 *   An array of ready-to-use command line argument strings for Tidy.
 * @param $output
 *   Set to the processed HTML, or to $input when Tidy could not process it.
 * @param $errors
 *   Set to an array of error lines reported by Tidy.
 * @param $warnings
 *   Set to an array of warning lines reported by Tidy.
 * @return
 *   Tidy's exit status: 0 on success, 1 when there were warnings, 2 on error
 *   (also used when Tidy could not be started at all).
 */
function beautify_htmltidy_run($input, $args, &$output, &$errors, &$warnings) {
  $errors = array();
  $warnings = array();
  // Until Tidy succeeds the best available output is the untouched input.
  $output = $input;

  $tidypath = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($tidypath)) {
    watchdog('beautify', 'Failed to find HTML Tidy executable at %beautify_htmltidy_path, not using tidy', array('%beautify_htmltidy_path' => $tidypath), WATCHDOG_WARNING);
    return 2;
  }

  // Write input to a file because tidy is run in -modify mode on a file.
  $dirty_file = tempnam(file_directory_temp(), 'drup');
  if ($dirty_file === FALSE || file_put_contents($dirty_file, $input) === FALSE) {
    watchdog('beautify', 'Failed to write the temporary input file for HTML Tidy.', array(), WATCHDOG_WARNING);
    if ($dirty_file !== FALSE && file_exists($dirty_file)) {
      unlink($dirty_file);
    }
    return 2;
  }

  // Warnings are saved to file.
  $warnings_file = tempnam(file_directory_temp(), 'warn');
  if ($warnings_file === FALSE) {
    watchdog('beautify', 'Failed to create the temporary warnings file for HTML Tidy.', array(), WATCHDOG_WARNING);
    unlink($dirty_file);
    return 2;
  }
  $args[] = '-f '. escapeshellarg($warnings_file);

  // Run Tidy with the right options. exec() is used instead of system() so
  // nothing Tidy prints can leak into the page being delivered.
  $command = escapeshellarg($tidypath) .' '. implode(' ', $args) .' '. escapeshellarg($dirty_file);
  $return_value = 2;
  $ignored = array();
  exec($command, $ignored, $return_value);

  $messages = file_exists($warnings_file) ? file($warnings_file) : FALSE;
  if ($messages === FALSE) {
    $messages = array();
  }

  // return_value 0 means success. 1 means warning. 2 means error, the file
  // will be there, but not have been touched.
  switch ($return_value) {
    case 0:
      $tidied = file_get_contents($dirty_file);
      break;

    case 1:
      $warnings = array_map('trim', $messages);
      $tidied = file_get_contents($dirty_file);
      break;

    default:
      // Exit status 2 or anything unexpected (e.g. the shell could not start
      // the binary): separate errors and warnings into two different arrays
      // and keep the untouched input as output.
      foreach ($messages as $line) {
        $line = trim($line);
        if (preg_match('|^line \d+ column \d+ - Warning:|', $line)) {
          $warnings[] = $line;
        }
        else {
          $errors[] = $line;
        }
      }
      $tidied = FALSE;
      break;
  }
  if ($tidied !== FALSE) {
    $output = $tidied;
  }

  // Delete the temporary files.
  unlink($dirty_file);
  if (file_exists($warnings_file)) {
    unlink($warnings_file);
  }

  return $return_value;
}
518
/**
 * Locates the HTML Tidy binary and checks that it can be executed.
 *
 * When the stored path does not exist, a list of common locations is
 * searched and the first hit is saved to the 'beautify_htmltidy_path'
 * variable.
 *
 * @param $message
 *   Assigned to an explanation of what was searched and what went wrong.
 * @param $version
 *   Assigned to the first line printed by "tidy -v" when the binary runs.
 * @return
 *   TRUE if found,
 *   FALSE if error.
 */
function beautify_htmltidy_test(&$message = '', &$version = '') {
  $path = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($path)) {
    // Module directory with backslashes normalised to forward slashes.
    $module_dir = preg_replace('@\\\\+@', '/', dirname(__FILE__));

    if (substr(PHP_OS, 0, 3) == 'WIN') {
      // Windows paths.
      $possible_paths = array(
        $module_dir .'/bin/tidy.exe',
      );
    }
    else {
      // Unix paths.
      $possible_paths = array(
        '/bin/tidy',
        '/usr/bin/tidy',
        '/usr/local/bin/tidy',
        $module_dir .'/bin/tidy',
      );
    }

    $message = t('Searching for HTML Tidy in:');
    $message .= '<ul>';
    foreach ($possible_paths as $path) {
      $message .= '<li>'. $path .'</li>';
      // Stop at the first hit; $path keeps that value after the loop.
      if (file_exists($path)) {
        break;
      }
    }
    $message .= '</ul>';

    if (!file_exists($path)) {
      $message .= t('Could not find HTML Tidy binary.');
      return FALSE;
    }
    variable_set('beautify_htmltidy_path', $path);
  }

  // Test the HTML Tidy binary by running a shell command to return the
  // version. The path is quoted as a single argument so directories that
  // contain spaces still work.
  $command = escapeshellarg($path) .' -v';
  $response = array();
  if (exec($command, $response)) {
    $version = $response[0];
    return TRUE;
  }

  $message .= t('Found an HTML Tidy binary but it didn\'t seem to run properly. !command failed to respond correctly.',
    array('!command' => $command));
  return FALSE;
}
577
/**
 * Parse the HTML document into sections.
 *
 * Every property of the returned object is always defined; sections that are
 * not found in the input are empty strings. When no body element is found the
 * whole trimmed input is used as the body.
 *
 * @param $input
 *   The buffered HTML to parse.
 * @return
 *   An object containing:
 *   - dtd: everything up to and including the first '.dtd">'.
 *   - doctype: everything up to and including the opening html tag, followed
 *     by a newline.
 *   - head: the head element with indented/blank lines collapsed, followed by
 *     a newline.
 *   - body: the trimmed contents of the body element.
 *   - bodyattr: the trimmed attribute string of the body tag.
 *
 * TODO: For performance reasons make this function accept a string to ask for
 * a specific part of the document. We can still send the whole object by
 * default if still needed.
 */
function beautify_parse_html($input) {
  $output = new stdClass();
  $output->dtd = '';
  $output->doctype = '';
  $output->head = '';
  $output->body = '';
  $output->bodyattr = '';

  // DTD: just the doctype without the html tag.
  if (preg_match('/(.*\.dtd">)/Umsi', $input, $matches)) {
    $output->dtd = $matches[1];
  }

  // DOCTYPE: doctype plus opening html tag with attributes.
  if (preg_match('/(.*<html.*>)/Umsi', $input, $matches)) {
    $output->doctype = $matches[1] ."\n";
  }

  // head: the whole head section.
  if (preg_match('@(<head.*>.*</head\\s*>)@Umsi', $input, $matches)) {
    $head = trim($matches[1]);
    $head = str_replace(array("\n ", "\n\n"), "\n", $head);
    $output->head = $head ."\n";
  }

  // body: attributes of the body tag and the whole body section.
  if (preg_match('@<body(.*)>(.*)</body\\s*>@Umsi', $input, $matches)) {
    $output->bodyattr = trim($matches[1]);
    $output->body = trim($matches[2]);
  }
  else {
    $output->body = trim($input);
  }

  return $output;
}
631
/**
 * Built-in processing function for compacting or flattening the output.
 *
 * @param $input
 *   The HTML output to process.
 * @param $mode
 *   The mode of operation defined on beautify_get_options(): -1 compacts,
 *   2 flattens, any other value returns the input unchanged.
 * @return
 *   The processed output.
 */
function beautify_process($input, $mode) {
  switch ($mode) {

    case -1: // Compact
      // Keep everything up to the first '>' (the doctype on a full page)
      // untouched and compact only what follows. Only this leading prefix is
      // cut off; identical tags later in the document must stay in place.
      $prefix = '';
      $rest = $input;
      if (preg_match('/(.*)>/Umsi', $input, $matches)) {
        $prefix = $matches[0];
        $rest = substr($input, strlen($prefix));
      }
      $output = preg_replace("@\n\s+@", " ", $rest); // newline and 1 or more spaces
      $output = preg_replace("@\s\s\s+@", " ", $output); // three or more spaces
      $output = preg_replace("@>\n@", "> ", $output); // closing tag marker and a new line
      $output = $prefix . $output;
      break;

    case 2: // Flatten
      $output = preg_replace("@\n\s+@", "\n", $input); // a newline and some spaces
      // NOTE(review): this drops runs of two or more whitespace characters
      // entirely, which also joins words separated by a double space.
      $output = preg_replace("@\s\s+@", "", $output); // two or more spaces
      break;

    default:
      $output = $input;
      break;
  }

  return $output;
}
666
/**
 * Processes the HTML document through htmLawed.
 *
 * Only the body of the document is passed to htmLawed; the doctype, the head
 * and the body tag are re-assembled around the result. If the htmLawed
 * library cannot be loaded the input is returned unchanged.
 *
 * @param $input
 *   The buffered output to process.
 * @return
 *   The processed output.
 */
function beautify_htmlawed_process($input) {
  // Load the library once, from the module directory.
  if (!function_exists('htmLawed')) {
    $library = dirname(__FILE__) .'/htmLawed.php';
    if (!file_exists($library)) {
      return $input;
    }
    include_once $library;
  }

  // Split the document into sections. Sections missing from the input may be
  // unset on the returned object, so read them defensively.
  $html_document = beautify_parse_html($input);
  $doctype = isset($html_document->doctype) ? $html_document->doctype : '';
  $head_source = isset($html_document->head) ? $html_document->head : '';
  $body_source = isset($html_document->body) ? $html_document->body : '';
  $body_attributes = isset($html_document->bodyattr) ? $html_document->bodyattr : '';

  // If compact mode is active we need to process the head since htmLawed
  // won't do that for us.
  $mode = variable_get('beautify_mode', 2);
  if ($mode == -1) { // Compact
    $head = beautify_process($head_source, -1) ."\n";
  }
  else {
    $head = $head_source;
  }

  // Set up the config arguments.
  $config = array(
    'balance' => variable_get('beautify_htmlawed_balance_tags', 1),
    'valid_xhtml' => variable_get('beautify_htmlawed_valid_xhtml', 1),
    'clean_ms_char' => variable_get('beautify_htmlawed_clean_msword', 0),
    'comment' => variable_get('beautify_htmlawed_comments', 3),
    'css_expression' => variable_get('beautify_htmlawed_css_expressions', 0),
    'schemes' => '*:*',
    'make_tag_strict' => variable_get('beautify_htmlawed_strict_tags', 1),
    'keep_bad' => variable_get('beautify_htmlawed_keep_bad', 2),
  );

  // Our modes are slightly different to those accepted by htmLawed.
  if ($mode == 1) {
    $config['tidy'] = 2;
  }
  elseif ($mode == -1) {
    $config['tidy'] = -1;
  }

  // Process the body of the document through htmLawed.
  $body = htmLawed($body_source, $config);

  // Concatenate the separate parts back together.
  $body_tag = '<body'. ($body_attributes !== '' ? ' '. $body_attributes : '') .'>';
  return $doctype . $head . $body_tag ."\n". trim($body) ."\n</body>\n</html>";
}
719
/**
 * Process the buffered output through the HTML Tidy processor.
 *
 * In compact mode the document is first tidied and the tidied result is then
 * run through the built-in compact routine. When debug output is enabled and
 * permitted, Tidy's messages are appended to the page.
 *
 * @param $input
 *   The buffered output to process.
 * @return
 *   The processed output.
 */
function beautify_htmltidy_process($input) {
  $errors = array();
  $warnings = array();
  $output = beautify_htmltidy_command($input, $errors, $warnings);

  // Never replace the page with nothing if Tidy could not be run.
  if (!is_string($output) || $output === '') {
    $output = $input;
  }

  // Merge the errors and warnings together with the errors listed first.
  $errors = array_merge(
    is_array($errors) ? $errors : array(),
    is_array($warnings) ? $warnings : array()
  );

  // Compact the tidied output (not the raw input, which would throw away
  // everything Tidy just did).
  $mode = variable_get('beautify_mode', 2);
  if ($mode == -1) {
    $output = beautify_process($output, $mode);
  }

  if ($errors && variable_get('beautify_htmltidy_warnings', 0) && user_access('use beautify debug mode')) {
    // Tidy's messages may quote markup from the page, so escape them.
    $errors = array_map('check_plain', $errors);
    // theme() takes the hook name without the "theme_" prefix; the hook has
    // to be registered under that name in beautify_theme().
    $output .= theme('beautify_htmltidy_errors', $errors);
  }

  return $output;
}
748
/**
 * Theme function for displaying the errors.
 *
 * @param $errors
 *   An array of errors generated by HTML Tidy.
 * @return
 *   The HTML for displaying the errors on the page: a wrapper div with a
 *   heading followed by the themed item list.
 */
function theme_beautify_htmltidy_errors($errors) {
  $heading = '<h3>Beautify Errors</h3>';
  $list = theme('item_list', $errors);

  return '<div class="beautify-errors">'. $heading . $list .'</div>';
}

  ViewVC Help
Powered by ViewVC 1.1.2