| 1 |
<?php
|
| 2 |
// $Id$
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
* HTML output processor for beautification, compacting and general processing.
|
| 7 |
*
|
| 8 |
* This module buffers the HTML output of Drupal and processes it through
|
| 9 |
* HTML Tidy, htmLawed or a simple built-in function to beautify, flatten or
|
| 10 |
* compact the HTML source code.
|
| 11 |
*
|
| 12 |
* It works with the cache system in that it sets the processed HTML to the
|
| 13 |
* page cache.
|
| 14 |
*/
|
| 15 |
|
| 16 |
/**
 * Implementation of hook_menu().
 *
 * Registers the administration page on which the output processing is
 * configured.
 *
 * @return
 *   An array of menu items keyed by their path.
 */
function beautify_menu() {
  return array(
    'admin/settings/beautify' => array(
      'title' => 'Beautify',
      'description' => 'Configure settings for the processing of HTML output to the browser.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('beautify_settings'),
      'access arguments' => array('administer beautify'),
    ),
  );
}
|
| 31 |
|
| 32 |
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function beautify_perm() {
  $permissions = array();

  // Access to the settings page.
  $permissions[] = 'administer beautify';
  // Users holding this permission get their pages run through the processor.
  $permissions[] = 'receive processed html';
  // Users holding this permission may see the processor's error report.
  $permissions[] = 'use beautify debug mode';

  return $permissions;
}
|
| 42 |
|
| 43 |
/**
 * Implementation of hook_theme().
 *
 * The registered hook name has to match the implementing function minus its
 * "theme_" prefix, i.e. theme_beautify_htmltidy_errors(), and the parameters
 * of that function are declared under the 'arguments' key.
 *
 * @return
 *   The theme registry entries provided by this module.
 */
function beautify_theme($existing, $type, $theme, $path) {
  return array(
    'beautify_htmltidy_errors' => array(
      'arguments' => array('errors' => NULL),
    ),
  );
}
|
| 49 |
|
| 50 |
/**
 * Admin settings form.
 *
 * Builds the general switches (enabled, method, mode) followed by an
 * "Advanced options" fieldset whose content depends on the active method.
 * Every element is keyed by the name of the variable it configures because
 * system_settings_form() stores each submitted value under its element key.
 *
 * @return
 *   The form array, wrapped by system_settings_form().
 */
function beautify_settings() {
  // Fetch an object with the options including defaults.
  $methods = beautify_methods();

  $form = array();
  $form['beautify_enabled'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable output processing'),
    '#default_value' => variable_get('beautify_enabled', 1),
  );
  $form['beautify_method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#options' => $methods->methods,
    '#default_value' => $methods->default,
  );
  $form['beautify_mode'] = array(
    '#type' => 'radios',
    '#title' => t('Mode'),
    '#options' => $methods->options,
    '#default_value' => $methods->mode,
  );

  // Advanced options.
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  switch ($methods->default) {

    // HTML Tidy options
    case 'htmltidy':
      $form['options']['beautify_htmltidy_valid_xhtml'] = array(
        '#type' => 'checkbox',
        '#title' => t('Output valid XHTML'),
        '#default_value' => variable_get('beautify_htmltidy_valid_xhtml', 1),
      );
      $form['options']['beautify_htmltidy_warnings'] = array(
        '#type' => 'checkbox',
        '#title' => t('Append errors'),
        '#default_value' => variable_get('beautify_htmltidy_warnings', 0),
      );
      // NOTE(review): beautify_htmltidy_command() falls back to 'auto' while
      // this form shows 'theme' when the variable has never been saved.
      $form['options']['beautify_htmltidy_doctype'] = array(
        '#type' => 'radios',
        '#title' => t('DOCTYPE'),
        '#description' => t('This option specifies the DOCTYPE declaration generated by Tidy.'),
        '#options' => array(
          'omit' => '<strong>Omit</strong>: the output won\'t contain a DOCTYPE declaration',
          'auto' => '<strong>Auto</strong>: Use an educated guess based upon the contents of the document',
          'strict' => '<strong>Strict</strong>: set the DOCTYPE to the strict DTD',
          'transitional' => '<strong>Transitional</strong>: set the DOCTYPE to the transitional (loose) DTD',
          'theme' => '<strong>Theme</strong>: Use the existing DOCTYPE from the theme (page.tpl.php file)',
        ),
        '#default_value' => variable_get('beautify_htmltidy_doctype', 'theme'),
      );
      $form['options']['msword'] = array(
        '#type' => 'fieldset',
        '#title' => t('Clean MS Word'),
        '#collapsible' => TRUE,
        '#collapsed' => TRUE,
      );
      // The element key must be the variable that is read back below and by
      // the Tidy command builder ('beautify_htmltidy_word_bare'); with any
      // other key the saved value would never take effect.
      $form['options']['msword']['beautify_htmltidy_word_bare'] = array(
        '#type' => 'checkbox',
        '#title' => t('Convert non-breaking spaces to spaces'),
        '#description' => t('This option specifies if Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input.'),
        '#default_value' => variable_get('beautify_htmltidy_word_bare', 0),
      );
      $form['options']['msword']['beautify_htmltidy_word_2000'] = array(
        '#type' => 'checkbox',
        '#title' => t('Strip MS Word 2000 HTML'),
        '#description' => t('This option specifies if Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". Doesn\'t handle embedded images or VML. You should consider using Word\'s "Save As: Web Page, Filtered".'),
        '#default_value' => variable_get('beautify_htmltidy_word_2000', 0),
      );
      $form['options']['msword']['beautify_htmltidy_word_attributes'] = array(
        '#type' => 'checkbox',
        '#title' => t('Strip proprietary attributes'),
        '#description' => t('This option specifies if Tidy should strip out proprietary attributes, such as MS data binding attributes.'),
        '#default_value' => variable_get('beautify_htmltidy_word_attributes', 0),
      );
      break;

    // htmLawed options
    case 'htmlawed':
      $form['options']['beautify_htmlawed_valid_xhtml'] = array(
        '#type' => 'checkbox',
        '#title' => t('Output valid XHTML'),
        '#description' => t('Output the most valid XHTML possible.'),
        '#default_value' => variable_get('beautify_htmlawed_valid_xhtml', 1),
      );
      $form['options']['beautify_htmlawed_balance_tags'] = array(
        '#type' => 'checkbox',
        '#title' => t('Balance tags'),
        '#description' => t('Balance tags for well-formedness and proper nesting.'),
        '#default_value' => variable_get('beautify_htmlawed_balance_tags', 1),
      );
      $form['options']['beautify_htmlawed_css_expressions'] = array(
        '#type' => 'checkbox',
        '#title' => t('Allow CSS expressions'),
        '#description' => t('Allow dynamic CSS expression by not removing the expression from CSS property values in style attributes. CSS expressions only work in Internet Explorer 5, 6 and 7.'),
        '#default_value' => variable_get('beautify_htmlawed_css_expressions', 0),
      );
      $form['options']['beautify_htmlawed_keep_bad'] = array(
        '#type' => 'radios',
        '#title' => t('Bad tags'),
        '#description' => t('Neutralize bad tags by converting < and > to entities, or remove them.'),
        '#options' => array(
          0 => 'Remove',
          1 => 'Neutralize both tags and element content',
          2 => 'Remove tags but neutralize element content',
          3 => 'Neutralize both tags and element content but remove if text is invalid in parent element',
          4 => 'Remove tags but neutralize element content but remove if text is invalid in parent element',
          5 => 'Neutralize both tags and element content but line-breaks, tabs and spaces are left',
          6 => 'Remove tags but neutralize element content but line-breaks, tabs and spaces are left',
        ),
        '#default_value' => variable_get('beautify_htmlawed_keep_bad', 2),
      );
      $form['options']['beautify_htmlawed_strict_tags'] = array(
        '#type' => 'radios',
        '#title' => t('Strict tags'),
        // The element names go through an "@" placeholder so they are
        // escaped; written raw they would be parsed as real <s>, <u>,
        // <center>... tags and distort the rest of the settings page.
        '#description' => t('Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: @elements.', array('@elements' => '<applet>, <center>, <dir>, <embed>, <font>, <isindex>, <menu>, <s>, <strike>, <u>')),
        '#options' => array(
          0 => 'No',
          1 => 'Yes, but leave applet, embed and isindex elements that currently can\'t be transformed',
          2 => 'yes, removing applet, embed and isindex elements and their contents (nested elements remain)',
        ),
        '#default_value' => variable_get('beautify_htmlawed_strict_tags', 1),
      );
      $form['options']['beautify_htmlawed_clean_msword'] = array(
        '#type' => 'radios',
        '#title' => t('Clean MS Word'),
        '#description' => t('Replace discouraged characters introduced by Microsoft Word, etc.'),
        '#options' => array(
          0 => 'No',
          1 => 'Yes',
          2 => 'Yes, plus replace special single & double quotes with ordinary ones',
        ),
        '#default_value' => variable_get('beautify_htmlawed_clean_msword', 0),
      );
      $form['options']['beautify_htmlawed_comments'] = array(
        '#type' => 'radios',
        '#title' => t('Comment handling'),
        '#options' => array(
          0 => 'Don\'t consider comments as markup and proceed as if plain text',
          1 => 'Remove',
          2 => 'Allow, but neutralize any <, >, and & inside by converting to named entities',
          3 => 'Allow',
        ),
        '#default_value' => variable_get('beautify_htmlawed_comments', 3),
      );
      break;
  }

  return system_settings_form($form);
}
|
| 205 |
|
| 206 |
/**
 * Calculate the available methods.
 *
 * Side effect: when the stored mode is not offered by the active method, the
 * first offered mode is written back to the 'beautify_mode' variable.
 *
 * @return
 *   An object with the properties:
 *   - methods: array of available processing methods, keyed by machine name.
 *   - default: machine name of the currently active method.
 *   - options: array of modes offered by the active method.
 *   - mode: the currently active mode.
 */
function beautify_methods() {
  // Create the container explicitly: assigning a property to an undefined
  // variable raises a warning on PHP 5/7 and throws an Error on PHP 8.
  $methods = new stdClass();

  // Always add the built-in method since this should always be available.
  $methods->methods = array('builtin' => 'Built-in');
  // If HTML Tidy is found, add that method.
  if (beautify_htmltidy_test()) {
    $methods->methods += array('htmltidy' => 'HTMLTidy');
  }
  // If htmLawed is found, add that method.
  $path = drupal_get_path('module', 'beautify');
  if (file_exists($path .'/htmLawed.php')) {
    $methods->methods += array('htmlawed' => 'htmLawed');
  }

  // Stash the current active method.
  $methods->default = variable_get('beautify_method', 'builtin');
  // Get the available options for the active method.
  $methods->options = beautify_get_options();
  // Get the active option for this method.
  $methods->mode = $mode = variable_get('beautify_mode', 2);

  // If the active mode is not one of the available options, fall back to the
  // first available one and persist that choice.
  if (!isset($methods->options[$mode])) {
    $options = array_keys($methods->options);
    $methods->mode = $options[0];
    variable_set('beautify_mode', $options[0]);
  }

  return $methods;
}
|
| 239 |
|
| 240 |
/**
 * Returns the modes available to the active method.
 *
 * @return
 *   An array of mode labels keyed by mode number (1 = Beautify,
 *   -1 = Compact, 2 = Flatten), reduced to what the method stored in the
 *   'beautify_method' variable offers.
 */
function beautify_get_options() {
  $method = variable_get('beautify_method', 'builtin');
  $options = array();

  // No Beautify option available for built-in method.
  if ($method != 'builtin') {
    $options[1] = 'Beautify';
  }

  $options[-1] = 'Compact';

  // No Flatten option available for htmLawed method.
  if ($method != 'htmlawed') {
    $options[2] = 'Flatten';
  }

  return $options;
}
|
| 260 |
|
| 261 |
/**
 * Start the output buffering.
 *
 * Buffering only begins for users holding the 'receive processed html'
 * permission while processing is enabled.
 */
function beautify_init() {
  if (!user_access('receive processed html') || !variable_get('beautify_enabled', 1)) {
    return;
  }

  // Ensure we are not serving a cached page.
  if (function_exists('drupal_set_content')) {
    ob_start();
  }
}
|
| 272 |
|
| 273 |
/**
 * Flush the output buffer and send the contents to the router for processing.
 *
 * The same permission and "enabled" checks as in beautify_init() are applied,
 * so the buffer is only collected when it has been started there.
 *
 * @param $destination
 *   Passed in by the hook invocation; processing is skipped whenever it is
 *   set (presumably a redirect target, see hook_exit() - confirm).
 */
function beautify_exit($destination = NULL) {
  if (!user_access('receive processed html') || !variable_get('beautify_enabled', 1)) {
    return;
  }

  // Ensure we are not serving a cached page.
  if (!function_exists('drupal_set_content') || $destination != NULL) {
    return;
  }

  // Take the buffered page and discard the buffer without sending it.
  $input = ob_get_contents();
  ob_end_clean();

  // Send the output to the router for processing.
  beautify_router($input);
}
|
| 292 |
|
| 293 |
/**
 * Route the buffered output to the active processing routine.
 *
 * The active method is used to process the output, which is then handed to
 * beautify_set_cache() and printed. If the method is unknown or the processor
 * returns nothing, the unprocessed page is delivered instead of a blank one.
 *
 * @param $input
 *   The buffered output to be processed.
 */
function beautify_router($input) {
  $mode = variable_get('beautify_mode', 2);

  switch (variable_get('beautify_method', 'builtin')) {
    case 'builtin':
      $output = beautify_process($input, $mode);
      break;

    case 'htmlawed':
      $output = beautify_htmlawed_process($input);
      break;

    case 'htmltidy':
      $output = beautify_htmltidy_process($input);
      break;

    default:
      // Unknown or stale method name: pass the page through untouched.
      $output = $input;
      break;
  }

  // A processor that failed must never swallow the page.
  if (!is_string($output) || $output === '') {
    $output = $input;
  }

  beautify_set_cache($output);
  print $output;
}
|
| 322 |
|
| 323 |
/**
 * Set the processed HTML to the page cache.
 *
 * Only GET requests of anonymous users for which page_get_cache(TRUE)
 * succeeds are stored. When page compression is on, the data is gzip encoded
 * first, unless zlib already compresses the output.
 *
 * @param $input
 *   The processed page to store.
 */
function beautify_set_cache($input) {
  global $user, $base_root;

  // This will fail in some cases, see page_get_cache() for the explanation.
  $cacheable = !$user->uid && $_SERVER['REQUEST_METHOD'] == 'GET' && page_get_cache(TRUE);
  if (!$cacheable) {
    return;
  }

  $store = TRUE;
  if (variable_get('page_compression', TRUE) && function_exists('gzencode')) {
    $coding = zlib_get_coding_type();
    if ($coding == 'deflate') {
      // We do not store the data in case the zlib mode is deflate.
      // This should be rarely happening.
      $store = FALSE;
    }
    elseif ($coding == FALSE) {
      $input = gzencode($input, 9, FORCE_GZIP);
    }
    // The remaining case is 'gzip' which means the data is already
    // compressed and nothing is left to do but to store it.
  }

  if ($store && $input) {
    cache_set($base_root . request_uri(), $input, 'cache_page', CACHE_TEMPORARY, drupal_get_headers());
  }
}
|
| 352 |
|
| 353 |
/**
 * Process whatever we are given and return the HTML Tidy response.
 *
 * Builds the Tidy command line from the module settings, runs Tidy through
 * beautify_htmltidy_run() and post-processes the result for the "Beautify"
 * (1) and "Flatten" (2) modes.
 *
 * @param $input
 *   HTML string to be tidied.
 * @param $errors
 *   By reference: set to an array of error messages.
 * @param $warnings
 *   By reference: set to an array of warning messages.
 * @return
 *   The tidied string, or $input unchanged when Tidy is not available.
 */
function beautify_htmltidy_command($input, &$errors, &$warnings) {
  // Give both by-reference results a defined value on every code path.
  $errors = array();
  $warnings = array();

  $path = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($path)) {
    // watchdog() translates the message itself; the severity is the fourth
    // parameter, the third one carries the placeholder values.
    watchdog('beautify', "Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path), WATCHDOG_WARNING);
    $errors[] = t("Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path));
    // Hand the markup back untouched so the page is still delivered.
    return $input;
  }

  $mode = variable_get('beautify_mode', 2);
  $args = array();

  /*
   * Do not pass the parameters their default values as defined in the
   * documentation for tidy (http://www.w3.org/People/Raggett/tidy/), or weird
   * stuff starts to happen.
   */
  // Output valid XHTML.
  $args[] = '--output-xhtml '. variable_get('beautify_htmltidy_valid_xhtml', 1);

  // Choose DOCTYPE method - if our "Theme" option is set, Tidy omits the
  // DOCTYPE and the one found in the page is prepended to the result.
  // NOTE(review): the settings form shows 'theme' as the default for this
  // variable while 'auto' is used here until the form has been saved.
  $doctype = '';
  $doctype_setting = variable_get('beautify_htmltidy_doctype', 'auto');
  if ($doctype_setting == 'theme') {
    $args[] = '--doctype omit';
    $document = beautify_parse_html($input);
    $doctype = (isset($document->dtd) ? $document->dtd : '') ."\n";
  }
  else {
    $args[] = '--doctype '. $doctype_setting;
  }

  // If mode is set to Beautify we need to set indentation to auto. Yes is not
  // recommended in the Tidy reference.
  if ($mode == 1) {
    $args[] = '--indent auto';
  }

  // TODO: Add these options to the settings page.
  if (!variable_get('beautify_htmltidy_verbose', 0)) {
    $args[] = '-q';
  }
  if (!variable_get('beautify_htmltidy_wrapphp', 0)) {
    $args[] = '--wrap-php no';
  }
  if (variable_get('beautify_htmltidy_clean', 0)) {
    $args[] = '--clean yes';
  }
  if (variable_get('beautify_htmltidy_enclosetext', 0)) {
    $args[] = '--enclose-text yes';
  }
  if (variable_get('beautify_htmltidy_encloseblocktext', 0)) {
    $args[] = '--enclose-block-text yes';
  }

  // Clean MS Word.
  $args[] = '--bare '. variable_get('beautify_htmltidy_word_bare', 0);
  $args[] = '--word-2000 '. variable_get('beautify_htmltidy_word_2000', 0);
  $args[] = '--drop-proprietary-attributes '. variable_get('beautify_htmltidy_word_attributes', 0);

  // User specified configuration file. The path ends up in a shell command,
  // so it is escaped.
  // NOTE(review): Tidy documents this switch as "-config <file>"; confirm
  // that the "--config" spelling is accepted.
  $conf = variable_get('htmltidy_confpath', '');
  if ($conf && file_exists($conf)) {
    $args[] = '--config '. escapeshellarg($conf);
  }

  // Don't add a meta tag with the Tidy info.
  $args[] = '--tidy-mark no';
  // Don't wrap output - this looks terrible so we always set this to off.
  $args[] = '-wrap 0';
  // Output only UTF-8.
  $args[] = '-utf8';
  // Modify the input file instead of outputting to stdout.
  $args[] = '-modify';

  // Run the processing with the specified arguments. Real variables are
  // passed for the by-reference parameters so the messages reach the caller.
  $output = '';
  beautify_htmltidy_run($input, $args, $output, $errors, $warnings);
  if (!is_string($output) || $output === '') {
    // Tidy produced nothing usable; continue with the original markup.
    $output = $input;
  }

  // TODO: Re-implement the debug output (optional second Tidy run to get the
  // line numbers of the messages right).

  // Additional processing for beautified output.
  if ($mode == 1) {
    // remove newline from empty script tags
    $output = preg_replace("@(<script[^>]*>)\n(<\/script>)@", '$1$2', $output);
    // add correct indentation for comments inside script tags
    $output = preg_replace("@(<script[^>]*>)\n(<!--)@", "$1\n $2", $output);
    // add newline and correct indentation for comments immediately following closing script tag
    $output = preg_replace("@(<\/script>)(<!--)@", "$1\n $2", $output);
    // add newline and correct indentation to opening tags immediately following closing div tag
    $output = preg_replace("@([ ]*)<\/div>(<)@", "$1</div>\n$1$2", $output);
  }

  // Additional processing for flattened output.
  if ($mode == 2) {
    // remove newline from empty script tags
    $output = preg_replace("@(<script[^>]*>)\n(<\/script>)@", '$1$2', $output);
    // add newline comments immediately following closing script tag
    $output = preg_replace("@(<\/script>)(<!--)@", "$1\n$2", $output);
    // remove all leading spaces
    $output = preg_replace('@(\n[ ]*<)@', "\n<", $output);
  }

  return $doctype . $output;
}
|
| 461 |
|
| 462 |
/**
 * Run the HTML Tidy binary on a string.
 *
 * The input is written to a temporary file which Tidy is expected to modify
 * in place (the caller passes "-modify"); Tidy's messages are collected in a
 * second temporary file. Both files are removed afterwards.
 *
 * @param $input
 *   The HTML to process.
 * @param $args
 *   Array of command line arguments for Tidy, already formatted and escaped.
 * @param $output
 *   By reference: set to the processed HTML, to $input when Tidy reported
 *   errors or could not be run, or to '' when the binary is missing.
 * @param $errors
 *   By reference: array receiving the error lines.
 * @param $warnings
 *   By reference: array receiving the warning lines.
 * @return
 *   Tidy's exit status: 0 success, 1 warnings, 2 errors (also returned when
 *   Tidy could not be started at all).
 */
function beautify_htmltidy_run($input, $args, &$output, &$errors, &$warnings) {
  if (!is_array($errors)) {
    $errors = array();
  }
  if (!is_array($warnings)) {
    $warnings = array();
  }

  $tidypath = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($tidypath)) {
    watchdog('beautify', 'Failed to find HTML Tidy executable at %beautify_htmltidy_path, not using tidy', array('%beautify_htmltidy_path' => $tidypath), WATCHDOG_WARNING);
    $output = '';
    return 2;
  }

  // Write input to a file because tidy doesn't take input from stdin, and
  // reserve a second file for the warnings.
  $dirtyFilename = tempnam(file_directory_temp(), 'drup');
  $warningsFilename = tempnam(file_directory_temp(), 'warn');
  if ($dirtyFilename === FALSE || $warningsFilename === FALSE || file_put_contents($dirtyFilename, $input) === FALSE) {
    watchdog('beautify', 'Failed to write the temporary files for HTML Tidy, not using tidy', array(), WATCHDOG_WARNING);
    if ($dirtyFilename !== FALSE && file_exists($dirtyFilename)) {
      unlink($dirtyFilename);
    }
    if ($warningsFilename !== FALSE && file_exists($warningsFilename)) {
      unlink($warningsFilename);
    }
    $output = $input;
    return 2;
  }
  $args[] = '-f '. escapeshellarg($warningsFilename);

  // Run Tidy with the right options. exec() is used rather than system() so
  // that nothing Tidy prints can leak into the page; the binary and the file
  // names are escaped because they may contain spaces.
  $command = escapeshellarg($tidypath) .' '. implode(' ', $args) .' '. escapeshellarg($dirtyFilename);
  $ignored = array();
  $return_value = 2;
  exec($command, $ignored, $return_value);

  // return_value 0 means success. 1 means warning. 2 means error, the file
  // will be there, but not have been touched.
  switch ($return_value) {
    case 0:
      $warnings = $errors = array();
      $output = file_get_contents($dirtyFilename);
      break;

    case 1:
      $errors = array();
      $lines = file($warningsFilename);
      $warnings = $lines === FALSE ? array() : array_map('trim', $lines);
      $output = file_get_contents($dirtyFilename);
      break;

    case 2:
    default:
      // Any other status (e.g. the shell could not start the binary) is
      // handled like an error so that $output is always defined.
      // Separate errors and warnings into two different arrays.
      $lines = file($warningsFilename);
      foreach ($lines === FALSE ? array() : $lines as $line) {
        $line = trim($line);
        if ($line === '') {
          continue;
        }
        if (preg_match('|^line \d+ column \d+ - Warning:|', $line)) {
          $warnings[] = $line;
        }
        else {
          $errors[] = $line;
        }
      }
      $output = $input;
      break;
  }

  // The processed file could not be read back: keep the original markup.
  if ($output === FALSE) {
    $output = $input;
  }

  // Delete the temporary files.
  unlink($dirtyFilename);
  unlink($warningsFilename);
  return $return_value;
}
|
| 518 |
|
| 519 |
/**
 * Calculates a valid path to the HTML Tidy binary.
 *
 * If the configured path does not exist, a list of common locations is
 * searched and the first hit is saved to the 'beautify_htmltidy_path'
 * variable. The binary is then run with "-v" to verify that it works.
 *
 * @param $message
 *   By reference: assigned to an explanation (HTML) of what was searched and
 *   what went wrong.
 * @param $version
 *   By reference: assigned to the first line of the "-v" output on success.
 * @return
 *   TRUE if found,
 *   FALSE if error.
 */
function beautify_htmltidy_test(&$message = '', &$version = '') {
  $path = variable_get('beautify_htmltidy_path', '/usr/bin/tidy');
  if (!file_exists($path)) {
    // One or more backslashes, to normalise Windows directory separators.
    $pattern = '@\\\\+@';
    $module_dir = preg_replace($pattern, '/', dirname(__FILE__));

    // Windows paths
    if (substr(PHP_OS, 0, 3) == 'WIN') {
      $possible_paths = array(
        $module_dir .'/bin/tidy.exe',
      );
    }
    // Unix paths
    else {
      $possible_paths = array(
        '/bin/tidy',
        '/usr/bin/tidy',
        '/usr/local/bin/tidy',
        $module_dir .'/bin/tidy',
      );
    }

    $message = t('Searching for HTML Tidy in:');
    $message .= '<ul>';
    foreach ($possible_paths as $path) {
      $message .= '<li>'. $path .'</li>';
      if (file_exists($path)) {
        break;
      }
    }
    $message .= '</ul>';

    // After the loop $path is either the hit or the last candidate tried.
    if (!file_exists($path)) {
      $message .= t('Could not find HTML Tidy binary.');
      return FALSE;
    }
    variable_set('beautify_htmltidy_path', $path);
  }

  // Test the HTML Tidy binary by running a shell command to return the
  // version. Only the path is escaped, as a single argument, so that
  // locations containing spaces keep working.
  $command = escapeshellarg($path) .' -v';
  $response = array();
  if (exec($command, $response)) {
    $version = $response[0];
    return TRUE;
  }
  else {
    $message .= t('Found an HTML Tidy binary but it didn\'t seem to run properly. !command failed to respond correctly.',
      array('!command' => $command));
    return FALSE;
  }
}
|
| 577 |
|
| 578 |
/**
 * Parse the HTML document into sections.
 *
 * @param $input
 *   The buffered HTML to parse.
 * @return
 *   An object with the following string properties, each '' when the
 *   corresponding part is not found:
 *   - dtd: everything up to and including the first '.dtd">'.
 *   - doctype: everything up to and including the opening html tag, followed
 *     by a newline.
 *   - head: the whole head section, followed by a newline.
 *   - bodyattr: the attributes of the body tag.
 *   - body: the content of the body tag, or the trimmed input when there is
 *     no body tag.
 *
 * TODO: For performance reasons make this function accept a string to ask for
 * a specific part of the document. We can still send the whole object by
 * default if still needed.
 */
function beautify_parse_html($input) {
  // Create the result explicitly with every property defined: implicit object
  // creation throws an Error on PHP 8 and callers read all properties.
  $output = new stdClass();
  $output->dtd = '';
  $output->doctype = '';
  $output->head = '';
  $output->bodyattr = '';
  $output->body = '';

  // DTD: just the doctype without the html tag.
  if (preg_match('/(.*\.dtd">)/Umsi', $input, $matches)) {
    $output->dtd = $matches[1];
  }

  // DOCTYPE: doctype plus opening html tag with attributes.
  if (preg_match('/(.*<html.*>)/Umsi', $input, $matches)) {
    $output->doctype = $matches[1] ."\n";
  }

  // head: the whole head section, with indentation and blank lines removed.
  if (preg_match('@(<head.*>.*</head\\s*>)@Umsi', $input, $matches)) {
    $head = str_replace(array("\n ", "\n\n"), "\n", trim($matches[1]));
    $output->head = $head ."\n";
  }

  // body: the attributes and the content of the body section.
  if (preg_match('@<body(.*)>(.*)</body\\s*>@Umsi', $input, $matches)) {
    $output->bodyattr = trim($matches[1]);
    $output->body = trim($matches[2]);
  }
  else {
    // No body tag: treat the whole input as the body.
    $output->body = trim($input);
  }

  return $output;
}
|
| 631 |
|
| 632 |
/**
 * Built-in processing function for compacting or flattening the output.
 *
 * @param $input
 *   The HTML output to process.
 * @param $mode
 *   The mode of operation defined in beautify_get_options(): -1 compacts,
 *   2 flattens, anything else returns the input unchanged.
 * @return
 *   The processed output.
 */
function beautify_process($input, $mode) {
  switch ($mode) {

    case -1: // Compact
      // Set the leading tag (the doctype of a full page) aside so that it is
      // not altered. Only this leading occurrence is cut off: removing the
      // string everywhere would delete identical tags further down.
      $input = (string) $input;
      $doctype = '';
      $rest = $input;
      $end = strpos($input, '>');
      if ($end !== FALSE) {
        $doctype = substr($input, 0, $end + 1);
        $rest = (string) substr($input, $end + 1);
      }
      $output = preg_replace("@\n\s+@", " ", $rest); // newline and 1 or more spaces
      $output = preg_replace("@\s\s\s+@", " ", $output); // three or more spaces
      $output = preg_replace("@>\n@", "> ", $output); // closing tag marker and a new line
      $output = $doctype . $output;
      break;

    case 2: // Flatten
      $output = preg_replace("@\n\s+@", "\n", $input); // a newline and some spaces
      // NOTE(review): runs of two or more whitespace characters are removed
      // entirely, which also joins words separated by a double space -
      // confirm this is intended.
      $output = preg_replace("@\s\s+@", "", $output); // two or more spaces
      break;

    default:
      $output = $input;
      break;
  }

  return $output;
}
|
| 666 |
|
| 667 |
/**
 * Processes the HTML document through htmLawed.
 *
 * Only the body is run through htmLawed; doctype, head and body tag are put
 * back around the result afterwards.
 *
 * @param $input
 *   The buffered output to process.
 * @return
 *   The processed output, or $input unchanged when the htmLawed library
 *   cannot be loaded.
 */
function beautify_htmlawed_process($input) {
  // Load the library once, by absolute path, and degrade gracefully when it
  // is missing instead of failing on an undefined function.
  if (!function_exists('htmLawed')) {
    $library = dirname(__FILE__) .'/htmLawed.php';
    if (!file_exists($library)) {
      watchdog('beautify', 'Failed to find the htmLawed library at %path, not using htmLawed', array('%path' => $library), WATCHDOG_WARNING);
      return $input;
    }
    include_once $library;
  }

  // Split the document into sections. Parts that were not found may be
  // missing from the object, so each one is read defensively.
  $html_document = beautify_parse_html($input);
  $doctype = isset($html_document->doctype) ? $html_document->doctype : '';
  $head = isset($html_document->head) ? $html_document->head : '';
  $bodyattr = isset($html_document->bodyattr) ? $html_document->bodyattr : '';
  $body = isset($html_document->body) ? $html_document->body : '';

  // If compact mode is active we need to process the head since htmLawed
  // won't do that for us.
  $mode = variable_get('beautify_mode', 2);
  if ($mode == -1) {
    $head = beautify_process($head, -1) ."\n";
  }

  // Set up the config arguments.
  $config = array(
    'balance' => variable_get('beautify_htmlawed_balance_tags', 1),
    'valid_xhtml' => variable_get('beautify_htmlawed_valid_xhtml', 1),
    'clean_ms_char' => variable_get('beautify_htmlawed_clean_msword', 0),
    'comment' => variable_get('beautify_htmlawed_comments', 3),
    'css_expression' => variable_get('beautify_htmlawed_css_expressions', 0),
    'schemes' => '*:*',
    'make_tag_strict' => variable_get('beautify_htmlawed_strict_tags', 1),
    'keep_bad' => variable_get('beautify_htmlawed_keep_bad', 2),
  );

  // Our modes are slightly different to those accepted by htmLawed.
  if ($mode == 1) {
    $config['tidy'] = 2;
  }
  elseif ($mode == -1) {
    $config['tidy'] = -1;
  }

  // Process the body of the document through htmLawed.
  $body = htmLawed($body, $config);

  // Concatenate the separate parts back together.
  $body_tag = $bodyattr !== '' ? '<body '. $bodyattr .'>' : '<body>';
  return $doctype . $head . $body_tag ."\n". trim($body) ."\n</body>\n</html>";
}
|
| 719 |
|
| 720 |
/**
 * Process the buffered output through the HTML Tidy processor.
 *
 * @param $input
 *   The buffered output to process.
 * @return
 *   The processed output; the unprocessed input when Tidy returned nothing.
 */
function beautify_htmltidy_process($input) {
  $errors = array();
  $warnings = array();
  $output = beautify_htmltidy_command($input, $errors, $warnings);

  // Never continue with an empty page when Tidy could not be run.
  if (!is_string($output) || $output === '') {
    $output = $input;
  }

  // Merge the errors and warnings together with the errors listed first. The
  // casts keep array_merge() working should either value come back unset.
  $errors = array_merge((array) $errors, (array) $warnings);

  // Here we're using HTML Tidy to flatten and then using the built-in compact
  // routine, so the routine has to work on Tidy's result, not on the raw
  // input.
  $mode = variable_get('beautify_mode', 2);
  if ($mode == -1) {
    $output = beautify_process($output, $mode);
  }

  // TODO: The report is appended after the closing html tag; find a better
  // place for it.
  if ($errors && variable_get('beautify_htmltidy_warnings', 0) && user_access('use beautify debug mode')) {
    // Tidy's messages quote markup from the page, so escape them.
    $errors = array_map('check_plain', $errors);
    // theme() expects the hook name without the "theme_" prefix; the hook
    // has to be registered in hook_theme() for the report to render.
    $output .= theme('beautify_htmltidy_errors', $errors);
  }

  return $output;
}
|
| 748 |
|
| 749 |
/**
 * Theme function for displaying the errors.
 *
 * @param $errors
 *   An array of errors generated by HTML Tidy.
 * @return
 *   The HTML for displaying the errors on the page: a titled wrapper around
 *   an item list.
 */
function theme_beautify_htmltidy_errors($errors) {
  $list = theme('item_list', $errors);
  return '<div class="beautify-errors"><h3>Beautify Errors</h3>'. $list .'</div>';
}
|