Fix bug where legal characters were being trimmed from the edges of URLs
[project/print.git] / print.pages.inc
1 <?php
2 // $Id$
3
4 /**
5 * @file
6 * Contains the functions to generate Printer-friendly pages.
7 *
8 * This file is included by the core PF module, and includes all the
9 * functions necessary to generate a PF version of the original page
10 * in HTML format.
11 */
12
/**
 * Generate an HTML version of the printer-friendly page
 *
 * The arguments received by this function are joined with '/' to rebuild the
 * path of the original page, which is then handed to print_controller()
 * together with the optional 'comment' query string parameter. The resulting
 * array is rendered by including the template file chosen by
 * _print_get_template().
 *
 * Nothing is rendered when print_controller() returns an empty result, which
 * is what the generator functions do after having called
 * drupal_access_denied() or drupal_not_found().
 *
 * @see print_controller()
 * @see _print_get_template()
 */
function print_controller_html() {
  $args = func_get_args();
  // Rebuild the path of the original page from the received arguments
  $path = implode('/', $args);
  $cid = isset($_GET['comment']) ? $_GET['comment'] : NULL;

  $print = print_controller($path, $cid);
  if (empty($print)) {
    // The generator bailed out (access denied, not found): there is no
    // content to hand over to the template
    return;
  }

  // $node is not used in this function, but the included template shares
  // this scope and may access both $print and $node
  $node = $print['node'];

  $template = _print_get_template('html', $print['type']);
  if (!empty($template)) {
    include_once($template);
  }
}
29
/**
 * Select the print generator function based on the page type
 *
 * The path is first normalised: a non-numeric path is resolved through
 * drupal_get_normal_path() and a leading 'node/' is stripped. Then:
 *  - a path whose first component is numeric goes to _print_generate_node()
 *  - a 'book/export/html/...' path goes to _print_generate_book()
 *  - anything else goes to _print_generate_path()
 *
 * @param $path
 *   path of the original page
 * @param $cid
 *   comment ID of the individual comment to be rendered
 * @param $teaser
 *   if set to TRUE, outputs only the node's teaser
 * @param $message
 *   optional sender's message (used by the send e-mail module)
 * @return
 *   array with the fields to be used in the template
 * @see _print_generate_node()
 * @see _print_generate_path()
 * @see _print_generate_book()
 */
function print_controller($path, $cid, $teaser = FALSE, $message = NULL) {
  if (!is_numeric($path)) {
    // Indirect call with print/alias: translate a possible path alias into
    // the system path, and keep only what follows the node/ prefix
    $path = drupal_get_normal_path($path);
    if (preg_match('!^node/(.*)!i', $path, $node_match) == 1) {
      $path = $node_match[1];
    }
  }

  $segments = explode('/', $path);
  if (is_numeric($segments[0])) {
    // The path starts with a node ID
    return _print_generate_node($path, $cid, $teaser, $message);
  }

  if (preg_match('!^book/export/html/(.*)!i', $path, $book_match) == 1) {
    // This is a book PF page link, handle through the book handling functions
    return _print_generate_book($book_match[1], $teaser, $message);
  }

  // If no content node was found, handle the page printing with the 'printable' engine
  return _print_generate_path($path, $teaser, $message);
}
78
/**
 * Generates a robots meta tag to tell them what they may index
 *
 * Each of the noindex, nofollow and noarchive directives is included when
 * the corresponding print_robots_* variable is not empty.
 *
 * @return
 *   string with the meta robots tag, or an empty string when no directive
 *   is enabled
 */
function _print_robots_meta_generator() {
  // Directive name => configured value, in the order they are to be output
  $settings = array(
    'noindex' => variable_get('print_robots_noindex', PRINT_ROBOTS_NOINDEX_DEFAULT),
    'nofollow' => variable_get('print_robots_nofollow', PRINT_ROBOTS_NOFOLLOW_DEFAULT),
    'noarchive' => variable_get('print_robots_noarchive', PRINT_ROBOTS_NOARCHIVE_DEFAULT),
  );

  // Keep only the directives whose setting is not empty
  $directives = array_keys(array_filter($settings));
  if (count($directives) == 0) {
    return '';
  }

  return "<meta name='robots' content='". implode(', ', $directives) ."' />\n";
}
111
/**
 * Post-processor that fills the array for the template with common details
 *
 * The returned array contains the following keys: language, title, head,
 * scripts, robots_meta, url, base_href, favicon, css, sendtoprinter, logo,
 * site_name, source_url, submitted, created, type, breadcrumb, pfp_links,
 * taxonomy, content, node, message and footer_message. The printdate key is
 * only present when the source URL display is enabled.
 *
 * Several of the strings may be overridden by the array returned by
 * theme('print_text') (keys: published, retrieved, sourceURL, by, created
 * and links).
 *
 * @param $node
 *   generated node with a printer-friendly node body
 * @param $message
 *   optional sender's message (used by the send e-mail module)
 * @param $cid
 *   id of current comment being generated (NULL when not generating
 *   an individual comment)
 * @return
 *   array with the fields to be used in the template
 */
function _print_var_generator($node, $message = NULL, $cid = NULL) {
  global $base_url, $language;

  // Nodes are addressed through their nid, anything else through the path
  // stored in the object
  $path = empty($node->nid) ? $node->path : "node/$node->nid";

  $themed = theme('print_text');

  // print module settings
  $print_css = variable_get('print_css', PRINT_CSS_DEFAULT);
  $print_urls = variable_get('print_urls', PRINT_URLS_DEFAULT);
  $print_logo_options = variable_get('print_logo_options', PRINT_LOGO_OPTIONS_DEFAULT);
  $print_logo_url = variable_get('print_logo_url', PRINT_LOGO_URL_DEFAULT);
  $print_html_sendtoprinter = variable_get('print_html_sendtoprinter', PRINT_HTML_SENDTOPRINTER_DEFAULT);
  $print_sourceurl_enabled = variable_get('print_sourceurl_enabled', PRINT_SOURCEURL_ENABLED_DEFAULT);
  $print_sourceurl_forcenode = variable_get('print_sourceurl_forcenode', PRINT_SOURCEURL_FORCENODE_DEFAULT);
  $print_sourceurl_date = variable_get('print_sourceurl_date', PRINT_SOURCEURL_DATE_DEFAULT);

  $print = array();
  $print['language'] = $language->language;
  $print['title'] = $node->title;
  $print['head'] = drupal_get_html_head();
  $print['scripts'] = drupal_get_js();
  $print['robots_meta'] = _print_robots_meta_generator();
  $print['url'] = url($path, array('absolute' => TRUE));
  $print['base_href'] = "<base href='". $print['url'] ."' />\n";
  $print['favicon'] = theme_get_setting('toggle_favicon') ? "<link rel='shortcut icon' href='". theme_get_setting('favicon') ."' type='image/x-icon' />\n" : '';

  $css_files = array();
  if (!empty($print_css)) {
    // %b and %t in the configured stylesheet stand for the base path and
    // the path to the theme
    $replace_pairs = array('%b' => base_path(), '%t' => path_to_theme());
    $css_files[] = strip_tags(strtr($print_css, $replace_pairs));
  }
  else {
    $css_files[] = base_path() . drupal_get_path('module', 'print') .'/print.css';
  }
  // If the current language is RTL add the RTL stylesheet.
  if (defined('LANGUAGE_RTL') && $language->direction == LANGUAGE_RTL) {
    $css_files[] = base_path() . drupal_get_path('module', 'print') .'/print-rtl.css';
  }
  // Add CCK's CSS file to properly display the fields
  if (module_exists('content')) {
    $css_files[] = base_path() . drupal_get_path('module', 'content') .'/content.css';
  }

  // If we are sending a message via e-mail, the CSS must be embedded
  if (!empty($message)) {
    $style = '';
    $pattern = '!^'. base_path() .'!';
    foreach ($css_files as $filename) {
      // Convert to a local path, by removing the base_path
      $filename = preg_replace($pattern, '', $filename);
      $res = file_get_contents($filename, TRUE);
      if ($res != FALSE) {
        $style .= $res;
      }
    }
    $print['css'] = "<style type='text/css' media='all'>$style</style>\n";
  }
  else {
    $print['css'] = '';
    foreach ($css_files as $value) {
      $print['css'] .= "<link type='text/css' rel='stylesheet' media='all' href='$value' />\n";
    }
  }

  $print['sendtoprinter'] = $print_html_sendtoprinter ? ' onload="window.print();"' : '';

  switch ($print_logo_options) {
    case 0: // none
      $logo_url = 0;
      break;
    case 1: // theme's
      $logo_url = theme_get_setting('logo');
      break;
    case 2: // user-specifed
      $logo_url = strip_tags($print_logo_url);
      break;
    default: // unexpected setting value, behave as if no logo was selected
      $logo_url = 0;
      break;
  }
  $print['logo'] = $logo_url ? "<img class='print-logo' src='$logo_url' alt='' />\n" : '';

  $published_site = variable_get('site_name', 0);
  if ($published_site) {
    $published = (empty($themed['published'])) ? t('Published on %site_name', array('%site_name' => $published_site)) : ($themed['published'] .' '. $published_site);
    $print['site_name'] = $published .' ('. l($base_url, $base_url) .')';
  }
  else {
    $print['site_name'] = '';
  }

  if ($print_sourceurl_enabled == 1) {
    /* Grab and format the src URL */
    if (empty($print_sourceurl_forcenode)) {
      $url = $print['url'];
    }
    else {
      // Build the URL from the system path instead of relying on url()
      $url = $base_url .'/'. (((bool)variable_get('clean_url', '0')) ? '' : '?q=') . $path;
    }
    if ($cid) {
      // Double quotes are required here for the comment ID to be
      // interpolated into the fragment
      $url .= "#comment-$cid";
    }
    $retrieved_date = format_date(time(), 'small');
    $retrieved = (empty($themed['retrieved'])) ? t('retrieved on %date', array('%date' => $retrieved_date)) : ($themed['retrieved'] .' '. $retrieved_date);
    $print['printdate'] = $print_sourceurl_date ? " ($retrieved)" : '';

    $source_url = (empty($themed['sourceURL'])) ? t('Source URL') : $themed['sourceURL'];
    $print['source_url'] = '<strong>'. $source_url . $print['printdate'] .':</strong> '. l($url, $url);
  }
  else {
    $print['source_url'] = '';
  }

  if (isset($node->type)) {
    $node_type = $node->type;

    // Double quotes are required here for the node type to become part of
    // the name of the theme setting being looked up
    if (theme_get_setting("toggle_node_info_$node_type")) {
      $by_author = ($node->name ? $node->name : variable_get('anonymous', t('Anonymous')));
      $by = (empty($themed['by'])) ? t('By %author', array('%author' => $by_author)) : ($themed['by'] .' '. $by_author);
      $print['submitted'] = $by;

      $created_datetime = format_date($node->created, 'small');
      $created = (empty($themed['created'])) ? t('Created %date', array('%date' => $created_datetime)) : ($themed['created'] .' '. $created_datetime);
      $print['created'] = $created;
    }
    else {
      $print['submitted'] = '';
      $print['created'] = '';
    }

    $print['type'] = $node->type;
  }
  else {
    $print['submitted'] = '';
    $print['created'] = '';
    $print['type'] = '';
  }

  menu_set_active_item($path);
  $breadcrumb = drupal_get_breadcrumb();
  if (!empty($breadcrumb)) {
    $breadcrumb[] = menu_get_active_title();
    $print['breadcrumb'] = implode(' > ', $breadcrumb);
  }
  else {
    $print['breadcrumb'] = '';
  }

  // Display the collected links at the bottom of the page. Code once taken from Kjartan Mannes' project.module
  // The key is always defined, so that templates can print it unconditionally
  $print['pfp_links'] = '';
  if (!empty($print_urls)) {
    // Fetching the list also empties the static list kept by the function
    $urls = _print_friendly_urls();
    $max = count($urls);
    $pfp_links = '';
    if ($max) {
      for ($i = 0; $i < $max; $i++) {
        $pfp_links .= '['. ($i + 1) .'] '. $urls[$i] ."<br />\n";
      }
      $links = (empty($themed['links'])) ? t('Links') : $themed['links'];
      $print['pfp_links'] = "<p><strong>$links:</strong><br />$pfp_links</p>";
    }
  }

  // The key is always defined, so that templates can print it unconditionally
  $print['taxonomy'] = '';
  if (module_exists('taxonomy')) {
    $terms = taxonomy_link('taxonomy terms', $node);
    $print['taxonomy'] = theme('links', $terms);
  }

  $print['content'] = $node->body;
  $print['node'] = $node;
  $print['message'] = $message;
  $print['footer_message'] = filter_xss_admin(variable_get('site_footer', FALSE)) ."\n". theme('blocks', 'footer') ;

  return $print;
}
296
/**
 * Callback function for the preg_replace_callback for URL-capable patterns
 *
 * Manipulate URLs to make them absolute in the URLs list, and to add a
 * [n] footnote marker.
 *
 * The callers in this file use a pattern with three groups: $matches[1] is
 * the inside of the opening tag (tag name and attributes), $matches[2] the
 * enclosed text and $matches[3] the closing tag.
 *
 * @param $matches
 *   array with the matched tag patterns, usually <a...>+text+</a>
 * @return
 *   tag with re-written URL and when appropriate the [n] index to the
 *   URL list
 */
function _print_rewrite_urls($matches) {
  global $base_url, $base_root;

  // Get value of Printer-friendly URLs setting
  $print_urls = variable_get('print_urls', PRINT_URLS_DEFAULT);
  $pfurls = (!empty($print_urls));

  // Temporarily convert the spaces found inside double-quoted values to %20
  // so that they aren't split below. The result is built in a separate
  // string: replacing in place makes the string grow, and a scan bounded by
  // the original length would then never reach the end of the tag.
  $in_string = FALSE;
  $encoded = '';
  $length = strlen($matches[1]);
  for ($i = 0; $i < $length; $i++) {
    $char = $matches[1][$i];
    if ($char === '"') {
      $in_string = !$in_string;
    }
    $encoded .= (($char === ' ') && $in_string) ? '%20' : $char;
  }
  $matches[1] = $encoded;
  // remove whitespace immediately before and after the '=' sign
  $matches[1] = preg_replace('/\s*=\s*/', '=', $matches[1]);

  // first, split the html into the different tag attributes
  $attribs = preg_split('/\s+/m', $matches[1]);

  // Index 0 is the tag name, so the attributes start at index 1
  $size = count($attribs);
  for ($i = 1; $i < $size; $i++) {
    // If the attribute is href or src, we may need to rewrite the URL in the value
    if (preg_match('/^(?:href|src)\s*?=/i', $attribs[$i]) > 0) {
      // We may need to rewrite the URL, so let's isolate it
      preg_match('/.*?=(.*)/is', $attribs[$i], $urls);
      // Only strip whitespace and the surrounding quotes from the value
      $url = trim($urls[1], " \t\n\r\0\x0B\"'");
      // An empty value (href="") has no first character to look at
      $first_char = ($url === '') ? '' : $url[0];

      if (strpos($url, '://') || preg_match('/^mailto:.*?@.*?\..*?$/iu', $url)) {
        // URL is absolute, do nothing
        $newurl = urldecode($url);
      }
      else {
        if ($first_char === '#') {
          // URL is an anchor tag
          if ($pfurls) {
            // Drop the first component of the current path to obtain the
            // path of the original page
            $path = explode('/', $_GET['q']);
            unset($path[0]);
            $path = implode('/', $path);
            if (is_numeric($path)) {
              $path = "node/$path";
            }
            // Printer-friendly URLs is on, so we need to make it absolute
            $newurl = url($path, array('fragment' => substr(urldecode($url), 1), 'absolute' => TRUE));
          }
          // Because base href is the original page, change the link to
          // still be usable inside the print page
          $matches[1] = str_replace($url, $_GET['q'] . $url, $matches[1]);
        }
        else {
          // URL is relative, convert it into absolute URL
          $clean_url = (bool)variable_get('clean_url', '0');
          if ($first_char === '/') {
            // If it starts with '/' just append it to the server name
            $newurl = $base_root .'/'. trim(urldecode($url), '/');
          }
          elseif ((!$clean_url) && (preg_match('/^(?:index.php)?\?q=.*/i', $url))) {
            // If Clean URLs is disabled, and it starts with q=?, just prepend with the base URL
            $newurl = $base_url .'/'. trim(urldecode($url), '/');
          }
          else {
            $newurl = url(trim(urldecode($url), '/'), array('absolute' => TRUE));
          }
          $matches[1] = str_replace($url, $newurl, $matches[1]);
        }
      }
    }
  }

  //Revert all %20 in strings back to spaces
  $matches[1] = str_replace('%20', ' ', $matches[1]);

  $ret = '<'. $matches[1] .'>';
  // With all three groups present, re-attach the text and the closing tag
  if (count($matches) == 4) {
    $ret .= $matches[2] . $matches[3];
    if (($pfurls) && (isset($newurl))) {
      $ret .= ' <span class="print-footnote">['. _print_friendly_urls(trim(stripslashes($newurl))) .']</span>';
    }
  }

  return $ret;
}
395
/**
 * Auxiliary function to store the Printer-friendly URLs list as static.
 *
 * When called with a URL, the URL is added to the list unless it is already
 * there, and its 1-based position in the list is returned. When called
 * without a URL, the list collected so far is returned and then emptied.
 *
 * @param $url
 *   absolute URL to be inserted in the list
 * @return
 *   list of URLs previously stored if $url is 0, or the 1-based index of
 *   $url in the list otherwise.
 */
function _print_friendly_urls($url = 0) {
  static $urls = array();
  if ($url) {
    // Strict search: with a loose comparison, distinct numeric-looking
    // strings (e.g. '1000' and '1e3') would be taken for the same URL
    $url_idx = array_search($url, $urls, TRUE);
    if ($url_idx !== FALSE) {
      return ($url_idx + 1);
    }
    $urls[] = $url;
    return count($urls);
  }
  // Hand over the list and start afresh
  $ret = $urls;
  $urls = array();
  return $ret;
}
421
/**
 * Choose most appropriate template
 *
 * Builds a list of candidate file names, from the most specific (format and
 * node type) to the generic print.tpl.php, and returns the first one that
 * exists. Each candidate is looked up in the directory of the current theme
 * before the directory of the print module.
 *
 * @param $format
 *   format of the PF page being rendered (html, pdf, etc.)
 * @param $type
 *   name of the node type being rendered in a PF page
 * @return
 *   filename of the most suitable template (nothing is returned when none
 *   of the candidates exists)
 */
function _print_get_template($format = NULL, $type = NULL) {
  $has_format = !empty($format);
  $has_type = !empty($type);

  $candidates = array();
  if ($has_format && $has_type) {
    // Template for this format and this node type
    $candidates[] = 'print_'. $format .'.node-'. $type .'.tpl.php';
  }
  if ($has_format) {
    // Template for this format only
    $candidates[] = 'print_'. $format .'.tpl.php';
  }
  if ($has_type) {
    // Template for this node type only
    $candidates[] = 'print.node-'. $type .'.tpl.php';
  }
  // Generic template
  $candidates[] = 'print.tpl.php';

  foreach ($candidates as $candidate) {
    // The theme directory has precedence ...
    $in_theme = drupal_get_path('theme', $GLOBALS['theme_key']) .'/'. $candidate;
    if (file_exists($in_theme)) {
      return $in_theme;
    }
    // ... over the module directory
    $in_module = drupal_get_path('module', 'print') .'/'. $candidate;
    if (file_exists($in_module)) {
      return $in_module;
    }
  }
}
466
/**
 * Prepare a Printer-friendly-ready node body for content nodes
 *
 * Loads the node, renders its content (unless a single comment was
 * requested), appends the rendered comments when applicable, rewrites the
 * links in the resulting body through _print_rewrite_urls() and passes the
 * node to _print_var_generator().
 *
 * @param $nid
 *   node ID of the node to be rendered into a printer-friendly page
 * @param $cid
 *   comment ID of the individual comment to be rendered
 * @param $teaser
 *   if set to TRUE, outputs only the node's teaser
 * @param $message
 *   optional sender's message (used by the send e-mail module)
 * @return
 *   filled array ready to be used in the template, or the result of
 *   drupal_access_denied() when the node may not be viewed
 */
function _print_generate_node($nid, $cid = NULL, $teaser = FALSE, $message = NULL) {
  // We can take a node id
  $node = node_load(array('nid' => $nid));
  if (!node_access('view', $node)) {
    // Access is denied
    return drupal_access_denied();
  }
  drupal_set_title($node->title);

  //alert other modules that we are generating a printer-friendly page, so they can choose to show/hide info
  $node->printing = TRUE;
  // Turn off Pagination by the Paging module
  unset($node->pages);
  unset($node->page_count);

  if ($teaser) {
    unset($node->body);
  }
  else {
    unset($node->teaser);
  }
  if ($cid === NULL) {
    // Adapted (simplified) version of node_view for Drupal 5.x
    //Render the node content
    $node = node_build_content($node, $teaser, TRUE);
    // Disable fivestar widget output
    unset($node->content['fivestar_widget']);
    // Disable service links module output
    unset($node->content['service_links']);

    $node->body = drupal_render($node->content);
    //TODO the following was part of the fix for http://drupal.org/node/254863
    //check if it is reproducible and find the exact condition which
    //triggered it
    //$node->body = html_entity_decode($node->body);
  }

  $print_comments = variable_get('print_comments', PRINT_COMMENTS_DEFAULT);

  if (function_exists('comment_render') && (($cid != NULL) || ($print_comments))) {
    //Print only the requested comment (or if $cid is NULL, all of them)
    $comments = comment_render($node, $cid);

    //Remove the comment forms
    $comments = preg_replace('!<form.*?id="comment-.*?">.*?</form>!sim', '', $comments);
    //Remove the 'Post new comment' title. The title is looked up in its
    //translated form, so that it is also found on non-English sites
    $comments = preg_replace('!<h2.*?>'. preg_quote(t('Post new comment'), '!') .'</h2>!', '', $comments);
    //Remove the comment title hyperlink
    $comments = preg_replace('!(<h3.*?>)(<a.*?>)(.*?)</a>(</h3>)!', '$1$3$4', $comments);
    //Remove the comment author link
    $pattern = '!(<span class="submitted">)(.*?)<a.*?>(.*?)</a>(</span>)!sim';
    if (preg_match($pattern, $comments)) {
      $comments = preg_replace($pattern , '$1$2$3$4', $comments);
    }
    //Remove the comment links
    $comments = preg_replace('!\s*<ul class="links">.*?</ul>!sim', '', $comments);
    if ($cid != NULL) {
      // Single comment requested, output only the comment
      $node->body = $comments;
    }
    elseif (isset($node->body)) {
      $node->body .= $comments;
    }
    else {
      $node->body = $comments;
    }
  }

  node_invoke_nodeapi($node, 'alter', FALSE, TRUE);

  // Convert the a href elements
  $pattern = '!<(a\s[^>]*?)>(.*?)(</a>)!is';
  $node->body = preg_replace_callback($pattern, '_print_rewrite_urls', $node->body);

  init_theme();

  $print = _print_var_generator($node, $message, $cid);

  return $print;
}
557
/**
 * Prepare a Printer-friendly-ready node body for non-content pages
 *
 * The page is produced by executing the menu handler of the given path. Its
 * output, title and path are stored in a stand-in node object, which is then
 * processed like a regular node body.
 *
 * @param $path
 *   path of the node to be rendered into a printer-friendly page
 * @param $teaser
 *   if set to TRUE, outputs only the node's teaser
 * @param $message
 *   optional sender's message (used by the send e-mail module)
 * @return
 *   filled array ready to be used in the template
 */
function _print_generate_path($path, $teaser = FALSE, $message = NULL) {
  $path = drupal_get_normal_path($path);
  menu_set_active_item($path);

  // Adapted from index.php.
  $node = new stdClass();
  $node->body = menu_execute_active_handler($path);
  $node->title = drupal_get_title();
  $node->path = $path;

  // It may happen that a drupal_not_found is called in the above call
  $not_found_sent = (preg_match('/404 Not Found/', drupal_get_headers()) == 1);
  if ($not_found_sent) {
    return;
  }

  // An integer result is a menu status code instead of page content
  if (is_int($node->body)) {
    if ($node->body == MENU_NOT_FOUND) {
      return drupal_not_found();
    }
    elseif ($node->body == MENU_ACCESS_DENIED) {
      return drupal_access_denied();
    }
  }

  // Delete any links area
  $links_area = '!\s*<div class="links">.*?</div>!sim';
  $node->body = preg_replace($links_area, '', $node->body);

  // Convert the a href elements
  $anchors = '!<(a\s[^>]*?)>(.*?)(</a>)!is';
  $node->body = preg_replace_callback($anchors, '_print_rewrite_urls', $node->body);

  init_theme();

  return _print_var_generator($node, $message);
}
609
610
/**
 * Prepare a Printer-friendly-ready node body for book pages
 *
 * The body is the export of the book subtree of the given node, as produced
 * by book_export_traverse(), with its links rewritten through
 * _print_rewrite_urls().
 *
 * @param $nid
 *   node ID of the node to be rendered into a printer-friendly page
 * @param $teaser
 *   if set to TRUE, outputs only the node's teaser (not used by this
 *   function)
 * @param $message
 *   optional sender's message (used by the send e-mail module)
 * @return
 *   filled array ready to be used in the template, or the result of
 *   drupal_access_denied() / drupal_not_found() when the book page cannot
 *   be generated
 */
function _print_generate_book($nid, $teaser = FALSE, $message = NULL) {
  $node = node_load(array('nid' => $nid));
  if (!node_access('view', $node) || (!user_access('access printer-friendly version'))) {
    // Access is denied
    return drupal_access_denied();
  }

  if (!isset($node->book)) {
    // The node is not part of a book, so there is no subtree to export
    return drupal_not_found();
  }

  $tree = book_menu_subtree_data($node->book);
  $node->body = book_export_traverse($tree, 'book_node_export');

  // Convert the a href elements
  $pattern = '!<(a\s[^>]*?)>(.*?)(</a>)!is';
  $node->body = preg_replace_callback($pattern, '_print_rewrite_urls', $node->body);

  init_theme();

  $print = _print_var_generator($node, $message);
  // The title is already displayed by the book_recurse, so avoid duplication
  $print['title'] = '';

  return $print;
}