Fix #419296 by noelbush: ** Use wkhtmltopdf for PDF generation **
[project/print.git] / print_pdf / print_pdf.pages.inc
1 <?php
2 // $Id$
3
4 /**
5 * @file
6 * Generates the PDF versions of the pages
7 *
8 * This file is included by the print_pdf module and includes the
9 * functions that interface with the PDF generation packages.
10 */
11
12 require_once(drupal_get_path('module', 'print') .'/print.pages.inc');
13
/**
 * Generate a PDF version of the printer-friendly page
 *
 * Rebuilds the requested path from the callback arguments, obtains the
 * printer-friendly data from print_controller(), renders it through the
 * print template, hands the resulting HTML to the configured PDF tool and
 * finally updates the PDF access counter of the page.
 *
 * @return
 *   Nothing when print_controller() fails or after the PDF was handed to the
 *   tool; the result of drupal_not_found() when the configured tool is not
 *   one of the supported ones.
 *
 * @see print_controller()
 * @see _print_get_template()
 * @see _print_pdf_dompdf()
 * @see _print_pdf_tcpdf()
 * @see _print_pdf_wkhtmltopdf()
 */
function print_pdf_controller() {
  global $base_url;

  // Disable caching for generated PDFs, as Drupal doesn't output the proper headers from the cache
  $GLOBALS['conf']['cache'] = FALSE;

  // The callback arguments are the components of the path to be converted
  $args = func_get_args();
  $path = implode('/', $args);
  $cid = isset($_GET['comment']) ? (int)$_GET['comment'] : NULL;

  $print_pdf_pdf_tool = variable_get('print_pdf_pdf_tool', PRINT_PDF_PDF_TOOL_DEFAULT);

  $print = print_controller($path, $cid, PRINT_PDF_FORMAT);
  if ($print === FALSE) {
    return;
  }

  $sections = array('content', 'logo', 'footer_message');

  // Img elements must be set to absolute
  $pattern = '!<(img\s[^>]*?)>!is';
  foreach ($sections as $section) {
    $print[$section] = preg_replace_callback($pattern, '_print_rewrite_urls', $print[$section]);
  }
  // And converted from private to public paths
  $file_downloads = variable_get('file_downloads', FILE_DOWNLOADS_PUBLIC);
  if ($file_downloads == FILE_DOWNLOADS_PRIVATE) {
    // The base URL is quoted so that its dots (and any other special
    // character) are matched literally instead of as regex operators.
    $pattern = '!(<img\s[^>]*?src\s*?=\s*?[\'"]?)'. preg_quote($base_url, '!') .'/(?:(?:index\.php)?\?q=)?system/files(/[^>]*?>)!is';
    $replacement = '$1file://'. realpath(file_directory_path()) .'$2';
    foreach ($sections as $section) {
      $print[$section] = preg_replace($pattern, $replacement, $print[$section]);
    }
  }

  // $print and $node are the variables made available to the template
  $node = $print['node'];
  ob_start();
  include_once(_print_get_template(PRINT_PDF_FORMAT, $print['type']));
  $html = ob_get_contents();
  ob_end_clean();

  // Convert the a href elements
  $pattern = '!<(a\s[^>]*?)>!is';
  $html = preg_replace_callback($pattern, '_print_rewrite_urls', $html);

  $pdf_filename = variable_get('print_pdf_filename', PRINT_PDF_FILENAME_DEFAULT);
  if (function_exists('token_replace') && !empty($pdf_filename)) {
    $pdf_filename = token_replace($pdf_filename, 'node', $node) .'.pdf';
  }
  else {
    $pdf_filename = str_replace('/', '_', $path) .'.pdf';
  }

  // The configured tool is identified by the name of its main file
  switch (basename($print_pdf_pdf_tool)) {
    case 'dompdf_config.inc.php':
      _print_pdf_dompdf($print, $html, $pdf_filename);
      break;
    case 'tcpdf.php':
      _print_pdf_tcpdf($print, $html, $pdf_filename);
      break;
    case 'wkhtmltopdf':
      _print_pdf_wkhtmltopdf($print, $html, $pdf_filename);
      break;
    default:
      return drupal_not_found();
  }

  // $node->path is not guaranteed to be set, so fall back to the node's
  // system path (or to the requested path) instead of counting the hit
  // under an empty path.
  if (isset($node->path)) {
    $nodepath = drupal_get_normal_path($node->path);
  }
  elseif (isset($node->nid)) {
    $nodepath = 'node/'. $node->nid;
  }
  else {
    $nodepath = drupal_get_normal_path($path);
  }
  db_query("UPDATE {print_pdf_page_counter} SET totalcount = totalcount + 1, timestamp = %d WHERE path = '%s'", time(), $nodepath);
  // If we affected 0 rows, this is the first time viewing the node.
  if (!db_affected_rows()) {
    // We must create a new row to store counters for the new node.
    db_query("INSERT INTO {print_pdf_page_counter} (path, totalcount, timestamp) VALUES ('%s', 1, %d)", $nodepath, time());
  }
}
93
/**
 * Generate the PDF file using the dompdf library
 *
 * Loads the configured dompdf entry file, adapts the HTML to what dompdf can
 * digest (no system.css, ISO 8859-1 with HTML entities, no tbody elements)
 * and streams the rendered document to the browser.
 *
 * @param $print
 *   array containing the configured data
 * @param $html
 *   contents of the post-processed template already with the node data
 * @param $filename
 *   name of the PDF file to be generated
 * @see print_pdf_controller()
 */
function _print_pdf_dompdf($print, $html, $filename) {
  $tool = variable_get('print_pdf_pdf_tool', PRINT_PDF_PDF_TOOL_DEFAULT);
  $paper_size = variable_get('print_pdf_paper_size', PRINT_PDF_PAPER_SIZE_DEFAULT);
  $orientation = variable_get('print_pdf_page_orientation', PRINT_PDF_PAGE_ORIENTATION_DEFAULT);
  $disposition = variable_get('print_pdf_content_disposition', PRINT_PDF_CONTENT_DISPOSITION_DEFAULT);
  require_once($tool);

  // dompdf seems to have problems with something in system.css so let's not use it
  $html = preg_replace('!<link.*?modules/system/system.css.*?/>!', '', $html);

  // The page URL tells dompdf where relative references are to be resolved
  $url_parts = parse_url($print['url']);
  $protocol = $url_parts['scheme'] .'://';
  $host = $url_parts['host'];
  $base_path = dirname($url_parts['path']) .'/';

  $dompdf = new DOMPDF();
  $dompdf->set_base_path($base_path);
  $dompdf->set_host($host);
  $dompdf->set_paper(drupal_strtolower($paper_size), $orientation);
  $dompdf->set_protocol($protocol);

  // dompdf can't handle footers cleanly, so disable the following
  // $html = theme('print_pdf_dompdf_footer', $html);

  // Convert from UTF-8 to ISO 8859-1 with the first available converter.
  // iconv is deliberately left out: it fails silently when it encounters
  // something that it doesn't know.
  if (function_exists('utf8_decode')) {
    $html = utf8_decode($html);
  }
  elseif (function_exists('mb_convert_encoding')) {
    $html = mb_convert_encoding($html, 'ISO-8859-1', 'UTF-8');
  }
  elseif (function_exists('recode_string')) {
    $html = recode_string('UTF-8..ISO_8859-1', $html);
  }
  // ... and then to HTML entities, keeping the markup itself intact
  $with_entities = htmlentities($html, ENT_NOQUOTES, 'ISO-8859-1');
  $html = htmlspecialchars_decode($with_entities, ENT_NOQUOTES);

  // must get rid of tbody (dompdf goes into recursion)
  $html = preg_replace('!<tbody[^>]*?>|</tbody>!i', '', $html);

  $dompdf->load_html($html);
  $dompdf->render();

  // A content disposition setting of 2 sends the PDF as an attachment
  $as_attachment = ($disposition == 2);
  $dompdf->stream($filename, array('Attachment' => $as_attachment));
}
154
/**
 * Generate the PDF file using the TCPDF library
 *
 * Defines the TCPDF configuration constants, loads the library together with
 * the matching PrintTCPDF class file, builds the document through the
 * print_pdf_tcpdf_* theme functions and sends it to the browser.
 *
 * @param $print
 *   array containing the configured data
 * @param $html
 *   contents of the post-processed template already with the node data
 * @param $filename
 *   name of the PDF file to be generated
 * @see print_pdf_controller()
 */
function _print_pdf_tcpdf($print, $html, $filename) {
  global $base_url;

  $tool = variable_get('print_pdf_pdf_tool', PRINT_PDF_PDF_TOOL_DEFAULT);
  $paper_size = variable_get('print_pdf_paper_size', PRINT_PDF_PAPER_SIZE_DEFAULT);
  $page_orientation = variable_get('print_pdf_page_orientation', PRINT_PDF_PAGE_ORIENTATION_DEFAULT);
  $disposition = variable_get('print_pdf_content_disposition', PRINT_PDF_CONTENT_DISPOSITION_DEFAULT);

  $tool_dir = realpath(dirname($tool));

  // These constants have to exist before the library file is loaded
  define('K_TCPDF_EXTERNAL_CONFIG', TRUE);
  define('K_PATH_MAIN', dirname($_SERVER['SCRIPT_FILENAME']));
  define('K_PATH_URL', $base_url);
  define('K_PATH_FONTS', $tool_dir .'/fonts/');
  define('K_PATH_CACHE', $tool_dir .'/cache/');
  define('K_PATH_IMAGES', '');
  define('K_BLANK_IMAGE', $tool_dir .'/images/_blank.png');
  define('K_CELL_HEIGHT_RATIO', 1.25);
  define('K_SMALL_RATIO', 2/3);

  require_once($tool);

  // Pick the class file according to whether the producer string mentions PHP4
  $class_file = (strpos(PDF_PRODUCER, 'PHP4') === FALSE) ? '/print_pdf.class.inc' : '/print_pdf.class_php4.inc';
  require_once(drupal_get_path('module', 'print_pdf') . $class_file);

  // Font definition: family, style and size
  $font = array(
    variable_get('print_pdf_font_family', PRINT_PDF_FONT_FAMILY_DEFAULT),
    '',
    variable_get('print_pdf_font_size', PRINT_PDF_FONT_SIZE_DEFAULT),
  );
  // Only the upper-cased first letter of the orientation setting is passed on
  $orientation = drupal_strtoupper($page_orientation[0]);

  // create new PDF document
  $pdf = new PrintTCPDF($orientation , 'mm', $paper_size, TRUE);

  // set document information
  $pdf->SetAuthor(strip_tags($print['submitted']));
  $pdf->SetCreator(variable_get('site_name', 'Drupal'));
  $pdf->SetTitle($print['title']);
  $taxonomy_lines = explode("\n", trim(strip_tags($print['taxonomy'])));
  $pdf->SetKeywords(implode(' ', $taxonomy_lines));
  $pdf->setPDFVersion('1.6');

  $pdf = theme('print_pdf_tcpdf_header', $pdf, $html, $font);
  $pdf = theme('print_pdf_tcpdf_footer', $pdf, $html, $font);
  $pdf = theme('print_pdf_tcpdf_page', $pdf);

  // initialize document
  $pdf->AliasNbPages();

  // add a page
  $pdf->AddPage();

  $pdf = theme('print_pdf_tcpdf_content', $pdf, $html, $font);

  // reset pointer to the last page
  $pdf->lastPage();

  // Close and output PDF document: 'D' when the content disposition
  // setting is 2, 'I' otherwise
  if ($disposition == 2) {
    $output_dest = 'D';
  }
  else {
    $output_dest = 'I';
  }
  $pdf->Output($filename, $output_dest);
}
230
/**
 * Generate the PDF file using wkhtmltopdf
 *
 * Feeds the HTML to the wkhtmltopdf binary through its standard input, reads
 * the PDF from its standard output and sends it to the browser. When an Xvfb
 * binary is found below the print module's directory, it is launched first
 * and wkhtmltopdf is pointed to its display.
 *
 * The WKHTMLTOPDF_* constants are only defined here when they do not exist
 * yet, so they may be defined beforehand to override the defaults.
 *
 * @param $print
 *   array containing the configured data
 * @param $html
 *   contents of the post-processed template already with the node data
 * @param $filename
 *   name of the PDF file to be generated
 * @see print_pdf_controller()
 */
function _print_pdf_wkhtmltopdf($print, $html, $filename) {
  $print_pdf_pdf_tool = variable_get('print_pdf_pdf_tool', PRINT_PDF_PDF_TOOL_DEFAULT);
  $print_pdf_paper_size = variable_get('print_pdf_paper_size', PRINT_PDF_PAPER_SIZE_DEFAULT);
  $print_pdf_page_orientation = variable_get('print_pdf_page_orientation', PRINT_PDF_PAGE_ORIENTATION_DEFAULT);
  $print_pdf_content_disposition = variable_get('print_pdf_content_disposition', PRINT_PDF_CONTENT_DISPOSITION_DEFAULT);

  // Look for an Xvfb binary below the print module's directory; key()
  // yields NULL when nothing was found.
  $xvfb_candidates = file_scan_directory(drupal_get_path('module', 'print'), '^Xvfb$');
  $xvfb_binary = key($xvfb_candidates);

  // Guarded so that a previous definition does not raise a notice
  if (!defined('WKHTMLTOPDF_DPI')) {
    define('WKHTMLTOPDF_DPI', '96');
  }
  if (!defined('WKHTMLTOPDF_OPTIONS')) {
    define('WKHTMLTOPDF_OPTIONS', " --footer-font-size 7 --footer-right '[page]'");
  }
  // define('WKHTMLTOPDF_XVFB_FONT_PATH', '/usr/share/fonts/X11/misc/,/usr/share/fonts/X11/Type1/,/usr/share/fonts/X11/75dpi/,/usr/share/fonts/X11/100dpi/');
  if (!defined('WKHTMLTOPDF_TEMP_DIR')) {
    define('WKHTMLTOPDF_TEMP_DIR', '/tmp');
  }

  // If an Xvfb binary is available, launch a temporary X server with a random display number.
  $xvfb = NULL;
  if (isset($xvfb_binary)) {
    $xdisplay = rand(10, 500);
    $xcmd = escapeshellarg(realpath($xvfb_binary)) ." :$xdisplay -screen 0 320x200x24 -dpi ". WKHTMLTOPDF_DPI .
      ' -terminate -nolisten tcp'.
      (defined('WKHTMLTOPDF_XVFB_FONT_PATH') ? ' -fp '. WKHTMLTOPDF_XVFB_FONT_PATH : '') .
      ' -tst 2> '. escapeshellarg(WKHTMLTOPDF_TEMP_DIR .'/Xvfb-err');
    $xvfb = popen($xcmd, 'r');
  }

  // stdin receives the HTML, stdout delivers the PDF, stderr is appended to a log file
  $descriptor = array(
    0 => array('pipe', 'r'),
    1 => array('pipe', 'w'),
    2 => array('file', WKHTMLTOPDF_TEMP_DIR .'/wkhtmltopdf-err', 'a'),
  );
  // Every configurable value is escaped before it reaches the shell;
  // WKHTMLTOPDF_OPTIONS is a ready-made option string and is used verbatim.
  $cmd = escapeshellarg(realpath($print_pdf_pdf_tool)) .
    ' --page-size '. escapeshellarg(drupal_strtolower($print_pdf_paper_size)) .
    ' --orientation '. escapeshellarg($print_pdf_page_orientation) .
    ' --dpi '. escapeshellarg(WKHTMLTOPDF_DPI) .
    WKHTMLTOPDF_OPTIONS .
    ' - -';

  $process = proc_open($cmd, $descriptor, $pipes, NULL,
    isset($xvfb_binary) ? array('DISPLAY' => ':'. $xdisplay) : NULL);

  $pdf = NULL;
  if (is_resource($process)) {
    fwrite($pipes[0], $html);
    fclose($pipes[0]);

    $pdf = stream_get_contents($pipes[1]);
    fclose($pipes[1]);

    proc_close($process);
  }
  // popen() returns FALSE on failure, which must not be handed to pclose()
  if (is_resource($xvfb)) {
    pclose($xvfb);
  }

  // No process or no output: there is no PDF to send
  if (empty($pdf)) {
    drupal_set_message(t('Unable to generate the PDF file.'), 'error');
    return;
  }

  if (headers_sent()) {
    die("Unable to stream pdf: headers already sent");
  }
  header("Cache-Control: private");
  header("Content-Type: application/pdf");

  $attachment = ($print_pdf_content_disposition == 2) ? "attachment" : "inline";

  // Quotes, backslashes and control characters would break the quoted
  // filename parameter of the header
  $header_filename = preg_replace('![\x00-\x1f"\\\\]!', '', $filename);
  header("Content-Disposition: $attachment; filename=\"$header_filename\"");

  echo $pdf;
  flush();
}
306
/**
 * Format the dompdf footer contents
 *
 * Removes the print-footer div (when there is one) from the HTML and
 * returns the HTML with an embedded dompdf script inserted right after the
 * body tag; the script draws the footer text and the page numbering.
 *
 * @param $html
 *   contents of the body of the HTML from the original node; passed by
 *   reference, the footer div is removed from it
 * @return
 *   the HTML with the footer script inserted after the "<body>" tag
 * @see theme_print_pdf_tcpdf_footer()
 */
function theme_print_pdf_dompdf_footer(&$html) {
  $footer_text = '';
  // Only touch the HTML when the template actually contains a footer
  if (preg_match('!<div class="print-footer">(.*?)</div>!si', $html, $tpl_footer)) {
    $html = str_replace($tpl_footer[0], '', $html);
    // The text ends up inside a single-quoted PHP literal of the embedded
    // script, where only the backslash and the single quote need escaping
    $footer_text = addcslashes(strip_tags($tpl_footer[1]), "'\\");
  }

  $text = '<script type="text/php">
    if (isset($pdf)) {
      $font = Font_Metrics::get_font("verdana");
      $size = 10;
      $color = array(0,0,0);
      $text_height = Font_Metrics::get_font_height($font, $size);

      $w = $pdf->get_width();
      $h = $pdf->get_height();

      $footer = $pdf->open_object();

      // Draw a line along the bottom
      $y = $h - 25;
      $pdf->line(15, $y, $w - 15, $y, $color, 1);

      $y += $text_height / 2;
      $pdf->page_text(15, $y, \''. $footer_text .'\', $font, $size, $color);

      $pdf->close_object();
      $pdf->add_object($footer, "all");

      // Center the text
      $width = Font_Metrics::get_text_width("Page 1 of 2", $font, $size);
      $pagenumtxt = t("Page !n of !total", array("!n" => "{PAGE_NUM}", "!total" => "{PAGE_COUNT}"));
      $pdf->page_text($w - 15 - $width, $y, $pagenumtxt, $font, $size, $color);
    }
  </script>';

  return str_replace("<body>", "<body>" . $text, $html);
}
349
/**
 * Format the TCPDF header
 *
 * Extracts the logo, the title and the site name from the HTML and uses
 * them as the header data of the document. Elements that are missing from
 * the HTML result in empty header values.
 *
 * @param $pdf
 *   current TCPDF object
 * @param $html
 *   contents of the body of the HTML from the original node
 * @param $font
 *   array with the font definition (font name, styles and size)
 * @return
 *   the TCPDF object with the header settings applied
 * @see theme_print_pdf_tcpdf_footer()
 */
function theme_print_pdf_tcpdf_header(&$pdf, &$html, $font) {
  preg_match('!<div class="print-logo">(.*?)</div>!si', $html, $tpl_logo);
  preg_match('!<h1 class="print-title">(.*?)</h1>!si', $html, $tpl_title);
  preg_match('!<div class="print-site_name">(.*?)</div>!si', $html, $tpl_site_name);

  // Any of the three elements may be absent from the template output
  $logo_markup = isset($tpl_logo[1]) ? $tpl_logo[1] : '';
  $title = isset($tpl_title[1]) ? $tpl_title[1] : '';
  $site_name = isset($tpl_site_name[1]) ? strip_tags($tpl_site_name[1]) : '';

  $ratio = 0;
  $logo = '';
  if (preg_match('!src\s*=\s*(\'.*?\'|".*?"|[^\s]*)!i', $logo_markup, $matches)) {
    $logo = trim($matches[1], '\'"');
    // An empty src has no image to measure
    if ($logo !== '') {
      $size = getimagesize($logo);
      // Width/height ratio; guarded against unreadable images and a zero height
      $ratio = ($size && !empty($size[1])) ? ($size[0] / $size[1]) : 0;
    }
  }

  // set header font
  $pdf->setHeaderFont($font);
  // set header margin
  $pdf->SetHeaderMargin(5);
  // set header data: the logo width is derived from the image's aspect ratio
  $pdf->SetHeaderData($logo, 10 * $ratio, $title, $site_name);

  return $pdf;
}
384
/**
 * Format the TCPDF page settings (margins, etc)
 *
 * Applies the page margins, the automatic page break, the image scale
 * factor and the JPEG quality to the document.
 *
 * @param $pdf
 *   current TCPDF object
 * @return
 *   the TCPDF object with the page settings applied
 * @see theme_print_pdf_tcpdf_header()
 */
function theme_print_pdf_tcpdf_page(&$pdf) {
  $side_margin = 15;
  $top_margin = 20;
  $break_margin = 15;
  $image_scale = 4;
  $jpeg_quality = 100;

  // left, top and right margins
  $pdf->SetMargins($side_margin, $top_margin, $side_margin);
  // automatic page breaks enabled
  $pdf->SetAutoPageBreak(TRUE, $break_margin);
  // image scale factor
  $pdf->setImageScale($image_scale);
  // image compression quality
  $pdf->setJPEGQuality($jpeg_quality);

  return $pdf;
}
404
/**
 * Format the TCPDF page content
 *
 * Takes the contents of the body element, drops the parts that are handled
 * by the header and the footer, massages the markup into something that
 * TCPDF's writeHTML() renders acceptably and writes it to the document.
 * When the HTML has no body element, an empty string is written.
 *
 * @param $pdf
 *   current TCPDF object
 * @param $html
 *   contents of the body of the HTML from the original node
 * @param $font
 *   array with the font definition (font name, styles and size)
 * @return
 *   the TCPDF object with the content written to it
 * @see theme_print_pdf_tcpdf_header()
 */
function theme_print_pdf_tcpdf_content(&$pdf, &$html, $font) {
  // set content font
  $pdf->setFont($font[0], $font[1], $font[2]);

  // Work on the contents of the body only; nothing to extract means nothing to write
  $body = preg_match('!<body.*?>(.*)</body>!sim', $html, $matches) ? $matches[1] : '';

  // Logo, site name, breadcrumb, footer and the separators are not part of the content
  $pattern = '!(?:<div class="print-(?:logo|site_name|breadcrumb|footer)">.*?</div>|<hr class="print-hr" />)!si';
  $body = preg_replace($pattern, '', $body);

  // Make CCK fields look better
  $body = preg_replace('!(<div class="field.*?>)\s*!sm', '$1', $body);
  $body = preg_replace('!(<div class="field.*?>.*?</div>)\s*!sm', '$1', $body);
  $body = preg_replace('!<div( class="field-label.*?>.*?)</div>!sm', '<strong$1</strong>', $body);

  // Since TCPDF's writeHTML is so bad with <p>, do everything possible to make it look nice
  $body = preg_replace('!<(?:p(|\s+.*?)/?|/p)>!i', '<br$1 />', $body);
  $body = str_replace(array('<div', 'div>'), array('<span', 'span><br />'), $body);
  // Collapse the line breaks between consecutive closing spans until nothing changes anymore
  do {
    $prev = $body;
    $body = preg_replace('!(</span>)<br />(\s*?</span><br />)!s', '$1$2', $body);
  } while ($prev != $body);

  // Errors raised while writing the HTML are deliberately suppressed
  @$pdf->writeHTML($body);

  return $pdf;
}
441
/**
 * Format the TCPDF footer contents
 *
 * Extracts the footer from the HTML (without its div tags) and stores it,
 * together with a reduced footer font, in the document. When the HTML has
 * no footer, an empty footer is stored.
 *
 * @param $pdf
 *   current TCPDF object
 * @param $html
 *   contents of the body of the HTML from the original node
 * @param $font
 *   array with the font definition (font name, styles and size)
 * @return
 *   the TCPDF object with the footer settings applied
 * @see theme_print_pdf_tcpdf_footer2()
 */
function theme_print_pdf_tcpdf_footer(&$pdf, &$html, $font) {
  $footer = '';
  // The footer div may be missing from the template output
  if (preg_match('!<div class="print-footer">(.*?)</div>!si', $html, $tpl_footer)) {
    $footer = trim(preg_replace('!</?div[^>]*?>!i', '', $tpl_footer[1]));
  }

  // set footer font, at 80% of the size of the content font
  $font[2] *= 0.8;
  $pdf->setFooterFont($font);
  // set footer margin
  $pdf->SetFooterMargin(10);
  // set footer data
  $pdf->SetFooterData($footer);

  return $pdf;
}
467
/**
 * Format the TCPDF footer layout
 *
 * Writes the footer stored in the document into an HTML cell and adds the
 * "Page n of total" text below a top border, on the side that depends on
 * the document's text direction.
 *
 * @param $pdf
 *   current TCPDF object
 * @return
 *   the TCPDF object with the footer written to it
 * @see theme_print_pdf_tcpdf_footer()
 */
function theme_print_pdf_tcpdf_footer2(&$pdf) {
  // Footer contents first
  $pdf->writeHTMLCell(0, 15, 15, 0, $pdf->footer, 0, 0, 0, TRUE, '');

  $margins = $pdf->getOriginalMargins();
  $replacements = array('!n' => $pdf->PageNo(), '!total' => $pdf->getAliasNbPages());
  $page_label = t('Page !n of !total', $replacements);

  // Print page number: starting from the right margin and left-aligned for
  // right-to-left documents, from the left margin and right-aligned otherwise
  $is_rtl = $pdf->getRTL();
  $pdf->SetX($is_rtl ? $margins['right'] : $margins['left']);
  $pdf->Cell(0, 10, $page_label, 'T', 0, $is_rtl ? 'L' : 'R');

  return $pdf;
}