/[drupal]/contributions/modules/marksmarty/markdown.php
ViewVC logotype

Contents of /contributions/modules/marksmarty/markdown.php

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.5 - (show annotations) (download) (as text)
Tue Jan 22 21:57:25 2008 UTC (22 months ago) by weitzman
Branch: MAIN
CVS Tags: DRUPAL-6--1-0-RC1, DRUPAL-6--1-0-RC2, DRUPAL-6--1-0, HEAD
Changes since 1.4: +2341 -1096 lines
File MIME type: text/x-php
#108706 by moshe weitzman. use markdown extra.
1 <?php
2 #
3 # Markdown Extra - A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2007 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
12 #
13
14
define( 'MARKDOWN_VERSION', "1.0.1k" ); # Wed 26 Sep 2007
define( 'MARKDOWNEXTRA_VERSION', "1.1.7" ); # Wed 26 Sep 2007


#
# Global default settings:
#
# Each setting below is defined only when the including application has not
# defined it already, so a host can override any value by calling define()
# before this file is loaded. An explicit defined() guard is used rather
# than silencing define() with "@", so no error is raised in the first place.
#

# Change to ">" for HTML output
if (!defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')) {
    define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
}

# Define the width of a tab for code blocks.
if (!defined('MARKDOWN_TAB_WIDTH')) {
    define( 'MARKDOWN_TAB_WIDTH', 4 );
}

# Optional title attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_TITLE')) {
    define( 'MARKDOWN_FN_LINK_TITLE', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_TITLE')) {
    define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
}

# Optional class attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_CLASS')) {
    define( 'MARKDOWN_FN_LINK_CLASS', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_CLASS')) {
    define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
}


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
if (!defined('MARKDOWN_WP_POSTS')) {
    define( 'MARKDOWN_WP_POSTS', true );
}
if (!defined('MARKDOWN_WP_COMMENTS')) {
    define( 'MARKDOWN_WP_COMMENTS', true );
}



### Standard Function Interface ###

# Name of the parser class instantiated by the Markdown() function.
if (!defined('MARKDOWN_PARSER_CLASS')) {
    define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
}
51
function Markdown($text) {
    #
    # Convert Markdown-formatted $text and return the parser's output.
    #
    # The parser class named by MARKDOWN_PARSER_CLASS is instantiated on the
    # first call and kept in a static variable; later calls reuse that
    # same instance.
    #
    static $parser = null;

    if ($parser === null) {
        $class_name = MARKDOWN_PARSER_CLASS;
        $parser = new $class_name;
    }

    return $parser->transform($text);
}
66
67
68 ### WordPress Plugin Interface ###
69
70 /*
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
74 Version: 1.1.7
75 Author: Michel Fortin
76 Author URI: http://www.michelf.com/
77 */
78
if (isset($wp_version)) {
    # Background on the filter ordering used here:
    # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    # - The WordPress paragraph generator (wpautop) is unhooked.
    # - Markdown runs on content and excerpts at priority 6.
    # - Excerpts get a paragraph wrapper for display, none in the RSS feed.
    # - Tag balancing moves from save time to display time.
    if (MARKDOWN_WP_POSTS) {
        remove_filter('the_content', 'wpautop');
        remove_filter('the_content_rss', 'wpautop');
        remove_filter('the_excerpt', 'wpautop');

        add_filter('the_content', 'Markdown', 6);
        add_filter('the_content_rss', 'Markdown', 6);
        add_filter('get_the_excerpt', 'Markdown', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt', 'mdwp_add_p');
        add_filter('the_excerpt_rss', 'mdwp_strip_p');

        remove_filter('content_save_pre', 'balanceTags', 50);
        remove_filter('excerpt_save_pre', 'balanceTags', 50);
        add_filter('the_content', 'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);
    }

    # Comments
    # - The WordPress paragraph generator and auto-linker are unhooked.
    # - On pre_comment_content, Markdown runs at priority 6; selected tags
    #   are swapped for placeholders at priority 8 and restored at 12.
    # - Comment excerpts are converted, then stripped of paragraph tags.
    if (MARKDOWN_WP_COMMENTS) {
        remove_filter('comment_text', 'wpautop', 30);
        remove_filter('comment_text', 'make_clickable');

        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
        add_filter('pre_comment_content', 'mdwp_show_tags', 12);
        add_filter('get_comment_text', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

        # Parallel lists: each tag and the placeholder standing in for it.
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $mdwp_hidden_tags = explode(' ',
            '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
        $mdwp_placeholders = explode(' ', str_rot13(
            'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
            'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
    }

    function mdwp_add_p($text) {
        # Empty text, or text that already opens with one of the listed
        # block-level tags, is returned untouched.
        if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
            return $text;
        }

        # Otherwise wrap in <p> and turn each run of blank lines into a
        # paragraph break.
        $wrapped = '<p>'.$text.'</p>';
        return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
    }

    function mdwp_strip_p($t) {
        # Drop every opening and closing <p> tag (case-insensitive).
        return preg_replace('{</?p>}i', '', $t);
    }

    function mdwp_hide_tags($text) {
        # Swap each listed tag for its placeholder.
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    }

    function mdwp_show_tags($text) {
        # Inverse of mdwp_hide_tags(): restore tags from their placeholders.
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    }
}
146
147
148 ### bBlog Plugin Info ###
149
function identify_modifier_markdown() {
    #
    # Return the bBlog plugin descriptor for the "markdown" modifier.
    # The version entry is taken from MARKDOWNEXTRA_VERSION.
    #
    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = MARKDOWNEXTRA_VERSION;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
162
163
164 ### Smarty Modifier Interface ###
165
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: delegates straight to Markdown().
    $html = Markdown($text);
    return $html;
}
169
170
171 ### Textile Compatibility Mode ###
172
173 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
174
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # This file has been renamed "classTextile.php", so it stands in for
    # Textile. PHP SmartyPants is loaded if it can be found on the include
    # path; a missing file is tolerated.
    @include_once 'smartypants.php';

    # Stand-in Textile class whose methods route text through Markdown.
    class Textile {
        function TextileThis($text, $lite='', $encode='') {
            # Markdown is applied only when neither $lite nor $encode is
            # set; SmartyPants is applied whenever it is available.
            $full_conversion = ($lite == '' && $encode == '');
            if ($full_conversion) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }

        # Restrictions are not supported for now: same as TextileThis().
        function TextileRestricted($text, $lite='', $noimage='') {
            return $this->TextileThis($text, $lite);
        }

        # Workaround to ensure compatibility with TextPattern 4.0.3:
        # returns its argument unchanged.
        function blockLite($text) {
            return $text;
        }
    }
}
193
194
195
196 #
197 # Markdown Parser Class
198 #
199
200 class Markdown_Parser {
201
202 # Regex to match balanced [brackets].
203 # Needed to insert a maximum bracked depth while converting to PHP.
204 var $nested_brackets_depth = 6;
205 var $nested_brackets;
206
207 var $nested_url_parenthesis_depth = 4;
208 var $nested_url_parenthesis;
209
210 # Table of hash values for escaped characters:
211 var $escape_chars = '\`*_{}[]()>#+-.!';
212
213 # Change to ">" for HTML output.
214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
215 var $tab_width = MARKDOWN_TAB_WIDTH;
216
217 # Change to `true` to disallow markup or entities.
218 var $no_markup = false;
219 var $no_entities = false;
220
221
function __construct() {
    #
    # Constructor. Initialize appropriate member variables.
    #
    # Declared as __construct() because a method named after the class is
    # not treated as a constructor by PHP 8; without this, the regex
    # fragments below would never be built and the gamuts never sorted.
    #
    $this->_initDetab();

    # Regex fragment matching balanced [brackets], nested up to
    # $nested_brackets_depth levels.
    $this->nested_brackets =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
        str_repeat('\])*', $this->nested_brackets_depth);

    # Regex fragment matching a URL with balanced (parentheses), nested up
    # to $nested_url_parenthesis_depth levels.
    $this->nested_url_parenthesis =
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}

function Markdown_Parser() {
    #
    # Legacy constructor name, kept so that existing code which calls
    # Markdown_Parser() explicitly (for instance a subclass constructor)
    # keeps working. The call is scoped to this class so that it cannot
    # be redirected to an overriding __construct() in a subclass.
    #
    Markdown_Parser::__construct();
}
241
242
243 # Internal hashes used during transformation.
244 var $urls = array();
245 var $titles = array();
246 var $html_hashes = array();
247
248 # Status flag to avoid invalid nesting.
249 var $in_anchor = false;
250
251
function transform($text) {
    #
    # Main entry point: convert a whole Markdown document.
    #
    # The order of the steps below is essential.
    #
    # Reset the per-document hashes. Without this, link definitions and
    # hashed fragments from a previously converted text would leak into
    # this one (e.g. on a page that renders several articles).
    $this->urls = array();
    $this->titles = array();
    $this->html_hashes = array();

    # Normalise DOS (\r\n) and Mac (\r) line endings to \n, and make sure
    # the text ends with a couple of newlines.
    $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

    # Convert tabs to spaces, then turn block-level HTML into hash entries.
    $text = $this->hashHTMLBlocks($this->detab($text));

    # Empty out lines made only of spaces, so that later patterns can
    # match consecutive blank lines with /\n+/.
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Apply each document gamut method in turn.
    foreach (array_keys($this->document_gamut) as $step) {
        $text = $this->$step($text);
    }

    return $text . "\n";
}
293
294 var $document_gamut = array(
295 # Strip link definitions, store in hashes.
296 "stripLinkDefinitions" => 20,
297
298 "runBasicBlockGamut" => 30,
299 );
300
301
function stripLinkDefinitions($text) {
    #
    # Remove link definitions from $text. Each definition found is handed
    # to _stripLinkDefinitions_callback(), which records its URL and title.
    #
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    $definition_re = '{
        ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
        [ ]*
        \n? # maybe *one* newline
        [ ]*
        <?(\S+?)>? # url = $2
        [ ]*
        \n? # maybe one newline
        [ ]*
        (?:
        (?<=\s) # lookbehind for whitespace
        ["(]
        (.*?) # title = $3
        [")]
        [ ]*
        )? # title is optional
        (?:\n+|\Z)
        }xm';

    return preg_replace_callback(
        $definition_re,
        array(&$this, '_stripLinkDefinitions_callback'),
        $text);
}
function _stripLinkDefinitions_callback($matches) {
    # Record one link definition under its lower-cased id and remove it
    # from the text (the returned empty string replaces the definition).
    $id = strtolower($matches[1]);

    $this->urls[$id] = $this->encodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        # Double quotes in the title are escaped.
        $this->titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
339
340
function hashHTMLBlocks($text) {
    #
    # Replace each block-level chunk of raw HTML found in $text with a hash
    # token (via _hashHTMLBlocks_callback) so that later transformations
    # leave it alone. Returns $text unchanged when $this->no_markup is set.
    #
    if ($this->no_markup) return $text;

    $less_than_tab = $this->tab_width - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    #
    # * List "a" is made of tags which can be both inline or block-level.
    #   These will be treated block-level when the start tag is alone on
    #   its line, otherwise they're not matched here and will be taken as
    #   inline later.
    # * List "b" is made of tags which are always block-level;
    #
    $block_tags_a = 'ins|del';
    $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                    'script|noscript|form|fieldset|iframe|math';

    # Regular expression for the content of a block tag.
    $nested_tags_level = 4;
    $attr = '
        (?> # optional tag attributes
        \s # starts with whitespace
        (?>
        [^>"/]+ # text outside quotes
        |
        /+(?!>) # slash not followed by ">"
        |
        "[^"]*" # text inside double quotes (tolerate ">")
        |
        \'[^\']*\' # text inside single quotes (tolerate ">")
        )*
        )?
        ';
    $content =
        str_repeat('
            (?>
            [^<]+ # content without tag
            |
            <\2 # nested opening tag
            '.$attr.' # attributes
            (?>
            />
            |
            >', $nested_tags_level). # end of opening tag
            '.*?'. # last level nested tag content
        str_repeat('
            </\2\s*> # closing nested tag
            )
            |
            <(?!/\2\s*> # other tags with a different name
            )
            )*',
            $nested_tags_level);
    # Same content pattern, but keyed on the tag captured in group 3.
    $content2 = str_replace('\2', '\3', $content);

    # First, look for nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    #
    # Capture groups, in order: $1 whole block, $2 group-b tag, $3 group-a
    # tag, $4 "hr", $5 hr attribute character, $6 processing-instruction
    # delimiter. The processing-instruction branch must close on \6: a
    # backreference to group 2 is unset in that branch and would prevent
    # the branch from ever matching.
    $text = preg_replace_callback('{(?>
        (?>
        (?<=\n\n) # Starting after a blank line
        | # or
        \A\n? # the beginning of the doc
        )
        ( # save in $1

        # Match from `\n<tag>` to `</tag>\n`, handling nested tags
        # in between.

        [ ]{0,'.$less_than_tab.'}
        <('.$block_tags_b.')# start tag = $2
        '.$attr.'> # attributes followed by > and \n
        '.$content.' # content, support nesting
        </\2> # the matching end tag
        [ ]* # trailing spaces/tabs
        (?=\n+|\Z) # followed by a newline or end of document

        | # Special version for tags of group a.

        [ ]{0,'.$less_than_tab.'}
        <('.$block_tags_a.')# start tag = $3
        '.$attr.'>[ ]*\n # attributes followed by >
        '.$content2.' # content, support nesting
        </\3> # the matching end tag
        [ ]* # trailing spaces/tabs
        (?=\n+|\Z) # followed by a newline or end of document

        | # Special case just for <hr />. It was easier to make a special
        # case than to make the other regex more complicated.

        [ ]{0,'.$less_than_tab.'}
        <(hr) # start tag = $4
        \b # word break
        ([^<>])*? #
        /?> # the matching end tag
        [ ]*
        (?=\n{2,}|\Z) # followed by a blank line or end of document

        | # Special case for standalone HTML comments:

        [ ]{0,'.$less_than_tab.'}
        (?s:
        <!-- .*? -->
        )
        [ ]*
        (?=\n{2,}|\Z) # followed by a blank line or end of document

        | # PHP and ASP-style processor instructions (<? and <%)

        [ ]{0,'.$less_than_tab.'}
        (?s:
        <([?%]) # $6
        .*?
        \6>
        )
        [ ]*
        (?=\n{2,}|\Z) # followed by a blank line or end of document

        )
        )}Sxmi',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    return $text;
}
function _hashHTMLBlocks_callback($matches) {
    # Swap the matched HTML block ($1) for its block-level hash token,
    # set apart by blank lines.
    $token = $this->hashBlock($matches[1]);
    return "\n\n" . $token . "\n\n";
}
484
485
function hashPart($text, $boundary = 'X') {
    #
    # Store $text in $this->html_hashes and return the token that stands
    # for it. Called whenever a function inserts an atomic element in the
    # text stream.
    #
    # $boundary is the character placed on both sides of the token. By
    # convention, "B" is used for block elements that need not be wrapped
    # in paragraph tags at the end, ":" for elements that are word
    # separators, and "S" for general span-level elements.
    #
    # The sequence number is a static counter and is never reset here.
    #
    static $counter = 0;

    # Swap back any tag hash found in $text first, so that we do not have
    # to `unhash` multiple times at the end.
    $restored = $this->unhash($text);

    $counter++;
    $token = $boundary . "\x1A" . $counter . $boundary;
    $this->html_hashes[$token] = $restored;

    return $token; # String that will replace the tag.
}
507
508
function hashBlock($text) {
    # Hash $text using the block-level boundary character "B".
    $token = $this->hashPart($text, 'B');
    return $token;
}
515
516
517 var $block_gamut = array(
518 #
519 # These are all the transformations that form block-level
520 # tags like paragraphs, headers, and list items.
521 #
522 "doHeaders" => 10,
523 "doHorizontalRules" => 20,
524
525 "doLists" => 40,
526 "doCodeBlocks" => 50,
527 "doBlockQuotes" => 60,
528 );
529
function runBlockGamut($text) {
    #
    # Run the block gamut on $text, hashing raw HTML blocks first.
    #
    # The hashing has to be repeated for each block rather than done once
    # at the start: HTML blocks can sit inside list items where they were
    # indented, and an indented block would have looked like a code block
    # to an earlier pass of hashHTMLBlocks.
    #
    $hashed = $this->hashHTMLBlocks($text);
    return $this->runBasicBlockGamut($hashed);
}
543
function runBasicBlockGamut($text) {
    #
    # Run the block gamut without hashing HTML blocks first. Useful when
    # the HTML blocks are known to be hashed already, as in the first
    # whole-document pass.
    #
    foreach (array_keys($this->block_gamut) as $step) {
        $text = $this->$step($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    return $this->formParagraphs($text);
}
559
560
function doHorizontalRules($text) {
    #
    # Replace horizontal-rule lines with a hashed <hr> element.
    #
    # The <hr> is hashed once; every rule line is replaced by that token.
    $replacement = "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n";

    $rule_re = '{
        ^[ ]{0,3} # Leading space
        ([-*_]) # $1: First marker
        (?> # Repeated marker group
        [ ]{0,2} # Zero, one, or two spaces.
        \1 # Marker character
        ){2,} # Group repeated at least twice
        [ ]* # Tailing spaces
        $ # End of line.
        }mx';

    return preg_replace($rule_re, $replacement, $text);
}
577
578
579 var $span_gamut = array(
580 #
581 # These are all the transformations that occur *within* block-level
582 # tags like paragraphs, headers, and list items.
583 #
584 # Process character escapes, code spans, and inline HTML
585 # in one shot.
586 "parseSpan" => -30,
587
588 # Process anchor and image tags. Images must come first,
589 # because ![foo][f] looks like an anchor.
590 "doImages" => 10,
591 "doAnchors" => 20,
592
593 # Make links out of things like `<http://example.com/>`
594 # Must come after doAnchors, because you can use < and >
595 # delimiters in inline links like [this](<url>).
596 "doAutoLinks" => 30,
597 "encodeAmpsAndAngles" => 40,
598
599 "doItalicsAndBold" => 50,
600 "doHardBreaks" => 60,
601 );
602
function runSpanGamut($text) {
    #
    # Apply every span gamut method to $text, in the array's order.
    #
    foreach (array_keys($this->span_gamut) as $step) {
        $text = $this->$step($text);
    }
    return $text;
}
613
614
function doHardBreaks($text) {
    # A line ending in two or more spaces becomes a hard line break.
    $callback = array(&$this, '_doHardBreaks_callback');
    return preg_replace_callback('/ {2,}\n/', $callback, $text);
}
function _doHardBreaks_callback($matches) {
    # The match itself is not used: every hard break yields the same tag.
    $tag = "<br$this->empty_element_suffix\n";
    return $this->hashPart($tag);
}
623
624
function doAnchors($text) {
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
    # Does nothing when called while another anchor is being built
    # ($this->in_anchor), which avoids producing nested links.
    #
    if ($this->in_anchor) return $text;
    $this->in_anchor = true;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_re = '{
        ( # wrap whole match in $1
        \[
        ('.$this->nested_brackets.') # link text = $2
        \]

        [ ]? # one optional space
        (?:\n[ ]*)? # one optional newline followed by spaces

        \[
        (.*?) # id = $3
        \]
        )
        }xs';
    $text = preg_replace_callback($reference_re,
        array(&$this, '_doAnchors_reference_callback'), $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        ( # wrap whole match in $1
        \[
        ('.$this->nested_brackets.') # link text = $2
        \]
        \( # literal paren
        [ ]*
        (?:
        <(\S*)> # href = $3
        |
        ('.$this->nested_url_parenthesis.') # href = $4
        )
        [ ]*
        ( # $5
        ([\'"]) # quote char = $6
        (.*?) # Title = $7
        \6 # matching quote
        [ ]* # ignore any spaces/tabs between closing quote and )
        )? # title is optional
        \)
        )
        }xs';
    $text = preg_replace_callback($inline_re,
        array(&$this, '_DoAnchors_inline_callback'), $text);

    #
    # Reference-style shortcuts of the form [link text] are not processed
    # here. If they are ever handled, it must be after the two passes
    # above, in case the text also contains [link text][1] or
    # [link text](/foo).
    #

    $this->in_anchor = false;
    return $text;
}
function _doAnchors_reference_callback($matches) {
    #
    # Build the <a> tag for one reference-style link. When the link id has
    # no recorded definition, the matched text is returned untouched.
    #
    $link_text = $matches[2];
    $link_id = $matches[3];

    # An empty id, as in [this][], means the link text is the id.
    if ($link_id == "") {
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    if (!isset($this->urls[$link_id])) {
        return $matches[1];
    }

    $url = $this->encodeAmpsAndAngles($this->urls[$link_id]);
    $tag = "<a href=\"$url\"";

    if (isset($this->titles[$link_id])) {
        $title = $this->encodeAmpsAndAngles($this->titles[$link_id]);
        $tag .= " title=\"$title\"";
    }

    $link_text = $this->runSpanGamut($link_text);
    $tag .= ">$link_text</a>";

    return $this->hashPart($tag);
}
function _doAnchors_inline_callback($matches) {
    #
    # Build the hashed <a> tag for one inline-style link.
    #
    # $matches[2] is the link text, $matches[3] the URL when written as
    # <url> and $matches[4] the URL otherwise; $matches[7] is the optional
    # title and is absent when no title was given.
    #
    # The link text goes through the span gamut exactly once; feeding the
    # already converted text through it a second time is redundant work.
    #
    $link_text = $this->runSpanGamut($matches[2]);
    $url = $matches[3] == '' ? $matches[4] : $matches[3];

    $url = $this->encodeAmpsAndAngles($url);

    $result = "<a href=\"$url\"";
    if (isset($matches[7])) {
        # Escape double quotes so the title cannot end the attribute.
        $title = str_replace('"', '&quot;', $matches[7]);
        $title = $this->encodeAmpsAndAngles($title);
        $result .= " title=\"$title\"";
    }
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
749
750
function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        ( # wrap whole match in $1
        !\[
        ('.$this->nested_brackets.') # alt text = $2
        \]

        [ ]? # one optional space
        (?:\n[ ]*)? # one optional newline followed by spaces

        \[
        (.*?) # id = $3
        \]

        )
        }xs';
    $text = preg_replace_callback($reference_re,
        array(&$this, '_doImages_reference_callback'), $text);

    #
    # Next, handle inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
        ( # wrap whole match in $1
        !\[
        ('.$this->nested_brackets.') # alt text = $2
        \]
        \s? # One optional whitespace character
        \( # literal paren
        [ ]*
        (?:
        <(\S*)> # src url = $3
        |
        ('.$this->nested_url_parenthesis.') # src url = $4
        )
        [ ]*
        ( # $5
        ([\'"]) # quote char = $6
        (.*?) # title = $7
        \6 # matching quote
        [ ]*
        )? # title is optional
        \)
        )
        }xs';

    return preg_replace_callback($inline_re,
        array(&$this, '_doImages_inline_callback'), $text);
}
function _doImages_reference_callback($matches) {
    #
    # Build the hashed <img> tag for one reference-style image. When the
    # id has no recorded definition, the matched text is left intact.
    #
    $alt_text = $matches[2];

    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        # for shortcut links like ![this][].
        $link_id = strtolower($alt_text);
    }

    if (!isset($this->urls[$link_id])) {
        return $matches[1];
    }

    # Escape double quotes so the alt text cannot end the attribute.
    $alt_text = str_replace('"', '&quot;', $alt_text);
    $url = $this->urls[$link_id];

    $tag = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($this->titles[$link_id])) {
        $title = $this->titles[$link_id];
        $tag .= " title=\"$title\"";
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
function _doImages_inline_callback($matches) {
    #
    # Build the hashed <img> tag for one inline image.
    #
    # The URL is $matches[3] when written as <url>, $matches[4] otherwise;
    # $matches[7] is the optional title.
    #
    $url = $matches[3] == '' ? $matches[4] : $matches[3];
    $alt_text = str_replace('"', '&quot;', $matches[2]);

    $tag = "<img src=\"$url\" alt=\"$alt_text\"";

    if (isset($matches[7])) {
        $title = str_replace('"', '&quot;', $matches[7]);
        $tag .= " title=\"$title\"";
    }

    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
849
850
function doHeaders($text) {
    #
    # Convert both header syntaxes to <h1>..<h6> blocks.
    #
    # Setext-style headers:
    #   Header 1
    #   ========
    #
    #   Header 2
    #   --------
    #
    $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
    $text = preg_replace_callback($setext_re,
        array(&$this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $atx_re = '{
        ^(\#{1,6}) # $1 = string of #\'s
        [ ]*
        (.+?) # $2 = Header text
        [ ]*
        \#* # optional closing #\'s (not counted)
        \n+
        }xm';

    return preg_replace_callback($atx_re,
        array(&$this, '_doHeaders_callback_atx'), $text);
}
function _doHeaders_callback_setext($matches) {
    #
    # Build the hashed header block for one Setext-style header.
    # $matches[1] is the header text, $matches[2] the underline.
    #
    # An underline of "=" gives a level 1 header, "-" a level 2 header.
    # The first character is read with [0]: the {0} string-offset syntax
    # is no longer accepted by current PHP versions.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    # The header level is the number of leading # characters ($1);
    # $2 is the header text.
    $depth = strlen($matches[1]);
    $inner = $this->runSpanGamut($matches[2]);
    $hashed = $this->hashBlock("<h$depth>" . $inner . "</h$depth>");
    return "\n" . $hashed . "\n\n";
}
891
892
function doLists($text) {
    #
    # Form HTML ordered (numbered) and unordered (bulleted) lists.
    #
    $less_than_tab = $this->tab_width - 1;

    # Bullet markers are processed first, then number markers.
    $markers = array('[*+-]', '\d+[.]');

    foreach ($markers as $marker) {
        # Re-usable pattern to match any entire ul or ol list:
        $whole_list = '
            ( # $1 = whole list
            ( # $2
            [ ]{0,'.$less_than_tab.'}
            ('.$marker.') # $3 = first list item marker
            [ ]+
            )
            (?s:.+?)
            ( # $4
            \z
            |
            \n{2,}
            (?=\S)
            (?! # Negative lookahead for another list item marker
            [ ]*
            '.$marker.'[ ]+
            )
            )
            )
            '; // mx

        # Inside a list, a list may start on any line; at top level it
        # must follow a blank line or open the document. See the extended
        # comment in processListItems().
        if ($this->list_level) {
            $list_re = '{
                ^
                '.$whole_list.'
                }mx';
        }
        else {
            $list_re = '{
                (?:(?<=\n)\n|\A\n?) # Must eat the newline
                '.$whole_list.'
                }mx';
        }

        $text = preg_replace_callback($list_re,
            array(&$this, '_doLists_callback'), $text);
    }

    return $text;
}
function _doLists_callback($matches) {
    #
    # Convert one whole list ($1) into a hashed <ul> or <ol> block. The
    # list type is decided from the first item marker ($3).
    #
    $is_unordered = preg_match("/[*+-]/", $matches[3]);

    $list_type = $is_unordered ? "ul" : "ol";
    # Items are split using only the marker style of this list type.
    $item_marker = $is_unordered ? '[*+-]' : '\d+[.]';

    $items = $this->processListItems($matches[1] . "\n", $item_marker);

    $hashed = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");
    return "\n" . $hashed . "\n\n";
}
967
968 var $list_level = 0;
969
function processListItems($list_str, $marker_any) {
    #
    # Process the contents of a single ordered or unordered list, splitting
    # it into individual list items. $marker_any is the regex fragment
    # matching this list's item markers.
    #
    # $this->list_level counts how deeply nested in lists we are: it is
    # incremented on entry and decremented on exit, so zero means we are
    # not inside any list.
    #
    # This matters because outside a list we want to treat this:
    #
    #   I recommend upgrading to version
    #   8. Oops, now this line is treated
    #   as a sub-list.
    #
    # as a single paragraph, even though the second line starts with a
    # digit-period-space sequence, whereas inside a list (or sub-list) that
    # line is taken as the start of a sub-list. It is a kludge: this part
    # of Markdown's syntax is hard to parse perfectly. Perhaps the solution
    # is to change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".
    #
    $this->list_level++;

    # trim trailing blank lines:
    $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_re = '{
        (\n)? # leading line = $1
        (^[ ]*) # leading whitespace = $2
        ('.$marker_any.') [ ]+ # list marker = $3
        ((?s:.+?)) # list item text = $4
        (?:(\n+(?=\n))|\n) # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
        }xm';
    $items = preg_replace_callback($item_re,
        array(&$this, '_processListItems_callback'), $trimmed);

    $this->list_level--;
    return $items;
}
function _processListItems_callback($matches) {
    #
    # Convert one list item ($4) into an <li> element.
    #
    $item = $matches[4];

    # An item set apart by a leading or tailing blank line, or containing
    # a blank line itself, gets full block-level processing.
    $is_block_item = !empty($matches[1])
        || !empty($matches[5])
        || preg_match('/\n{2,}/', $item);

    if ($is_block_item) {
        $item = $this->runBlockGamut($this->outdent($item)."\n");
    }
    else {
        # Recursion for sub-lists, then span-level processing only:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return "<li>" . $item . "</li>\n";
}
1034
1035
function doCodeBlocks($text) {
    #
    # Process Markdown `<pre><code>` blocks.
    #
    $code_block_re = '{
        (?:\n\n|\A)
        ( # $1 = the code block -- one or more lines, starting with a space/tab
        (?>
        [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
        .*\n+
        )+
        )
        ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback($code_block_re,
        array(&$this, '_doCodeBlocks_callback'), $text);
}
function _doCodeBlocks_callback($matches) {
    #
    # Turn one indented code block ($1) into a hashed <pre><code> block.
    #
    # Remove one level of indentation, then escape HTML special
    # characters, leaving quotes as they are.
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $hashed = $this->hashBlock("<pre><code>$code\n</code></pre>");
    return "\n\n" . $hashed . "\n\n";
}
1066
1067
function makeCodeSpan($code) {
    #
    # Create the hashed code span markup for $code. The code is trimmed and
    # its HTML special characters escaped, leaving quotes as they are.
    #
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    $span = "<code>$escaped</code>";
    return $this->hashPart($span);
}
1075
1076
function doItalicsAndBold($text) {
#
# Convert emphasis markers in $text. Doubled markers are handed to
# _doItalicAndBold_strong_callback first; single markers are then handed
# to _doItalicAndBold_em_callback.
#
	$strong_pattern = '{
			( # $1: Marker
				(?<!\*\*) \* | # (not preceded by two chars of
				(?<!__) _ # the same marker)
			)
			\1
			(?=\S) # Not followed by whitespace
			(?!\1\1) # or two others marker chars.
			( # $2: Content
				(?>
					[^*_]+? # Anthing not em markers.
				|
					# Balence any regular emphasis inside.
					\1 (?=\S) .+? (?<=\S) \1
				|
					. # Allow unbalenced * and _.
				)+?
			)
			(?<=\S) \1\1 # End mark not preceded by whitespace.
		}sx';

	$em_pattern =
		'{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx';

	# Order matters: the <strong> pass has to run before the <em> pass.
	$text = preg_replace_callback(
		$strong_pattern,
		array(&$this, '_doItalicAndBold_strong_callback'), $text);

	return preg_replace_callback(
		$em_pattern,
		array(&$this, '_doItalicAndBold_em_callback'), $text);
}
function _doItalicAndBold_em_callback($matches) {
#
# Wrap the emphasised content ($matches[2]) in <em>, after running the
# span gamut on it, and return the hashed result.
#
	$inner = $this->runSpanGamut($matches[2]);

	return $this->hashPart("<em>" . $inner . "</em>");
}
function _doItalicAndBold_strong_callback($matches) {
#
# Wrap the strongly emphasised content ($matches[2]) in <strong>, after
# running the span gamut on it, and return the hashed result.
#
	$inner = $this->runSpanGamut($matches[2]);

	return $this->hashPart("<strong>" . $inner . "</strong>");
}
1117
1118
function doBlockQuotes($text) {
#
# Find runs of ">"-quoted lines in $text and replace each run with the
# result of _doBlockQuotes_callback.
#
	$pattern = '/
			( # Wrap whole match in $1
				(?>
					^[ ]*>[ ]? # ">" at the start of a line
					.+\n # rest of the first line
					(.+\n)* # subsequent consecutive lines
					\n* # blanks
				)+
			)
		/xm';

	return preg_replace_callback(
		$pattern, array(&$this, '_doBlockQuotes_callback'), $text);
}
function _doBlockQuotes_callback($matches) {
#
# Convert one matched blockquote ($matches[1]) into a hashed
# <blockquote> element.
#
	$bq = $matches[1];

	# Trim one level of quoting and empty out whitespace-only lines.
	$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
	$bq = $this->runBlockGamut($bq); # recurse

	# Indent the rendered content inside the <blockquote>.
	$bq = preg_replace('/^/m', " ", $bq);

	# That indentation would become part of any <pre> content, so it is
	# removed again inside <pre>...</pre> by _doBlockQuotes_callback2.
	# The callback name is spelled exactly as the method is declared.
	$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_doBlockQuotes_callback2'), $bq);

	return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Strip the indentation added by _doBlockQuotes_callback from every line
# of a matched <pre> block ($matches[1]) and return the result.
#
	return preg_replace('/^ /m', '', $matches[1]);
}
1153
1154
function formParagraphs($text) {
#
# Split $text into chunks separated by blank lines and turn each chunk
# into either a <p> element or, when the chunk is a lone block hash, the
# HTML stored for that hash.
#
# Params:
#	$text - string to process with html <p> tags
#
# Returns the chunks joined back together with blank lines.
#
# Note: an earlier, commented-out experiment that processed the content
# of <div markdown="1"> blocks at this point has been removed; it was
# never executed.
#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# Is a block: substitute the stored HTML as-is.
			$grafs[$key] = $this->html_hashes[$value];
		}
		else {
			# Is a paragraph: leading spaces are replaced by the opening tag.
			$value = $this->runSpanGamut($value);
			$value = preg_replace('/^([ ]*)/', "<p>", $value);
			$value .= "</p>";
			$grafs[$key] = $this->unhash($value);
		}
	}

	return implode("\n\n", $grafs);
}
1223
1224
function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded.
#
# When $this->no_entities is set, every "&" and "<" in $text is encoded.
# Otherwise an "&" is left alone when it already starts something that
# looks like an entity, and a "<" is left alone when it is followed by a
# letter or one of / ? $ ! %.
#
	if ($this->no_entities) {
		$text = str_replace('&', '&amp;', $text);
		$text = str_replace('<', '&lt;', $text);
		return $text;
	}

	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
	# http://bumppo.net/projects/amputator/
	$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
		'&amp;', $text);

	# Encode naked <'s
	$text = preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $text);

	return $text;
}
1243
1244
function doAutoLinks($text) {
#
# Convert <scheme:...> and <address@domain.foo> shorthand in $text into
# links. URLs (http, https, ftp, dict) are handled by
# _doAutoLinks_url_callback, email addresses (with or without a
# "mailto:" prefix) by _doAutoLinks_email_callback.
#
	$url_pattern = '{<((https?|ftp|dict):[^\'">\s]+)>}';

	# Email addresses: <address@domain.foo>
	$email_pattern = '{
			<
			(?:mailto:)?
			(
				[-.\w\x80-\xFF]+
				\@
				[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
			)
			>
		}xi';

	$text = preg_replace_callback(
		$url_pattern, array(&$this, '_doAutoLinks_url_callback'), $text);

	return preg_replace_callback(
		$email_pattern, array(&$this, '_doAutoLinks_email_callback'), $text);
}
function _doAutoLinks_url_callback($matches) {
#
# Build a hashed <a> element for an automatic URL link. The matched URL
# ($matches[1]) is passed through encodeAmpsAndAngles and used as both
# the href and the link text.
#
	$href = $this->encodeAmpsAndAngles($matches[1]);

	return $this->hashPart("<a href=\"" . $href . "\">" . $href . "</a>");
}
function _doAutoLinks_email_callback($matches) {
#
# Build a hashed, entity-obfuscated mailto link for the matched email
# address ($matches[1]).
#
	$anchor = $this->encodeEmailAddress($matches[1]);

	return $this->hashPart($anchor);
}
1274
1275
function encodeEmailAddress($addr) {
#
# Input: an email address, e.g. "foo@example.com"
#
# Output: a mailto link for that address in which the ASCII characters
# of both the href and the link text are mostly rewritten as decimal or
# hexadecimal character entities, in the hope of defeating
# address-harvesting bots. The same input always gives the same output,
# because the pseudo-random choice is seeded from a CRC of the address.
#
# Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
# With some optimizations by Milian Wolff.
#
	$addr = "mailto:" . $addr;
	$pieces = preg_split('/(?<!^)(?!$)/', $addr);
	$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

	foreach ($pieces as $index => $piece) {
		$code = ord($piece);

		# Non-ASCII bytes are passed through untouched.
		if ($code >= 128) {
			continue;
		}

		$roll = ($seed * (1 + $index)) % 100; # Pseudo-random function.

		# Roughly 10% raw, 45% hex, 45% dec -- except that '@' is never
		# left raw.
		if ($roll > 90 && $piece != '@') {
			continue;
		}

		$pieces[$index] = ($roll < 45)
			? '&#x'.dechex($code).';'
			: '&#'.$code.';';
	}

	$href = implode('', $pieces);
	$label = implode('', array_slice($pieces, 7)); # text without `mailto:`

	return "<a href=\"$href\">$label</a>";
}
1315
1316
function parseSpan($str) {
#
# Walk through $str token by token. A token is a backslash-escaped
# character, a run of backticks opening a code span or, unless
# $this->no_markup is set, an inline HTML comment, processing instruction
# or tag. Plain text between tokens is copied to the output unchanged;
# each token is replaced by whatever handleSpanToken returns for it.
#
	$markup_alternatives = $this->no_markup ? '' : '
			|
				<!-- .*? --> # comment
			|
				<\?.*?\?> | <%.*?%> # processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+ # regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		';

	$regex = '{
			(
				\\\\['.preg_quote($this->escape_chars).']
			|
				(?<![`\\\\])
				`+ # code span marker
		'.$markup_alternatives.'
			)
		}xs';

	$output = '';

	while (true) {
		# Split off the text before the next token, the token itself and
		# the remainder of the string.
		$parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Text preceding the token goes straight to the output.
		$output .= $parts[0];

		# No token found: the whole string has been consumed.
		if (!isset($parts[1])) {
			break;
		}

		# handleSpanToken receives the remainder by reference and may
		# consume part of it, so $str is updated only after the call.
		$output .= $this->handleSpanToken($parts[1], $parts[2]);
		$str = $parts[2];
	}

	return $output;
}
1371
1372
1373 function handleSpanToken($token, &$str) {
1374 #
1375 # Handle $token provided by parseSpan by determining its nature and
1376 # returning the corresponding value that should replace it.
1377 #
1378 switch ($token{0}) {
1379 case "\\":
1380 return $this->hashPart("&#". ord($token{1}). ";");
1381 case "`":
1382 # Search for end marker in remaining text.
1383 if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm',
1384 $str, $matches))
1385 {
1386 $str = $matches[2];
1387 $codespan = $this->makeCodeSpan($matches[1]);
1388 return $this->hashPart($codespan);
1389 }
1390 return $token;