/[drupal]/contributions/modules/typogrify/smartypants.php
ViewVC logotype

Contents of /contributions/modules/typogrify/smartypants.php

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.3 - (show annotations) (download) (as text)
Sat May 30 12:55:19 2009 UTC (5 months, 3 weeks ago) by mikl
Branch: MAIN
CVS Tags: DRUPAL-7--1-0-ALPHA1, DRUPAL-7--1-0-BETA1, HEAD
Changes since 1.2: +24 -28 lines
File MIME type: text/x-php
Merged changes from DRUPAL6--1.
1 <?php
2 // $Id: smartypants.php,v 1.2.2.1 2009/04/28 21:49:37 mikl Exp $
3
4 /**
5 * @file smartypants.php
6 * SmartyPants - Smart punctuation for web sites
7 *
8 * by John Gruber <http://daringfireball.net>
9 *
10 * PHP port by Michel Fortin
11 * <http://www.michelf.com/>
12 *
13 * Copyright (c) 2003-2004 John Gruber
14 * Copyright (c) 2004-2005 Michel Fortin
15 *
16 * Re-released under GPLv2 for Drupal.
17 */
18
# Version identifiers of this PHP port and of the SmartyPants syntax it follows.
define('SMARTYPANTS_SYNTAX_VERSION', '1.5.1'); # Fri 12 Mar 2004
define('SMARTYPANTS_PHP_VERSION', '1.5.1e'); # Fri 9 Dec 2005

# Pattern matching the opening or closing tag of an element whose contents
# are left alone. Capture group 1 holds the "/" of a closing tag.
define('SMARTYPANTS_TAGS_TO_SKIP', '@<(/?)(?:pre|code|kbd|script|math)[\s>]@');

# Default SmartyPants configuration, used whenever a caller does not pass an
# explicit attribute. Declared global so that it lands in the global scope
# even when this file is included from inside a function.
#
#  "1"  =>  "--" for em-dashes; no en-dash support
#  "2"  =>  "---" for em-dashes; "--" for en-dashes
#  "3"  =>  "--" for em-dashes; "---" for en-dashes
#
# See docs for more configuration options.
global $_typogrify_smartypants_attr;
$_typogrify_smartypants_attr = "1"; # Change this to configure.
32
33
34 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_smartypants($text, $attr = NULL) {
  #
  #   Smarty modifier entry point.
  #
  #   Parameters: $text  text to be parsed
  #               $attr  SmartyPants attribute string, or NULL
  #   Returns:    Whatever SmartyPants() returns for the same two arguments.
  #
  $educated = SmartyPants($text, $attr);
  return $educated;
}
38
39
40
function SmartyPants($text, $attr = NULL, $ctx = NULL) {
  #
  #   Main entry point: applies the transformations selected by $attr to
  #   every text run of $text, leaving HTML tags and the contents of
  #   skipped elements (see SMARTYPANTS_TAGS_TO_SKIP) untouched.
  #
  #   Parameters: $text  text to be parsed
  #               $attr  value of the smart_quotes="" attribute
  #               $ctx   MT context object (unused)
  #   Returns:    The processed text.
  #
  global $_typogrify_smartypants_attr;

  # Use the configured default only when no attribute was supplied. The
  # test is strict on purpose: a loose "== NULL" also matches integer 0,
  # which made the documented "0 : do nothing" mode unreachable for callers
  # passing a number instead of a string.
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  # Options to specify which transformations to make. Every flag starts
  # disabled so that it is defined whichever branch below runs (stupefy
  # mode and custom attribute strings used to leave some undefined).
  $do_quotes    = 0;
  $do_backticks = 0;
  $do_dashes    = 0;
  $do_ellipses  = 0;
  $do_stupefy   = FALSE;
  $convert_quot = 0;  # should we translate &quot; entities into normal quotes?

  # Parse attributes:
  #  0 : do nothing
  #  1 : set all
  #  2 : set all, using old school en- and em- dash shortcuts
  #  3 : set all, using inverted old school en and em- dash shortcuts
  # -1 : "stupefy" mode, turn SmartyPants entities back into ASCII
  #
  #  q : quotes
  #  b : backtick quotes (``double'' only)
  #  B : backtick quotes (``double'' and `single')
  #  d : dashes
  #  D : old school dashes
  #  i : inverted old school dashes
  #  e : ellipses
  #  w : convert &quot; entities to " for Dreamweaver users

  if ($attr == "0") {
    # Do nothing.
    return $text;
  }
  else if ($attr == "1" || $attr == "2" || $attr == "3") {
    # Do everything; the number selects the dash shorthand.
    $do_quotes    = 1;
    $do_backticks = 1;
    $do_ellipses  = 1;
    if ($attr == "1") {
      $do_dashes = 1;
    }
    else if ($attr == "2") {
      $do_dashes = 2;
    }
    else {
      $do_dashes = 3;
    }
  }
  else if ($attr == "-1") {
    # Special "stupefy" mode.
    $do_stupefy = 1;
  }
  else {
    # Custom mode: every character of the attribute switches one option on.
    $chars = preg_split('//', (string) $attr, -1, PREG_SPLIT_NO_EMPTY);
    foreach ($chars as $c) {
      if      ($c == "q") { $do_quotes    = 1; }
      else if ($c == "b") { $do_backticks = 1; }
      else if ($c == "B") { $do_backticks = 2; }
      else if ($c == "d") { $do_dashes    = 1; }
      else if ($c == "D") { $do_dashes    = 2; }
      else if ($c == "i") { $do_dashes    = 3; }
      else if ($c == "e") { $do_ellipses  = 1; }
      else if ($c == "w") { $convert_quot = 1; }
      # Unknown attribute options are ignored.
    }
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.

  $prev_token_last_char = '';  # This is a cheat, used to get some context
                               # for one-character tokens that consist of
                               # just a quote char. What we do is remember
                               # the last character of the previous text
                               # token, to use as context to curl single-
                               # character quote tokens correctly.

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == 'tag') {
      # Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        # Group 1 is "/" for a closing tag, which ends the skipped region.
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    $last_char = substr($t, -1);  # Remember last char of this token before processing.

    if (!$in_pre) {
      $t = ProcessEscapes($t);

      if ($convert_quot) {
        $t = preg_replace('/&quot;/', '"', $t);
      }

      if ($do_dashes == 1) $t = EducateDashes($t);
      if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
      if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);

      if ($do_ellipses) $t = EducateEllipses($t);

      # Note: backticks need to be processed before quotes.
      if ($do_backticks) {
        $t = EducateBackticks($t);
        if ($do_backticks == 2) $t = EducateSingleBackticks($t);
      }

      if ($do_quotes) {
        # A token that is nothing but a quote char has no context of its
        # own: it closes when the previous text token ended in a
        # non-whitespace character and opens otherwise.
        $after_text = preg_match('/\S/', $prev_token_last_char);
        if ($t == "'") {
          $t = $after_text ? "&#8217;" : "&#8216;";
        }
        else if ($t == '"') {
          $t = $after_text ? "&#8221;" : "&#8220;";
        }
        else {
          # Normal case:
          $t = EducateQuotes($t);
        }
      }

      if ($do_stupefy) $t = StupefyEntities($t);
    }

    $prev_token_last_char = $last_char;
    $result .= $t;
  }

  return $result;
}
190
191
function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
  #
  #   Educates quotes only, in every text run of $text outside of tags and
  #   skipped elements (see SMARTYPANTS_TAGS_TO_SKIP).
  #
  #   Parameters: $text  text to be parsed
  #               $attr  value of the smart_quotes="" attribute:
  #                      0 does nothing, 2 also educates ``backticks''
  #                      -style quotes, anything else educates plain quotes
  #               $ctx   MT context object (unused)
  #   Returns:    The processed text.
  #
  global $_typogrify_smartypants_attr;

  # Use the configured default only when no attribute was supplied. A loose
  # "== NULL" also matches integer 0, which made the "do nothing" branch
  # below unreachable for SmartQuotes($text, 0).
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  if ($attr == 0) {
    # do nothing;
    return $text;
  }

  # should we educate ``backticks'' -style quotes?
  $do_backticks = ($attr == 2) ? 1 : 0;

  # Special case to handle quotes at the very end of $text when preceded by
  # an HTML tag. Add a space to give the quote education algorithm a bit of
  # context, so that it can guess correctly that it's a closing quote:
  $add_extra_space = 0;
  if (preg_match("/>['\"]\\z/", $text)) {
    $add_extra_space = 1; # Remember, so we can trim the extra space later.
    $text .= " ";
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags

  $prev_token_last_char = "";  # This is a cheat, used to get some context
                               # for one-character tokens that consist of
                               # just a quote char. What we do is remember
                               # the last character of the previous text
                               # token, to use as context to curl single-
                               # character quote tokens correctly.

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with quotes inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        # Group 1 is "/" for a closing tag, which ends the skipped region.
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    $last_char = substr($t, -1);  # Remember last char of this token before processing.

    if (!$in_pre) {
      $t = ProcessEscapes($t);
      if ($do_backticks) {
        $t = EducateBackticks($t);
      }

      # A token that is nothing but a quote char has no context of its own:
      # it closes when the previous text token ended in a non-whitespace
      # character and opens otherwise.
      $after_text = preg_match('/\S/', $prev_token_last_char);
      if ($t == "'") {
        $t = $after_text ? "&#8217;" : "&#8216;";
      }
      else if ($t == '"') {
        $t = $after_text ? "&#8221;" : "&#8220;";
      }
      else {
        # Normal case:
        $t = EducateQuotes($t);
      }
    }

    $prev_token_last_char = $last_char;
    $result .= $t;
  }

  if ($add_extra_space) {
    # Trim trailing space if we added one earlier. The result has to be
    # assigned back: preg_replace() returns the new string and leaves its
    # subject argument untouched.
    $result = preg_replace('/ \z/', '', $result);
  }
  return $result;
}
284
285
function SmartDashes($text, $attr = NULL, $ctx = NULL) {
  #
  #   Educates dashes only, in every text run of $text outside of tags and
  #   skipped elements (see SMARTYPANTS_TAGS_TO_SKIP).
  #
  #   Parameters: $text  text to be parsed
  #               $attr  value of the smart_dashes="" attribute:
  #                      0 does nothing, 2 selects the old school shortcuts,
  #                      3 the inverted old school shortcuts, anything else
  #                      plain "--" em-dashes
  #               $ctx   MT context object (unused)
  #   Returns:    The processed text.
  #
  global $_typogrify_smartypants_attr;

  # Use the configured default only when no attribute was supplied. A loose
  # "== NULL" also matches integer 0, which made the "do nothing" branch
  # below unreachable for SmartDashes($text, 0).
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  if ($attr == 0) {
    # do nothing;
    return $text;
  }

  # Name of the function to use for dash education, default to EducateDashes:
  $dash_sub_ref = 'EducateDashes';
  if ($attr == 2) {
    # use old smart dash shortcuts, "--" for en, "---" for em
    $dash_sub_ref = 'EducateDashesOldSchool';
  }
  else if ($attr == 3) {
    # inverse of 2, "--" for em, "---" for en
    $dash_sub_ref = 'EducateDashesOldSchoolInverted';
  }

  $tokens = _TokenizeHTML($text);

  $result = '';
  $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with dashes inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        # Group 1 is "/" for a closing tag, which ends the skipped region.
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    if (!$in_pre) {
      $t = ProcessEscapes($t);
      $t = $dash_sub_ref($t);  # variable function call
    }
    $result .= $t;
  }
  return $result;
}
333
334
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
  #
  #   Educates ellipses only, in every text run of $text outside of tags
  #   and skipped elements (see SMARTYPANTS_TAGS_TO_SKIP).
  #
  #   Parameters: $text  text to be parsed
  #               $attr  value of the smart_ellipses="" attribute;
  #                      0 does nothing
  #               $ctx   MT context object (unused)
  #   Returns:    The processed text.
  #
  # Without this declaration the default below reads an undefined local
  # variable (NULL), so calling the function without $attr silently
  # returned the text unprocessed.
  global $_typogrify_smartypants_attr;

  # Use the configured default only when no attribute was supplied. A loose
  # "== NULL" would also replace an explicit integer 0.
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  if ($attr == 0) {
    # do nothing;
    return $text;
  }

  $tokens = _TokenizeHTML($text);

  $result = '';
  $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with dots inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        # Group 1 is "/" for a closing tag, which ends the skipped region.
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    if (!$in_pre) {
      $t = ProcessEscapes($t);
      $t = EducateEllipses($t);
    }
    $result .= $t;
  }
  return $result;
}
370
371
function EducateQuotes($_) {
  #
  #   Parameter:  String.
  #
  #   Returns:    The string, with straight single and double quotes turned
  #               into curly quote HTML entities.
  #
  #   Example input:  "Isn't this fun?"
  #   Example output: &#8220;Isn&#8217;t this fun?&#8221;
  #
  $str = $_;

  # Hand-written "punctuation" character class, kept from the Perl original
  # where the POSIX-style class was not available before Perl 5.6:
  $punct = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

  # A quote as the very first character, followed by punctuation at a
  # non-word-break, is closed by brute force. Single quote first, then double:
  $str = preg_replace("/^'(?=$punct\\B)/", '&#8217;', $str);
  $str = preg_replace("/^\"(?=$punct\\B)/", '&#8221;', $str);

  # Double sets of quotes in front of a word, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  $str = preg_replace("/\"'(?=\w)/", '&#8220;&#8216;', $str);
  $str = preg_replace("/'\"(?=\w)/", '&#8216;&#8220;', $str);

  # Decade abbreviations (the '80s):
  $str = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $str);

  # Characters that may directly precede a closing quote, and the decimal
  # dash entities that may precede an opening one:
  $closer = '[^\ \t\r\n\[\{\(\-]';
  $dashes = '&\#8211;|&\#8212;';

  # Most opening single quotes:
  $str = preg_replace("{
      (
        \\s          |   # a whitespace char, or
        &nbsp;      |   # a non-breaking space entity, or
        --          |   # dashes, or
        &[mn]dash;  |   # named dash entities
        $dashes |   # or decimal entities
        &\\#x201[34];    # or hex
      )
      '                 # the quote
      (?=\\w)            # followed by a word character
    }x", '\1&#8216;', $str);

  # Single closing quotes:
  $str = preg_replace("{
      ($closer)?
      '
      (?(1)|            # If $1 captured, then do nothing;
        (?=\\s | s\\b)    # otherwise, positive lookahead for a whitespace
      )                 # char or an 's' at a word ending position. This
                        # is a special case to handle something like:
                        # \"<i>Custer</i>'s Last Stand.\"
    }xi", '\1&#8217;', $str);

  # Whatever single quotes are left must be opening ones:
  $str = str_replace("'", '&#8216;', $str);

  # Most opening double quotes:
  $str = preg_replace("{
      (
        \\s          |   # a whitespace char, or
        &nbsp;      |   # a non-breaking space entity, or
        --          |   # dashes, or
        &[mn]dash;  |   # named dash entities
        $dashes |   # or decimal entities
        &\\#x201[34];    # or hex
      )
      \"                 # the quote
      (?=\\w)            # followed by a word character
    }x", '\1&#8220;', $str);

  # Double closing quotes:
  $str = preg_replace("{
      ($closer)?
      \"
      (?(1)|(?=\\s))   # If $1 captured, then do nothing;
                 # if not, then make sure the next char is whitespace.
    }x", '\1&#8221;', $str);

  # Whatever double quotes are left must be opening ones:
  $str = str_replace('"', '&#8220;', $str);

  return $str;
}
459
460
function EducateBackticks($_) {
  #
  #   Parameter:  String.
  #   Returns:    The string, with ``backticks'' -style double quotes
  #               replaced by HTML curly quote entities.
  #
  #   Example input:  ``Isn't this fun?''
  #   Example output: &#8220;Isn't this fun?&#8221;
  #
  # Opening pairs first, then closing pairs:
  $opened = str_replace("``", '&#8220;', $_);
  return str_replace("''", '&#8221;', $opened);
}
475
476
function EducateSingleBackticks($_) {
  #
  #   Parameter:  String.
  #   Returns:    The string, with `backticks' -style single quotes
  #               replaced by HTML curly quote entities. Every backtick
  #               becomes an opening quote and every apostrophe a closing
  #               one.
  #
  #   Example input:  `Isn't this fun?'
  #   Example output: &#8216;Isn&#8217;t this fun?&#8217;
  #
  $curly = array(
    "`" => '&#8216;',
    "'" => '&#8217;',
  );
  return strtr($_, $curly);
}
491
492
function EducateDashes($_) {
  #
  #   Parameter:  String.
  #
  #   Returns:    The string, with every "--" replaced by an em-dash
  #               HTML entity.
  #
  return str_replace('--', '&#8212;', $_);
}
504
505
function EducateDashesOldSchool($_) {
  #
  #   Parameter:  String.
  #
  #   Returns:    The string, with every "---" replaced by an em-dash
  #               HTML entity and every "--" by an en-dash HTML entity.
  #
  # The triple dash has to go first, otherwise its first two characters
  # would be taken for an en-dash.
  $with_em = str_replace("---", '&#8212;', $_);
  return str_replace("--", '&#8211;', $with_em);
}
520
521
function EducateDashesOldSchoolInverted($_) {
  #
  #   Parameter:  String.
  #
  #   Returns:    The string, with every "--" replaced by an em-dash HTML
  #               entity and every "---" by an en-dash HTML entity.
  #
  #   Why the inversion: unlike the syntax of EducateDashesOldSchool(), it
  #   stays compatible with entries written before SmartyPants 1.1, when
  #   "--" only ever meant an em-dash; and since em-dashes are the more
  #   common of the two, they get the shorter shortcut. (Thanks to Aaron
  #   Swartz for the idea.)
  #
  # The triple dash has to go first, otherwise its first two characters
  # would be taken for an em-dash.
  $with_en = str_replace("---", '&#8211;', $_);
  return str_replace("--", '&#8212;', $with_en);
}
543
544
function EducateEllipses($_) {
  #
  #   Parameter:  String.
  #   Returns:    The string, with every "..." replaced by an ellipsis
  #               HTML entity; the spaced-out form ". . ." is converted
  #               as well.
  #
  #   Example input:  Huh...?
  #   Example output: Huh&#8230;?
  #
  $ellipsis = '&#8230;';
  $compact = str_replace("...", $ellipsis, $_);
  return str_replace(". . .", $ellipsis, $compact);
}
559
560
function StupefyEntities($_) {
  #
  #   Parameter:  String.
  #   Returns:    The string, with every HTML entity SmartyPants produces
  #               turned back into its ASCII counterpart.
  #
  #   Example input:  &#8220;Hello &#8212; world.&#8221;
  #   Example output: "Hello -- world."
  #
  $ascii = array(
    '&#8211;' => '-',     # en-dash
    '&#8212;' => '--',    # em-dash
    '&#8216;' => "'",     # single quote, open
    '&#8217;' => "'",     # single quote, close
    '&#8220;' => '"',     # double quote, open
    '&#8221;' => '"',     # double quote, close
    '&#8230;' => '...',   # ellipsis
  );
  return strtr($_, $ascii);
}
585
586
function ProcessEscapes($_) {
  #
  #   Parameter:  String.
  #   Returns:    The string, with the backslash escape sequences below
  #               replaced by numeric entities. Useful to force a "dumb"
  #               quote or other character to appear.
  #
  #               Escape  Value
  #               ------  -----
  #               \\      &#92;
  #               \"      &#34;
  #               \'      &#39;
  #               \.      &#46;
  #               \-      &#45;
  #               \`      &#96;
  #
  # Order matters: the escaped backslash is handled before the others.
  $escapes = array(
    '\\\\' => '&#92;',
    '\"'   => '&#34;',
    "\'"   => '&#39;',
    '\.'   => '&#46;',
    '\-'   => '&#45;',
    '\`'   => '&#96;',
  );
  return str_replace(array_keys($escapes), array_values($escapes), $_);
}
609
610
611 # _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
612 # We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
  #
  #   Parameter:  String containing HTML markup.
  #   Returns:    An array of the tokens making up the input string. A
  #               token is either a tag (possibly with nested tags inside,
  #               such as <a href="<MTFoo>">) or a run of text between
  #               tags. Each array element is a two-element array: first
  #               'tag' or 'text', then the actual value.
  #
  #               An empty string found where text would sit (between two
  #               adjacent tags, or at either end of the input) is
  #               reported as a 'tag' token with an empty value.
  #
  #   Regular expression derived from the _tokenize() subroutine in
  #   Brad Choate's MTRegex plugin.
  #   <http://www.bradchoate.com/past/mtregex.php>
  #
  $comment     = '(?s:<!(?:--.*?--\s*)+>)';
  $instruction = '(?s:<\?.*?\?>)';
  $regular_tag = '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

  $pattern = '{(' . $comment . '|' . $instruction . '|' . $regular_tag . ')}';

  # With the delimiters captured, the pieces alternate: even positions hold
  # the text between matches, odd positions hold the matched tags.
  $pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

  $tokens = array();
  $position = 0;
  foreach ($pieces as $piece) {
    $is_text = ($position % 2 == 0) && ($piece != '');
    $tokens[] = array($is_text ? 'text' : 'tag', $piece);
    $position++;
  }
  return $tokens;
}
endif;
648
649 /*
650 Copyright and License
651 ---------------------
652
653 Copyright (c) 2003 John Gruber
654 <http://daringfireball.net/>
655 All rights reserved.
656
657 Copyright (c) 2004-2005 Michel Fortin
658 <http://www.michelf.com>
659
660 Redistribution and use in source and binary forms, with or without
661 modification, are permitted provided that the following conditions are met:
662
663 * Redistributions of source code must retain the above copyright
664 notice, this list of conditions and the following disclaimer.
665
666 * Redistributions in binary form must reproduce the above copyright
667 notice, this list of conditions and the following disclaimer in the
668 documentation and/or other materials provided with the distribution.
669
670 * Neither the name "SmartyPants" nor the names of its contributors may
671 be used to endorse or promote products derived from this software
672 without specific prior written permission.
673
674 This software is provided by the copyright holders and contributors "as is"
675 and any express or implied warranties, including, but not limited to, the
676 implied warranties of merchantability and fitness for a particular purpose
677 are disclaimed. In no event shall the copyright owner or contributors be
678 liable for any direct, indirect, incidental, special, exemplary, or
679 consequential damages (including, but not limited to, procurement of
680 substitute goods or services; loss of use, data, or profits; or business
681 interruption) however caused and on any theory of liability, whether in
682 contract, strict liability, or tort (including negligence or otherwise)
683 arising in any way out of the use of this software, even if advised of the
684 possibility of such damage.
685 */
686

  ViewVC Help
Powered by ViewVC 1.1.2