<?php
// $Id: smartypants.php,v 1.2.2.1 2009/04/28 21:49:37 mikl Exp $

/**
 * @file smartypants.php
 * SmartyPants - Smart punctuation for web sites
 *
 * by John Gruber <http://daringfireball.net>
 *
 * PHP port by Michel Fortin
 * <http://www.michelf.com/>
 *
 * Copyright (c) 2003-2004 John Gruber
 * Copyright (c) 2004-2005 Michel Fortin
 *
 * Re-released under GPLv2 for Drupal.
 */
|
| 18 |
|
| 19 |
# Version of this PHP port and of the SmartyPants syntax it implements.
define('SMARTYPANTS_PHP_VERSION', '1.5.1e'); # Fri 9 Dec 2005
define('SMARTYPANTS_SYNTAX_VERSION', '1.5.1'); # Fri 12 Mar 2004
// Regex-pattern for tags we don't mess with. Group 1 captures the "/" of a
// closing tag, which is how callers tell "entering" from "leaving".
define('SMARTYPANTS_TAGS_TO_SKIP', '@<(/?)(?:pre|code|kbd|script|math)[\s>]@');

// A global variable to keep track of our current SmartyPants
// configuration setting.
global $_typogrify_smartypants_attr;
$_typogrify_smartypants_attr = "1"; # Change this to configure.
# 1 => "--" for em-dashes; no en-dash support
# 2 => "---" for em-dashes; "--" for en-dashes
# 3 => "--" for em-dashes; "---" for en-dashes
# See docs for more configuration options.
|
| 32 |
|
| 33 |
|
| 34 |
# -- Smarty Modifier Interface ------------------------------------------------
|
| 35 |
function smarty_modifier_smartypants($text, $attr = NULL) {
  #
  # Smarty modifier entry point: forwards straight to SmartyPants().
  #
  # Parameters:
  #   $text - text to be parsed
  #   $attr - SmartyPants configuration value, passed through unchanged
  #
  # Returns: whatever SmartyPants($text, $attr) returns.
  #
  return SmartyPants($text, $attr);
}
|
| 38 |
|
| 39 |
|
| 40 |
|
| 41 |
function SmartyPants($text, $attr = NULL, $ctx = NULL) {
  #
  # Runs the transformations selected by $attr over $text.
  #
  # Parameters:
  #   $text - text to be parsed
  #   $attr - value of the smart_quotes="" attribute; NULL, FALSE or ''
  #           selects the global $_typogrify_smartypants_attr default
  #   $ctx  - MT context object (unused)
  #
  # Returns: $text unchanged when $attr is "0"; otherwise the text rebuilt
  #          from its tokens, with every text token that is not inside a
  #          SMARTYPANTS_TAGS_TO_SKIP element transformed.
  #
  global $_typogrify_smartypants_attr;

  # Fall back to the configured default only when no attribute was given.
  # A loose "== NULL" test would also match an explicit 0, which has to
  # mean "do nothing" rather than "use the default".
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }
  $attr = (string) $attr;

  # Options to specify which transformations to make. Everything starts
  # switched off so that no branch below leaves an option undefined.
  $do_quotes = 0;
  $do_backticks = 0;
  $do_dashes = 0;
  $do_ellipses = 0;
  $do_stupefy = FALSE;
  $convert_quot = 0; # should we translate &quot; entities into normal quotes?

  # Parse attributes:
  # 0 : do nothing
  # 1 : set all
  # 2 : set all, using old school en- and em- dash shortcuts
  # 3 : set all, using inverted old school en and em- dash shortcuts
  #
  # q : quotes
  # b : backtick quotes (``double'' only)
  # B : backtick quotes (``double'' and `single')
  # d : dashes
  # D : old school dashes
  # i : inverted old school dashes
  # e : ellipses
  # w : convert &quot; entities to " for Dreamweaver users
  switch ($attr) {
    case "0":
      # Do nothing.
      return $text;

    case "1":
    case "2":
    case "3":
      # Do everything; the number picks the dash shorthand.
      $do_quotes = 1;
      $do_backticks = 1;
      $do_dashes = (int) $attr;
      $do_ellipses = 1;
      break;

    case "-1":
      # Special "stupefy" mode.
      $do_stupefy = 1;
      break;

    default:
      # One option letter per character; unknown letters are ignored.
      foreach (preg_split('//', $attr, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        if ($c == "q") { $do_quotes = 1; }
        else if ($c == "b") { $do_backticks = 1; }
        else if ($c == "B") { $do_backticks = 2; }
        else if ($c == "d") { $do_dashes = 1; }
        else if ($c == "D") { $do_dashes = 2; }
        else if ($c == "i") { $do_dashes = 3; }
        else if ($c == "e") { $do_ellipses = 1; }
        else if ($c == "w") { $convert_quot = 1; }
      }
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.

  # This is a cheat, used to get some context for one-character tokens that
  # consist of just a quote char. What we do is remember the last character
  # of the previous text token, to use as context to curl single-character
  # quote tokens correctly.
  $prev_token_last_char = '';

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == 'tag') {
      # Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        # A closing tag (captured "/") leaves the protected region.
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    $last_char = substr($t, -1); # Remember last char of this token before processing.

    if (! $in_pre) {
      $t = ProcessEscapes($t);

      if ($convert_quot) {
        $t = str_replace('&quot;', '"', $t);
      }

      if ($do_dashes == 1) $t = EducateDashes($t);
      if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
      if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);

      if ($do_ellipses) $t = EducateEllipses($t);

      # Note: backticks need to be processed before quotes.
      if ($do_backticks) {
        $t = EducateBackticks($t);
        if ($do_backticks == 2) $t = EducateSingleBackticks($t);
      }

      if ($do_quotes) {
        if ($t === "'") {
          # Special case: single-character ' token. It closes when the
          # previous text token ended in a non-space character.
          $t = preg_match('/\S/', $prev_token_last_char) ? "&#8217;" : "&#8216;";
        }
        else if ($t === '"') {
          # Special case: single-character " token
          $t = preg_match('/\S/', $prev_token_last_char) ? "&#8221;" : "&#8220;";
        }
        else {
          # Normal case:
          $t = EducateQuotes($t);
        }
      }

      if ($do_stupefy) $t = StupefyEntities($t);
    }

    $prev_token_last_char = $last_char;
    $result .= $t;
  }

  return $result;
}
|
| 190 |
|
| 191 |
|
| 192 |
function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
  #
  # Educates quotes (and optionally ``backticks'' quotes) in $text.
  #
  # Parameters:
  #   $text - text to be parsed
  #   $attr - value of the smart_quotes="" attribute: 0 does nothing,
  #           2 also educates ``backticks''; NULL, FALSE or '' selects the
  #           global $_typogrify_smartypants_attr default
  #   $ctx  - MT context object (unused)
  #
  # Returns: the processed text ($text unchanged when $attr is 0).
  #
  global $_typogrify_smartypants_attr;

  # Only a missing attribute selects the default; an explicit 0 must
  # reach the "do nothing" test below.
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  # Compared against strings so that a letter attribute such as "q" is not
  # treated as the number 0 on older PHP versions.
  if ($attr == "0") {
    # do nothing;
    return $text;
  }
  # should we educate ``backticks'' -style quotes?
  $do_backticks = ($attr == "2") ? 1 : 0;

  # Special case to handle quotes at the very end of $text when preceded by
  # an HTML tag. Add a space to give the quote education algorithm a bit of
  # context, so that it can guess correctly that it's a closing quote:
  $add_extra_space = 0;
  if (preg_match("/>['\"]\\z/", $text)) {
    $add_extra_space = 1; # Remember, so we can trim the extra space later.
    $text .= " ";
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags

  # This is a cheat, used to get some context for one-character tokens that
  # consist of just a quote char. What we do is remember the last character
  # of the previous text token, to use as context to curl single-character
  # quote tokens correctly.
  $prev_token_last_char = "";

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with quotes inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    $last_char = substr($t, -1); # Remember last char of this token before processing.

    if (! $in_pre) {
      $t = ProcessEscapes($t);
      if ($do_backticks) {
        $t = EducateBackticks($t);
      }

      if ($t === "'") {
        # Special case: single-character ' token
        $t = preg_match('/\S/', $prev_token_last_char) ? "&#8217;" : "&#8216;";
      }
      else if ($t === '"') {
        # Special case: single-character " token
        $t = preg_match('/\S/', $prev_token_last_char) ? "&#8221;" : "&#8220;";
      }
      else {
        # Normal case:
        $t = EducateQuotes($t);
      }
    }

    $prev_token_last_char = $last_char;
    $result .= $t;
  }

  if ($add_extra_space) {
    # Trim trailing space if we added one earlier. preg_replace() returns
    # the new string, so the result has to be assigned back.
    $result = preg_replace('/ \z/', '', $result);
  }
  return $result;
}
|
| 284 |
|
| 285 |
|
| 286 |
function SmartDashes($text, $attr = NULL, $ctx = NULL) {
  #
  # Educates dashes in $text.
  #
  # Parameters:
  #   $text - text to be parsed
  #   $attr - value of the smart_dashes="" attribute: 0 does nothing,
  #           2 selects EducateDashesOldSchool, 3 selects
  #           EducateDashesOldSchoolInverted, anything else EducateDashes;
  #           NULL, FALSE or '' selects the global
  #           $_typogrify_smartypants_attr default
  #   $ctx  - MT context object (unused)
  #
  # Returns: the processed text ($text unchanged when $attr is 0).
  #
  global $_typogrify_smartypants_attr;

  # Only a missing attribute selects the default; an explicit 0 must
  # reach the "do nothing" test below.
  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  # reference to the subroutine to use for dash education, default to EducateDashes:
  $dash_sub_ref = 'EducateDashes';

  if ($attr == "0") {
    # do nothing;
    return $text;
  }
  else if ($attr == "2") {
    # use old smart dash shortcuts, "--" for en, "---" for em
    $dash_sub_ref = 'EducateDashesOldSchool';
  }
  else if ($attr == "3") {
    # inverse of 2, "--" for em, "---" for en
    $dash_sub_ref = 'EducateDashesOldSchoolInverted';
  }

  $tokens = _TokenizeHTML($text);

  $result = '';
  $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with dashes inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
    }
    else {
      $t = $cur_token[1];
      if (! $in_pre) {
        $t = ProcessEscapes($t);
        $t = $dash_sub_ref($t); # call the selected Educate* function by name
      }
      $result .= $t;
    }
  }
  return $result;
}
|
| 333 |
|
| 334 |
|
| 335 |
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
  #
  # Educates ellipses in $text.
  #
  # Parameters:
  #   $text - text to be parsed
  #   $attr - value of the smart_ellipses="" attribute: 0 does nothing;
  #           NULL, FALSE or '' selects the global
  #           $_typogrify_smartypants_attr default
  #   $ctx  - MT context object (unused)
  #
  # Returns: the processed text ($text unchanged when $attr is 0).
  #
  # The global has to be imported here; without it the default below reads
  # an undefined local and every call without $attr becomes "do nothing".
  global $_typogrify_smartypants_attr;

  if ($attr === NULL || $attr === FALSE || $attr === '') {
    $attr = $_typogrify_smartypants_attr;
  }

  if ($attr == "0") {
    # do nothing;
    return $text;
  }

  $tokens = _TokenizeHTML($text);

  $result = '';
  $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      # Don't mess with dots inside tags
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
      }
    }
    else {
      $t = $cur_token[1];
      if (! $in_pre) {
        $t = ProcessEscapes($t);
        $t = EducateEllipses($t);
      }
      $result .= $t;
    }
  }
  return $result;
}
|
| 370 |
|
| 371 |
|
| 372 |
function EducateQuotes($_) {
  #
  # Parameter: String.
  #
  # Returns: The string, with "educated" curly quote HTML entities.
  #
  # Example input:  "Isn't this fun?"
  # Example output: &#8220;Isn&#8217;t this fun?&#8221;
  #
  # The replacements below run in a fixed order; each step only sees the
  # straight quotes that earlier steps left behind.
  #
  # Make our own "punctuation" character class, because the POSIX-style
  # [:PUNCT:] is only available in Perl 5.6 or later:
  $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

  # Special case if the very first character is a quote
  # followed by punctuation at a non-word-break. Close the quotes by brute force:
  $_ = preg_replace(
    array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
    array('&#8217;', '&#8221;'), $_);

  # Special case for double sets of quotes, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  $_ = preg_replace(
    array("/\"'(?=\w)/", "/'\"(?=\w)/"),
    array('&#8220;&#8216;', '&#8216;&#8220;'), $_);

  # Special case for decade abbreviations (the '80s):
  $_ = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $_);

  # Characters after which a quote cannot be a closing one.
  $close_class = '[^\ \t\r\n\[\{\(\-]';
  # Decimal en-/em-dash entities ("#" escaped because of the x modifier).
  $dec_dashes = '&\#8211;|&\#8212;';

  # Get most opening single quotes:
  $_ = preg_replace("{
    (
      \\s         |  # a whitespace char, or
      &nbsp;      |  # a non-breaking space entity, or
      --          |  # dashes, or
      &[mn]dash;  |  # named dash entities
      $dec_dashes |  # or decimal entities
      &\\#x201[34];  # or hex
    )
    '                # the quote
    (?=\\w)          # followed by a word character
    }x", '\1&#8216;', $_);

  # Single closing quotes. When no closing-context character precedes the
  # quote, it only closes before whitespace or before an "s" that ends a
  # word, which handles input such as: <i>Custer</i>'s Last Stand.
  $_ = preg_replace("{
    ($close_class)?
    '
    (?(1)|            # if group 1 captured, nothing more is required;
      (?=\\s | s\\b)  # otherwise look ahead for whitespace or a final s
    )
    }xi", '\1&#8217;', $_);

  # Any remaining single quotes should be opening ones:
  $_ = str_replace("'", '&#8216;', $_);

  # Get most opening double quotes:
  $_ = preg_replace("{
    (
      \\s         |  # a whitespace char, or
      &nbsp;      |  # a non-breaking space entity, or
      --          |  # dashes, or
      &[mn]dash;  |  # named dash entities
      $dec_dashes |  # or decimal entities
      &\\#x201[34];  # or hex
    )
    \"               # the quote
    (?=\\w)          # followed by a word character
    }x", '\1&#8220;', $_);

  # Double closing quotes:
  $_ = preg_replace("{
    ($close_class)?
    \"
    (?(1)|(?=\\s))   # if group 1 captured, nothing more is required;
                     # if not, the next char must be whitespace
    }x", '\1&#8221;', $_);

  # Any remaining quotes should be opening ones.
  $_ = str_replace('"', '&#8220;', $_);

  return $_;
}
|
| 459 |
|
| 460 |
|
| 461 |
function EducateBackticks($_) {
  #
  # Parameter: String.
  # Returns: The string, with ``backticks'' -style double quotes
  #          translated into HTML curly quote entities.
  #
  # Example input:  ``Isn't this fun?''
  # Example output: &#8220;Isn't this fun?&#8221;
  #
  # Single straight quotes are left alone here.
  #
  $_ = str_replace(array("``", "''"),
                   array('&#8220;', '&#8221;'), $_);
  return $_;
}
|
| 475 |
|
| 476 |
|
| 477 |
function EducateSingleBackticks($_) {
  #
  # Parameter: String.
  # Returns: The string, with `backticks' -style single quotes
  #          translated into HTML curly quote entities.
  #
  # Example input:  `Isn't this fun?'
  # Example output: &#8216;Isn&#8217;t this fun?&#8217;
  #
  # Every backtick becomes an opening quote and every straight single
  # quote a closing one, so apostrophes are converted as well.
  #
  $_ = str_replace(array("`", "'"),
                   array('&#8216;', '&#8217;'), $_);
  return $_;
}
|
| 491 |
|
| 492 |
|
| 493 |
function EducateDashes($_) {
  #
  # Parameter: String.
  #
  # Returns: The string, with each instance of "--" translated to
  #          an em-dash HTML entity (&#8212;).
  #
  $_ = str_replace('--', '&#8212;', $_);
  return $_;
}
|
| 504 |
|
| 505 |
|
| 506 |
function EducateDashesOldSchool($_) {
  #
  # Parameter: String.
  #
  # Returns: The string, with each instance of "--" translated to
  #          an en-dash HTML entity, and each "---" translated to
  #          an em-dash HTML entity.
  #
  # "---" is listed first so that it is replaced before "--" can match
  # part of it.
  #
  #                      em         en
  $_ = str_replace(array("---",     "--"),
                   array('&#8212;', '&#8211;'), $_);
  return $_;
}
|
| 520 |
|
| 521 |
|
| 522 |
function EducateDashesOldSchoolInverted($_) {
  #
  # Parameter: String.
  #
  # Returns: The string, with each instance of "--" translated to
  #          an em-dash HTML entity, and each "---" translated to
  #          an en-dash HTML entity. Two reasons why: First, unlike the
  #          en- and em-dash syntax supported by
  #          EducateDashesOldSchool(), it's compatible with existing
  #          entries written before SmartyPants 1.1, back when "--" was
  #          only used for em-dashes. Second, em-dashes are more
  #          common than en-dashes, and so it sort of makes sense that
  #          the shortcut should be shorter to type. (Thanks to Aaron
  #          Swartz for the idea.)
  #
  # "---" is listed first so that it is replaced before "--" can match
  # part of it.
  #
  #                      en         em
  $_ = str_replace(array("---",     "--"),
                   array('&#8211;', '&#8212;'), $_);
  return $_;
}
|
| 543 |
|
| 544 |
|
| 545 |
function EducateEllipses($_) {
  #
  # Parameter: String.
  # Returns: The string, with each instance of "..." translated to
  #          an ellipsis HTML entity. Also converts the case where
  #          there are spaces between the dots.
  #
  # Example input:  Huh...?
  # Example output: Huh&#8230;?
  #
  $_ = str_replace(array("...", ". . ."), '&#8230;', $_);
  return $_;
}
|
| 559 |
|
| 560 |
|
| 561 |
function StupefyEntities($_) {
  #
  # Parameter: String.
  # Returns: The string, with each SmartyPants HTML entity translated to
  #          its ASCII counterpart.
  #
  # Example input:  &#8220;Hello &#8212; world.&#8221;
  # Example output: "Hello -- world."
  #
  # Only the decimal numeric entities listed below are translated.
  #
  #                      en-dash    em-dash
  $_ = str_replace(array('&#8211;', '&#8212;'),
                   array('-',       '--'), $_);

  # single quote         open       close
  $_ = str_replace(array('&#8216;', '&#8217;'), "'", $_);

  # double quote         open       close
  $_ = str_replace(array('&#8220;', '&#8221;'), '"', $_);

  $_ = str_replace('&#8230;', '...', $_); # ellipsis

  return $_;
}
|
| 585 |
|
| 586 |
|
| 587 |
function ProcessEscapes($_) {
  #
  # Parameter: String.
  # Returns: The string, after processing the following backslash
  #          escape sequences. This is useful if you want to force a "dumb"
  #          quote or other character to appear.
  #
  #          Escape  Value
  #          ------  -----
  #          \\      &#92;
  #          \"      &#34;
  #          \'      &#39;
  #          \.      &#46;
  #          \-      &#45;
  #          \`      &#96;
  #
  # Each escape becomes a numeric entity, so the later Educate* passes no
  # longer see the raw character. The double backslash is listed first so
  # it is consumed before the other escapes are looked for.
  #
  $_ = str_replace(
    array('\\\\',  '\"',    "\'",    '\.',    '\-',    '\`'),
    array('&#92;', '&#34;', '&#39;', '&#46;', '&#45;', '&#96;'), $_);

  return $_;
}
|
| 609 |
|
| 610 |
|
| 611 |
# _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
|
| 612 |
# We only define it if it is not already defined.
|
| 613 |
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
  #
  # Parameter: String containing HTML markup.
  # Returns: An array of the tokens comprising the input
  #          string. Each token is either a tag (possibly with nested,
  #          tags contained therein, such as <a href="<MTFoo>">), or a
  #          run of text between tags. Each element of the array is a
  #          two-element array; the first is either 'tag' or 'text';
  #          the second is the actual value. Empty runs of text produce
  #          no token at all.
  #
  # Regular expression derived from the _tokenize() subroutine in
  # Brad Choate's MTRegex plugin.
  # <http://www.bradchoate.com/past/mtregex.php>
  #
  $tokens = array();

  $match = '(?s:<!(?:--.*?--\s*)+>)|'.  # comment
           '(?s:<\?.*?\?>)|'.           # processing instruction
           # regular tags
           '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

  $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  if ($parts === FALSE) {
    # The split failed; hand the input back as a single text token
    # rather than iterating over FALSE.
    return ((string) $str === '') ? array() : array(array('text', $str));
  }

  # With PREG_SPLIT_DELIM_CAPTURE the pieces alternate: even indexes hold
  # the text between matches, odd indexes hold the matched tags.
  foreach ($parts as $index => $part) {
    if ($index % 2) {
      $tokens[] = array('tag', $part);
    }
    else if ($part !== '') {
      # Empty text runs (e.g. between two adjacent tags) are skipped
      # instead of being reported as empty 'tag' tokens.
      $tokens[] = array('text', $part);
    }
  }
  return $tokens;
}
endif;
|
| 648 |
|
| 649 |
/*
Copyright and License
---------------------

Copyright (c) 2003 John Gruber
<http://daringfireball.net/>
All rights reserved.

Copyright (c) 2004-2005 Michel Fortin
<http://www.michelf.com>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name "SmartyPants" nor the names of its contributors may
  be used to endorse or promote products derived from this software
  without specific prior written permission.

This software is provided by the copyright holders and contributors "as is"
and any express or implied warranties, including, but not limited to, the
implied warranties of merchantability and fitness for a particular purpose
are disclaimed. In no event shall the copyright owner or contributors be
liable for any direct, indirect, incidental, special, exemplary, or
consequential damages (including, but not limited to, procurement of
substitute goods or services; loss of use, data, or profits; or business
interruption) however caused and on any theory of liability, whether in
contract, strict liability, or tort (including negligence or otherwise)
arising in any way out of the use of this software, even if advised of the
possibility of such damage.
*/
|
| 686 |
|