| 1 |
<?php
|
| 2 |
|
| 3 |
#
|
| 4 |
# SmartyPants - Smart punctuation for web sites
|
| 5 |
#
|
| 6 |
# by John Gruber
|
| 7 |
# <http://daringfireball.net>
|
| 8 |
#
|
| 9 |
# PHP port by Michel Fortin
|
| 10 |
# <http://www.michelf.com/>
|
| 11 |
#
|
| 12 |
# Copyright (c) 2003-2004 John Gruber
|
| 13 |
# Copyright (c) 2004-2005 Michel Fortin
|
| 14 |
#
|
| 15 |
|
| 16 |
|
| 17 |
global $SmartyPantsPHPVersion, $SmartyPantsSyntaxVersion,
       $smartypants_attr, $sp_tags_to_skip;

# Release identifiers: this PHP port, and the SmartyPants syntax it implements.
$SmartyPantsPHPVersion    = '1.5.1e'; # Fri 9 Dec 2005
$SmartyPantsSyntaxVersion = '1.5.1';  # Fri 12 Mar 2004


# Configurable variables:
#
# Default attribute used whenever a caller does not pass one explicitly.
#  1 =>  "--" for em-dashes; no en-dash support
#  2 =>  "---" for em-dashes; "--" for en-dashes
#  3 =>  "--" for em-dashes; "---" for en-dashes
#  See docs for more configuration options.
$smartypants_attr = "1"; # Change this to configure.

# Globals:
#
# Regex fragment matching the opening or closing form of a tag whose content
# must be left untouched; capture group 1 holds the "/" of a closing tag.
$sp_tags_to_skip = '<(/?)(?:pre|code|kbd|script|math)[\s>]';
|
| 33 |
|
| 34 |
|
| 35 |
# -- WordPress plugin interface -----------------------------------------------
|
| 36 |
/*
|
| 37 |
Plugin Name: SmartyPants
|
| 38 |
Plugin URI: http://www.michelf.com/projects/php-smartypants/
|
| 39 |
Description: SmartyPants is a web publishing utility that translates plain ASCII punctuation characters into “smart” typographic punctuation HTML entities. This plugin <strong>replace the default WordPress Texturize algorithm</strong> for the content and the title of your posts, the comments body and author name, and everywhere else Texturize normally apply. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>.
|
| 40 |
Version: 1.5.1e
|
| 41 |
Author: Michel Fortin
|
| 42 |
Author URI: http://www.michelf.com/
|
| 43 |
*/
|
| 44 |
if (isset($wp_version)) {
    # Hooks handled here: on each one the default Texturize filter is
    # removed and SmartyPants is installed in its place.
    $sp_wp_hooks = array(
        'category_description',
        'list_cats',
        'comment_author',
        'comment_text',
        'single_post_title',
        'the_title',
        'the_content',
        'the_excerpt',
    );

    # Remove default Texturize filter that would conflict with SmartyPants.
    foreach ($sp_wp_hooks as $sp_wp_hook) {
        remove_filter($sp_wp_hook, 'wptexturize');
    }

    # Add SmartyPants filter with priority 10 (same as Texturize).
    foreach ($sp_wp_hooks as $sp_wp_hook) {
        add_filter($sp_wp_hook, 'SmartyPants', 10);
    }

    # Do not leave the loop helpers behind in the including scope.
    unset($sp_wp_hooks, $sp_wp_hook);
}
|
| 64 |
|
| 65 |
# -- Smarty Modifier Interface ------------------------------------------------
|
| 66 |
function smarty_modifier_smartypants($text, $attr = NULL) {
    #
    # Smarty modifier entry point.
    #
    # Parameters: $text  string to process
    #             $attr  SmartyPants attribute string, forwarded unchanged
    # Returns:    Whatever SmartyPants() returns for those arguments.
    #
    $educated = SmartyPants($text, $attr);
    return $educated;
}
|
| 69 |
|
| 70 |
|
| 71 |
|
| 72 |
function SmartyPants($text, $attr = NULL, $ctx = NULL) {
    #
    # Main entry point. Splits $text into HTML tags and text runs, copies the
    # tags through untouched and applies the selected transformations to
    # every text run that is not inside a tag matched by $sp_tags_to_skip.
    #
    # Parameters:
    #   $text   text to be parsed
    #   $attr   value of the smart_quotes="" attribute; NULL or '' selects
    #           the global default $smartypants_attr
    #   $ctx    MT context object (unused)
    #
    # Returns: The processed string ($text itself when $attr is "0").
    #
    # Attribute values:
    #   0  : do nothing
    #   1  : set all
    #   2  : set all, using old school en- and em- dash shortcuts
    #   3  : set all, using inverted old school en and em- dash shortcuts
    #   -1 : special "stupefy" mode
    #
    #   q  : quotes
    #   b  : backtick quotes (``double'' only)
    #   B  : backtick quotes (``double'' and `single')
    #   d  : dashes
    #   D  : old school dashes
    #   i  : inverted old school dashes
    #   e  : ellipses
    #   w  : convert &quot; entities to " for Dreamweaver users
    #
    global $smartypants_attr, $sp_tags_to_skip;

    # Strict test on purpose: a loose "== NULL" is also true for integer 0,
    # which made an explicit 0 fall back to the default instead of meaning
    # "do nothing".
    if ($attr === NULL || $attr === '') $attr = $smartypants_attr;

    # Options to specify which transformations to make. All of them start
    # switched off so that no path below reads an undefined variable.
    $do_quotes    = 0;
    $do_backticks = 0;
    $do_dashes    = 0;
    $do_ellipses  = 0;
    $do_stupefy   = FALSE;
    $convert_quot = 0; # should we translate &quot; entities into normal quotes?

    if ($attr == "0") {
        # Do nothing.
        return $text;
    }
    else if ($attr == "1" || $attr == "2" || $attr == "3") {
        # Do everything; the number only selects the dash shorthand.
        $do_quotes    = 1;
        $do_backticks = 1;
        $do_ellipses  = 1;
        if ($attr == "2")      $do_dashes = 2; # old school
        else if ($attr == "3") $do_dashes = 3; # inverted old school
        else                   $do_dashes = 1;
    }
    else if ($attr == "-1") {
        # Special "stupefy" mode.
        $do_stupefy = 1;
    }
    else {
        # Individual option letters; unknown letters are ignored.
        foreach (preg_split('//', $attr, -1, PREG_SPLIT_NO_EMPTY) as $c) {
            switch ($c) {
                case "q": $do_quotes    = 1; break;
                case "b": $do_backticks = 1; break;
                case "B": $do_backticks = 2; break;
                case "d": $do_dashes    = 1; break;
                case "D": $do_dashes    = 2; break;
                case "i": $do_dashes    = 3; break;
                case "e": $do_ellipses  = 1; break;
                case "w": $convert_quot = 1; break;
            }
        }
    }

    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.

    $prev_token_last_char = ""; # This is a cheat, used to get some context
                                # for one-character tokens that consist of
                                # just a quote char. What we do is remember
                                # the last character of the previous text
                                # token, to use as context to curl single-
                                # character quote tokens correctly.

    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags.
            $result .= $cur_token[1];
            if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
                # Group 1 is "/" for a closing tag, which ends the skip zone.
                $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
            }
            continue;
        }

        $t = $cur_token[1];
        $last_char = substr($t, -1); # Remember last char of this token before processing.

        if (! $in_pre) {
            $t = ProcessEscapes($t);

            if ($convert_quot) {
                $t = str_replace('&quot;', '"', $t);
            }

            if ($do_dashes == 1)      $t = EducateDashes($t);
            else if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
            else if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);

            if ($do_ellipses) $t = EducateEllipses($t);

            # Note: backticks need to be processed before quotes.
            if ($do_backticks) {
                $t = EducateBackticks($t);
                if ($do_backticks == 2) $t = EducateSingleBackticks($t);
            }

            if ($do_quotes) {
                # A non-space character right before a lone quote token
                # means the quote closes something; otherwise it opens.
                $closing = preg_match('/\S/', $prev_token_last_char);
                if ($t === "'") {
                    # Special case: single-character ' token
                    $t = $closing ? "&#8217;" : "&#8216;";
                }
                else if ($t === '"') {
                    # Special case: single-character " token
                    $t = $closing ? "&#8221;" : "&#8220;";
                }
                else {
                    # Normal case:
                    $t = EducateQuotes($t);
                }
            }

            if ($do_stupefy) $t = StupefyEntities($t);
        }

        $prev_token_last_char = $last_char;
        $result .= $t;
    }

    return $result;
}
|
| 221 |
|
| 222 |
|
| 223 |
function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
    #
    # Educates straight quotes only (optionally ``backticks'' -style double
    # quotes as well) in the text runs of an HTML string.
    #
    # Parameters:
    #   $text   text to be parsed
    #   $attr   value of the smart_quotes="" attribute; NULL or '' selects
    #           the global default $smartypants_attr.
    #           0 = do nothing, 2 = also smarten ``backticks'' -style quotes,
    #           anything else = straight quotes only
    #   $ctx    MT context object (unused)
    #
    # Returns: The processed string ($text itself when $attr is 0).
    #
    global $smartypants_attr, $sp_tags_to_skip;

    # Strict test on purpose: a loose "== NULL" is also true for integer 0,
    # which made the "do nothing" branch below unreachable for it.
    if ($attr === NULL || $attr === '') $attr = $smartypants_attr;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    # should we educate ``backticks'' -style quotes?
    $do_backticks = ($attr == 2) ? 1 : 0;

    # Special case to handle quotes at the very end of $text when preceded by
    # an HTML tag. Add a space to give the quote education algorithm a bit of
    # context, so that it can guess correctly that it's a closing quote:
    $add_extra_space = 0;
    if (preg_match("/>['\"]\\z/", $text)) {
        $add_extra_space = 1; # Remember, so we can trim the extra space later.
        $text .= " ";
    }

    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags

    $prev_token_last_char = ""; # This is a cheat, used to get some context
                                # for one-character tokens that consist of
                                # just a quote char. What we do is remember
                                # the last character of the previous text
                                # token, to use as context to curl single-
                                # character quote tokens correctly.

    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags
            $result .= $cur_token[1];
            if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
                # Group 1 is "/" for a closing tag, which ends the skip zone.
                $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
            }
            continue;
        }

        $t = $cur_token[1];
        $last_char = substr($t, -1); # Remember last char of this token before processing.

        if (! $in_pre) {
            $t = ProcessEscapes($t);
            if ($do_backticks) {
                $t = EducateBackticks($t);
            }

            # A non-space character right before a lone quote token means
            # the quote closes something; otherwise it opens.
            $closing = preg_match('/\S/', $prev_token_last_char);
            if ($t === "'") {
                # Special case: single-character ' token
                $t = $closing ? "&#8217;" : "&#8216;";
            }
            else if ($t === '"') {
                # Special case: single-character " token
                $t = $closing ? "&#8221;" : "&#8220;";
            }
            else {
                # Normal case:
                $t = EducateQuotes($t);
            }
        }

        $prev_token_last_char = $last_char;
        $result .= $t;
    }

    if ($add_extra_space) {
        # Trim trailing space if we added one earlier. The result must be
        # assigned: preg_replace() returns the new string, it does not
        # modify its argument.
        $result = preg_replace('/ \z/', '', $result);
    }
    return $result;
}
|
| 315 |
|
| 316 |
|
| 317 |
function SmartDashes($text, $attr = NULL, $ctx = NULL) {
    #
    # Educates dashes only in the text runs of an HTML string.
    #
    # Parameters:
    #   $text   text to be parsed
    #   $attr   value of the smart_dashes="" attribute; NULL or '' selects
    #           the global default $smartypants_attr.
    #           0 = do nothing, 2 = old school shortcuts, 3 = inverted old
    #           school shortcuts, anything else = EducateDashes
    #   $ctx    MT context object (unused)
    #
    # Returns: The processed string ($text itself when $attr is 0).
    #
    global $smartypants_attr, $sp_tags_to_skip;

    # Strict test on purpose: a loose "== NULL" is also true for integer 0,
    # which made the "do nothing" branch below unreachable for it.
    if ($attr === NULL || $attr === '') $attr = $smartypants_attr;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    # Name of the function to use for dash education, default to EducateDashes:
    $dash_sub_ref = 'EducateDashes';
    if ($attr == 2) {
        # use old smart dash shortcuts, "--" for en, "---" for em
        $dash_sub_ref = 'EducateDashesOldSchool';
    }
    else if ($attr == 3) {
        # inverse of 2, "--" for em, "---" for en
        $dash_sub_ref = 'EducateDashesOldSchoolInverted';
    }

    $tokens = _TokenizeHTML($text);

    $result = '';
    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Tags are copied through untouched.
            $result .= $cur_token[1];
            if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
                # Group 1 is "/" for a closing tag, which ends the skip zone.
                $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            if (! $in_pre) {
                $t = ProcessEscapes($t);
                $t = $dash_sub_ref($t); # variable function call
            }
            $result .= $t;
        }
    }
    return $result;
}
|
| 364 |
|
| 365 |
|
| 366 |
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
    #
    # Educates ellipses only in the text runs of an HTML string.
    #
    # Parameters:
    #   $text   text to be parsed
    #   $attr   value of the smart_ellipses="" attribute; NULL or '' selects
    #           the global default $smartypants_attr. 0 = do nothing.
    #   $ctx    MT context object (unused)
    #
    # Returns: The processed string ($text itself when $attr is 0).
    #
    # Both globals are needed here. Without this declaration
    # $smartypants_attr was undefined, and $sp_tags_to_skip expanded to an
    # empty pattern that matched every tag, so nothing after the first
    # opening tag was ever educated.
    global $smartypants_attr, $sp_tags_to_skip;

    # Strict test on purpose: a loose "== NULL" is also true for integer 0,
    # which made the "do nothing" branch below unreachable for it.
    if ($attr === NULL || $attr === '') $attr = $smartypants_attr;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    $tokens = _TokenizeHTML($text);

    $result = '';
    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Tags are copied through untouched.
            $result .= $cur_token[1];
            if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
                # Group 1 is "/" for a closing tag, which ends the skip zone.
                $in_pre = (isset($matches[1]) && $matches[1] == '/') ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            if (! $in_pre) {
                $t = ProcessEscapes($t);
                $t = EducateEllipses($t);
            }
            $result .= $t;
        }
    }
    return $result;
}
|
| 401 |
|
| 402 |
|
| 403 |
function EducateQuotes($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with "educated" curly quote HTML entities.
    #
    # Example input:  "Isn't this fun?"
    # Example output: &#8220;Isn&#8217;t this fun?&#8221;
    #
    # Make our own "punctuation" character class, because the POSIX-style
    # [:PUNCT:] is only available in Perl 5.6 or later:
    $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    $_ = preg_replace(
        array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
        array('&#8217;',                 '&#8221;'), $_);


    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    $_ = preg_replace(
        array("/\"'(?=\w)/",    "/'\"(?=\w)/"),
        array('&#8220;&#8216;', '&#8216;&#8220;'), $_);

    # Special case for decade abbreviations (the '80s):
    $_ = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $_);

    # Characters after which a quote is taken to be a closing one.
    $close_class = '[^\ \t\r\n\[\{\(\-]';
    # Decimal dash entities; "#" is escaped because the patterns below use
    # the x modifier, where a bare "#" would start a comment.
    $dec_dashes = '&\#8211;|&\#8212;';

    # Get most opening single quotes:
    $_ = preg_replace("{
        (
            \\s          |   # a whitespace char, or
            &nbsp;      |   # a non-breaking space entity, or
            --          |   # dashes, or
            &[mn]dash;  |   # named dash entities
            $dec_dashes |   # or decimal entities
            &\\#x201[34];    # or hex
        )
        '                 # the quote
        (?=\\w)            # followed by a word character
        }x", '\1&#8216;', $_);
    # Single closing quotes:
    $_ = preg_replace("{
        ($close_class)?
        '
        (?(1)|            # If $1 captured, then do nothing;
          (?=\\s | s\\b)    # otherwise, positive lookahead for a whitespace
        )                 # char or an 's' at a word ending position. This
                          # is a special case to handle something like:
                          # \"<i>Custer</i>'s Last Stand.\"
        }xi", '\1&#8217;', $_);

    # Any remaining single quotes should be opening ones:
    $_ = str_replace("'", '&#8216;', $_);


    # Get most opening double quotes:
    $_ = preg_replace("{
        (
            \\s          |   # a whitespace char, or
            &nbsp;      |   # a non-breaking space entity, or
            --          |   # dashes, or
            &[mn]dash;  |   # named dash entities
            $dec_dashes |   # or decimal entities
            &\\#x201[34];    # or hex
        )
        \"                 # the quote
        (?=\\w)            # followed by a word character
        }x", '\1&#8220;', $_);

    # Double closing quotes:
    $_ = preg_replace("{
        ($close_class)?
        \"
        (?(1)|(?=\\s))   # If $1 captured, then do nothing;
                           # if not, then make sure the next char is whitespace.
        }x", '\1&#8221;', $_);

    # Any remaining quotes should be opening ones.
    $_ = str_replace('"', '&#8220;', $_);

    return $_;
}
|
| 490 |
|
| 491 |
|
| 492 |
function EducateBackticks($_) {
    #
    # Parameter: String.
    # Returns:   The string, with ``backticks'' -style double quotes
    #            translated into HTML curly quote entities.
    #
    # Example input:  ``Isn't this fun?''
    # Example output: &#8220;Isn't this fun?&#8221;
    #
    # Single quotes and lone backticks are left alone here.
    #
    $_ = str_replace(array("``",       "''",),
                     array('&#8220;', '&#8221;'), $_);
    return $_;
}
|
| 506 |
|
| 507 |
|
| 508 |
function EducateSingleBackticks($_) {
    #
    # Parameter: String.
    # Returns:   The string, with `backticks' -style single quotes
    #            translated into HTML curly quote entities.
    #
    # Example input:  `Isn't this fun?'
    # Example output: &#8216;Isn&#8217;t this fun?&#8217;
    #
    # Every backtick becomes an opening quote and every straight single
    # quote a closing one; no context is taken into account.
    #
    $_ = str_replace(array("`",       "'",),
                     array('&#8216;', '&#8217;'), $_);
    return $_;
}
|
| 522 |
|
| 523 |
|
| 524 |
function EducateDashes($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with each instance of "--" translated to
    #            an em-dash HTML entity.
    #
    # The decimal entity form matters: EducateQuotes() looks for &#8212;
    # in front of a quote to recognise it as an opening one.
    #
    $_ = str_replace('--', '&#8212;', $_);
    return $_;
}
|
| 535 |
|
| 536 |
|
| 537 |
function EducateDashesOldSchool($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with each instance of "--" translated to
    #            an en-dash HTML entity, and each "---" translated to
    #            an em-dash HTML entity.
    #
    # "---" is listed first so that it is consumed before the shorter "--"
    # pattern gets a chance to match inside it.
    #
    #                      em         en
    $_ = str_replace(array("---",     "--",),
                     array('&#8212;', '&#8211;'), $_);
    return $_;
}
|
| 551 |
|
| 552 |
|
| 553 |
function EducateDashesOldSchoolInverted($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with each instance of "--" translated to
    #            an em-dash HTML entity, and each "---" translated to
    #            an en-dash HTML entity. Two reasons why: First, unlike the
    #            en- and em-dash syntax supported by
    #            EducateDashesOldSchool(), it's compatible with existing
    #            entries written before SmartyPants 1.1, back when "--" was
    #            only used for em-dashes.  Second, em-dashes are more
    #            common than en-dashes, and so it sort of makes sense that
    #            the shortcut should be shorter to type. (Thanks to Aaron
    #            Swartz for the idea.)
    #
    # "---" is listed first so that it is consumed before the shorter "--"
    # pattern gets a chance to match inside it.
    #
    #                      en         em
    $_ = str_replace(array("---",     "--",),
                     array('&#8211;', '&#8212;'), $_);
    return $_;
}
|
| 574 |
|
| 575 |
|
| 576 |
function EducateEllipses($_) {
    #
    # Parameter: String.
    # Returns:   The string, with each instance of "..." translated to
    #            an ellipsis HTML entity. Also converts the case where
    #            there are spaces between the dots.
    #
    # Example input:  Huh...?
    # Example output: Huh&#8230;?
    #
    $_ = str_replace(array("...", ". . .",), '&#8230;', $_);
    return $_;
}
|
| 590 |
|
| 591 |
|
| 592 |
function StupefyEntities($_) {
    #
    # Parameter: String.
    # Returns:   The string, with each SmartyPants HTML entity translated to
    #            its ASCII counterpart.
    #
    # Example input:  &#8220;Hello &#8212; world.&#8221;
    # Example output: "Hello -- world."
    #
    # Only the decimal entity forms are recognised.
    #

                        #  en-dash    em-dash
    $_ = str_replace(array('&#8211;', '&#8212;'),
                     array('-',       '--'), $_);

    # single quote         open       close
    $_ = str_replace(array('&#8216;', '&#8217;'), "'", $_);

    # double quote         open       close
    $_ = str_replace(array('&#8220;', '&#8221;'), '"', $_);

    $_ = str_replace('&#8230;', '...', $_); # ellipsis

    return $_;
}
|
| 616 |
|
| 617 |
|
| 618 |
function ProcessEscapes($_) {
    #
    # Parameter: String.
    # Returns:   The string, after processing the following backslash
    #            escape sequences. This is useful if you want to force a "dumb"
    #            quote or other character to appear.
    #
    #            Escape  Value
    #            ------  -----
    #            \\      &#92;
    #            \"      &#34;
    #            \'      &#39;
    #            \.      &#46;
    #            \-      &#45;
    #            \`      &#96;
    #
    # The replacements must be entities, not the raw characters: an entity
    # contains no quote, dash, dot or backtick, so the later education
    # passes leave it alone.
    #
    $_ = str_replace(
        array('\\\\',  '\"',    "\'",    '\.',    '\-',    '\`'),
        array('&#92;', '&#34;', '&#39;', '&#46;', '&#45;', '&#96;'), $_);

    return $_;
}
|
| 640 |
|
| 641 |
|
| 642 |
# _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
|
| 643 |
# We only define it if it is not already defined.
|
| 644 |
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input
    #            string. Each token is either a tag (possibly with nested,
    #            tags contained therein, such as <a href="<MTFoo>">, or a
    #            run of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text';
    #            the second is the actual value.
    #
    #            Empty text runs (between adjacent tags, or at either end
    #            of the string) produce no token; they used to be emitted
    #            as empty tokens mislabelled 'tag'.
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    # <http://www.bradchoate.com/past/mtregex.php>
    #
    $tokens = array();

    $match = '(?s:<!(?:--.*?--\s*)+>)|'.  # comment
             '(?s:<\?.*?\?>)|'.           # processing instruction
                                          # regular tags
             '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

    # With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
    # even offsets are the text between matches, odd offsets the matches.
    $parts = preg_split('{(' . $match . ')}', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $offset => $part) {
        if ($offset % 2) {
            $tokens[] = array('tag', $part);
        }
        else if ($part != '') {
            $tokens[] = array('text', $part);
        }
    }
    return $tokens;
}
endif;
|
| 679 |
|
| 680 |
|
| 681 |
/*
|
| 682 |
|
| 683 |
PHP SmartyPants
|
| 684 |
===============
|
| 685 |
|
| 686 |
Description
|
| 687 |
-----------
|
| 688 |
|
| 689 |
This is a PHP translation of the original SmartyPants quote educator written in
|
| 690 |
Perl by John Gruber.
|
| 691 |
|
| 692 |
SmartyPants is a web publishing utility that translates plain ASCII
|
| 693 |
punctuation characters into "smart" typographic punctuation HTML
|
| 694 |
entities. SmartyPants can perform the following transformations:
|
| 695 |
|
| 696 |
* Straight quotes (`"` and `'`) into "curly" quote HTML entities
|
| 697 |
* Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
|
| 698 |
entities
|
| 699 |
* Dashes (`--` and `---`) into en- and em-dash entities
|
| 700 |
* Three consecutive dots (`...`) into an ellipsis entity
|
| 701 |
|
| 702 |
SmartyPants does not modify characters within `<pre>`, `<code>`, `<kbd>`,
|
| 703 |
`<script>`, or `<math>` tag blocks. Typically, these tags are used to
|
| 704 |
display text where smart quotes and other "smart punctuation" would not
|
| 705 |
be appropriate, such as source code or example markup.
|
| 706 |
|
| 707 |
|
| 708 |
### Backslash Escapes ###
|
| 709 |
|
| 710 |
If you need to use literal straight quotes (or plain hyphens and
|
| 711 |
periods), SmartyPants accepts the following backslash escape sequences
|
| 712 |
to force non-smart punctuation. It does so by transforming the escape
|
| 713 |
sequence into a decimal-encoded HTML entity:
|
| 714 |
|
| 715 |
    Escape  Value  Character
    ------  -----  ---------
      \\    &#92;    \
      \"    &#34;    "
      \'    &#39;    '
      \.    &#46;    .
      \-    &#45;    -
      \`    &#96;    `
|
| 723 |
|
| 724 |
This is useful, for example, when you want to use straight quotes as
|
| 725 |
foot and inch marks: 6'2" tall; a 17" iMac.
|
| 726 |
|
| 727 |
|
| 728 |
Bugs
|
| 729 |
----
|
| 730 |
|
| 731 |
To file bug reports or feature requests (other than topics listed in the
|
| 732 |
Caveats section above) please send email to:
|
| 733 |
|
| 734 |
<michel.fortin@michelf.com>
|
| 735 |
|
| 736 |
If the bug involves quotes being curled the wrong way, please send example
|
| 737 |
text to illustrate.
|
| 738 |
|
| 739 |
|
| 740 |
### Algorithmic Shortcomings ###
|
| 741 |
|
| 742 |
One situation in which quotes will get curled the wrong way is when
|
| 743 |
apostrophes are used at the start of leading contractions. For example:
|
| 744 |
|
| 745 |
'Twas the night before Christmas.
|
| 746 |
|
| 747 |
In the case above, SmartyPants will turn the apostrophe into an opening
|
| 748 |
single-quote, when in fact it should be a closing one. I don't think
|
| 749 |
this problem can be solved in the general case -- every word processor
|
| 750 |
I've tried gets this wrong as well. In such cases, it's best to use the
|
| 751 |
proper HTML entity for closing single-quotes (`&#8217;`) by hand.
|
| 752 |
|
| 753 |
|
| 754 |
Version History
|
| 755 |
---------------
|
| 756 |
|
| 757 |
1.5.1e (9 Dec 2005)
|
| 758 |
|
| 759 |
* Corrected a bug that prevented special characters from being
|
| 760 |
escaped.
|
| 761 |
|
| 762 |
|
| 763 |
1.5.1d (25 May 2005)
|
| 764 |
|
| 765 |
* Corrected a small bug in `_TokenizeHTML` where a Doctype declaration
|
| 766 |
was not seen as HTML (smart quotes were applied inside).
|
| 767 |
|
| 768 |
|
| 769 |
1.5.1c (13 Dec 2004)
|
| 770 |
|
| 771 |
* Changed a regular expression in `_TokenizeHTML` that could lead to
|
| 772 |
a segmentation fault with PHP 4.3.8 on Linux.
|
| 773 |
|
| 774 |
|
| 775 |
1.5.1b (6 Sep 2004)
|
| 776 |
|
| 777 |
* Corrected a problem with quotes immediately following a dash
|
| 778 |
with no space between: `Text--"quoted text"--text.`
|
| 779 |
|
| 780 |
* PHP SmartyPants can now be used as a modifier by the Smarty
|
| 781 |
template engine. Rename the file to "modifier.smartypants.php"
|
| 782 |
and put it in your smarty plugins folder.
|
| 783 |
|
| 784 |
* Replaced a lot of space characters by tabs, saving about 4 KB.
|
| 785 |
|
| 786 |
|
| 787 |
1.5.1a (30 Jun 2004)
|
| 788 |
|
| 789 |
* PHP Markdown and PHP Smartypants now share the same `_TokenizeHTML`
|
| 790 |
function when loaded simultaneously.
|
| 791 |
|
| 792 |
* Changed the internals of `_TokenizeHTML` to lower the PHP version
|
| 793 |
requirement to PHP 4.0.5.
|
| 794 |
|
| 795 |
|
| 796 |
1.5.1 (6 Jun 2004)
|
| 797 |
|
| 798 |
* Initial release of PHP SmartyPants, based on version 1.5.1 of the
|
| 799 |
original SmartyPants written in Perl.
|
| 800 |
|
| 801 |
|
| 802 |
Author
|
| 803 |
------
|
| 804 |
|
| 805 |
John Gruber
|
| 806 |
<http://daringfireball.net/>
|
| 807 |
|
| 808 |
Ported to PHP by Michel Fortin
|
| 809 |
<http://www.michelf.com/>
|
| 810 |
|
| 811 |
|
| 812 |
Additional Credits
|
| 813 |
------------------
|
| 814 |
|
| 815 |
Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in.
|
| 816 |
Brad Choate also contributed a few bits of source code to this plug-in.
|
| 817 |
Brad Choate is a fine hacker indeed. (<http://bradchoate.com/>)
|
| 818 |
|
| 819 |
Jeremy Hedley (<http://antipixel.com/>) and Charles Wiltgen
|
| 820 |
(<http://playbacktime.com/>) deserve mention for exemplary beta testing.
|
| 821 |
|
| 822 |
|
| 823 |
Copyright and License
|
| 824 |
---------------------
|
| 825 |
|
| 826 |
Copyright (c) 2003 John Gruber
|
| 827 |
<http://daringfireball.net/>
|
| 828 |
All rights reserved.
|
| 829 |
|
| 830 |
Copyright (c) 2004-2005 Michel Fortin
|
| 831 |
<http://www.michelf.com>
|
| 832 |
|
| 833 |
Redistribution and use in source and binary forms, with or without
|
| 834 |
modification, are permitted provided that the following conditions are met:
|
| 835 |
|
| 836 |
* Redistributions of source code must retain the above copyright
|
| 837 |
notice, this list of conditions and the following disclaimer.
|
| 838 |
|
| 839 |
* Redistributions in binary form must reproduce the above copyright
|
| 840 |
notice, this list of conditions and the following disclaimer in the
|
| 841 |
documentation and/or other materials provided with the distribution.
|
| 842 |
|
| 843 |
* Neither the name "SmartyPants" nor the names of its contributors may
|
| 844 |
be used to endorse or promote products derived from this software
|
| 845 |
without specific prior written permission.
|
| 846 |
|
| 847 |
This software is provided by the copyright holders and contributors "as is"
|
| 848 |
and any express or implied warranties, including, but not limited to, the
|
| 849 |
implied warranties of merchantability and fitness for a particular purpose
|
| 850 |
are disclaimed. In no event shall the copyright owner or contributors be
|
| 851 |
liable for any direct, indirect, incidental, special, exemplary, or
|
| 852 |
consequential damages (including, but not limited to, procurement of
|
| 853 |
substitute goods or services; loss of use, data, or profits; or business
|
| 854 |
interruption) however caused and on any theory of liability, whether in
|
| 855 |
contract, strict liability, or tort (including negligence or otherwise)
|
| 856 |
arising in any way out of the use of this software, even if advised of the
|
| 857 |
possibility of such damage.
|
| 858 |
|
| 859 |
*/
|
| 860 |
?>
|