/[drupal]/contributions/modules/typogrify/unicode-conversion.php
ViewVC logotype

Contents of /contributions/modules/typogrify/unicode-conversion.php

Parent Directory | Revision Log | View Revision Graph


Revision 1.6 - (show annotations) (download) (as text)
Tue Oct 20 09:31:35 2009 UTC (5 weeks, 1 day ago) by mikl
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +3 -1 lines
File MIME type: text/x-php
#513708 by pukku: Add ij ligatures
1 <?php
2 // $Id: unicode-conversion.php,v 1.5 2009/10/20 09:19:03 mikl Exp $
3
/**
 * Return the unicode conversion maps.
 *
 * @param string $type
 *   The map type we're looking for, one of 'ligature', 'punctuation',
 *   'arrow', 'nested' or 'all'.
 * @return array
 *   - 'ligature', 'punctuation' or 'arrow': that group's conversions, keyed
 *     by the original ASCII string, with the HTML entity as the value.
 *   - 'all': the three groups merged into one flat array (ligatures first,
 *     then arrows, then punctuation).
 *   - 'nested': the groups themselves, keyed by group name.
 *   - anything else: an empty array.
 */
function unicode_conversion_map($type = 'all') {
  // Within each group, longer strings are listed before their own prefixes
  // (e.g. 'ffi' before 'ff', '---' before '--'). Keep that order when adding
  // entries: convert_characters() hands these strings to str_replace(), which
  // applies its replacements one after another.
  $map = array(
    // See http://www.unicode.org/charts/PDF/UFB00.pdf
    // (ij/IJ live in Latin Extended-A; &szlig; is a named HTML entity.)
    'ligature' => array(
      'ffi' => '&#xfb03;',
      'ffl' => '&#xfb04;',
      'ff' => '&#xfb00;',
      'fi' => '&#xfb01;',
      'fl' => '&#xfb02;',
      'ij' => '&#x0133;',
      'IJ' => '&#x0132;',
      'st' => '&#xfb06;',
      'ss' => '&szlig;',
    ),
    // See http://www.unicode.org/charts/PDF/U2000.pdf
    'punctuation' => array(
      '...' => '&#x2026;',
      '..' => '&#x2025;',
      '. . .' => '&#x2026;',
      '---' => '&mdash;',
      '--' => '&ndash;',
    ),
    // See http://www.unicode.org/charts/PDF/U2190.pdf
    'arrow' => array(
      '->>' => '&#x21a0;',
      '<<-' => '&#x219e;',
      '->|' => '&#x21e5;',
      '|<-' => '&#x21e4;',
      '<->' => '&#x2194;',
      '->' => '&#x2192;',
      '<-' => '&#x2190;',
      '<=>' => '&#x21d4;',
      '=>' => '&#x21d2;',
      '<=' => '&#x21d0;',
    ),
  );

  if ($type === 'all') {
    return array_merge($map['ligature'], $map['arrow'], $map['punctuation']);
  }
  if ($type === 'nested') {
    return $map;
  }

  // An unrecognised type yields an empty map rather than an undefined-index
  // notice and a NULL return value.
  return (is_string($type) && isset($map[$type])) ? $map[$type] : array();
}
60
/**
 * Perform character conversion.
 *
 * Replaces the requested ASCII sequences with their entity equivalents in
 * the text portions of an HTML string. Tags themselves are never altered,
 * and text is left alone while the "skip" flag set by a tag matching
 * SMARTYPANTS_TAGS_TO_SKIP is active.
 *
 * @param string $text
 *   Text to be parsed.
 * @param array $characters_to_convert
 *   Array of ASCII strings to convert. Each entry should be a key of the
 *   array returned by unicode_conversion_map(); entries that are not found
 *   there are ignored. Replacements are applied in the order given.
 * @return string
 *   The result of the conversion.
 */
function convert_characters($text, $characters_to_convert) {
  if (empty($characters_to_convert)) {
    // Nothing requested: do nothing.
    return $text;
  }

  // Get ASCII to unicode mappings.
  $unicode_map = unicode_conversion_map();

  // Build parallel search/replace lists. Entries missing from the map are
  // skipped: pairing them with a NULL replacement would make str_replace()
  // delete every occurrence of that string from the text.
  $search = array();
  $replace = array();
  foreach ($characters_to_convert as $ascii_string) {
    if (is_string($ascii_string) && isset($unicode_map[$ascii_string])) {
      $search[] = $ascii_string;
      $replace[] = $unicode_map[$ascii_string];
    }
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  // Keep track of when we're inside <pre> or <code> tags.
  $in_pre = 0;
  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      // Don't mess with text inside tags, <pre> blocks, or <code> blocks.
      $result .= $cur_token[1];
      // The tags-to-skip regex comes from SmartyPants (the constant is not
      // defined in this file). A captured '/' in group 1 is treated as a
      // closing tag and clears the flag; any other match sets it.
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
    }
    else {
      $t = $cur_token[1];
      if ($in_pre == 0) {
        // ProcessEscapes() is defined outside this file -- presumably the
        // SmartyPants backslash-escape handler; verify against that library.
        $t = ProcessEscapes($t);
        $t = str_replace($search, $replace, $t);
      }
      $result .= $t;
    }
  }
  return $result;
}
106
107
// _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
// We're borrowing it for Typogrify.module, too.
// We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) {
  /**
   * Split a string of HTML markup into tag and text tokens.
   *
   * Regular expression derived from the _tokenize() subroutine in
   * Brad Choate's MTRegex plugin.
   * <http://www.bradchoate.com/past/mtregex.php>
   *
   * @param string $str
   *   String containing HTML markup.
   * @return array
   *   The tokens comprising the input string, in document order. Each token
   *   is a two-element array: the first element is either 'tag' or 'text',
   *   the second is the actual value. A 'tag' token is a comment, a
   *   processing instruction or a regular tag (possibly with tags nested in
   *   its quoted attribute values, such as <a href="<MTFoo>">); a 'text'
   *   token is a non-empty run of text between tags.
   */
  function _TokenizeHTML($str) {
    $match = '(?s:<!(?:--.*?--\s*)+>)|' .  // comment
      '(?s:<\?.*?\?>)|' .                  // processing instruction
      // regular tags
      '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

    // The pattern is wrapped in its only capturing group, so with
    // PREG_SPLIT_DELIM_CAPTURE the result strictly alternates:
    // text, tag, text, tag, ..., text.
    $parts = preg_split('{(' . $match . ')}', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $tokens = array();
    foreach ($parts as $index => $part) {
      // Only text slots can be empty (at either end of the input or between
      // two adjacent tags). Drop them instead of reporting an empty 'tag'.
      if ($part === '') {
        continue;
      }
      // Even positions hold text, odd positions hold the captured tags.
      $tokens[] = array($index % 2 ? 'tag' : 'text', $part);
    }
    return $tokens;
  }
}
146

  ViewVC Help
Powered by ViewVC 1.1.2