/[drupal]/contributions/sandbox/megagrunt/modules/enewsletter/html2txt.module
ViewVC logotype

Contents of /contributions/sandbox/megagrunt/modules/enewsletter/html2txt.module

Parent Directory | Revision Log | View Revision Graph


Revision 1.3 - (show annotations) (download) (as text)
Wed May 11 15:13:11 2005 UTC (4 years, 6 months ago) by MegaGrunt
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +0 -0 lines
File MIME type: text/x-php
Re-factored email send out functions:
1 <?php
2 // $Id:$
3
4 // Author: Robert Castelo
5 // Support: development@cortextcommunications.com
6 // Info: http://www.cortextcommunications.com/development/newsletter
7 // This module based on class.html2text.inc
8 // by Jon Abernathy <jon@chuggnutt.com>
9
10 /**
11 * Converts HTML to plain text equivalent.
12 */
13
/**
 * Implementation of hook_help().
 *
 * Supplies the one-line module description for the
 * 'admin/modules#description' section; every other section falls
 * through and yields no value (NULL).
 */
function html2txt_help($section) {
  // Loose comparison on purpose: it mirrors what a switch/case would do.
  if ($section == 'admin/modules#description') {
    return t('Converts HTML to plain text.');
  }
}
23
24
/**
 * Workhorse function that does the actual conversion.
 *
 * Steps, in order:
 *  1. Strips slashes from and trims the input.
 *  2. Applies the pattern/replacement pairs from _html2txt_define_search()
 *     and _html2txt_define_replace() one after another (plus the link
 *     pattern when $list_links is set).
 *  3. Strips the remaining HTML tags, except those in $allowed_tags.
 *  4. Optionally collapses whitespace-only gaps between newlines.
 *  5. Decodes the remaining HTML entities.
 *  6. Puts the <code>...</code> snippets set aside in step 2 back in.
 *  7. Optionally word wraps the text to $width characters.
 *
 * The globals $saved_snippets and (when $list_links is set) $link_list are
 * reset and refilled on every call.
 *
 * @param $html
 *   HTML to convert. stripslashes() is applied to it, so literal
 *   backslashes are removed (presumably to undo magic quotes - confirm
 *   against callers).
 * @param $width
 *   Column to word wrap at; no wrapping when empty.
 * @param $list_links
 *   When TRUE, each <a href="..."> whose href is the first attribute and is
 *   double-quoted is replaced by its text plus a "[n]" marker, and the URLs
 *   are collected into a numbered list.
 * @param $allowed_tags
 *   Tags to keep, in the format strip_tags() expects.
 * @param $empty_lines_max
 *   Currently only a flag: when TRUE, whitespace-only gaps between two
 *   newlines are reduced to a single empty line.
 *
 * @return
 *   Object with the plain text in ->text. ->links (heading plus numbered
 *   URL list) is set only when $list_links is TRUE and a link was found.
 */
function html2txt_convert($html, $width = NULL, $list_links = NULL, $allowed_tags = '', $empty_lines_max = NULL) {

  global $saved_snippets, $link_list;

  $saved_snippets = array();

  // Create the result object explicitly: assigning a property on an
  // undefined variable is a fatal Error on PHP 8.
  $converted = new stdClass();
  $converted->text = trim(stripslashes($html));

  // Parallel lists: $search[n] is replaced by $replace[n].
  $search = array_values(_html2txt_define_search());
  $replace = array_values(_html2txt_define_replace());

  // Strip out links or list them?
  if ($list_links) {
    $link_list = '';
    _html2txt_link_number(TRUE);
    $search[] = '/<a href="([^"]+)"[^>]*>(.+?)<\/a>/i';
    $replace[] = array('callback' => '_html2txt_callback_link');
  }

  // Run the defined search-and-replace pairs strictly in order, the way
  // preg_replace() treats array arguments. Callback entries go through
  // preg_replace_callback() instead of the removed (and unsafe) /e modifier.
  foreach ($search as $index => $pattern) {
    $replacement = isset($replace[$index]) ? $replace[$index] : '';
    if (is_array($replacement)) {
      $result = preg_replace_callback($pattern, $replacement['callback'], $converted->text);
    }
    else {
      $result = preg_replace($pattern, $replacement, $converted->text);
    }
    // NULL signals a PCRE error; keep the text as it was rather than lose it.
    if ($result !== NULL) {
      $converted->text = $result;
    }
  }

  // Strip any other HTML tags
  $converted->text = strip_tags($converted->text, $allowed_tags);

  // Bring down number of empty lines to 2 max
  // TO DO
  // Currently this is just TRUE or FALSE
  // Add maximum line configuration
  if ($empty_lines_max) {
    $converted->text = preg_replace("/\n\s+\n/", "\n\n", $converted->text);
  }

  // Add link list. Guarded by $list_links so that a list left in the global
  // by an earlier call is never attached to this result.
  if ($list_links && !empty($link_list)) {
    $converted->links = "\n\nLinks:\n------\n" . $link_list;
  }

  // Convert HTML entities
  $translation_table = array_flip(get_html_translation_table(HTML_ENTITIES));
  $converted->text = strtr($converted->text, $translation_table);

  // Load <code>...</code> snippets back in to text. They were stored
  // verbatim, so no stripslashes() is needed (or wanted) here.
  if (!empty($saved_snippets)) {
    foreach ($saved_snippets as $count => $snippet) {
      $converted->text = str_replace('[html2txt-code-snippet-' . $count . ']', "\n" . $snippet, $converted->text);
    }
  }

  // Wrap the text to a readable format
  if ($width) {
    $converted->text = wordwrap($converted->text, $width);
  }

  return $converted;
}


/**
 * List of preg* regular expression patterns to search for,
 * used in conjunction with the list from _html2txt_define_replace().
 *
 * Entry n of this list is paired with entry n of the replacement list and
 * the pairs are applied in order. No pattern uses the /e modifier; entries
 * that need PHP logic are paired with a callback instead.
 *
 * @return
 *   Array of PCRE patterns.
 */
function _html2txt_define_search() {

  $search = array(
    '@<code>(.+?)</code>@s',                 // Take out <code>...</code> snippets for later
    "/\r/",                                  // Non-legal carriage return
    "/[\n\t]+/",                             // Newlines and tabs
    '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
    '/<h[123][^>]*>(.+?)<\/h[123]>/i',       // H1 - H3
    '/<h[456][^>]*>(.+?)<\/h[456]>/i',       // H4 - H6
    '/<p[^>]*>/i',                           // <P>
    '/<br[^>]*>/i',                          // <br>
    '/<b\b[^>]*>(.+?)<\/b>/i',               // <b>; \b keeps <body>, <blockquote>... from matching
    '/<i\b[^>]*>(.+?)<\/i>/i',               // <i>; \b keeps <img>, <input>... from matching
    '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
    '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
    '/<li[^>]*>/i',                          // <li>
    '/<hr[^>]*>/i',                          // <hr>
    '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
    '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
    '/<td[^>]*>(.+?)<\/td>/i',               // <td> and </td>
    '/<th[^>]*>(.+?)<\/th>/i',               // <th> and </th>
    '/&nbsp;/i',
    '/&quot;/i',
    '/&gt;/i',
    '/&lt;/i',
    '/&amp;/i',
    '/&copy;/i',
    '/&trade;/i',
    '/&#8220;/',                             // Left double quotation mark
    '/&#8221;/',                             // Right double quotation mark
    '/&#8211;/',                             // En dash
    '/&#8217;/',                             // Right single quotation mark
    '/&#38;/',                               // Ampersand
    '/&#169;/',                              // Copyright sign
    '/&#8482;/',                             // Trade mark sign
    '/&#151;/',
    '/&#147;/',
    '/&#148;/',
    '/&#149;/',
    '/&reg;/i',
    '/&bull;/i',
    '/&[&;]+;/i',                            // NOTE(review): possibly meant '/&[^&;]+;/i' (any other entity) - confirm
    '/&#039;/'
  );

  return $search;
}

/**
 * List of pattern replacements corresponding to patterns searched.
 *
 * A string entry is an ordinary preg_replace() replacement. An entry of the
 * form array('callback' => 'function_name') names a function that receives
 * the match array and returns the replacement text.
 *
 * @return
 *   Array of replacements, parallel to _html2txt_define_search().
 */
function _html2txt_define_replace() {

  $replace = array(
    array('callback' => '_html2txt_callback_code'),           // Take out <code>...</code> snippets for later
    '',                                                       // Non-legal carriage return
    ' ',                                                      // Newlines and tabs
    '',                                                       // <script>s -- which strip_tags supposedly has problems with
    array('callback' => '_html2txt_callback_heading_major'),  // H1 - H3
    array('callback' => '_html2txt_callback_heading_minor'),  // H4 - H6
    "\n\n",                                                   // <P>
    "\n",                                                     // <br>
    array('callback' => '_html2txt_callback_bold'),           // <b>
    '_\\1_',                                                  // <i>
    "\n\n",                                                   // <ul> and </ul>
    "\n\n",                                                   // <ol> and </ol>
    "\n\t*",                                                  // <li>
    "\n-------------------------\n",                          // <hr>
    "\n\n",                                                   // <table> and </table>
    "\n",                                                     // <tr> and </tr>
    "\t\t\\1\n",                                              // <td> and </td>
    array('callback' => '_html2txt_callback_table_header'),   // <th> and </th>
    ' ',
    '"',
    '>',
    '<',
    '&',
    '(c)',
    '(tm)',
    '"',
    '"',
    '-',
    "'",
    '&',
    '(c)',
    '(tm)',
    '--',
    '"',
    '"',
    '*',
    '(R)',
    '*',
    '',
    "'"
  );
  return $replace;
}

/**
 * Callback: sets a <code>...</code> snippet aside and returns its placeholder.
 *
 * Snippets are numbered from 1 in order of appearance, based on how many
 * are already stored in the global $saved_snippets.
 */
function _html2txt_callback_code($matches) {
  global $saved_snippets;

  return _html2txt_get_code_snippets(count($saved_snippets) + 1, $matches[1], '');
}

/**
 * Callback: H1 - H3 become an upper-cased line preceded by an empty line.
 */
function _html2txt_callback_heading_major($matches) {
  return strtoupper("\n\n" . $matches[1] . "\n");
}

/**
 * Callback: H4 - H6 get each word capitalised, with empty lines around.
 */
function _html2txt_callback_heading_minor($matches) {
  return ucwords("\n\n" . $matches[1] . "\n\n");
}

/**
 * Callback: bold text is shown in upper case.
 */
function _html2txt_callback_bold($matches) {
  return strtoupper($matches[1]);
}

/**
 * Callback: table header cells are indented like <td> cells and upper-cased.
 */
function _html2txt_callback_table_header($matches) {
  return strtoupper("\t\t" . $matches[1] . "\n");
}

/**
 * Callback: records a link in the link list and returns its text plus marker.
 */
function _html2txt_callback_link($matches) {
  return _html2txt_build_link_list(_html2txt_link_number(), $matches[1], $matches[2]);
}

/**
 * Hands out consecutive link numbers, starting at 1.
 *
 * @param $reset
 *   When TRUE the counter is set back so that the next call returns 1.
 *
 * @return
 *   The next link number, or 0 when resetting.
 */
function _html2txt_link_number($reset = FALSE) {
  static $number = 0;

  if ($reset) {
    $number = 0;
    return $number;
  }

  return ++$number;
}


/**
 * Maintains an internal list of links to be displayed separately from the
 * text, with numeric indices to the original point in the text where they
 * appeared. Also makes an effort at identifying and handling absolute
 * and relative links.
 *
 * @param $link_count
 *   Number used for this link in the list and in the text marker.
 * @param $link
 *   The href value. Anything not starting with http://, https:// or mailto:
 *   is treated as relative and prefixed with the global $base_url.
 * @param $display
 *   The link text.
 *
 * @return
 *   The link text followed by " [n]".
 */
function _html2txt_build_link_list($link_count, $link, $display) {

  global $base_url;
  global $link_list;

  if (preg_match('@^(?:https?://|mailto:)@i', $link)) {
    $link_list .= "[$link_count] $link\n";
  }
  else {
    $link_list .= "[$link_count] " . $base_url;
    if (substr($link, 0, 1) != '/') {
      $link_list .= '/';
    }
    $link_list .= "$link\n";
  }

  return $display . ' [' . $link_count . ']';
}

/**
 * Temporarily replaces <code>...</code> with a placeholder.
 *
 * @param $snippet_count
 *   Key under which the snippet is stored in the global $saved_snippets.
 * @param $snippet
 *   The text found between <code> and </code>.
 * @param $display
 *   Text to put in front of the placeholder.
 *
 * @return
 *   $display followed by the placeholder "[html2txt-code-snippet-n]".
 */
function _html2txt_get_code_snippets($snippet_count, $snippet, $display) {

  global $saved_snippets;

  $saved_snippets[$snippet_count] = $snippet;

  return $display . '[html2txt-code-snippet-' . $snippet_count . ']';
}
245
246
247 ?>

  ViewVC Help
Powered by ViewVC 1.1.2