/[drupal]/contributions/sandbox/megagrunt/modules/enewsletter/class.html2text.inc
ViewVC logotype

Contents of /contributions/sandbox/megagrunt/modules/enewsletter/class.html2text.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (show annotations) (download) (as text)
Tue Aug 3 09:37:26 2004 UTC (5 years, 3 months ago) by MegaGrunt
Branch: MAIN
CVS Tags: HEAD
File MIME type: text/x-php
nodes can only be sent once in email, improved scheduling, HTML emails now work
1 <?php
2
3 /*************************************************************************
4 * *
5 * class.html2text.inc *
6 * *
7 *************************************************************************
8 * *
9 * Converts HTML to formatted plain text *
10 * *
11 * Copyright (c) 2003 Jon Abernathy <jon@chuggnutt.com> *
12 * All rights reserved. *
13 * *
14 * This script is free software; you can redistribute it and/or modify *
15 * it under the terms of the GNU General Public License as published by *
16 * the Free Software Foundation; either version 2 of the License, or *
17 * (at your option) any later version. *
18 * *
19 * The GNU General Public License can be found at *
20 * http://www.gnu.org/copyleft/gpl.html. *
21 * *
22 * This script is distributed in the hope that it will be useful, *
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
25 * GNU General Public License for more details. *
26 * *
27 * Author(s): Jon Abernathy <jon@chuggnutt.com> *
28 * *
29 * Last modified: 11/07/03 *
30 * *
31 *************************************************************************/
32
33
/**
 * Takes HTML and converts it to formatted, plain text.
 *
 * Thanks to Alexander Krug (http://www.krugar.de/) for pointing out and
 * correcting an error in the regexp search array. Fixed 7/30/03.
 *
 * Updated set_html() function's file reading mechanism, 9/25/03.
 *
 * Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding
 * several more HTML entity codes to the $search and $replace arrays.
 * Updated 11/7/03.
 *
 * Replacements that need PHP logic (upper-casing headings and bold text,
 * collecting links) are performed through preg_replace_callback() rather
 * than the eval-based "/e" pattern modifier, so document content is never
 * evaluated as PHP code.
 *
 * @author Jon Abernathy <jon@chuggnutt.com>
 * @version 0.4
 */
class html2text
{

    /**
     * Contains the HTML content to convert.
     *
     * @var string $html
     * @access public
     */
    var $html;

    /**
     * Contains the converted, formatted text.
     *
     * @var string $text
     * @access public
     */
    var $text;

    /**
     * Maximum width of the formatted text, in columns.
     *
     * @var integer $width
     * @access public
     */
    var $width = 70;

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $replace. Rules are applied one after
     * another, in array order, each on the output of the previous rule.
     *
     * Tag names that are a prefix of other tag names (p, b, i, li) are
     * followed by \b so that e.g. the <b> rule does not fire on <body>.
     *
     * @var array $search
     * @access public
     * @see $replace
     */
    var $search = array(
        "/\r/",                                   // Non-legal carriage return
        "/[\n\t]+/",                              // Newlines and tabs
        '/<script[^>]*>.*?<\/script>/i',          // <script>s -- which strip_tags supposedly has problems with
        '/<h[123][^>]*>(.+?)<\/h[123]>/i',        // H1 - H3
        '/<h[456][^>]*>(.+?)<\/h[456]>/i',        // H4 - H6
        '/<p\b[^>]*>/i',                          // <P>
        '/<br[^>]*>/i',                           // <br>
        '/<b\b[^>]*>(.+?)<\/b>/i',                // <b>
        '/<i\b[^>]*>(.+?)<\/i>/i',                // <i>
        '/(<ul[^>]*>|<\/ul>)/i',                  // <ul> and </ul>
        '/<li\b[^>]*>/i',                         // <li>
        '/<a href="([^"]+)"[^>]*>(.+?)<\/a>/i',   // <a href="">
        '/<hr[^>]*>/i',                           // <hr>
        '/(<table[^>]*>|<\/table>)/i',            // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                  // <tr> and </tr>
        '/<td[^>]*>(.+?)<\/td>/i',                // <td> and </td>
        '/&nbsp;/i',
        '/&quot;/i',
        '/&gt;/i',
        '/&lt;/i',
        '/&copy;/i',
        '/&trade;/i',
        '/&#8220;/',
        '/&#8221;/',
        '/&#8211;/',
        '/&#8217;/',
        '/&#169;/',
        '/&#8482;/',
        '/&#151;/',
        '/&#147;/',
        '/&#148;/',
        '/&#149;/',
        '/&reg;/i',
        // Ampersands are decoded last so that an escaped entity such as
        // "&amp;copy;" ends up as the literal text "&copy;", not "(c)".
        '/&amp;/i',
        '/&#38;/'
    );

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * A string entry is a plain preg_replace() replacement. An array entry
     * of the form array('callback' => 'method') names a method of this
     * class that receives the match array and returns the replacement text.
     *
     * @var array $replace
     * @access public
     * @see $search
     */
    var $replace = array(
        '',                                             // Non-legal carriage return
        ' ',                                            // Newlines and tabs
        '',                                             // <script>s -- which strip_tags supposedly has problems with
        array('callback' => '_replace_major_heading'),  // H1 - H3
        array('callback' => '_replace_minor_heading'),  // H4 - H6
        "\n\n",                                         // <P>
        "\n",                                           // <br>
        array('callback' => '_replace_bold'),           // <b>
        '_\\1_',                                        // <i>
        "\n\n",                                         // <ul> and </ul>
        "\t*",                                          // <li>
        array('callback' => '_replace_link'),           // <a href="">
        "\n-------------------------\n",                // <hr>
        "\n\n",                                         // <table> and </table>
        "\n",                                           // <tr> and </tr>
        "\t\t\\1\n",                                    // <td> and </td>
        ' ',                                            // &nbsp;
        '"',                                            // &quot;
        '>',                                            // &gt;
        '<',                                            // &lt;
        '(c)',                                          // &copy;
        '(tm)',                                         // &trade;
        '"',                                            // &#8220;
        '"',                                            // &#8221;
        '-',                                            // &#8211;
        "'",                                            // &#8217;
        '(c)',                                          // &#169;
        '(tm)',                                         // &#8482;
        '--',                                           // &#151;
        '"',                                            // &#147;
        '"',                                            // &#148;
        '*',                                            // &#149;
        '(R)',                                          // &reg;
        '&',                                            // &amp;
        '&'                                             // &#38;
    );

    /**
     * Indicates whether content in the $html variable has been converted yet.
     *
     * @var boolean $_converted
     * @access private
     * @see $html, $text
     */
    var $_converted = false;

    /**
     * Contains URL addresses from links to be rendered in plain text.
     *
     * @var string $_link_list
     * @access private
     * @see _build_link_list()
     */
    var $_link_list = '';

    /**
     * Number of links collected so far during the current conversion.
     *
     * @var integer $_link_count
     * @access private
     * @see _replace_link()
     */
    var $_link_count = 0;

    /**
     * Constructor.
     *
     * If the HTML source string (or file) is supplied, the class
     * will instantiate with that source propagated; all that has
     * to be done is to call get_text().
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     */
    function __construct( $source = '', $from_file = false )
    {
        // Compare against the empty string rather than using empty(), which
        // would also reject the perfectly valid document "0".
        if ( (string) $source !== '' ) {
            $this->set_html($source, $from_file);
        }
    }

    /**
     * Old-style (class-named) constructor, kept so that existing code
     * calling it keeps working. Forwards to __construct().
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     */
    function html2text( $source = '', $from_file = false )
    {
        $this->__construct($source, $from_file);
    }

    /**
     * Loads source HTML into memory, either from $source string or a file.
     *
     * When $from_file is true and $source names an existing file, the file
     * contents become the HTML; if the file cannot be read the HTML is set
     * to an empty string. Otherwise $source itself is used as the HTML.
     *
     * @param string $source HTML content
     * @param boolean $from_file Indicates $source is a file to pull content from
     * @access public
     * @return void
     */
    function set_html( $source, $from_file = false )
    {
        $this->html = $source;

        if ( $from_file && file_exists($source) ) {
            // file_get_contents() copes with zero-length files and reports
            // failure through its return value.
            $contents = file_get_contents($source);
            $this->html = ( $contents === false ) ? '' : $contents;
        }

        $this->_converted = false;
    }

    /**
     * Returns the text, converted from HTML.
     *
     * The conversion runs lazily, once per set_html() call.
     *
     * @access public
     * @return string
     */
    function get_text()
    {
        if ( !$this->_converted ) {
            $this->_convert();
        }

        return $this->text;
    }

    /**
     * Prints the text, converted from HTML.
     *
     * @access public
     * @return void
     */
    function print_text()
    {
        print $this->get_text();
    }

    /**
     * Alias to print_text(), operates identically.
     *
     * @access public
     * @return void
     * @see print_text()
     */
    function p()
    {
        $this->print_text();
    }

    /**
     * Workhorse function that does actual conversion.
     *
     * First performs custom tag replacement specified by $search and
     * $replace arrays. Then strips any remaining HTML tags, reduces whitespace
     * and newlines to a readable format, appends the list of collected
     * links, and word wraps the text to $width characters.
     *
     * @access private
     * @return void
     */
    function _convert()
    {
        // State used for building the link list
        $this->_link_count = 0;
        $this->_link_list = '';

        $text = trim(stripslashes((string) $this->html));

        // Run our defined search-and-replace, one rule at a time and in
        // order, pairing patterns and replacements by position.
        $patterns = array_values($this->search);
        $replacements = array_values($this->replace);

        foreach ( $patterns as $index => $pattern ) {
            // A pattern without a matching replacement deletes its matches.
            $replacement = isset($replacements[$index]) ? $replacements[$index] : '';

            if ( is_array($replacement) ) {
                $method = isset($replacement['callback']) ? $replacement['callback'] : '';
                if ( !is_callable(array($this, $method)) ) {
                    // Unknown callback: leave the text untouched for this rule.
                    continue;
                }
                $result = preg_replace_callback($pattern, array($this, $method), $text);
            } else {
                $result = preg_replace($pattern, $replacement, $text);
            }

            // preg_* return NULL on a PCRE error; keep the text we have
            // rather than discarding the whole document.
            if ( $result !== null ) {
                $text = $result;
            }
        }

        // Strip any other HTML tags
        $text = strip_tags($text);

        // Bring down number of empty lines to 2 max
        $text = preg_replace("/\n[[:space:]]+\n/", "\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        // Add link list
        if ( $this->_link_list !== '' ) {
            $text .= "\n\nLinks:\n------\n" . $this->_link_list;
        }

        // Wrap the text to a readable format; the column limit comes from
        // $width (70 unless changed by the caller).
        $text = wordwrap($text, $this->width);

        $this->text = $text;

        $this->_converted = true;
    }

    /**
     * Callback for H1 - H3: upper-cases the heading and sets it off with
     * blank lines.
     *
     * @param array $matches Match data; index 1 is the heading content
     * @access private
     * @return string
     */
    function _replace_major_heading($matches)
    {
        return "\n\n" . strtoupper($matches[1]) . "\n\n";
    }

    /**
     * Callback for H4 - H6: capitalises each word of the heading and sets
     * it off with blank lines.
     *
     * @param array $matches Match data; index 1 is the heading content
     * @access private
     * @return string
     */
    function _replace_minor_heading($matches)
    {
        return "\n\n" . ucwords($matches[1]) . "\n\n";
    }

    /**
     * Callback for <b>: upper-cases the enclosed content.
     *
     * @param array $matches Match data; index 1 is the bold content
     * @access private
     * @return string
     */
    function _replace_bold($matches)
    {
        return strtoupper($matches[1]);
    }

    /**
     * Callback for <a href="">: numbers the link and records its URL.
     *
     * @param array $matches Match data; index 1 is the URL, index 2 the link text
     * @access private
     * @return string
     * @see _build_link_list()
     */
    function _replace_link($matches)
    {
        // Links are numbered from 1 within each conversion.
        return $this->_build_link_list(++$this->_link_count, $matches[1], $matches[2]);
    }

    /**
     * Helper function called on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices to the original point in the text they
     * appeared.
     *
     * @param integer $link_count Counter tracking current link number
     * @param string $link URL of the link
     * @param string $display Part of the text to associate number with
     * @access private
     * @return string
     */
    function _build_link_list($link_count, $link, $display)
    {
        $this->_link_list .= "[$link_count] $link\n";

        return $display . '[' . $link_count . ']';
    }

}
331
332 ?>

  ViewVC Help
Powered by ViewVC 1.1.2