/[drupal]/contributions/modules/leech/leech_news_parser.inc
ViewVC logotype

Contents of /contributions/modules/leech/leech_news_parser.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (show annotations) (download) (as text)
Fri Jan 5 00:05:29 2007 UTC (2 years, 10 months ago) by alexb
Branch: MAIN
CVS Tags: DRUPAL-5--1-9, DRUPAL-5--1-8, DRUPAL-5--1-7, DRUPAL-5--1-6, DRUPAL-5--0-1-before-upgrade, HEAD
Branch point for: DRUPAL-5
Changes since 1.1: +646 -0 lines
File MIME type: text/x-php
merged DRUPAL-4-7--1-4-before-merge into trunk
1 <?php
2
/**
 * Parse the raw XML of a news feed into a feed object.
 *
 * Before the XML is handed to LeechNewsParser it is normalised:
 *  - every CDATA marker is stripped and then re-added uniformly around the
 *    content of elements that may carry (X)HTML markup;
 *  - the invalid hour "24:" is rewritten to "00:".
 *
 * @param $xml
 *   Raw XML of the feed. Taken by reference but not modified.
 * @return
 *   The object built by _leech_news_get_feed() from the parsed tree.
 */
function leech_news_parse_news_feed(&$xml) {
  // Some feeds already use CDATA but in a "wrong way", e.g.
  // http://www.rocketboom.com/vlog/quicktime_daily_enclosures.xml wraps only
  // part of a <description> in CDATA. Strip all markers first so that the
  // wrapping below is applied consistently.
  $data = trim(str_replace(array('<![CDATA[', ']]>'), '', $xml));

  // Add CDATA around content which may contain (x)html data, and is not
  // contained in CDATA yet.
  $src = array(
    '%(<(link|content|content:encoded|description|title|summary|info|tagline|copyright|source|itunes:summary|media:text|text)(?>[^<]*(?<!/)>)(?!<!\[CDATA\[))(.*)(</\2>)%sUS',
    '%24:(\d\d:\d\d)%' // workaround buggy hour format in feeds
  );
  $dst = array(
    '$1<![CDATA[$3]]>$4',
    '00:$1'
  );
  $normalised = preg_replace($src, $dst, $data);

  if (is_string($normalised)) {
    $data = $normalised;
  }
  else {
    // preg_replace() yields NULL when PCRE gives up (for instance when the
    // backtrack limit is hit on a very large feed). Parsing the untouched XML
    // is better than parsing nothing at all.
    $data = trim($xml);
  }

  $xml_parser = new LeechNewsParser();
  $xml_tree = $xml_parser->Parse($data);
  return _leech_news_get_feed($xml_tree);
}
23
24
/**
 * Private function; read the text of the first $key child of a tree node.
 *
 * @param $node
 *   A node of the parsed XML tree (array), or anything else.
 * @param $key
 *   Upper-cased element name of the wanted child.
 * @return
 *   The child's VALUE, or NULL when the child or its text is missing.
 */
function _leech_news_feed_value($node, $key) {
  return isset($node[$key][0]['VALUE']) ? $node[$key][0]['VALUE'] : NULL;
}

/**
 * Private function; turn a parsed XML tree into a feed object.
 *
 * Recognises RSS 0.91/0.92/2.0 (root RSS), RDF (root RDF:RDF), Atom 0.3/1.0
 * (root FEED) and RSS 1.1 (root CHANNEL). Anything else yields a feed whose
 * format is '[unknown]' and whose other fields are empty.
 *
 * @param $xml_tree
 *   Tree as produced by LeechNewsParser::Parse(); element names are
 *   upper-cased keys, each holding a list of nodes.
 * @return
 *   Object with format, root, channel, logo, link, description, author,
 *   has_guids, has_dates, has_unique_links and items. Items are stored in
 *   reverse document order.
 */
function _leech_news_get_feed(&$xml_tree) {
  $feed = new StdClass();
  // Always defined; the branches below only record which element names may
  // carry the unique id of an item.
  $feed->has_guids = FALSE;
  $guid_keys = array('GUID');

  if (!empty($xml_tree['RSS'])) { // RSS 0.91, 0.92, 2.0
    $feed->format = 'RSS';
    $feed->root = &$xml_tree['RSS'][0];
    $feed->channel = &$xml_tree['RSS'][0]['CHANNEL'][0];
    $items = &$xml_tree['RSS'][0]['CHANNEL'][0]['ITEM'];

    $feed->logo = isset($feed->channel['IMAGE'][0]) ? $feed->channel['IMAGE'][0] : NULL;
    $feed->link = _leech_news_feed_value($feed->channel, 'LINK');
    $feed->description = _leech_news_feed_value($feed->channel, 'DESCRIPTION');
  }
  else if (!empty($xml_tree['RDF:RDF'])) {
    $feed->format = 'RDF';
    $feed->root = &$xml_tree['RDF:RDF'][0];
    $feed->channel = &$xml_tree['RDF:RDF'][0]['CHANNEL'][0];
    $items = &$xml_tree['RDF:RDF'][0]['ITEM'];

    // In RDF the image is a sibling of the channel, not a child of it.
    $feed->logo = isset($feed->root['IMAGE'][0]) ? $feed->root['IMAGE'][0] : NULL;
    $feed->link = _leech_news_feed_value($feed->channel, 'LINK');
    $feed->description = _leech_news_feed_value($feed->channel, 'DESCRIPTION');
  }
  else if (!empty($xml_tree['FEED'])) { // Atom 0.3, 1.0
    $feed->format = 'ATOM';
    $feed->root = &$xml_tree['FEED'][0];
    $feed->channel = &$xml_tree['FEED'][0];
    $items = &$xml_tree['FEED'][0]['ENTRY'];

    $feed->logo = _leech_news_feed_value($feed->channel, 'LOGO');
    if (!$feed->logo) {
      $feed->logo = _leech_news_feed_value($feed->channel, 'ICON');
    }

    // TODO: remove this Atom hack when we have field mapping or at least
    // specialized parsers in place.
    $links = array();
    if (isset($feed->channel['LINK']) && is_array($feed->channel['LINK'])) {
      $links = $feed->channel['LINK'];
    }
    if (count($links) > 1) {
      // Several links: the last one marked rel="alternate" wins.
      $feed->link = '';
      foreach ($links as $l) {
        if (isset($l['REL']) && $l['REL'] == 'alternate' && isset($l['HREF'])) {
          $feed->link = $l['HREF'];
        }
      }
    }
    else {
      $feed->link = isset($links[0]['HREF']) ? $links[0]['HREF'] : NULL;
    }

    // Atom 0.3 calls it <tagline>, Atom 1.0 renamed it to <subtitle>.
    $feed->description = _leech_news_feed_value($feed->channel, 'TAGLINE');
    if (!$feed->description) {
      $feed->description = _leech_news_feed_value($feed->channel, 'SUBTITLE');
    }

    $guid_keys = array('ATOM:ID', 'ID');
  }
  else if (!empty($xml_tree['CHANNEL'])) { // RSS 1.1
    $feed->format = 'RSS';
    $feed->root = &$xml_tree['CHANNEL'][0];
    $feed->channel = &$xml_tree['CHANNEL'][0];
    $items = &$xml_tree['CHANNEL'][0]['ITEMS'][0]['ITEM'];

    $feed->logo = isset($feed->channel['IMAGE'][0]) ? $feed->channel['IMAGE'][0] : NULL;
    $feed->link = _leech_news_feed_value($feed->channel, 'LINK');
    $feed->description = _leech_news_feed_value($feed->channel, 'DESCRIPTION');
  }
  else {
    // Unsupported format: hand back an empty but fully populated feed.
    $feed->format = '[unknown]';
    $feed->root = array();
    $feed->channel = array();

    $feed->logo = '';
    $feed->link = '';
    $feed->description = '';
    $feed->author = '';

    $feed->has_dates = NULL;
    $feed->has_unique_links = TRUE;
    $feed->items = array();
    return $feed;
  }

  // The first raw item tells us which element (if any) carries the unique id.
  // This has to look at $items: $feed->items is only filled further down.
  foreach ($guid_keys as $key) {
    if (!empty($items[0][$key][0]['VALUE'])) {
      $feed->has_guids = $key;
      break;
    }
  }

  // Now handle image/logo tags which are parents of other tags with url and
  // stuff.
  if (is_array($feed->logo)) {
    $logo_url = _leech_news_feed_value($feed->logo, 'URL');
    // Anything without a usable <url> child is unsupported.
    $feed->logo = $logo_url ? $logo_url : '';
  }

  // Try to find the author of the feed: Atom <author><name>, then a plain
  // <author>, then Dublin Core.
  if (!empty($feed->channel['AUTHOR'][0]['NAME'][0]['VALUE'])) {
    $feed->author = $feed->channel['AUTHOR'][0]['NAME'][0]['VALUE'];
  }
  else if (!empty($feed->channel['AUTHOR'][0]['VALUE'])) {
    $feed->author = $feed->channel['AUTHOR'][0]['VALUE'];
  }
  else if (!empty($feed->channel['DC:CREATOR'][0]['VALUE'])) {
    $feed->author = $feed->channel['DC:CREATOR'][0]['VALUE'];
  }
  else {
    $feed->author = '';
  }

  // TODO: find nicer way for handling namespaces ;)
  // Candidate date elements in order of preference.
  $date_keys = array(
    'PUBDATE',          // RSS 2.0
    'DC:DATE',          // Dublin core
    'DATE',             // Dublin core
    'DCTERMS:ISSUED',   // Dublin core
    'ISSUED',           // Dublin core
    'DCTERMS:CREATED',  // Dublin core
    'CREATED',          // Dublin core
    'DCTERMS:MODIFIED', // Dublin core
    'MODIFIED',         // Dublin core
    'ATOM:UPDATED',     // Atom
    'UPDATED'           // Atom
  );
  $feed->has_dates = NULL;
  foreach ($date_keys as $key) {
    if (!empty($items[0][$key])) {
      $feed->has_dates = $key;
      break;
    }
  }

  // Set default value
  $feed->has_unique_links = TRUE;
  $feed->items = array();
  $seen_links = array();
  // A feed without items leaves $items NULL; count() must not see that.
  $item_count = is_array($items) ? count($items) : 0;
  for ($index = $item_count - 1; $index >= 0; $index--) {
    $item = _leech_news_get_item($items[$index], $feed);
    $feed->items[] = $item;

    // Links may have to stand in for guids, so remember whether they are
    // unique. Every link is tracked because _leech_news_get_item() can still
    // switch has_guids off half way through the feed.
    if ($feed->has_unique_links) {
      $link = isset($item->link) ? (string) $item->link : '';
      if (isset($seen_links[$link])) {
        $feed->has_unique_links = FALSE;
      }
      else {
        $seen_links[$link] = 1;
      }
    }
  }

  return $feed;
}
175
/**
 * Private function; read the text of the first $key child of an item node.
 *
 * @return
 *   The child's VALUE, or '' when the child or its text is missing.
 */
function _leech_news_item_text($node, $key) {
  return isset($node[$key][0]['VALUE']) ? $node[$key][0]['VALUE'] : '';
}

/**
 * Private function; work out the "directory" an item link lives in.
 *
 * The query string and fragment are ignored. A link that consists of a host
 * only (http://example.com) is its own base; a link without any slash has no
 * base at all.
 *
 * @return
 *   Base URL without trailing slash, or ''.
 */
function _leech_news_item_base_url($link) {
  $link = preg_replace('/[?#].*$/s', '', (string) $link);
  $last_slash = strrpos($link, '/');
  if ($last_slash === FALSE) {
    return '';
  }
  $scheme_end = strpos($link, '://');
  if ($scheme_end !== FALSE && $last_slash <= $scheme_end + 2) {
    // The only slashes belong to "scheme://", so there is no path to cut.
    return $link;
  }
  return substr($link, 0, $last_slash);
}

/**
 * Private function; build an item object from one item/entry node.
 *
 * @param $data
 *   Item (RSS/RDF) or entry (Atom) node of the parsed XML tree.
 * @param $feed
 *   Feed object being assembled. Its link and has_dates are read, and
 *   has_guids is set to FALSE when this item carries no unique id.
 * @return
 *   Object with body, teaser, title, link, source_title, source_link,
 *   source_xml and date, plus guid when the item has one.
 */
function _leech_news_get_item(&$data, &$feed) {
  $item = new StdClass();
  $item->body = '';
  $item->teaser = '';
  $item->source_title = '';
  $item->source_link = '';
  $item->source_xml = '';

  // Description field is needed early for case when no title is specified
  if (!empty($data['DESCRIPTION'])) { // RSS 0.91, 0.92, 1.0, 1.1, 2.0
    $item->body = _leech_news_item_text($data, 'DESCRIPTION');
  }
  else if (!empty($data['SUMMARY'])) { // Atom 0.3, 1.0
    $item->body = _leech_news_item_text($data, 'SUMMARY');
  }

  $content = '';
  if (!empty($data['CONTENT'])) { // Atom 0.3, 1.0
    $content = _leech_news_item_text($data, 'CONTENT');
  }
  else if (!empty($data['CONTENT:ENCODED'])) { // Seen in RSS 2.0 feeds
    $content = _leech_news_item_text($data, 'CONTENT:ENCODED');
  }
  // The longer text becomes the body, the shorter one (if any) the teaser.
  if (strlen($item->body) < strlen($content)) {
    if ($item->body) {
      $item->teaser = $item->body;
    }
    $item->body = $content;
  }

  /*
  ** Resolve the item's title. If no title is found, we use
  ** up to 40 characters of the description ending at a word
  ** boundary but not splitting potential entities.
  */
  $item->title = _leech_news_item_text($data, 'TITLE');
  if (!$item->title) {
    $item->title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item->body, 40));
  }

  // If title was "escaped" then it may still contain entities, because each
  // & from an entity was also escaped to &amp; before.
  // TODO: the same for content?
  if (isset($data['TITLE'][0]['MODE']) && $data['TITLE'][0]['MODE'] == 'escaped') {
    $item->title = parse_entities($item->title);
  }
  $item->title = strip_tags($item->title);

  /*
  ** Resolve the items link.
  */
  $guid = _leech_news_item_text($data, 'GUID');
  // A guid is a permalink unless the feed explicitly says otherwise.
  $guid_is_permalink = !(isset($data['GUID'][0]['ISPERMALINK']) && $data['GUID'][0]['ISPERMALINK'] == 'false');
  if (!empty($data['LINK']) && is_array($data['LINK'])) {
    // TODO: remove this Atom hack when we have field mapping or at least
    // specialized parsers in place.
    if (count($data['LINK']) > 1) {
      $item->link = $feed->link;
      foreach ($data['LINK'] as $candidate) {
        if (isset($candidate['REL']) && $candidate['REL'] == 'alternate' && isset($candidate['HREF'])) {
          $item->link = $candidate['HREF'];
        }
      }
    }
    else if (!empty($data['LINK'][0]['HREF'])) {
      $item->link = $data['LINK'][0]['HREF'];
    }
    else {
      $item->link = _leech_news_item_text($data, 'LINK');
    }
  }
  else if ($guid && $guid_is_permalink && preg_match('%^https?://%i', $guid)) {
    $item->link = $guid;
  }
  else {
    $item->link = $feed->link;
  }

  // Try to "sniff" real link from feeds like news.google.com which "hide"
  // real link behind own url.
  $link = (string) $item->link;
  if (strpos($link, 'http://news.google.com/news/url?') === 0) {
    if (preg_match('/\&url=(.*)\&/U', $link, $matches) && $matches[1]) {
      $item->link = rawurldecode($matches[1]);
    }
  }
  else if (preg_match('/^\w+:\/\/(?:\w+\.|\.)*yahoo\.com\/dailynews\/rss\/.*\*(.*)/', $link, $matches)) {
    $item->link = rawurldecode($matches[1]);
  }

  /*
  ** Resolve the items source.
  */
  // RSS 2.0 description of SOURCE is a bit different from ATOM and DC.
  // It says link should point to XML data of source (so i guess to
  // feed/channel??), while ATOM and DC say it just points to original data
  // (and from examples on web it looks like it means link to original article
  // on site, not in RSS/ATOM format).
  $source_text = _leech_news_item_text($data, 'SOURCE');
  $source_url = isset($data['SOURCE'][0]['URL']) ? $data['SOURCE'][0]['URL'] : '';
  $dc_source = _leech_news_item_text($data, 'DC:SOURCE');

  if ($source_text && $source_url) { // RSS 2.0
    $item->source_title = $source_text;
    $item->source_xml = $source_url;
  }
  if ($dc_source) { // Dublin core
    $item->source_link = $dc_source;
  }
  else if (!$item->source_xml && $source_text) { // Dublin core without prefix
    $item->source_link = $source_text;
  }
  else if (!empty($data['SOURCE']) || !empty($data['ATOM:SOURCE'])) { // ATOM 1.0
    $source = !empty($data['SOURCE']) ? $data['SOURCE'][0] : $data['ATOM:SOURCE'][0];

    $title = _leech_news_item_text($source, 'TITLE');
    if (!$title) {
      $title = _leech_news_item_text($source, 'ATOM:TITLE');
    }
    if ($title) {
      $item->source_title = $title;
    }

    foreach (array('LINK', 'ATOM:LINK') as $key) {
      if (!empty($source[$key])) {
        // Atom keeps the target in the href attribute; fall back to the
        // element text for feeds that put it there.
        if (!empty($source[$key][0]['HREF'])) {
          $item->source_link = $source[$key][0]['HREF'];
        }
        else {
          $item->source_link = _leech_news_item_text($source, $key);
        }
        break;
      }
    }
  }

  /*
  ** Resolve the items date.
  */
  $item->date = FALSE;
  if (!empty($feed->has_dates)) {
    $raw_date = _leech_news_item_text($data, $feed->has_dates);
    // strtotime() reports failure as FALSE (PHP >= 5.1) or -1 (older).
    $item->date = strtotime($raw_date);
    if ($item->date === FALSE || $item->date === -1) {
      $item->date = parse_w3cdtf($raw_date); // returns -1 on failure
    }
  }
  if ($item->date === FALSE || $item->date === -1) {
    $item->date = time(); // better than nothing
  }

  // Try to use RSS:GUID/ATOM:ID as unique identifier
  if ($guid) { // RSS 2.0
    $item->guid = $guid;
  }
  else if (_leech_news_item_text($data, 'ATOM:ID')) { // ATOM 0.3, 1.0
    $item->guid = _leech_news_item_text($data, 'ATOM:ID');
  }
  else if (_leech_news_item_text($data, 'ID')) { // ATOM 0.3, 1.0
    $item->guid = _leech_news_item_text($data, 'ID');
  }
  else {
    // One item without an id makes ids useless for the whole feed.
    $feed->has_guids = FALSE;
  }
  // TODO: is there any way to check if DC:IDENTIFIER is unique?
  // http://dublincore.org/documents/usageguide/elements.shtml says it can be
  // non-unique, so it is useless for us :(

  // Make relative URLs to be global
  $base = _leech_news_item_base_url($item->link);
  $item->body = parse_relative_urls($item->body, $base);
  $item->teaser = parse_relative_urls($item->teaser, $base);

  return $item;
}
328
/**
 * Private function;
 * Parse the W3C date/time format, a subset of ISO 8601. PHP date parsing
 * functions do not handle this format.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Only the forms that contain a time (YYYY-MM-DDThh:mm[:ss[.s]][TZD]) are
 * recognised. A missing time zone designator is treated as GMT.
 *
 * @param $date_str A string with a potentially W3C DTF date.
 * @return A timestamp if parsed successfully or -1 if not.
 */
function parse_w3cdtf($date_str) {
  // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss[.s]",
  // 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';
  if (!preg_match($pattern, (string) $date_str, $match)) {
    return -1;
  }

  // Seconds are optional; trailing groups that did not take part in the match
  // are absent from $match.
  $seconds = (isset($match[7]) && $match[7] !== '') ? (int) $match[7] : 0;

  // calc epoch for current date assuming GMT
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // "Z" (zulu time, aka GMT) and a missing designator need no correction;
  // only an explicit numeric offset does.
  if (isset($match[8]) && $match[8] !== '') {
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // is timezone ahead of GMT? then subtract offset
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}
366
/**
 * Private function;
 * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51055
 * Callback for preg_replace_callback() which turns one numeric entity into
 * the matching UTF-8 byte sequence.
 *
 * Code points 128-159 are read as Windows-1252 characters (the unassigned
 * ones become U+FFFD); everything else is taken as a Unicode code point.
 *
 * @param $ord
 *   Match array; element 1 holds the entity's number, decimal or "x" + hex.
 * @return
 *   UTF-8 string, or NULL for numbers above U+10FFFF.
 */
function _parse_num_entity($ord) {
  $number = $ord[1];
  if (preg_match('/^x([0-9a-f]+)$/i', $number, $hex)) {
    $code = hexdec($hex[1]);
  }
  else {
    $code = intval($number);
  }

  // Windows-1252 interpretation of 128-159, plus the no-break space.
  $cp1252 = array(
    128 => "\xE2\x82\xAC",
    129 => "\xEF\xBF\xBD",
    130 => "\xE2\x80\x9A",
    131 => "\xC6\x92",
    132 => "\xE2\x80\x9E",
    133 => "\xE2\x80\xA6",
    134 => "\xE2\x80\xA0",
    135 => "\xE2\x80\xA1",
    136 => "\xCB\x86",
    137 => "\xE2\x80\xB0",
    138 => "\xC5\xA0",
    139 => "\xE2\x80\xB9",
    140 => "\xC5\x92",
    141 => "\xEF\xBF\xBD",
    142 => "\xC5\xBD",
    143 => "\xEF\xBF\xBD",
    144 => "\xEF\xBF\xBD",
    145 => "\xE2\x80\x98",
    146 => "\xE2\x80\x99",
    147 => "\xE2\x80\x9C",
    148 => "\xE2\x80\x9D",
    149 => "\xE2\x80\xA2",
    150 => "\xE2\x80\x93",
    151 => "\xE2\x80\x94",
    152 => "\xCB\x9C",
    153 => "\xE2\x84\xA2",
    154 => "\xC5\xA1",
    155 => "\xE2\x80\xBA",
    156 => "\xC5\x93",
    157 => "\xEF\xBF\xBD",
    158 => "\xC5\xBE",
    159 => "\xC5\xB8",
    160 => "\xC2\xA0"
  );
  if ($code >= 128 && $code <= 160) {
    return $cp1252[(int) $code];
  }

  // Plain ASCII is a single byte.
  if ($code < 128) {
    return chr($code);
  }

  // Two bytes: 110xxxxx 10xxxxxx
  if ($code < 2048) {
    return chr(0xC0 | (($code >> 6) & 0x1F))
      . chr(0x80 | ($code & 0x3F));
  }

  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if ($code < 65536) {
    return chr(0xE0 | (($code >> 12) & 0x0F))
      . chr(0x80 | (($code >> 6) & 0x3F))
      . chr(0x80 | ($code & 0x3F));
  }

  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  if ($code < 1114112) {
    return chr(0xF0 | (($code >> 18) & 0x07))
      . chr(0x80 | (($code >> 12) & 0x3F))
      . chr(0x80 | (($code >> 6) & 0x3F))
      . chr(0x80 | ($code & 0x3F));
  }

  // Beyond the Unicode range: nothing to return.
  return NULL;
}
530
/**
 * Private function; Convert named entities to UTF-8 characters
 * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51722
 *
 * The entity => character map is built once per request. Replacement is a
 * single pass, so "&amp;lt;" becomes "&lt;" and not "<".
 *
 * @param $data
 *   Text which may contain named entities. Taken by reference but not
 *   modified.
 * @return
 *   The text with every known named entity (and &apos;) decoded.
 */
function _parse_name_entities(&$data) {
  static $ttr;
  if (!$ttr) {
    $ttr = array();
    foreach (get_html_translation_table(HTML_ENTITIES) as $char => $entity) {
      // Old PHP versions return the table in ISO-8859-1, newer ones already
      // in UTF-8. A lone high byte can only be ISO-8859-1, so convert just
      // those; re-encoding a UTF-8 table would garble every character.
      if (strlen($char) == 1 && ord($char) >= 128) {
        $code = ord($char);
        $char = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
      }
      $ttr[$entity] = $char;
    }
    // Not part of the HTML 4 table, but common in XML.
    $ttr['&apos;'] = "'";
  }
  return strtr((string) $data, $ttr);
}
546
/**
 * Private function; Convert all entities to UTF-8 characters
 *
 * Named entities are decoded first, numeric ones (decimal "&#233;" and
 * hexadecimal "&#xE9;") afterwards.
 *
 * @param $data
 *   Text which may contain entities. Taken by reference but not modified.
 * @return
 *   The decoded text.
 */
function parse_entities(&$data) {
  $result = _parse_name_entities($data);
  // Accept only well-formed numbers. A looser pattern would also pick up
  // things like "&#abc;" and decode them to a NUL byte.
  return preg_replace_callback('/&#(x[0-9a-f]+|[0-9]+);/i', '_parse_num_entity', $result);
}
554
555
/**
 * Private function; Convert relative URLs
 *
 * Rewrites double-quoted href and src attributes:
 *  - "/path" is resolved against the scheme and host of $base_url (or
 *    against $base_url itself when it has no scheme);
 *  - "path" is resolved against $base_url;
 *  - absolute URLs, other schemes (mailto:, javascript:, ...), "//host/..."
 *    and "#fragment" references are left alone.
 *
 * @param $data
 *   Markup to process. Taken by reference but not modified.
 * @param $base_url
 *   URL of the directory the markup came from. Surrounding slashes are
 *   ignored. When empty nothing can be resolved and the markup is returned
 *   unchanged.
 * @return
 *   The markup with relative references made absolute.
 */
function parse_relative_urls(&$data, $base_url) {
  $markup = (string) $data;
  $base = trim((string) $base_url, '/');
  if ($base === '') {
    return $markup;
  }

  // Root-relative references need scheme://host only.
  $root = $base;
  if (preg_match('%^([a-z][a-z0-9+.\-]*://[^/]+)%i', $base, $match)) {
    $root = $match[1];
  }

  $src = array(
    '%( href| src)="/(?!/)([^"]*)"%i',
    '%( href| src)="(?![a-z][a-z0-9+.\-]*:|/|#)([^"]*)"%i'
  );
  // "$" and "\" are special in a replacement string and must be escaped, or a
  // URL containing them would be read as a backreference.
  $dst = array(
    '$1="'. addcslashes($root, '\\$') .'/$2"',
    '$1="'. addcslashes($base, '\\$') .'/$2"'
  );
  return preg_replace($src, $dst, $markup);
}
564
/**
 * Event-driven XML parser which turns a document into a nested array.
 *
 * Every element becomes a node: an array holding its attributes, its text
 * under the key VALUE, and its children as lists keyed by (upper-cased)
 * element name.
 */
class LeechNewsParser {
  // Parser handle returned by drupal_xml_parser_create().
  var $xml_parser;

  // The tree built by the last call to Parse().
  var $xml_tree;
  // Stack of references to the nodes between the root and the open element.
  var $xml_paths;
  // Index into $xml_paths of the element currently open.
  var $xml_path_cur;
  // Seconds the last call to Parse() took.
  var $xml_timer;

  // Start and end time of the last call to Parse().
  var $_start;
  var $_end;

  /**
   * Constructor.
   */
  function __construct() {
    $this->xml_parser = NULL;
    $this->xml_timer = 0;
    $this->_start = 0;
    $this->_end = 0;
    $this->_reset();
  }

  /**
   * Old-style constructor, kept for PHP versions without __construct().
   */
  function LeechNewsParser() {
    $this->__construct();
  }

  /**
   * Private function; start over with an empty tree and a fresh node stack.
   */
  function _reset() {
    $this->xml_tree = array();
    // Replace the stack instead of appending to it, so nothing is left over
    // from an earlier (possibly aborted) parse.
    $this->xml_paths = array();
    $this->xml_paths[0] = &$this->xml_tree;
    $this->xml_path_cur = 0;
  }

  /**
   * Parse an XML document.
   *
   * @param $data
   *   The XML text. Handed by reference to drupal_xml_parser_create().
   * @return
   *   The tree. It holds 'parser_error' and 'parser_line' when parsing
   *   failed, and 'parser_time' whenever a parser could be created. An empty
   *   array is returned when no parser could be created.
   */
  function Parse(&$data) {
    $this->_reset();

    $this->_start = microtime();

    $this->xml_parser = drupal_xml_parser_create($data);
    if ($this->xml_parser == NULL) {
      return $this->xml_tree;
    }

    xml_set_object($this->xml_parser, $this);
    xml_set_element_handler($this->xml_parser, '_element_start', '_element_end');
    xml_set_character_data_handler($this->xml_parser, '_element_data');
    xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_parser_set_option($this->xml_parser, XML_OPTION_SKIP_WHITE, 1);
    if (!xml_parse($this->xml_parser, $data, 1)) {
      $this->xml_tree['parser_error'] = xml_error_string(xml_get_error_code($this->xml_parser));
      $this->xml_tree['parser_line'] = xml_get_current_line_number($this->xml_parser);
    }
    else {
      unset($this->xml_tree['parser_error']);
      unset($this->xml_tree['parser_line']);
    }
    xml_parser_free($this->xml_parser);

    // microtime() without arguments returns the string "usec sec".
    $this->_end = microtime();
    list($usec, $sec) = explode(' ', $this->_start);
    $this->_start = (float) $sec + (float) $usec;
    list($usec, $sec) = explode(' ', $this->_end);
    $this->xml_tree['parser_time'] = $this->xml_timer = ((float) $sec + (float) $usec) - $this->_start;

    return $this->xml_tree;
  }

  /**
   * Private function; handler for an opening tag.
   *
   * Appends a node, initialised with the attributes, to the parent's list for
   * this element name and makes it the current node.
   */
  function _element_start($parser, $name, $attributes) {
    $parent = &$this->xml_paths[$this->xml_path_cur++];
    $parent[$name][] = $attributes;
    $this->xml_paths[$this->xml_path_cur] = &$parent[$name][count($parent[$name]) - 1];
  }

  /**
   * Private function; handler for a closing tag.
   *
   * Leaves the current node and decodes the entities in its text, if any.
   */
  function _element_end($parser, $name) {
    $node = &$this->xml_paths[$this->xml_path_cur];
    array_pop($this->xml_paths);
    $this->xml_path_cur--;
    if (isset($node['VALUE'])) {
      $node['VALUE'] = trim(parse_entities($node['VALUE']));
    }
  }

  /**
   * Private function; handler for character data.
   *
   * Text may arrive in several chunks. Leading whitespace-only chunks are
   * skipped (so elements that only have children get no VALUE), but once the
   * text has started every chunk is kept: a whitespace-only chunk in the
   * middle separates two words.
   */
  function _element_data($parser, $data) {
    $node = &$this->xml_paths[$this->xml_path_cur];
    if (isset($node['VALUE'])) {
      $node['VALUE'] .= $data;
    }
    else if (strlen(trim($data)) > 0) {
      $node['VALUE'] = $data;
    }
  }
}
645
646 ?>

  ViewVC Help
Powered by ViewVC 1.1.2