/[drupal]/contributions/modules/dmoz/dmoz_stages.inc
ViewVC logotype

Contents of /contributions/modules/dmoz/dmoz_stages.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (show annotations) (download) (as text)
Fri May 26 23:04:57 2006 UTC (3 years, 6 months ago) by trunks
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +68 -59 lines
File MIME type: text/x-php
cleaning
1 <?php
2 /**
3 * @file
4 * Dmoz gateway for Llengua.org
5 *
6 * Developed by Jose A. Reyero, http://www.reyero.net
7 * for Interactors, http://www.interactors.coop
8 *
9 * Some code from PHParadise, http://phparadise.de
10 */
11
/**
 * Advances the pipeline once the currently running stage has finished.
 *
 * @param $next
 *   Name of the stage to start when _dmoz_status() reports success.
 */
function dmoz_control($next) {
  $finished = _dmoz_status();
  if (!$finished) {
    // Not finished (or an error was detected): nothing to do on this pass.
    return;
  }
  dmoz_stage('start', $next);
}
20
21 /**
22 * Process step by step
23 */
24
/**
 * Special stage: Stop.
 *
 * Processing is disabled, so every operation is a no-op.
 *
 * @param $op
 *   Stage operation; ignored.
 */
function dmoz_stage_stop($op) {
  // Intentionally empty: the process simply stays parked in this stage.
  return;
}
33
/**
 * Special stage: Idle.
 *
 * Waits until the remote dumps change. Remote modification times are
 * compared against the stored 'dmoz_last_update' value rather than against
 * local time.
 *
 * @param $op
 *   'start' checks the remote files and launches the download stage when
 *   both are newer than the last update; 'cron' re-triggers that check at
 *   most once an hour.
 */
function dmoz_stage_idle($op) {
  if ($op == 'start') {
    // Remember when we last looked so 'cron' can throttle itself.
    variable_set('dmoz_last_process', time());

    $content_url = variable_get('dmoz_content_url', 'http://rdf.dmoz.org/rdf/content.rdf.u8.gz');
    $content_time = _dmoz_remote_last_modified($content_url);
    $struct_url = variable_get('dmoz_struct_url', 'http://rdf.dmoz.org/rdf/structure.rdf.u8.gz');
    $struct_time = _dmoz_remote_last_modified($struct_url);

    // The older of the two timestamps decides: both files must be fresh.
    $updated = min($content_time, $struct_time);
    if ($updated > variable_get('dmoz_last_update', 0)) {
      variable_set('dmoz_last_update', $updated);
      dmoz_stage('start', 'download_structure');
    }
  }
  elseif ($op == 'cron') {
    $elapsed = time() - variable_get('dmoz_last_process', 0);
    // Only look for new files once the hour has passed.
    if ($elapsed > 3600) {
      _dmoz_log("Its time to check for new files...");
      dmoz_stage('start');
    }
  }
}
63
/**
 * Checks when a remote file was last modified.
 *
 * Uses the PHParadise helper get_header_array() to fetch the response
 * headers and parses the Last-Modified header.
 *
 * @param $url
 *   URL of the remote file.
 *
 * @return
 *   Unix timestamp of the Last-Modified header, or 0 when the header is
 *   missing or cannot be parsed.
 */
function _dmoz_remote_last_modified($url) {
  _dmoz_log("Checking remote file $url");
  $data = get_header_array($url);

  $last = 0;
  if (is_array($data)) {
    foreach ($data as $name => $value) {
      // HTTP header names are case-insensitive, so do not rely on exact case.
      if (strtolower($name) == 'last-modified') {
        $parsed = strtotime($value);
        // strtotime() reports failure as FALSE (PHP >= 5.1) or -1 (older);
        // neither may leak out as a timestamp.
        if ($parsed !== FALSE && $parsed > 0) {
          $last = $parsed;
        }
        break;
      }
    }
  }

  if ($last) {
    _dmoz_log("File $url Last Modified ".format_date($last));
  }
  else {
    _dmoz_log("File $url Last Modified could not be determined");
  }
  return $last;
}
78
/**
 * Special stage: Error.
 *
 * An error has occurred; the process stays here until a human intervenes.
 *
 * @param $op
 *   'start' reports the contents of the error file through the watchdog;
 *   any other operation (including 'cron') does nothing.
 */
function dmoz_stage_error($op) {
  if ($op != 'start') {
    // Just wait.
    return;
  }
  $text = _dmoz_status('error contents');
  if ($text) {
    _dmoz_watchdog('Dmoz processing error:<br/>'.$text, WATCHDOG_ERROR);
  }
}
95
/**
 * Stage: download the structure dump.
 *
 * @param $op
 *   'start' launches the download of the URL stored in 'dmoz_struct_url';
 *   'cron' checks whether it has finished and, if so, moves on to
 *   'download_content'.
 */
function dmoz_stage_download_structure($op) {
  if ($op == 'start') {
    $url = variable_get('dmoz_struct_url', 'http://rdf.dmoz.org/rdf/structure.rdf.u8.gz');
    _dmoz_download($url);
  }
  elseif ($op == 'cron') {
    dmoz_control('download_content');
  }
}
111
/**
 * Stage: download the content dump.
 *
 * @param $op
 *   'start' launches the download of the URL stored in 'dmoz_content_url';
 *   'cron' checks whether it has finished and, if so, moves on to
 *   'expand_structure'.
 */
function dmoz_stage_download_content($op) {
  if ($op == 'start') {
    $url = variable_get('dmoz_content_url', 'http://rdf.dmoz.org/rdf/content.rdf.u8.gz');
    _dmoz_download($url);
  }
  elseif ($op == 'cron') {
    dmoz_control('expand_structure');
  }
}
122
/**
 * Stage: uncompress the downloaded structure dump.
 *
 * @param $op
 *   'start' expands the file named after the basename of 'dmoz_struct_url';
 *   'cron' checks whether expansion has finished and, if so, moves on to
 *   'expand_content'.
 */
function dmoz_stage_expand_structure($op) {
  if ($op == 'start') {
    $url = variable_get('dmoz_struct_url', 'http://rdf.dmoz.org/rdf/structure.rdf.u8.gz');
    // The local file carries the same name as the last URL component.
    $archive = _dmoz_file(basename($url));
    _dmoz_expand($archive);
  }
  elseif ($op == 'cron') {
    dmoz_control('expand_content');
  }
}
140
/**
 * Stage: uncompress the downloaded content dump.
 *
 * @param $op
 *   'start' expands the file named after the basename of 'dmoz_content_url';
 *   'cron' checks whether expansion has finished and, if so, moves on to
 *   'extract_structure'.
 */
function dmoz_stage_expand_content($op) {
  if ($op == 'start') {
    $url = variable_get('dmoz_content_url', 'http://rdf.dmoz.org/rdf/content.rdf.u8.gz');
    // The local file carries the same name as the last URL component.
    $archive = _dmoz_file(basename($url));
    _dmoz_expand($archive);
  }
  elseif ($op == 'cron') {
    dmoz_control('extract_content' == '' ? '' : 'extract_structure');
  }
}
152
/**
 * Stage: extract the configured category from the structure dump.
 *
 * @param $op
 *   'start' runs extract.pl with the 'dmoz_category' value, the expanded
 *   structure file and the extraction target file; 'cron' checks for
 *   completion and, if finished, moves on to 'extract_content'.
 */
function dmoz_stage_extract_structure($op) {
  if ($op == 'start') {
    $source = _dmoz_file(variable_get('dmoz_struct_file', 'structure.rdf.u8'));
    $target = _dmoz_file(variable_get('dmoz_struct_extract', 'extracted-structure.rdf.u8'));
    $category = variable_get('dmoz_category', '');
    _dmoz_exec('extract.pl', $category, $source, $target);
  }
  elseif ($op == 'cron') {
    dmoz_control('extract_content');
  }
}
171
/**
 * Stage: extract the configured category from the content dump.
 *
 * @param $op
 *   'start' runs extract.pl with the 'dmoz_category' value, the expanded
 *   content file and the extraction target file; 'cron' checks for
 *   completion and, if finished, moves on to 'import_structure'.
 */
function dmoz_stage_extract_content($op) {
  if ($op == 'start') {
    $source = _dmoz_file(variable_get('dmoz_content_file', 'content.rdf.u8'));
    $target = _dmoz_file(variable_get('dmoz_content_extract', 'extracted-content.rdf.u8'));
    $category = variable_get('dmoz_category', '');
    _dmoz_exec('extract.pl', $category, $source, $target);
  }
  elseif ($op == 'cron') {
    dmoz_control('import_structure');
  }
}
184
/**
 * Stage: import the extracted structure file into the database.
 *
 * @param $op
 *   'start' runs structure2db.pl with the extracted structure file and the
 *   path "$config/settings.php"; 'cron' checks for completion and, if
 *   finished, moves on to 'import_content'.
 */
function dmoz_stage_import_structure($op) {
  if ($op == 'start') {
    // NOTE(review): relies on a global $config holding the site's
    // configuration directory -- confirm it is set by the bootstrap.
    global $config;
    $settings = "$config/settings.php";
    $extracted = _dmoz_file(variable_get('dmoz_struct_extract', 'extracted-structure.rdf.u8'));
    _dmoz_exec('structure2db.pl', $extracted, $settings);
  }
  elseif ($op == 'cron') {
    dmoz_control('import_content');
  }
}
198
/**
 * Stage: import the extracted content file into the database.
 *
 * @param $op
 *   'start' runs content2db.pl with the extracted content file and the
 *   path "$config/settings.php"; 'cron' checks for completion and, if
 *   finished, moves on to 'process_structure'.
 *
 * @return
 *   For 'start', whatever _dmoz_exec() returns; nothing otherwise.
 */
function dmoz_stage_import_content($op) {
  if ($op == 'start') {
    // NOTE(review): relies on a global $config holding the site's
    // configuration directory -- confirm it is set by the bootstrap.
    global $config;
    $extracted = _dmoz_file(variable_get('dmoz_content_extract', 'extracted-content.rdf.u8'));
    $settings = "$config/settings.php";
    return _dmoz_exec('content2db.pl', $extracted, $settings);
  }
  if ($op == 'cron') {
    dmoz_control('process_structure');
  }
}
212
/**
 * Stage: create categories (and path aliases) from imported structure rows.
 *
 * On every 'cron' run a batch of at most 'dmoz_process_rows' unprocessed
 * rows of {dmoz_structure} is handed to dmoz_category_update(). When a run
 * finds no rows left, the 'process_content' stage is started.
 *
 * @param $op
 *   Only 'cron' does any work.
 */
function dmoz_stage_process_structure($op) {
  if ($op != 'cron') {
    return;
  }

  dmoz_timer(); // Restart the timer for this batch
  $batch = variable_get('dmoz_process_rows', 100);
  $vocabulary = variable_get('dmoz_vocabulary', '');
  $done = 0;

  // TODO: Lock tables
  $rows = db_query_range('SELECT * FROM {dmoz_structure} WHERE processed=0', 0, $batch);
  while ($row = db_fetch_object($rows)) {
    _dmoz_log("DEBUG: processing title=$row->title catid=$row->catid topic=$row->topic");
    $row->vid = $vocabulary;
    dmoz_category_update($row);
    // Flag the row so the next batch skips it.
    db_query("UPDATE {dmoz_structure} SET processed=1 WHERE catid=%d", $row->catid);
    dmoz_stats('category', 'processed');
    $done++;
  }
  dmoz_stats('category', 'save');

  // Flush messages, taxonomy updates, etc..
  drupal_get_messages();

  $elapsed = dmoz_timer();
  $summary = dmoz_stats('category', 'string');
  _dmoz_watchdog("Processing structure: $done records, time=".$elapsed.", ".$summary);
  _dmoz_log("STATS:Global: ". dmoz_stats('category', 'string', TRUE));

  // An empty batch means the stage is complete.
  if ($done == 0) {
    dmoz_stage('start', 'process_content');
  }
}
246
/**
 * Stage: create or refresh link nodes from imported content rows.
 *
 * On every 'cron' run a batch of at most 'dmoz_process_rows' unprocessed
 * rows of {dmoz_xurls} is handed to dmoz_content_update(). When a run finds
 * no rows left, the 'cleanup' stage is started.
 *
 * @param $op
 *   Only 'cron' does any work.
 */
function dmoz_stage_process_content($op) {
  if ($op != 'cron') {
    return;
  }

  dmoz_timer(); // Restart the timer for this batch
  $batch = variable_get('dmoz_process_rows', 100);
  $vid = variable_get('dmoz_vocabulary', '');
  $done = 0;

  $rows = db_query_range('SELECT * FROM {dmoz_xurls} WHERE processed=0', 0, $batch);
  while ($row = db_fetch_object($rows)) {
    _dmoz_log("DEBUG: processing url=$row->url title=$row->title catid=$row->catid");
    dmoz_content_update($row);
    // Flag the row so the next batch skips it.
    db_query("UPDATE {dmoz_xurls} SET processed=1 WHERE urlid=%d", $row->urlid);
    dmoz_stats('content', 'processed');
    $done++;
  }
  dmoz_stats('content', 'save');

  // Flush messages
  drupal_get_messages();

  $elapsed = dmoz_timer();
  $summary = dmoz_stats('content', 'string');
  _dmoz_watchdog("Processing content: $done records, time=".$elapsed.", ".$summary);
  _dmoz_log("STATS:Global: ". dmoz_stats('content', 'string', TRUE));

  // An empty batch means the stage is complete.
  if ($done == 0) {
    dmoz_stage('start', 'cleanup');
  }
}
280
/**
 * Stage: final cleanup.
 *
 * Works in batches of at most 'dmoz_process_rows' per 'cron' run:
 * - first deletes nodes whose lastrefresh is older than 'dmoz_last_update',
 * - once no such nodes remain, deletes categories (terms) in the same state,
 * - once nothing is left to delete, starts the 'recount' stage.
 * Pending Drupal messages are flushed on every call, whatever the operation.
 *
 * @param $op
 *   Only 'cron' performs deletions.
 */
function dmoz_stage_cleanup($op) {
  if ($op == 'cron') {
    dmoz_timer(); // Restart the timer for this batch
    $batch = variable_get('dmoz_process_rows', 100);
    $vid = variable_get('dmoz_vocabulary', '');
    $cutoff = variable_get('dmoz_last_update', 0);
    $count = 0;

    // Links that were not refreshed by the last import.
    $stale_nodes = db_query_range('SELECT nid FROM {dmoz_node} WHERE lastrefresh < %d', $cutoff, 0, $batch);
    while ($node = db_fetch_object($stale_nodes)) {
      _dmoz_log("DEBUG: deleting node $node->nid");
      node_delete(array('nid' => $node->nid, 'confirm' => 1));
      $count++;
    }

    if ($count) {
      // Nodes were deleted; categories wait for a later run.
      _dmoz_watchdog("Clean up, deleted old links: $count nodes, time=".dmoz_timer());
    }
    else {
      // Categories that were not refreshed by the last import.
      $stale_terms = db_query_range('SELECT * FROM {dmoz_term_data} WHERE lastrefresh < %d', $cutoff, 0, $batch);
      while ($cat = db_fetch_object($stale_terms)) {
        _dmoz_log("DEBUG $count: deleting category (term) $cat->tid $cat->name");
        taxonomy_del_term($cat->tid);
        $count++;
      }

      if ($count) {
        _dmoz_watchdog("Clean up, deleted old categories: $count terms, time=".dmoz_timer());
      }
      else {
        // Nothing was deleted in this run: the stage is complete.
        dmoz_stage('start', 'recount');
      }
    }
  }

  // Flush messages (comment out for debugging)
  drupal_get_messages();
}
339 /**
340 * Last step: Recount links in categories
341 */
342 function dmoz_stage_recount($op){
343 switch($op){
344 case 'start':
345 // Reset fields for recount = number of direct nodes
346 db_query("UPDATE {dmoz_term_data} d SET recount = -1");
347 variable_set('dmoz_recount_tid', 0);
348 break;
349 case 'cron':
350 dmoz_timer(); // Start
351 $limit = variable_get('dmoz_process_rows', 100);
352 $vid = variable_get('dmoz_vocabulary', '');
353 $tid = variable_get('dmoz_recount_tid', 0);
354 $lastupdate = variable_get('dmoz_last_update', 0);
355 $count = dmoz_recount(0);
356 if($count == 1){
357 dmoz_stage('start', 'idle');
358 } else {
359 _dmoz_watchdog("Recounting links by category: $count terms processed, time=".dmoz_timer());
360 }
361 break;
362
363 }
364 }
365
/**
 * Recursive function to count nodes: a simple depth-first tree traversal.
 *
 * Visits the children of $tid that are still pending (recount = -1) until
 * the per-run limit 'dmoz_process_rows' is reached. A term's own count is
 * only written when its whole subtree was handled within the limit, so an
 * interrupted term stays pending for a later run.
 *
 * @param $tid
 *   Term id to recount; 0 stands for the root, which is never updated.
 *
 * @return
 *   Number of rows processed, including this one.
 */
function dmoz_recount($tid) {
  $rows = 1; // Number of rows processed, this term included
  $count = 0;
  $limit = variable_get('dmoz_process_rows', 100);

  $result = db_query('SELECT t.tid FROM {term_hierarchy} h, {dmoz_term_data} t WHERE h.parent = %d AND h.tid = t.tid AND t.recount = -1', $tid);

  // Recount subtree
  while (($rows < $limit) && ($cat = db_fetch_object($result))) {
    $rows += dmoz_recount($cat->tid);
  }

  if ($tid && ($rows < $limit)) {
    // Subtree successfully recounted: total = children's counts + own nodes.
    $count += db_result(db_query('SELECT SUM(t.count) FROM {term_hierarchy} h, {dmoz_term_data} t WHERE h.parent = %d AND h.tid = t.tid', $tid));
    $count += db_result(db_query('SELECT COUNT(*) FROM {term_node} WHERE tid = %d', $tid));
    db_query('UPDATE {dmoz_term_data} SET count = %d, recount = %d WHERE tid = %d', $count, $count, $tid);
    _dmoz_log("DEBUG: Updating node count: tid=$tid count=$count");
  }
  else {
    // Report the rows processed so far ($count is always 0 on this path).
    _dmoz_log("DEBUG: NOT updating node count: tid=$tid rows=$rows");
  }
  return $rows;
}
396
/**
 * Looks up the parent category of a dmoz record through its 'topic' path.
 *
 * @param $dmoz
 *   Object with a 'topic' property.
 *
 * @return
 *   Whatever dmoz_get_category() returns for the parent topic.
 */
function dmoz_category_get_parent(&$dmoz) {
  $parent_topic = dmoz_topic_get_parent($dmoz->topic);
  return dmoz_get_category($parent_topic, 'topic');
}
403
/**
 * Derives the parent topic by dropping the last path component.
 *
 * @param $topic
 *   Slash-separated topic path, e.g. 'Top/World/Arts'.
 *
 * @return
 *   Everything before the last '/'; an empty string when the topic has no
 *   parent component.
 */
function dmoz_topic_get_parent($topic) {
  // Position of the separator that precedes the last component.
  $cut = strrpos($topic, '/');
  $parent = substr($topic, 0, $cut);
  return $parent;
}
410
/**
 * Creates or updates the category (taxonomy term) for one structure row.
 *
 * Three cases:
 * - the category already exists (looked up by catid): changed fields are
 *   saved, otherwise only its lastrefresh timestamp is bumped;
 * - the row is a one-letter category: no term is created and its links in
 *   {dmoz_xurls} are reassigned to the parent category;
 * - anything else: a new term is created.
 *
 * @param $dmoz
 *   Row object from {dmoz_structure}; 'catid', 'title', 'topic' and
 *   'description' are read. Its 'topic' is normalised in place.
 */
function dmoz_category_update($dmoz) {
  // First, get rid of one letter categories in the middle
  $dmoz->topic = preg_replace("'/\w/'", '/', $dmoz->topic);
  $output = "DEBUG: dmoz_category_update $dmoz->topic";

  if ($data = dmoz_get_category($dmoz->catid, 'catid', TRUE)) {
    // Existing category: collect the fields that differ.
    $output .= " Update ";
    $changed = array();
    if ($data->name != $dmoz->title) {
      $data->name = $dmoz->title;
      $changed[] = 'title';
    }
    if ($data->topic != $dmoz->topic) {
      $changed[] = 'topic';
      $data->topic = $dmoz->topic;
    }
    if ($data->description != $dmoz->description) {
      $changed[] = 'description';
      $data->description = $dmoz->description;
    }

    if (count($changed)) {
      dmoz_stats('category', 'updated');
      $output .= " UPDATE ". implode(', ', $changed);
    }
    else {
      // No changes, update lastrefresh only and skip the save below.
      db_query("UPDATE {dmoz_term_data} SET lastrefresh=%d WHERE catid=%d", time(), $dmoz->catid);
      $output .= " unchanged ";
      $data = NULL;
    }
  }
  elseif (strlen(trim($dmoz->title)) == 1) {
    // One letter category: group with parent and reassign its links.
    $parent = dmoz_category_get_parent($dmoz);
    if ($parent) {
      $result = db_query("UPDATE {dmoz_xurls} SET catid=%d WHERE catid=%d", $parent->catid, $dmoz->catid);
      $number = db_affected_rows($result);
      $output .= " One letter, reassigning $number links to parent: catid=$parent->catid topic=$parent->topic ";
      dmoz_stats('category', 'reassigned');
    }
    else {
      // Without a parent the UPDATE would move every link to catid 0, so
      // leave the links alone and report the problem instead.
      $output .= " One letter, parent not found, links left untouched ";
      _dmoz_watchdog("Category without parent catid=$dmoz->catid topic=$dmoz->topic", WATCHDOG_ERROR);
    }
    // Child topics are not rewritten here; single-letter path segments are
    // already collapsed by the preg_replace() at the top of this function.
    $data = NULL;
  }
  else {
    // Create new category
    $output .= " NEW ";
    $data = $dmoz;
    $data->name = $dmoz->title;
    dmoz_stats('category', 'created');
  }

  // Save if new / updated
  if ($data) {
    $data->lastrefresh = $data->lastupdate = time();
    // Attach to the parent term when one can be found.
    if ($parent = dmoz_category_get_parent($data)) {
      $data->parent = array($parent->tid);
    }
    else {
      _dmoz_watchdog("Category without parent catid=$dmoz->catid topic=$dmoz->topic", WATCHDOG_ERROR);
    }
    taxonomy_save_term(object2array($data));
  }

  _dmoz_log($output);
}
488
/**
 * Creates or updates the link node for one content row.
 *
 * - An existing node (matched by dmoz_get_content()) is saved again only
 *   when its title, body or category changed; otherwise just its
 *   lastrefresh timestamp in {dmoz_node} is bumped.
 * - A row with no node but a known category produces a new 'dlink' node.
 * - A row with neither is reported through the watchdog.
 *
 * @param $dmoz
 *   Row object from {dmoz_xurls}; 'catid', 'urlid', 'url', 'title',
 *   'description' and 'type' are read.
 */
function dmoz_content_update($dmoz) {
  $output = "DEBUG: dmoz_content_update ";

  // Read the link type up front: the "new node" branch below makes $data
  // refer to the same object as $dmoz (PHP 5 object handles) and then
  // overwrites ->type with the node type.
  $urltype = !empty($dmoz->type) ? $dmoz->type : 'link';

  $category = dmoz_get_category($dmoz->catid);
  // Term id of the row's category, or 0 when the category is unknown.
  $tid = ($category && !empty($category->tid)) ? $category->tid : 0;

  if ($data = dmoz_get_content($dmoz, TRUE)) {
    // Check for changes
    $changed = array();
    if ($data->title != $dmoz->title) {
      $data->title = $dmoz->title;
      $changed[] = 'title';
    }
    if ($data->body != $dmoz->description) {
      $changed[] = 'description';
      $data->body = $dmoz->description;
    }
    if (!$tid) {
      // Do not wipe the node's terms when the category cannot be resolved.
      _dmoz_watchdog("Link without category urlid=$dmoz->urlid catid=$dmoz->catid", WATCHDOG_ERROR);
    }
    elseif (!$data->taxonomy || !in_array($tid, $data->taxonomy)) {
      // A node may have other categories assigned
      $changed[] = 'category';
      $data->taxonomy = array($tid);
    }

    if (count($changed)) {
      dmoz_stats('content', 'updated');
      $output .= " UPDATE ". implode(', ', $changed);
    }
    else {
      // No changes, update last refresh only and skip the save below.
      db_query("UPDATE {dmoz_node} SET lastrefresh=%d WHERE nid=%d", time(), $data->nid);
      $output .= " REFRESH ";
      $data = NULL;
    }
  }
  elseif ($tid) {
    // Create new node
    $output .= " NEW ";
    $data = $dmoz;
    $data->type = 'dlink';
    $data->body = $dmoz->description;
    $data->taxonomy = array($tid);
    dmoz_stats('content', 'created');
  }
  else {
    _dmoz_watchdog("Link without category urlid=$dmoz->urlid catid=$dmoz->catid", WATCHDOG_ERROR);
  }

  // Save if new / updated
  if ($data) {
    $data->urltype = $urltype;
    $data->teaser = $dmoz->description;
    $data->lastrefresh = $data->lastupdate = time();
    node_save($data);
  }

  _dmoz_log($output);
}
548
/**
 * Finds the existing link node for a content row by url and catid.
 *
 * URLs may be duplicated across categories, which is why catid is part of
 * the lookup. A link that moved to another category is therefore not found
 * here and gets a new node; the old one is removed by the cleanup stage.
 *
 * @param $data
 *   Object with 'url' and 'catid' properties.
 * @param $taxonomy
 *   When TRUE, the node's term ids are loaded into $node->taxonomy.
 *
 * @return
 *   The loaded node, or NULL when there is none.
 */
function dmoz_get_content($data, $taxonomy = TRUE) {
  $lookup = db_query("SELECT nid FROM {dmoz_node} WHERE url = '%s' AND catid = %d", $data->url, $data->catid);
  $nid = db_result($lookup);
  if (!is_numeric($nid)) {
    return NULL;
  }

  $node = node_load(array('nid' => $nid));
  if (!$node) {
    return NULL;
  }

  if ($taxonomy) {
    $terms = taxonomy_node_get_terms($node->nid);
    $node->taxonomy = array_keys($terms);
  }
  return $node;
}
563
/**
 * Simple stopwatch.
 *
 * @return
 *   Seconds (float) elapsed since the previous call. The very first call
 *   measures from 0, i.e. it returns the current Unix time.
 */
function dmoz_timer() {
  static $timer = 0;
  // microtime() yields "usec sec"; add both parts for a float timestamp.
  $parts = explode(' ', microtime());
  $now = (float)$parts[0] + (float)$parts[1];
  $elapsed = $now - $timer;
  $timer = $now;
  return $elapsed;
}
575
/**
 * Keeps general statistics about processed data.
 *
 * Two sets of counters are kept per $type: running totals, loaded from and
 * saved to the variable "dmoz_stats_$type", and counters for the current
 * request only.
 *
 * @param $type
 *   Statistics group, e.g. 'category' or 'content'.
 * @param $op
 *   'save' stores the totals; 'get' returns the counters as an array;
 *   'string' returns them formatted as HTML; any other value is taken as a
 *   counter name (e.g. 'updated', 'created', 'processed') and incremented.
 * @param $total
 *   TRUE to read the running totals, FALSE for this request's counters.
 *
 * @return
 *   Array for 'get', string for 'string', the incremented counter value for
 *   counter names, nothing for 'save'.
 */
function dmoz_stats($type, $op = 'get', $total = FALSE) {
  static $stats = array();
  static $request = array();
  if (!isset($stats[$type])) {
    $stats[$type] = variable_get("dmoz_stats_$type", array());
    $request[$type] = array();
  }

  switch ($op) {
    case 'save':
      variable_set("dmoz_stats_$type", $stats[$type]);
      return;

    case 'get':
      return $total ? $stats[$type] : $request[$type];

    case 'string':
      $data = $total ? $stats[$type] : $request[$type];
      $output = '';
      // A separate loop variable keeps the $op parameter intact.
      foreach ($data as $name => $value) {
        $output .= "<strong>$name</strong>=$value&nbsp;";
      }
      return $output;

    default: // case 'updated': case 'deleted': case 'created': case 'processed':
      // Counters start at zero the first time they are seen.
      if (!isset($stats[$type][$op])) {
        $stats[$type][$op] = 0;
      }
      if (!isset($request[$type][$op])) {
        $request[$type][$op] = 0;
      }
      $stats[$type][$op] += 1;
      $request[$type][$op] += 1;
      return $total ? $stats[$type][$op] : $request[$type][$op];
  }
}
608
/**
 * Expands (uncompresses) a file by running the unzip.pl script on it.
 *
 * @param $file
 *   Path of the compressed file.
 */
function _dmoz_expand($file) {
  $result = _dmoz_exec('unzip.pl', $file);
  _dmoz_log("DEBUG:dmoz_expand: file=$file output=".$result);
}
616
/**
 * Downloads a file by running the download.pl script.
 *
 * @param $url
 *   URL to fetch. The target folder comes from the 'dmoz_dir_working'
 *   variable.
 */
function _dmoz_download($url) {
  $folder = variable_get('dmoz_dir_working', 'files/dmoz');
  // Capture the result so the log line has a defined value to print.
  $output = _dmoz_exec('download.pl', $url, $folder);
  _dmoz_log("DEBUG:dmoz_download url=$url output=$output");
}
625
/**
 * Handles command execution and signaling.
 *
 * Builds "<perl> <scripts dir>/<script> <args...>", appends the output and
 * error redirections supplied by _dmoz_status('command') and runs the
 * command in the background.
 *
 * @param ...
 *   First argument: script file name inside 'dmoz_dir_scripts'. Any further
 *   arguments are passed to the script, separated by spaces.
 *
 * @return
 *   The value returned by exec(): the last line of the command's output.
 *   With stdout redirected to a file this is normally an empty string.
 */
function _dmoz_exec() {
  $args = func_get_args();
  $script = array_shift($args);
  $command = DMOZ_PERL .' '.variable_get('dmoz_dir_scripts', 'modules/dmoz/scripts') .'/'. $script;

  // NOTE(review): arguments are passed to the shell unescaped. They come
  // from site variables, so this is only as safe as those settings; adding
  // escapeshellarg() would change how empty and non-ASCII values reach the
  // scripts, so it is flagged here rather than applied.
  $params = implode(' ', $args);

  // Redirect stdout, stderr and run in background.
  $control = _dmoz_status('command').' &';
  $commandline = "$command $params $control";
  $output = exec($commandline);
  _dmoz_log("DEBUG:dmoz_exec: command=$commandline output=$output");

  // Callers use the return value, so hand it back.
  return $output;
}
643
/**
 * Simple inter process signaling through the output.txt and error.txt files.
 *
 * @param $op
 *   One of:
 *   - 'check': TRUE when the output file ends with "OK"; when the error
 *     file has contents, the 'error' stage is started and FALSE returned.
 *   - 'reset': deletes both files and returns TRUE.
 *   - 'command': returns the shell redirection suffix for commands.
 *   - 'output file' / 'error file': returns the respective path.
 *   - 'output contents' / 'error contents': returns the file contents.
 *
 * @return
 *   See above; FALSE in every case not listed (missing file, unknown $op,
 *   process not finished yet).
 */
function _dmoz_status($op = 'check') {
  $file = _dmoz_file('output.txt');
  $error = _dmoz_file('error.txt');

  if ($op == 'output file') {
    return $file;
  }
  if ($op == 'error file') {
    return $error;
  }
  if ($op == 'command') {
    // Returns exit condition for commands
    return '>'.$file.' 2>'.$error;
  }
  if ($op == 'reset') {
    if (file_exists($file)) {
      unlink($file);
    }
    if (file_exists($error)) {
      unlink($error);
    }
    return TRUE;
  }
  if ($op == 'check') {
    // The error file takes precedence over the output file.
    $problem = file_exists($error) ? file_get_contents($error) : FALSE;
    if ($problem) {
      _dmoz_log("Error file $problem");
      dmoz_stage('start', 'error');
      return FALSE;
    }
    $report = file_exists($file) ? file_get_contents($file) : FALSE;
    if ($report && preg_match("/.*OK$/", $report)) {
      return TRUE;
    }
    return FALSE;
  }
  if ($op == 'output contents') {
    return file_exists($file) ? file_get_contents($file) : FALSE;
  }
  if ($op == 'error contents') {
    return file_exists($error) ? file_get_contents($error) : FALSE;
  }
  return FALSE;
}
688
689 /*------------------------------------------------------------------------------
690 |
691 | PHParadise source code
692 |
693 |-------------------------------------------------------------------------------
694 |
695 | file: remote last modified
696 | category: date and time
697 |
698 | last modified: Mon, 20 Jun 2005 16:40:28 GMT
699 | downloaded: Tue, 20 Dec 2005 17:59:39 GMT as PHP file
700 |
701 | code URL:
702 | http://phparadise.de/php-code/date-and-time/remote-last-modified/
703 |
704 | description:
705 | functions to retrieve the last modified date from files on remote servers. very
706 | practical to check if a new version of a file is ready to download on another
707 | server.
708 |
709 ------------------------------------------------------------------------------*/
710
711
/**
 * Fetches the raw HTTP response header block of a document.
 *
 * Opens a connection to port 80 of $host, sends an HTTP/1.0 GET request and
 * reads the response up to and including the blank line that ends the
 * headers; the body is not read.
 *
 * @param $host
 *   Host name to connect to.
 * @param $doc
 *   Path of the document, starting with '/'.
 *
 * @return
 *   The raw header text, or an empty string when the connection fails.
 */
function get_raw_header($host,$doc)
{
  $httpheader = '';
  $fp = fsockopen($host, 80, $errno, $errstr, 30);
  if (!$fp)
  {
    // Raise a warning instead of echoing into whatever page is rendered.
    trigger_error('get_raw_header: '.$errstr.' ('.$errno.')', E_USER_WARNING);
    return $httpheader;
  }

  fputs($fp, 'GET '.$doc.' HTTP/1.0'."\r\n".'Host: '.$host."\r\n\r\n");
  while (!feof($fp))
  {
    $httpresult = fgets($fp, 1024);
    if ($httpresult === FALSE)
    {
      // Read error or timeout: stop instead of looping on a dead socket.
      break;
    }
    $httpheader = $httpheader.$httpresult;
    // A line starting with CRLF is the blank line that ends the headers.
    // Plain string comparison replaces ereg(), which PHP 7 removed.
    if (strncmp($httpresult, "\r\n", 2) == 0)
    {
      break;
    }
  }
  fclose($fp);
  return $httpheader;
}
/**
 * Fetches the HTTP response headers of a URL as an associative array.
 *
 * @param $url
 *   URL to query; every 'http://' occurrence is stripped before the string
 *   is split into host and document path.
 *
 * @return
 *   Array mapping header name to value. Lines without a ':' (such as the
 *   status line) become keys with an empty value.
 */
function get_header_array($url)
{
  // str_replace() does what ereg_replace() did here; ereg_* is gone in PHP 7.
  $url = str_replace('http://', '', $url);
  $endHostPos = strpos($url, '/');
  if (!$endHostPos) $endHostPos = strlen($url);
  $host = substr($url, 0, $endHostPos);
  $doc = substr($url, $endHostPos, strlen($url) - $endHostPos);
  if ($doc == '') $doc = '/';

  $raw = get_raw_header($host, $doc);

  // Always return an array, even if nothing could be parsed.
  $array = array();
  foreach (explode("\n", $raw) as $line)
  {
    $parts = explode(':', $line, 2);
    $name = trim($parts[0]);
    $value = isset($parts[1]) ? trim($parts[1]) : '';
    $array[$name] = $value;
  }
  return $array;
}
749
750 ?>

  ViewVC Help
Powered by ViewVC 1.1.2