/[drupal]/contributions/modules/broken_anchor/broken_anchor.module
ViewVC logotype

Contents of /contributions/modules/broken_anchor/broken_anchor.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.4 - (show annotations) (download) (as text)
Tue Feb 3 16:48:08 2009 UTC (9 months, 3 weeks ago) by ulhume
Branch: MAIN
CVS Tags: DRUPAL-6--1-0-RC3, HEAD
Changes since 1.3: +7 -7 lines
File MIME type: text/x-php
*** empty log message ***
1 <?php
2
3 /* This file is part of "Broken Anchor for Node comments Module".
4 * Copyright 2009, arNuméral
5 * Author : Yoran Brault
6 * eMail : yoran.brault@bad_arnumeral.fr (remove bad_ before sending an email)
7 * Site : http://www.arnumeral.fr/node/2
8 *
9 * "Broken Anchor for Node comments Module" is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * "Broken Anchor for Node comments Module" is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public
20 * License along with "Broken Anchor for Node comments Module"; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24
25
/**
 * Implementation of hook_menu().
 *
 * Registers the report page, the settings form (with its default local task
 * so other modules can add tabs next to it), and the two report callbacks
 * used to ignore an error and to re-check a piece of content.
 *
 * Titles and descriptions are deliberately NOT wrapped in t(): the Drupal 6
 * menu system expects untranslated strings and translates them itself when
 * the item is rendered. Translating here would translate twice.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function broken_anchor_menu() {
  $admin_only = array('administer site configuration');
  $items = array();

  // Report listing.
  $items['admin/reports/broken_anchor'] = array(
    'title' => 'Broken Anchors Report',
    'description' => 'Broken anchor report.',
    'page callback' => 'broken_anchor_report',
    'access arguments' => $admin_only,
    'file' => 'broken_anchor.reports.inc',
  );

  // Main settings tab (for other modules to hook on).
  $items['admin/settings/broken_anchor/main'] = array(
    'title' => 'Main settings',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10,
  );

  // Settings form.
  $items['admin/settings/broken_anchor'] = array(
    'title' => 'Broken anchor settings',
    'description' => 'Broken anchor settings.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('broken_anchor_admin_settings'),
    'access arguments' => $admin_only,
    'file' => 'broken_anchor.admin.inc',
  );

  // Ignore one error; path argument 4 is handed to the callback.
  $items['admin/reports/broken_anchor/ignore/%'] = array(
    'title' => 'Ignore',
    'description' => 'Ignore an error.',
    'page callback' => 'broken_anchor_ignore_error',
    'page arguments' => array(4),
    'access arguments' => $admin_only,
    'file' => 'broken_anchor.reports.inc',
  );

  // Re-check one piece of content; path arguments 4 and 5 are handed to the
  // callback.
  $items['admin/reports/broken_anchor/check/%/%'] = array(
    'title' => 'Check node',
    'description' => 'Check errors for a node',
    'page callback' => 'broken_anchor_update_node',
    'page arguments' => array(4, 5),
    'access arguments' => $admin_only,
    'file' => 'broken_anchor.reports.inc',
  );

  return $items;
}
73
74
/**
 * Helper to parse HTTP header lines.
 *
 * Each status line ("HTTP/1.1 200 OK") opens a new response entry; the
 * header fields that follow are attached to that entry. A line that is
 * neither a status line nor a "Name: value" field opens an entry with code -1
 * and the raw line as its status.
 *
 * @param $headers
 *   A list of raw header lines. Lines are trimmed before parsing.
 *
 * @return
 *   A list of responses in the order they were seen (one per status line, so
 *   a redirect chain yields several). Each response is an array of header
 *   field names / values. The HTTP status is also exposed as the fields
 *   'status' (status text) and 'code' (HTTP code).
 */
function broken_anchor_parse_http_headers($headers) {
  $results = array();
  if (!is_array($headers)) {
    return $results;
  }

  // Index of the response currently being filled; -1 until one is opened.
  $current = -1;
  foreach ($headers as $header) {
    $header = trim($header);

    // Status line. Not anchored at the start because PHP warning texts embed
    // it after other words ("HTTP request failed! HTTP/1.1 404 Not Found").
    // The reason phrase is optional.
    if (preg_match('/HTTP\/\d+(?:\.\d+)?\s+(\d+)(?:\s+(.*))?$/i', $header, $matches)) {
      $results[++$current] = array(
        'status' => isset($matches[2]) ? $matches[2] : '',
        'code' => $matches[1],
      );
    }
    // Header field. Split on the FIRST colon so that values containing
    // colons (e.g. "Location: http://...") stay intact.
    elseif (preg_match('/^([^:]+?)\s*:\s*(.*)$/', $header, $matches)) {
      if ($current < 0) {
        // Field seen before any status line: open an entry to hold it.
        $results[++$current] = array();
      }
      $results[$current][$matches[1]] = $matches[2];
    }
    // Anything else is reported as a pseudo-response with code -1.
    else {
      $results[++$current] = array(
        'status' => $header,
        'code' => -1,
      );
    }
  }
  return $results;
}
101
/**
 * PHP error handler implementation.
 *
 * Takes the text after the last ':' of the error message, parses it with
 * broken_anchor_parse_http_headers() and stores the result in the global
 * $broken_anchor_last_error. When the text could not be parsed as an HTTP
 * status (single entry with code -1), the code is replaced by 1000 + $errno.
 *
 * @param $errno
 *   PHP error level of the raised error.
 * @param $errstr
 *   PHP error message.
 */
function broken_anchor_error_handler($errno, $errstr) {
  global $broken_anchor_last_error;

  // Only the trailing segment of the message is of interest.
  $segments = explode(":", $errstr);
  $detail = trim(end($segments));

  $broken_anchor_last_error = broken_anchor_parse_http_headers(array($detail));

  $is_unparsed = count($broken_anchor_last_error) == 1
    && $broken_anchor_last_error[0]['code'] == -1;
  if ($is_unparsed) {
    $broken_anchor_last_error[0]['code'] = 1000 + $errno;
  }
}
115
/**
 * Check if a URL is broken.
 *
 * The URL is opened with fopen() while broken_anchor_error_handler() is
 * installed, so that any warning raised by the stream wrapper is captured in
 * the global $broken_anchor_last_error. Results are remembered in the global
 * $broken_anchor_check_url_cache for the rest of the request.
 *
 * @param $url
 *   The URL to check.
 *
 * @return
 *   TRUE if the URL is OK (plain file, or final HTTP code 200 or 302).
 *   Otherwise an array describing the failure, with at least the keys
 *   'status' and 'code'.
 */
function broken_anchor_check_url($url) {
  global $broken_anchor_last_error;
  global $broken_anchor_check_url_cache;

  // If we already checked this in this session, no need to do it again.
  if (isset($broken_anchor_check_url_cache[$url])) {
    return $broken_anchor_check_url_cache[$url];
  }

  // Extra request headers are appended to the user agent string, which the
  // stream wrapper sends verbatim.
  $header = "Accept: image/png,image/*;q=0.8,*/*;q=0.5\r\n";
  $header .= "Accept-Language: en-us\r\n";
  $header .= "Accept-Encoding: gzip,deflate\r\n";
  $header .= "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n";
  $header .= "Keep-Alive: 300\r\n";
  $header .= "Connection: keep-alive";
  $previous_agent = ini_set('user_agent', broken_anchor_user_agent() . "\r\n" . $header);

  // Forget any error captured by an earlier check so it cannot be reported
  // for this URL.
  $broken_anchor_last_error = NULL;

  set_error_handler('broken_anchor_error_handler');
  $handler = fopen($url, 'r');
  // Restore exactly once: a second call would pop the caller's own handler.
  restore_error_handler();

  // Do not leave the doctored user agent in place for unrelated requests.
  if ($previous_agent !== FALSE) {
    ini_set('user_agent', $previous_agent);
  }

  if (!$handler) {
    if (is_array($broken_anchor_last_error) && isset($broken_anchor_last_error[0])) {
      $failure = $broken_anchor_last_error[0];
    }
    else {
      // fopen() failed without raising a warning we could capture.
      $failure = array('status' => 'Unable to open URL', 'code' => -1);
    }
    $broken_anchor_check_url_cache[$url] = $failure;
    return $failure;
  }

  $headers = stream_get_meta_data($handler);
  fclose($handler);
  if ($headers['wrapper_type'] == 'plainfile') {
    $broken_anchor_check_url_cache[$url] = TRUE;
    return TRUE;
  }

  $wrapper_data = (isset($headers['wrapper_data']) && is_array($headers['wrapper_data'])) ? $headers['wrapper_data'] : array();
  $results = broken_anchor_parse_http_headers($wrapper_data);
  if (count($results) == 0) {
    $result = array('status' => 'No HTTP response headers', 'code' => -1);
  }
  else {
    // The last response is the one obtained after following redirects.
    $result = $results[count($results) - 1];
  }

  $code = isset($result['code']) ? $result['code'] : -1;
  $broken_anchor_check_url_cache[$url] = ($code == 200 || $code == 302) ? TRUE : $result;
  return $broken_anchor_check_url_cache[$url];
}
160
161
162
/**
 * Getter for the per-type job size setting.
 *
 * @param $type
 *   Name inserted into the variable name
 *   ("broken_anchor_<type>_job_size").
 *
 * @return
 *   The stored value, or 10 when the variable is not set.
 */
function broken_anchor_job_size($type) {
  $variable = "broken_anchor_{$type}_job_size";
  return variable_get($variable, 10);
}
169
/**
 * Getter for the broken_anchor_check_emails setting.
 *
 * @return
 *   The stored value, or TRUE when the variable is not set.
 */
function broken_anchor_check_emails() {
  $enabled = variable_get('broken_anchor_check_emails', true);
  return $enabled;
}
176
/**
 * Getter for the per-type "last checked ID" setting.
 *
 * @param $type
 *   Name appended to the variable name
 *   ("broken_anchor_last_check_id_<type>").
 *
 * @return
 *   The stored value, or 0 when the variable is not set.
 */
function broken_anchor_last_check_id($type) {
  $variable = "broken_anchor_last_check_id_{$type}";
  return variable_get($variable, 0);
}
183
184
/**
 * Getter for the broken_anchor_user_agent setting.
 *
 * @return
 *   The stored user agent string, or a Firefox 3 user agent string when the
 *   variable is not set.
 */
function broken_anchor_user_agent() {
  $fallback = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092903 Mandriva/1.9.0.3-1mdv2009.0 (2009.0) Firefox/3.0.3";
  return variable_get("broken_anchor_user_agent", $fallback);
}
191
/**
 * Getter for the broken_anchor_timeout setting.
 *
 * @return
 *   The stored value, or 5 when the variable is not set.
 */
function broken_anchor_timeout() {
  $timeout = variable_get('broken_anchor_timeout', 5);
  return $timeout;
}
198
/**
 * Check if debug logging is enabled for a given level.
 *
 * @param $level
 *   Level to test against the configured debug level (defaults to 1).
 *
 * @return
 *   TRUE if the configured debug level is at least $level.
 */
function broken_anchor_is_debug($level = 1) {
  $configured = broken_anchor_debug_level();
  return ($configured >= $level);
}
/**
 * Output a debug message through error_log(), one call per line.
 *
 * @param $message
 *   The message; it is split on "\n" and each piece is logged separately.
 */
function broken_anchor_debug($message) {
  $pieces = explode("\n", $message);
  $total = count($pieces);
  for ($i = 0; $i < $total; $i++) {
    error_log($pieces[$i]);
  }
}
218
/**
 * Getter for the broken_anchor_debug_level setting.
 *
 * @return
 *   The stored debug level, or 1 when the variable is not set.
 */
function broken_anchor_debug_level() {
  $level = variable_get("broken_anchor_debug_level", 1);
  return $level;
}
227
228
/**
 * Getter for the broken_anchor_report_page_size setting.
 *
 * @return
 *   The stored value, or 50 when the variable is not set.
 */
function broken_anchor_report_page_size() {
  $page_size = variable_get('broken_anchor_report_page_size', 50);
  return $page_size;
}
235
236
/**
 * Returns the list of type names declared by this module.
 *
 * @return
 *   array('node', 'comment').
 */
function broken_anchor_broken_anchor_types() {
  $types = array();
  $types[] = 'node';
  $types[] = 'comment';
  return $types;
}
240
/**
 * Validate module dependencies.
 *
 * The check itself runs once per request; later calls return the remembered
 * outcome. The global $broken_anchor_dependencies_checked is still set so
 * that the "already checked" flag stays visible to other code.
 *
 * @return
 *   FALSE if a dependency is not found (PHP "allow_url_fopen" disabled),
 *   TRUE otherwise.
 */
function broken_anchor_dependencies() {
  global $broken_anchor_dependencies_checked;
  // Outcome of the check, remembered so that repeat calls do not report a
  // falsy value when the dependency is actually satisfied.
  static $satisfied = TRUE;

  if ($broken_anchor_dependencies_checked) {
    return $satisfied;
  }
  $broken_anchor_dependencies_checked = TRUE;

  if (!ini_get('allow_url_fopen')) {
    $message = 'PHP INI "allow_url_fopen" is false, unable to check broken anchors...';
    error_log($message);
    // watchdog($type, $message, $variables, $severity): pass the real message
    // and put the severity in the severity slot.
    watchdog('broken_anchor', $message, array(), WATCHDOG_NOTICE);
    drupal_set_message($message);
    $satisfied = FALSE;
  }

  return $satisfied;
}
259
/**
 * Implementation of hook_cron().
 *
 * For every module implementing hook_broken_anchor_info(), fetches the next
 * batch of contents through that module's hook_broken_anchor_get_contents()
 * (starting at the last checked ID, limited to the configured job size),
 * checks each one, and stores the ID to resume from on the next run.
 */
function broken_anchor_cron() {
  if (!broken_anchor_dependencies()) {
    return;
  }

  foreach (module_implements('broken_anchor_info') as $module) {
    // A job size of 0 disables checking for this module.
    $job_size = broken_anchor_job_size($module);
    if ($job_size == 0) {
      continue;
    }

    // Retrieve the last checked ID.
    $last_checked_id = broken_anchor_last_check_id($module);
    variable_set("broken_anchor_statistics_first_id_" . $module, $last_checked_id);
    if (broken_anchor_is_debug()) {
      broken_anchor_debug("Checking with '" . $module . "' contents : " . $last_checked_id . " " . $job_size);
    }

    // Get the batch of contents; tolerate a module returning nothing.
    $contents = module_invoke($module, 'broken_anchor_get_contents', $last_checked_id, $job_size);
    if (empty($contents)) {
      $contents = array();
    }

    $current_id = -1;
    foreach ($contents as $content) {
      broken_anchor_check_content($module, $content);
      $current_id = $content['id'];
      if (broken_anchor_is_debug(2)) {
        broken_anchor_debug("==>" . $current_id);
      }
    }

    // Nothing left to check: start over from 0. Otherwise resume after the
    // last content that was processed.
    if ($current_id == -1) {
      $current_id = 0;
    }
    else {
      $current_id++;
    }
    variable_set("broken_anchor_last_check_id_" . $module, $current_id);
    if (broken_anchor_is_debug()) {
      broken_anchor_debug("Checking terminated. Next ID > " . $current_id);
    }
  }

  variable_set("broken_anchor_statistics_last_cron", time());
}
296
297
/**
 * Implementation of hook_link().
 *
 * Adds a "Check links" link on full node views for users allowed to
 * administer the site configuration.
 *
 * NOTE(review): the href built here carries a single argument after
 * "check/", while the route registered in broken_anchor_menu() is
 * 'admin/reports/broken_anchor/check/%/%' (two arguments) — confirm the
 * expected arguments against broken_anchor_update_node().
 *
 * @param $type
 *   Kind of object the links are requested for; only 'node' produces a link.
 * @param $node
 *   The object being rendered; its nid is used in the link.
 * @param $teaser
 *   TRUE when rendering a teaser, in which case nothing is returned.
 *
 * @return
 *   An array of links (possibly empty), or nothing when access is denied or
 *   a teaser is being rendered.
 */
function broken_anchor_link($type, $node = NULL, $teaser = FALSE) {
  $allowed = user_access("administer site configuration");
  if ($teaser || !$allowed) {
    return;
  }

  if ($type != 'node') {
    return array();
  }

  $check_link = array();
  $check_link['title'] = t('Check links');
  $check_link['href'] = "admin/reports/broken_anchor/check/" . $node->nid;
  $check_link['attributes'] = array('title' => t('Check all links in this node.'));

  return array('broken_anchor' => $check_link);
}
313
/**
 * Check all links of a single node.
 *
 * Loads and prepares the node, then hands its body to
 * broken_anchor_check_content() using that function's current
 * ($module, $content) signature, where $content is an array with the keys
 * 'id' and 'content'.
 *
 * NOTE(review): 'node' is passed as the module name because that is the
 * value this function already supplied; confirm it matches the name under
 * which cron records node errors.
 *
 * @param $nid
 *   The node ID.
 *
 * @return
 *   The error records returned by broken_anchor_check_content(), or an empty
 *   array when the node cannot be loaded.
 */
function broken_anchor_check_node($nid) {
  $node = node_load($nid);
  if (!$node) {
    return array();
  }
  $node = node_prepare($node, FALSE);

  $content = array(
    'id' => $nid,
    'content' => $node->body,
  );
  return broken_anchor_check_content('node', $content);
}
320
/**
 * Check one piece of content for broken URLs and clear-text email addresses.
 *
 * Existing error records for the content are loaded and then deleted; every
 * http(s) URL found in an href/src attribute is checked with
 * broken_anchor_check_url(), and (when enabled) every email address found is
 * reported. Records with at least one occurrence are written back to
 * {broken_anchor_errors}.
 *
 * Records flagged as ignored are not re-checked, but they are written back
 * while their value is still present in the content; otherwise the ignore
 * flag would be lost on the first re-check.
 *
 * @param $module
 *   Name of the module owning the content.
 * @param $content
 *   An array with the keys 'id' (content ID) and 'content' (text to scan).
 *
 * @return
 *   An array of error records keyed by URL or email address. Records whose
 *   'count' is 0 were previously known but were not written back.
 */
function broken_anchor_check_content($module, $content) {
  $cid = $content['id'];
  $content = $content['content'];

  // Retrieve all error records for this content.
  $cursor = db_query("
    select *
    from {broken_anchor_errors}
    where
    cid=%d and
    module='%s'",
    $cid, $module);

  $errors = array();
  while ($error = db_fetch_array($cursor)) {
    $error['count'] = 0;
    $errors[$error['value']] = $error;
  }

  // Drop all old errors.
  db_query("
    delete
    from {broken_anchor_errors}
    where
    cid=%d and
    module='%s'",
    $cid, $module);

  // Match all URLs.
  $url_pattern = "`(href|src)=('|\")\s*(https?://([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])([.,?!]*?))\s*('|\")`i";
  preg_match_all($url_pattern, $content, $matches, PREG_PATTERN_ORDER);
  foreach ($matches[3] as $url) {
    // Start from the previous record when there is one, else a blank one.
    $error = isset($errors[$url]) ? $errors[$url] : array('count' => 0);

    if (!empty($error['ignored'])) {
      // Still present in the content: keep the record (and its ignore flag)
      // without checking the URL again.
      $error['count']++;
      $errors[$url] = $error;
      continue;
    }

    if (broken_anchor_is_debug()) {
      broken_anchor_debug(" - " . $cid . ":" . $url);
    }
    $result = broken_anchor_check_url($url);
    if ($result !== true) {
      $status = isset($result['status']) ? $result['status'] : '';
      if (broken_anchor_is_debug()) {
        broken_anchor_debug(" => " . $status);
      }
      $error['count']++;
      $error['cid'] = $cid;
      $error['module'] = $module;
      $error['status'] = $status;
      $error['value'] = $url;
      $error['value_type'] = 0;
      $error['checked'] = time();
      $errors[$url] = $error;
    }
  }

  // Match all emails.
  $emails_pattern = "`([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)`i";
  if (broken_anchor_check_emails()) {
    preg_match_all($emails_pattern, $content, $matches, PREG_PATTERN_ORDER);
    foreach ($matches[0] as $email) {
      if (broken_anchor_is_debug()) {
        broken_anchor_debug(" - " . $cid . ":" . $email);
      }
      $error = isset($errors[$email]) ? $errors[$email] : array('count' => 0);

      if (!empty($error['ignored'])) {
        // Keep the ignored record as long as the address is still there.
        $error['count']++;
        $errors[$email] = $error;
        continue;
      }

      $error['count']++;
      $error['cid'] = $cid;
      $error['module'] = $module;
      $error['status'] = t("Clear email address");
      $error['value'] = $email;
      $error['checked'] = time();
      $error['value_type'] = 1;
      $errors[$email] = $error;
    }
  }

  // Dump all records that were seen at least once to the database.
  foreach ($errors as $error) {
    if ($error['count'] > 0) {
      drupal_write_record('broken_anchor_errors', $error);
    }
  }
  return $errors;
}

  ViewVC Help
Powered by ViewVC 1.1.2