/[drupal]/contributions/modules/link_checker/link_checker.module
ViewVC logotype

Contents of /contributions/modules/link_checker/link_checker.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.5 - (show annotations) (download) (as text)
Thu Sep 25 08:45:45 2008 UTC (14 months ago) by jredding
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +7 -6 lines
File MIME type: text/x-php
Fixed errors with improper column names, and thus improper SQL (#258280); used code from quicksketch posted at #236183, particularly #236183#comment-811450
1 <?php
2
3 //Link checker module to check on 404 links created by the link module (http://drupal.org/project/link)
4 //A large majority of this code was lovingly borrowed from the janode project at http://drupal.org/project/janode
5 //THANK YOU janode!
6
// Internal status codes stored alongside real HTTP status codes.
// Any status greater than _LINK_CHECKER_FAILURE_PIVOT is counted as a failed
// check; values at or below it are treated as non-fatal.
define('_LINK_CHECKER_NOT_HTTP', 251);          // link is not a plain http:// URL, so it is not fetched
define('_LINK_CHECKER_DATA', 252);              // not referenced by the code visible in this file
define('_LINK_CHECKER_NO_MATA_DATA', 253);      // stream_get_meta_data() unavailable; was used but never defined
define('_LINK_CHECKER_FILE_OPEN_FAILURE', 353); // fopen() on the URL failed (counts as a failure)
define('_LINK_CHECKER_FAILURE_PIVOT', 299);     // highest status that is NOT considered a failure
11
/**
 * Implementation of hook_menu().
 *
 * Registers the module's settings page at admin/settings/linkchecker.
 *
 * @param $may_cache
 *   TRUE when cacheable menu items are being collected.
 *
 * @return
 *   Array of menu item definitions.
 */
function link_checker_menu($may_cache) {
  $items = array();
  // The settings page has a fixed path that does not depend on the current
  // request, so it belongs to the cacheable pass instead of being rebuilt on
  // every page load.
  if ($may_cache) {
    $items[] = array(
      'path' => 'admin/settings/linkchecker',
      'callback' => 'drupal_get_form',
      'callback arguments' => array('link_checker_admin_settings'),
      'title' => t('Link checker'),
      'description' => t('Configure link checker'),
      'access' => user_access('administer link checker'),
      'type' => MENU_NORMAL_ITEM,
    );
  }
  return $items;
}
30
/**
 * Implementation of hook_perm().
 *
 * @return
 *   List of permission names defined by this module.
 */
function link_checker_perm() {
  $permissions = array();
  // Single permission guarding the module's settings page.
  $permissions[] = 'administer link checker';
  return $permissions;
}
37
/**
 * Administrator settings form.
 *
 * Offers three settings: how many links to check per link field on each cron
 * run, how many failed checks are tolerated before a node is unpublished, and
 * which node types (those that own a field of type 'link') are checked.
 *
 * @return
 *   Form array, passed through system_settings_form().
 */
function link_checker_admin_settings() {
  // Link checking fetches remote URLs with fopen(); warn loudly when PHP is
  // configured to forbid that, because the module cannot work without it.
  if (!ini_get('allow_url_fopen')) {
    drupal_set_message(t('PHP allow_url_fopen is not enabled in order for this module to work it must be enabled! '), 'error');
  }

  $form = array();

  // The default matches the fallback used when cron reads this variable, so
  // the form shows the value that is actually in effect before the first save.
  $form['link_checker_batch_quantity'] = array(
    '#type' => 'select',
    '#title' => t('Max links to check per link field'),
    '#description' => t('Determines the maximum number of links that will be checked for each link field per cron run.
A high number will cause cron to run slowly whereas a low number will require cron to be ran more often.
'),
    '#options' => array(1 => 1, 2 => 2, 5 => 5, 10 => 10),
    '#default_value' => variable_get('link_checker_batch_quantity', 10),
  );

  $form['link_checker_unpublish'] = array(
    '#type' => 'textfield',
    '#title' => t('Unpublish threshold'),
    '#description' => t('Number of consecutive CRON runs that detect an error to occur before unpublishing the related node, note, not all links are checked at every cron run due to the above setting. Enter 0 for never unpublish'),
    '#default_value' => variable_get('link_checker_unpublish', 0),
  );

  // Collect the node types that have at least one field of type 'link'.
  $types = array();
  foreach ((array) content_types() as $node_type) {
    if (empty($node_type['fields']) || !is_array($node_type['fields'])) {
      // Type without any fields: nothing to inspect.
      continue;
    }
    foreach ($node_type['fields'] as $field) {
      if ($field['type'] == 'link') {
        $types[$node_type['type']] = $node_type['name'];
        // One link field is enough to list the type.
        break;
      }
    }
  }

  $form['link_checker_node_types'] = array(
    '#type' => 'select',
    '#title' => t('Select the node types to check'),
    '#description' => t('Unselected items will not be checked for 404 errors'),
    '#options' => $types,
    '#default_value' => variable_get('link_checker_node_types', array()),
    '#multiple' => TRUE,
  );

  return system_settings_form($form);
}
90
/**
 * Implementation of hook_cron().
 *
 * For every selected node type and every field of type 'link', picks a batch
 * of stored URLs (rows whose last check is oldest come first) and hands each
 * one to _link_checker_check_status(), which records the result.
 */
function link_checker_cron() {
  // Same fallback for every field; a NULL fallback would end up as LIMIT 0
  // and silently check nothing.
  $max = (int) variable_get('link_checker_batch_quantity', 10);
  if ($max < 1) {
    return;
  }

  // Only search the node types the administrator selected.
  $node_types = array_filter((array) variable_get('link_checker_node_types', NULL));

  foreach ($node_types as $node_type) {
    $fields = content_fields(NULL, $node_type);
    if (!is_array($fields)) {
      continue;
    }

    foreach ($fields as $field) {
      if ($field['type'] != 'link') {
        continue;
      }

      $db_info = content_database_info($field);
      if (empty($db_info['table']) || empty($db_info['columns']['url']['column'])) {
        continue;
      }
      $table_name = $db_info['table'];
      $field_name = $db_info['columns']['url']['column'];

      // Multiple-value fields are queried with their delta column; for
      // single-value fields delta is not selected from the table, so a
      // constant 0 is used instead.
      $delta_expr = ($field['multiple'] == 1) ? 'c.delta' : '0';

      // The table name is concatenated between literal braces on purpose:
      // writing {$table_name} inside a double-quoted string is PHP's curly
      // interpolation syntax, which would drop the braces Drupal needs for
      // table prefixing.
      // The {link_checker} join matches on the stored field name (not on the
      // URL value) so last_checked ordering actually rotates through links.
      // The {node} join restricts the batch to the selected node type and to
      // each node's current revision.
      $sql = "SELECT c.nid, c.vid, $delta_expr AS delta, c.$field_name AS link
        FROM {" . $table_name . "} c
        INNER JOIN {node} n ON n.nid = c.nid AND n.vid = c.vid
        LEFT JOIN {link_checker} lc ON c.nid = lc.nid AND c.vid = lc.vid AND lc.delta = $delta_expr AND lc.field_name = '%s'
        WHERE n.type = '%s' AND c.$field_name <> ''
        ORDER BY lc.last_checked ASC LIMIT %d
        ";

      $links = db_query($sql, $field_name, $node_type, $max);
      while ($link = db_fetch_object($links)) {
        // Send over to _link_checker_check_status() to update the status in the DB.
        $link->field_name = $field_name;
        _link_checker_check_status($link);
      }
    }
  }
}
144
// Fallback so the "no meta data" status used below always resolves to a
// number, even when the constant has not been defined earlier in the file.
if (!defined('_LINK_CHECKER_NO_MATA_DATA')) {
  define('_LINK_CHECKER_NO_MATA_DATA', 253);
}

/**
 * Rebuilds the URL to request from a stored link value.
 *
 * @param $raw_url
 *   URL string as stored in the link field.
 *
 * @return
 *   Array with 'scheme' (lower-cased, '' when the URL has no usable
 *   scheme/host) and 'url' (scheme, host, optional port, path and query).
 */
function _link_checker_prepare_url($raw_url) {
  $parts = @parse_url($raw_url);
  if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
    return array('scheme' => '', 'url' => '');
  }

  $url = $parts['scheme'] . '://' . $parts['host'];
  if (isset($parts['port']) && strlen($parts['port']) > 0) {
    $url .= ':' . $parts['port'];
  }
  if (isset($parts['path'])) {
    $url .= $parts['path'];
  }
  if (isset($parts['query']) && strlen($parts['query']) > 0) {
    // Appended as-is: urlencode() on the whole query string would also encode
    // the '=' and '&' separators and change which resource is requested.
    $url .= '?' . $parts['query'];
  }

  return array('scheme' => strtolower($parts['scheme']), 'url' => $url);
}

/**
 * Checks one link and records the outcome in {link_checker}.
 *
 * Code originally written in the janode project at
 * http://drupal.org/project/janode, thanks!
 *
 * The recorded status is either the HTTP status code of the first status line
 * returned for the URL, or one of the module's internal codes. Statuses above
 * _LINK_CHECKER_FAILURE_PIVOT count as failures; once error_count reaches the
 * 'link_checker_unpublish' threshold the node is unpublished and put in the
 * moderation queue.
 *
 * @param $link
 *   Object with nid, vid, link (the URL) and field_name; delta is optional
 *   and defaults to 0.
 */
function _link_checker_check_status($link) {
  static $message_once = TRUE;
  static $link_threshold_check = array();

  // allow_url_fopen is required to fetch the URL. Without it every fopen()
  // would fail and be recorded as a broken link, so stop on every call, not
  // only on the first one; the log entry is still written just once.
  if (!ini_get('allow_url_fopen')) {
    if ($message_once) {
      watchdog('cron', 'PHP INI "allow_url_fopen" is false', WATCHDOG_NOTICE);
      $message_once = FALSE;
    }
    return;
  }

  $delta = isset($link->delta) ? (int) $link->delta : 0;
  $status = 0; // default when no status line is found in the response

  $target = _link_checker_prepare_url($link->link);

  if ($target['scheme'] != 'http') {
    // Currently only http is supported; unparseable URLs end up here too.
    $status = _LINK_CHECKER_NOT_HTTP;
  }
  elseif (!function_exists('stream_get_meta_data')) {
    $status = _LINK_CHECKER_NO_MATA_DATA;
  }
  elseif (!($fp = @fopen($target['url'], 'r'))) {
    $status = _LINK_CHECKER_FILE_OPEN_FAILURE;
    unset($_SESSION['messages']['error']); // rough but gets rid of hostname errors, @fopen didn't work
  }
  else {
    $meta_data = @stream_get_meta_data($fp);
    fclose($fp);
    if (isset($meta_data['wrapper_data']) && is_array($meta_data['wrapper_data'])) {
      foreach ($meta_data['wrapper_data'] as $header) {
        // Look for the first server header starting with "http" and take the
        // second space-separated token as the status code.
        if (is_string($header) && strtolower(substr($header, 0, 4)) == 'http') {
          $pieces = explode(' ', $header);
          if (isset($pieces[1])) {
            $status = (int) $pieces[1];
          }
          break;
        }
      }
    }
  }

  // Tell the db what we have discovered. First check for an existing row.
  $sql = "SELECT lc.nid, lc.delta, lc.vid
    FROM {link_checker} lc
    WHERE lc.nid = %d AND lc.vid = %d AND lc.delta = %d AND lc.field_name = '%s' LIMIT 1";

  if (db_num_rows(db_query($sql, $link->nid, $link->vid, $delta, $link->field_name)) > 0) {
    // Row exists, so update it. The status placeholder is quoted the same way
    // as in the INSERT below.
    $sql = "UPDATE {link_checker}
      SET status = '%s', last_checked = %d
      WHERE nid = %d AND vid = %d AND delta = %d AND field_name = '%s'
      ";
    db_query($sql, $status, time(), $link->nid, $link->vid, $delta, $link->field_name);
  }
  else {
    // Row doesn't exist, so add it.
    $sql = "INSERT INTO {link_checker} (nid, vid, delta, last_checked, status, field_name)
      VALUES (%d, %d, %d, %d, '%s', '%s') ";
    db_query($sql, $link->nid, $link->vid, $delta, time(), $status, $link->field_name);
  }

  // The same link can be handed in more than once per request, so the error
  // counter is only touched once per (revision, delta, field).
  $seen_key = "{$link->vid}-{$delta}-{$link->field_name}";
  if (!isset($link_threshold_check[$seen_key])) {
    $threshold = (int) variable_get('link_checker_unpublish', 0);
    // Scope the counter statements to exactly the row written above; matching
    // on vid and delta alone would also hit rows of other link fields.
    $row_where = "WHERE nid = %d AND vid = %d AND delta = %d AND field_name = '%s'";

    if ((int) $status > _LINK_CHECKER_FAILURE_PIVOT && $threshold > 0) {
      // Failure and unpublishing is enabled: count it.
      db_query("UPDATE {link_checker} SET error_count = error_count + 1 $row_where", $link->nid, $link->vid, $delta, $link->field_name);
      $result = db_query("SELECT error_count FROM {link_checker} $row_where", $link->nid, $link->vid, $delta, $link->field_name);
      $stat = db_fetch_array($result);
      if ($stat && $stat['error_count'] >= $threshold) {
        // Unpublish and force the node into the moderation queue.
        // @todo you could work in something from actions module here instead?
        db_query("UPDATE {node} SET status = 0, moderate = 1 WHERE nid = %d", $link->nid);
      }
    }
    else {
      // Either the check succeeded or unpublishing is disabled: reset.
      db_query("UPDATE {link_checker} SET error_count = 0 $row_where", $link->nid, $link->vid, $delta, $link->field_name);
    }
  }
  $link_threshold_check[$seen_key] = TRUE;
}

  ViewVC Help
Powered by ViewVC 1.1.2