/[drupal]/contributions/modules/antiproxyhack/antiproxyhack.module
ViewVC logotype

Contents of /contributions/modules/antiproxyhack/antiproxyhack.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (show annotations) (download) (as text)
Sat Sep 1 16:37:50 2007 UTC (2 years, 2 months ago) by geojaz
Branch: MAIN
CVS Tags: DRUPAL-5--1-1, DRUPAL-5--1-2, HEAD
Changes since 1.1: +8 -9 lines
File MIME type: text/x-php
Missing parameter in antiproxyhack_is_spider. Fixed.
1 <?php
2 /* $Id:*/
3
4 /**
5 * @file
6 * Insert tags into header to protect against Google Proxy Attack
7 */
8
/**
 * Implementation of hook_help().
 *
 * No help text is currently returned for any section: the only candidate
 * message (for the module's settings page) is disabled below, so this
 * function always returns NULL.
 */
function antiproxyhack_help($section) {
  if ($section == 'admin/settings/antiproxyhack') {
    // Help text intentionally disabled:
    // return t('From here you can manage all the settings for antiproxyhack.');
  }
}
18
/**
 * Implementation of hook_menu().
 *
 * On the cacheable pass this registers two paths, both guarded by the
 * 'administer antiproxyhack' permission:
 *  - admin/settings/antiproxyhack: the settings form.
 *  - aph/update: a callback that runs antiproxyhack_update_all().
 *
 * On the non-cacheable pass no menu items are returned; instead the proxy
 * check is run so the robots meta tag can be added to the page head.
 */
function antiproxyhack_menu($may_cache) {
  if (!$may_cache) {
    // Non-cached pass: use it as the hook point for the proxy detection.
    antiproxyhack_metaRobotsExcludeProxies();
    return array();
  }

  $settings_page = array(
    'title' => t('Google Proxy Attack'),
    'path' => 'admin/settings/antiproxyhack',
    'callback' => 'drupal_get_form',
    'callback arguments' => array('antiproxyhack_settings_form'),
    'description' => t('Configure Google Proxy Attack settings.'),
    'access' => user_access('administer antiproxyhack'),
    'type' => MENU_NORMAL_ITEM,
  );

  $update_trigger = array(
    'path' => 'aph/update',
    'callback' => 'antiproxyhack_update_all',
    'access' => user_access('administer antiproxyhack'),
    'type' => MENU_CALLBACK,
  );

  return array($settings_page, $update_trigger);
}
44
/**
 * Form builder for the module's settings page.
 *
 * Defines two fieldsets:
 *  - "Robot Settings": which spiders to detect (antiproxyhack_robots) and the
 *    markup to insert for an illicit proxy (antiproxyhack_meta_tags).
 *  - "Robot Confidence Checks": which detection methods are enabled
 *    (antiproxyhack_confidence_checks).
 *
 * The form is passed through system_settings_form(), so each element is
 * stored under the variable of the same name.
 */
function antiproxyhack_settings_form() {
  $spider_choices = array(
    'google' => 'Google',
    'yahoo' => 'Yahoo!',
    'msn' => 'MSN',
    'ask' => 'Ask.com',
  );

  $method_choices = array(
    'check_uas' => 'Check User Agent',
    'check_ips' => 'Check IP Range',
  );

  $settings = array(
    'robot_settings' => array(
      '#type' => 'fieldset',
      '#title' => t('Robot Settings'),
      '#weight' => -5,
      'antiproxyhack_robots' => array(
        '#type' => 'checkboxes',
        '#options' => $spider_choices,
        '#default_value' => variable_get('antiproxyhack_robots', 0),
        '#title' => t('Robots to Detect'),
      ),
      'antiproxyhack_meta_tags' => array(
        '#type' => 'textfield',
        '#default_value' => variable_get('antiproxyhack_meta_tags', '<meta name="robots" content="noindex,nofollow" />'),
        '#title' => t('Meta tags to insert when illicit proxy scans site'),
      ),
    ),
    'robot_checking' => array(
      '#type' => 'fieldset',
      '#title' => t('Robot Confidence Checks'),
      '#weight' => -4,
      'antiproxyhack_confidence_checks' => array(
        '#type' => 'checkboxes',
        '#options' => $method_choices,
        '#default_value' => variable_get('antiproxyhack_confidence_checks', 0),
        '#title' => t('Methods of Detection'),
        '#description' => t('Please select the types of checking you would like to conduct to determine if a given user agent is a "bad" robot.'),
      ),
    ),
  );

  return system_settings_form($settings);
}
86
/**
 * Implementation of hook_cron().
 *
 * Delegates to antiproxyhack_update_all() with its default arguments and
 * hands back whatever that function returns.
 */
function antiproxyhack_cron() {
  $outcome = antiproxyhack_update_all();
  return $outcome;
}
90
/**
 * Scores how likely it is that the current request comes from a given spider.
 *
 * A check is performed only when BOTH the caller asks for it ($check_uas /
 * $check_ips) AND it is enabled in the 'antiproxyhack_confidence_checks'
 * setting. Previously the two flags were accepted but ignored, which made
 * "UA only" and "IP only" calls return identical results.
 *
 * @param $spider_name
 *   Spider to test against (e.g. 'google'); '' matches any spider.
 * @param $check_uas
 *   Whether to compare the request's User-Agent with stored 'UA' records.
 * @param $check_ips
 *   Whether to compare the request's remote address with stored 'IP' records.
 * @param $use_user_defined_data
 *   When FALSE, only records flagged is_user_defined = 'N' are considered.
 * @param $ignore_bad_uas
 *   When TRUE, records stored under the spider name 'bad' are excluded.
 *
 * @return
 *   Confidence level: 0 (no match), 2 (UA match), 3 (IP match) or 5 (both).
 */
function antiproxyhack_is_spider($spider_name = '', $check_uas = true, $check_ips = true, $use_user_defined_data = true, $ignore_bad_uas = true) {
  // default confidence level to 0
  $confidence = 0;

  // Nothing requested by the caller: no lookups are needed.
  if (!$check_uas && !$check_ips) {
    return $confidence;
  }

  // The setting is saved by a checkboxes element; guard against it being
  // unset or holding a non-array value.
  $enabled_checks = variable_get('antiproxyhack_confidence_checks', array());
  if (!is_array($enabled_checks)) {
    $enabled_checks = array();
  }

  $user_defined_filter = $use_user_defined_data ? '' : 'N';
  $excluded_spider = $ignore_bad_uas ? 'bad' : '';

  // An empty value would disable the corresponding filter in
  // antiproxyhack_get() and match every stored record, so skip the lookup
  // when the request carries no User-Agent / remote address.
  $user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
  $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

  // matching user agent?
  if ($check_uas && !empty($enabled_checks['check_uas']) && !empty($user_agent)) {
    if (antiproxyhack_get(0, $spider_name, 'UA', $user_agent, '', $user_defined_filter, $excluded_spider)) {
      $confidence += 2;
    }
  }

  // matching IP?
  if ($check_ips && !empty($enabled_checks['check_ips']) && !empty($remote_addr)) {
    if (antiproxyhack_get(0, $spider_name, 'IP', '', $remote_addr, $user_defined_filter, $excluded_spider)) {
      $confidence += 3;
    }
  }

  // return confidence level
  return $confidence;
}
108
/**
 * Retrieves cloaking records filtered by the supplied parameters.
 *
 * Every parameter is optional; a parameter that evaluates to FALSE adds no
 * filter. All values are passed to db_query() as placeholder arguments
 * rather than being concatenated into the SQL string.
 *
 * @param $id
 *   Record id to match.
 * @param $spider_name
 *   Exact spider name to match.
 * @param $record_type
 *   Record type to match (the callers in this file use 'UA' and 'IP').
 * @param $value
 *   Exact value to match.
 * @param $wildcard_value
 *   Matches rows whose value equals this string, or whose value followed by
 *   a dot is a prefix of it (e.g. stored '66.249' matches '66.249.64.1').
 * @param $is_user_defined_data
 *   Value of the is_user_defined column to match.
 * @param $not_spider_name
 *   Spider name to exclude.
 *
 * @return
 *   Array of matching rows, each an associative array; empty if none match.
 */
function antiproxyhack_get($id = 0, $spider_name = '', $record_type = '', $value = '', $wildcard_value = '', $is_user_defined_data = '', $not_spider_name = '') {
  $conditions = array();
  $args = array();

  if ($id) {
    $conditions[] = 'id = %d';
    $args[] = (int) $id;
  }
  if ($spider_name) {
    $conditions[] = "spider_name = '%s'";
    $args[] = $spider_name;
  }
  if ($record_type) {
    $conditions[] = "record_type = '%s'";
    $args[] = $record_type;
  }
  if ($value) {
    $conditions[] = "value = '%s'";
    $args[] = $value;
  }
  if ($wildcard_value) {
    // '%%' is db_query()'s escape for a literal '%' (the LIKE wildcard).
    $conditions[] = "( '%s' = value OR '%s' LIKE CONCAT(value, '.%%') )";
    $args[] = $wildcard_value;
    $args[] = $wildcard_value;
  }
  if ($is_user_defined_data) {
    $conditions[] = "is_user_defined = '%s'";
    $args[] = $is_user_defined_data;
  }
  if ($not_spider_name) {
    $conditions[] = "spider_name <> '%s'";
    $args[] = $not_spider_name;
  }

  // Select with a bare '*': a table-qualified '<table>.*' would not be
  // rewritten by table prefixing and fails on prefixed installs.
  $sql = 'SELECT * FROM {antiproxyhack_cloak_data}';
  if ($conditions) {
    $sql .= ' WHERE ' . implode(' AND ', $conditions);
  }

  $result = db_query($sql, $args);
  $rows = array();
  while ($row = db_fetch_array($result)) {
    $rows[] = $row;
  }
  return $rows;
}
150
/**
 * Refreshes the spider database from iplists.com.
 *
 * The refresh only happens when the last recorded check is more than 7 days
 * old AND the version published by iplists.com differs from the stored one.
 * The check timestamp is rewritten whenever a version string was fetched,
 * even if the data turns out to be current.
 *
 * @param $delete_user_defined_data
 *   Passed through to antiproxyhack_update_cloaking_DB(); when TRUE the
 *   user-defined records of each spider are deleted as well.
 *
 * @return
 *   FALSE when no update was performed, or the string
 *   "Database updated successfully." after the spider lists were processed.
 */
function antiproxyhack_update_all($delete_user_defined_data = false) {
  // retrieve last update information from database
  $updated = db_fetch_array(db_query('SELECT * FROM {antiproxyhack_cloak_update}'));
  $db_version = $updated ? $updated['version'] : NULL;
  $updated_on = $updated ? $updated['updated_on'] : NULL;

  // if the latest check is more recent than 7 days, don't attempt an update
  if (isset($updated_on) && (strtotime($updated_on) > strtotime("-604800 seconds"))) {
    return false;
  }

  // read the latest iplists version
  $version_url = 'http://www.iplists.com/nw/version.php';

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $version_url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_TIMEOUT, 60);
  $latest_version = curl_exec($ch);
  curl_close($ch);

  // curl_exec() returns FALSE on failure; surrounding whitespace would make
  // two otherwise identical version strings compare as different.
  $latest_version = is_string($latest_version) ? trim($latest_version) : '';

  // if no updated version information was retrieved, abort
  if (!$latest_version) {
    // return false to indicate an update wasn't performed
    return false;
  }

  // save the update data (the remote string goes in as a placeholder argument)
  db_query('DELETE FROM {antiproxyhack_cloak_update}');
  db_query("INSERT INTO {antiproxyhack_cloak_update} (version, updated_on) VALUES ('%s', NOW())", $latest_version);

  // if we already have the current data, don't attempt an update.
  // Strict comparison: with '==' numeric-looking strings such as "1.10" and
  // "1.1" would be treated as equal.
  if ($latest_version === $db_version) {
    return false;
  }

  // update the database, one list per spider
  $sources = array(
    'google' => 'http://www.iplists.com/nw/google.txt',
    'yahoo' => 'http://www.iplists.com/nw/inktomi.txt',
    'msn' => 'http://www.iplists.com/nw/msn.txt',
    'ask' => 'http://www.iplists.com/nw/askjeeves.txt',
    'altavista' => 'http://www.iplists.com/nw/altavista.txt',
    'lycos' => 'http://www.iplists.com/nw/lycos.txt',
    'wisenut' => 'http://www.iplists.com/nw/wisenut.txt',
  );
  foreach ($sources as $spider => $list_url) {
    antiproxyhack_update_cloaking_DB($spider, $list_url, $delete_user_defined_data);
  }

  return "Database updated successfully.";
}
216
/**
 * Replaces the stored data of one spider with the list published at $url.
 *
 * The list is downloaded with cURL and parsed for user-agent lines
 * (# UA "...") and bare IP lines. If either list comes back empty the
 * stored data is left untouched.
 *
 * @param $spider_name
 *   Spider whose records are replaced.
 * @param $url
 *   Location of the list to download.
 * @param $delete_user_defined_data
 *   When FALSE only records flagged is_user_defined = 'N' are deleted before
 *   the new data is inserted; when TRUE all records of the spider are.
 */
function antiproxyhack_update_cloaking_DB($spider_name, $url, $delete_user_defined_data = false) {
  // Fetch the list (response headers included, redirects followed).
  // NOTE: additional settings are required when accessing the web through a proxy
  $curl = curl_init();
  curl_setopt($curl, CURLOPT_URL, $url);
  curl_setopt($curl, CURLOPT_HEADER, 1);
  curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
  curl_setopt($curl, CURLOPT_TIMEOUT, 60);
  $response = curl_exec($curl);
  curl_close($curl);

  // Extract the user agents and the IPs from the downloaded text.
  $parsed = antiproxyhack_parse_list_URL($response, '/^# UA "(.*)"$/m', '/^([0-9.]+)$/m');
  $user_agents = $parsed['ua_list'];
  $addresses = $parsed['ip_list'];

  // Both lists are required; otherwise cancel the update.
  if (!($user_agents && $addresses)) {
    return;
  }

  // Lock the cloak_data table to avoid concurrency problems.
  db_query('LOCK TABLES {antiproxyhack_cloak_data} WRITE');

  // Drop the existing data for this spider ...
  antiproxyhack_delete_spider_data($spider_name, $delete_user_defined_data ? '' : 'N');

  // ... then store the user agents first and the IPs second.
  $batches = array('UA' => $user_agents, 'IP' => $addresses);
  foreach ($batches as $record_type => $entries) {
    foreach ($entries as $entry) {
      antiproxyhack_insert_spider_data($spider_name, $record_type, $entry);
    }
  }

  // Release the table lock.
  db_query('UNLOCK TABLES');
}
260
/**
 * Inserts a new row of data into the cloaking table.
 *
 * Values are handed to db_query() as placeholder arguments, so escaping is
 * done by the database layer and strings containing '%s', '%d' or braces
 * are stored unchanged.
 *
 * @param $spider_name
 *   Spider the record belongs to (callers in this file also use 'bad').
 * @param $record_type
 *   Record type; the callers in this file use 'UA' and 'IP'.
 * @param $value
 *   The user-agent string or IP to store.
 * @param $is_user_defined
 *   Flag stored in the is_user_defined column; defaults to 'N'.
 */
function antiproxyhack_insert_spider_data($spider_name, $record_type, $value, $is_user_defined = 'N') {
  db_query(
    "INSERT INTO {antiproxyhack_cloak_data} (spider_name, record_type, value, is_user_defined) VALUES ('%s', '%s', '%s', '%s')",
    $spider_name,
    $record_type,
    $value,
    $is_user_defined
  );
}
276
/**
 * Deletes the cloaking data stored for the given spider.
 *
 * Values are handed to db_query() as placeholder arguments instead of being
 * escaped by hand and concatenated into the statement.
 *
 * @param $spider_name
 *   Spider whose records are deleted.
 * @param $is_user_defined
 *   When non-empty, only records whose is_user_defined column equals this
 *   value are deleted; when empty, all records of the spider are.
 */
function antiproxyhack_delete_spider_data($spider_name, $is_user_defined = '') {
  $sql = "DELETE FROM {antiproxyhack_cloak_data} WHERE spider_name = '%s'";
  $args = array($spider_name);

  if ($is_user_defined) {
    $sql .= " AND is_user_defined = '%s'";
    $args[] = $is_user_defined;
  }

  db_query($sql, $args);
}
291
/**
 * Extracts user agents and IPs from the text of a downloaded spider list.
 *
 * @param $data
 *   Raw text to scan.
 * @param $ua_regex
 *   Pattern whose first capture group yields a user agent.
 * @param $ip_regex
 *   Pattern whose first capture group yields an IP.
 *
 * @return
 *   array('ua_list' => ..., 'ip_list' => ...), each holding every match of
 *   the first capture group of the corresponding pattern.
 */
function antiproxyhack_parse_list_URL($data, $ua_regex, $ip_regex) {
  $patterns = array('ua_list' => $ua_regex, 'ip_list' => $ip_regex);
  $extracted = array();
  foreach ($patterns as $key => $pattern) {
    preg_match_all($pattern, $data, $found);
    // Index 1 holds the matches of the first capture group.
    $extracted[$key] = $found[1];
  }
  return $extracted;
}
297
/**
 * Verifies a claimed spider through reverse and forward DNS lookups.
 *
 * Only use this when the spider was not found via the IPLists cloaking
 * database.
 *
 * The outcome of the DNS check is stored as a user-defined 'IP' record:
 * under the spider's own name when verification succeeds, under 'bad'
 * when it fails.
 *
 * @param $ua
 *   Two-element array: spider name, and a regex the reverse-resolved host
 *   name of the remote address has to match.
 *
 * @return
 *   TRUE if the remote address reverse-resolves to a matching host name and
 *   that host name resolves back to the same address; FALSE otherwise,
 *   including when the 'bad' lookup or the spider lookup below rules the
 *   request out before any DNS query is made.
 */
function antiproxyhack_bot_verify_by_DNS($ua = array('google', '#.*\.googlebot\.com$#')) {
  // Consult the cache of bad bots first ('bad' records are not excluded here).
  if (antiproxyhack_is_spider('bad', false, true, true, false)) {
    return false;
  }

  // Ask for a user-agent-only check; without a hit there is nothing to verify.
  if (!antiproxyhack_is_spider($ua[0], true, false)) {
    return false;
  }

  $remote_ip = $_SERVER['REMOTE_ADDR'];

  // Reverse lookup, then (only if the host name fits the expected domain)
  // a forward lookup that must lead back to the connecting address.
  $reverse_host = gethostbyaddr($remote_ip);
  $verified = preg_match($ua[1], $reverse_host) && $remote_ip == gethostbyname($reverse_host);

  // Remember the verdict so later requests are answered from the database.
  antiproxyhack_insert_spider_data($verified ? $ua[0] : 'bad', 'IP', $remote_ip, 'Y');

  return $verified;
}
339
/**
 * Adds the configured robots meta tag when a request claims to be a known
 * spider but cannot be confirmed as one.
 *
 * For each entry of $uas the request is first tested with a user-agent-only
 * call to antiproxyhack_is_spider(). On a hit, it is confirmed either by an
 * IP-only call or by antiproxyhack_bot_verify_by_DNS(); if neither confirms
 * it, the 'antiproxyhack_meta_tags' setting is added to the HTML head.
 *
 * @param $auto_modify_content
 *   Not referenced by the current implementation.
 * @param $uas
 *   List of array(spider name, host-name regex) pairs to test.
 * @param $meta_tag
 *   Not referenced by the current implementation; the markup comes from the
 *   'antiproxyhack_meta_tags' variable.
 * @param $passlist_regex
 *   Optional regex (ex: #become|lycos|somestupidbot#); a user agent matching
 *   it is let through without further checks.
 *
 * @return
 *   FALSE if the user agent is on the passlist or the spider was confirmed,
 *   TRUE if the meta tag was added, and the integer 2 if no entry of $uas
 *   matched on the user-agent test.
 */
function antiproxyhack_metaRobotsExcludeProxies($auto_modify_content = true, $uas = array(array('google', '#.*\.googlebot\.com$#'), array('yahoo', '#.*\.yahoo\.net$#'), array('msn', '#.*\.live\.com$#'), array('ask', '#.*\.ask.com$#') ), $meta_tag = '<meta name="robots" content="noindex,nofollow" />', $passlist_regex = '') {
  // Passlisted user agents are never touched.
  if ($passlist_regex && preg_match($passlist_regex, $_SERVER['HTTP_USER_AGENT'])) {
    return false;
  }

  foreach ($uas as $candidate) {
    // Not claiming to be this spider: try the next one.
    if (!antiproxyhack_is_spider($candidate[0], true, false)) {
      continue;
    }

    // Confirmed via the stored lists, or failing that via DNS.
    if (antiproxyhack_is_spider($candidate[0], false, true) || antiproxyhack_bot_verify_by_DNS($candidate)) {
      return false;
    }

    // Claimed but unconfirmed: tell robots not to index this response.
    drupal_set_html_head(variable_get('antiproxyhack_meta_tags', '<meta name="robots" content="noindex,nofollow" />'));
    return true;
  }

  // No spider matched on the user-agent test (same value as `true + 1`).
  return 2;
}

  ViewVC Help
Powered by ViewVC 1.1.2