/[drupal]/contributions/modules/search_keywords/search_keywords.module
ViewVC logotype

Contents of /contributions/modules/search_keywords/search_keywords.module

Parent Directory | Revision Log | View Revision Graph


Revision 1.20 - (show annotations) (download) (as text)
Sun Apr 15 11:10:19 2007 UTC (2 years, 7 months ago) by sugree
Branch: MAIN
CVS Tags: HEAD
Changes since 1.19: +2 -2 lines
File MIME type: text/x-php
changed description (#135689)
1 <?php
2 // $Id: search_keywords.module,v 1.19 2007/03/27 06:00:44 sugree Exp $
3
4 /**
5 * @file
6 * Logs the search-engine keywords that visitors used to reach your site.
7 */
8
/**
 * Implementation of hook_help().
 *
 * Returns the help text for the given help section, or nothing when the
 * section is not one this module documents.
 *
 * @param $section
 *   The help section identifier (a path, optionally with a #fragment).
 */
function search_keywords_help($section) {
  // Loose comparison is used on purpose so the matching rules stay the same
  // as those of a switch statement.
  if ($section == 'admin/help#search_keywords') {
    return '<p>'. t('The search_keywords module keeps track of search keywords used to find content on this site.') .'</p>';
  }

  if ($section == 'admin/modules#description') {
    return t('Logs access search_keywords for your site.');
  }

  if ($section == 'admin/settings/search_keywords') {
    $placeholders = array('!search_keywords' => url('admin/logs/search_keywords'));
    return t('<p>Settings for the search keywords information that Drupal will keep about the site. See <a href="!search_keywords">site search_keywords</a> for the actual information.</p>', $placeholders);
  }

  if ($section == 'admin/logs/search_keywords') {
    return t('<p>This page shows you the recent search keywords.</p>');
  }
}
25
/**
 * Implementation of hook_exit().
 *
 * When logging is enabled and the referer looks like a search-engine result
 * URL, stores one row in {search_keywords_log} with the page title, path,
 * referer, visitor address, search engine host, keywords and current time.
 */
function search_keywords_exit() {
  if (!variable_get('search_keywords_enable_access_log', 0)) {
    return;
  }

  // Fetch the referer once; without one there is nothing to extract, so skip
  // the pattern matching entirely.
  $referer = referer_uri();
  if (empty($referer)) {
    return;
  }

  list($search_engine, $keywords) = _search_keywords_extract($referer);
  if (!isset($search_engine) || !isset($keywords)) {
    return;
  }

  // Request values are read defensively so a missing key does not raise a
  // notice while the page is shutting down.
  $path = isset($_GET['q']) ? $_GET['q'] : '';
  $hostname = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

  // drupal_get_title() is not guaranteed to be loaded when this hook runs
  // (presumably on cached page hits - confirm against the bootstrap order),
  // so fall back to the path in that case.
  $title = function_exists('drupal_get_title') ? drupal_get_title() : $path;

  db_query("INSERT INTO {search_keywords_log} (title, path, url, hostname, search_engine, keywords, timestamp) values('%s', '%s', '%s', '%s', '%s', '%s', %d)", $title, $path, $referer, $hostname, $search_engine, $keywords, time());
}
41
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function search_keywords_perm() {
  $permissions = array();
  // Gates the "recent keywords" log page.
  $permissions[] = 'access search keywords';
  return $permissions;
}
48
/**
 * Implementation of hook_menu().
 *
 * Registers two cacheable items: the module settings form and the
 * "recent keywords" log page.
 *
 * @param $may_cache
 *   TRUE when cacheable items are being collected; otherwise no items are
 *   returned.
 *
 * @return
 *   An array of menu item definitions.
 */
function search_keywords_menu($may_cache) {
  $can_view_log = user_access('access search keywords');

  if (!$may_cache) {
    return array();
  }

  // Settings form, restricted to site administrators.
  $settings_item = array(
    'path' => 'admin/settings/search_keywords',
    'title' => t('Search keywords'),
    'description' => t('Administer search keywords.'),
    'callback' => 'drupal_get_form',
    'callback arguments' => array('search_keywords_admin_settings'),
    'access' => user_access('administer site configuration'),
    'type' => MENU_NORMAL_ITEM,
  );

  // Log listing, restricted by this module's own permission.
  $log_item = array(
    'path' => 'admin/logs/search_keywords',
    'title' => t('Recent keywords'),
    'description' => t('View recent search keywords.'),
    'callback' => 'search_keywords_recent_keywords',
    'access' => $can_view_log,
    'weight' => 3,
  );

  return array($settings_item, $log_item);
}
77
/**
 * Menu callback; presents the "recent keywords" page.
 *
 * Lists the rows of {search_keywords_log} in a sortable, paged table
 * (30 rows per page), newest first by default.
 *
 * @return
 *   The rendered table followed by the pager.
 */
function search_keywords_recent_keywords() {
  $header = array(
    array('data' => t('Timestamp'), 'field' => 'a.timestamp', 'sort' => 'desc'),
    array('data' => t('Search Engine'), 'field' => 'a.search_engine'),
    array('data' => t('Keywords'), 'field' => 'a.keywords'),
    array('data' => t('Path'), 'field' => 'a.path'),
  );

  $sql = 'SELECT a.* FROM {search_keywords_log} a' . tablesort_sql($header);
  $result = pager_query($sql, 30);

  // The module list does not change while rendering, so check once.
  $show_flags = module_exists('ip2cc');

  // Must start as an array: with an empty log the loop never runs and the
  // table theme would otherwise receive an undefined variable.
  $rows = array();
  while ($log = db_fetch_object($result)) {
    $title = empty($log->title) ? $log->path : $log->title;
    $flag = $show_flags ? theme('ip2cc_ip_flag', $log->hostname) : '';

    // Rows stored before conversion was applied may still hold non-UTF-8
    // keywords; convert them for display.
    if (!_search_keywords_is_utf8($log->keywords)) {
      $log->keywords = _search_keywords_to_utf8($log->keywords);
    }

    $rows[] = array(
      array('data' => format_date($log->timestamp, 'small'), 'class' => 'nowrap'),
      array('data' => l($log->search_engine, "http://$log->search_engine/")),
      array('data' => $flag . l($log->keywords, $log->url)),
      array('data' => l(_search_keywords_column_width(decode_entities($title)), $log->path)),
    );
  }

  if (empty($rows)) {
    $rows[] = array(array('data' => t('No search keywords available.'), 'colspan' => count($header)));
  }

  $output = theme('table', $header, $rows);
  $output .= theme('pager', NULL, 30, 0);
  return $output;
}
113
/**
 * Form builder for the module settings page.
 *
 * Named as the drupal_get_form() argument of the
 * admin/settings/search_keywords menu item. Offers a switch for keyword
 * logging and the retention period that search_keywords_cron() reads.
 *
 * @return
 *   The form array wrapped by system_settings_form().
 */
function search_keywords_admin_settings() {
  $on_off = array('1' => t('Enabled'), '0' => t('Disabled'));

  // Both settings live in one fieldset.
  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('Access log settings'),
  );

  $fieldset['search_keywords_enable_access_log'] = array(
    '#type' => 'radios',
    '#title' => t('Enable access log'),
    '#default_value' => variable_get('search_keywords_enable_access_log', 0),
    '#options' => $on_off,
    '#description' => t('Log each page access. Required for referrer search_keywords.'),
  );

  // Retention choices in seconds, labelled by format_interval().
  $seconds = array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800);
  $retention_choices = drupal_map_assoc($seconds, 'format_interval');

  $fieldset['search_keywords_flush_accesslog_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard access logs older than'),
    '#default_value' => variable_get('search_keywords_flush_accesslog_timer', 259200),
    '#options' => $retention_choices,
    '#description' => t('Older access log entries (including referrer search_keywords) will be automatically discarded. Requires crontab.'),
  );

  $form = array('access' => $fieldset);
  return system_settings_form($form);
}
140
/**
 * Implementation of hook_cron().
 *
 * Deletes rows of {search_keywords_log} older than the configured retention
 * period (search_keywords_flush_accesslog_timer, 259200 seconds by default).
 */
function search_keywords_cron() {
  $retention = variable_get('search_keywords_flush_accesslog_timer', 259200);
  $oldest_kept = time() - $retention;
  db_query('DELETE FROM {search_keywords_log} WHERE timestamp < %d', $oldest_kept);
}
148
/**
 * Shortens a table cell value to a maximum number of characters.
 *
 * drupal_substr() is used rather than truncate_utf8() because the latter
 * takes the length as its second argument and a word-safe flag as its third;
 * calling it as ($column, 0, $width) asks for a zero-length result.
 * drupal_substr() also counts characters, matching the drupal_strlen() check.
 *
 * @param $column
 *   The text to shorten.
 * @param $width
 *   Maximum number of characters to keep. Defaults to 35.
 *
 * @return
 *   $column unchanged when it fits, otherwise its first $width characters
 *   followed by '...'.
 */
function _search_keywords_column_width($column, $width = 35) {
  if (drupal_strlen($column) <= $width) {
    return $column;
  }
  return drupal_substr($column, 0, $width) . '...';
}
156
/**
 * Formats a title and link pair as a small HTML fragment.
 *
 * @param $title
 *   Optional title; when non-empty it is emitted first, followed by a line
 *   break.
 * @param $link
 *   The link target; an empty value is replaced by '/'.
 *
 * @return
 *   The title line (if any) followed by a link whose text is the target
 *   shortened by _search_keywords_column_width().
 */
function _search_keywords_format_item($title, $link) {
  if (!$link) {
    $link = '/';
  }

  $heading = '';
  if ($title) {
    $heading = "$title<br />";
  }

  return $heading . l(_search_keywords_column_width($link), $link);
}
163
/**
 * Extracts the search engine host and the keywords from a referer URL.
 *
 * The URL is tested against a list of known result-page shapes; the first
 * pattern that matches wins, so the order of the list matters. Every pattern
 * has exactly two capture groups: (1) the part after the scheme up to the
 * matched path and (2) the raw keyword string.
 *
 * @param $url
 *   The referer URL (http or https). Empty or non-string values yield no
 *   match.
 *
 * @return
 *   array($search_engine, $keywords) with the keywords URL-decoded and
 *   converted to UTF-8 when needed, or array(NULL, NULL) when no pattern
 *   matches.
 */
function _search_keywords_extract($url) {
  if (!is_string($url) || $url === '') {
    return array(NULL, NULL);
  }

  $engines = array(
    '#https?://(.*?)/search\?.*?q=([^&]+)#i',
    '#https?://(.*?)/base/search\?.*?q=([^&]+)#i',
    '#https?://(.*?)/blogsearch\?.*?q=([^&]+)#i',
    '#https?://(.*?)/search\?.*?p=([^&]+)#i',
    '#https?://(.*?)/results\.aspx\?.*?q=([^&]+)#i',
    '#https?://(.*?)/tags?/(.*)#i',
    '#https?://(.*?)/search/(.*)#i',
    '#https?://(.*?)/search\.php\?.*?q=([^&]+)#i',
    '#https?://(.*?)/\?.*?tag=([^&]+)#i',
    '#https?://(.*?)/cgi-bin/search\?.*?q=([^&]+)#i',
    '#https?://(.*?)/aolcom/search\?.*?query=([^&]+)#i',
    // The JSP page name is matched but deliberately not captured, so that
    // group 2 is the keyword string here as in every other pattern.
    '#https?://(.*?)/jsp/[^.]+\.jsp\?.*?searchfor=([^&]+)#i',
    '#https?://(.*?)/p/search\?.*?qt=([^&]+)#i',
    '#https?://(.*?)/web\?.*?q=([^&]+)#i',
    '#https?://(.*?)/searchScreen\?.*?q=([^&]+)#i', // Seznam.cz
    '#https?://(.*?)/searchScreen\?.*?w=([^&]+)#i', // Seznam.cz
    '#https?://(.*?)/index\.php\?.*?q=([^&]+)#i', // ZoooHoo.cz
    '#https?://(.*?)/\?.*?q=([^&]+)#i', // ZoooHoo.cz
  );

  foreach ($engines as $pattern) {
    if (preg_match($pattern, $url, $matches)) {
      $keywords = urldecode($matches[2]);
      if (!_search_keywords_is_utf8($keywords)) {
        $keywords = _search_keywords_to_utf8($keywords);
      }
      return array($matches[1], $keywords);
    }
  }

  return array(NULL, NULL);
}
195
/**
 * Checks whether a string consists only of well-formed UTF-8 sequences.
 *
 * Besides multi-byte sequences, only tab, line feed, carriage return and the
 * printable ASCII range 0x20-0x7E are accepted as single bytes.
 *
 * @param $string
 *   The byte string to test.
 *
 * @return
 *   The preg_match() result: 1 when the whole string is valid, 0 when it is
 *   not, FALSE if the match itself failed.
 */
function _search_keywords_is_utf8($string) {
  $sequences = array(
    // ASCII
    '[\x09\x0A\x0D\x20-\x7E]',
    // non-overlong 2-byte
    '[\xC2-\xDF][\x80-\xBF]',
    // excluding overlongs
    '\xE0[\xA0-\xBF][\x80-\xBF]',
    // straight 3-byte
    '[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}',
    // excluding surrogates
    '\xED[\x80-\x9F][\x80-\xBF]',
    // planes 1-3
    '\xF0[\x90-\xBF][\x80-\xBF]{2}',
    // planes 4-15
    '[\xF1-\xF3][\x80-\xBF]{3}',
    // plane 16
    '\xF4[\x80-\x8F][\x80-\xBF]{2}',
  );

  $pattern = '%^(?:' . implode('|', $sequences) . ')*$%s';
  return preg_match($pattern, $string);
}
209
/**
 * Converts a TIS-620 (Thai) encoded string to UTF-8.
 *
 * Tries drupal_convert_to_utf8() first and falls back to the module's own
 * lookup-table conversion when that function is unavailable or returns an
 * empty/FALSE result.
 *
 * @param $string
 *   The string to convert; it is treated as TIS-620.
 *
 * @return
 *   The converted string.
 */
function _search_keywords_to_utf8($string) {
  $converted = FALSE;

  // This helper is reached from hook_exit(), which already guards
  // drupal_get_title() with function_exists(); apply the same caution here
  // so a missing include cannot cause a fatal error.
  if (function_exists('drupal_convert_to_utf8')) {
    $converted = drupal_convert_to_utf8($string, 'TIS-620');
  }

  if (!$converted) {
    $converted = _search_keywords_tis620_to_utf8($string);
  }
  return $converted;
}
217
/**
 * Converts TIS-620 bytes to UTF-8 without any conversion library.
 *
 * Bytes 0xA1-0xDA and 0xDF-0xFB are replaced by the three-byte UTF-8
 * sequences "\xE0\xB8\x81".."\xE0\xB8\xBA" and "\xE0\xB8\xBF".."\xE0\xB9\x9B"
 * respectively. All other bytes (ASCII, and the unassigned 0xDB-0xDE and
 * 0xFC-0xFF) are left untouched.
 *
 * @param $string
 *   The TIS-620 encoded byte string.
 *
 * @return
 *   The string with every mapped byte replaced by its UTF-8 sequence.
 */
function _search_keywords_tis620_to_utf8($string) {
  // preg_match() replaces ereg(), which was removed in PHP 7. Without the
  // /u modifier the character class works on raw bytes.
  if (!preg_match('/[\xA1-\xFF]/', $string)) {
    return $string;
  }

  // The table is built once per request and reused.
  static $tis620 = NULL;
  if ($tis620 === NULL) {
    $tis620 = array();

    // 0xA1-0xDA -> E0 B8 81..BA: the last byte is the source byte - 0x20.
    for ($byte = 0xA1; $byte <= 0xDA; $byte++) {
      $tis620[chr($byte)] = "\xe0\xb8" . chr($byte - 0x20);
    }

    // 0xDF -> E0 B8 BF; 0xDB-0xDE are unassigned and stay unmapped.
    $tis620["\xdf"] = "\xe0\xb8\xbf";

    // 0xE0-0xFB -> E0 B9 80..9B: the last byte is the source byte - 0x60.
    for ($byte = 0xE0; $byte <= 0xFB; $byte++) {
      $tis620[chr($byte)] = "\xe0\xb9" . chr($byte - 0x60);
    }
  }

  return strtr($string, $tis620);
}

  ViewVC Help
Powered by ViewVC 1.1.2