| 1 |
<?php
|
| 2 |
// $Id: search_keywords.module,v 1.19 2007/03/27 06:00:44 sugree Exp $
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
 * Logs the search engine keywords that visitors used to reach your site.
|
| 7 |
*/
|
| 8 |
|
| 9 |
/**
 * Implementation of hook_help().
 *
 * Returns the translated help text for the sections this module knows about:
 * its help page, its 'admin/modules#description' entry, its settings page and
 * its log page.
 *
 * @param $section
 *   The path (or pseudo-path such as 'admin/help#search_keywords') for which
 *   help text is being requested.
 *
 * @return
 *   The help string for a recognised section; nothing for any other section.
 */
function search_keywords_help($section) {
  if ($section == 'admin/help#search_keywords') {
    return '<p>'. t('The search_keywords module keeps track of search keywords used to find content on this site.') .'</p>';
  }

  if ($section == 'admin/modules#description') {
    return t('Logs access search_keywords for your site.');
  }

  if ($section == 'admin/settings/search_keywords') {
    // The placeholder is filled with the URL of the log page.
    $placeholders = array('!search_keywords' => url('admin/logs/search_keywords'));
    return t('<p>Settings for the search keywords information that Drupal will keep about the site. See <a href="!search_keywords">site search_keywords</a> for the actual information.</p>', $placeholders);
  }

  if ($section == 'admin/logs/search_keywords') {
    return t('<p>This page shows you the recent search keywords.</p>');
  }
}
|
| 25 |
|
| 26 |
/**
 * Implementation of hook_exit().
 *
 * When logging is enabled and the referrer looks like a search engine result
 * page, one row describing the hit is written to {search_keywords_log}.
 */
function search_keywords_exit() {
  if (!variable_get('search_keywords_enable_access_log', 0)) {
    return;
  }

  // Read the referrer once and reuse it for both parsing and logging.
  $referer = referer_uri();
  list($search_engine, $keywords) = _search_keywords_extract($referer);
  if (!isset($search_engine) || !isset($keywords)) {
    // Not a recognised search engine referrer: nothing to log.
    return;
  }

  // Guard the superglobal reads so a request without them does not raise
  // undefined-index notices.
  $path = isset($_GET['q']) ? $_GET['q'] : '';
  $hostname = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

  // drupal_get_title() is not always loaded when this hook runs, so fall back
  // to the path in that case.
  $title = function_exists('drupal_get_title') ? drupal_get_title() : $path;

  db_query("INSERT INTO {search_keywords_log} (title, path, url, hostname, search_engine, keywords, timestamp) values('%s', '%s', '%s', '%s', '%s', '%s', %d)", $title, $path, $referer, $hostname, $search_engine, $keywords, time());
}
|
| 41 |
|
| 42 |
/**
 * Implementation of hook_perm().
 *
 * @return
 *   A list holding the single permission this module defines.
 */
function search_keywords_perm() {
  $permissions = array();
  $permissions[] = 'access search keywords';
  return $permissions;
}
|
| 48 |
|
| 49 |
/**
 * Implementation of hook_menu().
 *
 * Registers two cacheable items: the settings form and the recent keywords
 * log page.
 *
 * @param $may_cache
 *   TRUE when cacheable menu items are being collected.
 *
 * @return
 *   An array of menu items; empty when $may_cache is FALSE.
 */
function search_keywords_menu($may_cache) {
  $can_view_log = user_access('access search keywords');

  if (!$may_cache) {
    return array();
  }

  // Settings form, restricted to site administrators.
  $settings_item = array(
    'path' => 'admin/settings/search_keywords',
    'title' => t('Search keywords'),
    'description' => t('Administer search keywords.'),
    'callback' => 'drupal_get_form',
    'callback arguments' => array('search_keywords_admin_settings'),
    'access' => user_access('administer site configuration'),
    'type' => MENU_NORMAL_ITEM,
  );

  // Log page, gated by this module's own permission.
  $log_item = array(
    'path' => 'admin/logs/search_keywords',
    'title' => t('Recent keywords'),
    'description' => t('View recent search keywords.'),
    'callback' => 'search_keywords_recent_keywords',
    'access' => $can_view_log,
    'weight' => 3,
  );

  return array($settings_item, $log_item);
}
|
| 77 |
|
| 78 |
/**
 * Menu callback; presents the "recent keywords" page.
 *
 * Builds a sortable, paged table from {search_keywords_log}, newest entries
 * first by default.
 *
 * @return
 *   The themed table followed by the themed pager.
 */
function search_keywords_recent_keywords() {
  // Page size shared by the query and the pager so they cannot drift apart.
  $per_page = 30;

  $header = array(
    array('data' => t('Timestamp'), 'field' => 'a.timestamp', 'sort' => 'desc'),
    array('data' => t('Search Engine'), 'field' => 'a.search_engine'),
    array('data' => t('Keywords'), 'field' => 'a.keywords'),
    array('data' => t('Path'), 'field' => 'a.path'),
  );

  $sql = 'SELECT a.* FROM {search_keywords_log} a' . tablesort_sql($header);
  $result = pager_query($sql, $per_page);

  // Start from an empty list so an empty log still hands theme('table') an
  // array rather than an undefined variable.
  $rows = array();
  while ($log = db_fetch_object($result)) {
    // Fall back to the path when no title was recorded.
    $title = empty($log->title) ? $log->path : $log->title;

    // Optional flag in front of the keywords when the ip2cc module is enabled.
    $flag = module_exists('ip2cc') ? theme('ip2cc_ip_flag', $log->hostname) : '';

    // Stored keywords that are not valid UTF-8 are converted before display.
    if (!_search_keywords_is_utf8($log->keywords)) {
      $log->keywords = _search_keywords_to_utf8($log->keywords);
    }

    $rows[] = array(
      array('data' => format_date($log->timestamp, 'small'), 'class' => 'nowrap'),
      array('data' => l($log->search_engine, "http://$log->search_engine/")),
      array('data' => $flag . l($log->keywords, $log->url)),
      array('data' => l(_search_keywords_column_width(decode_entities($title)), $log->path)),
    );
  }

  if (empty($rows)) {
    // Show an explicit message instead of a bare header row.
    $rows[] = array(array('data' => t('No search keywords have been logged yet.'), 'colspan' => count($header)));
  }

  $output = theme('table', $header, $rows);
  $output .= theme('pager', NULL, $per_page, 0);
  return $output;
}
|
| 113 |
|
| 114 |
/**
 * Form builder; the settings form for the search keywords log.
 *
 * Offers an on/off switch for logging and a retention period for log rows.
 *
 * @return
 *   The form array, passed through system_settings_form().
 */
function search_keywords_admin_settings() {
  // Access log settings.
  $toggle_choices = array('1' => t('Enabled'), '0' => t('Disabled'));

  $access = array(
    '#type' => 'fieldset',
    '#title' => t('Access log settings'),
  );

  $access['search_keywords_enable_access_log'] = array(
    '#type' => 'radios',
    '#title' => t('Enable access log'),
    '#default_value' => variable_get('search_keywords_enable_access_log', 0),
    '#options' => $toggle_choices,
    '#description' => t('Log each page access. Required for referrer search_keywords.'),
  );

  // Retention periods in seconds, labelled with format_interval().
  $retention_seconds = array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800);
  $retention_choices = drupal_map_assoc($retention_seconds, 'format_interval');

  $access['search_keywords_flush_accesslog_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard access logs older than'),
    '#default_value' => variable_get('search_keywords_flush_accesslog_timer', 259200),
    '#options' => $retention_choices,
    '#description' => t('Older access log entries (including referrer search_keywords) will be automatically discarded. Requires crontab.'),
  );

  $form = array('access' => $access);
  return system_settings_form($form);
}
|
| 140 |
|
| 141 |
/**
 * Implementation of hook_cron().
 *
 * Deletes rows from {search_keywords_log} that are older than the configured
 * retention period (259200 seconds when the variable is unset).
 */
function search_keywords_cron() {
  $now = time();
  $max_age = variable_get('search_keywords_flush_accesslog_timer', 259200);
  $cutoff = $now - $max_age;

  db_query('DELETE FROM {search_keywords_log} WHERE timestamp < %d', $cutoff);
}
|
| 148 |
|
| 149 |
/**
 * Shortens a table cell value to a maximum number of characters.
 *
 * @param $column
 *   The text to display.
 * @param $width
 *   Maximum number of characters to keep before appending '...'.
 *
 * @return
 *   $column unchanged when it is at most $width characters long; otherwise
 *   its first $width characters followed by '...'.
 */
function _search_keywords_column_width($column, $width = 35) {
  if (drupal_strlen($column) <= $width) {
    return $column;
  }

  // The previous truncate_utf8($column, 0, $width) call passed 0 as the
  // length and $width as the word-safe flag, so the text was never cut to
  // $width. drupal_substr() counts characters, matching drupal_strlen() above.
  return drupal_substr($column, 0, $width) . '...';
}
|
| 156 |
|
| 157 |
/**
 * Formats a title and a link as a two-line item.
 *
 * @param $title
 *   Optional title; when non-empty it is emitted as-is, followed by a line
 *   break.
 * @param $link
 *   Path to link to; '/' is used when empty. The link text is the path,
 *   shortened by _search_keywords_column_width().
 *
 * @return
 *   The HTML fragment.
 */
function _search_keywords_format_item($title, $link) {
  if (!$link) {
    $link = '/';
  }

  $output = '';
  if ($title) {
    $output = "$title<br />";
  }

  return $output . l(_search_keywords_column_width($link), $link);
}
|
| 163 |
|
| 164 |
/**
 * Extracts the search engine and the keywords from a referrer URL.
 *
 * The URL is tested against a list of known result-page layouts, in order;
 * the first layout that matches wins.
 *
 * @param $url
 *   The referrer URL; may be empty or NULL when there is no referrer.
 *
 * @return
 *   array($search_engine, $keywords), where $search_engine is everything
 *   between the scheme and the matched result-page path, and $keywords is the
 *   URL-decoded, UTF-8 search phrase. array(NULL, NULL) when nothing matches.
 */
function _search_keywords_extract($url) {
  // No referrer at all: skip the regex work and avoid passing NULL on.
  if (!is_string($url) || $url === '') {
    return array(NULL, NULL);
  }

  // Every pattern has exactly two capture groups: 1 = engine, 2 = keywords.
  // Both http and https referrers are accepted.
  $engines = array(
    '/https?:\/\/(.*?)\/search\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/base\/search\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/blogsearch\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/search\?.*?p=([^&]+)/i',
    '/https?:\/\/(.*?)\/results\.aspx\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/tags?\/(.*)/i',
    '/https?:\/\/(.*?)\/search\/(.*)/i',
    '/https?:\/\/(.*?)\/search\.php\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/\?.*?tag=([^&]+)/i',
    '/https?:\/\/(.*?)\/cgi-bin\/search\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/aolcom\/search\?.*?query=([^&]+)/i',
    // The JSP page name is matched but not captured; capturing it used to
    // shift the keywords to group 3, so the page name was returned instead.
    '/https?:\/\/(.*?)\/jsp\/(?:[^\.]+)\.jsp\?.*?searchfor=([^&]+)/i',
    '/https?:\/\/(.*?)\/p\/search\?.*?qt=([^&]+)/i',
    '/https?:\/\/(.*?)\/web\?.*?q=([^&]+)/i',
    '/https?:\/\/(.*?)\/searchScreen\?.*?q=([^&]+)/i', // Seznam.cz
    '/https?:\/\/(.*?)\/searchScreen\?.*?w=([^&]+)/i', // Seznam.cz
    '/https?:\/\/(.*?)\/index\.php\?.*?q=([^&]+)/i', // ZoooHoo.cz
    '/https?:\/\/(.*?)\/\?.*?q=([^&]+)/i', // ZoooHoo.cz
  );
  // NOTE(review): '.*?q=' also matches parameters that merely end in "q"
  // (e.g. "aq=f" ahead of the real "q="). Left unchanged to preserve existing
  // matches; confirm before tightening.

  foreach ($engines as $pattern) {
    if (preg_match($pattern, $url, $matches)) {
      $q = urldecode($matches[2]);
      // Keywords that are not valid UTF-8 are converted before being returned.
      if (!_search_keywords_is_utf8($q)) {
        $q = _search_keywords_to_utf8($q);
      }
      return array($matches[1], $q);
    }
  }

  return array(NULL, NULL);
}
|
| 195 |
|
| 196 |
/**
 * Checks whether a string consists solely of well-formed UTF-8 sequences.
 *
 * Besides multi-byte sequences, only tab, line feed, carriage return and the
 * printable ASCII range (0x20-0x7E) are accepted.
 *
 * @param $string
 *   The byte string to test.
 *
 * @return
 *   The result of preg_match(): 1 when the whole string is valid (including
 *   the empty string), 0 otherwise.
 */
function _search_keywords_is_utf8($string) {
  // The "x" modifier lets the pattern carry whitespace and # comments.
  $well_formed = '%^(?:
      [\x09\x0A\x0D\x20-\x7E] # ASCII
    | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
    | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
    | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
    | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
    | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
    | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
    | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
    )*$%xs';

  return preg_match($well_formed, $string);
}
|
| 209 |
|
| 210 |
/**
 * Converts a TIS-620 (Thai) encoded string to UTF-8.
 *
 * Drupal's own converter is tried first; the built-in lookup table is used
 * when that converter is unavailable or returns an empty/FALSE result.
 *
 * @param $string
 *   The string, assumed to be TIS-620 encoded.
 *
 * @return
 *   The converted string.
 */
function _search_keywords_to_utf8($string) {
  $s = FALSE;

  // This helper is reached from hook_exit(), which already guards
  // drupal_get_title() with function_exists(); presumably
  // drupal_convert_to_utf8() can be missing in the same situation, so guard it
  // likewise instead of risking a fatal "undefined function" error.
  if (function_exists('drupal_convert_to_utf8')) {
    $s = drupal_convert_to_utf8($string, 'TIS-620');
  }

  if (!$s) {
    $s = _search_keywords_tis620_to_utf8($string);
  }
  return $s;
}
|
| 217 |
|
| 218 |
/**
 * Converts TIS-620 bytes to UTF-8 with a lookup table, without iconv/mbstring.
 *
 * Bytes 0xA1-0xDA and 0xDF-0xFB are replaced by the three-byte UTF-8 sequences
 * "\xE0\xB8\x81".."\xE0\xB8\xBA" and "\xE0\xB8\xBF".."\xE0\xB9\x9B". Every
 * other byte, including 0xDB-0xDE and 0xFC-0xFF, is left untouched.
 *
 * @param $string
 *   The byte string to convert.
 *
 * @return
 *   The converted string; the input itself when it holds no byte in the range
 *   0xA1-0xFF.
 */
function _search_keywords_tis620_to_utf8($string) {
  // Fast path: nothing to translate. preg_match() replaces ereg(), which was
  // removed in PHP 7.
  if (!preg_match('/[\xA1-\xFF]/', $string)) {
    return $string;
  }

  // Build the byte => UTF-8 table once per request.
  static $tis620 = NULL;
  if ($tis620 === NULL) {
    $tis620 = array();
    foreach (array(array(0xA1, 0xDA), array(0xDF, 0xFB)) as $range) {
      for ($byte = $range[0]; $byte <= $range[1]; $byte++) {
        // Offsets 0x01-0x3F land on second byte 0xB8, 0x40-0x5B on 0xB9; the
        // low six bits of the offset form the third byte.
        $offset = $byte - 0xA0;
        $tis620[chr($byte)] = "\xE0" . chr(0xB8 + ($offset >> 6)) . chr(0x80 | ($offset & 0x3F));
      }
    }
  }

  return strtr($string, $tis620);
}
|