| 1 |
<?php |
<?php |
| 2 |
/* $Id: search404.module,v 1.15 2008/03/04 12:14:30 zyxware Exp $ */ |
// $Id$ |
| 3 |
|
|
| 4 |
|
/* @file |
| 5 |
|
* The search404 module files, does all the searching |
| 6 |
|
* when a 404 occurs |
| 7 |
|
*/ |
| 8 |
|
|
| 9 |
/** |
/** |
| 10 |
* Implementation of hook_menu(). |
* Implementation of hook_menu(). |
| 11 |
*/ |
*/ |
| 12 |
function search404_menu() { |
function search404_menu() { |
| 13 |
$items = array(); |
$items = array(); |
| 14 |
|
|
| 15 |
$items['search404'] = array( |
$items['search404'] = array( |
| 16 |
'title' => t('Page not found'), |
'title' => 'Page not found', |
| 17 |
'access callback' => TRUE, |
'access callback' => TRUE, |
| 18 |
'page callback' => 'search404_page', |
'page callback' => 'search404_page', |
| 19 |
'type' => MENU_LOCAL_TASK |
'type' => MENU_LOCAL_TASK |
| 20 |
); |
); |
| 21 |
|
|
| 22 |
$items['admin/settings/search404'] = array( |
$items['admin/settings/search404'] = array( |
| 23 |
'title' => t('Search 404 settings'), |
'title' => 'Search 404 settings', |
| 24 |
'description' => t('Administer search 404.'), |
'description' => 'Administer search 404.', |
| 25 |
'page callback' => 'drupal_get_form', |
'page callback' => 'drupal_get_form', |
| 26 |
'page arguments' => array('search404_settings'), |
'page arguments' => array('search404_settings'), |
| 27 |
'access callback' => 'user_access', |
'access callback' => 'user_access', |
| 28 |
'access arguments' => array('administer site configuration'), |
'access arguments' => array('administer site configuration'), |
| 29 |
'type' => MENU_NORMAL_ITEM, |
'type' => MENU_NORMAL_ITEM, |
| 30 |
); |
); |
| 41 |
// Abort query on certain extensions, e.g: gif jpg jpeg png |
// Abort query on certain extensions, e.g: gif jpg jpeg png |
| 42 |
$extensions = preg_split('/\s+/', variable_get('search404_ignore_query', 'gif jpg jpeg bmp png')); |
$extensions = preg_split('/\s+/', variable_get('search404_ignore_query', 'gif jpg jpeg bmp png')); |
| 43 |
$extensions = trim(implode('|', $extensions)); |
$extensions = trim(implode('|', $extensions)); |
| 44 |
if (!empty($extensions) && preg_match("/\.($extensions)$/", $_REQUEST['destination'])) { |
$keys = $_REQUEST['q']; |
| 45 |
return false; |
if (!empty($extensions) && preg_match("/\.($extensions)$/", $keys)) { |
| 46 |
|
return FALSE; |
| 47 |
|
} |
| 48 |
|
$regex_filter = variable_get('search404_regex', ''); |
| 49 |
|
$keys_array[] = $keys; |
| 50 |
|
if (!empty($regex_filter)) { |
| 51 |
|
$keys = preg_replace("/" . $regex_filter . "/", '', $keys); |
| 52 |
} |
} |
|
$keys = $_REQUEST['destination']; |
|
|
$misc_var = variable_get('search404_regex', ''); |
|
|
if (!empty($misc_var)) { |
|
|
$keys = preg_grep($misc_var, $keys); |
|
|
$keys = $keys[0]; |
|
|
} |
|
| 53 |
// Ingore certain extensions from query |
// Ingore certain extensions from query |
| 54 |
$extensions = preg_split('/\s+/', variable_get('search404_ignore_extensions', 'htm html php')); |
$extensions = preg_split('/\s+/', variable_get('search404_ignore_extensions', 'htm html php')); |
| 55 |
$extensions = trim(implode('|', $extensions)); |
$extensions = trim(implode('|', $extensions)); |
| 56 |
if (!empty($extensions)) { |
if (!empty($extensions)) { |
| 57 |
$keys = preg_replace("/\.($extensions)$/", '', $keys); |
$keys = preg_replace("/\.($extensions)$/", '', $keys); |
| 58 |
} |
} |
| 59 |
|
|
| 60 |
$keys = preg_split('/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u', $keys); |
$keys = preg_split('/[' . PREG_CLASS_SEARCH_EXCLUDE . ']+/u', $keys); |
| 61 |
|
|
| 62 |
// Ignore certain words |
// Ignore certain words |
| 63 |
$keys = array_diff($keys, explode(' ', variable_get('search404_ignore', 'and or the'))); |
$keys = array_diff($keys, explode(' ', variable_get('search404_ignore', 'and or the'))); |
| 65 |
foreach ($keys as $a => $b) { |
foreach ($keys as $a => $b) { |
| 66 |
$keys[$a] = check_plain($b); |
$keys[$a] = check_plain($b); |
| 67 |
} |
} |
| 68 |
$modifier = variable_get('search404_use_or', false) ? ' OR ' : ' '; |
$modifier = variable_get('search404_use_or', FALSE) ? ' OR ' : ' '; |
| 69 |
$keys = trim(implode($modifier, $keys)); |
$keys = trim(implode($modifier, $keys)); |
| 70 |
return $keys; |
return $keys; |
| 71 |
} |
} |
| 72 |
|
|
| 73 |
/** |
/** |
| 74 |
* Detect search from search engine (BETA) |
* Detect search from search engine |
| 75 |
*/ |
*/ |
| 76 |
function search404_search_engine_query() { |
function search404_search_engine_query() { |
| 77 |
$engines = array( |
$engines = array( |
| 78 |
'altavista' => 'q', |
'altavista' => 'q', |
| 79 |
'aol' => 'query', |
'aol' => 'query', |
| 80 |
'google' => 'q', |
'google' => 'q', |
| 81 |
'live' => 'q', |
'bing' => 'q', |
| 82 |
'lycos' => 'query', |
'lycos' => 'query', |
|
'msn' => 'q', |
|
| 83 |
'yahoo' => 'p', |
'yahoo' => 'p', |
| 84 |
); |
); |
| 85 |
$parsed_url = parse_url($_SERVER['HTTP_REFERER']); |
$parsed_url = parse_url($_SERVER['HTTP_REFERER']); |
| 87 |
$query_string = $parsed_url['query']; |
$query_string = $parsed_url['query']; |
| 88 |
parse_str($query_string, $query); |
parse_str($query_string, $query); |
| 89 |
|
|
| 90 |
if (!$parsed_url === false && !empty($remote_host) && !empty($query_string) && count($query)) { |
if (!$parsed_url === FALSE && !empty($remote_host) && !empty($query_string) && count($query)) { |
| 91 |
foreach ($engines as $host => $key) { |
foreach ($engines as $host => $key) { |
| 92 |
if (strpos($remote_host, $host) !== false && array_key_exists($key, $query)) { |
if (strpos($remote_host, $host) !== FALSE && array_key_exists($key, $query)) { |
| 93 |
return trim($query[$key]); |
return trim($query[$key]); |
| 94 |
} |
} |
| 95 |
} |
} |
| 96 |
} |
} |
| 97 |
|
|
| 98 |
return false; |
return FALSE; |
| 99 |
} |
} |
| 100 |
|
|
| 101 |
/** |
/** |
| 105 |
* Beware of messy code |
* Beware of messy code |
| 106 |
*/ |
*/ |
| 107 |
function search404_page() { |
function search404_page() { |
| 108 |
$output = '<p>'. t('The page you requested was not found.') .'</p>'; |
drupal_set_title(t('Page not found')); |
| 109 |
if (module_exists('search') && user_access('search content')) { |
if (module_exists('search') && user_access('search content')) { |
| 110 |
$keys = ""; |
$keys = ""; |
| 111 |
|
// if apachesolr_search is installed use apachesolr_search instead default node search |
| 112 |
|
$type_search = (module_exists('apachesolr_search')) ? 'apachesolr_search' : 'node'; |
| 113 |
if (variable_get('search404_use_search_engine', false)) { |
if (variable_get('search404_use_search_engine', false)) { |
| 114 |
$keys = search404_search_engine_query(); |
$keys = search404_search_engine_query(); |
| 115 |
} |
} |
| 118 |
} |
} |
| 119 |
if ($keys) { |
if ($keys) { |
| 120 |
// TODO: watchdog? |
// TODO: watchdog? |
| 121 |
$results = module_invoke('node', 'search', 'search', $keys); |
if (module_exists('google') && user_access('search Google CSE') && variable_get('search404_do_google_cse', FALSE)) { |
| 122 |
if (isset($results) && is_array($results) && count($results) == 1 && variable_get('search404_jump', false)) { |
drupal_set_message(t('The page you requested does not exist. For your convenience, a google search was performed using the query %keys.', array('%keys' => check_plain($keys))), 'error'); |
| 123 |
// First, check to see if there is exactly 1 result |
drupal_goto('search/google/' . $keys); |
|
drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', array('%keys' => check_plain($keys))), 'status'); |
|
|
// overwrite $_REQUEST['destination'] because it is set by drupal_not_found() |
|
|
$_REQUEST['destination'] = 'node/'. $results[0]['node']->nid; |
|
|
drupal_goto(); |
|
|
} |
|
|
elseif (isset($results) && is_array($results) && count($results) > 1 && variable_get('search404_first', false)) { |
|
|
drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', array('%keys' => check_plain($keys))), 'status'); |
|
|
// overwrite $_REQUEST['destination'] because it is set by drupal_not_found() |
|
|
$_REQUEST['destination'] = 'node/'. $results[0]['node']->nid; |
|
|
drupal_goto(); |
|
| 124 |
} |
} |
| 125 |
else { |
else { |
| 126 |
drupal_set_message(t('The page you requested does not exist. For your convenience, a search was performed using the query %keys.', array('%keys' => check_plain($keys))), 'error'); |
$results = module_invoke($type_search, 'search', 'search', $keys); |
| 127 |
if (isset($results) && is_array($results) && count($results) > 0) {drupal_set_message('Inside Else1'); |
if (isset($results) && is_array($results) && count($results) == 1 && variable_get('search404_jump', FALSE)) { |
| 128 |
drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE); |
// First, check to see if there is exactly 1 result |
| 129 |
// EVIL HAXX! |
drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', array('%keys' => check_plain($keys))), 'status'); |
| 130 |
$oldgetq = $_GET['q']; |
// overwrite $_REQUEST['destination'] because it is set by drupal_not_found() |
| 131 |
$olddestination = $_REQUEST['destination']; |
$_REQUEST['destination'] = 'node/' . $results[0]['node']->nid; |
| 132 |
unset($_REQUEST['destination']); |
drupal_goto(); |
| 133 |
$_GET['q'] = "search/node/$keys"; |
} |
| 134 |
$results = theme('search_results', $results, 'node'); |
elseif (isset($results) && is_array($results) && count($results) > 1 && variable_get('search404_first', FALSE)) { |
| 135 |
$_GET['q'] = $oldgetq; |
drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', array('%keys' => check_plain($keys))), 'status'); |
| 136 |
$_REQUEST['destination'] = $olddestination; |
// overwrite $_REQUEST['destination'] because it is set by drupal_not_found() |
| 137 |
// END OF EVIL HAXX! |
$_REQUEST['q'] = 'node/' . $results[0]['node']->nid; |
| 138 |
|
drupal_goto(); |
| 139 |
} |
} |
| 140 |
else { |
else { |
| 141 |
$results = search_help('search#noresults', drupal_help_arg()); |
drupal_set_message(t('The page you requested does not exist. For your convenience, a search was performed using the query %keys.', array('%keys' => check_plain($keys))), 'error'); |
| 142 |
|
if (isset($results) && is_array($results) && count($results) > 0) { |
| 143 |
|
drupal_add_css(drupal_get_path('module', 'search') . '/search.css', 'module', 'all', FALSE); |
| 144 |
|
// EVIL HAXX! |
| 145 |
|
$oldgetq = $_GET['q']; |
| 146 |
|
$olddestination = $_REQUEST['q']; |
| 147 |
|
unset($_REQUEST['q']); |
| 148 |
|
$_GET['q'] = "search/$type_search/$keys"; |
| 149 |
|
$results = theme('search_results', $results, 'node'); |
| 150 |
|
$_GET['q'] = $oldgetq; |
| 151 |
|
$_REQUEST['destination'] = $olddestination; |
| 152 |
|
// END OF EVIL HAXX! |
| 153 |
|
} |
| 154 |
|
else { |
| 155 |
|
$results = search_help('search#noresults', drupal_help_arg()); |
| 156 |
|
} |
| 157 |
|
$results = theme('search_results_listing', t('Search results'), $results); |
| 158 |
} |
} |
|
$results = theme('box', t('Search results'), $results); |
|
| 159 |
} |
} |
| 160 |
} |
} |
| 161 |
// Construct the search form. |
// Construct the search form. |
| 162 |
$output .= drupal_get_form('search_form', NULL, $keys, 'node'); |
$output = drupal_render(drupal_get_form('search_form', NULL, $keys, $type_search)) . $results; |
|
$output .= $results; |
|
| 163 |
} |
} |
| 164 |
|
|
| 165 |
return $output; |
return $output; |
| 166 |
} |
} |
| 167 |
|
|
| 172 |
$form['search404_jump'] = array( |
$form['search404_jump'] = array( |
| 173 |
'#type' => 'checkbox', |
'#type' => 'checkbox', |
| 174 |
'#title' => t('Jump directly to the search result when there is only one result.'), |
'#title' => t('Jump directly to the search result when there is only one result.'), |
| 175 |
'#default_value' => variable_get('search404_jump', false), |
'#default_value' => variable_get('search404_jump', FALSE), |
| 176 |
); |
); |
| 177 |
$form['search404_first'] = array( |
$form['search404_first'] = array( |
| 178 |
'#type' => 'checkbox', |
'#type' => 'checkbox', |
| 179 |
'#title' => t('Jump directly to the first search result when there are multiple results.'), |
'#title' => t('Jump directly to the first search result even when there are multiple results.'), |
| 180 |
'#default_value' => variable_get('search404_first', false), |
'#default_value' => variable_get('search404_first', FALSE), |
| 181 |
); |
); |
| 182 |
|
|
| 183 |
|
$form['search404_do_google_cse'] = array( |
| 184 |
|
'#type' => 'checkbox', |
| 185 |
|
'#title' => t('Do a Google CSE Search instead of a Drupal Search when a 404 Occurs'), |
| 186 |
|
'#description' => t('Requires Google CSE and Google CSE Search Modules to be Enabled'), |
| 187 |
|
'#attributes' => module_exists('google') ? array() : array('disabled' => 'disabled'), |
| 188 |
|
'#default_value' => variable_get('search404_do_google_cse', FALSE), |
| 189 |
|
); |
| 190 |
|
|
| 191 |
$form['advanced'] = array( |
$form['advanced'] = array( |
| 192 |
'#type' => 'fieldset', |
'#type' => 'fieldset', |
| 193 |
'#title' => t('Advanced settings'), |
'#title' => t('Advanced settings'), |
| 194 |
'#description' => t("WARNING. Some of these settings can mess up stuff, don't touch unless you know what you are |
//'#description' => t("WARNING. Some of these settings can mess up stuff, don't touch unless you know what you are doing."), |
|
doing."), |
|
| 195 |
'#collapsible' => TRUE, |
'#collapsible' => TRUE, |
| 196 |
'#collapsed' => TRUE, |
'#collapsed' => TRUE, |
| 197 |
); |
); |
| 198 |
$form['advanced']['search404_use_or'] = array( |
$form['advanced']['search404_use_or'] = array( |
| 199 |
'#type' => 'checkbox', |
'#type' => 'checkbox', |
| 200 |
'#title' => t('Use OR between keywords when searching.'), |
'#title' => t('Use OR between keywords when searching.'), |
| 201 |
'#default_value' => variable_get('search404_use_or', false), |
'#default_value' => variable_get('search404_use_or', FALSE), |
| 202 |
); |
); |
| 203 |
$form['advanced']['search404_use_search_engine'] = array( |
$form['advanced']['search404_use_search_engine'] = array( |
| 204 |
'#type' => 'checkbox', |
'#type' => 'checkbox', |
| 205 |
'#title' => t('Use auto-detection of keywords from search engine referer. BETA! Not for production sites, use at your own risk.'), |
'#title' => t('Use auto-detection of keywords from search engine referer.'), |
| 206 |
'#default_value' => variable_get('search404_use_search_engine', false), |
'#description' => t('This feature will conduct a search based on the query string got from a search engine if the URL of the search result points to a 404 page in the current website. Currently supported search engines: Google, Yahoo, Altavista, Lycos, Bing and AOL'), |
| 207 |
|
'#default_value' => variable_get('search404_use_search_engine', FALSE), |
| 208 |
); |
); |
| 209 |
$form['advanced']['search404_ignore'] = array( |
$form['advanced']['search404_ignore'] = array( |
| 210 |
'#type' => 'textfield', |
'#type' => 'textfield', |
| 211 |
'#title' => t('Words to ignore'), |
'#title' => t('Words to ignore'), |
| 212 |
'#description' => t('These words will be ignored from query. Separate words with a space, e.g.: "and or the".'), |
'#description' => t('These words will be ignored from the search query. Separate words with a space, e.g.: "and or the".'), |
| 213 |
'#default_value' => variable_get('search404_ignore', 'and or the'), |
'#default_value' => variable_get('search404_ignore', 'and or the'), |
| 214 |
); |
); |
| 215 |
$form['advanced']['search404_ignore_extensions'] = array( |
$form['advanced']['search404_ignore_extensions'] = array( |
| 216 |
'#type' => 'textfield', |
'#type' => 'textfield', |
| 217 |
'#title' => t('Extensions to ignore'), |
'#title' => t('Extensions to ignore'), |
| 218 |
'#description' => t('These extensions will be ignored from query. Separate extensions with a space, e.g.: "htm html php". Do not include leading dot.'), |
'#description' => t('These extensions will be ignored from the search query, e.g.: http://www.example.com/invalid/page.php will only search for "invalid page". Separate extensions with a space, e.g.: "htm html php". Do not include leading dot.'), |
| 219 |
'#default_value' => variable_get('search404_ignore_extensions', 'htm html php'), |
'#default_value' => variable_get('search404_ignore_extensions', 'htm html php'), |
| 220 |
); |
); |
| 221 |
$form['advanced']['search404_ignore_query'] = array( |
$form['advanced']['search404_ignore_query'] = array( |
| 222 |
'#type' => 'textfield', |
'#type' => 'textfield', |
| 223 |
'#title' => t('Extensions to abort search'), |
'#title' => t('Extensions to abort search'), |
| 224 |
'#description' => t('A search will not be performed for a query ending in the following extensions. Separate extensions with a space, e.g.: "gif jpg jpeg bmp png". Do not include leading dot.'), |
'#description' => t('A search will not be performed for a query ending in these extensions. Separate extensions with a space, e.g.: "gif jpg jpeg bmp png". Do not include leading dot.'), |
| 225 |
'#default_value' => variable_get('search404_ignore_query', 'gif jpg jpeg bmp png'), |
'#default_value' => variable_get('search404_ignore_query', 'gif jpg jpeg bmp png'), |
| 226 |
); |
); |
| 227 |
$form['advanced']['search404_regex'] = array( |
$form['advanced']['search404_regex'] = array( |
| 228 |
'#type' => 'textfield', |
'#type' => 'textfield', |
| 229 |
'#title' => t('PCRE REGEX'), |
'#title' => t('PCRE filter'), |
| 230 |
'#description' => t('This regex will applied to all queries. It uses the code:<p>%code</p>Look directly at the source code to understand underlying syntax. See also <a href="http://php.net/pcre">PCRE pages in the PHP Manual</a>.', array('%code' => "\$keys = preg_grep(variable_get('search404_regex'), \$keys);\n\$keys = \$keys[0];", '%function' => 'search404_get_keys()')), |
'#description' => t('This regular expression will be applied to filter all queries. The parts of the path that match the expression will be EXCLUDED from the search. You do NOT have to enclose the regex in forward slashes when defining the PCRE. e.g.: use "[foo]bar" instead of "/[foo]bar/". On how to use a PCRE Regex please refer <a href="http://php.net/pcre">PCRE pages in the PHP Manual</a>.'), |
| 231 |
'#default_value' => variable_get('search404_regex',''), |
'#default_value' => variable_get('search404_regex', ''), |
| 232 |
); |
); |
| 233 |
return system_settings_form($form); |
return system_settings_form($form); |
| 234 |
} |
} |