| 1 |
<?php
|
| 2 |
// $Id: xapian.module,v 1.6 2008/06/02 01:37:56 simon Exp $
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
* Implementation of the Xapian search library as a Drupal module.
|
| 7 |
*
|
| 8 |
* Initial Development and sample view code by Lachlan Gunn - http://drupal.org/user/277696
|
| 9 |
* Core patch, tighter Drupal integration, adherence to Drupal standards,
|
| 10 |
* making ready for release by Simon Lindsay - http://drupal.org/user/143
|
| 11 |
*/
|
| 12 |
|
| 13 |
include_once('xapian.php');
|
| 14 |
|
| 15 |
// Readable names for the two boolean arguments of _xapian_init_database().
// First argument: request the writable database handle.
define('XAPIAN_WRITABLE', TRUE);
// Second argument: release the writable handle instead of opening one.
define('XAPIAN_FLUSH', TRUE);
|
| 17 |
|
| 18 |
/**
 * Error handler installed while xapian.php is being included.
 *
 * Records the error message in the global 'xapian_include_error' so that
 * xapian_available() and xapian_requirements() can inspect it afterwards.
 * Only the message is kept; every other argument is ignored.
 *
 * @param int $errno
 * @param string $errstr
 *   The message that is stored.
 * @param string $errfile
 * @param int $errline
 * @param array $errcontext
 */
function _xapian_requirements_error_handler($errno, $errstr, $errfile = NULL, $errline = NULL, $errcontext = NULL) {
  global $xapian_include_error;

  $xapian_include_error = $errstr;
}
|
| 30 |
|
| 31 |
/**
 * Report whether the Xapian PHP bindings could be loaded.
 *
 * Includes xapian.php with a temporary error handler installed. If the
 * handler recorded no message in $GLOBALS['xapian_include_error'] the
 * bindings are considered available. A positive answer is remembered for the
 * rest of the request; a negative one is re-checked on the next call.
 *
 * @return bool
 *   TRUE when the include raised no error, FALSE otherwise.
 */
function xapian_available() {
  static $is_available = FALSE;

  // Once the bindings have loaded successfully there is nothing to re-check.
  if ($is_available !== FALSE) {
    return $is_available;
  }

  $GLOBALS['xapian_include_error'] = NULL;
  set_error_handler('_xapian_requirements_error_handler');
  include_once('xapian.php');
  restore_error_handler();

  if (NULL == $GLOBALS['xapian_include_error']) {
    $is_available = TRUE;
  }

  return $is_available;
}
|
| 53 |
|
| 54 |
/**
 * Implementation of hook_requirements().
 *
 * Reports the version of the Xapian bindings when they are available, or the
 * error recorded while including xapian.php when they are not.
 *
 * @param int $phase
 *   Not used; the same entry is returned for every phase.
 * @return array
 *   A single requirement keyed 'xapian'.
 */
function xapian_requirements($phase) {
  if (!xapian_available()) {
    $status = REQUIREMENT_ERROR;
    $value = $GLOBALS['xapian_include_error'];
  }
  elseif (function_exists('xapian_version_string')) {
    // Flat-function (PHP4 style) bindings.
    $status = REQUIREMENT_OK;
    $value = xapian_version_string();
  }
  else {
    // Class-based (PHP5 style) bindings.
    $status = REQUIREMENT_OK;
    $value = Xapian::version_string();
  }

  $requirements = array();
  $requirements['xapian'] = array(
    'title' => t('Xapian bindings'),
    'value' => $value,
    'severity' => $status,
  );

  return $requirements;
}
|
| 80 |
|
| 81 |
/**
 * Implementation of hook_menu().
 *
 * Registers the settings form and the "rebuild index" confirmation form.
 * Both items are static, so they are only built when the menu system asks
 * for cacheable items; previously they were rebuilt on every request.
 *
 * @param bool $may_cache
 *   TRUE when cacheable items are being collected.
 * @return array
 *   Menu items; empty when $may_cache is FALSE.
 */
function xapian_menu($may_cache) {
  $items = array();

  if ($may_cache) {
    // NOTE(review): the settings form is guarded by a different permission
    // than the rebuild page it redirects to -- confirm this is intended.
    $items[] = array(
      'path' => 'admin/settings/xapian',
      'title' => t('Xapian settings'),
      'description' => t('Settings for the Xapian search library'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('xapian_admin'),
      'access' => user_access('access administration pages'),
      'type' => MENU_NORMAL_ITEM,
    );
    $items[] = array(
      'path' => 'admin/settings/xapian/wipe',
      'title' => t('Rebuild index'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('xapian_reindex_all_confirm'),
      'access' => user_access('administer search'),
      'type' => MENU_CALLBACK,
    );
  }

  return $items;
}
|
| 110 |
|
| 111 |
/**
 * Form builder for the Xapian settings page (admin/settings/xapian).
 *
 * Builds four fieldsets -- database, performance, display and logging -- and
 * hands the form to system_settings_form(). The "Re-index site" button is
 * handled by xapian_admin_validate().
 *
 * @return array
 *   The form array as returned by system_settings_form().
 */
function xapian_admin() {
  // 0 = local database, 1 = remote database (see the radio options below).
  $database_type = variable_get('xapian_database_type', 0);
  $is_local = ($database_type == 0);
  $is_remote = ($database_type == 1);

  $form = array();
  $form['#cache'] = FALSE;

  // Database settings. Only the options of the currently stored database
  // type are expanded and marked as required.
  $form['database'] = array(
    '#type' => 'fieldset',
    '#title' => t('Xapian database'),
    'xapian_database_type' => array(
      '#type' => 'radios',
      '#title' => t('Type'),
      '#default_value' => $database_type,
      '#options' => array(t('Local'), t('Remote')),
    ),
    'local_database' => array(
      '#type' => 'fieldset',
      '#title' => t('Local database options'),
      '#collapsible' => TRUE,
      '#collapsed' => !$is_local,
      'xapian_database_path' => array(
        '#type' => 'textfield',
        '#title' => t('Path to Xapian database'),
        '#default_value' => variable_get('xapian_database_path', file_directory_path() .'/xapian_database'),
        '#required' => $is_local,
        '#description' => t('Directory where your local Xapian database will be created. Specify a directory writable by your web server process.'),
      ),
    ),
    'remote_database' => array(
      '#type' => 'fieldset',
      '#title' => t('Remote database options'),
      '#collapsible' => TRUE,
      '#collapsed' => !$is_remote,
      'xapian_database_hostname' => array(
        '#type' => 'textfield',
        '#title' => t('Database server'),
        '#default_value' => variable_get('xapian_database_hostname', ''),
        '#required' => $is_remote,
        '#description' => t('IP address or host name of remote server running xapian-tcpsrv.'),
      ),
      'xapian_database_port' => array(
        '#type' => 'textfield',
        '#title' => t('Database port'),
        '#default_value' => variable_get('xapian_database_port', '6431'),
        '#required' => $is_remote,
        '#validate' => array('_xapian_validate_port' => array('xapian_database_port')),
        '#description' => t('Remote port that xapian-tcpsrv is listening on.'),
      ),
    ),
  );

  // Indexing settings, including the current length of the index queue.
  $queued = db_result(db_query("SELECT COUNT(*) FROM {xapian_index_queue}"));
  $form['performance'] = array(
    '#type' => 'fieldset',
    '#title' => t('Performance'),
    'count' => array(
      '#value' => t('<p><strong>There are %count items waiting to be indexed.</strong></p>', array('%count' => $queued)),
    ),
    'index' => array(
      '#type' => 'submit',
      '#value' => t('Re-index site'),
    ),
    'xapian_index_immediately' => array(
      '#type' => 'checkbox',
      '#title' => t('Index immediately'),
      '#default_value' => variable_get('xapian_index_immediately', TRUE),
      // Typo fix: this string used to read "Your should disable".
      '#description' => t('Enable this option to index content immediately as it is created and updated. Disable this option to delay indexing until cron runs. You should disable this option on larger websites.'),
    ),
    'xapian_indexing_throttle' => array(
      '#type' => 'select',
      '#title' => t('Items to index per cron run'),
      '#description' => t('The maximum number of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'),
      '#options' => drupal_map_assoc(array(10, 20, 50, 100, 500, 1000, 5000, 10000)),
      '#default_value' => variable_get('xapian_indexing_throttle', 100),
    ),
  );

  // Display settings.
  $form['display'] = array(
    '#type' => 'fieldset',
    '#title' => t('Display'),
    'xapian_search_results_per_page' => array(
      '#type' => 'textfield',
      '#title' => t('Number of search results per page'),
      '#size' => 3,
      '#maxlength' => 3,
      '#default_value' => variable_get('xapian_search_results_per_page', 10),
      '#description' => t('This setting determines the number of entries per page displayed on the search results.'),
      '#validate' => array('_xapian_validate_positive_integer' => array('xapian_search_results_per_page')),
    ),
    'xapian_node_count_type' => array(
      '#type' => 'radios',
      '#title' => t('Result count'),
      '#description' => t('This setting determines the value that xapian returns for the result count returned from queries (used for number of pages in pagers, etc.)'),
      '#default_value' => variable_get('xapian_node_count_type', 0),
      // The option keys 0, 1, 2 are the values xapian_query() switches on.
      '#options' => array(
        t('Best estimate'),
        t('Lower bound'),
        t('Upper bound'),
      ),
    ),
  );

  // Logging options.
  $form['diagnostic'] = array(
    '#type' => 'fieldset',
    '#title' => t('Logging'),
    'xapian_log_queries' => array(
      '#type' => 'checkbox',
      '#title' => t('Log searches'),
      '#description' => t('Log search queries and time taken for search to the watchdog log.'),
      '#default_value' => variable_get('xapian_log_queries', FALSE),
    ),
  );

  return system_settings_form($form);
}
|
| 243 |
|
| 244 |
/**
 * Validation handler for the Xapian settings form.
 *
 * When the form was submitted through the "Re-index site" button, redirect
 * to the re-index confirmation page instead of saving the settings.
 *
 * @param string $form_id
 * @param array $form_values
 *   Submitted values; only 'op' (the clicked button label) is read.
 */
function xapian_admin_validate($form_id, $form_values) {
  $reindex_label = t('Re-index site');

  if ($reindex_label == $form_values['op']) {
    drupal_goto('admin/settings/xapian/wipe');
  }
}
|
| 255 |
|
| 256 |
/**
 * Element validator for the remote database port.
 *
 * Flags a form error unless the submitted value is numeric and, cast to an
 * integer, lies between 1 and 65535 inclusive.
 *
 * @param array $field
 *   The form element; its '#value' is checked.
 * @param string $field_name
 *   Name of the element the error is attached to.
 */
function _xapian_validate_port($field, $field_name) {
  $port = $field['#value'];
  $in_range = is_numeric($port) && (int)$port >= 1 && (int)$port <= 65535;

  if (!$in_range) {
    form_set_error($field_name, t('%value is not a valid port.', array('%value' => $port)));
  }
}
|
| 267 |
|
| 268 |
/**
 * Element validator: the value must be a whole number of at least 1.
 *
 * Used for the "search results per page" setting. Zero has to be rejected
 * because do_search() divides the result count by this value; fractional and
 * signed input is rejected as well since only digits are accepted.
 *
 * @param array $field
 *   The form element; its '#value' is checked.
 * @param string $field_name
 *   Name of the element the error is attached to.
 */
function _xapian_validate_positive_integer($field, $field_name) {
  $value = $field['#value'];
  // Tolerate surrounding whitespace; reject arrays and other non-scalars.
  $candidate = is_scalar($value) ? trim((string)$value) : '';

  // Digits only, so "1.5", "1e3" and "-3" fail even though they are numeric.
  $is_whole_number = (bool)preg_match('/^[0-9]+$/D', $candidate);

  if (!$is_whole_number || (int)$candidate < 1) {
    form_set_error($field_name, t('"%value" is an invalid number of search results per page.', array('%value' => $value)));
  }
}
|
| 279 |
|
| 280 |
/**
 * Open (or release) the Xapian database handles used by this module.
 *
 * A read-only and a writable handle are kept in static variables so that
 * each is opened at most once per request. The database location comes from
 * the 'xapian_database_type' variable: 0 is a local path, 1 is a remote
 * xapian-tcpsrv host and port.
 *
 * @param bool $writable
 *   TRUE to get the writable handle, FALSE for the read-only handle.
 * @param bool $flush
 *   If set, nothing is opened. Instead the writable handle, if one is open,
 *   is written out and released. Nothing is returned in that case.
 * @return object
 *   The database handle, or NULL when the settings are incomplete or the
 *   database cannot be opened (the reason is written to watchdog).
 */
function _xapian_init_database($writable = FALSE, $flush = FALSE) {
  static $database;
  static $writable_database;

  if ($flush) {
    if (is_object($writable_database)) {
      try {
        // Write pending changes explicitly. Dropping our reference only
        // destroys the object when no caller still holds the handle.
        // Newer bindings call this commit(), older ones flush().
        if (method_exists($writable_database, 'commit')) {
          $writable_database->commit();
        }
        else {
          $writable_database->flush();
        }
      }
      catch (Exception $e) {
        watchdog('xapian', $e->getMessage());
      }
      $writable_database = NULL;
    }
    return;
  }

  // Reuse a handle opened earlier in this request.
  if (!$writable && is_object($database)) {
    return $database;
  }
  if ($writable && is_object($writable_database)) {
    return $writable_database;
  }

  try {
    $database_type = variable_get('xapian_database_type', 0);

    if ($database_type == 0) {
      // Local database.
      $database_path = variable_get('xapian_database_path', NULL);
      if (empty($database_path)) {
        watchdog('xapian', t('No database path given.'));
        return NULL;
      }

      if ($writable) {
        $writable_database = new XapianWritableDatabase($database_path, Xapian::DB_CREATE_OR_OPEN);
        return $writable_database;
      }
      $database = new XapianDatabase($database_path);
      return $database;
    }
    elseif ($database_type == 1) {
      // Remote database served by xapian-tcpsrv.
      $database_host = variable_get('xapian_database_hostname', '');
      $database_port = (int)variable_get('xapian_database_port', 6431);
      if (empty($database_host)) {
        watchdog('xapian', t('No database host given.'));
        return NULL;
      }

      if ($writable) {
        // A remote database has to be opened writable by the remote backend
        // itself; a read-only remote handle cannot be passed to the
        // XapianWritableDatabase path constructor.
        $writable_database = Xapian::remote_open_writable($database_host, $database_port);
        return $writable_database;
      }
      $database = Xapian::remote_open($database_host, $database_port);
      return $database;
    }

    // Neither local nor remote: there is nothing that can be opened.
    watchdog('xapian', t('Unknown database type.'));
    return NULL;
  }
  catch (Exception $e) {
    watchdog('xapian', $e->getMessage());
    return NULL;
  }
}
|
| 349 |
|
| 350 |
/**
 * Queries the database.
 *
 * Parses $query_string with an English stemmer, narrows the parsed query
 * with the application-defined terms in $extra and returns one page of
 * matches together with a total match count.
 *
 * @param $query_string
 *   A string (perhaps supplied by the user) containing terms to search for.
 *   It is parsed and stemmed automatically.
 * @param $start
 *   Offset of the first match to return.
 * @param $length
 *   The number of results to return.
 * @param $extra
 *   An array of arrays of extra terms. The terms inside one inner array are
 *   OR-ed together; every non-empty inner array is AND-ed with the query.
 * @param $query_weight
 *   Currently not used.
 * @return
 *   array($count, $results) on success, NULL when the database cannot be
 *   opened or Xapian throws. $count is the estimated, lower-bound or
 *   upper-bound match count according to 'xapian_node_count_type'. $results
 *   holds objects with ->type ('node'), ->sid (the integer stored as the
 *   document data) and ->score (match percentage), keyed by their 1-based
 *   position in the page.
 */
function xapian_query($query_string, $start = 0, $length = 10, $extra = array(), $query_weight = NULL) {
  try {
    $start_time = microtime(TRUE);

    $db = _xapian_init_database();
    if (!is_object($db)) {
      return NULL;
    }

    $enquire = new XapianEnquire($db);
    $query_parser = new XapianQueryParser();
    $stemmer = new XapianStem("english");
    $query_parser->set_stemmer($stemmer);
    $query_parser->set_database($db);
    $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $query = $query_parser->parse_query($query_string);

    // Build subqueries from $extra array.
    foreach ($extra as $subq) {
      if (!empty($subq)) {
        $subquery = new XapianQuery(XapianQuery::OP_OR, $subq);
        $query = new XapianQuery(XapianQuery::OP_AND, array($subquery, $query));
      }
    }

    $enquire->set_query($query);
    $matches = $enquire->get_mset((int)$start, (int)$length);

    $results = array();
    $position = 0;
    for ($i = $matches->begin(); !$i->equals($matches->end()); $i->next()) {
      // The position advances for every match so the keys stay aligned with
      // the match order even if a document cannot be read.
      $position++;
      $document = $i->get_document();
      if (is_object($document)) {
        // Create the result object explicitly: assigning properties to an
        // undefined array element relies on implicit object creation, which
        // current PHP versions reject.
        $hit = new stdClass();
        $hit->type = 'node';
        $hit->sid = (int)($document->get_data());
        $hit->score = (int)($i->get_percent());
        $results[$position] = $hit;
      }
    }

    if (variable_get('xapian_log_queries', FALSE)) {
      $time_taken = (microtime(TRUE) - $start_time) * 1000;
      watchdog('xapian', t('<p>Query: %desc </p><p>Query time: %timems</p>',
        array('%desc' => $query->get_description(), '%time' => $time_taken)));
    }

    // Values match the option keys of the "Result count" setting.
    $count_type = variable_get('xapian_node_count_type', 0);

    switch ($count_type) {
      case 1: // Lower bound
        $count = $matches->get_matches_lower_bound();
        break;

      case 2: // Upper bound
        $count = $matches->get_matches_upper_bound();
        break;

      case 0: // Best estimate
      default:
        $count = $matches->get_matches_estimated();
        break;
    }

    return array($count, $results);
  }
  catch (Exception $e) {
    watchdog('xapian', $e->getMessage());
    return NULL;
  }
}
|
| 443 |
|
| 444 |
/**
 * Define do_search() ourselves, unless another implementation (such as the
 * core search module) has already defined it.
 */
if (!function_exists('do_search')) {
  /**
   * Xapian-backed replacement for do_search().
   *
   * Only $keywords and $arguments1 are used; the remaining parameters exist
   * to keep the signature compatible and are ignored. Numeric entries of
   * $arguments1 are treated as taxonomy term ids, all other entries as node
   * types. The current page is read from $_REQUEST['page'], and the global
   * pager arrays are filled in for pager element 0.
   *
   * @param string $keywords
   *   The search string, passed to xapian_query().
   * @param string $type
   *   Ignored.
   * @param string $join1
   *   Ignored.
   * @param string $where1
   *   Ignored.
   * @param array $arguments1
   *   Taxonomy term ids and/or node type names used to restrict the search.
   * @param string $select2
   *   Ignored.
   * @param string $join2
   *   Ignored.
   * @param array $arguments2
   *   Ignored.
   * @param string $sort_parameters
   *   Ignored.
   * @return array
   *   The result objects from xapian_query(); an empty array when the query
   *   could not be run.
   */
  function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
    global $pager_total;
    global $pager_page_array;

    // The page number is request input: force a non-negative integer.
    $page = empty($_REQUEST['page']) ? 0 : max(0, (int)$_REQUEST['page']);

    // Never allow a page size below 1; it is used as a divisor below.
    $per_page = max(1, (int)variable_get('xapian_search_results_per_page', 10));
    $start = $per_page * $page;

    $terms = array();
    $types = array();
    foreach ($arguments1 as $condition) {
      // If it is a number, it is a taxonomy term id.
      if (is_numeric($condition)) {
        $term = taxonomy_get_term($condition);
        if ($term) {
          $terms[] = 'XTAXONOMY'. strtolower($term->name);
        }
      }
      else {
        // Otherwise it is a node type. Lower-cased to match the term that is
        // written when a node is indexed.
        $types[] = 'XNODETYPE'. strtolower($condition);
      }
    }
    $extra = array($terms, $types);

    $found = xapian_query($keywords, $start, $per_page, $extra);
    if (!is_array($found)) {
      // The query failed (already logged); report "no results" rather than
      // handing NULL to callers that iterate over the return value.
      $pager_total[0] = 0;
      $pager_page_array[0] = $page;
      return array();
    }
    list($count, $results) = $found;

    // Round up: the former "(int)(count / size) + 1" produced an extra,
    // empty page whenever the count was an exact multiple of the page size.
    $pager_total[0] = (int)ceil($count / $per_page);
    $pager_page_array[0] = $page;

    return $results;
  }
}
|
| 501 |
|
| 502 |
/**
 * Index a single node into the writable Xapian database.
 *
 * The terms to index come from "<node type>_xapian_index()" when such a
 * function exists, otherwise from node_xapian_index(). The document is
 * stored under the unique term "Q<nid>", replacing any earlier version.
 *
 * @param object $node
 *   The node to index.
 * @return bool
 *   TRUE when the node was handed to Xapian (or produced no term list),
 *   FALSE when the writable database is unavailable or Xapian threw. Callers
 *   use FALSE to keep the node queued.
 */
function _xapian_index_node($node) {
  static $stemmer;
  static $indexer;

  $db = _xapian_init_database(XAPIAN_WRITABLE);
  if (!is_object($db)) {
    watchdog('xapian', t('Could not get writable database.'));
    return FALSE;
  }

  // Track which content is being indexed until the Xapian cache is flushed.
  // If we fail to flush the cache to disk for any reason, we will re-attempt
  // to index this content. See bug #272140 for full details.
  db_query('UPDATE {xapian_index_queue} SET status = status + 1 WHERE nid = %d', $node->nid);

  // Provide a mechanism to override the default indexing behaviour, but fall
  // back to standard node indexing.
  $function = $node->type .'_xapian_index';
  if (!function_exists($function)) {
    $function = 'node_xapian_index';
  }

  $terms = $function($node);
  if (!is_array($terms)) {
    // Nothing to index for this node.
    return TRUE;
  }

  try {
    if (!is_object($indexer)) {
      $indexer = new XapianTermGenerator();
    }
    if (!is_object($stemmer)) {
      $stemmer = new XapianStem("english");
    }
    $indexer->set_stemmer($stemmer);

    $document = new XapianDocument();
    $indexer->set_document($document);

    foreach ($terms as $term) {
      // Entries without a weight (e.g. the raw taxonomy terms) count once.
      // empty() avoids an undefined-index notice for those entries.
      $weight = empty($term['weight']) ? 1 : $term['weight'];

      switch ($term['type']) {
        // General text: tokenised and stemmed.
        case 'text':
          $indexer->index_text($term['data'], $weight);
          break;

        // Taxonomy term: added verbatim.
        case 'term':
          $document->add_term($term['data'], $weight);
          break;
      }
    }

    $document->set_data($node->nid);

    $document->add_term('Q'. $node->nid);
    $document->add_term('XNODETYPE'. strtolower($node->type));

    $db->replace_document('Q'. $node->nid, $document);
  }
  catch (Exception $e) {
    watchdog('xapian', $e->getMessage());
    // Report the failure. Returning TRUE here made xapian_index_node() drop
    // the node from the queue although nothing had been indexed.
    return FALSE;
  }

  return TRUE;
}
|
| 577 |
|
| 578 |
/**
 * Implementation of hook_nodeapi().
 *
 * Keeps the index in step with node changes: inserts, updates and revision
 * deletions (re)index the node, deleting a node removes it from the index.
 *
 * @param object $node
 * @param string $op
 */
function xapian_nodeapi(&$node, $op) {
  $reindex_ops = array('delete revision', 'insert', 'update');

  if (in_array($op, $reindex_ops)) {
    xapian_index_node($node);
  }
  elseif ($op == 'delete') {
    xapian_remove_node_from_index($node);
  }
}
|
| 596 |
|
| 597 |
/**
 * Implementation of hook_comment().
 *
 * Re-indexes the node a comment belongs to whenever the comment is inserted,
 * updated, deleted, published or unpublished.
 *
 * @param mixed $a1
 *   The comment, either as an array of form values or as a comment object,
 *   depending on the operation. Both forms are accepted; reading it only as
 *   an array caused a fatal error when an object was passed.
 * @param string $op
 *   The comment operation.
 */
function xapian_comment($a1, $op) {
  switch ($op) {
    case 'insert':
    case 'update':
    case 'delete':
    case 'publish':
    case 'unpublish':
      $nid = 0;
      if (is_array($a1) && isset($a1['nid'])) {
        $nid = $a1['nid'];
      }
      elseif (is_object($a1) && isset($a1->nid)) {
        $nid = $a1->nid;
      }

      if ($nid) {
        $node = node_load($nid);
        // node_load() returns FALSE for a missing node; nothing to index.
        if ($node) {
          xapian_index_node($node);
        }
      }
      break;
  }
}
|
| 616 |
|
| 617 |
/**
 * Add a node to the index queue, unless it is queued already.
 *
 * @param object $node
 *   The node to queue; only its nid is used.
 */
function _xapian_queue_index($node) {
  $nid = $node->nid;

  $existing = db_result(db_query('SELECT xid FROM {xapian_index_queue} WHERE nid = %d', $nid));
  if ($existing) {
    // Already waiting in the queue.
    return;
  }

  db_query('INSERT INTO {xapian_index_queue} (nid) VALUES (%d)', $nid);
}
|
| 629 |
|
| 630 |
/**
 * Index a node now or queue it, depending on the module settings.
 *
 * With 'xapian_index_immediately' enabled the node is indexed straight away,
 * the writable database is flushed and the node is removed from the queue.
 * If immediate indexing fails, or the setting is disabled, the node is
 * queued for later indexing.
 *
 * @param object $node
 */
function xapian_index_node(&$node) {
  if (!variable_get('xapian_index_immediately', TRUE)) {
    _xapian_queue_index($node);
    return;
  }

  if (!_xapian_index_node($node)) {
    watchdog('xapian', t('Failed to immediately index node with nid %nid, queing to try again later.', array('%nid' => $node->nid)));
    // Failed to update Xapian database, queue item to index later.
    _xapian_queue_index($node);
    return;
  }

  // Flush the changes to disk, then drop any queue entry for the node.
  _xapian_init_database(XAPIAN_WRITABLE, XAPIAN_FLUSH);
  db_query('DELETE FROM {xapian_index_queue} WHERE nid = %d', $node->nid);
}
|
| 653 |
|
| 654 |
/**
 * Remove a deleted node from the index.
 *
 * Deletes the document stored under the unique term "Q<nid>", flushes the
 * writable database and drops any pending queue entry for the node.
 *
 * @param object $node
 *   The node that was deleted. Anything without a nid is ignored.
 */
function xapian_remove_node_from_index($node) {
  if (!is_object($node) || empty($node->nid)) {
    return;
  }

  $db = _xapian_init_database(XAPIAN_WRITABLE);
  if (!$db) {
    return;
  }

  try {
    $db->delete_document('Q'. $node->nid);
  }
  catch (Exception $e) {
    // Log instead of letting a Xapian error abort the node deletion.
    watchdog('xapian', $e->getMessage());
  }

  // Release our own reference first. The flush below drops the shared
  // handle, and a surviving local reference would keep the database open
  // until this function returns.
  $db = NULL;
  _xapian_init_database(XAPIAN_WRITABLE, XAPIAN_FLUSH);

  // The node no longer exists, so a queued (re)index request for it could
  // only fail at the next cron run.
  db_query('DELETE FROM {xapian_index_queue} WHERE nid = %d', $node->nid);
}
|
| 671 |
|
| 672 |
/**
 * Process queued nodes for indexing.
 *
 * _xapian_index_node() raises the 'status' of a queue row before indexing
 * it, and rows are only removed after the database has been flushed. Rows
 * that still have a status above zero at the start of a run therefore belong
 * to a run that never finished:
 * - several such rows: retry all of them except one, so the node that
 *   presumably broke the previous run is isolated;
 * - exactly one such row: retry it once, but remove it from the queue first
 *   as it may not be indexable at all;
 * - none: process the queue normally, limited by 'xapian_indexing_throttle'.
 */
function xapian_index_queued_nodes() {
  $indexed = 0;
  $failed = 0;
  $delete = FALSE;
  $start_time = microtime(TRUE);

  // Check to see if some indexed nodes failed to be flushed to disk last time
  // we tried indexing nodes.
  $not_flushed = db_result(db_query('SELECT COUNT(xid) as count FROM {xapian_index_queue} WHERE status > 0'));
  if ($not_flushed > 1) {
    // Re-index content that wasn't flushed to disk last time, minus the node
    // which failed last time.
    $sql = 'SELECT nid FROM {xapian_index_queue} WHERE status > 0 ORDER BY COALESCE(priority, 0) DESC, added ASC';
    $limit = $not_flushed - 1;
  }
  elseif ($not_flushed == 1) {
    // Re-index a node that previously failed to index, removing it from the
    // queue now as it may not be indexable, such as a php node calling
    // drupal_goto()...
    $delete = db_result(db_query('SELECT nid FROM {xapian_index_queue} WHERE status > 0 LIMIT 1'));
    // Cast so the statement stays valid even if the row vanished meanwhile.
    $sql = 'SELECT nid FROM {xapian_index_queue} WHERE nid = '. (int)$delete;
    $limit = 1;
  }
  else {
    $sql = 'SELECT nid FROM {xapian_index_queue} ORDER BY COALESCE(priority, 0) DESC, added ASC';
    $limit = variable_get('xapian_indexing_throttle', 100);
  }

  $result = db_query_range($sql, 0, $limit);

  if (!$result) {
    watchdog('xapian', t('Unable to read index queue.'));
    return;
  }
  elseif ($delete) {
    db_query('DELETE FROM {xapian_index_queue} WHERE nid = %d', $delete);
  }

  while ($row = db_fetch_object($result)) {
    // Keep the queue row separate from the loaded node: node_load() returns
    // FALSE for a missing node, and the nid is still needed below.
    $node = node_load($row->nid);

    if ($node) {
      // Found a node, index it.
      if (_xapian_index_node($node)) {
        $indexed++;
      }
      else {
        $failed++;
      }
    }
    else {
      // Not a node, log an error, but may as well clear it from the queue.
      watchdog('xapian', t('Could not load node with id %node for indexing', array('%node' => $row->nid)));
      db_query('DELETE FROM {xapian_index_queue} WHERE nid = %d', $row->nid);
      $failed++;
    }
  }

  // Flush the changes to disk.
  _xapian_init_database(XAPIAN_WRITABLE, XAPIAN_FLUSH);

  // Remove the rows carrying the highest status value from the queue.
  $status = db_result(db_query('SELECT MAX(status) FROM {xapian_index_queue}'));
  if ($status) {
    db_query('DELETE FROM {xapian_index_queue} WHERE status = %d', $status);
  }

  if (variable_get('xapian_log_queries', FALSE)) {
    $time_taken = (microtime(TRUE) - $start_time) * 1000;
    watchdog('xapian', t('<p>Indexed %indexed nodes successfully, %failed failed in: %time ms</p>',
      array('%indexed' => $indexed, '%failed' => $failed, '%time' => $time_taken)));
  }
}
|
| 747 |
|
| 748 |
/**
 * Implementation of hook_cron().
 *
 * Runs the queue processor when at least one row is waiting in the index
 * queue.
 */
function xapian_cron() {
  $pending = db_result(db_query('SELECT xid FROM {xapian_index_queue} LIMIT 1'));

  if (!$pending) {
    // Nothing queued.
    return;
  }

  xapian_index_queued_nodes();
}
|
| 759 |
|
| 760 |
/**
 * Form builder: ask for confirmation before re-indexing the whole site.
 *
 * @return array
 *   The confirmation form built by confirm_form(). Cancelling leads back to
 *   the Xapian settings page.
 */
function xapian_reindex_all_confirm() {
  $question = t('Are you sure you want to re-index the site?');
  $cancel_path = 'admin/settings/xapian';
  $explanation = t('<p>This will queue up all site content to be re-indexed when cron runs. The search index is not cleared, instead it is systematically updated. Searching will continue to work.</p><p>If you have immediate indexing enabled, new content will continue to be indexed. If you do not have immediate indexing enabled, new content will not be indexed until all existing content has been re-indexed. This action cannot be undone.</p>');

  return confirm_form(array(), $question, $cancel_path, $explanation, t('Re-index site'), t('Cancel'));
}
|
| 766 |
|
| 767 |
/**
 * Submit handler for the re-index confirmation form.
 *
 * Empties the index queue and refills it with every node id, so that all
 * site content is indexed again.
 *
 * @param string $form_id
 * @param array $form
 *   Submitted values; the queue is only rebuilt when 'confirm' is set.
 * @return string
 *   The path to redirect to, or nothing when the form was not confirmed.
 */
function xapian_reindex_all_confirm_submit($form_id, &$form) {
  if (!$form['confirm']) {
    return;
  }

  db_query('TRUNCATE {xapian_index_queue}');
  db_query('INSERT INTO {xapian_index_queue} (nid) SELECT nid FROM {node}');
  drupal_set_message(t('The search index will be rebuilt.'));

  return 'admin/settings/xapian';
}
|
| 778 |
|
| 779 |
/**
 * Default node index function.
 *
 * Builds the list of items to index for a node: its title, its rendered
 * body, any extra text returned by the 'update index' nodeapi operation, and
 * its taxonomy terms. Each taxonomy term is added both as plain text and as
 * an exact "XTAXONOMY<name>" term so that searches can filter on taxonomy.
 *
 * @param object $node
 * @return array
 *   A list of arrays with the keys 'type' ('text' or 'term'), 'data' and,
 *   for most entries, 'weight'.
 */
function node_xapian_index($node) {
  $terms = array();

  // Build the node body.
  $node = node_build_content($node, FALSE, FALSE);
  $node->body = drupal_render($node->content);

  // Allow modules to modify the fully-built node.
  node_invoke_nodeapi($node, 'alter');

  $terms[] = array('type' => 'text', 'data' => strip_tags($node->title), 'weight' => 5);
  $terms[] = array('type' => 'text', 'data' => strip_tags($node->body), 'weight' => 2);

  // Collect extra text contributed by other modules. $text has to start out
  // as an empty string: appending to an undefined variable raises a notice,
  // and so does iterating over a non-array result.
  $text = '';
  $extra = module_invoke('node', 'invoke_nodeapi', $node, 'update index');
  if (is_array($extra)) {
    foreach ($extra as $t) {
      $text .= $t;
    }
  }

  if (strlen($text)) {
    $terms[] = array('type' => 'text', 'data' => strip_tags($text), 'weight' => 1);
  }

  $taxonomy_terms = taxonomy_node_get_terms($node->nid);
  if (is_array($taxonomy_terms)) {
    foreach ($taxonomy_terms as $term) {
      // Add taxonomy term as standard text.
      $terms[] = array(
        'type' => 'text',
        'weight' => 2,
        'data' => $term->name,
      );

      // Add a special type, so we can query on taxonomy only.
      $terms[] = array(
        'type' => 'term',
        'data' => 'XTAXONOMY'. strtolower($term->name),
      );
    }
  }

  return $terms;
}
|