| 1 |
<?php |
<?php |
| 2 |
// $Id: xapian.module,v 1.10.2.4 2009/01/16 00:18:39 simon Exp $ |
// $Id: xapian.module,v 1.10.2.5 2009/01/30 14:24:13 simon Exp $ |
| 3 |
|
|
| 4 |
/** |
/** |
| 5 |
* @file |
* @file |
| 24 |
* @param int $errline |
* @param int $errline |
| 25 |
* @param unknown_type $errcontext |
* @param unknown_type $errcontext |
| 26 |
*/ |
*/ |
| 27 |
function _xapian_requirements_error_handler($errno, $errstr, $errfile = NULL, $errline = NULL, $errcontext = NULL) { |
function _xapian_requirements_error_handler($errno, $errstr, $errfile = NULL, $errline = NULL, $errcontext = NULL) { |
| 28 |
$GLOBALS['xapian_include_error'] = $errstr; |
$GLOBALS['xapian_include_error'] = $errstr; |
| 29 |
} |
} |
| 30 |
|
|
| 47 |
$available = TRUE; |
$available = TRUE; |
| 48 |
} |
} |
| 49 |
} |
} |
| 50 |
|
|
| 51 |
return $available; |
return $available; |
| 52 |
} |
} |
| 53 |
|
|
| 121 |
function xapian_admin() { |
function xapian_admin() { |
| 122 |
$form = array(); |
$form = array(); |
| 123 |
$form['#cache'] = FALSE; |
$form['#cache'] = FALSE; |
| 124 |
|
|
| 125 |
$database_type = variable_get('xapian_database_type', 0); |
$database_type = variable_get('xapian_database_type', 0); |
| 126 |
|
|
| 127 |
// Create a database fieldset |
// Create a database fieldset |
| 128 |
$form['database'] = array( |
$form['database'] = array( |
| 129 |
'#type' => 'fieldset', |
'#type' => 'fieldset', |
| 130 |
'#title' => t('Xapian database') |
'#title' => t('Xapian database') |
| 131 |
); |
); |
| 132 |
|
|
| 133 |
// Database type |
// Database type |
| 134 |
$form['database']['xapian_database_type'] = array( |
$form['database']['xapian_database_type'] = array( |
| 135 |
'#type' => 'radios', |
'#type' => 'radios', |
| 243 |
'#default_value' => variable_get('xapian_log_queries', FALSE), |
'#default_value' => variable_get('xapian_log_queries', FALSE), |
| 244 |
); |
); |
| 245 |
|
|
| 246 |
// Node Type Settings |
// Node Type Settings |
| 247 |
$form['node_types'] = array( |
$form['node_types'] = array( |
| 248 |
'#type' => 'fieldset', |
'#type' => 'fieldset', |
| 249 |
'#title' => t('Node Types') |
'#title' => t('Node Types') |
| 250 |
); |
); |
| 333 |
if (!$writable && is_object($database)) { |
if (!$writable && is_object($database)) { |
| 334 |
return $database; |
return $database; |
| 335 |
} |
} |
| 336 |
|
|
| 337 |
if ($writable && is_object($writable_database)) { |
if ($writable && is_object($writable_database)) { |
| 338 |
return $writable_database; |
return $writable_database; |
| 339 |
} |
} |
| 384 |
* The xapian_query function queries the database using both a query string |
* The xapian_query function queries the database using both a query string |
| 385 |
* and application-defined terms. |
* and application-defined terms. |
| 386 |
* |
* |
| 387 |
* @param $query_string A string (perhaps supplied by the user) containing |
* @param $query_string A string (perhaps supplied by the user) containing |
| 388 |
* terms to search for. This string will be parsed and |
* terms to search for. This string will be parsed and |
| 389 |
* stemmed automatically. |
* stemmed automatically. |
| 390 |
* |
* |
| 391 |
* @param $start An integer defining the first document to return |
* @param $start An integer defining the first document to return |
| 453 |
case 1: // Lower bound |
case 1: // Lower bound |
| 454 |
$count = $matches->get_matches_lower_bound(); |
$count = $matches->get_matches_lower_bound(); |
| 455 |
break; |
break; |
| 456 |
|
|
| 457 |
case 2: // Upper bound |
case 2: // Upper bound |
| 458 |
$count = $matches->get_matches_upper_bound(); |
$count = $matches->get_matches_upper_bound(); |
| 459 |
break; |
break; |
| 511 |
if (is_numeric($condition)) { |
if (is_numeric($condition)) { |
| 512 |
$term = taxonomy_get_term($condition); |
$term = taxonomy_get_term($condition); |
| 513 |
if ($term) { |
if ($term) { |
| 514 |
$terms[] = 'XTAXONOMY'. strtolower($term->name); |
$terms[] = 'XTAXONOMY'. strtolower($term->name); |
| 515 |
} |
} |
| 516 |
} |
} |
| 517 |
else { |
else { |
| 525 |
|
|
| 526 |
$pager_total[0] = (int)($count / variable_get('xapian_search_results_per_page', 10)) + 1; |
$pager_total[0] = (int)($count / variable_get('xapian_search_results_per_page', 10)) + 1; |
| 527 |
$pager_page_array[0] = $page; |
$pager_page_array[0] = $page; |
| 528 |
|
|
| 529 |
return $results; |
return $results; |
| 530 |
} |
} |
| 531 |
} |
} |
| 544 |
watchdog('xapian', 'Could not get writable database.'); |
watchdog('xapian', 'Could not get writable database.'); |
| 545 |
return FALSE; |
return FALSE; |
| 546 |
} |
} |
| 547 |
|
|
| 548 |
// Track which content is being indexed until the Xapian cache is flushed. |
// Track which content is being indexed until the Xapian cache is flushed. |
| 549 |
// If we fail to flush the cache to disk for any reason, we will re-attempt |
// If we fail to flush the cache to disk for any reason, we will re-attempt |
| 550 |
// to index this content. See bug #272140 for full details. |
// to index this content. See bug #272140 for full details. |
| 551 |
db_query('UPDATE {xapian_index_queue} SET status = status + 1 WHERE nid = %d', $node->nid); |
db_query('UPDATE {xapian_index_queue} SET status = status + 1 WHERE nid = %d', $node->nid); |
| 552 |
|
|
| 553 |
// Provide mechanism to override default indexing behaviour |
// Provide mechanism to override default indexing behaviour |
| 554 |
$function = $node->type .'_xapian_index'; |
$function = $node->type .'_xapian_index'; |
| 555 |
|
|
| 556 |
// But fallback to standard node indexing |
// But fallback to standard node indexing |
| 557 |
if (!function_exists($function)) { |
if (!function_exists($function)) { |
| 558 |
$function = 'node_xapian_index'; |
$function = 'node_xapian_index'; |
| 559 |
} |
} |
| 560 |
|
|
| 561 |
$terms = $function($node); |
$terms = $function($node); |
| 562 |
if (is_array($terms)) { |
if (is_array($terms)) { |
| 563 |
|
|
| 564 |
try { |
try { |
|
|
|
| 565 |
if ($delete) { |
if ($delete) { |
| 566 |
watchdog('xapian', 'Delete attempt Node with id %node', array('%node' => $node->nid)); |
watchdog('xapian', 'Delete attempt Node with id %node', array('%node' => $node->nid)); |
| 567 |
$db->delete_document('Q'. $node->nid); |
$db->delete_document('Q'. $node->nid); |
| 568 |
} |
} |
| 569 |
else{ |
else{ |
| 570 |
if (!is_object($indexer)) { |
if (!is_object($indexer)) { |
| 571 |
$indexer = new XapianTermGenerator(); |
$indexer = new XapianTermGenerator(); |
| 572 |
} |
} |
| 573 |
|
|
| 574 |
if (!is_object($stemmer)) { |
if (!is_object($stemmer)) { |
| 575 |
$stemmer = new XapianStem("english"); |
$stemmer = new XapianStem("english"); |
| 576 |
} |
} |
| 577 |
$indexer->set_stemmer($stemmer); |
$indexer->set_stemmer($stemmer); |
| 578 |
|
|
| 579 |
$document = new XapianDocument(); |
$document = new XapianDocument(); |
| 580 |
$indexer->set_document($document); |
$indexer->set_document($document); |
| 581 |
|
|
| 582 |
foreach ($terms as $term) { |
foreach ($terms as $term) { |
| 583 |
if (!$term['weight']) { |
if (!$term['weight']) { |
| 584 |
$term['weight'] = 1; |
$term['weight'] = 1; |
| 595 |
break; |
break; |
| 596 |
} |
} |
| 597 |
} |
} |
| 598 |
|
|
| 599 |
$document->set_data($node->nid); |
$document->set_data($node->nid); |
| 600 |
|
|
| 601 |
$document->add_term('Q'. $node->nid); |
$document->add_term('Q'. $node->nid); |
| 602 |
$document->add_term('XNODETYPE'. strtolower($node->type)); |
$document->add_term('XNODETYPE'. strtolower($node->type)); |
| 603 |
|
|
| 604 |
$db->replace_document('Q'. $node->nid, $document); |
$db->replace_document('Q'. $node->nid, $document); |
| 605 |
} |
} |
| 606 |
} |
} |
| 607 |
catch (Exception $e) { |
catch (Exception $e) { |
| 608 |
watchdog('xapian', $e->getMessage()); |
watchdog('xapian', $e->getMessage()); |
| 609 |
} |
} |
| 610 |
} |
} |
| 611 |
|
|
| 612 |
return TRUE; |
return TRUE; |
| 613 |
} |
} |
| 614 |
|
|
| 632 |
} |
} |
| 633 |
|
|
| 634 |
/** |
/** |
| 635 |
* Implementation of hook_comment to catch inserts, updates and deletes and |
* Implementation of hook_comment to catch inserts, updates and deletes and |
| 636 |
* update the index. |
* update the index. |
| 637 |
* |
* |
| 638 |
* @param unknown type $a1 |
* @param unknown type $a1 |
| 659 |
} |
} |
| 660 |
|
|
| 661 |
/** |
/** |
| 662 |
* Queue a node for indexing |
* Queue a node for indexing |
| 663 |
* |
* |
| 664 |
* @param object $node |
* @param object $node |
| 665 |
*/ |
*/ |
| 680 |
function xapian_index_node(&$node) { |
function xapian_index_node(&$node) { |
| 681 |
$exclusions = array_values(variable_get('xapian_excluded_nodes', array())); |
$exclusions = array_values(variable_get('xapian_excluded_nodes', array())); |
| 682 |
if (in_array($node->type, $exclusions)) { |
if (in_array($node->type, $exclusions)) { |
| 683 |
return; |
return; |
| 684 |
} |
} |
| 685 |
if (variable_get('xapian_index_immediately', TRUE)) { |
if (variable_get('xapian_index_immediately', TRUE)) { |
| 686 |
if (_xapian_index_node($node)) { |
if (_xapian_index_node($node)) { |
| 726 |
$failed = 0; |
$failed = 0; |
| 727 |
$delete = FALSE; |
$delete = FALSE; |
| 728 |
$start_time = microtime(TRUE); |
$start_time = microtime(TRUE); |
| 729 |
|
|
| 730 |
// Check to see if some indexed nodes failed to be flushed to disk last time |
// Check to see if some indexed nodes failed to be flushed to disk last time |
| 731 |
// we tried indexing nodes. |
// we tried indexing nodes. |
| 732 |
$not_flushed = db_result(db_query('SELECT COUNT(xid) as count FROM {xapian_index_queue} WHERE status > 0')); |
$not_flushed = db_result(db_query('SELECT COUNT(xid) as count FROM {xapian_index_queue} WHERE status > 0')); |
| 738 |
} |
} |
| 739 |
else if ($not_flushed == 1) { |
else if ($not_flushed == 1) { |
| 740 |
// Re-index a node that previously failed to index, removing it from the |
// Re-index a node that previously failed to index, removing it from the |
| 741 |
// queue now as it may not be indexable, such as a php node calling |
// queue now as it may not be indexable, such as a php node calling |
| 742 |
// drupal_goto()... |
// drupal_goto()... |
| 743 |
$delete = db_result(db_query_range('SELECT nid FROM {xapian_index_queue} WHERE status > 0', 0, 1)); |
$delete = db_result(db_query_range('SELECT nid FROM {xapian_index_queue} WHERE status > 0', 0, 1)); |
| 744 |
$sql = 'SELECT nid FROM {xapian_index_queue} WHERE nid = '. $delete; |
$sql = 'SELECT nid FROM {xapian_index_queue} WHERE nid = '. $delete; |
| 748 |
$sql = 'SELECT nid FROM {xapian_index_queue} ORDER BY COALESCE(priority, 0) DESC, added ASC'; |
$sql = 'SELECT nid FROM {xapian_index_queue} ORDER BY COALESCE(priority, 0) DESC, added ASC'; |
| 749 |
$limit = variable_get('xapian_indexing_throttle', 100); |
$limit = variable_get('xapian_indexing_throttle', 100); |
| 750 |
} |
} |
| 751 |
|
|
| 752 |
$result = db_query_range($sql, 0, $limit); |
$result = db_query_range($sql, 0, $limit); |
| 753 |
|
|
| 754 |
if (!$result) { |
if (!$result) { |
| 761 |
|
|
| 762 |
$exclusions = array_values(variable_get('xapian_excluded_nodes', array())); |
$exclusions = array_values(variable_get('xapian_excluded_nodes', array())); |
| 763 |
while ($node = db_fetch_object($result)) { |
while ($node = db_fetch_object($result)) { |
| 764 |
$node = node_load($node->nid); |
$node = node_load(array('nid' => $node->nid)); |
| 765 |
if ($node) { |
if ($node) { |
| 766 |
if (in_array($node->type, $exclusions)) { |
if (in_array($node->type, $exclusions)) { |
| 767 |
// None indexable a node, log and clear it from the queue |
// None indexable a node, log and clear it from the queue |
| 793 |
if ($status) { |
if ($status) { |
| 794 |
db_query('DELETE FROM {xapian_index_queue} WHERE status = %d', $status); |
db_query('DELETE FROM {xapian_index_queue} WHERE status = %d', $status); |
| 795 |
} |
} |
| 796 |
|
|
| 797 |
if (variable_get('xapian_log_queries', FALSE)) { |
if (variable_get('xapian_log_queries', FALSE)) { |
| 798 |
$time_taken = (microtime(TRUE) - $start_time) * 1000; |
$time_taken = (microtime(TRUE) - $start_time) * 1000; |
| 799 |
watchdog('xapian', '<p>Indexed %indexed nodes successfully, %failed failed in: %time ms</p>', |
watchdog('xapian', '<p>Indexed %indexed nodes successfully, %failed failed in: %time ms</p>', |
| 834 |
|
|
| 835 |
/** |
/** |
| 836 |
* Default node index function |
* Default node index function |
| 837 |
* Indexes title, body, terms in the node (as text), and the taxonomy terms |
* Indexes title, body, terms in the node (as text), and the taxonomy terms |
| 838 |
* as well. |
* as well. |
| 839 |
* |
* |
| 840 |
* @param object $node |
* @param object $node |
| 852 |
|
|
| 853 |
$terms[] = array('type' => 'text', 'data' => strip_tags($node->title), 'weight' => 5); |
$terms[] = array('type' => 'text', 'data' => strip_tags($node->title), 'weight' => 5); |
| 854 |
$terms[] = array('type' => 'text', 'data' => strip_tags($node->body), 'weight' => 2); |
$terms[] = array('type' => 'text', 'data' => strip_tags($node->body), 'weight' => 2); |
| 855 |
|
|
| 856 |
// If the node type doesn't have taxonomy, Drupal 6 doesn't create the taxonomy array, |
// If the node type doesn't have taxonomy, Drupal 6 doesn't create the taxonomy array, |
| 857 |
// and this triggers an error in taxonomy_node_update_index() called next. |
// and this triggers an error in taxonomy_node_update_index() called next. |
| 858 |
// Make sure the array exists |
// Make sure the array exists |