| 1 |
<?php
|
| 2 |
// $Id$
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
* Search query extender and helper functions.
|
| 7 |
*/
|
| 8 |
|
| 9 |
/**
|
| 10 |
* Do a query on the full-text search index for a word or words.
|
| 11 |
*
|
| 12 |
* This function is normally only called by each module that supports the
|
| 13 |
* indexed search (and thus, implements hook_update_index()).
|
| 14 |
*
|
| 15 |
* Results are retrieved in two logical passes. However, the two passes are
|
| 16 |
* joined together into a single query. And in the case of most simple
|
| 17 |
* queries the second pass is not even used.
|
| 18 |
*
|
| 19 |
* The first pass selects a set of all possible matches, which has the benefit
|
| 20 |
* of also providing the exact result set for simple "AND" or "OR" searches.
|
| 21 |
*
|
| 22 |
* The second portion of the query further refines this set by verifying
|
| 23 |
* advanced text conditions (such as negative or phrase matches).
|
| 24 |
*
|
| 25 |
* The used query object has the tag 'search_$type' and can be further extended
|
| 26 |
* with hook_query_alter().
|
| 27 |
*/
|
| 28 |
class SearchQuery extends SelectQueryExtender {
  /**
   * The search expression (keywords and options) that is being searched for.
   *
   * @var string
   */
  protected $searchExpression;

  /**
   * Type of search.
   *
   * This maps to the value of the type column in search_index.
   *
   * @var string
   */
  protected $type;

  /**
   * Positive and negative search keys.
   *
   * Entries of 'positive' are either a single string (an ANDed term) or an
   * array of strings (a group of ORed terms).
   *
   * @var array
   */
  protected $keys = array('positive' => array(), 'negative' => array());

  /**
   * Indicates whether the first pass query can skip the LIKE conditions.
   *
   * Set to FALSE as soon as the expression contains a phrase, a negative
   * keyword, a mix of AND and OR terms, or an AND term without valid words.
   *
   * @var boolean
   */
  protected $simple = TRUE;

  /**
   * Conditions that are used for exact searches.
   *
   * This is always used for the second pass query but not for the first pass,
   * unless $this->simple is FALSE.
   *
   * @var DatabaseCondition
   */
  protected $conditions;

  /**
   * Indicates how many matches for a search query are necessary.
   *
   * @var int
   */
  protected $matches = 0;

  /**
   * Array of search words.
   *
   * These words have to match against {search_index}.word.
   *
   * @var array
   */
  protected $words = array();

  /**
   * Multiplier for the normalized search score.
   *
   * This value is calculated by the first pass query and multiplied with the
   * actual score of a specific word to make sure that the resulting calculated
   * score is between 0 and 1.
   *
   * @var float
   */
  protected $normalize;

  /**
   * Indicates if the first pass query has been executed.
   *
   * @var boolean
   */
  protected $executedFirstPass = FALSE;

  /**
   * Stores score expressions.
   *
   * @var array
   */
  protected $scores = array();

  /**
   * Stores arguments for score expressions.
   *
   * @var array
   */
  protected $scoresArguments = array();

  /**
   * The multipliers passed to addScore(), in the order they were added.
   *
   * Their sum is used in execute() to keep the combined score normalized.
   *
   * @var array
   */
  protected $multiply = array();

  /**
   * Sets the search expression and the type of items to search for.
   *
   * @param $expression
   *   A search expression string, which can contain options.
   * @param $type
   *   The type of search, this maps to {search_index}.type.
   *
   * @return
   *   The SearchQuery object.
   */
  public function searchExpression($expression, $type) {
    $this->searchExpression = $expression;
    $this->type = $type;

    return $this;
  }

  /**
   * Apply a search option and remove it from the search expression.
   *
   * These options are in the form option:value,value2,value3. Every value is
   * added as an equality condition on $column; the values are ORed together.
   *
   * @param $option
   *   Name of the option.
   * @param $column
   *   Name of the db column to which the value should be applied.
   *
   * @return
   *   TRUE if at least a value for that option has been found, FALSE if not.
   */
  public function setOption($option, $column) {
    $values = search_expression_extract($this->searchExpression, $option);
    if (!$values) {
      return FALSE;
    }

    $or = db_or();
    foreach (explode(',', $values) as $value) {
      $or->condition($column, $value);
    }
    $this->condition($or);

    // Calling search_expression_insert() without a value is relied upon to
    // strip the option from the expression, so that it is not parsed as a
    // keyword later on.
    $this->searchExpression = search_expression_insert($this->searchExpression, $option);
    return TRUE;
  }

  /**
   * Parse the search expression into SQL conditions.
   *
   * Fills $this->keys with the positive and negative keywords, registers the
   * words to look up in the index through parseWord(), and builds the LIKE /
   * NOT LIKE conditions against d.data in $this->conditions. Also determines
   * $this->matches and whether the expression is still $this->simple.
   */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a dash. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $or = FALSE;
    $warning = '';
    foreach ($keywords as $match) {
      $phrase = FALSE;
      // Strip off phrase quotes. The pattern guarantees $match[2] is never
      // empty, so offset 0 always exists.
      if ($match[2][0] === '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }
      // Simplify keyword according to indexing rules and external
      // preprocessors.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] === '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] === 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $or = TRUE;
        // Keep the flag set so the next keyword joins this group.
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] === 'AND' || $match[2] === 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] === 'or') {
          // A lowercase "or" is treated as a keyword; remember it so the user
          // can be told about the uppercase operator.
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $this->conditions = db_and();
    $simple_and = FALSE;
    $simple_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $any = FALSE;
        $queryor = db_or();
        foreach ($key as $or_key) {
          list($num_new_scores) = $this->parseWord($or_key);
          $any = $any || $num_new_scores > 0;
          $queryor->condition('d.data', "% $or_key %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          if ($any) {
            $this->matches++;
          }
        }
      }
      // Single ANDed term.
      else {
        $simple_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($simple_and && $simple_or) {
      $this->simple = FALSE;
    }
    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }

    if ($warning === 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
  }

  /**
   * Helper function for parseSearchExpression().
   *
   * Splits a keyword or phrase on spaces and registers every part that is
   * numeric or at least 'minimum_word_size' characters long in $this->words.
   *
   * @param $word
   *   A simplified keyword or phrase.
   *
   * @return
   *   An array with two elements: the number of words newly added to
   *   $this->words, and the number of valid words found (new or not).
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;
    $minimum_word_size = variable_get('minimum_word_size', 3);

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      $num = is_numeric($s);
      if ($num || drupal_strlen($s) >= $minimum_word_size) {
        // Numbers are reduced to an integer without sign or leading zeros.
        $s = $num ? ((int) ltrim($s, '-0')) : $s;
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    return array($num_new_scores, $num_valid_words);
  }

  /**
   * Execute the first pass query.
   *
   * This can either be done explicitly, so that additional scores and
   * conditions can be applied to the second pass query or implicitly by
   * execute().
   *
   * @return
   *   TRUE if search items exist, FALSE if not.
   */
  public function executeFirstPass() {
    $this->parseSearchExpression();

    if (count($this->words) == 0) {
      form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
      return FALSE;
    }
    $this->executedFirstPass = TRUE;

    // Restrict the index lookup to the parsed words. $this->words is known to
    // be non-empty at this point.
    $or = db_or();
    foreach ($this->words as $word) {
      $or->condition('i.word', $word);
    }
    $this->condition($or);

    // Build query for keyword normalization.
    $this->join('search_total', 't', 'i.word = t.word');
    $this
      ->condition('i.type', $this->type)
      ->groupBy('i.type')
      ->groupBy('i.sid')
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));

    // Clone the query object to do the first pass query, so that the
    // additions below do not leak into the second pass.
    $first = clone $this->query;

    // For complex search queries, add the LIKE conditions to the first pass
    // query.
    if (!$this->simple) {
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
      $first->condition($this->conditions);
    }

    // Calculate maximum keyword relevance, to normalize it.
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $this->normalize = $first
      ->range(0, 1)
      ->orderBy('calculated_score', 'DESC')
      ->execute()
      ->fetchField();

    return $this->normalize ? TRUE : FALSE;
  }

  /**
   * Adds a custom score expression to the search query.
   *
   * Each score expression can optionally use a multiplicator and multiple
   * expressions are combined.
   *
   * @param $score
   *   The score expression.
   * @param $arguments
   *   Custom query arguments for that expression.
   * @param $multiply
   *   If set, the score is multiplied with that value. Search query ensures
   *   that the search scores are still normalized.
   *
   * @return
   *   The SearchQuery object.
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      $i = count($this->multiply);
      // The :total_$i placeholder is filled in by execute(), once the sum of
      // all multipliers is known.
      $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
      $arguments[':multiply_' . $i] = $multiply;
      $this->multiply[] = $multiply;
    }

    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }

  /**
   * Execute the search.
   *
   * If not already done, this executes the first pass query, then the complex
   * conditions are applied to the query including score expressions and
   * ordering.
   *
   * @return
   *   FALSE if the first pass query returned no results and a database result
   *   set if not.
   */
  public function execute() {
    if (!$this->executedFirstPass) {
      $this->executeFirstPass();
    }
    if (!$this->normalize) {
      return FALSE;
    }

    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $this->condition($this->conditions);

    if (empty($this->scores)) {
      // Add default score.
      $this->addScore('i.relevance');
    }
    if (count($this->getOrderBy()) == 0) {
      // Add default order.
      $this->orderBy('calculated_score', 'DESC');
    }

    if (count($this->multiply)) {
      // Add the total multiplicator as many times as requested to maintain
      // normalization as far as possible. addScore() emits one :total_N
      // placeholder per multiplier, numbered by position in $this->multiply,
      // so each of them needs its own argument.
      $sum = array_sum($this->multiply);
      foreach (array_keys($this->multiply) as $i) {
        $this->scoresArguments[':total_' . $i] = $sum;
      }
    }

    // Replace i.relevance pseudo-field with the actual, normalized value.
    $this->scores = str_replace('i.relevance', '(' . (1.0 / $this->normalize) . ' * i.score * t.count)', $this->scores);
    // Convert scores to an expression.
    $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

    // Add tag and useful metadata.
    $this
      ->addTag('search_' . $this->type)
      ->addMetaData('normalize', $this->normalize)
      ->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }
}
|