/[drupal]/drupal/modules/search/search.extender.inc
ViewVC logotype

Contents of /drupal/modules/search/search.extender.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (show annotations) (download) (as text)
Sat Aug 29 12:43:18 2009 UTC (2 months, 4 weeks ago) by dries
Branch: MAIN
CVS Tags: DRUPAL-7-0-UNSTABLE-9, DRUPAL-7-0-UNSTABLE-10, HEAD
File MIME type: text/x-php
- Added missing file.
1 <?php
2 // $Id$
3
4 /**
5 * @file
6 * Search query extender and helper functions.
7 */
8
/**
 * Select query extender that searches the full-text search index.
 *
 * This class is normally only used by modules that support the indexed
 * search (and thus implement hook_update_index()).
 *
 * Results are retrieved in two logical passes. However, the two passes are
 * joined together into a single query. And in the case of most simple
 * queries the second pass is not even used.
 *
 * The first pass selects a set of all possible matches, which has the benefit
 * of also providing the exact result set for simple "AND" or "OR" searches.
 *
 * The second portion of the query further refines this set by verifying
 * advanced text conditions (such as negative or phrase matches).
 *
 * The used query object has the tag 'search_$type' and can be further extended
 * with hook_query_alter().
 */
class SearchQuery extends SelectQueryExtender {
  /**
   * The search expression (keywords and options) that is being searched for.
   *
   * @var string
   */
  protected $searchExpression;

  /**
   * Type of search.
   *
   * This maps to the value of the type column in search_index.
   *
   * @var string
   */
  protected $type;

  /**
   * Positive and negative search keys.
   *
   * Entries of 'positive' are either a single keyword (string) or an array of
   * keywords that were combined with the OR operator.
   *
   * @var array
   */
  protected $keys = array('positive' => array(), 'negative' => array());

  /**
   * Indicates if the first pass query can do without complex conditions (LIKE).
   *
   * When FALSE, the LIKE conditions stored in $conditions are also added to
   * the first pass query.
   *
   * @var boolean
   */
  protected $simple = TRUE;

  /**
   * Conditions that are used for exact searches.
   *
   * This is always used for the second pass query but not for the first pass,
   * unless $this->simple is FALSE.
   *
   * @var DatabaseCondition
   */
  protected $conditions;

  /**
   * Indicates how many matches for a search query are necessary.
   *
   * Used as the lower bound in the HAVING COUNT(*) clause.
   *
   * @var int
   */
  protected $matches = 0;

  /**
   * Array of search words.
   *
   * These words have to match against {search_index}.word.
   *
   * @var array
   */
  protected $words = array();

  /**
   * Multiplier for the normalized search score.
   *
   * This value is calculated by the first pass query and multiplied with the
   * actual score of a specific word to make sure that the resulting calculated
   * score is between 0 and 1.
   *
   * @var float
   */
  protected $normalize;

  /**
   * Indicates if the first pass query has been executed.
   *
   * @var boolean
   */
  protected $executedFirstPass = FALSE;

  /**
   * Stores score expressions.
   *
   * @var array
   */
  protected $scores = array();

  /**
   * Stores arguments for score expressions.
   *
   * @var array
   */
  protected $scoresArguments = array();

  /**
   * Multipliers passed to addScore(), in the order they were added.
   *
   * The position of a multiplier in this array is the N used in the
   * :multiply_N and :total_N placeholders of its score expression. The sum of
   * all entries is bound to every :total_N placeholder by execute().
   *
   * @var array
   */
  protected $multiply = array();

  /**
   * Sets the search expression and the search type.
   *
   * @param $expression
   *   A search query string, that can contain options.
   * @param $type
   *   The type of search, this maps to {search_index}.type.
   * @return
   *   The SearchQuery object.
   */
  public function searchExpression($expression, $type) {
    $this->searchExpression = $expression;
    $this->type = $type;

    return $this;
  }

  /**
   * Apply a search option and remove it from the search query string.
   *
   * These options are in the form option:value,value2,value3.
   *
   * @param $option
   *   Name of the option.
   * @param $column
   *   Name of the db column to which the value should be applied.
   * @return
   *   TRUE if at least a value for that option has been found, FALSE if not.
   */
  public function setOption($option, $column) {
    $values = search_expression_extract($this->searchExpression, $option);
    if (!$values) {
      return FALSE;
    }

    // Any one of the comma separated values may match the column.
    $or = db_or();
    foreach (explode(',', $values) as $value) {
      $or->condition($column, $value);
    }
    $this->condition($or);

    // Calling the insert helper without a value; the returned expression
    // replaces the stored one.
    $this->searchExpression = search_expression_insert($this->searchExpression, $option);
    return TRUE;
  }

  /**
   * Parse the search expression into keys, words and SQL conditions.
   *
   * Fills $this->keys, $this->words (through parseWord()), $this->matches and
   * $this->conditions, and clears $this->simple whenever LIKE conditions are
   * required to get an exact result (phrases, negative keys, short words or a
   * mix of AND and OR terms).
   */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a dash. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $or = FALSE;
    $warning = '';
    foreach ($keywords as $match) {
      $phrase = FALSE;
      // Strip off phrase quotes. Square brackets are used for the string
      // offset because the curly brace form is no longer valid PHP syntax.
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }
      // Simplify keyword according to indexing rules and external preprocessors.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
      // Negative matches.
      if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $or = TRUE;
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] == 'or') {
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $this->conditions = db_and();
    $simple_and = FALSE;
    $simple_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $any = FALSE;
        $queryor = db_or();
        foreach ($key as $or_word) {
          list($num_new_scores) = $this->parseWord($or_word);
          $any = $any || $num_new_scores > 0;
          $queryor->condition('d.data', "% $or_word %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          if ($any) {
            $this->matches++;
          }
        }
      }
      // Single ANDed term.
      else {
        $simple_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($simple_and && $simple_or) {
      $this->simple = FALSE;
    }
    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }

    // Only a lowercase "or" produces a hint; note that a later AND/and token
    // overwrites $warning and therefore suppresses it.
    if ($warning == 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
  }

  /**
   * Helper function for parseSearchExpression().
   *
   * Splits a keyword or phrase on spaces and registers every part that is
   * numeric or at least 'minimum_word_size' characters long in $this->words.
   *
   * @param $word
   *   A single keyword or a phrase of space separated keywords.
   * @return
   *   An array with two values: the number of words that were newly added to
   *   $this->words and the number of words that passed the size check.
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;
    // Determine the scorewords of this word/phrase.
    $split = explode(' ', $word);
    foreach ($split as $s) {
      $num = is_numeric($s);
      if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
        // Numbers are reduced to an integer without sign and leading zeros.
        $s = $num ? ((int) ltrim($s, '-0')) : $s;
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }
    return array($num_new_scores, $num_valid_words);
  }

  /**
   * Execute the first pass query.
   *
   * This can either be done explicitly, so that additional scores and
   * conditions can be applied to the second pass query or implicitly by
   * addScore() or execute().
   *
   * @return
   *   TRUE if search items exist, FALSE if not.
   */
  public function executeFirstPass() {
    $this->parseSearchExpression();

    if (count($this->words) == 0) {
      form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
      return FALSE;
    }
    $this->executedFirstPass = TRUE;

    // $this->words is known to be non-empty here: match any of the words.
    $or = db_or();
    foreach ($this->words as $word) {
      $or->condition('i.word', $word);
    }
    $this->condition($or);

    // Build query for keyword normalization.
    $this->join('search_total', 't', 'i.word = t.word');
    $this
      ->condition('i.type', $this->type)
      ->groupBy('i.type')
      ->groupBy('i.sid')
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));

    // Clone the query object to do the firstPass query;
    $first = clone $this->query;

    // For complex search queries, add the LIKE conditions to the first pass query.
    if (!$this->simple) {
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
      $first->condition($this->conditions);
    }

    // Calculate maximum keyword relevance, to normalize it.
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $this->normalize = $first
      ->range(0, 1)
      ->orderBy('calculated_score', 'DESC')
      ->execute()
      ->fetchField();

    return (bool) $this->normalize;
  }

  /**
   * Adds a custom score expression to the search query.
   *
   * Each score expression can optionally use a multiplicator and multiple
   * expressions are combined.
   *
   * @param $score
   *   The score expression.
   * @param $arguments
   *   Custom query arguments for that expression.
   * @param $multiply
   *   If set, the score is multiplied with that value. Search query ensures
   *   that the search scores are still normalized.
   * @return
   *   The SearchQuery object.
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      // The :total_N argument referenced here is supplied by execute(), once
      // all multipliers are known.
      $i = count($this->multiply);
      $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
      $arguments[':multiply_' . $i] = $multiply;
      $this->multiply[] = $multiply;
    }

    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }

  /**
   * Execute the search.
   *
   * If not already done, this executes the first pass query, then the complex
   * conditions are applied to the query including score expressions and
   * ordering.
   *
   * @return
   *   FALSE if the first pass query returned no results and a database result
   *   set if not.
   */
  public function execute() {
    if (!$this->executedFirstPass) {
      $this->executeFirstPass();
    }
    if (!$this->normalize) {
      return FALSE;
    }

    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $this->condition($this->conditions);

    if (empty($this->scores)) {
      // Add default score.
      $this->addScore('i.relevance');
    }
    if (count($this->getOrderBy()) == 0) {
      // Add default order.
      $this->orderBy('calculated_score', 'DESC');
    }

    if (count($this->multiply)) {
      // Add the total multiplicator as many times as requested to maintain
      // normalization as far as possible. Every multiplied score expression
      // references its own :total_N placeholder, so each index needs a value.
      $sum = array_sum($this->multiply);
      foreach (array_keys($this->multiply) as $i) {
        $this->scoresArguments[':total_' . $i] = $sum;
      }
    }

    // Replace i.relevance pseudo-field with the actual, normalized value.
    $this->scores = str_replace('i.relevance', '(' . (1.0 / $this->normalize) . ' * i.score * t.count)', $this->scores);
    // Convert scores to an expression.
    $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

    // Add tag and useful metadata.
    $this
      ->addTag('search_' . $this->type)
      ->addMetaData('normalize', $this->normalize)
      ->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }
}

  ViewVC Help
Powered by ViewVC 1.1.2