/[drupal]/contributions/modules/sphinxsearch/sphinxsearch.common.inc
ViewVC logotype

Contents of /contributions/modules/sphinxsearch/sphinxsearch.common.inc

Parent Directory | Revision Log | View Revision Graph


Revision 1.3 - (show annotations) (download) (as text)
Fri Sep 12 02:44:22 2008 UTC (14 months, 1 week ago) by markuspetrux
Branch: MAIN
CVS Tags: HEAD
Branch point for: DRUPAL-6--1
Changes since 1.2: +22 -1 lines
File MIME type: text/x-php
- Ported module from D5 to D6.
- Bugfix: undefined class method in sphinxsearch_check_connection_page().
- Bugfix: added criterion class to taxonomy elements in advanced search form.
- Bugfix: added support for mysqli and pgsql to _sphinxsearch_db_reconnect().
1 <?php
2 // $Id: sphinxsearch.common.inc,v 1.1.2.4 2008/09/03 01:38:55 markuspetrux Exp $
3
4 /**
5 * @file
6 * Common functions for the Sphinx search module.
7 *
8 * WARNING: Implementation is not finished yet, and it is subject to change in
9 * future versions with no backwards compatibility. I should probably create a
10 * sample module before it gets into something really reusable... and useful.
11 */
12
/**
 * Identifier reserved for the delta index.
 *
 * Used internally during XMLPipe processing.
 */
define('SPHINXSEARCH_DELTA_INDEX_ID', -1);

/**
 * Search matching modes supported by the module.
 *
 * The user-visible labels for these values are built by
 * sphinxsearch_get_matching_modes().
 */
// Labelled "All words".
define('SPHINXSEARCH_MATCH_ALL', 0);
// Labelled "Any word".
define('SPHINXSEARCH_MATCH_ANY', 1);
// Labelled "Exact phrase".
define('SPHINXSEARCH_MATCH_PHRASE', 2);
24
/**
 * Build the map of search matching modes offered to users.
 *
 * @return array
 *   Labels (passed through t()) keyed by the SPHINXSEARCH_MATCH_* constants,
 *   in the order ALL, ANY, PHRASE.
 */
function sphinxsearch_get_matching_modes() {
  $modes = array();
  $modes[SPHINXSEARCH_MATCH_ALL] = t('All words');
  $modes[SPHINXSEARCH_MATCH_ANY] = t('Any word');
  $modes[SPHINXSEARCH_MATCH_PHRASE] = t('Exact phrase');
  return $modes;
}
37
/**
 * Build the map of fields that search results can be sorted by.
 *
 * @return array
 *   Labels (passed through t()) keyed by sort field name. '@weight' is the
 *   relevance entry.
 */
function sphinxsearch_get_sortable_fields() {
  $fields = array();
  $fields['@weight'] = t('Relevance');
  $fields['created'] = t('Creation time');
  $fields['last_updated'] = t('Last updated time');
  return $fields;
}
50
/**
 * Get the path of the Sphinx search page.
 *
 * @return string
 *   Value of the 'sphinxsearch_search_path' variable, or 'search-content'
 *   when that variable has not been set.
 */
function sphinxsearch_get_search_path() {
  $search_path = variable_get('sphinxsearch_search_path', 'search-content');
  return $search_path;
}
59
/**
 * Check if specified path is the Sphinx search page.
 *
 * A path matches when it is exactly the configured search path or lies below
 * it (search path followed by '/'). A bare prefix test would also report
 * unrelated paths that merely start with the same characters, for instance
 * 'search-contentious' when the search path is 'search-content'.
 *
 * @param string $path
 *   Path to check. Defaults to $_GET['q'].
 * @return boolean
 *   TRUE when the path is the search page or one of its sub-paths. FALSE
 *   otherwise, including when no path is available or the configured search
 *   path is empty.
 */
function sphinxsearch_is_search_path($path = NULL) {
  if (!isset($path)) {
    // $_GET['q'] may be missing; treat that as "no path".
    $path = (isset($_GET['q']) ? $_GET['q'] : '');
  }
  if (!is_string($path) || $path === '') {
    return FALSE;
  }

  $search_path = (string)sphinxsearch_get_search_path();
  if ($search_path === '') {
    // An empty needle would either warn or match every path, depending on
    // the PHP version.
    return FALSE;
  }

  if ($path === $search_path) {
    return TRUE;
  }
  return (strpos($path, $search_path .'/') === 0);
}
73
/**
 * Redirect to the search page, optionally carrying query options.
 *
 * @param mixed $query
 *   Either a query string array, which is encoded here with
 *   drupal_query_string_encode(), or an already escaped query string, which
 *   is passed on untouched. An empty array results in no query string.
 */
function sphinxsearch_goto_search($query = NULL) {
  if (is_array($query)) {
    $query = (empty($query) ? NULL : drupal_query_string_encode($query));
  }
  drupal_goto(sphinxsearch_get_search_path(), $query);
}
86
/**
 * Get, and optionally raise, the "flood limit exceeded" flag.
 *
 * The flag is kept in $GLOBALS so callers do not need to know where it is
 * stored. This function can only raise the flag, never clear it.
 *
 * @see sphinxsearch_search_page()
 * @see sphinxsearch_block()
 *
 * @param boolean $exceeded
 *   Pass a non-empty value (for instance TRUE) to raise the flag. Optional.
 *
 * @return boolean
 *   TRUE if the flood limit has been flagged as exceeded.
 */
function sphinxsearch_flood_limit_exceeded($exceeded = NULL) {
  $flag = 'sphinxsearch_flood_limit_exceeded';
  if (empty($exceeded)) {
    // Read-only call: report the current state.
    return isset($GLOBALS[$flag]);
  }
  $GLOBALS[$flag] = TRUE;
  return TRUE;
}
107
/**
 * Instantiate a Sphinx search client object.
 *
 * The client is created and configured on the first call only; it is kept in
 * a static variable and every later call returns that same instance.
 *
 * Usage:
 *   $sphinxsearch = &sphinxsearch_get_client();
 *
 * @return object
 *   Sphinx client instance, returned by reference.
 */
function &sphinxsearch_get_client() {
  static $client;
  if (isset($client)) {
    return $client;
  }

  require_once(drupal_get_path('module', 'sphinxsearch') .'/lib/sphinxapi.php');

  $client = new SphinxClient();
  $host = variable_get('sphinxsearch_searchd_host', 'localhost');
  $port = (int)variable_get('sphinxsearch_searchd_port', '3312');
  $client->SetServer($host, $port);

  // Connection timeout; only applied when a positive value is configured.
  $connect_timeout = (int)variable_get('sphinxsearch_searchd_timeout', 0);
  if ($connect_timeout > 0) {
    $client->SetConnectTimeout($connect_timeout);
  }

  // Max query time; the configured value is multiplied by 1000 before it is
  // handed to the client (presumably seconds to milliseconds).
  $max_query_time = (int)variable_get('sphinxsearch_searchd_maxquerytime', 0);
  if ($max_query_time > 0) {
    $client->SetMaxQueryTime($max_query_time * 1000);
  }

  // Distributed retries; the delay is only read when retries are enabled and
  // is scaled by 1000 as well.
  $retries_count = (int)variable_get('sphinxsearch_retries_count', 0);
  if ($retries_count > 0) {
    $retries_delay = (int)variable_get('sphinxsearch_retries_delay', 0);
    $client->SetRetries($retries_count, $retries_delay * 1000);
  }

  return $client;
}
143
/**
 * Check whether a connection to the Sphinx searchd daemon can be opened.
 *
 * This calls the client's underscore-prefixed _Connect() method directly and
 * only looks at the truthiness of what it returns.
 *
 * NOTE(review): nothing here releases whatever _Connect() returns. If that
 * is an open socket, confirm against the bundled sphinxapi.php whether it
 * needs to be closed.
 *
 * @return boolean
 *   TRUE when _Connect() returned a truthy value, FALSE otherwise.
 */
function sphinxsearch_check_connection() {
  $client = &sphinxsearch_get_client();
  return (bool)$client->_Connect();
}
153
/**
 * Build search URL data based on the given search options structure.
 *
 * Only options that differ from the defaults are emitted: matching mode
 * SPHINXSEARCH_MATCH_ALL, sort field '@weight' and any sort direction other
 * than 'ASC' are left out of the query string.
 *
 * Keywords and author names are tested against the empty string instead of
 * with empty(), so that the literal value "0" is not silently discarded.
 *
 * @param array $search_options
 *   Search options structure.
 * @return string
 *   Encoded query string. NULL indicates no search filter has been specified.
 */
function sphinxsearch_get_query_string($search_options) {
  $query = array();
  $filters = (isset($search_options['filters']) && is_array($search_options['filters']) ? $search_options['filters'] : array());

  // Search keywords.
  if (isset($filters['keys']) && (!empty($filters['keys']) || $filters['keys'] === '0')) {
    $query['keys'] = $filters['keys'];
  }

  // Matching modes.
  if (!empty($search_options['matchmode'])) {
    $matchmodes = sphinxsearch_get_matching_modes();
    $matchmode = (int)$search_options['matchmode'];
    if (isset($matchmodes[$matchmode]) && $matchmode != SPHINXSEARCH_MATCH_ALL) {
      $query['matchmode'] = $matchmode;
    }
  }

  // Filter by content author.
  if (isset($filters['author']['name']) && is_scalar($filters['author']['name'])) {
    $author = trim($filters['author']['name']);
    if ($author !== '') {
      $query['author'] = $author;
    }
  }

  // Filter by content type.
  if (!empty($filters['types'])) {
    $query['types'] = implode(',', $filters['types']);
  }

  // Filter by taxonomy.
  if (module_exists('taxonomy')) {
    foreach (sphinxsearch_get_enabled_vocabularies() as $vid => $vocabulary) {
      if (!empty($filters['taxonomy'][$vid])) {
        $terms_key = 'terms'. $vid;
        $query[$terms_key] = sphinxsearch_taxonomy_encode_typed_terms($filters['taxonomy'][$vid], ',');
      }
    }
  }

  // Sort options.
  if (!empty($search_options['sortfield'])) {
    $sortable_fields = sphinxsearch_get_sortable_fields();
    if (isset($sortable_fields[$search_options['sortfield']]) && $search_options['sortfield'] != '@weight') {
      $query['sortfield'] = $search_options['sortfield'];
    }
  }
  if (!empty($search_options['sortdir']) && $search_options['sortdir'] == 'ASC') {
    $query['sortdir'] = 'ASC';
  }

  if (empty($query)) {
    return NULL;
  }
  // Restore literal commas in the encoded string so that list values stay
  // readable in the URL.
  return str_replace('%2C', ',', drupal_query_string_encode($query));
}
215
/**
 * Parse search request and build search options structure.
 *
 * Request values are treated as untrusted input:
 * - Parameters of an unexpected type (for instance an array sent as
 *   "?keys[]=x") are ignored instead of being handed to string functions.
 * - Matching mode, sort field and sort direction are only accepted when they
 *   are one of the known values; sort direction must be exactly 'ASC' or
 *   'DESC'.
 * - Problems that should be reported to the user are collected under the
 *   'errors' key rather than aborting the parse.
 *
 * @param array $request_options
 *   Requested search options.
 * @return array
 *   Search options structure.
 */
function sphinxsearch_parse_request($request_options = array()) {
  $search_options = array(
    'matchmode' => SPHINXSEARCH_MATCH_ALL,
    'results_per_page' => (int)variable_get('sphinxsearch_results_per_page', 10),
    'excerpts_limit' => (int)variable_get('sphinxsearch_excerpts_limit', 256),
    'excerpts_around' => (int)variable_get('sphinxsearch_excerpts_around', 5),
    'excerpts_single_passage' => (int)variable_get('sphinxsearch_excerpts_single_passage', 0),
    'filters' => array(),
    'group_by' => '',
    'errors' => array(),
  );

  // Search keywords: trimmed, with runs of whitespace collapsed to one space.
  if (isset($request_options['keys']) && is_scalar($request_options['keys'])) {
    $search_options['filters']['keys'] = preg_replace('#\s+#', ' ', trim($request_options['keys']));
  }

  // Matching modes.
  if (isset($request_options['matchmode']) && is_scalar($request_options['matchmode'])) {
    $matchmodes = sphinxsearch_get_matching_modes();
    $matchmode = (int)$request_options['matchmode'];
    if (isset($matchmodes[$matchmode])) {
      $search_options['matchmode'] = $matchmode;
    }
  }

  // Filter by content author.
  if (isset($request_options['author']) && is_scalar($request_options['author'])) {
    $name = trim($request_options['author']);
    // Compared against '' rather than with empty() so that a user literally
    // named "0" can be looked up.
    if ($name !== '') {
      $uid = (int)db_result(db_query("SELECT uid FROM {users} WHERE name = '%s'", $name));
      if ($uid <= 0) {
        // Unknown author: report it and filter on uid -1.
        $search_options['errors']['author'] = t('Specified author %name not found.', array('%name' => $name));
        $search_options['filters']['author'] = array('uid' => -1, 'name' => $name);
      }
      else {
        $search_options['filters']['author'] = array('uid' => $uid, 'name' => $name);
      }
    }
  }

  // Filter by content type. Only evaluated when more than one node type is
  // enabled.
  if (!empty($request_options['types'])) {
    $enabled_node_types = sphinxsearch_get_enabled_node_types();
    if (count($enabled_node_types) > 1) {
      if (is_array($request_options['types'])) {
        $types = array_values(array_filter($request_options['types']));
      }
      elseif (is_scalar($request_options['types'])) {
        $types = array_filter(array_map('trim', explode(',', $request_options['types'])));
      }
      else {
        $types = array();
      }
      $unknown_types = array();
      foreach ($types as $type) {
        if (!is_scalar($type)) {
          // Nested arrays cannot be node type names.
          continue;
        }
        // Compare as string so that numeric or boolean values cannot
        // loosely match a type name.
        $type = (string)$type;
        if (in_array($type, $enabled_node_types)) {
          $search_options['filters']['types'][] = $type;
        }
        else {
          $unknown_types[] = $type;
        }
      }
      if (!empty($unknown_types)) {
        $search_options['errors']['types'] = t('The following content types are invalid: %types.', array(
          '%types' => implode(', ', $unknown_types),
        ));
      }
    }
  }

  // Filter by taxonomy.
  if (module_exists('taxonomy')) {
    foreach (sphinxsearch_get_enabled_vocabularies() as $vid => $vocabulary) {
      $terms_key = 'terms'. $vid;
      if (empty($request_options[$terms_key]) || !is_scalar($request_options[$terms_key])) {
        continue;
      }
      // Attempt to extract list of tids.
      $tids = array_filter(array_map('intval', array_map('trim', explode(',', $request_options[$terms_key]))));
      if ($request_options[$terms_key] != implode(',', $tids)) {
        // Request came with a comma separated list of terms.
        $terms = sphinxsearch_taxonomy_decode_typed_terms($vid, $request_options[$terms_key]);
      }
      else {
        // Request came with a comma separated list of tids.
        $terms = sphinxsearch_taxonomy_get_terms($vid, $tids);
      }
      // The -1 entry, when present, carries the terms that were not found.
      if (isset($terms[-1])) {
        $search_options['errors'][$terms_key] = t('The following terms have not been found in category %category: %terms.', array(
          '%category' => $vocabulary->name,
          '%terms' => $terms[-1],
        ));
        unset($terms[-1]);
      }
      if (!empty($terms)) {
        $search_options['filters']['taxonomy'][$vid] = $terms;
      }
    }
  }

  // Sort options.
  if (!empty($request_options['sortfield']) && is_string($request_options['sortfield'])) {
    $sortable_fields = sphinxsearch_get_sortable_fields();
    if (isset($sortable_fields[$request_options['sortfield']])) {
      $search_options['sortfield'] = $request_options['sortfield'];
    }
  }
  if (isset($request_options['sortdir']) && ($request_options['sortdir'] === 'ASC' || $request_options['sortdir'] === 'DESC')) {
    $search_options['sortdir'] = $request_options['sortdir'];
  }

  return $search_options;
}
341
/**
 * Execute a search query on the given options.
 *
 * The function never throws; problems are reported through the
 * 'error_message' and 'warnings' entries of the returned structure.
 *
 * Processing order:
 * - Bail out when the query index or excerpts index is not configured, or
 *   when neither a filter nor a grouping has been requested.
 * - Configure the shared Sphinx client (limits, filters, sorting, grouping)
 *   and send the query.
 * - For grouped searches, return the per-group counts only.
 * - Otherwise load the matched nodes and ask Sphinx to build highlighted
 *   titles and excerpts for them.
 *
 * @param array $search_options
 *   Search options structure. The 'keys' filter is optional (a search may
 *   be restricted by author, type or taxonomy only).
 * @return array
 *   Search results structure with keys: error_message, warnings,
 *   total_found, total_available, time, words, nodes, titles, excerpts and
 *   groups.
 */
function sphinxsearch_execute_query($search_options) {
  $search_results = array(
    'error_message' => '',
    'warnings' => array(),
    'total_found' => 0,
    'total_available' => 0,
    'time' => 0,
    'words' => array(),
    'nodes' => array(),
    'titles' => array(),
    'excerpts' => array(),
    'groups' => array(),
  );

  // Keywords are optional; default to an empty query instead of reading an
  // undefined index.
  $sphinx_query_keywords = (isset($search_options['filters']['keys']) ? $search_options['filters']['keys'] : '');

  // Obtain distributed index name, required to resolve search query.
  $sphinxsearch_query_index = variable_get('sphinxsearch_query_index', '');
  if (empty($sphinxsearch_query_index)) {
    $search_results['error_message'] = t('Sphinx query index not specified.');
    return $search_results;
  }

  // Obtain excerpts index name, required to build excerpts.
  $sphinxsearch_excerpts_index = variable_get('sphinxsearch_excerpts_index', '');
  if (empty($sphinxsearch_excerpts_index)) {
    $search_results['error_message'] = t('Sphinx excerpts index not specified.');
    return $search_results;
  }

  // Validate results per page option.
  if (!isset($search_options['results_per_page']) || $search_options['results_per_page'] <= 0) {
    $search_options['results_per_page'] = 10;
  }

  // Quit if no search filter has been specified.
  if (empty($search_options['filters']) && empty($search_options['group_by'])) {
    return $search_results;
  }

  $matchmode = (isset($search_options['matchmode']) ? $search_options['matchmode'] : SPHINXSEARCH_MATCH_ALL);
  $exact_phrase = ($matchmode == SPHINXSEARCH_MATCH_PHRASE);

  // Prepare Sphinx client for search queries. The page number comes from
  // the request, so make sure it cannot produce a negative offset.
  $current_page = max(0, sphinxsearch_get_current_page());
  $sphinxsearch = &sphinxsearch_get_client();
  $sphinxsearch->ResetFilters();
  $sphinxsearch->ResetGroupBy();
  $sphinxsearch->SetLimits($current_page * $search_options['results_per_page'], $search_options['results_per_page']);
  $sphinxsearch->SetFieldWeights(array('subject' => 2, 'content' => 1));
  $sphinxsearch->SetFilter('is_deleted', array(0));

  // Matching modes. All of them are expressed in extended query syntax:
  // a quoted phrase, words joined by spaces (all) or by " | " (any).
  $escaped_keywords = $sphinxsearch->EscapeString($sphinx_query_keywords);
  if ($exact_phrase) {
    $phrase = trim($escaped_keywords);
    // Without keywords send an empty query, as the other modes do, rather
    // than an empty quoted phrase.
    $sphinx_query_keywords = ($phrase !== '' ? '"'. $phrase .'"' : '');
  }
  else {
    // Filter on string length so that the keyword "0" is kept.
    $words = array_filter(array_map('trim', explode(' ', $escaped_keywords)), 'strlen');
    $sphinx_query_keywords = implode(($matchmode == SPHINXSEARCH_MATCH_ALL ? ' ' : ' | '), $words);
  }
  $sphinxsearch->SetMatchMode(SPH_MATCH_EXTENDED2);

  // Filter by content author.
  if (!empty($search_options['filters']['author'])) {
    $sphinxsearch->SetFilter('uid', array($search_options['filters']['author']['uid']));
  }

  // Filter by content type.
  if (isset($search_options['filters']['types'])) {
    $filter_values = array();
    foreach ($search_options['filters']['types'] as $type) {
      $filter_values[] = sphinxsearch_xmlpipe_nodetype('id', $type);
    }
    if (!empty($filter_values)) {
      $sphinxsearch->SetFilter('nodetype', $filter_values);
    }
  }

  // Filter by taxonomy. One filter is added per term.
  if (isset($search_options['filters']['taxonomy'])) {
    foreach ($search_options['filters']['taxonomy'] as $vid => $terms) {
      foreach ($terms as $tid => $term) {
        $sphinxsearch->SetFilter('terms'. $vid, array((int)$tid));
      }
    }
  }

  // Sort options. Anything other than an explicit 'ASC' sorts descending.
  $sortdir = (!empty($search_options['sortdir']) && $search_options['sortdir'] == 'ASC' ? 'ASC' : 'DESC');
  if (empty($search_options['sortfield']) || $search_options['sortfield'] == '@weight') {
    $sphinxsearch->SetSortMode(SPH_SORT_EXTENDED, '@weight '. $sortdir .', last_updated '. $sortdir);
  }
  else {
    $sphinxsearch->SetSortMode(SPH_SORT_EXTENDED, $search_options['sortfield'] .' '. $sortdir .', @weight '. $sortdir);
  }

  // Grouping options.
  if (!empty($search_options['group_by'])) {
    $sphinxsearch->SetArrayResult(TRUE);
    $sphinxsearch->SetGroupBy($search_options['group_by'], SPH_GROUPBY_ATTR, '@count DESC, @weight DESC');
  }

  // Send query to Sphinx.
  $sphinx_results = $sphinxsearch->Query($sphinx_query_keywords, $sphinxsearch_query_index);
  if (!$sphinx_results) {
    // Grab the error before the connection check below can overwrite it.
    $message = $sphinxsearch->GetLastError();
    if (!sphinxsearch_check_connection()) {
      $search_results['error_message'] = t('Search service is disabled temporarily. Please, try again later.');
      return $search_results;
    }
    $search_results['error_message'] = t('Search failed using index %index. Sphinx error: %message', array(
      '%index' => $sphinxsearch_query_index,
      '%message' => $message,
    ));
    return $search_results;
  }
  $message = $sphinxsearch->GetLastWarning();
  if (!empty($message)) {
    $search_results['warnings'][] = t('Search query warning: %message', array('%message' => $message));
  }
  if (empty($sphinx_results['matches'])) {
    return $search_results;
  }

  // Save Sphinx query results.
  $search_results['total_found'] = (int)$sphinx_results['total_found'];
  $search_results['total_available'] = (int)$sphinx_results['total'];
  $search_results['time'] = $sphinx_results['time'];
  $search_results['words'] = (isset($sphinx_results['words']) && is_array($sphinx_results['words']) ? $sphinx_results['words'] : array());

  // Parse grouping results.
  if (!empty($search_options['group_by'])) {
    foreach ($sphinx_results['matches'] as $sphinx_match) {
      if (isset($sphinx_match['attrs']['@groupby']) && isset($sphinx_match['attrs']['@count'])) {
        $group_id = $sphinx_match['attrs']['@groupby'];
        if ($search_options['group_by'] == 'nodetype') {
          $group_id = sphinxsearch_xmlpipe_nodetype('name', $group_id);
        }
        $search_results['groups'][$group_id] = array(
          'count' => $sphinx_match['attrs']['@count'],
          'weight' => $sphinx_match['weight'],
        );
      }
    }
    return $search_results;
  }

  // Load nodes referenced by returned results. Matches whose node cannot be
  // loaded are skipped, so nodes, titles and excerpts stay aligned by index.
  foreach ($sphinx_results['matches'] as $sphinx_match) {
    if (isset($sphinx_match['attrs']['nid']) && ($node = node_load($sphinx_match['attrs']['nid']))) {
      $search_results['nodes'][] = $node;
      $search_results['titles'][] = check_plain($node->title);
      $search_results['excerpts'][] = sphinxsearch_get_node_text($node);
    }
  }

  // Use Sphinx to build excerpts. The excerpts index is known to be set at
  // this point (see the early return above).

  // Build node titles with highlighted keywords.
  $search_results['titles'] = $sphinxsearch->BuildExcerpts($search_results['titles'], $sphinxsearch_excerpts_index, $sphinx_query_keywords, array(
    'before_match' => '<span class="search-keyword-match">',
    'after_match' => '</span>',
    'chunk_separator' => '',
    'limit' => 1024, // We want all text here, so using a high enough number.
    'around' => 200, // Ignored when single_passage is TRUE.
    'exact_phrase' => $exact_phrase,
    'single_passage' => TRUE,
  ));
  if (!$search_results['titles']) {
    $search_results['titles'] = array();
    $search_results['warnings'][] = t('Unable to build excerpts for content titles. Sphinx error: %message', array('%message' => $sphinxsearch->GetLastError()));
  }

  // Build node excerpts with highlighted keywords.
  $search_results['excerpts'] = $sphinxsearch->BuildExcerpts($search_results['excerpts'], $sphinxsearch_excerpts_index, $sphinx_query_keywords, array(
    'before_match' => '<span class="search-keyword-match">',
    'after_match' => '</span>',
    'chunk_separator' => '<span class="search-chunk-separator"> ... </span>',
    'limit' => $search_options['excerpts_limit'],
    'around' => $search_options['excerpts_around'],
    'exact_phrase' => $exact_phrase,
    'single_passage' => $search_options['excerpts_single_passage'],
  ));
  if (!$search_results['excerpts']) {
    $search_results['excerpts'] = array();
    $search_results['warnings'][] = t('Unable to build excerpts for content snippets. Sphinx error: %message', array('%message' => $sphinxsearch->GetLastError()));
  }

  return $search_results;
}
539
/**
 * Obtain current page from search results navigation.
 *
 * The page is read from the comma separated 'page' request parameter, which
 * holds one page number per pager element. The value comes straight from the
 * request, so anything that is not a usable page number (parameter missing,
 * not a string, element not present, negative number) yields page 0.
 *
 * @param int $pager_element
 *   An optional integer to distinguish between multiple pagers on one page.
 *
 * @return int
 *   Zero-based page number, never negative.
 */
function sphinxsearch_get_current_page($pager_element = 0) {
  if (!isset($_GET['page']) || !is_string($_GET['page'])) {
    return 0;
  }
  $pager_page_array = explode(',', $_GET['page']);
  if (!isset($pager_page_array[$pager_element])) {
    return 0;
  }
  return max(0, (int)$pager_page_array[$pager_element]);
}
552
/**
 * Compute pager options and invoke theme pager.
 *
 * Fills the pager globals ($pager_page_array, $pager_total_items and
 * $pager_total) for the given pager element and then renders the pager
 * through theme('pager'). The requested page is clamped to the range of
 * existing pages.
 *
 * The 'page' request parameter is optional (it is absent on the first page
 * of results) and is only used when it is a string.
 *
 * @param int $total_results
 *   The total number of returned search results.
 * @param int $results_per_page
 *   The number of query results to display per page. A value of zero or
 *   less results in a pager with no pages instead of a division by zero.
 * @param int $pager_element
 *   An optional integer to distinguish between multiple pagers on one page.
 *
 * @return string
 *   Output of theme('pager').
 */
function sphinxsearch_pager($total_results, $results_per_page, $pager_element = 0) {
  $requested = (isset($_GET['page']) && is_string($_GET['page']) ? $_GET['page'] : '');
  $pager_pages = explode(',', $requested);
  $requested_page = (isset($pager_pages[$pager_element]) ? (int)$pager_pages[$pager_element] : 0);
  $total_pages = ($results_per_page > 0 ? ceil($total_results / $results_per_page) : 0);

  $GLOBALS['pager_page_array'] = $pager_pages;
  $GLOBALS['pager_total_items'][$pager_element] = $total_results;
  $GLOBALS['pager_total'][$pager_element] = $total_pages;
  // Clamp to [0, last page]; with no pages at all this resolves to 0.
  $GLOBALS['pager_page_array'][$pager_element] = max(0, min($requested_page, ((int)$total_pages) - 1));

  return theme('pager', array(), $results_per_page, $pager_element);
}

  ViewVC Help
Powered by ViewVC 1.1.2