update CHANGELOG
[project/apachesolr.git] / Solr_Base_Query.php
1 <?php
2
3 class Solr_Base_Query implements Drupal_Solr_Query_Interface {
4
/**
 * Extract all uses of one named field from a filter string e.g. 'type:book'.
 *
 * Three forms are recognised, in this order: range queries
 * (name:[a TO b] or name:{a TO b}), quoted values (name:"a b") and unquoted
 * values (name:a). A term may be prefixed with '-' to mark it as excluded.
 *
 * @param $filterstring
 *   The filter string to search. Passed by reference: every matched term,
 *   including terms with an empty value, is removed from it.
 * @param $name
 *   The field name (or alias) to look for.
 *
 * @return
 *   A list of filter arrays. Each has '#query' (the raw matched text),
 *   '#exclude' (TRUE when the term was prefixed with '-') and '#value';
 *   range queries additionally carry '#start' and '#end'. Terms with an
 *   empty value are not returned.
 */
public function filter_extract(&$filterstring, $name) {
  $extracted = array();
  $name = preg_quote($name, '/');
  // Build the pattern list explicitly instead of appending to a variable
  // that was never initialised.
  $patterns = array(
    // Range queries. The "TO" is case-sensitive.
    '/(^| |-)'. $name .':([\[\{](\S+) TO (\S+)[\]\}])/',
    // Match quoted values.
    '/(^| |-)'. $name .':"([^"]*)"/',
    // Match unquoted values.
    '/(^| |-)'. $name .':([^ ]*)/',
  );
  foreach ($patterns as $p) {
    if (preg_match_all($p, $filterstring, $matches, PREG_SET_ORDER)) {
      // Sort matches longest to shortest to avoid accidentally
      // removing a sub-string.
      usort($matches, array($this, 'filter_extract_cmp'));
      foreach ($matches as $match) {
        $filter = array();
        $filter['#query'] = $match[0];
        $filter['#exclude'] = ($match[1] === '-');
        $filter['#value'] = trim($match[2]);
        // Empty values cause Lucene parse errors, so skip them.
        if (strlen($filter['#value'])) {
          // Only the range pattern has a third and fourth capture group.
          if (isset($match[3])) {
            // Extra data for range queries
            $filter['#start'] = $match[3];
            $filter['#end'] = $match[4];
          }
          $extracted[] = $filter;
        }
        // Update the caller's filter string by removing the match, so that
        // a later (less specific) pattern cannot pick it up again.
        $filterstring = str_replace($match[0], '', $filterstring);
      }
    }
  }
  return $extracted;
}
43
/**
 * usort() callback that orders preg match sets from longest to shortest.
 *
 * Only the full match (index 0) of each set is compared.
 *
 * @return
 *   -1 when $a's match is longer, 1 when it is shorter, 0 on equal length.
 */
public function filter_extract_cmp($a, $b) {
  $length_a = strlen($a[0]);
  $length_b = strlen($b[0]);
  if ($length_a > $length_b) {
    return -1;
  }
  if ($length_a < $length_b) {
    return 1;
  }
  return 0;
}
50
/**
 * Combines the #name and #value of a filter array into a string suitable
 * for use in a Solr query.
 *
 * @param $filter
 *   A filter array with '#name' and '#value', and optionally '#exclude'
 *   and '#start'.
 *
 * @return
 *   "name:value", prefixed with '-' when '#exclude' is non-empty.
 */
public function make_filter(array $filter) {
  $value = $filter['#value'];
  // A filter is treated as a range query when it carries '#start' or when
  // its value looks like "[a TO b]" / "{a TO b}". Ranges are never quoted.
  $is_range = isset($filter['#start']) || preg_match('/[\[\{]\S+ TO \S+[\]\}]/', $value);
  // Any other value containing a space or a colon is wrapped in double quotes.
  if (!$is_range && preg_match('/[ :]/', $value)) {
    $value = '"' . $value . '"';
  }
  $prefix = '-';
  if (empty($filter['#exclude'])) {
    $prefix = '';
  }
  return $prefix . $filter['#name'] . ':' . $value;
}
64
/**
 * Static shared by all instances, used to increment ID numbers.
 */
protected static $idCount = 0;

/**
 * Each query/subquery will have a unique ID.
 */
public $id;

/**
 * A keyed array where the key is a position integer and the value
 * is an array with #name and #value properties. Each value is
 * used for filter queries, e.g. array('#name' => 'uid', '#value' => 0)
 * for anonymous content.
 */
protected $fields = array();

/**
 * Filter arrays added through add_filter(), in the order they were added.
 */
protected $fields_added = array();

/**
 * Record of removals made through remove_filter(): keyed by field name,
 * each entry is a list of removed values (NULL means every value).
 */
protected $fields_removed = array();

/**
 * The complete filter string for a query. Usually from $_GET['filters']
 * Contains name:value pairs for filter queries. For example,
 * "type:book" for book nodes.
 */
protected $filterstring;

/**
 * The sort string handed to the constructor, re-read by parse_sortstring().
 *
 * Declared explicitly so the constructor no longer creates it as a dynamic
 * property. It stays public because a dynamically created property was
 * public, and outside code may already read it.
 */
public $sortstring = '';

/**
 * A mapping of field names from the URL to real index field names.
 */
protected $field_map = array();

/**
 * An array of subqueries.
 */
protected $subqueries = array();

/**
 * The search keywords.
 */
protected $keys;

/**
 * The search base path.
 */
protected $base_path;

/**
 * Apache_Solr_Service object
 */
protected $solr;

/**
 * The sorts that may be selected, keyed by real Solr index field name.
 */
protected $available_sorts;

// Makes sure we always have a valid sort.
protected $solrsort = array('#name' => 'score', '#direction' => 'asc');
121
/**
 * Builds a query object and parses the filter and sort strings.
 *
 * @param $solr
 *   An instantiated Apache_Solr_Service Object.
 *   Can be instantiated from apachesolr_get_solr().
 *
 * @param $keys
 *   The string that a user would type into the search box. Suitable input
 *   may come from search_get_keys().
 *
 * @param $filterstring
 *   Key and value pairs that are applied as filter queries.
 *
 * @param $sortstring
 *   Visible string telling solr how to sort - added to GET query params.
 *
 * @param $base_path
 *   The search base path (without the keywords) for this query, without
 *   trailing slash.
 */
public function __construct($solr, $keys, $filterstring, $sortstring, $base_path) {
  // Connection and keywords.
  $this->solr = $solr;
  $this->keys = trim($keys);

  // Filters: parsing asks the Solr service for its field list, so the
  // service has to be stored first.
  $this->filterstring = trim($filterstring);
  $this->parse_filters();

  // Sorting: the sort string is validated against the available sorts.
  $this->available_sorts = $this->default_sorts();
  $this->sortstring = trim($sortstring);
  $this->parse_sortstring();

  $this->base_path = $base_path;
  // Claim the next ID from the counter shared by all instances.
  self::$idCount += 1;
  $this->id = self::$idCount;
}
151
/**
 * Gives a cloned query its own ID taken from the shared counter.
 */
public function __clone() {
  self::$idCount += 1;
  $this->id = self::$idCount;
}
155
/**
 * Returns the current filters, optionally limited to one field name.
 *
 * @param $name
 *   Optional field name. Any value PHP considers empty (NULL, '', '0')
 *   returns every filter.
 *
 * @return
 *   All filters with their position keys when $name is empty; otherwise a
 *   re-indexed list of the filters whose '#name' equals $name.
 */
public function get_filters($name = NULL) {
  if (empty($name)) {
    return $this->fields;
  }
  // foreach always starts at the first element, so no reset() is needed.
  $matches = array();
  foreach ($this->fields as $filter) {
    if ($filter['#name'] == $name) {
      $matches[] = $filter;
    }
  }
  return $matches;
}
169
/**
 * Tells whether a filter with the given name and value is present.
 *
 * Entries lacking '#name' or '#value' are ignored. Both comparisons are
 * loose (==).
 *
 * @return
 *   TRUE when a matching filter exists, FALSE otherwise.
 */
public function has_filter($name, $value) {
  foreach ($this->fields as $candidate) {
    if (!isset($candidate['#name']) || !isset($candidate['#value'])) {
      continue;
    }
    if ($candidate['#name'] == $name && $candidate['#value'] == $value) {
      return TRUE;
    }
  }
  return FALSE;
}
178
/**
 * Adds a filter to the query.
 *
 * @param $name
 *   The field name.
 * @param $value
 *   The filter value; it is trimmed before being stored.
 * @param $exclude
 *   TRUE to make this a negative ('-name:value') filter.
 * @param $callbacks
 *   Stored on the filter under '#callbacks'; not used inside this method.
 */
public function add_filter($name, $value, $exclude = FALSE, $callbacks = array()) {
  $filter = array('#exclude' => $exclude, '#name' => $name, '#value' => trim($value), '#callbacks' => $callbacks);
  // Record the addition.
  $this->fields_added[] = $filter;
  // Add to the public list of filters.
  $this->fields[] = $filter;
  // Remove from the record of removed filters. That record is keyed by field
  // name and holds a list of removed values (this is how remove_filter()
  // fills it), so it cannot be handed to unset_filter(), which expects rows
  // with '#name' and '#value'.
  if (isset($this->fields_removed[$name])) {
    foreach ($this->fields_removed[$name] as $pos => $removed) {
      // A NULL entry records "remove every filter of this name" and is kept;
      // only a removal of this exact value is cancelled.
      if (isset($removed) && trim($removed) === $filter['#value']) {
        unset($this->fields_removed[$name][$pos]);
      }
    }
    if (!$this->fields_removed[$name]) {
      unset($this->fields_removed[$name]);
    }
  }
}
188
/**
 * Removes filters for a field, either one value or all of them.
 *
 * @param $name
 *   The field name. Nothing happens when it is empty.
 * @param $value
 *   Optional. The value to remove; when NULL every filter of that name
 *   is removed.
 */
public function remove_filter($name, $value = NULL) {
  // Only named fields can be removed.
  if (!empty($name)) {
    // Remember the removal, keyed by field name.
    $this->fields_removed[$name][] = $value;
    // Drop it from the public list of filters ...
    $this->unset_filter($this->fields, $name, $value);
    // ... and from the record of added filters.
    $this->unset_filter($this->fields_added, $name, $value);
  }
}
201
/**
 * Removes matching filters from a list of filter arrays.
 *
 * @param $fields
 *   List of filter arrays with '#name' and '#value', passed by reference.
 *   Remaining entries keep their keys.
 * @param $name
 *   Field name to match (loose comparison).
 * @param $value
 *   Value to match (loose comparison); when not set, every filter with
 *   the given name is removed.
 */
protected function unset_filter(&$fields, $name, $value) {
  $any_value = !isset($value);
  foreach ($fields as $pos => $candidate) {
    if ($candidate['#name'] != $name) {
      continue;
    }
    if ($any_value || $candidate['#value'] == $value) {
      unset($fields[$pos]);
    }
  }
}
218
/**
 * Handle aliases for field to make nicer URLs.
 *
 * The new aliases are merged over the existing ones and the filter string
 * is parsed again so it is read with the updated map.
 *
 * @param $field_map
 *   An array keyed with real Solr index field names, with value being the
 *   alias.
 */
public function add_field_aliases($field_map) {
  $merged_map = array_merge($this->field_map, $field_map);
  $this->field_map = $merged_map;
  // The filters depend on the alias map, so they must be parsed again.
  $this->parse_filters();
}
230
/**
 * Returns the alias map: real Solr index field name => alias.
 */
public function get_field_aliases() {
  $aliases = $this->field_map;
  return $aliases;
}
234
/**
 * Drops every field alias and parses the filter string again without them.
 */
public function clear_field_aliases() {
  $this->field_map = array();
  // The filters depend on the alias map, so they must be parsed again.
  $this->parse_filters();
}
240
/**
 * Returns the search keywords of this query.
 */
public function get_keys() {
  $keys = $this->keys;
  return $keys;
}
244
/**
 * Replaces the search keywords. The value is stored as given (not trimmed).
 *
 * @param $keys
 *   The new keywords.
 */
public function set_keys($keys) {
  $this->keys = $keys;
}
248
/**
 * Clears the search keywords by setting them to an empty string.
 */
public function remove_keys() {
  $this->keys = '';
}
252
/**
 * Attaches a subquery, replacing any subquery already stored under its ID.
 *
 * @param $query
 *   The subquery object; it is stored under its $id.
 * @param $fq_operator
 *   Operator placed between the subquery's filter queries.
 * @param $q_operator
 *   Operator placed before the subquery's keyword query.
 */
public function add_subquery(Drupal_Solr_Query_Interface $query, $fq_operator = 'OR', $q_operator = 'AND') {
  $entry = array(
    '#query' => $query,
    '#fq_operator' => $fq_operator,
    '#q_operator' => $q_operator,
  );
  $this->subqueries[$query->id] = $entry;
}
256
/**
 * Detaches the subquery stored under the given query's ID, if any.
 *
 * @param $query
 *   The subquery to remove; only its $id is used.
 */
public function remove_subquery(Drupal_Solr_Query_Interface $query) {
  $subquery_id = $query->id;
  unset($this->subqueries[$subquery_id]);
}
260
/**
 * Detaches all subqueries.
 */
public function remove_subqueries() {
  $this->subqueries = array();
}
264
/**
 * Validates $this->sortstring and, when valid, stores it as the Solr sort.
 *
 * Aliases are first replaced with real field names. An empty sort string
 * selects 'score asc'. Otherwise the string must consist of
 * "<available sort> <asc|desc>" terms separated by commas; a string that
 * does not validate leaves the current sort untouched.
 */
protected function parse_sortstring() {
  // Substitute any field aliases with real field names.
  $sortstring = strtr($this->sortstring, array_flip($this->field_map));
  // Score is a special case - it's the default sort for Solr.
  if ('' == $sortstring) {
    $this->set_solrsort('score', 'asc');
    return;
  }
  // Sort names are spliced into a regular expression, so quote them: a name
  // containing a metacharacter (e.g. '.') must only match itself.
  $quoted_names = array();
  foreach (array_keys($this->available_sorts) as $sort_name) {
    $quoted_names[] = preg_quote($sort_name, '/');
  }
  $fields = implode('|', $quoted_names);
  // Validate and set sort parameter
  if (preg_match('/^(?:('. $fields .') (asc|desc),?)+$/', $sortstring, $matches)) {
    // We only use the last match.
    $this->set_solrsort($matches[1], $matches[2]);
  }
}
281
/**
 * Returns a default list of sorts.
 *
 * @return
 *   An array keyed by real Solr index field name; each entry holds a
 *   translated 'title' and a 'default' direction ('asc' or 'desc').
 */
protected function default_sorts() {
  // The array keys must always be real Solr index fields.
  $sorts = array();
  $sorts['score'] = array('title' => t('Relevancy'), 'default' => 'asc');
  $sorts['sort_title'] = array('title' => t('Title'), 'default' => 'asc');
  $sorts['type'] = array('title' => t('Type'), 'default' => 'asc');
  $sorts['sort_name'] = array('title' => t('Author'), 'default' => 'asc');
  $sorts['created'] = array('title' => t('Date'), 'default' => 'desc');
  return $sorts;
}
295
/**
 * Returns the sorts that can currently be selected, keyed by field name.
 */
public function get_available_sorts() {
  $sorts = $this->available_sorts;
  return $sorts;
}
299
/**
 * Adds or replaces an available sort, then validates the sort string again.
 *
 * @param $name
 *   The sort's field name. Non-aliased names are expected.
 * @param $sort
 *   The sort definition stored under $name.
 */
public function set_available_sort($name, $sort) {
  $this->available_sorts[$name] = $sort;
  // The sort string is validated against the available sorts, so parse it
  // again now that the list has changed.
  $this->parse_sortstring();
}
306
/**
 * Removes an available sort, then validates the sort string again.
 *
 * @param $name
 *   The field name of the sort to remove.
 */
public function remove_available_sort($name) {
  unset($this->available_sorts[$name]);
  // The sort string is validated against the available sorts, so parse it
  // again now that the list has changed.
  $this->parse_sortstring();
}
312
/**
 * Returns the active sort as array('#name' => ..., '#direction' => ...).
 */
public function get_solrsort() {
  $sort = $this->solrsort;
  return $sort;
}
316
/**
 * Sets the active sort, provided the name is one of the available sorts.
 *
 * @param $name
 *   The sort's field name; unknown names are silently ignored.
 * @param $direction
 *   The sort direction. It is stored as given.
 */
public function set_solrsort($name, $direction) {
  if (!isset($this->available_sorts[$name])) {
    return;
  }
  $this->solrsort = array('#name' => $name, '#direction' => $direction);
}
322
/**
 * Return the search path (including the search keywords).
 *
 * @param string $new_keywords
 *   Optional. When set, this string is appended to the base path instead
 *   of the query's current keyword query.
 *
 * @return
 *   The base path, a slash, and the keywords.
 */
public function get_path($new_keywords = NULL) {
  $suffix = isset($new_keywords) ? $new_keywords : $this->get_query_basic();
  return $this->base_path . '/' . $suffix;
}
335
/**
 * Builds the URL query values that describe this query's filters and sort.
 *
 * @return
 *   An array that may contain 'filters' (the aliased filter queries joined
 *   with spaces) and 'solrsort' ("name direction"). The sort is left out
 *   when it is 'score asc'.
 */
public function get_url_queryvalues() {
  $queryvalues = array();

  $filter_queries = $this->rebuild_fq(TRUE);
  if ($filter_queries) {
    $queryvalues['filters'] = implode(' ', $filter_queries);
  }

  $sort = $this->solrsort;
  if ($sort && !($sort['#name'] == 'score' && $sort['#direction'] == 'asc')) {
    // Show the alias in the URL when the sort field has one.
    $sort_name = $sort['#name'];
    if (isset($this->field_map[$sort_name])) {
      $sort_name = $this->field_map[$sort_name];
    }
    $queryvalues['solrsort'] = $sort_name .' '. $sort['#direction'];
  }

  return $queryvalues;
}
350
/**
 * Returns the keyword query: this query's keywords combined with the
 * keyword queries of its subqueries.
 */
public function get_query_basic() {
  $query = $this->rebuild_query();
  return $query;
}
354
/**
 * Returns the filter queries as a list of strings, using real field names
 * (no aliases).
 */
public function get_fq() {
  $filter_queries = $this->rebuild_fq();
  return $filter_queries;
}
358
/**
 * Build additional breadcrumb elements relative to $base.
 *
 * The first crumb links the keyword query (when there is one). Each filter
 * then adds a crumb whose link carries that filter plus all filters before
 * it. The final crumb has its tags stripped, since it is the current page.
 *
 * @param $base
 *   Optional. The path every crumb links to; defaults to get_path().
 *
 * @return
 *   A list of breadcrumb strings.
 */
public function get_breadcrumb($base = NULL) {
  $breadcrumb = array();
  $progressive_crumb = array();

  if (!isset($base)) {
    $base = $this->get_path();
  }

  $search_keys = $this->get_query_basic();
  if ($search_keys) {
    $breadcrumb[] = l($search_keys, $base);
  }

  foreach ($this->fields as $field) {
    // Keep the real field name: the theme hook is named after it, while
    // the link uses the alias (when there is one).
    $name = $field['#name'];
    if (isset($this->field_map[$name])) {
      $field['#name'] = $this->field_map[$name];
    }
    $progressive_crumb[] = $this->make_filter($field);
    $options = array('query' => 'filters=' . rawurlencode(implode(' ', $progressive_crumb)));
    // Prefer the themed label; fall back to the raw value when theming
    // returns nothing.
    $themed = theme("apachesolr_breadcrumb_" . $name, $field['#value'], $field['#exclude']);
    $label = $themed ? $themed : $field['#value'];
    $breadcrumb[] = l($label, $base, $options);
  }

  if (!empty($breadcrumb)) {
    // The last breadcrumb is the current page, so it shouldn't be a link.
    $last = count($breadcrumb) - 1;
    $breadcrumb[$last] = strip_tags($breadcrumb[$last]);
  }

  return $breadcrumb;
}
399
/**
 * Parse the filter string in $this->filterstring into $this->fields.
 *
 * For every field the Solr service reports, the filter terms using that
 * field (or its alias) are extracted and ordered by where they appear in
 * the filter string. Filters recorded as removed are then dropped and
 * filters recorded as added are appended.
 */
protected function parse_filters() {
  $this->fields = array();
  $parsed_fields = array();
  // Work on a copy: filter_extract() strips what it finds from its argument.
  $filterstring = $this->filterstring;
  // A trailing space is required since we match all individual
  // filter terms using a trailing space.
  $filter_pos_string = $this->filterstring . ' ';
  // Keys for terms that cannot be located start past the end of the string,
  // so they sort last and never collide with a real offset.
  $unlocated_pos = strlen($filter_pos_string);

  // Gets information about the fields already in solr index.
  $index_fields = $this->solr->getFields();
  foreach ((array) $index_fields as $name => $data) {
    // Look for a field alias.
    $alias = isset($this->field_map[$name]) ? $this->field_map[$name] : $name;
    // Get the values for $name
    $extracted = $this->filter_extract($filterstring, $alias);
    foreach ($extracted as $filter) {
      // The trailing space on $filter['#query'] avoids incorrect
      // matches to a substring. See http://drupal.org/node/891962
      $pos = strpos($filter_pos_string, $filter['#query'] . ' ');
      // The term may be missing from the original string when an earlier
      // extraction cut text out of the middle of it. FALSE used as an array
      // key becomes 0 and would overwrite the filter at offset 0.
      if ($pos === FALSE) {
        $pos = $unlocated_pos++;
      }
      // $parsed_fields is keyed on $pos so that query order is maintained.
      // This is important for breadcrumbs.
      $filter['#name'] = $name;
      $parsed_fields[$pos] = $filter;
    }
  }
  // Even though the array has the right keys they are likely in the wrong
  // order. ksort() sorts the array by key while maintaining the key.
  ksort($parsed_fields);
  foreach ($this->fields_removed as $name => $values) {
    foreach ($values as $val) {
      $this->unset_filter($parsed_fields, $name, $val);
    }
  }
  $this->fields = array_merge(array_values($parsed_fields), $this->fields_added);
}
442
/**
 * Builds a set of filter queries from $this->fields and all subqueries.
 *
 * @param $aliases
 *   TRUE to write field names as their aliases (for URLs), FALSE to use
 *   the real index field names.
 *
 * @return
 *   An array of strings that can be combined into a URL query parameter or
 *   passed to Solr as fq parameters. Each subquery that yields filters
 *   contributes one parenthesised entry joined with its '#fq_operator'.
 */
protected function rebuild_fq($aliases = FALSE) {
  $fq = array();
  foreach ($this->fields as $field) {
    // Look for a field alias.
    if ($aliases && isset($this->field_map[$field['#name']])) {
      $field['#name'] = $this->field_map[$field['#name']];
    }
    $fq[] = $this->make_filter($field);
  }
  foreach ($this->subqueries as $data) {
    $subfq = $data['#query']->rebuild_fq($aliases);
    if ($subfq) {
      $operator = $data['#fq_operator'];
      $fq[] = "(" . implode(" {$operator} ", $subfq) .")";
    }
  }
  return $fq;
}
468
/**
 * Builds the keyword query from this query's keywords and its subqueries.
 *
 * Each subquery with a non-empty keyword query is appended in parentheses,
 * preceded by its '#q_operator'. The operator is left out while the result
 * is still empty, so the query never starts with a dangling operator such
 * as " AND (foo)".
 *
 * @return
 *   The combined keyword query string.
 */
protected function rebuild_query() {
  $query = $this->keys;
  foreach ($this->subqueries as $data) {
    $subquery = $data['#query']->get_query_basic();
    if (!$subquery) {
      continue;
    }
    if (strlen((string) $query)) {
      $operator = $data['#q_operator'];
      $query .= " {$operator} ({$subquery})";
    }
    else {
      // Nothing to join onto yet: start the query with this subquery.
      $query = "({$subquery})";
    }
  }
  return $query;
}
480 }