/[drupal]/contributions/modules/google_appliance/GoogleMini.php
ViewVC logotype

Contents of /contributions/modules/google_appliance/GoogleMini.php

Parent Directory | Revision Log | View Revision Graph


Revision 1.2 - (show annotations) (download) (as text)
Tue Apr 8 19:54:36 2008 UTC (19 months, 2 weeks ago) by jacobsingh
Branch: MAIN
CVS Tags: HEAD
Branch point for: DRUPAL-6--2
Changes since 1.1: +1 -2 lines
File MIME type: text/x-php
Small change to docs
1 <?php
2
// Upper bound used when validating paging requests. The value is read from the
// 'google_appliance_max_results' variable (default 1000) via variable_get() at
// the moment this file is loaded. setPageAndResultsPerPage() throws when the
// last requested result index would exceed this constant.
define('GOOGLE_MINI_MAX_RESULTS', variable_get('google_appliance_max_results', 1000));
4
/**
 * Client for the XML search interface of a Google Mini / Search Appliance.
 *
 * Usage: assign $baseUrl and $collection, describe the search with the
 * set*() / add*() methods, then call query() to fetch and wrap the results
 * (or buildQuery() to obtain only the request URL).
 */
class GoogleMini {

  /**
   * Metadata filters keyed by filter type ('partialfields' or
   * 'requiredfields') and then by meta field name. Every entry is a stdClass
   * with ->type (join mode) and ->values (list of url-encoded values).
   */
  private $_metaDataFilters = array();

  /** Not read or written by any method of this class. */
  private $_metaDataRequested = array();

  public $baseUrl = ''; // REQUIRED
  public $frontEnd = ''; // Original note: "IF SET WILL DISABLE parsing of results." Not read by this class.
  public $collection = ''; // REQUIRED

  /** Request parameters (label => already-encoded value) appended by buildQuery(). */
  private $_queryParts = array();

  /** Whether buildQuery()/query() emit debug log messages. */
  public $debug = false;

  /** Optional callable that receives every message passed to log(). */
  public $debug_callback = null;

  /** The URL produced by the most recent buildQuery() call. */
  public $_query = '';

  /**
   * @param bool $debug
   *   Enables debug logging when true.
   * @param callable|null $debug_callback
   *   Optional callable invoked with each log message. It is only stored when
   *   $debug is true and the value is actually callable.
   */
  public function __construct($debug = false, $debug_callback = null) {
    if ($debug) {
      $this->debug = true;
      if ($debug_callback && is_callable($debug_callback)) {
        $this->debug_callback = $debug_callback;
      }
    }
  }

  /**
   * Passes $message to the debug callback (when one is set) and to watchdog().
   *
   * @param string $message
   */
  public function log($message = null) {
    if ($this->debug_callback) {
      call_user_func($this->debug_callback, $message);
    }
    watchdog('google_search', $message);
  }

  /**
   * Helper function, just builds the array for processing, may have validation later.
   *
   * @param string $key
   * @param string $value
   */
  public function setQueryPart($key, $value) {
    $this->_queryParts[$key] = $value;
  }

  /**
   * Helper function, returns a pre-assigned query part.
   *
   * Stored values that happen to be falsy (for example a 'start' of 0) are
   * returned as-is; false is reserved for "no such part".
   *
   * @param string $key
   * @return mixed the query part corresponding to $key, or false if it doesn't exist
   */
  public function getQueryPart($key) {
    if (isset($this->_queryParts[$key])) {
      return $this->_queryParts[$key];
    }
    return false;
  }

  /**
   * Adds a meta data filter to the query. Currently has limited flexibility.
   * Pass a key as a meta field and values as an array of values to be joined,
   * or pass a single string value (for ease of use).
   *
   * Calling this again with the same $type and $key replaces the earlier filter.
   *
   * @param string $key
   *   Meta field name.
   * @param string|array $values
   *   Value(s) to match; each one is url-encoded before being stored.
   * @param string $type
   *   Either 'requiredfields' or 'partialfields'.
   * @param string $join
   *   Join mode understood by buildQuery(): 'OR', 'OROR' or 'ANDNEG'; any
   *   other value joins the values with '.'.
   *
   * @throws GoogleMiniCriteriaException
   *   When $type is not one of the two supported filter types.
   */
  public function addMetaDataFilter($key, $values, $type = 'partialfields', $join = 'OR') {
    if (!in_array($type, array('partialfields', 'requiredfields'), true)) {
      throw new GoogleMiniCriteriaException("You must provide a type of either partialfields or requiredfields", -99);
    }

    // Always build a fresh object so the single-value form never writes
    // properties onto a non-existent value.
    $filter = new stdClass();
    $filter->type = $join;
    $filter->values = array();

    $list = is_array($values) ? $values : array($values);
    foreach ($list as $value) {
      $filter->values[] = urlencode($value);
    }

    $this->_metaDataFilters[$type][$key] = $filter;
  }

  /**
   * Sets the languages to be used in the search, if none specified, searches all languages
   *
   * @param array|string $languages
   *   A list of language restricts (joined with '|') or a ready-made string.
   * @return bool
   *   True when a language restriction was stored, false when $languages was empty.
   */
  public function setLanguageFilter($languages = null) {
    if (!$languages) {
      return false;
    }
    $restrict = is_array($languages) ? implode('|', $languages) : $languages;
    $this->setQueryPart("lr", $restrict);
    return true;
  }

  /**
   * Creates a date filter by adding "daterange:$date_before..$date_after" to
   * the 'q' parameter.
   *
   * @param string $date_before Date in YYYY-MM-DD format (left side of the range).
   * @param string $date_after Date in YYYY-MM-DD format (right side of the range).
   */
  public function setDateFilter($date_before, $date_after) {
    $this->_appendQueryTerm("daterange:$date_before..$date_after");
  }

  /**
   * Adds a site restriction. Useful if just querying by date, as that won't work
   * unless you search for words or at least one other keyword search.
   *
   * The domain is url-encoded whether or not other terms are already present.
   *
   * @param string $domain
   */
  public function setDomainRestriction($domain) {
    $this->_appendQueryTerm("site:" . urlencode($domain));
  }

  /**
   * Sets sorting type (date or relevancy) and direction
   *
   * @param string $dir
   *   A - Sort in Ascending order
   *   D - Sort in Descending order
   * @param string $mode
   *   S - Return the 1,000 most relevant results, sorted by date.
   *   R - Return all results, sorted by date.
   *   *** WARNING *** Do not use this filter if your collection contains more than 50,000 documents.
   *   If the result set is very large, the sort operation could create significant delays in the display of results.
   * @return bool
   *   Always true when no exception is thrown.
   *
   * @throws GoogleMiniCriteriaException
   *   When $dir or $mode is not one of the accepted letters. The exception
   *   carries code -99; the previous E_WARNING code equals 2 and therefore
   *   selected the unrelated "more than 1,000 records" user message.
   */
  public function setDateSort($dir = "D", $mode = 'S') {
    if ($dir != 'A' && $dir != 'D') {
      throw new GoogleMiniCriteriaException(sprintf("The Sort direction provided is incorrect. Got %s, needs to be A or D", htmlentities($dir)), -99);
    }
    if ($mode != 'S' && $mode != 'R') {
      throw new GoogleMiniCriteriaException(sprintf("The Sort mode provided is incorrect. Got %s, needs to be S or R", htmlentities($mode)), -99);
    }

    // build sort string
    // http://code.google.com/apis/searchappliance/documentation/46/xml_reference.html#request_sort_by_date
    $this->setQueryPart('sort', "date:$dir:$mode:d1");
    return true;
  }

  /**
   * Set the keywords used for keyword search
   *
   * @param string $keys
   *   Raw keywords; they are url-encoded here.
   */
  public function setKeywords($keys) {
    $this->_appendQueryTerm(urlencode($keys));
  }

  /**
   * Set fields to show in results. For all fields, send an asterisk (*)
   *
   * @param array|string $fields
   *   A list of field names (joined with '.') or a ready-made string.
   */
  public function setMetaDataRequested($fields = null) {
    $requested = is_array($fields) ? implode('.', $fields) : $fields;
    $this->setQueryPart('getfields', $requested);
  }

  /**
   * Set page of result set to be shown and sets number of results per page
   *
   * @param int $page
   *   Zero-based page number.
   * @param int $results
   *   Number of results per page.
   * @return bool
   *   Always true when no exception is thrown.
   *
   * @throws GoogleMiniCriteriaException
   *   When the last requested result would lie beyond GOOGLE_MINI_MAX_RESULTS.
   */
  public function setPageAndResultsPerPage($page = 0, $results = 10) {
    $start = $page * $results;
    $end = $start + $results;
    if ($end > GOOGLE_MINI_MAX_RESULTS) {
      throw new GoogleMiniCriteriaException("You cannot get more than ".GOOGLE_MINI_MAX_RESULTS." results per page, requested $end", 2);
    }
    $this->setQueryPart('start', $start);
    $this->setQueryPart('num', $results);
    return true;
  }

  /**
   * Set the encoding for data coming out of the search
   *
   * @param string $enc
   */
  public function setOutputEncoding($enc) {
    $this->setQueryPart('oe', $enc);
  }

  /**
   * Set the encoding for data going into the search
   *
   * @param string $enc
   */
  public function setInputEncoding($enc) {
    $this->setQueryPart('ie', $enc);
  }

  /**
   * Assembles the request URL from $baseUrl, $collection, the registered
   * metadata filters and all stored query parts. The URL is also kept in
   * $this->_query.
   *
   * @return string
   *   The complete request URL.
   *
   * @throws GoogleMiniQueryException
   *   When $baseUrl or $collection is empty (code -99; see setDateSort() for
   *   why E_WARNING is no longer used as the code).
   */
  public function buildQuery() {
    if (!$this->baseUrl || !$this->collection) {
      throw new GoogleMiniQueryException("Required variables (baseUrl or collection) missing", -99);
    }

    // One request parameter per filter type; each type starts from an empty
    // string so filters of one type never leak into the other.
    foreach ($this->_metaDataFilters as $type => $fields) {
      $this->setQueryPart($type, $this->_buildMetaFilter($fields));
    }

    $this->setQueryPart('output', 'xml_no_dtd');

    $query = $this->baseUrl . "?site=" . $this->collection;

    if ($this->debug) {
      $this->log('Building Query');
      $this->log(var_export($this->_queryParts, 1));
    }

    foreach ($this->_queryParts as $label => $value) {
      $query .= "&$label=$value";
    }

    $this->_query = $query;
    if ($this->debug) {
      $this->log($query);
    }

    return $query;
  }

  /**
   * Fires the query to google and wraps the response.
   *
   * @param string $iteratorClass
   *   Name of the iterator class handed to resultFactory().
   * @return object
   *   Instance of $iteratorClass.
   *
   * @throws GoogleMiniQueryException
   *   From buildQuery() when required settings are missing.
   * @throws GoogleMiniResultException
   *   Code -100 when the HTTP request itself fails; otherwise whatever
   *   resultFactory() throws.
   */
  public function query($iteratorClass = 'GoogleMiniResultIterator') {
    $query = $this->buildQuery();

    // get search results in XML using cURL
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $query);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Verbose transfer output is only useful while debugging.
    curl_setopt($ch, CURLOPT_VERBOSE, (bool) $this->debug);

    $resultXML = curl_exec($ch);
    $error = ($resultXML === false) ? curl_error($ch) : '';
    curl_close($ch);

    if ($this->debug) {
      $this->log('Made CURL request to ' . $query);
    }

    if ($resultXML === false) {
      // Same exception class the original raised (indirectly) for an unusable
      // response, now with the dedicated "connection down" code.
      throw new GoogleMiniResultException("Search request failed: " . $error, -100);
    }

    return self::resultFactory($resultXML, $iteratorClass);
  }

  /**
   * Parses a response document and wraps its results in an iterator.
   *
   * Declared static because query() calls it through self:: and it uses no
   * instance state; calling it on an instance keeps working.
   *
   * @param string $resultXML
   *   XML document returned by the appliance.
   * @param string $className
   *   Iterator class to instantiate; it receives the list of R elements and
   *   is given ->payload, ->time and ->totalResults.
   * @return object
   *   Instance of $className.
   *
   * @throws GoogleMiniResultException
   *   Code -99 when the response is not parseable XML, code 1 when the
   *   response reports zero results and contains no GM (key match) elements.
   */
  public static function resultFactory($resultXML, $className = 'GoogleMiniResultIterator') {
    // Collect parser errors internally instead of emitting PHP warnings, then
    // restore whatever setting the caller had.
    $previous = libxml_use_internal_errors(true);
    $payload = simplexml_load_string((string) $resultXML);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if ($payload === false) {
      throw new GoogleMiniResultException("The search response could not be parsed as XML", -99);
    }

    $results = array();
    $totalResults = $payload->RES->M;

    if ((int) $totalResults === 0) {
      // Key matches alone still count as a usable response.
      if (!$payload->GM) {
        throw new GoogleMiniResultException("No Results found", 1);
      }
    }
    else {
      $found = $payload->xpath('//R');
      if (is_array($found)) {
        $results = $found;
      }
    }

    $iterator = new $className($results);
    $iterator->payload = $payload;
    $iterator->time = $payload->TM;
    $iterator->totalResults = $totalResults;
    return $iterator;
  }

  /**
   * Adds one already-encoded term to the 'q' parameter, separating it from
   * existing terms with an encoded space.
   *
   * @param string $term
   */
  private function _appendQueryTerm($term) {
    if (isset($this->_queryParts['q']) && (string) $this->_queryParts['q'] !== '') {
      $this->_queryParts['q'] .= "%20" . $term;
    }
    else {
      $this->setQueryPart('q', $term);
    }
  }

  /**
   * Renders all filters of one type into a single parameter value.
   *
   * @param array $fields
   *   Map of meta field name => filter object (->type, ->values).
   * @return string
   *   The joined filter expression without its trailing separator.
   */
  private function _buildMetaFilter($fields) {
    $metafilter = '';

    foreach ($fields as $field => $mdf) {
      if ($mdf->type === 'ANDNEG') {
        foreach ($mdf->values as $value) {
          $metafilter .= '-' . $field . ':' . $value . '.';
        }
      }
      elseif ($mdf->type === 'OR' || $mdf->type === 'OROR') {
        $vals = array();
        foreach ($mdf->values as $v) {
          $vals[] = $field . ':' . $v;
        }
        // The 'OROR' case is used on the Related Information pages, where you want
        // to search documents with one of multiple terms in multiple vocabularies.
        // You have to join the different types with a | otherwise the date sorting gets messed up.
        $metafilter .= join("|", $vals) . ($mdf->type === 'OROR' ? "|" : ".");
      }
      else {
        foreach ($mdf->values as $value) {
          $metafilter .= $field . ':' . $value . '.';
        }
      }
    }

    // Drop the separator that follows the last field.
    return ($metafilter === '') ? '' : substr($metafilter, 0, -1);
  }
}
324
/**
 * Empty subclass of SimpleXMLIterator. It adds no members and is not
 * referenced by any other class in this file.
 */
class GoogleResult extends SimpleXMLIterator {

}
328
/**
 * Iterates over the result (R) elements of one search response, handing each
 * one out wrapped in a GoogleMiniResult.
 *
 * Properties that are not declared here are looked up on the response
 * document held in $payload, so e.g. $iterator->GM reads the GM elements.
 */
class GoogleMiniResultIterator extends ArrayIterator {

  /** Value of the response's TM element, as assigned by the code that builds the iterator. */
  public $time;

  /** The parsed response document the results were taken from. */
  public $payload;

  /** Value of the response's RES/M element, as assigned by the code that builds the iterator. */
  public $totalResults;

  /**
   * Returns the element at the current position wrapped in a GoogleMiniResult.
   *
   * The attribute line below is an ordinary comment for PHP versions that do
   * not know attributes; newer versions use it to accept the untyped
   * override of ArrayIterator::current().
   *
   * @return GoogleMiniResult
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return new GoogleMiniResult(parent::current());
  }

  /**
   * Forwards reads of undeclared properties to the response document.
   *
   * @param string $key
   * @return mixed
   *   The matching child of $payload, or null when no payload was assigned.
   */
  public function __get($key) {
    if (!is_object($this->payload)) {
      return null;
    }
    return $this->payload->$key;
  }

  /**
   * Returns an array of keymatches keyed with:
   * [link] => [title]
   *
   * @return array
   *   GL (link) => GD (title) for every GM element; empty when there are none.
   */
  public function getKeyMatches() {
    $output = array();
    $matches = $this->GM;
    if ($matches) {
      foreach ($matches as $match) {
        $output[(string) $match->GL] = (string) $match->GD;
      }
    }
    return $output;
  }

}
361
/**
 * Wraps a single result element of a search response.
 *
 * Undeclared properties are read from the wrapped element (so $result->U
 * reads its U child); meta tags (MT children with N/V attributes) are exposed
 * through getMetaData().
 */
class GoogleMiniResult {

  /** Meta tag name => value map; null until buildMetaData() has run. */
  public $metaData;

  /** The wrapped result element. */
  public $result;

  /**
   * @param SimpleXMLElement $result
   *   The result element to wrap.
   */
  public function __construct($result) {
    $this->result = $result;
  }

  /**
   * Forwards reads of undeclared properties to the wrapped element.
   *
   * @param string $key
   * @return mixed
   *   The matching child, or null when nothing usable is wrapped.
   */
  public function __get($key) {
    if (!is_object($this->result)) {
      return null;
    }
    return $this->result->$key;
  }

  /**
   * Returns the value of one meta tag, building the lookup map on first use.
   *
   * @param string $key
   *   Meta tag name (the MT element's N attribute).
   * @return string|null
   *   The tag's V attribute, or null when the result has no such tag.
   */
  public function getMetaData($key) {
    // Build once, even when the result carries no meta tags at all.
    if (!is_array($this->metaData)) {
      $this->buildMetaData();
    }
    return isset($this->metaData[$key]) ? $this->metaData[$key] : null;
  }

  /**
   * Copies every MT child of the wrapped element into $metaData
   * (N attribute => V attribute). Existing entries with other names are kept.
   */
  public function buildMetaData() {
    if (!is_array($this->metaData)) {
      $this->metaData = array();
    }
    if (!is_object($this->result)) {
      return;
    }
    foreach ($this->result->MT as $metaTag) {
      $this->metaData[(string) $metaTag['N']] = (string) $metaTag['V'];
    }
  }
}
387
388
/**
 * Raised by GoogleMini::buildQuery() when a required setting (baseUrl or
 * collection) is missing. Adds nothing to GoogleMiniException.
 */
class GoogleMiniQueryException extends GoogleMiniException {

}
/**
 * Raised by the GoogleMini criteria setters (addMetaDataFilter(),
 * setDateSort(), setPageAndResultsPerPage()) when an argument is rejected.
 * Adds nothing to GoogleMiniException.
 */
class GoogleMiniCriteriaException extends GoogleMiniException {

}
395
/**
 * Raised by GoogleMini::resultFactory() when a response reports no results
 * and contains no key matches.
 */
class GoogleMiniResultException extends GoogleMiniException {
// Not read or written anywhere in this file; purpose not shown here.
var $log_messages = array();
}
399
/**
 * Base class for all exceptions thrown by the GoogleMini classes.
 *
 * Besides the technical message it carries $userMessage: the end-user text
 * registered for the exception code, or the technical message itself when
 * the code has no registered text.
 */
class GoogleMiniException extends Exception {

  /** Text suitable for showing to the person who ran the search. */
  public $userMessage;

  /**
   * @param string $message
   *   Technical description of the problem.
   * @param int|string|null $code
   *   One of the codes from getErrorCodes(), any other integer, or null.
   *   It is normalised to an integer for Exception (null becomes 0).
   */
  public function __construct($message, $code = null) {
    parent::__construct($message, (int) $code);

    $userMessage = self::getUserMessage($code);
    $this->userMessage = $userMessage ? $userMessage : $message;
  }

  /**
   * Returns the known error codes and their end-user messages.
   *
   * Static so it can be called with or without an instance.
   *
   * @return array
   *   Error code => user message.
   */
  public static function getErrorCodes() {
    return array(
      '-100' => 'We apologize, but the connection to our search engine appears to be down at the moment, please try again later.',
      '-99' => 'We apologize, but your search cannot be completed at this time, please try again later.',
      '1' => 'No results were found that matched your criteria. Please try broadening your search.',
      '2' => 'Sorry, but our search does not return more than 1,000 records, please refine your criteria.',
    );
  }

  /**
   * Looks up the end-user message for an error code.
   *
   * @param int|string|null $code
   * @return string|null
   *   The registered message, or null when the code is unknown or not a
   *   usable array key.
   */
  public static function getUserMessage($code) {
    if (!is_int($code) && !is_string($code)) {
      return null;
    }
    $error_codes = self::getErrorCodes();
    return isset($error_codes[$code]) ? $error_codes[$code] : null;
  }
}
429
430 ?>

  ViewVC Help
Powered by ViewVC 1.1.2