/[drupal]/contributions/modules/sphinxsearch/sphinxsearch.xmlpipe.inc
ViewVC logotype

Contents of /contributions/modules/sphinxsearch/sphinxsearch.xmlpipe.inc

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (show annotations) (download) (as text)
Fri Sep 12 02:44:22 2008 UTC (14 months, 2 weeks ago) by markuspetrux
Branch: MAIN
CVS Tags: HEAD
Branch point for: DRUPAL-6--1
Changes since 1.3: +113 -39 lines
File MIME type: text/x-php
- Ported module from D5 to D6.
- Bugfix: undefined class method in sphinxsearch_check_connection_page().
- Bugfix: added criterion class to taxonomy elements in advanced search form.
- Bugfix: added support for mysqli and pgsql to _sphinxsearch_db_reconnect().
1 <?php
2 // $Id: sphinxsearch.xmlpipe.inc,v 1.1.2.8 2008/08/29 19:08:31 markuspetrux Exp $
3
4 /**
5 * @file
6 * XMLPipe generator invoked by Sphinx indexer.
7 */
8
/**
 * Entry point for XMLPipe generator.
 *
 * This function is invoked from sphinxsearch_scripts/sphinxsearch_xmlpipe.php
 *
 * Note this process may eat a lot of resources, and it will be executed by
 * anonymous user, so access control is based on IP.
 *
 * Request arguments read from $_GET:
 * - mode: 'main' or 'delta'.
 * - id, first_nid, last_nid: only used when mode is 'main'.
 *
 * Every failure is logged to watchdog, printed to the output and terminates
 * the request.
 *
 * @param $caller_version
 *   Version reported by the calling script. It must match the generator
 *   version hard-coded in this function.
 */
function sphinxsearch_xmlpipe($caller_version = 0) {
  // Check caller version.
  $sphinxsearch_xmlpipe_generator_version = 2;
  if ((int)$caller_version != $sphinxsearch_xmlpipe_generator_version) {
    $message = t('Oops! It looks like version of your sphinxsearch_xmlpipe.php script does not match the version expected by the sphinxsearch module. Maybe someone forgot to upgrade the contents of the sphinxsearch_scripts subdirectory when updating the module.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }

  // Check access to XMLPipe process by IP.
  // explode() returns array('') for an empty setting, so blank entries are
  // dropped here: an empty (or sloppy, e.g. trailing comma) setting must
  // authorize nobody.
  $access_xmlpipe = FALSE;
  $sphinxsearch_indexer_ips = array_filter(array_map('trim', explode(',', variable_get('sphinxsearch_indexer_ips', ''))), 'strlen');
  $remote_ip = ip_address();
  foreach ($sphinxsearch_indexer_ips as $cidr) {
    if (sphinxsearch_ip_check_cidr($remote_ip, $cidr)) {
      $access_xmlpipe = TRUE;
      break;
    }
  }
  if (!$access_xmlpipe) {
    $message = t('Not authorized.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }

  // Obtain current mode of operation.
  $mode = (isset($_GET['mode']) ? trim($_GET['mode']) : '');

  // Prepare global data for XMLPipe generation.
  $GLOBALS['sphinxsearch_start_execution_time'] = time();
  $GLOBALS['sphinxsearch_max_execution_time'] = (int)ini_get('max_execution_time');
  $GLOBALS['sphinxsearch_memory_limit'] = sphinxsearch_get_memory_limit();
  $GLOBALS['sphinxsearch_initial_memory_usage'] = memory_get_usage();
  $GLOBALS['sphinxsearch_taxonomy_module_exists'] = module_exists('taxonomy');

  if ($GLOBALS['sphinxsearch_taxonomy_module_exists']) {
    require_once(drupal_get_path('module', 'sphinxsearch') .'/sphinxsearch.taxonomy.inc');
  }

  // Check PHP settings for resource consumption control.
  if ($GLOBALS['sphinxsearch_memory_limit'] < 0) {
    $message = t('Unable to obtain PHP memory limit. Please, make sure your PHP installation is compiled with --enable-memory-limit option.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }
  // NOTE(review): a max_execution_time of 0 (which PHP documents as "no
  // limit") is rejected by this check as well. The main index generator
  // applies the same 30 seconds rule, so both would need to change together.
  if ($GLOBALS['sphinxsearch_max_execution_time'] < 30) {
    $message = t('Maximum PHP execution time is set to @max_execution_time seconds, but this window is too short for this process.', array(
      '@max_execution_time' => $GLOBALS['sphinxsearch_max_execution_time']
    ));
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }

  // Make sure no output buffering is being used. Buffers may be nested, so
  // discard every level; stop if a buffer refuses to be removed.
  while (ob_get_level() > 0) {
    if (!ob_end_clean()) {
      break;
    }
  }

  if ($mode == 'main') {
    // Obtain the main index identifier.
    $main_index_id = (isset($_GET['id']) ? max(0, (int)$_GET['id']) : 0);
    // Obtain range of nodes to be included in this main index.
    $first_nid = (isset($_GET['first_nid']) ? max(0, (int)$_GET['first_nid']) : 0);
    $last_nid = (isset($_GET['last_nid']) ? max(0, (int)$_GET['last_nid']) : -1);
    if ($last_nid < 0) {
      // No upper bound requested: use the highest published nid among the
      // enabled node types.
      $sql_node_types_condition = sphinxsearch_get_enabled_node_types_condition();
      if (!empty($sql_node_types_condition)) {
        $sql_node_types_condition = ' AND '. $sql_node_types_condition;
      }
      $last_nid = (int)db_result(db_query('SELECT nid FROM {node} WHERE status = 1'. $sql_node_types_condition .' ORDER BY nid DESC LIMIT 1'));
      if ($last_nid <= 0) {
        $message = t('Could not obtain last nid.');
        watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
        print $message ."\n";
        exit;
      }
    }
    sphinxsearch_generate_xmlpipe_main($main_index_id, $first_nid, $last_nid);
  }
  else if ($mode == 'delta') {
    sphinxsearch_generate_xmlpipe_delta();
  }
  else {
    $message = t('Invalid argument.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }
}
111
/**
 * Generate a XMLPipe stream to build a main index for specified range.
 *
 * Main index processing will terminate when one of the following conditions is met:
 * a) All nodes specified by range have been processed.
 * b) There are less than 30 seconds to reach maximum PHP execution time (max_execution_time).
 * c) Memory used is more than 90% of available PHP memory (memory_limit).
 *
 * In cases b) and c) an error is logged and printed and the request exits
 * without emitting the XMLPipe footer.
 *
 * Nodes are processed in chunks of 'sphinxsearch_nodes_per_chunk' consecutive
 * nids (the whole range in one go when that setting is not positive). After
 * every 'sphinxsearch_chunks_before_restart' chunks the database connection
 * is restarted.
 *
 * This function reads the $GLOBALS['sphinxsearch_*'] values prepared by
 * sphinxsearch_xmlpipe() and always terminates the request.
 *
 * @param int $main_index_id
 *   Main index identifier. Main indexes should be numbered from 0 to n.
 * @param int $first_nid
 *   First node identifier to be included in this main index (inclusive).
 * @param int $last_nid
 *   Last node identifier to be included in this main index (inclusive).
 */
function sphinxsearch_generate_xmlpipe_main($main_index_id, $first_nid, $last_nid) {
  $range_start = $first_nid;
  $range_step = (int)variable_get('sphinxsearch_nodes_per_chunk', 0);
  $chunks_before_restart = variable_get('sphinxsearch_chunks_before_restart', 0);
  $chunks_counter = 0;
  $nodes_counter = 0;

  if ($range_step <= 0) {
    // Chunking disabled: process the whole range as a single chunk.
    $range_step = ($last_nid - $first_nid) + 1;
    $chunks_before_restart = 0;
  }

  watchdog('sphinxsearch', t('XMLPipe processing for main index @main_index_id has started.<ul>
    <li>Nodes range: @first_nid-@last_nid.</li>
    <li>PHP max_execution_time: @max_execution_time seconds.</li>
    <li>PHP memory_limit: @memory_limit_bytes bytes (@memory_limit_kb KB).</li>
    <li>Initial memory usage: @initial_memory_bytes bytes (@initial_memory_kb KB).</li>
    </ul>', array(
    '@main_index_id' => $main_index_id,
    '@first_nid' => $first_nid,
    '@last_nid' => $last_nid,
    '@max_execution_time' => $GLOBALS['sphinxsearch_max_execution_time'],
    '@memory_limit_bytes' => $GLOBALS['sphinxsearch_memory_limit'],
    '@memory_limit_kb' => round($GLOBALS['sphinxsearch_memory_limit'] / 1024, 2),
    '@initial_memory_bytes' => $GLOBALS['sphinxsearch_initial_memory_usage'],
    '@initial_memory_kb' => round($GLOBALS['sphinxsearch_initial_memory_usage'] / 1024, 2),
  )), NULL, WATCHDOG_INFO);

  // Generate XMLPipe header.
  print sphinxsearch_xmlpipe_header();

  $sql_common_conditions = sphinxsearch_get_enabled_node_types_condition();
  if (!empty($sql_common_conditions)) {
    $sql_common_conditions = 'status = 1 AND '. $sql_common_conditions;
  }
  else {
    $sql_common_conditions = 'status = 1';
  }

  while (TRUE) {
    // Abort processing if current memory usage is more than 90%.
    $current_memory_bytes = memory_get_usage();
    if (intval($current_memory_bytes * 100 / $GLOBALS['sphinxsearch_memory_limit']) > 90) {
      $message = t('Short on resources. Current memory usage is higher than 90% of PHP memory_limit.<ul>
        <li>PHP memory_limit: @memory_limit_bytes bytes (@memory_limit_kb KB).</li>
        <li>Initial memory usage: @initial_memory_bytes bytes (@initial_memory_kb KB).</li>
        <li>Current memory usage: @current_memory_bytes bytes (@current_memory_kb KB).</li>
        </ul>', array(
        '@memory_limit_bytes' => $GLOBALS['sphinxsearch_memory_limit'],
        '@memory_limit_kb' => round($GLOBALS['sphinxsearch_memory_limit'] / 1024, 2),
        '@initial_memory_bytes' => $GLOBALS['sphinxsearch_initial_memory_usage'],
        '@initial_memory_kb' => round($GLOBALS['sphinxsearch_initial_memory_usage'] / 1024, 2),
        '@current_memory_bytes' => $current_memory_bytes,
        '@current_memory_kb' => round($current_memory_bytes / 1024, 2),
      ));
      _sphinxsearch_wrapper_watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
      print strip_tags($message) ."\n";
      exit;
    }
    // Let's suppose we need less than 30 seconds to process one single chunk of nodes.
    // Abort processing if current execution time is about to be higher than max.
    $current_execution_time = time() - $GLOBALS['sphinxsearch_start_execution_time'];
    if (($GLOBALS['sphinxsearch_max_execution_time'] - $current_execution_time) < 30) {
      $message = t('Short on resources. Current execution time is about to exceed PHP max_execution_time.<ul>
        <li>PHP max_execution_time: @max_execution_time seconds.</li>
        <li>Current execution time: @current_execution_time seconds.</li>
        </ul>', array(
        '@max_execution_time' => $GLOBALS['sphinxsearch_max_execution_time'],
        '@current_execution_time' => $current_execution_time,
      ));
      _sphinxsearch_wrapper_watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
      print strip_tags($message) ."\n";
      exit;
    }

    // Load the nids we are about to process within current loop.
    // Both bounds of the SQL range are inclusive, hence the "- 1": a chunk
    // spans exactly $range_step nids.
    $range_end = min($range_start + $range_step - 1, $last_nid);
    $nids = array();
    $result = db_query('SELECT nid FROM {node}
      WHERE '. $sql_common_conditions .' AND nid >= %d AND nid <= %d
      ORDER BY nid ASC', array($range_start, $range_end));
    while ($row = db_fetch_object($result)) {
      $nids[] = $row->nid;
    }

    // Process nodes for this loop.
    foreach ($nids as $nid) {
      if ($nid > $last_nid) {
        break;
      }
      $nodes_counter++;
      $xmlpipe_document = sphinxsearch_xmlpipe_document($main_index_id, $nid);
      if ($xmlpipe_document) {
        print $xmlpipe_document;
      }
    }
    unset($nids);

    // Are we done?
    $range_start = $range_end + 1;
    if ($range_start > $last_nid) {
      break;
    }

    // Need to restart database connection?
    if ($chunks_before_restart > 0) {
      $chunks_counter++;
      if ($chunks_counter >= $chunks_before_restart) {
        _sphinxsearch_db_reconnect();
        $current_memory_bytes = memory_get_usage();
        $current_execution_time = time() - $GLOBALS['sphinxsearch_start_execution_time'];
        $message = t('Database server connection has been restarted.<ul>
          <li>Initial memory usage: @initial_memory_bytes bytes (@initial_memory_kb KB).</li>
          <li>Current memory usage: @current_memory_bytes bytes (@current_memory_kb KB).</li>
          <li>Current execution time: @current_execution_time seconds.</li>
          <li>Nodes processed: @nodes_counter (@nodes_per_second nodes/sec).</li>
          </ul>', array(
          '@initial_memory_bytes' => $GLOBALS['sphinxsearch_initial_memory_usage'],
          '@initial_memory_kb' => round($GLOBALS['sphinxsearch_initial_memory_usage'] / 1024, 2),
          '@current_memory_bytes' => $current_memory_bytes,
          '@current_memory_kb' => round($current_memory_bytes / 1024, 2),
          '@current_execution_time' => $current_execution_time,
          '@nodes_counter' => $nodes_counter,
          '@nodes_per_second' => ($current_execution_time > 0 ? round($nodes_counter / $current_execution_time, 2) : $nodes_counter),
        ));
        // The wrapper is used from here on because the regular dblog hook
        // relies on db_set_active(), which does not know about the
        // connection we have just reopened.
        _sphinxsearch_wrapper_watchdog('sphinxsearch', $message, NULL, WATCHDOG_NOTICE);
        $chunks_counter = 0;
      }
    }
  }

  // Generate XMLPipe footer.
  print sphinxsearch_xmlpipe_footer();

  // Store process statistics to watchdog.
  $current_memory_bytes = memory_get_usage();
  $total_execution_time = time() - $GLOBALS['sphinxsearch_start_execution_time'];
  $message = t('XMLPipe processing for main index @main_index_id has finished successfully.<ul>
    <li>Initial memory usage: @initial_memory_bytes bytes (@initial_memory_kb KB).</li>
    <li>Current memory usage: @current_memory_bytes bytes (@current_memory_kb KB).</li>
    <li>Total execution time: @total_execution_time seconds.</li>
    <li>Nodes processed: @nodes_counter (@nodes_per_second nodes/sec).</li>
    </ul>', array(
    '@main_index_id' => $main_index_id,
    '@initial_memory_bytes' => $GLOBALS['sphinxsearch_initial_memory_usage'],
    '@initial_memory_kb' => round($GLOBALS['sphinxsearch_initial_memory_usage'] / 1024, 2),
    '@current_memory_bytes' => $current_memory_bytes,
    '@current_memory_kb' => round($current_memory_bytes / 1024, 2),
    '@total_execution_time' => $total_execution_time,
    '@nodes_counter' => $nodes_counter,
    '@nodes_per_second' => ($total_execution_time > 0 ? round($nodes_counter / $total_execution_time, 2) : $nodes_counter),
  ));
  _sphinxsearch_wrapper_watchdog('sphinxsearch', $message, NULL, WATCHDOG_INFO);
  exit;
}
283
/**
 * Generate a XMLPipe stream to build a delta index.
 *
 * Delta index is built based on data stored on current main indexes.
 *
 * This process works as follows:
 * - First, Sphinx is queried using distributed index specified in module
 *   settings to obtain
 *   a) The list of main index identifiers behind this distributed index.
 *   b) For each main index, we get nid ranges and last updated time.
 * - Finally, for each main index found, one particular SQL query is built
 *   and executed to obtain the list of new or updated nodes within its
 *   own interval.
 *
 * This method reduces data dependencies between Drupal site database and
 * current number and contents of main indexes used for the site.
 *
 * Any failure is logged to watchdog, printed and terminates the request
 * before the XMLPipe header is emitted. On success, statistics are logged and
 * the request terminates as well.
 */
function sphinxsearch_generate_xmlpipe_delta() {
  $sphinxsearch_query_index = variable_get('sphinxsearch_query_index', '');
  if (empty($sphinxsearch_query_index)) {
    $message = t('XMLPipe for delta index failed: Sphinx query index not specified. Please, check module settings.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }

  // Prepare process statistics.
  $watchdog_report_items = array();
  $nodes_counter = 0;

  // Setup Sphinx search client.
  $sphinxsearch = &sphinxsearch_get_client();
  $sphinxsearch->SetArrayResult(TRUE);

  // Obtain list of main index identifiers.
  // Note that index identifier used for documents stored on
  // delta index is ignored here.
  $sphinxsearch->SetLimits(0, 100);
  $sphinxsearch->SetFilter('main_index_id', array(SPHINXSEARCH_DELTA_INDEX_ID), TRUE);
  $sphinxsearch->SetGroupBy('main_index_id', SPH_GROUPBY_ATTR, 'main_index_id ASC');
  $query_result = $sphinxsearch->Query('', $sphinxsearch_query_index);
  $main_indexes_info = array();
  if ($query_result && isset($query_result['matches']) && is_array($query_result['matches'])) {
    foreach ($query_result['matches'] as $match) {
      if (isset($match['attrs']['main_index_id'])) {
        $main_index_id = (int)$match['attrs']['main_index_id'];
        if (!isset($main_indexes_info[$main_index_id])) {
          $main_indexes_info[$main_index_id] = array();
        }
      }
    }
  }
  if (empty($main_indexes_info)) {
    $message = t('XMLPipe for delta index failed: Could not obtain list of main indexes from Sphinx.');
    watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
    print $message ."\n";
    exit;
  }

  // Obtain index boundaries currently stored on each main index.
  // Maps each boundary name to the Sphinx attribute that holds its value.
  $query_keys = array(
    'last_updated' => 'last_updated',
    'first_nid' => 'nid',
    'last_nid' => 'nid',
  );
  foreach (array_keys($main_indexes_info) as $main_index_id) {
    // Reset query internals for current main index.
    $query_ids = array();
    $sphinxsearch->ResetFilters();
    $sphinxsearch->ResetGroupBy();
    $sphinxsearch->SetLimits(0, 1);
    $sphinxsearch->SetFilter('main_index_id', array($main_index_id));

    // Ask for last_updated document in current main index.
    $sphinxsearch->SetSortMode(SPH_SORT_EXTENDED, 'last_updated DESC');
    $query_ids['last_updated'] = $sphinxsearch->AddQuery('', $sphinxsearch_query_index);

    // Ask for first nid in current main index.
    $sphinxsearch->SetSortMode(SPH_SORT_EXTENDED, 'nid ASC');
    $query_ids['first_nid'] = $sphinxsearch->AddQuery('', $sphinxsearch_query_index);

    // Ask for last nid in current main index.
    $sphinxsearch->SetSortMode(SPH_SORT_EXTENDED, 'nid DESC');
    $query_ids['last_nid'] = $sphinxsearch->AddQuery('', $sphinxsearch_query_index);

    // Run queries and parse results. Each query is limited to one match, so
    // the single match of every result set carries the requested boundary.
    $run_results = $sphinxsearch->RunQueries();
    if (is_array($run_results)) {
      foreach ($query_ids as $query_key => $results_key) {
        if (!isset($run_results[$results_key]) || !is_array($run_results[$results_key])) {
          continue;
        }
        $results = $run_results[$results_key];
        if (empty($results['matches']) || !is_array($results['matches'])) {
          continue;
        }
        $tmpdoc = array_pop($results['matches']);
        if (isset($tmpdoc['attrs']) && is_array($tmpdoc['attrs'])) {
          $main_indexes_info[$main_index_id][$query_key] = $tmpdoc['attrs'];
        }
      }
    }
    // Reduce each collected attribute set to the one value we need, and fail
    // if any boundary could not be obtained.
    foreach ($query_keys as $query_key => $field_key) {
      $attrs = (isset($main_indexes_info[$main_index_id][$query_key]) ? $main_indexes_info[$main_index_id][$query_key] : NULL);
      if (!is_array($attrs) || !isset($attrs[$field_key])) {
        $message = t('XMLPipe for delta index failed: Could not obtain @query_key data for main index @main_index_id.', array(
          '@query_key' => $query_key,
          '@main_index_id' => $main_index_id,
        ));
        watchdog('sphinxsearch', $message, NULL, WATCHDOG_ERROR);
        print $message ."\n";
        exit;
      }
      $main_indexes_info[$main_index_id][$query_key] = $attrs[$field_key];
    }
  }

  // Generate XMLPipe header.
  print sphinxsearch_xmlpipe_header();

  // Get new and/or updated documents for each main index.
  // end() needs a real variable: it takes its argument by reference.
  $main_index_ids = array_keys($main_indexes_info);
  $last_main_index_id = end($main_index_ids);

  $sql_common_conditions = sphinxsearch_get_enabled_node_types_condition('n');
  if (!empty($sql_common_conditions)) {
    $sql_common_conditions = 'n.status = 1 AND '. $sql_common_conditions;
  }
  else {
    $sql_common_conditions = 'n.status = 1';
  }

  foreach ($main_indexes_info as $main_index_id => $main_index_info) {
    // Load the nids we are about to process for current main index interval.
    // COALESCE() is used instead of the MySQL-only IF() so the statement is
    // also valid on PostgreSQL.
    $nids = array();
    $query_sql = 'SELECT n.nid FROM {node} n LEFT JOIN {node_comment_statistics} c ON c.nid = n.nid
      WHERE '. $sql_common_conditions .'
      AND GREATEST(COALESCE(c.last_comment_timestamp, 0), n.changed) > %d
      AND n.nid >= %d';
    $query_args = array($main_index_info['last_updated'], $main_index_info['first_nid']);
    // The last main index is left open-ended so nodes created after it was
    // built are picked up by the delta.
    if ($main_index_id != $last_main_index_id) {
      $query_sql .= ' AND n.nid <= %d';
      $query_args[] = $main_index_info['last_nid'];
    }
    $query_sql .= ' ORDER BY n.nid ASC';
    $result = db_query($query_sql, $query_args);
    while ($row = db_fetch_object($result)) {
      $nids[] = $row->nid;
    }
    $nids_count = count($nids);
    $nodes_counter += $nids_count;

    // Process nodes for current main index.
    foreach ($nids as $nid) {
      $xmlpipe_document = sphinxsearch_xmlpipe_document(SPHINXSEARCH_DELTA_INDEX_ID, $nid);
      if ($xmlpipe_document) {
        print $xmlpipe_document;
      }
    }
    unset($nids);

    // Build statistics for this main index.
    $watchdog_report_items[] = '<li>'. t('Main index @main_index_id:<ul>
      <li>First nid: @first_nid.</li>
      <li>Last nid: @last_nid.</li>
      <li>Last updated time: @last_updated.</li>
      <li>Nodes processed: @nids_count.</li>
      </ul>', array(
      '@main_index_id' => $main_index_id,
      '@first_nid' => $main_index_info['first_nid'],
      '@last_nid' => $main_index_info['last_nid'],
      '@last_updated' => format_date($main_index_info['last_updated'], 'custom', 'Y-m-d H:i:s'),
      '@nids_count' => $nids_count,
    )) .'</li>';
  }

  // Generate XMLPipe footer.
  print sphinxsearch_xmlpipe_footer();

  // Store process statistics to watchdog.
  $current_memory_bytes = memory_get_usage();
  $total_execution_time = time() - $GLOBALS['sphinxsearch_start_execution_time'];
  $message = t('XMLPipe processing for delta index has finished successfully.<ul>
    <li>Initial memory usage: @initial_memory_bytes bytes (@initial_memory_kb KB).</li>
    <li>Current memory usage: @current_memory_bytes bytes (@current_memory_kb KB).</li>
    <li>Total execution time: @total_execution_time seconds.</li>
    <li>Nodes processed: @nodes_counter (@nodes_per_second nodes/sec).</li>
    </ul>', array(
    '@initial_memory_bytes' => $GLOBALS['sphinxsearch_initial_memory_usage'],
    '@initial_memory_kb' => round($GLOBALS['sphinxsearch_initial_memory_usage'] / 1024, 2),
    '@current_memory_bytes' => $current_memory_bytes,
    '@current_memory_kb' => round($current_memory_bytes / 1024, 2),
    '@total_execution_time' => $total_execution_time,
    '@nodes_counter' => $nodes_counter,
    '@nodes_per_second' => ($total_execution_time > 0 ? round($nodes_counter / $total_execution_time, 2) : $nodes_counter),
  ));
  $message .= t('Statistics related to main indexes processed for this delta:') .'<ul>'. implode('', $watchdog_report_items) .'</ul>';
  watchdog('sphinxsearch', $message, NULL, WATCHDOG_INFO);
  exit;
}
481
/**
 * Build the opening part of the XMLPipe stream.
 *
 * The result contains the XML declaration, the opening docset element and
 * the complete schema (text fields plus attributes). Taxonomy attributes are
 * only declared when $GLOBALS['sphinxsearch_taxonomy_module_exists'] is set.
 *
 * @return string
 *   XML fragment, one element per line, terminated by a newline.
 */
function sphinxsearch_xmlpipe_header() {
  $lines = array();

  // Start XMLPipe stream and open the document schema.
  $lines[] = '<'.'?xml version="1.0" encoding="utf-8"?'.'>';
  $lines[] = '<sphinx:docset>';
  $lines[] = '<sphinx:schema>';

  // Text fields.
  foreach (array('subject', 'content') as $field_name) {
    $lines[] = '<sphinx:field name="'. $field_name .'"/>';
  }

  // Node related attributes.
  foreach (array('nid', 'uid', 'nodetype') as $int_attr) {
    $lines[] = '<sphinx:attr name="'. $int_attr .'" type="int" bits="32" default="0"/>';
  }
  foreach (array('created', 'last_updated') as $time_attr) {
    $lines[] = '<sphinx:attr name="'. $time_attr .'" type="timestamp"/>';
  }

  // Taxonomy related attributes: one multi-value attribute per enabled
  // vocabulary, plus one for the vocabulary identifiers themselves.
  if ($GLOBALS['sphinxsearch_taxonomy_module_exists']) {
    $enabled_vids = array_keys(sphinxsearch_get_enabled_vocabularies());
    foreach ($enabled_vids as $vid) {
      $lines[] = '<sphinx:attr name="terms'. $vid .'" type="multi"/>';
    }
    $lines[] = '<sphinx:attr name="vid" type="multi"/>';
  }

  // Internal attributes, then close the schema.
  $lines[] = '<sphinx:attr name="is_deleted" type="bool" default="0"/>';
  $lines[] = '<sphinx:attr name="main_index_id" type="int" bits="32" default="0"/>';
  $lines[] = '</sphinx:schema>';

  return implode("\n", $lines) ."\n";
}
518
/**
 * Build the closing part of the XMLPipe stream.
 *
 * @return string
 *   The closing docset element, without a trailing newline.
 */
function sphinxsearch_xmlpipe_footer() {
  return '</sphinx:docset>';
}
526
/**
 * Generate a single XMLPipe document.
 *
 * @param int $main_index_id
 *   Identifier stored in the main_index_id attribute of the document.
 * @param int $nid
 *   Identifier of the node to be exported. It is also used as the Sphinx
 *   document id.
 * @return
 *   XML fragment for the document, or FALSE if the node could not be loaded.
 */
function sphinxsearch_xmlpipe_document($main_index_id, $nid) {
  static $vocabularies;

  // The third argument resets the node_load() static cache on every call.
  $node = node_load($nid, NULL, TRUE);
  if (!$node) {
    return FALSE;
  }

  // Obtain text representation of the node.
  $text = sphinxsearch_get_node_text($node);

  // Generate the XML for this document.
  $output = '<sphinx:document id="'. $nid .'">' ."\n";

  // Text fields.
  // NOTE(review): the CDATA opener is followed by an extra "[", which ends up
  // as the first character of the field content. Left untouched so indexed
  // data does not change; confirm whether it is intentional.
  $output .= '<subject><![CDATA[['. check_plain($node->title) .']]></subject>' ."\n";
  $output .= '<content><![CDATA[['. check_plain($text) .']]></content>' ."\n";

  // Node related attributes.
  $output .= '<nid>'. $nid .'</nid>' ."\n";
  $output .= '<uid>'. $node->uid .'</uid>' ."\n";
  $output .= '<nodetype>'. sphinxsearch_xmlpipe_nodetype('id', $node->type) .'</nodetype>' ."\n";
  $output .= '<created>'. $node->created .'</created>' ."\n";

  // Taxonomy related attributes.
  // Vocabulary data is only touched when the taxonomy module is present,
  // same as the schema builder does, and only for nodes that actually carry
  // terms.
  if (!empty($GLOBALS['sphinxsearch_taxonomy_module_exists']) && !empty($node->taxonomy) && is_array($node->taxonomy)) {
    if (!isset($vocabularies)) {
      $vocabularies = sphinxsearch_get_enabled_vocabularies();
    }
    // Group term ids by vocabulary. An empty list of enabled vocabularies
    // means no restriction.
    $vids = array();
    foreach ($node->taxonomy as $tid => $term) {
      if (empty($vocabularies) || isset($vocabularies[$term->vid])) {
        if (!isset($vids[$term->vid])) {
          $vids[$term->vid] = array();
        }
        $vids[$term->vid][] = $tid;
      }
    }
    if (!empty($vids)) {
      foreach ($vids as $vid => $tids) {
        $output .= '<terms'. $vid .'>'. implode(' ', $tids) .'</terms'. $vid .'>' ."\n";
      }
      $output .= '<vid>'. implode(' ', array_keys($vids)) .'</vid>' ."\n";
    }
  }

  // Internal attributes.
  // last_updated is the most recent of node change and last comment.
  if (!empty($node->last_comment_timestamp) && $node->last_comment_timestamp > $node->changed) {
    $output .= '<last_updated>'. $node->last_comment_timestamp .'</last_updated>' ."\n";
  }
  else {
    $output .= '<last_updated>'. $node->changed .'</last_updated>' ."\n";
  }
  $output .= '<main_index_id>'. $main_index_id .'</main_index_id>' ."\n";

  $output .= '</sphinx:document>' ."\n";
  return $output;
}
589
/**
 * Close and reopen the active database server connection.
 *
 * Drupal offers no API to restart a connection, so this function works
 * directly on the global $active_db resource. db_set_active() keeps its own
 * static list of opened connections ($db_conns), which is not updated here;
 * as a consequence, code that switches connections through db_set_active()
 * will not work properly once this function has been used.
 *
 * That is usually harmless during XMLPipe generation, but external hooks
 * invoked while processing may depend on db_set_active(). Sites in that
 * situation cannot use this feature. It is optional, configured from the
 * module settings panel, and disabled by default.
 *
 * The proper solution would be a patch for db_set_active() that allows
 * restarting connections.
 *
 * Only mysql, mysqli and pgsql are supported; nothing happens for any other
 * database type.
 *
 * @see db_set_active()
 */
function _sphinxsearch_db_reconnect() {
  global $db_url, $db_type, $active_db;
  static $connect_url;

  // Resolve the connection URL once; $db_url may hold several named URLs.
  if (!isset($connect_url)) {
    $connect_url = (is_array($db_url) ? $db_url['default'] : $db_url);
  }

  // Close the current connection with the driver specific function.
  if ($db_type == 'mysql') {
    mysql_close($active_db);
  }
  elseif ($db_type == 'mysqli') {
    mysqli_close($active_db);
  }
  elseif ($db_type == 'pgsql') {
    pg_close($active_db);
  }
  else {
    // Unsupported database type: leave the connection alone.
    return;
  }

  $active_db = db_connect($connect_url);
}
637
/**
 * Log a system message without going through dblog_watchdog().
 *
 * dblog_watchdog() makes use of db_set_active(), which breaks our database
 * reconnection logic. This wrapper dispatches the message to every module
 * implementing hook_watchdog(), but writes the dblog record itself.
 *
 * @param $type
 *   The category to which this message belongs.
 * @param $message
 *   The message to store in the log. See t() for documentation
 *   on how $message and $variables interact. Keep $message
 *   translatable by not concatenating dynamic values into it!
 * @param $variables
 *   Array of variables to replace in the message on display or
 *   NULL if message is already translated or not possible to
 *   translate.
 * @param $severity
 *   The severity of the message, as per RFC 3164
 * @param $link
 *   A link to associate with the message.
 *
 * @see sphinxsearch_generate_xmlpipe_main()
 * @see _sphinxsearch_db_reconnect()
 * @see watchdog()
 * @see dblog_watchdog()
 * @see db_set_active()
 */
function _sphinxsearch_wrapper_watchdog($type, $message, $variables = array(), $severity = WATCHDOG_NOTICE, $link = NULL) {
  global $user, $base_root;

  // Collect everything a hook_watchdog() implementation expects.
  $entry = array(
    'type' => $type,
    'message' => $message,
    'variables' => $variables,
    'severity' => $severity,
    'link' => $link,
    'user' => $user,
    'request_uri' => $base_root . request_uri(),
    'referer' => referer_uri(),
    'ip' => ip_address(),
    'timestamp' => time(),
  );

  // Hand the entry over to each logging module.
  $loggers = module_implements('watchdog', TRUE);
  foreach ($loggers as $logger) {
    if ($logger != 'dblog') {
      module_invoke($logger, 'watchdog', $entry);
      continue;
    }

    // Same statement dblog_watchdog() runs, minus the db_set_active() calls:
    // we can't use them and they are not really necessary here.
    $record = array(
      $entry['user']->uid,
      $entry['type'],
      $entry['message'],
      serialize($entry['variables']),
      $entry['severity'],
      $entry['link'],
      $entry['request_uri'],
      $entry['referer'],
      $entry['ip'],
      $entry['timestamp'],
    );
    db_query("INSERT INTO {watchdog}
      (uid, type, message, variables, severity, link, location, referer, hostname, timestamp)
      VALUES
      (%d, '%s', '%s', '%s', %d, '%s', '%s', '%s', '%s', %d)", $record);
  }
}
708
/**
 * Check if IP address belongs to specified CIDR range.
 * Note: IPv6 addresses are not supported.
 *
 * Malformed input never matches: an empty or unparsable $cidr, a prefix
 * length that is not an integer between 0 and 32, or an invalid $ip all
 * yield FALSE. A $cidr without "/bits" is treated as a single host (/32).
 * Host bits set in the network part are ignored (192.168.0.5/24 is handled
 * as 192.168.0.0/24).
 *
 * @param string $ip
 *   IPv4 address. ie. 192.168.0.1
 * @param string $cidr
 *   CIDR mask. ie. 192.168.0.0/24
 * @return boolean
 *   TRUE if $ip matches specified CIDR mask, FALSE otherwise.
 */
function sphinxsearch_ip_check_cidr($ip, $cidr) {
  $cidr = trim((string)$cidr);
  if ($cidr === '') {
    return FALSE;
  }

  $parts = explode('/', $cidr, 2);
  $net_long = ip2long(trim($parts[0]));
  $ip_long = ip2long(trim((string)$ip));
  // ip2long() returns FALSE for invalid addresses. Comparing that loosely
  // against a masked value of 0 would report a match, so bail out here.
  if ($net_long === FALSE || $ip_long === FALSE) {
    return FALSE;
  }

  // Prefix length: a bare address stands for exactly one host.
  $bits = 32;
  if (isset($parts[1])) {
    $bits_text = trim($parts[1]);
    if (!preg_match('/^[0-9]{1,2}$/', $bits_text)) {
      return FALSE;
    }
    $bits = (int)$bits_text;
    if ($bits > 32) {
      return FALSE;
    }
  }

  // A zero-length prefix matches every address. It is handled apart because
  // a 32 positions shift is not reliable on 32-bit integers.
  if ($bits == 0) {
    return TRUE;
  }

  // Shifting -1 (all bits set) clears the host bits and yields a mask that is
  // valid for both 32-bit (signed) and 64-bit ip2long() results.
  $mask = -1 << (32 - $bits);
  return (($ip_long & $mask) == ($net_long & $mask));
}

  ViewVC Help
Powered by ViewVC 1.1.2