| 1 |
<?php |
<?php |
| 2 |
// $Id: apachesolr.index.inc,v 1.1.2.12 2009/10/14 13:28:40 pwolanin Exp $ |
// $Id: apachesolr.index.inc,v 1.1.2.13 2009/10/14 19:08:19 pwolanin Exp $ |
| 3 |
|
|
| 4 |
/** |
/** |
| 5 |
* @file |
* @file |
| 16 |
} |
} |
| 17 |
|
|
| 18 |
/**
 * Replace control characters that cause Jetty/Solr to fail with spaces.
 *
 * Every byte in the ranges x00-x08, x0B, x0C and x0E-x1F is swapped for a
 * single space. Tab (x09), line feed (x0A) and carriage return (x0D) are not
 * part of the pattern and are therefore left untouched.
 *
 * @param $text
 *   The text to sanitize.
 *
 * @return
 *   The text with each matched control byte replaced by one space.
 */
function apachesolr_strip_ctl_chars($text) {
  // See: http://w3.org/International/questions/qa-forms-utf-8.html
  // Printable utf-8 never contains any of the bytes matched below.
  $control_bytes = '@[\x00-\x08\x0B\x0C\x0E-\x1F]@';
  $replacement = ' ';
  $stripped = preg_replace($control_bytes, $replacement, $text);
  return $stripped;
}
|
|
|
|
|
/**
 * Strip html tags and also control characters that cause Jetty/Solr to fail.
 *
 * @param $text
 *   The text, possibly containing markup and HTML entities, to clean.
 *
 * @return
 *   The text after tag filtering, with entities decoded, the characters
 *   &, < and > re-encoded (quotes are left as-is because of ENT_NOQUOTES),
 *   and low-byte control characters replaced by spaces.
 */
function apachesolr_clean_text($text) {
  // Add spaces before stripping tags to avoid running words together.
  $text = filter_xss(str_replace(array('<', '>'), array(' <', '> '), $text), array());
  // Decode entities and then make safe any < or > characters.
  $text = htmlspecialchars(html_entity_decode($text, ENT_NOQUOTES, 'UTF-8'), ENT_NOQUOTES, 'UTF-8');
  // We must strip low bytes second in case there was an encoded
  // low-byte character. Returning before this step would leave the
  // control-character cleanup unreachable.
  return apachesolr_strip_ctl_chars($text);
}
| 27 |
|
|
| 28 |
/** |
/** |
| 67 |
} |
} |
| 68 |
} |
} |
| 69 |
$text .= "\n\n" . implode(' ', $extra); |
$text .= "\n\n" . implode(' ', $extra); |
|
$text = apachesolr_strip_ctl_chars($text); |
|
| 70 |
|
|
| 71 |
$document = new Apache_Solr_Document(); |
$document = new Apache_Solr_Document(); |
| 72 |
$document->id = apachesolr_document_id($node->nid); |
$document->id = apachesolr_document_id($node->nid); |
| 86 |
} |
} |
| 87 |
$document->body = apachesolr_clean_text($text); |
$document->body = apachesolr_clean_text($text); |
| 88 |
$document->type = $node->type; |
$document->type = $node->type; |
| 89 |
$document->type_name = apachesolr_strip_ctl_chars(node_get_types('name', $node)); |
$document->type_name = node_get_types('name', $node); |
| 90 |
$document->created = apachesolr_date_iso($node->created); |
$document->created = apachesolr_date_iso($node->created); |
| 91 |
$document->changed = apachesolr_date_iso($node->changed); |
$document->changed = apachesolr_date_iso($node->changed); |
| 92 |
$last_change = (isset($node->last_comment_timestamp) && $node->last_comment_timestamp > $node->changed) ? $node->last_comment_timestamp : $node->changed; |
$last_change = (isset($node->last_comment_timestamp) && $node->last_comment_timestamp > $node->changed) ? $node->last_comment_timestamp : $node->changed; |
| 93 |
$document->last_comment_or_change = apachesolr_date_iso($last_change); |
$document->last_comment_or_change = apachesolr_date_iso($last_change); |
| 94 |
$document->comment_count = isset($node->comment_count) ? $node->comment_count : 0; |
$document->comment_count = isset($node->comment_count) ? $node->comment_count : 0; |
| 95 |
$document->name = apachesolr_strip_ctl_chars($node->name); |
$document->name = $node->name; |
| 96 |
|
|
| 97 |
$path = 'node/' . $node->nid; |
$path = 'node/' . $node->nid; |
| 98 |
$document->url = url($path, array('absolute' => TRUE)); |
$document->url = url($path, array('absolute' => TRUE)); |
| 105 |
$language = empty($node->language) ? '' : $node->language; |
$language = empty($node->language) ? '' : $node->language; |
| 106 |
$output = drupal_get_path_alias($path, $language); |
$output = drupal_get_path_alias($path, $language); |
| 107 |
if ($output && $output != $path) { |
if ($output && $output != $path) { |
| 108 |
$document->path_alias = apachesolr_strip_ctl_chars($output); |
$document->path_alias = $output; |
| 109 |
} |
} |
| 110 |
} |
} |
| 111 |
|
|
| 301 |
$time = time(); |
$time = time(); |
| 302 |
try { |
try { |
| 303 |
$solr = apachesolr_get_solr(); |
$solr = apachesolr_get_solr(); |
| 304 |
$solr->deleteMultipleById($ids); |
$solr->deleteByMultipleIds($ids); |
| 305 |
apachesolr_index_updated($time); |
apachesolr_index_updated($time); |
| 306 |
foreach ($nodes as $node) { |
foreach ($nodes as $node) { |
| 307 |
// There was no exception, so update the table. |
// There was no exception, so update the table. |
| 327 |
} |
} |
| 328 |
try { |
try { |
| 329 |
$solr = apachesolr_get_solr(); |
$solr = apachesolr_get_solr(); |
| 330 |
$solr->deleteMultipleById($ids); |
$solr->deleteByMultipleIds($ids); |
| 331 |
apachesolr_index_updated($time); |
apachesolr_index_updated($time); |
| 332 |
// There was no exception, so update the table. |
// There was no exception, so update the table. |
| 333 |
db_query("DELETE FROM {apachesolr_search_node} WHERE nid IN (" . db_placeholders($nids) . ")", $nids); |
db_query("DELETE FROM {apachesolr_search_node} WHERE nid IN (" . db_placeholders($nids) . ")", $nids); |