/[drupal]/contributions/sandbox/unconed/search.diff
ViewVC logotype

Contents of /contributions/sandbox/unconed/search.diff

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.6 - (show annotations) (download) (as text)
Thu Oct 14 05:21:01 2004 UTC (5 years, 1 month ago) by unconed
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +83 -64 lines
File MIME type: text/x-diff
Update (see mailinglist)
1 ? search.diff
2 cvs diff: Diffing .
3 cvs diff: Diffing database
4 Index: database/database.mysql
5 ===================================================================
6 RCS file: /cvs/drupal/drupal/database/database.mysql,v
7 retrieving revision 1.150
8 diff -u -r1.150 database.mysql
9 --- database/database.mysql 8 Oct 2004 18:15:15 -0000 1.150
10 +++ database/database.mysql 14 Oct 2004 05:19:14 -0000
11 @@ -534,10 +534,22 @@
12
13 CREATE TABLE search_index (
14 word varchar(50) NOT NULL default '',
15 - lno int(10) unsigned NOT NULL default '0',
16 + sid int(10) unsigned NOT NULL default '0',
17 type varchar(16) default NULL,
18 + fromsid int(10) unsigned NOT NULL default '0',
19 + fromtype varchar(16) default NULL,
20 + score int(10) unsigned default NULL,
21 + KEY sid (sid),
22 + KEY word (word)
23 +) TYPE=MyISAM;
24 +
25 +--
26 +-- Table structure for table 'search_total'
27 +--
28 +
29 +CREATE TABLE search_total (
30 + word varchar(50) NOT NULL default '',
31 count int(10) unsigned default NULL,
32 - KEY lno (lno),
33 KEY word (word)
34 ) TYPE=MyISAM;
35
36 cvs diff: Diffing includes
37 Index: includes/common.inc
38 ===================================================================
39 RCS file: /cvs/drupal/drupal/includes/common.inc,v
40 retrieving revision 1.392
41 diff -u -r1.392 common.inc
42 --- includes/common.inc 12 Oct 2004 19:55:53 -0000 1.392
43 +++ includes/common.inc 14 Oct 2004 05:19:16 -0000
44 @@ -668,134 +668,6 @@
45 * @} End of "defgroup validation".
46 */
47
48 -/**
49 - * @defgroup search Search interface
50 - * @{
51 - * The Drupal search interface manages a global search mechanism.
52 - *
53 - * Modules may plug into this system to provide searches of different types of
54 - * data. Most of the system is handled by search.module, so this must be enabled
55 - * for all of the search features to work.
56 - */
57 -
58 -/**
59 - * Format a single result entry of a search query.
60 - *
61 - * Modules may implement hook_search_item() in order to override this default
62 - * function to display search results.
63 - *
64 - * @param $item
65 - * A single search result as returned by hook_search(). The result should be
66 - * an array with keys "count", "link", "title", "user", "date", and "keywords".
67 - * @param $type
68 - * The type of item found, such as "user" or "comment".
69 - */
70 -function search_item($item, $type) {
71 - if (module_hook($type, 'search_item')) {
72 - $output = module_invoke($type, 'search_item', $item);
73 - }
74 - else {
75 - $output = ' <dt class="title"><a href="'. $item['link'] .'">'. $item['title'] .'</a></dt>';
76 - $output .= ' <dd class="small">' . t($type) . ($item['user'] ? ' - '. $item['user'] : '') .''. ($item['date'] ? ' - '. format_date($item['date'], 'small') : '') .'</dd>';
77 - }
78 -
79 - return $output;
80 -}
81 -
82 -/**
83 - * Render a generic search form.
84 - *
85 - * This form must be usable not only within "http://example.com/search", but also
86 - * as a simple search box (without "Restrict search to", help text, etc.), in the
87 - * theme's header, and so forth. This means we must provide options to
88 - * conditionally render certain parts of this form.
89 - *
90 - * @param $action
91 - * Form action. Defaults to "search".
92 - * @param $keys
93 - * The search string entered by the user, containing keywords for the search.
94 - * @param $options
95 - * Whether to render the optional form fields and text ("Restrict search
96 - * to", help text, etc.).
97 - * @return
98 - * An HTML string containing the search form.
99 - */
100 -function search_form($action = '', $keys = '', $options = FALSE) {
101 - $edit = $_POST['edit'];
102 -
103 - if (!$action) {
104 - $action = url('search');
105 - }
106 -
107 - $output = ' <div class="search-form"><br /><input type="text" class="form-text" size="50" value="'. check_form($keys) .'" name="keys" />';
108 - $output .= ' <input type="submit" class="form-submit" value="'. t('Search') ."\" />\n";
109 -
110 - if ($options) {
111 - $output .= '<br />';
112 - $output .= t('Restrict search to') .': ';
113 -
114 - foreach (module_list() as $name) {
115 - if (module_hook($name, 'search')) {
116 - $output .= ' <input type="checkbox" name="edit[type]['. $name .']" '. ($edit['type'][$name] ? ' checked="checked"' : '') .' /> '. t($name);
117 - }
118 - }
119 - }
120 - $output .= '</div>';
121 -
122 - return form($output, 'post', $action);
123 -}
124 -
125 -/**
126 - * Perform a global search on the given keys, and return the formatted results.
127 - */
128 -function search_data($keys = NULL) {
129 - $edit = $_POST['edit'];
130 - $output = '';
131 -
132 - if (isset($keys)) {
133 - foreach (module_list() as $name) {
134 - if (module_hook($name, 'search') && (!$edit['type'] || $edit['type'][$name])) {
135 - list($title, $results) = module_invoke($name, 'search', $keys);
136 - if ($results) {
137 - $output .= '<h2>'. $title .'</h2>';
138 - $output .= '<dl class="search-results">';
139 - foreach ($results as $entry) {
140 - $output .= search_item($entry, $name);
141 - }
142 - $output .= '</dl>';
143 - }
144 - }
145 - }
146 - }
147 -
148 - return $output;
149 -}
150 -
151 -/**
152 - * Display a search form for a particular type of data.
153 - *
154 - * @param $type
155 - * The type of content to search within.
156 - * @param $action
157 - * Form action. Defaults to "search".
158 - * @param $keys
159 - * The search string entered by the user, containing keywords for the search.
160 - * @param $options
161 - * Whether to render the optional form fields and text ("Restrict search
162 - * to", help text, etc.).
163 - * @return
164 - * An HTML string containing the search form and results.
165 - */
166 -function search_type($type, $action = '', $keys = '', $options = FALSE) {
167 - $_POST['edit']['type'][$type] = 'on';
168 -
169 - return search_form($action, $keys, $options) . '<br />'. search_data($keys);
170 -}
171 -
172 -/**
173 - * @} End of "defgroup search".
174 - */
175 -
176 function check_form($text) {
177 return drupal_specialchars($text, ENT_QUOTES);
178 }
179 @@ -1835,7 +1707,7 @@
180 /**
181 * Encodes MIME/HTTP header values that contain non US-ASCII characters.
182 *
183 - * For example, mime_header_encode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=".
184 + * For example, mime_header_encode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=".
185 *
186 * See http://www.rfc-editor.org/rfc/rfc2047.txt for more information.
187 *
188 @@ -1858,6 +1730,59 @@
189 }
190
191 /**
192 + * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
193 + */
194 +function decode_entities($text) {
195 + static $table;
196 + // We store named entities in a table for quick processing.
197 + if (!isset($table)) {
198 + // Get all named HTML entities.
199 + $table = array_flip(get_html_translation_table(HTML_ENTITIES, $special));
200 + // PHP gives us Windows-1252/ISO-8859-1 data, we need UTF-8.
201 + $table = array_map('utf8_encode', $table);
202 + }
203 + $text = strtr($text, $table);
204 +
205 + // Any remaining entities are numerical. Use a regexp to replace them.
206 + return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2")', $text);
207 +}
208 +
209 +/**
210 + * Helper function for decode_entities(): encodes a numeric entity's code point as UTF-8 bytes.
211 + */
212 +function _decode_entities($hex, $codepoint) {
213 + if ($hex != '') {
214 + $codepoint = base_convert($codepoint, 16, 10);
215 + }
216 + if ($codepoint < 0x80) {
217 + return chr($codepoint);
218 + }
219 + else if ($codepoint < 0x800) {
220 + return chr(0xC0 | ($codepoint >> 6))
221 + . chr(0x80 | ($codepoint & 0x3F));
222 + }
223 + else if ($codepoint < 0x10000) {
224 + return chr(0xE0 | ( $codepoint >> 12))
225 + . chr(0x80 | (($codepoint >> 6) & 0x3F))
226 + . chr(0x80 | ( $codepoint & 0x3F));
227 + }
228 + else if ($codepoint < 0x200000) {
229 + return chr(0xF0 | ( $codepoint >> 18))
230 + . chr(0x80 | (($codepoint >> 12) & 0x3F))
231 + . chr(0x80 | (($codepoint >> 6) & 0x3F))
232 + . chr(0x80 | ( $codepoint & 0x3F));
233 + }
234 +}
235 +
236 +/**
237 + * Count the number of characters in a UTF-8 string. This is less than or
238 + * equal to the byte count.
239 + */
240 +function string_length(&$text) {
241 + return strlen(preg_replace("/[\x80-\xBF]/", '', $text));
242 +}
243 +
244 +/**
245 * Evaluate a string of PHP code.
246 *
247 * This is a wrapper around PHP's eval(). It uses output buffering to capture both
248 cvs diff: Diffing misc
249 Index: misc/drupal.css
250 ===================================================================
251 RCS file: /cvs/drupal/drupal/misc/drupal.css,v
252 retrieving revision 1.84
253 diff -u -r1.84 drupal.css
254 --- misc/drupal.css 21 Sep 2004 01:35:40 -0000 1.84
255 +++ misc/drupal.css 14 Oct 2004 05:19:16 -0000
256 @@ -383,6 +383,31 @@
257 display: block;
258 margin: 2px;
259 }
260 +.search-form {
261 + margin-bottom: 1em;
262 +}
263 +.search-form p {
264 + margin-top: 0;
265 + margin-bottom: 0.2em;
266 + padding-top: 0;
267 + padding-bottom: 0;
268 +}
269 +.search-form input {
270 + margin-top: 0;
271 + margin-bottom: 0;
272 +}
273 +.search-results p {
274 + margin-top: 0;
275 +}
276 +.search-results dt {
277 + font-size: 1.1em;
278 +}
279 +.search-results dd {
280 + margin-bottom: 1em;
281 +}
282 +.search-results .search-info {
283 + font-size: 0.85em;
284 +}
285 #tracker td.replies {
286 text-align: center;
287 }
288 cvs diff: Diffing modules
289 Index: modules/comment.module
290 ===================================================================
291 RCS file: /cvs/drupal/drupal/modules/comment.module,v
292 retrieving revision 1.295
293 diff -u -r1.295 comment.module
294 --- modules/comment.module 12 Oct 2004 16:16:22 -0000 1.295
295 +++ modules/comment.module 14 Oct 2004 05:19:22 -0000
296 @@ -101,10 +101,6 @@
297 'type' => MENU_DEFAULT_LOCAL_TASK, 'weight' => -10);
298 $items[] = array('path' => 'admin/comment/configure', 'title' => t('configure'),
299 'callback' => 'comment_configure', 'access' => $access, 'type' => MENU_LOCAL_TASK);
300 - if (module_exist('search')) {
301 - $items[] = array('path' => 'admin/comment/search', 'title' => t('search'),
302 - 'callback' => 'comment_search', 'access' => $access, 'type' => MENU_LOCAL_TASK);
303 - }
304
305 // Subtabs:
306 $items[] = array('path' => 'admin/comment/list/new', 'title' => t('new comments'),
307 @@ -263,37 +259,6 @@
308 }
309
310 /**
311 - * Implementation of hook_search().
312 - *
313 - * This search function uses search.module's built-in content index by
314 - * calling do_search(). The "nid" identifier in the select is used to
315 - * present search results in the context of their associated node.
316 - *
317 - * This function doubles as a menu callback for the administrative comment search.
318 - */
319 -function comment_search($keys = NULL) {
320 - if (!$keys) {
321 - // if there are no keys, we've been called from our menu callback,
322 - // so we hook into the search.module to pass the $keys back to us.
323 - print theme('page', search_type('comment', url('admin/comment/search'), $_POST['keys']));
324 - }
325 - else if ($keys) {
326 - $find = do_search(array('keys' => $keys, 'type' => 'comment', 'select' => 'SELECT s.lno AS lno, c.nid AS nid, c.subject AS title, c.timestamp AS created, u.uid AS uid, u.name AS name, s.count AS count FROM {search_index} s, {comments} c INNER JOIN {users} u ON c.uid = u.uid '. node_access_join_sql('c') .' WHERE '. node_access_where_sql() ." AND s.lno = c.cid AND s.type = 'comment' AND c.status = 0 AND s.word like '%'"));
327 - return array(t('Matching comments ranked in order of relevance'), $find);
328 - }
329 -}
330 -
331 -/**
332 - * Implementation of hook_update_index().
333 - *
334 - * The SQL statement returned checks for the last time the index was updated
335 - * so as not to cause redundant work for the indexer.
336 - */
337 -function comment_update_index() {
338 - return array('last_update' => 'comment_cron_last', 'node_type' => 'comment', 'select' => 'SELECT c.cid as lno, c.subject as text1, c.comment as text2 FROM {comments} c WHERE c.status = 0 AND timestamp > '. variable_get('comment_cron_last', 1));
339 -}
340 -
341 -/**
342 * Implementation of hook_user().
343 *
344 * Provides signature customization for the user's comments.
345 Index: modules/node.module
346 ===================================================================
347 RCS file: /cvs/drupal/drupal/modules/node.module,v
348 retrieving revision 1.409
349 diff -u -r1.409 node.module
350 --- modules/node.module 12 Oct 2004 20:01:25 -0000 1.409
351 +++ modules/node.module 14 Oct 2004 05:19:26 -0000
352 @@ -572,10 +572,19 @@
353 * The select must always provide the following fields: lno, title,
354 * created, uid, name, and count.
355 */
356 -function node_search($keys) {
357 - $find = do_search(array('keys' => $keys, 'type' => 'node', 'select' => "SELECT DISTINCT s.lno as lno, n.title as title, n.created as created, u.uid as uid, u.name as name, s.count as count FROM {search_index} s, {node} n ". node_access_join_sql() ." INNER JOIN {users} u ON n.uid = u.uid WHERE s.lno = n.nid AND s.type = 'node' AND s.word like '%' AND n.status = 1 AND ". node_access_where_sql()));
358 -
359 - return array(t('Matching nodes ranked in order of relevance'), $find);
360 +function node_search($op = 'search', $keys = null) {
361 + switch ($op) {
362 + case 'name':
363 + return t('content');
364 + case 'search':
365 + $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. node_access_join_sql() .' INNER JOIN {users} u ON n.uid = u.uid', 'n.status = 1 AND '. node_access_where_sql());
366 + $results = array();
367 + foreach ($find as $item) {
368 + $node = node_load(array('nid' => $item->sid));
369 + $results[] = array('link' => url('node/'. $item->sid), 'type' => node_invoke($node, 'node_name'), 'title' => $node->title, 'user' => $node->name, 'date' => $node->changed, 'snippet' => search_excerpt($keys, check_output($node->body, $node->format)));
370 + }
371 + return $results;
372 + }
373 }
374
375 /**
376 @@ -1517,9 +1526,40 @@
377 * last run date for the nodes update.
378 */
379 function node_update_index() {
380 - return array('last_update' => 'node_cron_last',
381 - 'node_type' => 'node',
382 - 'select' => "SELECT n.nid as lno, n.title as text1, n.body as text2 FROM {node} n WHERE n.status = 1 AND moderate = 0 and (created > " . variable_get('node_cron_last', 1) . " or changed > " . variable_get('node_cron_last', 1) . ")");
383 + $last = variable_get('node_cron_last', 0);
384 + $limit = (int)variable_get('search_cron_limit', 100);
385 +
386 + $result = db_query_range('SELECT nid FROM {node} n WHERE n.status = 1 AND moderate = 0 AND (created > %d OR changed > %d) ORDER BY GREATEST(created, changed) ASC', $last, $last, 0, $limit);
387 +
388 + while ($node = db_fetch_object($result)) {
389 + $node = node_load(array('nid' => $node->nid));
390 +
391 + // We update this variable per node in case cron times out, or if the node
392 + // cannot be indexed (PHP nodes which call drupal_goto, for example).
393 + // In rare cases this can mean a node is only partially indexed, but the
394 + // chances of this happening are very small.
395 + variable_set('node_cron_last', max($node->changed, $node->created));
396 +
397 + if (node_hook($node, 'view')) {
398 + node_invoke($node, 'view', false, true);
399 + }
400 + else {
401 + $node = node_prepare($node, false);
402 + }
403 +
404 + $text = '<h1>'. drupal_specialchars($node->title) .'</h1>'. $node->body;
405 +
406 + // Fetch comments
407 + if (module_exist('comment')) {
408 + $comments = db_query('SELECT subject, comment, format FROM {comments} WHERE nid = %d AND status = 0', $node->nid);
409 + while ($comment = db_fetch_object($comments)) {
410 + $text .= '<h2>'. $comment->subject .'</h2>'. check_output($comment->comment, $comment->format);
411 + }
412 + }
413 +
414 + // Update index
415 + search_index($node->nid, 'node', $text);
416 + }
417 }
418
419 /**
420 Index: modules/search.module
421 ===================================================================
422 RCS file: /cvs/drupal/drupal/modules/search.module,v
423 retrieving revision 1.87
424 diff -u -r1.87 search.module
425 --- modules/search.module 21 Sep 2004 18:35:30 -0000 1.87
426 +++ modules/search.module 14 Oct 2004 05:19:28 -0000
427 @@ -2,6 +2,40 @@
428 // $Id: search.module,v 1.87 2004/09/21 18:35:30 dries Exp $
429
430 /**
431 + * Unicode character classes to include in the index.
432 + * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
433 + *
434 + * Lu Letter, Uppercase
435 + * Ll Letter, Lowercase
436 + * Lt Letter, Titlecase
437 + * Lm Letter, Modifier
438 + * Lo Letter, Other
439 + * Mn Mark, Nonspacing
440 + * Mc Mark, Spacing Combining
441 + * Nd Number, Decimal Digit
442 + * Nl Number, Letter
443 + * No Number, Other
444 + * Sm Symbol, Math
445 + * Sc Symbol, Currency
446 + * Sk Symbol, Modifier
447 + * So Symbol, Other
448 + *
449 + * Matches all character classes not in the list above (enclosing marks, punctuation and control codes, spacers).
450 + * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
451 + */
452 +define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
453 +
454 +/**
455 + * Matches all 'N' character classes (numbers)
456 + */
457 +define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
458 +
459 +/**
460 + * Matches all 'P' character classes (punctuation)
461 + */
462 +define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
463 +
464 +/**
465 * @file
466 * Enables site-wide keyword searching.
467 */
468 @@ -11,16 +45,16 @@
469 */
470 function search_help($section = 'admin/help#search') {
471 switch ($section) {
472 - case 'admin/help#search':
473 - return t("
474 - <strong>Search guidelines</strong>
475 - <p>The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.</p>
476 - <strong>Words excluded from the search</strong>
477 - <p>Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also ignored.</p>", array('%number' => variable_get('minimum_word_size', 2)));
478 case 'admin/modules#description':
479 return t('Enables site-wide keyword searching.');
480 case 'admin/settings/search':
481 return t('The search engine works by keeping an index of "interesting" words. To make sure we only get "interesting" words you need to set the following.');
482 + case 'search#noresults':
483 + return t('<p>Your search yielded no results.<ul>
484 +<li>Check if your spelling is correct.</li>
485 +<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
486 +<li>Use longer words (words shorter than %number letters are ignored).</li>
487 +</ul></p>', array('%number' => variable_get('minimum_word_size', 3)));
488 }
489 }
490
491 @@ -79,315 +113,341 @@
492 }
493
494 // Indexing settings:
495 - $group = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 2), 10, 10, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
496 - $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 0), 10, 10, t('The number of characters a word has to be to be searched for.'));
497 - $group .= form_textarea(t('Noise words'), 'noisewords', variable_get('noisewords', ''), 70, 10, t('These words will not be indexed. Enter a comma separated list; linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ...'));
498 + $group = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 3), 10, 10, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
499 + $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 3), 10, 10, t('The number of characters a word has to be to be searched for.'));
500 + $group .= form_textfield(t('Items to index per cron run'), 'search_cron_limit', variable_get('search_cron_limit', 100), 10, 10, t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
501 $output = form_group(t('Indexing settings'), $group);
502
503 - // Visual settings:
504 - $group = form_radios(t('Help text position'), 'help_pos', variable_get('help_pos', 1), array('1' => t('Above search output'), '2' => t('Below search output'), '3' => t('Link from above search output'), '4' => t('Link from below search output')), t('Where to show the help text for users on the search page.'));
505 - $output .= form_group(t('Viewing options'), $group);
506 -
507 print theme('page', system_settings_form($output));
508 }
509
510 /**
511 + * Marks a word as dirty (or retrieves the list of dirty words). Words which are
512 + * dirty have outdated total counts in the search_total table, and need to be
513 + * recounted.
514 + */
515 +function search_dirty($word = null) {
516 + static $dirty = array();
517 + if ($word !== null) {
518 + $dirty[$word] = true;
519 + }
520 + else {
521 + return $dirty;
522 + }
523 +}
524 +
525 +/**
526 * Implementation of hook_cron().
527 *
528 - * Fires hook_update_index() in all modules and uses the results to make
529 - * the search index current.
530 + * Fires hook_update_index() in all modules.
531 */
532 function search_cron() {
533 + /* Update word index */
534 foreach (module_list() as $module) {
535 - $module_array = module_invoke($module, 'update_index');
536 - if ($module_array) {
537 - update_index($module_array);
538 - }
539 - $module_array = null;
540 + module_invoke($module, 'update_index');
541 + }
542 + /* Update word counts for new/changed words */
543 + foreach (search_dirty() as $word => $dummy) {
544 + db_query("DELETE FROM {search_total} WHERE word = '%s'", $word);
545 + $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
546 + db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
547 }
548 - return;
549 }
550
551 /**
552 - * Perform a search on a word or words.
553 - *
554 - * This function is called by each node that supports the indexed search.
555 - *
556 - * @param $search_array
557 - * An array as returned from hook_search(). The format of this array is
558 - * array('keys' => ..., 'type' => ..., 'select' => ...). See the hook_search()
559 - * documentation for an explanation of the array values.
560 - *
561 - * @return
562 - * An array of search results, of which each element is an array with the
563 - * keys "count", "title", "link", "user" (name), "date", and "keywords".
564 + * Splits a string into component words.
565 */
566 -function do_search($search_array) {
567 +function search_keywords_split($text) {
568 + static $last = null;
569 + static $lastsplit = null;
570 +
571 + if ($last == $text) {
572 + return $lastsplit;
573 + }
574
575 - $keys = strtolower($search_array['keys']);
576 - $type = $search_array['type'];
577 - $select = $search_array['select'];
578 + // Decode entities to UTF-8
579 + $text = decode_entities($text);
580
581 - // Replace wildcards with MySQL wildcards.
582 - $keys = str_replace('*', '%', $keys);
583 + // Call an external processor for word handling.
584 + search_preprocess($text);
585
586 - // Split the words entered into an array.
587 - $words = explode(' ', $keys);
588 + // To improve searching for numerical data such as dates, IP addresses
589 + // or version numbers, we consider a group of numerical characters
590 + // separated only by punctuation characters to be one piece.
591 + // This also means that searching for e.g. '20/03/1984' also returns
592 + // results with '20-03-1984' in them.
593 + // Readable regexp: ([number]+)[punctuation]+(?=[number])
594 + $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
595
596 - foreach ($words as $word) {
597 + // The dot, underscore and dash are simply removed. This allows meaningful
598 + // search behaviour with acronyms and URLs.
599 + $text = preg_replace('/[._-]+/', '', $text);
600
601 - // If the word is too short, and we've got it set to skip them, loop.
602 - if (strlen($word) < variable_get('remove_short', 0)) {
603 - continue;
604 - }
605 + // With the exception of the rules above, we consider all punctuation,
606 + // marks, spacers, etc., to be a word boundary.
607 + $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
608
609 - // Put the next search word into the query and do the query.
610 - $query = str_replace("'%'", "'". check_query($word) ."'", $select);
611 - $result = db_query($query);
612 -
613 - if (db_num_rows($result) != 0) {
614 - // At least one matching record was found.
615 - $found = 1;
616 -
617 - // Create an in memory array of the results.
618 - while ($row = db_fetch_array($result)) {
619 - $lno = $row['lno'];
620 - $nid = $row['nid'];
621 - $title = $row['title'];
622 - $created = $row['created'];
623 - $uid = $row['uid'];
624 - $name = $row['name'];
625 - $count = $row['count'];
626 -
627 - // Build reduction variable.
628 - $reduction[$lno][$word] = true;
629 -
630 - // Check whether the just-fetched row is already in the table.
631 - if ($results[$lno]['lno'] != $lno) {
632 - $results[$lno]['count'] = $count;
633 -
634 - $results[$lno]['lno'] = $lno;
635 - $results[$lno]['nid'] = $nid;
636 - $results[$lno]['title'] = $title;
637 - $results[$lno]['created'] = $created;
638 - $results[$lno]['uid'] = $uid;
639 - $results[$lno]['name'] = $name;
640 - }
641 - else {
642 - // Different word, but existing "lno". Increase the count of
643 - // matches against this "lno" by the number of times this
644 - // word appears in the text.
645 - $results[$lno]['count'] = $results[$lno]['count'] + $count;
646 - }
647 - }
648 - }
649 - }
650 + // Process words
651 + $words = explode(' ', $text);
652
653 - if ($found) {
654 - foreach ($results as $lno => $values) {
655 - $pass = true;
656 - foreach ($words as $word) {
657 - if (!$reduction[$lno][$word]) {
658 - $pass = false;
659 - }
660 - }
661 - if ($pass) {
662 - $fullresults[$lno] = $values;
663 + // Save last keyword result
664 + $last = $text;
665 + $lastsplit = $words;
666 +
667 + return $words;
668 +}
669 +
670 +/**
671 + * Invokes hook_search_preprocess() in modules.
672 + */
673 +function search_preprocess(&$text) {
674 + static $modules = null;
675 + // Cache list of modules which implement this hook
676 + if (!is_array($modules)) {
677 + $modules = array();
678 + foreach (module_list() as $module) {
679 + if (module_hook($module, 'search_preprocess')) {
680 + $modules[] = $module;
681 }
682 }
683 - $results = $fullresults;
684 - if (!is_array($results)) {
685 - $found = 0;
686 - }
687 - }
688 - if ($found) {
689 - // Black magic here to sort the results.
690 - array_multisort($results, SORT_DESC);
691 -
692 - // Now, output the results.
693 - foreach ($results as $key => $value) {
694 - $lno = $value['lno'];
695 - $nid = $value['nid'];
696 - $title = $value['title'];
697 - $created = $value['created'];
698 - $uid = $value['uid'];
699 - $name = $value['name'];
700 - $count = $value['count'];
701 - switch ($type) {
702 - case 'node':
703 - $find[$i++] = array('count' => $count, 'title' => $title, 'link' => url("node/$lno"), 'user' => $name, 'date' => $created, 'keywords' => implode('|', $words));
704 - break;
705 - case 'comment':
706 - $find[$i++] = array('count' => $count, 'title' => $title, 'link' => (strstr(request_uri(), 'admin') ? url("admin/comment/edit/$lno") : url("node/$nid", NULL, "comment-$lno")), 'user' => $name, 'date' => $created, 'keywords' => implode('|', $words));
707 - break;
708 - break;
709 - }
710 + }
711 + // Process $text
712 + if (count($modules) > 0) {
713 + foreach ($modules as $module) {
714 + $text = module_invoke($module, 'search_preprocess', $text);
715 }
716 }
717 -
718 - return $find;
719 }
720
721 +
722 /**
723 - * Update the search_index table.
724 + * Update the search index for a particular item.
725 + *
726 + * @param $sid
727 + * A number identifying this particular item (e.g. node id).
728 *
729 - * @param $search_array
730 - * An array as returned from hook_update_index().
731 + * @param $type
732 + * A string defining this type of item (e.g. 'node')
733 + *
734 + * @param $text
735 + * The content of this item. Must be a piece of HTML text.
736 */
737 -function update_index($search_array) {
738 - $last_update = variable_get($search_array['last_update'], 1);
739 - $node_type = $search_array['node_type'];
740 - $select = $search_array['select'];
741 - $minimum_word_size = variable_get('minimum_word_size', 2);
742 -
743 - //watchdog('user', "$last_update<br />$node_type<br />$select");
744 -
745 - $result = db_query($select);
746 -
747 - if (db_num_rows($result)) {
748 - // Results were found. Look through the nodes we just selected.
749 - while ($node = db_fetch_array ($result)) {
750 -
751 - // Trash any existing entries in the search index for this node,
752 - // in case it is a modified node.
753 - db_query("DELETE from {search_index} WHERE lno = '". $node['lno'] ."' AND type = '". $node_type ."'");
754 -
755 - // Build the word list (teaser not included, as it would give a
756 - // false count of the number of hits).
757 - $wordlist = $node['text1'] .' '. $node['text2'];
758 -
759 - // Strip heaps of stuff out of it.
760 - $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", '', $wordlist);
761 -
762 - // Remove punctuation/special characters.
763 - $wordlist = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", '', $wordlist);
764 +function search_index($sid, $type, $text) {
765 + $minimum_word_size = variable_get('minimum_word_size', 3);
766
767 - // Strip out (now mangled) http and tags.
768 - $wordlist = preg_replace("'http\w+'", '', $wordlist);
769 - $wordlist = preg_replace("'www\w+'", '', $wordlist);
770 + global $base_url;
771 + $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';
772
773 - // Remove all newlines of any type.
774 - $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", ' ', $wordlist);
775 -
776 - // Lower case the whole thing.
777 - $wordlist = strtolower($wordlist);
778 -
779 - // Remove "noise words".
780 - $noise = explode(',', variable_get('noisewords', ''));
781 - foreach ($noise as $word) {
782 - $word = trim($word);
783 - $wordlist = trim(preg_replace("' $word '", ' ', ' ' .$wordlist. ' '));
784 + // Multipliers for scores of words inside certain HTML tags.
785 + // Note: 'a' must be included for link ranking to work.
786 + $tags = array('h1' => 21,
787 + 'h2' => 18,
788 + 'h3' => 15,
789 + 'h4' => 12,
790 + 'h5' => 9,
791 + 'h6' => 6,
792 + 'u' => 5,
793 + 'b' => 5,
794 + 'strong' => 5,
795 + 'em' => 5,
796 + 'a' => 10);
797 +
798 + // Strip off all ignored tags to speed up processing, but insert space before/after
799 + // them to keep word boundaries.
800 + $text = str_replace(array('<', '>'), array(' <', '> '), $text);
801 + $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
802 +
803 + // Split HTML tags from plain text.
804 + $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
805 + // Note: PHP ensures the array consists of alternating delimiters and literals
806 + // and begins and ends with a literal (inserting empty strings as required).
807 +
808 + $tag = false; // Odd/even counter. Tag or no tag.
809 + $link = false; // State variable for link analyser
810 + $score = 1; // Starting score per word
811 +
812 + $results = array(0 => array());
813 +
814 + foreach ($split as $value) {
815 + if ($tag) {
816 + // Increase or decrease score per word based on tag
817 + list($tagname) = explode(' ', $value, 2);
818 + $tagname = strtolower($tagname);
819 + if ($tagname{0} == '/') {
820 + $score -= $tags[substr($tagname, 1)];
821 + if ($score < 1) { // possible due to bad HTML
822 + $score = 1;
823 + }
824 + if ($tagname == '/a') {
825 + $link = false;
826 + }
827 }
828 -
829 - // Remove whitespace.
830 - $wordlist = preg_replace("'[\s]+'", ' ', $wordlist);
831 -
832 - // Make it an array.
833 - $eachword = explode(' ', $wordlist);
834 -
835 - // Walk through the array, giving a "weight" to each word based on
836 - // the number of times it appears in a page.
837 - foreach ($eachword as $word) {
838 - if (strlen($word) >= $minimum_word_size && strlen($word) <= 50) {
839 - if ($newwords[$word]) {
840 - $newwords[$word]++;
841 - }
842 - else {
843 - $newwords[$word] = 1;
844 + else {
845 + if ($tagname == 'a') {
846 + // Check if link points to a node on this site
847 + if (preg_match($node_regexp, $value, $match)) {
848 + $path = drupal_get_normal_path($match[1]);
849 + if (preg_match('!node/(?:view/)?([0-9]+)!i', $path, $match)) {
850 + $linknid = $match[1];
851 + if ($linknid > 0) {
852 + $link = true;
853 + }
854 + }
855 }
856 }
857 + $score += $tags[$tagname];
858 }
859 -
860 - // Walk through the weighted words array, inserting them into
861 - // the search index.
862 - if ($newwords) {
863 - foreach ($newwords as $key => $value) {
864 - db_query("INSERT INTO {search_index} VALUES('%s', %d, '%s', %d)", $key, $node['lno'], $node_type, $value);
865 + }
866 + else {
867 + // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
868 + if ($value != '') {
869 + $words = search_keywords_split($value);
870 + foreach ($words as $word) {
871 + // Check wordlength
872 + if (string_length($word) >= $minimum_word_size) {
873 + $word = strtolower($word);
874 + if ($link) {
875 + if (!isset($results[$linknid])) {
876 + $results[$linknid] = array();
877 + }
878 + $results[$linknid][$word] += $score;
879 + }
880 + else {
881 + $results[0][$word] += $score;
882 + }
883 + }
884 }
885 }
886 -
887 - // Reset the weighted words array, so we don't add multiples.
888 - $newwords = array ();
889 }
890 + $tag = !$tag;
891 }
892
893 - // Update the last time this process was run.
894 - variable_set($search_array['last_update'], time());
895 -
896 - return true;
897 -}
898 + db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
899
900 + // Insert results into search index
901 + foreach ($results[0] as $word => $score) {
902 + db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
903 + search_dirty($word);
904 + }
905 + unset($results[0]);
906
907 -function search_invalidate() {
908 - foreach (module_list() as $module) {
909 - $module_array = module_invoke($module, 'update_index');
910 - if ($module_array) {
911 - variable_set($module_array['last_update'], 1);
912 + // Now insert links to nodes
913 + foreach ($results as $nid => $words) {
914 + foreach ($words as $word => $score) {
915 + db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
916 + search_dirty($word);
917 }
918 - $module_array = null;
919 }
920 - return;
921 }
922
923 /**
924 - * Save the values entered by the administrator for the search module
925 + * Perform a search on a word or words.
926 + *
927 + * This function is called by each module that supports the indexed search.
928 + *
929 + * @param $keys
930 + * A search string as entered by the user.
931 + *
932 + * @param $type
933 + * A string identifying the calling module.
934 + *
935 + * @param $join
936 + * (optional) A string to be inserted into the JOIN part of the SQL query.
937 + * For example "INNER JOIN {node} n ON n.nid = i.sid".
938 *
939 - * @param $edit
940 - * An array of fields as set up by calling form_textfield(),
941 - * form_textarea(), etc.
942 + * @param $where
943 + * (optional) A string to be inserted into the WHERE part of the SQL query.
944 + * For example "(n.status > 0)".
945 + *
946 + * @return
947 + * An array of search results, of which each element is an object with the
948 + * properties "type", "sid", "word", and "score".
949 */
950 -function search_save($edit) {
951 - variable_set('minimum_word_size', $edit['minimum_word_size']);
952 +function do_search($keys, $type, $join = '', $where = '1') {
953 + // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
954 + // through the keyword extractor.
955 + $keys = str_replace('*', '�', $keys);
956 +
957 + // Split into words
958 + $keys = search_keywords_split($keys);
959 + // Lowercase
960 + foreach ($keys as $k => $v) {
961 + $keys[$k] = strtolower($v);
962 + }
963 +
964 + $words = array();
965 + $arguments = array();
966 + // Build WHERE clause
967 + foreach ($keys as $word) {
968 + if (string_length($word) < variable_get('remove_short', 3)) {
969 + continue;
970 + }
971 + if (strpos($word, '�') !== false) {
972 + $words[] = "i.word LIKE '%s'";
973 + $arguments[] = str_replace('�', '%', $word);
974 + }
975 + else {
976 + $words[] = "i.word = '%s'";
977 + $arguments[] = $word;
978 + }
979 + }
980 + if (count($words) == 0) {
981 + return array();
982 + }
983 + $where .= ' AND ('. implode(' OR ', $words) .')';
984
985 - $data = strtr($edit['noisewords'], "\n\r\t", ' ');
986 - $data = str_replace(' ', '', $data);
987 - variable_set('noisewords', $data);
988 - variable_set('help_pos', $edit['help_pos']);
989 - variable_set('remove_short', $edit['remove_short']);
990 + // Get result count (for pager)
991 + $count = db_result(db_query("SELECT COUNT(DISTINCT i.sid, i.type) FROM {search_index} i $join WHERE $where", $arguments));
992 + if ($count == 0) {
993 + return array();
994 + }
995 + $count_query = "SELECT $count";
996 +
997 + // Do pager query
998 + $query = "SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $where GROUP BY i.type, i.sid ORDER BY score DESC";
999 + $arguments = array_merge(array($query, 15, 0, $count_query), $arguments);
1000 + $result = call_user_func_array('pager_query', $arguments);
1001 +
1002 + $results = array();
1003 + while ($item = db_fetch_object($result)) {
1004 + $results[] = $item;
1005 + }
1006 +
1007 + return $results;
1008 }
1009
1010 /**
1011 * Menu callback; presents the search form and/or search results.
1012 */
1013 function search_view() {
1014 - global $type;
1015 - $keys = isset($_GET['keys']) ? $_GET['keys'] : $_POST['keys'];
1016 + $keys = isset($_GET['keys']) ? $_GET['keys'] : $_POST['edit']['keys'];
1017 + $type = isset($_GET['type']) ? $_GET['type'] : ($_POST['edit']['type'] ? $_POST['edit']['type'] : 'node');
1018
1019 if (user_access('search content')) {
1020 - // Construct the search form.
1021 - $output = search_form(NULL, $keys, TRUE);
1022 -
1023 - // Display form and search results.
1024 - $help_link = l(t('search help'), 'search/help');
1025 - switch (variable_get('help_pos', 1)) {
1026 - case '1':
1027 - $output = search_help(). $output .'<br />';
1028 - break;
1029 - case '2':
1030 - $output .= search_help() .'<br />';
1031 - break;
1032 - case '3':
1033 - $output = $help_link. '<br />'. $output .'<br />';
1034 - break;
1035 - case '4':
1036 - $output .= '<br />'. $help_link .'<br />';
1037 - }
1038 -
1039 // Only perform search if there is non-whitespace search term:
1040 if (trim($keys)) {
1041 // Log the search keys:
1042 - watchdog('search', t('Search: %keys.', array('%keys' => "<em>$keys</em>")), l(t('results'), 'search', NULL, 'keys='. urlencode($keys)));
1043 + watchdog('search', t('Search: %keys (%type).', array('%keys' => "<em>$keys</em>", '%type' => $type)), l(t('results'), 'search', NULL, 'keys='. urlencode($keys) . '&type='. urlencode($type)));
1044
1045 // Collect the search results:
1046 - $results = search_data($keys);
1047 + $results = search_data($keys, $type);
1048
1049 if ($results) {
1050 - $output .= theme('box', t('Search Results'), $results);
1051 + $results = theme('box', t('Search results'), $results);
1052 }
1053 else {
1054 - $output .= theme('box', t('Search Results'), t('Your search yielded no results.'));
1055 + $results = theme('box', t('No search results'), search_help('search#noresults'));
1056 }
1057 }
1058 + else if (isset($_POST['edit'])) {
1059 + form_set_error('keys', t('Please enter some keywords.'));
1060 + }
1061 +
1062 + // Construct the search form.
1063 + // Note, we do this last because of the form_set_error() above.
1064 + $output = search_form(NULL, $keys, $type, TRUE);
1065 +
1066 + $output .= $results;
1067
1068 print theme('page', $output, t('Search'));
1069 }
1070 @@ -404,4 +464,208 @@
1071 print theme('page', search_help());
1072 }
1073
1074 +/**
1075 + * @defgroup search Search interface
1076 + * @{
1077 + * The Drupal search interface manages a global search mechanism.
1078 + *
1079 + * Modules may plug into this system to provide searches of different types of
1080 + * data. Most of the system is handled by search.module, so this must be enabled
1081 + * for all of the search features to work.
1082 + */
1083 +
1084 +/**
1085 + * Render a search form.
1086 + *
1087 + * This form must be usable not only within "http://example.com/search", but also
1088 + * as a simple search box (without "Restrict search to", help text, etc.), in the
1089 + * theme's header, and so forth. This means we must provide options to
1090 + * conditionally render certain parts of this form.
1091 + *
1092 + * @param $action
1093 + * Form action. Defaults to "search".
1094 + * @param $keys
1095 + * The search string entered by the user, containing keywords for the search.
1096 + * @param $options
1097 + * Whether to render the optional form fields and text ("Restrict search
1098 + * to", help text, etc.).
1099 + * @return
1100 + * An HTML string containing the search form.
1101 + */
1102 +function search_form($action = '', $keys = '', $type = null, $options = FALSE) {
1103 + $edit = $_POST['edit'];
1104 +
1105 + if (!$action) {
1106 + $action = url('search');
1107 + }
1108 +
1109 + $output = ' <div class="search-form">';
1110 + $box = '<div class="container-inline">';
1111 + $box .= form_textfield('', 'keys', $keys, 40, 255);
1112 + $box .= form_submit(t('Search'));;
1113 + $box .= '</div>';
1114 + $output .= form_item(t('Enter your keywords'), $box);
1115 +
1116 + if ($options) {
1117 + $output .= '<div class="container-inline">'. t('Search for') .': ';
1118 +
1119 + if (!isset($edit['type'])) {
1120 + $edit['type'] = $type;
1121 + }
1122 +
1123 + foreach (module_list() as $name) {
1124 + if (module_hook($name, 'search')) {
1125 + $output .= form_radio(module_invoke($name, 'search', 'name'), 'type', $name, $edit['type'] == $name);
1126 + }
1127 + }
1128 + $output .= '</div>';
1129 + }
1130 + else if ($type) {
1131 + $output .= form_hidden('type', $type);
1132 + }
1133 + $output .= '</div>';
1134 +
1135 + return form($output, 'post', $action);
1136 +}
1137 +
1138 +/**
1139 + * Perform a search on the given keys, and return the formatted results.
1140 + */
1141 +function search_data($keys = NULL, $type = 'node') {
1142 + $output = '';
1143 +
1144 + if (isset($keys)) {
1145 + if (module_hook($type, 'search')) {
1146 + $results = module_invoke($type, 'search', 'search', $keys);
1147 + if (is_array($results) && count($results)) {
1148 + $output .= '<dl class="search-results">';
1149 + foreach ($results as $entry) {
1150 + $output .= theme('search_item', $entry, $type);
1151 + }
1152 + $output .= '</dl>';
1153 + $output .= theme('pager', NULL, 15, 0, array('keys' => $keys, 'type' => $type));
1154 + }
1155 + }
1156 + }
1157 +
1158 + return $output;
1159 +}
1160 +
1161 +/**
1162 + * @} End of "defgroup search".
1163 + */
1164 +
1165 +/**
1166 + * Returns snippets from a piece of text, with certain keywords highlighted.
1167 + * Used for formatting search results.
1168 + */
1169 +function search_excerpt($keys, $text) {
1170 + $keys = search_keywords_split($keys);
1171 + $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
1172 +
1173 + // Extract a fragment per keyword for at most 4 keywords.
1174 + // First we collect ranges of text around each keyword, starting/ending
1175 + // at spaces.
1176 + $ranges = array();
1177 + foreach ($keys as $k => $key) {
1178 + if (strlen($key) == 0) {
1179 + unset($keys[$k]);
1180 + continue;
1181 + }
1182 + if (count($out) == 4) {
1183 + break;
1184 + }
1185 + // Note: workaround for lack of stripos() in PHP4
1186 + if (($p = strpos($text, stristr($text, $key))) !== false) {
1187 + if (($q = strpos($text, ' ', max(0, $p - 60))) !== false) {
1188 + $end = substr($text, $p, 80);
1189 + if (($s = strrpos($end, ' ')) !== false) {
1190 + $ranges[$q] = $p + $s;
1191 + }
1192 + }
1193 + }
1194 + }
1195 +
1196 + // If we didn't find anything, return the beginning.
1197 + if (count($ranges) == 0) {
1198 + return truncate_utf8($text, 128) . ' ...';
1199 + }
1200 +
1201 + // Sort the text ranges by starting position.
1202 + ksort($ranges);
1203 +
1204 + // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
1205 + $newranges = array();
1206 + foreach ($ranges as $from2 => $to2) {
1207 + if (!isset($from1)) {
1208 + $from1 = $from2;
1209 + $to1 = $to2;
1210 + continue;
1211 + }
1212 + if ($from2 <= $to1) {
1213 + $to1 = max($to1, $to2);
1214 + }
1215 + else {
1216 + $newranges[$from1] = $to1;
1217 + $from1 = $from2;
1218 + $to1 = $to2;
1219 + }
1220 + }
1221 + $newranges[$from1] = $to1;
1222 +
1223 + // Fetch text
1224 + $out = array();
1225 + foreach ($newranges as $from => $to) {
1226 + $out[] = substr($text, $from, $to - $from);
1227 + }
1228 + $text = ' ... '. implode(' ... ', $out) .' ... ';
1229 +
1230 + // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
1231 + array_walk($keys, '_search_excerpt_replace');
1232 + $text = preg_replace('/('. implode('|', $keys) .')/i', '<strong>\0</strong>', $text);
1233 + return $text;
1234 +}
1235 +
1236 +/**
1237 + * Helper function for array_walk in search_excerpt().
1238 + */
1239 +function _search_excerpt_replace($text) {
1240 + return preg_quote($text);
1241 +}
1242 +
1243 +/**
1244 + * Format a single result entry of a search query.
1245 + *
1246 + * Modules may implement hook_search_item() in order to override this default
1247 + * function to display search results.
1248 + *
1249 + * @param $item
1250 + * A single search result as returned by hook_search(). The default output
1251 + * reads the keys "link", "title", "type", "user", "date", and "snippet".
1252 + * @param $type
1253 + * The type of item found, such as "user" or "comment".
1254 + */
1255 +function theme_search_item($item, $type) {
1256 + if (module_hook($type, 'search_item')) {
1257 + $output = module_invoke($type, 'search_item', $item);
1258 + }
1259 + else {
1260 + $output = ' <dt class="title"><a href="'. $item['link'] .'">'. $item['title'] .'</a></dt>';
1261 + $info = array();
1262 + if ($item['type']) {
1263 + $info[] = $item['type'];
1264 + }
1265 + if ($item['user']) {
1266 + $info[] = $item['user'];
1267 + }
1268 + if ($item['date']) {
1269 + $info[] = format_date($item['date'], 'small');
1270 + }
1271 + $output .= ' <dd>'. ($item['snippet'] ? '<p>'. $item['snippet'] . '</p>' : '') . '<p class="search-info">' . implode(' - ', $info) .'</p></dd>';
1272 + }
1273 +
1274 + return $output;
1275 +}
1276 +
1277 +
1278 ?>
1279 Index: modules/user.module
1280 ===================================================================
1281 RCS file: /cvs/drupal/drupal/modules/user.module,v
1282 retrieving revision 1.404
1283 diff -u -r1.404 user.module
1284 --- modules/user.module 12 Oct 2004 16:16:22 -0000 1.404
1285 +++ modules/user.module 14 Oct 2004 05:19:34 -0000
1286 @@ -398,17 +398,20 @@
1287 /**
1288 * Implementation of hook_search().
1289 */
1290 -function user_search($keys) {
1291 - $find = array();
1292 -
1293 - // Replace wildcards with MySQL/PostgreSQL wildcards.
1294 - $keys = str_replace('*', '%', $keys);
1295 -
1296 - $result = db_query_range("SELECT * FROM {users} WHERE LOWER(name) LIKE '%%%s%%'", strtolower($keys), 0, 20);
1297 - while ($account = db_fetch_object($result)) {
1298 - $find[] = array('title' => $account->name, 'link' => url("user/$account->uid/view"), 'user' => $account->name);
1299 +function user_search($op = 'search', $keys = null) {
1300 + switch ($op) {
1301 + case 'name':
1302 + return t('users');
1303 + case 'search':
1304 + $find = array();
1305 + // Replace wildcards with MySQL/PostgreSQL wildcards.
1306 + $keys = str_replace('*', '%', $keys);
1307 + $result = db_query_range("SELECT * FROM {users} WHERE LOWER(name) LIKE '%%%s%%'", strtolower($keys), 0, 20);
1308 + while ($account = db_fetch_object($result)) {
1309 + $find[] = array('title' => $account->name, 'link' => url("user/$account->uid/view"), 'user' => $account->name);
1310 + }
1311 + return $find;
1312 }
1313 - return array(t('Matching users'), $find);
1314 }
1315
1316 /**
1317 cvs diff: Diffing scripts
1318 cvs diff: Diffing themes
1319 cvs diff: Diffing themes/bluemarine
1320 cvs diff: Diffing themes/chameleon
1321 cvs diff: Diffing themes/chameleon/marvin
1322 cvs diff: Diffing themes/engines
1323 cvs diff: Diffing themes/engines/xtemplate
1324 cvs diff: Diffing themes/pushbutton
1325
1326
1327 ***** CVS exited normally with code 1 *****
1328

  ViewVC Help
Powered by ViewVC 1.1.2