/[drupal]/contributions/sandbox/fago/relevance_feedback/relevance_feedback.module
ViewVC logotype

Contents of /contributions/sandbox/fago/relevance_feedback/relevance_feedback.module

Parent Directory | Revision Log | View Revision Graph


Revision 1.1 - (show annotations) (download) (as text)
Tue Sep 23 16:18:52 2008 UTC (14 months ago) by fago
Branch: MAIN
CVS Tags: HEAD
File MIME type: text/x-php
added the relevance feedback module to the sandbox
1 <?php
2 // $Id$
3
4 /**
5 * @file Relevance feedback module
6 */
7
/**
 * Implementation of hook_init().
 *
 * On node search pages (paths starting with search/node) this attaches the
 * assets for the configured feedback type: the implicit script, or the
 * explicit script together with the module stylesheet. Any other configured
 * type adds nothing.
 */
function relevance_feedback_init() {
  // Nothing to do outside of the node search pages.
  if (arg(0) != 'search' || arg(1) != 'node') {
    return;
  }

  $module_path = drupal_get_path('module', 'relevance_feedback');

  switch (variable_get('relevance_feedback_type', 'explicit')) {
    case 'implicit':
      drupal_add_js($module_path .'/relevance_feedback.implicit.js', 'module');
      break;

    case 'explicit':
      drupal_add_js($module_path .'/relevance_feedback.explicit.js', 'module');
      drupal_add_css($module_path .'/relevance_feedback.css', 'module');
      break;
  }
}
22
/**
 * Implementation of hook_menu().
 *
 * Registers two paths: the callback that records feedback for a search
 * result, and the administrative settings form.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function relevance_feedback_menu() {
  return array(
    // Feedback endpoint, handled by relevance_feedback_js().
    'relevance_feedback/js' => array(
      'page callback' => 'relevance_feedback_js',
      'access arguments' => array('access content'),
      'type' => MENU_CALLBACK,
    ),
    // Settings form; the form builder is loaded from the admin include file.
    'admin/settings/relevance_feedback' => array(
      'title' => 'Relevance Feedback',
      'description' => 'Configure relevance feedback.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('relevance_feedback_settings'),
      'access arguments' => array('administer site configuration'),
      'file' => 'relevance_feedback.admin.inc',
    ),
  );
}
43
/**
 * Menu callback: records feedback for a single search result.
 *
 * Reads the following from $_GET:
 * - keys: the search keys. The request is ignored unless they equal the keys
 *   of the search stored in the session.
 * - url or nid: identifies the node, either through a URL whose (normalized)
 *   path is "/node/<nid>" or directly through the node id. "url" takes
 *   precedence when both are present.
 * - type: 'rel' marks the node as relevant, 'nonrel' as non-relevant; any
 *   other value clears the node's feedback status.
 *
 * After storing the feedback the gathered data is processed. The callback
 * prints "1" if that flagged the search for a refresh with a refined query,
 * and an empty string otherwise. Ignored requests print nothing.
 */
function relevance_feedback_js() {
  $search = relevance_feedback_get_search();

  // Only accept feedback that belongs to the search stored in the session.
  // A request that carries no keys at all can never belong to it.
  if (!isset($search['keys']) || !isset($_GET['keys']) || $search['keys'] != $_GET['keys']) {
    return;
  }

  if (isset($_GET['url'])) {
    // parse_url() returns FALSE for seriously malformed URLs and leaves out
    // 'path' when the URL has none, so check before using it.
    $url = is_string($_GET['url']) ? parse_url($_GET['url']) : FALSE;
    if (is_array($url) && isset($url['path'])) {
      // NOTE(review): the path is passed on with its leading slash, exactly as
      // before; confirm that alias lookup and sites installed in a
      // subdirectory are handled as intended.
      $path = drupal_get_normal_path($url['path']);
      // Require the complete "/node/" prefix: the 6 characters stripped below
      // are exactly that prefix, so a path like "/nodeX5" must not match.
      if (strpos($path, '/node/') === 0) {
        $nid = substr($path, 6);
      }
    }
  }
  else if (isset($_GET['nid'])) {
    $nid = $_GET['nid'];
  }

  if (!isset($nid) || !is_numeric($nid) || !isset($_GET['type'])) {
    return;
  }

  if ($_GET['type'] == 'rel') {
    relevance_feedback_add_relevant_result($nid);
  }
  else if ($_GET['type'] == 'nonrel') {
    relevance_feedback_add_nonrelevant_result($nid);
  }
  else {
    relevance_feedback_clear_result_status($nid);
  }

  relevance_feedback_process();
  // Tell the caller whether the search should be refreshed.
  print isset($_SESSION['relevance_feedback']['refresh']) ? 1 : '';
}
75
76
/**
 * Implementation of hook_nodeapi().
 *
 * For the 'search result' operation, and only when the explicit feedback type
 * is configured, returns markup holding an "up" and a "down" image for the
 * node. The wrapping span carries the node id in its id attribute and, when
 * the node already has feedback in the session, an rf_status attribute
 * ("1" for relevant, "0" for non-relevant).
 */
function relevance_feedback_nodeapi($node, $op) {
  if ($op != 'search result' || variable_get('relevance_feedback_type', 'explicit') != 'explicit') {
    return;
  }

  $image_dir = drupal_get_path('module', 'relevance_feedback') .'/images';
  $buttons = theme('image', $image_dir .'/up.gif', 'up', t('Mark this result as relevant.'), array('class' => 'relevance up'));
  $buttons .= theme('image', $image_dir .'/down.gif', 'down', t('Mark this result as non-relevant.'), array('class' => 'relevance down'));

  // TRUE means relevant, FALSE means non-relevant, NULL means no feedback yet.
  $status = relevance_feedback_get_result_status($node->nid);
  if ($status) {
    $attribute = 'rf_status = "1" ';
  }
  elseif ($status === FALSE) {
    $attribute = 'rf_status = "0" ';
  }
  else {
    $attribute = '';
  }

  return '<span class="relevance_feedback" id="relevance_feedback_'. $node->nid .'" '. $attribute .'>'. $buttons .'</span>';
}
90
/**
 * Implementation of hook_form_alter().
 *
 * Acts on the node search form only:
 * - When the form was posted, registers relevance_feedback_init_search() as
 *   an additional submit handler.
 * - Otherwise, if a search is stored in the session and the form has search
 *   keys, the form shows the keys the user originally entered. If a refined
 *   query is pending, the refresh flag is cleared, a message is set and the
 *   user is redirected to the search page for the refined query.
 */
function relevance_feedback_form_alter(&$form, &$form_state, $form_id) {
  if ($form_id != 'search_form' || $form['module']['#value'] != 'node') {
    return;
  }

  if ($form_state['post']) {
    $form['#submit'][] = 'relevance_feedback_init_search';
    return;
  }

  $search = relevance_feedback_get_search();
  $current_keys = $form['basic']['inline']['keys']['#default_value'];
  if (!$search || !$current_keys || !$search['keys']) {
    return;
  }

  // Show the original search query to the user.
  $form['basic']['inline']['keys']['#default_value'] = $search['keys'];

  // Refresh if necessary.
  $refined = relevance_feedback_get_new_search_query();
  if ($refined) {
    unset($_SESSION['relevance_feedback']['refresh']);
    drupal_set_message(t('Your search query was refined for better search results.'));
    drupal_goto('search/node/'. $refined);
  }
}
116
/**
 * Submit handler for the node search form: starts a new feedback session.
 *
 * Discards any feedback stored for a previous search and records the
 * processed search keys both as the original keys and as the current query.
 *
 * @param $form
 *   The submitted form (unused).
 * @param $form_state
 *   The form state; $form_state['values']['processed_keys'] is read.
 */
function relevance_feedback_init_search($form, $form_state) {
  $keys = $form_state['values']['processed_keys'];

  $_SESSION['relevance_feedback'] = array(
    'keys' => $keys,
    'query' => $keys,
  );

  // Pass the message through t() so it can be translated. The @keys
  // placeholder is escaped with check_plain(), as the concatenated version
  // did explicitly.
  drupal_set_message(t('Detected new search: @keys', array('@keys' => $keys)));
}
124
/**
 * Returns the feedback data stored in the session for the current search.
 *
 * @return
 *   The $_SESSION['relevance_feedback'] array, or an empty array when nothing
 *   is stored.
 */
function relevance_feedback_get_search() {
  if (!isset($_SESSION['relevance_feedback'])) {
    return array();
  }
  return $_SESSION['relevance_feedback'];
}
128
/**
 * Marks a search result as relevant in the session.
 *
 * Any status the result had before is removed first, so a node id is never
 * listed as relevant and non-relevant at the same time.
 *
 * @param $nid
 *   The node id of the result.
 */
function relevance_feedback_add_relevant_result($nid) {
  relevance_feedback_clear_result_status($nid);

  $feedback = &$_SESSION['relevance_feedback'];
  // Create the list of relevant results if it does not exist yet.
  $feedback += array('rel' => array());
  $feedback['rel'][$nid] = $nid;
}
134
/**
 * Marks a search result as non-relevant in the session.
 *
 * Any status the result had before is removed first, so a node id is never
 * listed as relevant and non-relevant at the same time.
 *
 * @param $nid
 *   The node id of the result.
 */
function relevance_feedback_add_nonrelevant_result($nid) {
  relevance_feedback_clear_result_status($nid);

  $feedback = &$_SESSION['relevance_feedback'];
  // Create the list of non-relevant results if it does not exist yet.
  $feedback += array('nonrel' => array());
  $feedback['nonrel'][$nid] = $nid;
}
140
/**
 * Removes any feedback stored in the session for a search result.
 *
 * @param $nid
 *   The node id of the result.
 */
function relevance_feedback_clear_result_status($nid) {
  foreach (array('nonrel', 'rel') as $list) {
    // Lists that do not exist yet are left alone.
    if (isset($_SESSION['relevance_feedback'][$list])) {
      unset($_SESSION['relevance_feedback'][$list][$nid]);
    }
  }
}
149
/**
 * Looks up the feedback stored in the session for a search result.
 *
 * @param $nid
 *   The node id of the result.
 *
 * @return
 *   TRUE if the result is marked relevant, FALSE if it is marked
 *   non-relevant, NULL if no feedback is stored for it.
 */
function relevance_feedback_get_result_status($nid) {
  if (isset($_SESSION['relevance_feedback']['rel'][$nid])) {
    return TRUE;
  }
  return isset($_SESSION['relevance_feedback']['nonrel'][$nid]) ? FALSE : NULL;
}
165
/**
 * Runs the refinement algorithm once enough new feedback was gathered.
 *
 * Clears a pending refresh flag, makes sure the 'rel', 'nonrel' and 'used'
 * entries exist in the session, and compares the number of relevant results
 * not yet used against the 'relevance_feedback_count' variable (default 5).
 * When that many are available they are counted as used, a refined query is
 * computed and stored as the new search query.
 */
function relevance_feedback_process() {
  unset($_SESSION['relevance_feedback']['refresh']);

  $feedback = &$_SESSION['relevance_feedback'];
  $feedback += array('rel' => array(), 'nonrel' => array(), 'used' => 0);

  $batch_size = variable_get('relevance_feedback_count', 5);
  $unused = count($feedback['rel']) - $feedback['used'];
  if ($unused < $batch_size) {
    // Not enough new relevant results yet.
    return;
  }

  $feedback['used'] += $batch_size;

  // Calculate the new query and flag the search for a refresh.
  $refined = relevance_feedback_get_refined_query();
  relevance_feedback_process_set_new_search_query($refined);
}
184
/**
 * Stores a new search query in the session and flags it for a refresh.
 *
 * @param $query
 *   The new search query.
 */
function relevance_feedback_process_set_new_search_query($query) {
  $feedback = &$_SESSION['relevance_feedback'];
  $feedback['refresh'] = TRUE;
  $feedback['query'] = $query;
}
192
/**
 * Returns the pending refined search query, if any.
 *
 * @return
 *   The query stored in the session when the refresh flag is set, FALSE
 *   otherwise.
 */
function relevance_feedback_get_new_search_query() {
  if (isset($_SESSION['relevance_feedback']['refresh'])) {
    return $_SESSION['relevance_feedback']['query'];
  }
  return FALSE;
}
196
/**
 * Refines the search query making use of the gathered feedback.
 *
 * The original search keys from the session are split into plain words and
 * "special" terms (anything containing a colon). The plain words form the
 * initial query vector, each weighted with the configured threshold. The
 * configured algorithm (variable 'relevance_feedback_algo', default
 * 'rocchio') turns that vector into a refined one, from which all words
 * weighted below the threshold are dropped.
 *
 * @return
 *   The new search keys: the special terms followed by the remaining words
 *   joined with " OR ", without leading or trailing whitespace.
 */
function relevance_feedback_get_refined_query() {
  // Split on runs of whitespace and drop empty tokens, so repeated spaces in
  // the keys do not end up as an empty word in the refined query.
  $words = preg_split('/\s+/', $_SESSION['relevance_feedback']['keys'], -1, PREG_SPLIT_NO_EMPTY);

  // 'treshold' (sic) is the name under which the variable is stored.
  $threshold = variable_get('relevance_feedback_treshold', 0.10);

  // Build the original search query.
  $q0 = $special = array();
  foreach ($words as $word) {
    if (strpos($word, ':') === FALSE) {
      $q0[$word] = $threshold;
    }
    else {
      // Filter out special commands; they are passed through unchanged.
      $special[] = $word;
    }
  }

  $function = 'relevance_feedback_apply_'. variable_get('relevance_feedback_algo', 'rocchio');
  if (!function_exists($function)) {
    // A stale or mistyped setting must not end in a fatal error; fall back to
    // the algorithm shipped with this module.
    $function = 'relevance_feedback_apply_rocchio';
  }
  $qm = $function($q0);

  // The dumps contain words taken from the user's search keys, so escape them
  // before they are shown as a message.
  relevance_feedback_debug("Original search query: <pre>". check_plain(print_r($q0, 1)) ."</pre>");
  asort($qm);
  relevance_feedback_debug("Optimised query: <pre>". check_plain(print_r($qm, 1)) ."</pre>");

  // Filter out words below the threshold.
  $qm = array_filter($qm, '_relevance_feedback_filter');
  relevance_feedback_debug("Add search keywords: <pre>". check_plain(print_r($qm, 1)) ."</pre>");

  // Return the new keywords to look for. trim() removes the separator that is
  // left over when either part is empty.
  return trim(implode(' ', $special) .' '. implode(' OR ', array_keys($qm)));
}
230
/**
 * array_filter() callback: keeps weights that reach the configured threshold.
 *
 * @param $value
 *   The weight of a word.
 *
 * @return
 *   TRUE if the weight is at least the 'relevance_feedback_treshold' variable
 *   (default 0.10), FALSE otherwise.
 */
function _relevance_feedback_filter($value) {
  $minimum = variable_get('relevance_feedback_treshold', 0.10);
  return $value >= $minimum;
}
234
/**
 * Shows a debug message when debugging is enabled.
 *
 * @param $msg
 *   The message; it is set as a Drupal message only if the
 *   'relevance_feedback_debug' variable is non-empty (default 0).
 */
function relevance_feedback_debug($msg) {
  if (!variable_get('relevance_feedback_debug', 0)) {
    return;
  }
  drupal_set_message($msg);
}
240
/**
 * Computes a refined query vector from the original one and the feedback.
 *
 * Every word of the original query keeps its weight multiplied by alpha. For
 * each relevant node, the weight of each of its indexed words (search_total
 * count times search_index score) is added, scaled by beta and divided by the
 * number of relevant nodes. For each non-relevant node the same weight is
 * subtracted, scaled by gamma and divided by the number of non-relevant
 * nodes. Alpha, beta and gamma come from the
 * 'relevance_feedback_rocchio_alpha', '..._beta' and '..._gamma' variables
 * (defaults 1, 0.75 and 0.15).
 *
 * @param $q0
 *   The original query: an array of weights keyed by word.
 *
 * @return
 *   The refined query: an array of weights keyed by word.
 */
function relevance_feedback_apply_rocchio($q0) {
  $alpha = variable_get('relevance_feedback_rocchio_alpha', 1);

  // One entry per feedback list: the session key, the number of nodes in it,
  // the configured factor, and whether its weights are added or subtracted.
  $sets = array(
    array(
      'key' => 'rel',
      'count' => count($_SESSION['relevance_feedback']['rel']),
      'factor' => variable_get('relevance_feedback_rocchio_beta', 0.75),
      'add' => TRUE,
    ),
    array(
      'key' => 'nonrel',
      'count' => count($_SESSION['relevance_feedback']['nonrel']),
      'factor' => variable_get('relevance_feedback_rocchio_gamma', 0.15),
      'add' => FALSE,
    ),
  );

  // Start from the original query.
  $qm = array();
  foreach ($q0 as $word => $weight) {
    $qm[$word] = $weight * $alpha;
  }

  // Fetches the weight of every indexed word of one node.
  $sql = "SELECT si.word, st.count * si.score AS weight FROM {search_index} si ".
    "INNER JOIN {search_total} st ON st.word = si.word WHERE si.sid = %d and si.type ='node' ";

  // Relevant documents are processed first, then non-relevant ones.
  foreach ($sets as $set) {
    foreach ($_SESSION['relevance_feedback'][$set['key']] as $nid) {
      $result = db_query($sql, $nid);
      while ($row = db_fetch_object($result)) {
        if (!isset($qm[$row->word])) {
          $qm[$row->word] = 0;
        }
        $delta = $row->weight * $set['factor'] / $set['count'];
        if ($set['add']) {
          $qm[$row->word] += $delta;
        }
        else {
          $qm[$row->word] -= $delta;
        }
      }
    }
  }

  return $qm;
}
277

  ViewVC Help
Powered by ViewVC 1.1.2