| 1 |
<?php |
<?php |
| 2 |
// $Id: csplitter.module,v 1.3 2006/04/29 05:18:43 zealy Exp $ |
// $Id: csplitter.module,v 1.5 2007/03/26 14:34:52 zealy Exp $ |
| 3 |
// mailto: i.zealy AT gmail dot com |
// mailto: i.zealy AT gmail dot com |
| 4 |
|
|
| 5 |
/** |
/** |
| 19 |
/**
 * Implementation of hook_menu().
 *
 * Registers two router items:
 *  - 'csplitter': the splitter test page, served by csplitter_view() to users
 *    holding the 'search content' permission.
 *  - 'admin/settings/csplitter': the settings form built by
 *    csplitter_admin_settings(), for users holding 'administer search'.
 *
 * Titles and descriptions are deliberately left untranslated here: the menu
 * system runs them through t() itself, so wrapping them in t() a second time
 * would translate them twice.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function csplitter_menu() {
  $items = array();

  $items['csplitter'] = array(
    'title' => 'chinese splitter',
    'page callback' => 'csplitter_view',
    'access arguments' => array('search content'),
    'type' => MENU_CALLBACK,
  );

  $items['admin/settings/csplitter'] = array(
    'title' => 'Chinese splitter',
    'description' => 'Configure relevance settings for splitter options',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('csplitter_admin_settings'),
    'access arguments' => array('administer search'),
    'type' => MENU_NORMAL_ITEM,
  );

  return $items;
}
| 41 |
|
|
| 47 |
|
|
| 48 |
$form['weight'] = array( |
$form['weight'] = array( |
| 49 |
'#type' => 'markup', |
'#type' => 'markup', |
| 50 |
'#value' => t('Optional. You can test your configuration '.l('here','csplitter').'.'), |
'#value' => 'Optional. You can test your configuration '.l('here','csplitter').'.', |
| 51 |
); |
); |
| 52 |
|
|
| 53 |
$form['Config Algorithm'] = array('#type' => 'fieldset', |
$form['Config Algorithm'] = array('#type' => 'fieldset', |
| 62 |
return system_settings_form($form); |
return system_settings_form($form); |
| 63 |
} |
} |
| 64 |
|
|
| 65 |
$algors = array(1 => t('Forward maximum matching'), 2 => t('Conversely maximum mathing')); |
$algors = array(1 => t('Forward maximum matching'), 2 => t('Conversely maximum matching'), 3 => t('Forward minimum matching'), 4 => t('Conversely minimum matching')); |
| 66 |
$form['Config Algorithm']['csplitter_algorithm'] = array('#type' => 'radios', |
$form['Config Algorithm']['csplitter_algorithm'] = array('#type' => 'radios', |
| 67 |
'#title' => t('Default Algorithm'), |
'#title' => t('Default Algorithm'), |
| 68 |
'#default_value' => variable_get('csplitter_algorithm', '1'), |
'#default_value' => variable_get('csplitter_algorithm', '3'), |
| 69 |
'#options' => $algors, |
'#options' => $algors, |
| 70 |
'#description' => t('The default algorithm for word splitting.'), |
'#description' => t('The default algorithm for word splitting. Minimum algorithm is much faster, but it will less precise.'), |
| 71 |
|
); |
| 72 |
|
|
| 73 |
|
$form['Search Word Length'] = array('#type' => 'fieldset', |
| 74 |
|
'#title' => t('Search Word Length'), |
| 75 |
|
'#collapsible' => TRUE, |
| 76 |
|
'#description' => t('Search word length will visibly effect performance, 4 is default.'), |
| 77 |
|
); |
| 78 |
|
$form['Search Word Length']['csplitter_word_len'] = array('#type' => 'radios', |
| 79 |
|
'#title' => t('Select Word Length'), |
| 80 |
|
'#default_value' => variable_get('csplitter_word_len', 2), |
| 81 |
|
'#options' => array(2,3,4,5,6,7), |
| 82 |
|
'#description' => t("Longer word will take more time to process. If you don't know its meaning, keep it to default"), |
| 83 |
); |
); |
| 84 |
|
|
| 85 |
$form['Define Cache'] = array('#type' => 'fieldset', |
$form['Define Cache'] = array('#type' => 'fieldset', |
| 86 |
'#title' => t('Define Cache'), |
'#title' => t('Define Cache'), |
| 87 |
'#collapsible' => TRUE, |
'#collapsible' => TRUE, |
| 88 |
'#description' => t('Config using cache for dictionary or not. You can test performance <a href="/csplitter">here</a>.'), |
'#description' => t('Config using cache for dictionary or not. You can test performance <a href="/csplitter">here</a>.'), |
| 89 |
); |
); |
| 90 |
$cache_method = array('none' => t('No caching'), 'disk' => t('Caching to disk'), 'database' => t('Caching to database')); |
$cache_method = array( |
| 91 |
|
'none' => t('No caching'), |
| 92 |
|
'disk' => t('Caching to disk'), |
| 93 |
|
'database' => t('Caching to database'), |
| 94 |
|
'bplus' => t('Using B+ Indexed Dictionary')); |
| 95 |
$form['Define Cache']['csplitter_cache'] = array('#type' => 'radios', |
$form['Define Cache']['csplitter_cache'] = array('#type' => 'radios', |
| 96 |
'#title' => t('Define Dictionary Caching'), |
'#title' => t('Define Dictionary Caching'), |
| 97 |
'#default_value' => variable_get('csplitter_cache', 'none'), |
'#default_value' => variable_get('csplitter_cache', 'bplus'), |
| 98 |
'#options' => $cache_method, |
'#options' => $cache_method, |
| 99 |
'#description' => t('In most server system, disk caching is fastest. If you use database caching, you should sure your database can accept very large sql sentence(max_allowed_packet for mysql should be > 5M possible). '), |
'#description' => t('In most server system, B+ dictionary is fastest, disk is second. The methods except B+ will take you large memory. If you use database caching, you should sure your database can accept very large sql sentence(max_allowed_packet for mysql should be > 5M possible). '), |
| 100 |
); |
); |
| 101 |
|
|
| 102 |
return system_settings_form($form); |
return system_settings_form($form); |
| 104 |
|
|
| 105 |
/**
 * Menu callback; renders the splitter test page.
 *
 * The page consists solely of the 'csplitter_form' form.
 *
 * @param $test
 *   Optional value forwarded unchanged to the csplitter_form() builder as its
 *   extra argument. NOTE(review): presumably a test string taken from the
 *   request path - confirm against the form builder.
 *
 * @return
 *   The rendered form, as returned by drupal_get_form().
 */
function csplitter_view($test = NULL) {
  // Nothing is rendered ahead of the form any more: the split result is
  // reported by the form's submit handler, so there is no $output to prepend.
  return drupal_get_form('csplitter_form', $test);
}
| 114 |
|
|
| 115 |
function csplitter_form(){ |
function csplitter_form($form_state, $test = NULL){ |
| 116 |
|
|
| 117 |
$form['test'] = array( |
$form['test'] = array( |
| 118 |
'#type' => 'textarea', |
'#type' => 'textarea', |
| 128 |
return $form; |
return $form; |
| 129 |
} |
} |
| 130 |
|
|
| 131 |
/**
 * Submit handler for csplitter_form.
 *
 * Runs the submitted text through the configured splitting algorithm and
 * reports the timing and the result as status messages.
 *
 * @param $form
 *   The submitted form structure (unused).
 * @param $form_state
 *   The form state; the text to split is read from
 *   $form_state['values']['test'].
 */
function csplitter_form_submit($form, $form_state){
  // The length limit applies to the raw submission, before trimming.
  if (mb_strlen($form_state['values']['test']) > 10000){
    drupal_set_message('Csplitter: Test String is too long. ( >10000 characters )', 'error');
    return;
  }

  $test = trim($form_state['values']['test']);
  $function = '_csplitter_splite_'. variable_get('csplitter_algorithm', '3');

  // Stays empty when there is nothing to split, so the final message never
  // reads an undefined variable.
  $outstr = '';

  if (mb_strlen($test)>0){
    $cache_method = variable_get('csplitter_cache', 'bplus');
    timer_start('csplitter');
    $outstr = call_user_func($function, $test);
    drupal_set_message($function.', cache:'.$cache_method.'. Processing used '.timer_read('csplitter')."ms");
  }

  // The result is built from user-supplied text and messages are printed as
  // raw HTML, so it has to be escaped before display.
  drupal_set_message(t('Output:').'<br />'.check_plain($outstr));
}
| 152 |
|
|
| 163 |
return $keys; |
return $keys; |
| 164 |
} |
} |
| 165 |
|
|
| 166 |
function _csplitter_splite_1($str){ |
/**
 * Splitting algorithm 3: forward matching in minimum-match mode.
 *
 * Delegates to the forward splitter with its minimum-match flag switched on.
 *
 * @param $str
 *   The text to split.
 *
 * @return
 *   Whatever _csplitter_splite_1() returns for $str.
 */
function _csplitter_splite_3($str){
  $minmatch = true;
  $splitted = _csplitter_splite_1($str, $minmatch);
  return $splitted;
}
| 169 |
|
|
| 170 |
|
function _csplitter_splite_1($str, $minmatch = false){ |
| 171 |
// Search common pattern is ignored |
// Search common pattern is ignored |
| 172 |
// char value is setted by search.module |
// char value is setted by search.module |
| 173 |
//drupal_set_message("$str|".ord($str[0])." ".ord($str[mb_strlen($str)-1])); |
//drupal_set_message("$str|".ord($str[0])." ".ord($str[mb_strlen($str)-1])); |
| 175 |
return $str; |
return $str; |
| 176 |
} |
} |
| 177 |
|
|
| 178 |
|
$word_len = variable_get('csplitter_word_len', 2) + 2; |
| 179 |
|
// init dictionary keys and cache array to disk |
| 180 |
|
$cache_method = variable_get('csplitter_cache', 'bplus'); |
| 181 |
|
|
| 182 |
$filepath = drupal_get_path('module', 'csplitter')."/dict.txt"; |
$filepath = drupal_get_path('module', 'csplitter')."/dict.txt"; |
| 183 |
$fdicpath = drupal_get_path('module', 'csplitter')."/filter.txt"; |
// for big B+ dictionary, we use smallest filter keys |
| 184 |
|
if ($cache_method == 'bplus') |
| 185 |
|
$fdicpath = drupal_get_path('module', 'csplitter')."/bfilter.txt"; |
| 186 |
|
else |
| 187 |
|
$fdicpath = drupal_get_path('module', 'csplitter')."/filter.txt"; |
| 188 |
$arraypath = drupal_get_path('module', 'csplitter')."/cs_keys.obj"; |
$arraypath = drupal_get_path('module', 'csplitter')."/cs_keys.obj"; |
| 189 |
$fobjpath = drupal_get_path('module', 'csplitter')."/filter_keys.obj"; |
$fobjpath = drupal_get_path('module', 'csplitter')."/filter_keys.obj"; |
| 190 |
|
|
| 191 |
$cs_keys = array(); |
$cs_keys = array(); |
| 192 |
$cf_keys = array(); |
$cf_keys = array(); |
| 193 |
|
|
|
// init dictionary keys and cache array to disk |
|
|
$cache_method = variable_get('csplitter_cache', 'none'); |
|
| 194 |
switch ($cache_method){ |
switch ($cache_method){ |
| 195 |
case 'none': |
case 'none': |
| 196 |
$cs_keys = _csplitter_readkeys($filepath); |
$cs_keys = _csplitter_readkeys($filepath); |
| 197 |
$cf_keys = _csplitter_readkeys($fdicpath); |
$cf_keys = _csplitter_readkeys($fdicpath); |
| 198 |
break; |
break; |
| 199 |
|
case 'bplus': |
| 200 |
|
$cf_keys = _csplitter_readkeys($fdicpath); |
| 201 |
|
break; |
| 202 |
case 'disk': |
case 'disk': |
| 203 |
if (!file_exists($arraypath)){ |
if (!file_exists($arraypath)){ |
| 204 |
$cs_keys = _csplitter_readkeys($filepath); |
$cs_keys = _csplitter_readkeys($filepath); |
| 257 |
//forward max match |
//forward max match |
| 258 |
$outstr = ""; |
$outstr = ""; |
| 259 |
//drupal_set_message(mb_strlen($str)."|$str|"); |
//drupal_set_message(mb_strlen($str)."|$str|"); |
| 260 |
|
|
| 261 |
|
if ($cache_method == 'bplus'){ |
| 262 |
|
$handle = fopen(drupal_get_path('module', 'csplitter').'/btree.dat', 'rb'); |
| 263 |
|
|
| 264 |
|
if ($handle == false){ |
| 265 |
|
drupal_set_message("CSplitter: open b+ file failed!"); |
| 266 |
|
return ""; |
| 267 |
|
} |
| 268 |
|
} |
| 269 |
|
|
| 270 |
for ($i=0; $i<mb_strlen($str);){ |
for ($i=0; $i<mb_strlen($str);){ |
| 271 |
$found = 1; |
$found = 1; |
| 272 |
for ($len = 1; $len< 8 && $i - $len>= 0; $len++){ |
$from = 1; |
| 273 |
|
$to = $word_len + 1; |
| 274 |
|
$step = 1; |
| 275 |
|
for ($len = $from; $len< $to; $len += $step){ |
| 276 |
|
if ($minmatch && $found != 1) break; |
| 277 |
|
|
| 278 |
$word = mb_substr($str, $i, $len, "UTF-8"); |
$word = mb_substr($str, $i, $len, "UTF-8"); |
| 279 |
|
//drupal_set_message("$i, $len,$word"); |
| 280 |
|
|
| 281 |
if (ord($word[0])<176){ // not chinese |
if (ord($word[0])<176){ // not chinese |
| 282 |
break; |
break; |
| 283 |
} |
} |
| 284 |
|
|
| 285 |
|
$lastchar = mb_substr($word, $len-1, 1, "UTF-8"); |
| 286 |
|
if (array_key_exists($lastchar, $cf_keys)) |
| 287 |
|
break; |
| 288 |
|
|
| 289 |
//now process chinese |
//now process chinese |
| 290 |
if ($len ==1 ||array_key_exists($word, $cs_keys)){ |
|
| 291 |
$found = $len; |
if ($cache_method == 'bplus'){ |
| 292 |
continue; |
if ($len ==1 || search_in_file($word, $handle)){ |
| 293 |
|
$found = $len; |
| 294 |
|
continue; |
| 295 |
|
} |
| 296 |
|
} |
| 297 |
|
else{ |
| 298 |
|
if ($len ==1 ||array_key_exists($word, $cs_keys)){ |
| 299 |
|
$found = $len; |
| 300 |
|
continue; |
| 301 |
|
} |
| 302 |
|
//else{ |
| 303 |
|
// $len--; |
| 304 |
|
// break; |
| 305 |
|
//} |
| 306 |
} |
} |
|
//else{ |
|
|
// $len--; |
|
|
// break; |
|
|
//} |
|
| 307 |
} |
} |
| 308 |
$len = $found; |
$word = mb_substr($str, $i, $found, "UTF-8"); |
|
$word = mb_substr($str, $i, $len, "UTF-8"); |
|
| 309 |
//drupal_set_message(mb_strlen($str).": $i - $found :".$word); |
//drupal_set_message(mb_strlen($str).": $i - $found :".$word); |
| 310 |
|
|
| 311 |
if ($len >1) // || ($len == 1 && ord($word[0])>=176)) |
if ($found >1) // || ($found == 1 && ord($word[0])>=176)) |
| 312 |
$outstr .= " ".$word." "; |
$outstr .= " ".$word." "; |
| 313 |
else{ |
else { |
| 314 |
if (ord($word[0])>=176){ |
if (ord($word[0])>=176){ |
| 315 |
if (array_key_exists($word, $cf_keys)) |
if (array_key_exists($word, $cf_keys)) |
| 316 |
$outstr .= " ".$word." "; |
$outstr .= " ".$word." "; |
| 317 |
else{ |
else{ |
| 318 |
$nextchar = mb_substr($str, $i+1, 1, "UTF-8"); |
$lastchar = mb_substr($str, $i-1, 1, "UTF-8"); |
| 319 |
//Need Test: if (mb_strlen($nextchar) > 0 && ord($nextchar[0])<176){ |
$nextchar = mb_substr($str, $i+$found, 1, "UTF-8"); |
| 320 |
if (ord($nextchar[0])<176){ |
if (ord($lastchar[0])<176){ |
| 321 |
$outstr .= $word. " "; |
//drupal_set_message($lastchar.":".$word.":".$nextchar); |
| 322 |
|
$word = " ".$word; |
| 323 |
} |
} |
| 324 |
else{ |
if (ord($nextchar[0])<176){ |
| 325 |
$outstr .= "".$word.""; |
//drupal_set_message($lastchar.":".$word.":".$nextchar); |
| 326 |
|
$word = $word." "; |
| 327 |
} |
} |
| 328 |
|
$outstr .= $word; |
| 329 |
} |
} |
| 330 |
} |
} |
| 331 |
else{ |
else{ |
| 332 |
$outstr .= $word; |
$outstr .= $word; |
| 333 |
} |
} |
| 334 |
} |
} |
| 335 |
|
$i += $found; |
|
$i += $len; |
|
| 336 |
} |
} |
| 337 |
|
|
| 338 |
|
if ($cache_method == 'bplus'){ |
| 339 |
|
fclose($handle); |
| 340 |
|
} |
| 341 |
//drupal_set_message($outstr); |
//drupal_set_message($outstr); |
| 342 |
return $outstr; |
return $outstr; |
| 343 |
} |
} |
| 344 |
|
|
| 345 |
function _csplitter_splite_2($str){ |
/**
 * Splitting algorithm 4: converse (backward) matching in minimum-match mode.
 *
 * Delegates to the converse splitter with its minimum-match flag switched on.
 *
 * @param $str
 *   The text to split.
 *
 * @return
 *   Whatever _csplitter_splite_2() returns for $str.
 */
function _csplitter_splite_4($str){
  $minmatch = true;
  $splitted = _csplitter_splite_2($str, $minmatch);
  return $splitted;
}
| 348 |
|
|
| 349 |
|
function _csplitter_splite_2($str, $minmatch = fasle){ |
| 350 |
|
$word_len = variable_get('csplitter_word_len', 2) + 2; |
| 351 |
|
// init dictionary keys and cache array to disk |
| 352 |
|
$cache_method = variable_get('csplitter_cache', 'bplus'); |
| 353 |
|
|
| 354 |
$filepath = drupal_get_path('module', 'csplitter')."/dict.txt"; |
$filepath = drupal_get_path('module', 'csplitter')."/dict.txt"; |
| 355 |
$fdicpath = drupal_get_path('module', 'csplitter')."/filter.txt"; |
if ($cache_method == 'bplus') |
| 356 |
|
$fdicpath = drupal_get_path('module', 'csplitter')."/bfilter.txt"; |
| 357 |
|
else |
| 358 |
|
$fdicpath = drupal_get_path('module', 'csplitter')."/filter.txt"; |
| 359 |
$arraypath = drupal_get_path('module', 'csplitter')."/cs_keys.obj"; |
$arraypath = drupal_get_path('module', 'csplitter')."/cs_keys.obj"; |
| 360 |
$fobjpath = drupal_get_path('module', 'csplitter')."/filter_keys.obj"; |
$fobjpath = drupal_get_path('module', 'csplitter')."/filter_keys.obj"; |
| 361 |
|
|
| 362 |
$cs_keys = array(); |
$cs_keys = array(); |
| 363 |
$cf_keys = array(); |
$cf_keys = array(); |
| 364 |
|
|
|
// init dictionary keys and cache array to disk |
|
|
$cache_method = variable_get('csplitter_cache', 'none'); |
|
| 365 |
switch ($cache_method){ |
switch ($cache_method){ |
| 366 |
case 'none': |
case 'none': |
| 367 |
$cs_keys = _csplitter_readkeys($filepath); |
$cs_keys = _csplitter_readkeys($filepath); |
| 368 |
$cf_keys = _csplitter_readkeys($fdicpath); |
$cf_keys = _csplitter_readkeys($fdicpath); |
| 369 |
break; |
break; |
| 370 |
|
case 'bplus': |
| 371 |
|
$cf_keys = _csplitter_readkeys($fdicpath); |
| 372 |
|
break; |
| 373 |
case 'disk': |
case 'disk': |
| 374 |
if (!file_exists($arraypath)){ |
if (!file_exists($arraypath)){ |
| 375 |
$cs_keys = _csplitter_readkeys($filepath); |
$cs_keys = _csplitter_readkeys($filepath); |
| 427 |
|
|
| 428 |
//converse max match |
//converse max match |
| 429 |
$outstr = ""; |
$outstr = ""; |
| 430 |
|
|
| 431 |
|
if ($cache_method == 'bplus'){ |
| 432 |
|
$handle = fopen(drupal_get_path('module', 'csplitter').'/btree.dat', 'rb'); |
| 433 |
|
|
| 434 |
|
if ($handle == false){ |
| 435 |
|
drupal_set_message("CSplitter: open b+ file failed!"); |
| 436 |
|
return ""; |
| 437 |
|
} |
| 438 |
|
} |
| 439 |
|
|
| 440 |
for ($i=mb_strlen($str); $i>0;){ |
for ($i=mb_strlen($str); $i>0;){ |
| 441 |
$found = 1; |
$found = 1; |
| 442 |
for ($len = 1; $len< 8 && $i - $len>= 0; $len++){ |
for ($len = 1; $len< ($word_len+1); $len++){ |
| 443 |
|
if ($minmatch && $found != 1) break; |
| 444 |
$word = mb_substr($str, $i-$len, $len, "UTF-8"); |
$word = mb_substr($str, $i-$len, $len, "UTF-8"); |
| 445 |
|
|
| 446 |
|
if (ord($word[0])<176){ // not chinese |
| 447 |
|
break; |
| 448 |
|
} |
| 449 |
|
|
| 450 |
|
$firstchar = mb_substr($word, 0, 1, "UTF-8"); |
| 451 |
|
if (array_key_exists($firstchar, $cf_keys)) |
| 452 |
|
break; |
| 453 |
|
|
| 454 |
//now process chinese |
//now process chinese |
| 455 |
if ($len ==1 ||array_key_exists($word, $cs_keys)){ |
|
| 456 |
$found = $len; |
if ($cache_method == 'bplus'){ |
| 457 |
continue; |
if ($len ==1 || search_in_file($word, $handle)){ |
| 458 |
|
$found = $len; |
| 459 |
|
continue; |
| 460 |
|
} |
| 461 |
|
} |
| 462 |
|
else{ |
| 463 |
|
if ($len ==1 ||array_key_exists($word, $cs_keys)){ |
| 464 |
|
$found = $len; |
| 465 |
|
continue; |
| 466 |
|
} |
| 467 |
} |
} |
| 468 |
//else{ |
//else{ |
| 469 |
// $len--; |
// $len--; |
| 470 |
// break; |
// break; |
| 471 |
//} |
//} |
|
|
|
|
if (ord($word[0])<176){ // not chinese |
|
|
break; |
|
|
} |
|
| 472 |
} |
} |
| 473 |
$len = $found; |
$len = $found; |
| 474 |
|
|
| 494 |
$outstr = $word. " " .$outstr; |
$outstr = $word. " " .$outstr; |
| 495 |
else { |
else { |
| 496 |
$outstr = $word . $outstr; |
$outstr = $word . $outstr; |
|
//drupal_set_message($word); |
|
| 497 |
} |
} |
| 498 |
} |
} |
| 499 |
|
|
| 500 |
$i -= $len; |
$i -= $len; |
| 501 |
} |
} |
| 502 |
|
|
| 503 |
|
if ($cache_method == 'bplus'){ |
| 504 |
|
fclose($handle); |
| 505 |
|
} |
| 506 |
|
|
| 507 |
return $outstr; |
return $outstr; |
| 508 |
} |
} |
| 509 |
|
|
| 510 |
/**
 * Implementation of hook_search_preprocess().
 *
 * Passes the text through the splitting function selected by the
 * 'csplitter_algorithm' variable (default '3').
 *
 * @param $text
 *   The text to preprocess.
 *
 * @return
 *   The split text, or $text unchanged when the configured algorithm does not
 *   map to an existing function.
 */
function csplitter_search_preprocess($text) {
  $function = '_csplitter_splite_'. variable_get('csplitter_algorithm', '3');

  // A stale or invalid setting must not wipe out the text being indexed:
  // without this guard call_user_func() would fail and nothing useful would
  // be handed back to the search module.
  if (!function_exists($function)) {
    return $text;
  }

  return call_user_func($function, $text);
}
| 514 |
|
|
| 515 |
|
/**
 * Looks up a key in the on-disk B+ tree dictionary.
 *
 * File layout, as read by this function:
 *  - At byte offset 160: three 4-byte integers; the third one is the index of
 *    the root node (the first two are named item count and node count in the
 *    original code and are not needed for a lookup).
 *  - Nodes are 208 bytes each and are addressed by index (offset = index * 208):
 *      4 bytes    number of keys stored in the node
 *      11 x 14    key slots, NUL-terminated strings
 *      2 bytes    alignment padding
 *      12 x 4     branch pointers (node indexes), -1 meaning "no child"
 * Integers are decoded with unpack('l'), i.e. 32-bit signed in machine byte
 * order.
 *
 * @param $SearchKey
 *   The key (word) to look for; compared byte-wise with strcmp().
 * @param $handle
 *   A seekable, readable stream opened on the B+ tree file. The stream
 *   position is changed by this function.
 *
 * @return
 *   TRUE when the key is stored in the tree, FALSE when it is not or when the
 *   file is truncated or structurally invalid.
 */
function search_in_file($SearchKey, $handle){
  $NodeSize = 208;
  $MaxKeys = 11;
  $KeySize = 14;
  $KeysOffset = 4;
  $BranchOffset = 160;

  // Read the header record; a file too short to hold it contains no tree.
  if (fseek($handle, 160) !== 0) {
    return false;
  }
  $Header = fread($handle, 12);
  if ($Header === false || strlen($Header) < 12) {
    return false;
  }
  $HeaderFields = unpack('l3', $Header);
  $CurrentRoot = $HeaderFields[3];

  // Node 0 holds the header and -1 marks a missing child, so only positive
  // indexes are real nodes. Remembering visited nodes stops a corrupt file
  // with cyclic branch pointers from looping forever.
  $Visited = array();
  while ($CurrentRoot > 0 && !isset($Visited[$CurrentRoot])) {
    $Visited[$CurrentRoot] = true;

    if (fseek($handle, $CurrentRoot * $NodeSize) !== 0) {
      return false;
    }
    $Node = fread($handle, $NodeSize);
    if ($Node === false || strlen($Node) < $NodeSize) {
      // Truncated node: treat as "not found" instead of decoding garbage.
      return false;
    }

    $CountField = unpack('l', substr($Node, 0, 4));
    $CurNodeCount = $CountField[1];
    if ($CurNodeCount < 0 || $CurNodeCount > $MaxKeys) {
      // A key count outside the slot range can only come from a corrupt file.
      return false;
    }

    // unpack() numbers its results from 1; re-index so $Branch[0] is the
    // leftmost child.
    $Branch = array_values(unpack('l12', substr($Node, $BranchOffset, 48)));

    if ($CurNodeCount == 0) {
      // An empty node has no keys to compare; only its leftmost branch can
      // lead anywhere.
      $CurrentRoot = $Branch[0];
      continue;
    }

    // Only the slots actually in use need decoding.
    $StrBin = array();
    for ($i = 0; $i < $CurNodeCount; $i++) {
      $StrArray = explode(chr(0x0), substr($Node, $KeysOffset + $i * $KeySize, $KeySize));
      $StrBin[$i] = trim($StrArray[0]);
    }

    if (strcmp($SearchKey, $StrBin[0]) < 0) {
      // Smaller than every key in this node: descend into the leftmost child.
      $Location = -1;
    }
    else {
      // Scan from the right for the last key that is <= the search key.
      $Location = $CurNodeCount - 1;
      while ($Location > 0 && strcmp($SearchKey, $StrBin[$Location]) < 0) {
        $Location--;
      }

      if (strcmp($SearchKey, $StrBin[$Location]) === 0) {
        return true;
      }
    }

    // Not in this node: follow the branch to the right of $Location.
    $CurrentRoot = $Branch[$Location + 1];
  }

  return false;
}
| 572 |
|
|
| 573 |
|
|
| 574 |
?> |
?> |