* Allows Porter Stemmer to display better search excerpts with the
* Search by page module.
*/
-function porterstemmer_sbp_excerpt_match( $key, $text, $offset, $boundary ) {
+function porterstemmer_sbp_excerpt_match($key, $text, $offset, $boundary) {
// Stem the keyword down to its root form.
- $key = porterstemmer_stem( $key );
+ $key = porterstemmer_stem($key);
// In many cases, the root word is a substring of the full word, but not
// all. The cases where it is not, the root ends in e, i, or y, and if this
// last letter is removed, the root is a substring of the full word.
// So remove these letters at the end of the root.
-
$didit = FALSE;
- porterstemmer_suffix( $key, 'i', '', $didit, NULL, 2 ) OR
- porterstemmer_suffix( $key, 'e', '', $didit, NULL, 2 ) OR
- porterstemmer_suffix( $key, 'y', '', $didit, NULL, 2 );
+ porterstemmer_suffix($key, 'i', '', $didit, NULL, 2) OR
+ porterstemmer_suffix($key, 'e', '', $didit, NULL, 2) OR
+ porterstemmer_suffix($key, 'y', '', $didit, NULL, 2);
// Look for this modified key at the start of a word.
-
$match = array();
- if (!preg_match('/' . $boundary . $key . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset )) {
- // didn't match our modified key.
+ if (!preg_match('/'. $boundary . '(' . $key . ')/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
+ // Didn't match our modified key.
return FALSE;
}
// actually matched, so it can be highlighted (making sure it's a real match
// for our key).
$newmatch = array();
- foreach ( $match as $item ) {
- $pos = $match[0][1];
- // Note: Do not use drupal_strlen/drupal_substr here! Need the real PHP
- // string lengths/pos.
- if (preg_match('/' . $boundary . '/iu', $text, $newmatch,
- PREG_OFFSET_CAPTURE, $pos + strlen($key))) {
- $keyfound = substr($text, $pos, $newmatch[0][1] - $pos);
- $foundstem = porterstemmer_stem( $keyfound );
- porterstemmer_suffix( $foundstem, 'i', '', $didit, NULL, 2 ) OR
- porterstemmer_suffix( $foundstem, 'e', '', $didit, NULL, 2 ) OR
- porterstemmer_suffix( $foundstem, 'y', '', $didit, NULL, 2 );
-
- if ($foundstem == $key) {
- return array('where' => $pos, 'keyword' => $keyfound);
- }
- }
+ $pos = $match[1][1];
+ // Note: Do not use drupal_strlen/drupal_substr here! Need the real PHP
+ // string lengths/pos.
+ if (preg_match('/' . $boundary . '/iu', $text, $newmatch,
+ PREG_OFFSET_CAPTURE, $pos + strlen($key))) {
+ $keyfound = substr($text, $pos, $newmatch[0][1] - $pos);
+ }
+ else {
+ // No later boundary was found, so the word runs to the end of the string.
+ $keyfound = substr($text, $pos);
}
- // If we get here, none of the potential matches worked out.
- return FALSE;
+ $foundstem = porterstemmer_stem($keyfound);
+ porterstemmer_suffix($foundstem, 'i', '', $didit, NULL, 2) OR
+ porterstemmer_suffix($foundstem, 'e', '', $didit, NULL, 2) OR
+ porterstemmer_suffix($foundstem, 'y', '', $didit, NULL, 2);
+
+ // Both $foundstem and $key may contain upper case, so compare lowercased.
+ if (drupal_strtolower($foundstem) == drupal_strtolower($key)) {
+ return array('where' => $pos, 'keyword' => $keyfound);
+ }
+
+ // If we get here, it was a false match, so search again starting just
+ // after the word we rejected.
+ return porterstemmer_sbp_excerpt_match($key, $text, $pos + strlen($keyfound), $boundary);
}
/**