Stripping CVS keywords
[project/porterstemmer.git] / porterstemmer.test
CommitLineData
a4ce718f 1<?php
a4ce718f
JH
2
3/**
4 * @file
5 * Tests for the Porter Stemmer module.
6 * By Jennifer Hodgdon of Poplar ProductivityWare, www.poplarware.com
73ea7032 7 * Unit tests are based on sample words from
a4ce718f
JH
8 * http://snowball.tartarus.org/algorithms/english/stemmer.html
9 * which are stored in a separate file (testwords.txt).
10 */
11
12/**
73ea7032
JH
13 * Functional test for Porter Stemmer.
14 */
class PorterStemmerFunctionalTest extends DrupalWebTestCase {

  /**
   * User account created and logged in by setUp().
   *
   * Declared explicitly so the test does not rely on a dynamically created
   * property.
   *
   * @var object
   */
  protected $superuser;

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    return array(
      'name' => t('Functional test'),
      'description' => t('Test that stemmed searching works.'),
      'group' => t('Porter Stemmer'),
    );
  }

  /**
   * Enables the modules, logs in a privileged user, and creates content.
   *
   * Three pages are created whose bodies contain "walk", "walked", and
   * "walking" respectively, and cron is run afterwards.
   */
  public function setUp() {
    parent::setUp('porterstemmer', 'search');

    // Set up a super-user.
    $this->superuser = $this->drupalCreateUser(array(
      'administer nodes',
      'access content',
      'administer content types',
      'administer search',
      'search content',
      'access administration pages',
      'administer site configuration',
    ));
    $this->drupalLogin($this->superuser);

    // Create some content to search, with the words walk, walked, and walking.
    $this->createSearchablePage('first page', 'I walk through the streets, looking around for trouble', 'walk');
    $this->createSearchablePage('second page', 'I walked home from work today.', 'walked');
    $this->createSearchablePage('third page', 'I am always walking everywhere.', 'walking');

    // NOTE(review): presumably cron is what gets the new nodes into the search
    // index -- confirm against the search module.
    $this->cronRun();
  }

  /**
   * Creates a page node, visits it, and asserts that a word is displayed.
   *
   * @param $title
   *   Title for the new node.
   * @param $body
   *   Body text for the new node.
   * @param $expected
   *   Text that must appear on the node page.
   *
   * @return
   *   The node returned by drupalCreateNode().
   */
  protected function createSearchablePage($title, $body, $expected) {
    $node = $this->drupalCreateNode(array(
      'body' => array(LANGUAGE_NONE => array(array('value' => $body))),
      'type' => 'page',
      'language' => LANGUAGE_NONE,
      'title' => $title,
    ));

    $this->drupalGet('node/' . $node->nid);
    $this->assertText($expected);

    return $node;
  }

  /**
   * Tests that all three pages can be found in searches.
   */
  public function testSearch() {
    $this->drupalLogin($this->superuser);

    // Each form of the word must find all three pages.
    foreach (array('walk', 'walking', 'walked') as $keys) {
      $this->drupalPost('search', array('keys' => $keys), 'Search');
      $this->assertText('first', "First page was found with search for $keys");
      $this->assertText('second', "Second page was found with search for $keys");
      $this->assertText('third', "Third page was found with search for $keys");
    }

    // Search for 'blahblahblah' and verify none of the pages were found.
    $this->drupalPost('search', array('keys' => 'blahblahblah'), 'Search');
    $this->assertNoText('first', 'First page was not found with bogus search');
    $this->assertNoText('second', 'Second page was not found with bogus search');
    $this->assertNoText('third', 'Third page was not found with bogus search');
  }
}
113
114/**
a4ce718f
JH
115 * Unit tests for Porter Stemmer - Stemming output part 1.
116 */
class PorterStemmerOutput1UnitTest extends DrupalWebTestCase {

  /**
   * Result of _porterstemmer_pecl_loaded(), stored by setUp().
   *
   * @var bool
   */
  public $has_pecl_stem = FALSE;

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    return array(
      'name' => t('Stemming output 1'),
      'description' => t('Test that the stemming function returns the correct stemmed words'),
      'group' => t('Porter Stemmer'),
    );
  }

  /**
   * Enables the module and records whether the PECL stem library is loaded.
   */
  public function setUp() {
    parent::setUp('porterstemmer');

    // See if the PECL stemming library is installed.
    $this->has_pecl_stem = _porterstemmer_pecl_loaded();
  }

  /**
   * Tests the first 2000 words in the file for stemming accuracy.
   */
  function testStemmingUnitTest() {
    $this->_run_porterstemmer_stem_test(0, 2000);
  }

  /**
   * Runs a unit test for a portion of words in the test file.
   *
   * The output of the porterstemmer_stem() function is tested against
   * known input/output pairs from
   * http://snowball.tartarus.org/algorithms/english/stemmer.html
   *
   * Or if $use_pecl is set to TRUE, the PECL stem_english() function is
   * used instead.
   *
   * Lines with fewer than two words, or whose input or expected word is
   * shorter than two characters, are ignored and count neither towards
   * $skipto nor towards $runto.
   *
   * @param $skipto
   *   Number of eligible lines to skip before testing starts; equivalently,
   *   the zero-based index of the first eligible line that is tested.
   * @param $runto
   *   Maximum number of eligible lines to test.
   * @param $use_pecl
   *   If TRUE, use the PECL library if it is installed. If it isn't installed,
   *   no tests are performed.
   */
  function _run_porterstemmer_stem_test($skipto = 0, $runto = 2000, $use_pecl = FALSE) {
    if ($use_pecl && !$this->has_pecl_stem) {
      $this->assertTrue(1, "No PECL stem library found, aborting test");
      return;
    }

    // Open the word file. The warning is suppressed because a failure is
    // reported through the assertion just below.
    $file = drupal_get_path('module', 'porterstemmer') . '/testwords.txt';
    $handle = @fopen($file, "r");
    $this->assertTrue($handle, "Open file containing words to test", 'Startup');
    if (!$handle) {
      // No point doing the rest of the test.
      return;
    }

    // Set min characters for search to 2 so we test the whole word list.
    variable_set('minimum_word_size', 2);
    porterstemmer_too_short('', TRUE);

    // The group label does not change per word, so translate it only once.
    $group = t('Stemming test');

    $ran = 0;
    $skipped = 0;
    $start = time();
    $mstart = microtime(TRUE);

    // Stop on end of file *or* on a read error; fgets() returns FALSE in both
    // cases, whereas an feof() test alone could spin forever on an error.
    while ($ran < $runto && ($line = fgets($handle, 4096)) !== FALSE) {
      // Split the line into words.
      $words = preg_split("/\s+/", $line, -1, PREG_SPLIT_NO_EMPTY);
      if (count($words) < 2) {
        continue;
      }

      $in = $words[0];
      $right = $words[1];

      // Skip words less than 2 characters, which is minimum for Porter.
      if (drupal_strlen($in) < 2 || drupal_strlen($right) < 2) {
        continue;
      }

      // Skip exactly $skipto eligible lines. Compare before counting so that
      // the line with zero-based index $skipto is the first one tested and
      // consecutive chunks do not overlap.
      if ($skipped < $skipto) {
        $skipped++;
        continue;
      }

      // Stem the word. $use_pecl can only be TRUE here when the library was
      // detected, because of the early return at the top.
      if ($use_pecl) {
        $stem = stem_english($in);
      }
      else {
        $stem = porterstemmer_stem($in);
      }

      // Test correctness.
      $this->assertEqual($right, $stem, "Stemming $in results in $right (was $stem)", $group);
      $ran++;
    }

    fclose($handle);

    $elapsed = time() - $start;
    $melapsed = microtime(TRUE) - $mstart;
    $this->assertTrue(1, "Elapsed time: $elapsed seconds ($melapsed)");
  }
}
236
237/**
238 * Unit tests for Porter Stemmer - Stemming output part 2.
239 */
class PorterStemmerOutput2UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 2');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 2000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 2000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
257
258/**
259 * Unit tests for Porter Stemmer - Stemming output part 3.
260 */
class PorterStemmerOutput3UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 3');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 4000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 4000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
278
279/**
280 * Unit tests for Porter Stemmer - Stemming output part 4.
281 */
class PorterStemmerOutput4UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 4');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 6000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 6000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
299
300/**
301 * Unit tests for Porter Stemmer - Stemming output part 5.
302 */
class PorterStemmerOutput5UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 5');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 8000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 8000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
320
321/**
322 * Unit tests for Porter Stemmer - Stemming output part 6.
323 */
class PorterStemmerOutput6UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 6');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 10000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 10000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
341
342/**
343 * Unit tests for Porter Stemmer - Stemming output part 7.
344 */
class PorterStemmerOutput7UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 7');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 12000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 12000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
362
363/**
364 * Unit tests for Porter Stemmer - Stemming output part 8.
365 */
class PorterStemmerOutput8UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 8');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 14000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 14000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
383
384/**
385 * Unit tests for Porter Stemmer - Stemming output part 9.
386 */
class PorterStemmerOutput9UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 9');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 16000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 16000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
404
405/**
406 * Unit tests for Porter Stemmer - Stemming output part 10.
407 */
class PorterStemmerOutput10UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 10');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 18000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 18000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
425
426/**
427 * Unit tests for Porter Stemmer - Stemming output part 11.
428 */
class PorterStemmerOutput11UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 11');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 20000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 20000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
446
447/**
448 * Unit tests for Porter Stemmer - Stemming output part 12.
449 */
class PorterStemmerOutput12UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 12');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 22000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 22000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
467
468/**
469 * Unit tests for Porter Stemmer - Stemming output part 13.
470 */
class PorterStemmerOutput13UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 13');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 24000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 24000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
488
489/**
490 * Unit tests for Porter Stemmer - Stemming output part 14.
491 */
class PorterStemmerOutput14UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 14');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 26000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 26000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
509
510/**
511 * Unit tests for Porter Stemmer - Stemming output part 15.
512 */
class PorterStemmerOutput15UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 15');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words, starting at offset 28000 of the word list.
   */
  function testStemmingUnitTest() {
    $offset = 28000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit);
  }
}
530
531/**
532 * Unit tests for Porter Stemmer - Stemming internals.
533 */
class PorterStemmerInternalsUnitTest extends DrupalWebTestCase {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    return array(
      'name' => t('Stemming internals'),
      'description' => t('Test that various algorithm steps and internal functions are working correctly'),
      'group' => t('Porter Stemmer'),
    );
  }

  /**
   * Enables the Porter Stemmer module for the test site.
   */
  public function setUp() {
    parent::setUp('porterstemmer');
  }

  /**
   * Sets the 'minimum_word_size' variable and refreshes the stemmer.
   *
   * NOTE(review): porterstemmer_too_short('', TRUE) presumably makes the
   * module re-read the variable -- confirm against the module code.
   *
   * @param $size
   *   Value to store in the 'minimum_word_size' variable.
   */
  protected function setMinimumWordSize($size) {
    variable_set('minimum_word_size', $size);
    porterstemmer_too_short('', TRUE);
  }

  /**
   * Asserts porterstemmer_stem() output under a given minimum word size.
   *
   * @param $size
   *   Minimum word size to configure before stemming.
   * @param $words
   *   Array mapping each input word to its expected stem.
   */
  protected function assertStemsWithMinimumSize($size, array $words) {
    $this->setMinimumWordSize($size);

    foreach ($words as $in => $out) {
      $stem = porterstemmer_stem($in);
      $this->assertEqual($out, $stem, "Stemming length $size test for $in gives $out (was $stem)", t('Stemming length'));
    }
  }

  /**
   * Verify that short words are not stemmed, and longer ones are.
   */
  function testStemLength3UnitTest() {
    // Words 3 letters or less should not be stemmed if min word length is 3.
    $this->assertStemsWithMinimumSize(3, array(
      'a' => 'a',
      'at' => 'at',
      'say' => 'say',
      'fished' => 'fish',
      'saying' => 'say',
    ));
  }

  /**
   * Verify that short words are not stemmed, and longer ones are.
   */
  function testStemLength4UnitTest() {
    // With a minimum word length of 4, 'saying' is expected to come back
    // unchanged (its stem 'say' would only have three letters), while
    // 'fished' is still expected to become 'fish'.
    $this->assertStemsWithMinimumSize(4, array(
      'a' => 'a',
      'at' => 'at',
      'say' => 'say',
      'fished' => 'fish',
      'saying' => 'saying',
    ));
  }

  /**
   * Tests the function that determines if a word is "short".
   */
  function testShortWord() {
    // Test "shortness", using examples from algorithm web page,
    // as well as several variations on the word "administ...".
    $this->setMinimumWordSize(2);

    // Each entry: word, second argument for porterstemmer_short_word(),
    // and whether the word is expected to be reported as short.
    $cases = array(
      array('administered', 2, FALSE),
      array('administer', 2, FALSE),
      array('admin', 2, FALSE),
      array('bed', 3, TRUE),
      array('shed', 4, TRUE),
      array('shred', 5, TRUE),
      array('bead', 4, FALSE),
      array('beds', 3, FALSE),
      array('bake', 3, FALSE),
      array('bak', 3, TRUE),
    );

    foreach ($cases as $case) {
      list($word, $r1, $expect_short) = $case;
      $is_short = porterstemmer_short_word($word, $r1);

      // The group label belongs to the assertion, not to the function under
      // test, so it is passed as the assertion's third argument.
      if ($expect_short) {
        $this->assertTrue($is_short, "$word should be a short word", 'Stemmer steps');
      }
      else {
        $this->assertFalse($is_short, "$word should not be a short word", 'Stemmer steps');
      }
    }
  }

  /**
   * Test internal steps on the word "administered".
   */
  function testAdministered() {
    $this->setMinimumWordSize(2);

    $r1 = 0;
    $r2 = 0;
    $word = 'administered';

    porterstemmer_prestemming($word, $r1, $r2);

    // Test calculation of R1 and R2.
    $this->assertEqual($r1, 2, "R1 for administered should be 2, was $r1", 'Stemmer steps');
    $this->assertEqual($r2, 5, "R2 for administered should be 5, was $r2", 'Stemmer steps');

    // Test step 1b of the algorithm.
    porterstemmer_step1b($word, $r1);
    $this->assertEqual($word, 'administer', "Step1b should be administer, was $word", 'Stemmer steps');

    // Test step 4 of the algorithm.
    porterstemmer_step4($word, $r2);
    $this->assertEqual($word, 'administ', "Step4 should be administ, was $word", 'Stemmer steps');
  }

  /**
   * Test internal steps on the word "baked".
   */
  function testBaked() {
    $this->setMinimumWordSize(2);

    $r1 = 0;
    $r2 = 0;
    $word = 'baked';

    porterstemmer_prestemming($word, $r1, $r2);

    // Test calculation of R1 and R2.
    $this->assertEqual($r1, 3, "R1 for baked should be 3, was $r1", 'Stemmer steps');
    $this->assertEqual($r2, 5, "R2 for baked should be 5, was $r2", 'Stemmer steps');

    // Test step 1b of the algorithm.
    porterstemmer_step1b($word, $r1);
    $this->assertEqual($word, 'bake', "Step1b should be bake, was $word", 'Stemmer steps');

    // Test step 5 of the algorithm.
    porterstemmer_step5($word, $r1, $r2);
    $this->assertEqual($word, 'bake', "Step5 should be bake, was $word", 'Stemmer steps');
  }

  /**
   * Test internal steps on the word "geology".
   */
  function testGeology() {
    $r1 = 0;
    $r2 = 0;
    $word = 'geology';

    $this->setMinimumWordSize(2);

    porterstemmer_prestemming($word, $r1, $r2);

    // Test calculation of R1 and R2.
    $this->assertEqual($r1, 4, "R1 for geology should be 4, was $r1", 'Stemmer steps');
    $this->assertEqual($r2, 6, "R2 for geology should be 6, was $r2", 'Stemmer steps');

    // Test step 1c of the algorithm.
    porterstemmer_step1c($word);
    $this->assertEqual($word, 'geologi', "Step1c should be geologi, was $word", 'Stemmer steps');

    // Test step 2 of the algorithm.
    porterstemmer_step2($word, $r1);
    $this->assertEqual($word, 'geolog', "Step2 should be geolog, was $word", 'Stemmer steps');
  }

  /**
   * Test internal steps on the words "ies" and "ied".
   */
  function testIesIed() {
    $r1a = 0;
    $r2a = 0;
    $r1b = 0;
    $r2b = 0;
    $worda = 'ied';
    $wordb = 'ies';

    $this->setMinimumWordSize(2);

    porterstemmer_prestemming($worda, $r1a, $r2a);
    porterstemmer_prestemming($wordb, $r1b, $r2b);

    // Test step 1a of the algorithm.
    porterstemmer_step1a($worda);
    porterstemmer_step1a($wordb);

    $this->assertEqual($worda, 'ie', "Step1a should be ie, was $worda", 'Stemmer steps');
    $this->assertEqual($wordb, 'ie', "Step1a should be ie, was $wordb", 'Stemmer steps');
  }

  /**
   * Tests the excerpt function porterstemmer_sbp_excerpt_match().
   */
  function testExcerpts() {
    // Test with simple stemmed match.
    $key = 'walking';
    $text = 'I walked to the Grand Walks yesterday.';
    $offset = 0;
    $boundary = '[ ]+';
    $result1 = porterstemmer_sbp_excerpt_match($key, $text, $offset, $boundary);
    $this->assertEqual($result1['where'], 2, 'Found match 1 in right place', 'Excerpt');
    $this->assertEqual($result1['keyword'], 'walked', 'Found right keyword for match 1', 'Excerpt');

    // Test with upper-case in the keyword.
    $result1b = porterstemmer_sbp_excerpt_match('waLk', $text, $offset, $boundary);
    $this->assertEqual($result1b['where'], 2, 'Found match 1b in right place', 'Excerpt');
    $this->assertEqual($result1b['keyword'], 'walked', 'Found right keyword for match 1b', 'Excerpt');

    // Test with upper-case in the text.
    $offset = 12;
    $result2 = porterstemmer_sbp_excerpt_match($key, $text, $offset, $boundary);
    $this->assertEqual($result2['where'], 22, 'Found match 2 in right place', 'Excerpt');
    $this->assertEqual($result2['keyword'], 'Walks', 'Found right keyword for match 2', 'Excerpt');

    // Test with a false match followed by a real match.
    $text = 'I walknotawordhere to walk to school.';
    $offset = 0;
    $result3 = porterstemmer_sbp_excerpt_match($key, $text, $offset, $boundary);
    $this->assertEqual($result3['where'], 22, 'Found match 3 in right place', 'Excerpt');
    $this->assertEqual($result3['keyword'], 'walk', 'Found right keyword for match 3', 'Excerpt');
  }
}
73ea7032
JH
774
775/**
776 * Unit tests for Porter Stemmer - Stemming output part 1 PECL.
777 */
class PorterStemmerPECLOutput1UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 1 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 0, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 0;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
795
796/**
797 * Unit tests for Porter Stemmer - Stemming output part 2 PECL.
798 */
class PorterStemmerPECLOutput2UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 2 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 2000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 2000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
816
817/**
818 * Unit tests for Porter Stemmer - Stemming output part 3 PECL.
819 */
class PorterStemmerPECLOutput3UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 3 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 4000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 4000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
837
838/**
839 * Unit tests for Porter Stemmer - Stemming output part 4 PECL.
840 */
class PorterStemmerPECLOutput4UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 4 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 6000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 6000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
858
859/**
860 * Unit tests for Porter Stemmer - Stemming output part 5 PECL.
861 */
class PorterStemmerPECLOutput5UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 5 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 8000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 8000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
879
880/**
881 * Unit tests for Porter Stemmer - Stemming output part 6 PECL.
882 */
class PorterStemmerPECLOutput6UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 6 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 10000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 10000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
900
901/**
902 * Unit tests for Porter Stemmer - Stemming output part 7 PECL.
903 */
class PorterStemmerPECLOutput7UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 7 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 12000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 12000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
921
922/**
923 * Unit tests for Porter Stemmer - Stemming output part 8 PECL.
924 */
class PorterStemmerPECLOutput8UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 8 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 14000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 14000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
942
943/**
944 * Unit tests for Porter Stemmer - Stemming output part 9 PECL.
945 */
class PorterStemmerPECLOutput9UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 9 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 16000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 16000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
963
964/**
965 * Unit tests for Porter Stemmer - Stemming output part 10 PECL.
966 */
class PorterStemmerPECLOutput10UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 10 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 18000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 18000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
984
985/**
986 * Unit tests for Porter Stemmer - Stemming output part 11 PECL.
987 */
class PorterStemmerPECLOutput11UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 11 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 20000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 20000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
1005
1006/**
1007 * Unit tests for Porter Stemmer - Stemming output part 12 PECL.
1008 */
class PorterStemmerPECLOutput12UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 12 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 22000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 22000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
1026/**
1027 * Unit tests for Porter Stemmer - Stemming output part 13 PECL.
1028 */
class PorterStemmerPECLOutput13UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 13 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 24000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 24000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
1046/**
1047 * Unit tests for Porter Stemmer - Stemming output part 14 PECL.
1048 */
class PorterStemmerPECLOutput14UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 14 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 26000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 26000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}
1066
1067/**
1068 * Unit tests for Porter Stemmer - Stemming output part 15 PECL.
1069 */
class PorterStemmerPECLOutput15UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description, and group for this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 15 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks up to 2000 words from offset 28000, requesting the PECL stemmer.
   */
  function testStemmingUnitTest() {
    $offset = 28000;
    $limit = 2000;
    $this->_run_porterstemmer_stem_test($offset, $limit, TRUE);
  }
}