/[drupal]/contributions/modules/porterstemmer/porterstemmer.test
ViewVC logotype

Diff of /contributions/modules/porterstemmer/porterstemmer.test

Parent Directory | Revision Log | View Revision Graph | View Patch

revision 1.1.2.2, Wed Oct 14 19:00:04 2009 UTC revision 1.1.2.3, Wed Oct 21 23:39:54 2009 UTC
# Line 1  Line 1 
1  <?php  <?php
2  // $Id: porterstemmer.test,v 1.1.2.1 2009/08/26 19:22:49 jhodgdon Exp $  // $Id: porterstemmer.test,v 1.1.2.2 2009/10/14 19:00:04 jhodgdon Exp $
3    
4  /**  /**
5   * @file   * @file
# Line 15  Line 15 
15   */   */
16  class PorterStemmerOutput1UnitTest extends DrupalWebTestCase {  class PorterStemmerOutput1UnitTest extends DrupalWebTestCase {
17    
18      public $has_pecl_stem = FALSE;
19    
20    public static function getInfo() {    public static function getInfo() {
21      return array(      return array(
22        'name' => t('Stemming output 1'),        'name' => t('Stemming output 1'),
# Line 25  class PorterStemmerOutput1UnitTest exten Line 27  class PorterStemmerOutput1UnitTest exten
27    
/**
 * Enables the Porter Stemmer module and detects the PECL stem extension.
 *
 * Sets $this->has_pecl_stem to TRUE only when the 'stem' extension is loaded
 * (or could be loaded on the fly) and it provides stem_english().
 */
public function setUp() {
  parent::setUp('porterstemmer');

  // See if the PECL stemming library is installed.
  $loaded = extension_loaded('stem');

  // Dynamic loading of extensions is going away in PHP 6, so leave the
  // function_exists() check here! dl() halts with E_ERROR when dynamic
  // loading is disabled (enable_dl off or safe mode on) and raises E_WARNING
  // when the library file cannot be loaded, so only call it when neither
  // can happen.
  if (!$loaded && function_exists('dl') && ini_get('enable_dl') && !ini_get('safe_mode')) {
    // Extension files are named php_<name>.dll on Windows and <name>.so
    // elsewhere; PHP_SHLIB_SUFFIX holds the platform's suffix.
    $prefix = (PHP_SHLIB_SUFFIX === 'dll') ? 'php_' : '';
    $library = $prefix . 'stem.' . PHP_SHLIB_SUFFIX;

    // dl() only loads from extension_dir, so a missing file there means the
    // extension is not installed. A relative extension_dir that does not
    // resolve from the current directory is treated as "not installed".
    $directory = ini_get('extension_dir');
    if (is_file($directory . DIRECTORY_SEPARATOR . $library)) {
      $loaded = dl($library);
    }
  }

  $this->has_pecl_stem = $loaded && function_exists('stem_english');
}
50    
51    /**    /**
# Line 41  class PorterStemmerOutput1UnitTest exten Line 62  class PorterStemmerOutput1UnitTest exten
62     * known input/output pairs from     * known input/output pairs from
63     *   http://snowball.tartarus.org/algorithms/english/stemmer.html     *   http://snowball.tartarus.org/algorithms/english/stemmer.html
64     *     *
65     * Note that words whose input or stemmed version is less than 3     * Or if $use_pecl is set to TRUE, the PECL stem_english() function is
66     * characters are always skipped.     * used instead.
67     *     *
68     * @param $skipto     * @param $skipto
69     *    Line of file to start on (count starts at 0), not counting short ones.     *    Line of file to start on (count starts at 0), not counting short ones.
70     * @param $runto     * @param $runto
71     *    Number of lines to test, not counting short ones.     *    Number of lines to test, not counting short ones.
72       * @param $use_pecl
73       *    If TRUE, use the PECL library if it is installed. If it isn't installed,
74       *    no tests are performed.
75     */     */
76    function _run_porterstemmer_stem_test( $skipto = 0, $runto = 2000 ) {    function _run_porterstemmer_stem_test( $skipto = 0, $runto = 2000, $use_pecl = FALSE ) {
77    
78        if ( $use_pecl && !$this->has_pecl_stem ) {
79          $this->assertTrue( 1, "No PECL stem library found, aborting test" );
80          return;
81        }
82    
83      // Open word file      // Open word file
84      $file = drupal_get_path('module', 'porterstemmer') . '/testwords.txt';      $file = drupal_get_path('module', 'porterstemmer') . '/testwords.txt';
# Line 68  class PorterStemmerOutput1UnitTest exten Line 97  class PorterStemmerOutput1UnitTest exten
97      $ran = 0;      $ran = 0;
98      $skipped = 0;      $skipped = 0;
99    
100        $start = time();
101        $mstart = 0;
102        if ( function_exists( 'microtime' )) {
103          $mstart = microtime(TRUE);
104        }
105    
106      while ( !feof( $handle ) && $ran < $runto ) {      while ( !feof( $handle ) && $ran < $runto ) {
107        // Read a line of the file, and split into words        // Read a line of the file, and split into words
108        $line = fgets($handle, 4096);        $line = fgets($handle, 4096);
# Line 90  class PorterStemmerOutput1UnitTest exten Line 125  class PorterStemmerOutput1UnitTest exten
125        }        }
126    
127        // Stem the word        // Stem the word
128        $stem = porterstemmer_stem( $in );        if ( $use_pecl && $this->has_pecl_stem ) {
129            $stem = stem_english( $in );
130          }
131          else {
132            $stem = porterstemmer_stem( $in );
133          }
134    
135        // Test correctness        // Test correctness
136        $this->assertEqual( $right, $stem, "Stemming $in results in $right (was $stem)",        $this->assertEqual( $right, $stem, "Stemming $in results in $right (was $stem)",
137          t( 'Stemming test' ));          t( 'Stemming test' ));
138        $ran++;        $ran++;
139      }      }
140    
141        $start = time() - $start;
142        if ( function_exists( 'microtime' )) {
143          $mstart = microtime(TRUE) - $mstart;
144        }
145        $this->assertTrue( 1, "Elapsed time: $start seconds ($mstart)" );
146    
147      fclose($handle);      fclose($handle);
148    }    }
149  }  }
# Line 605  class PorterStemmerInternalsUnitTest ext Line 652  class PorterStemmerInternalsUnitTest ext
652      $this->assertEqual( $wordb, 'ie', "Step1a should be ie, was $wordb", 'Stemmer steps' );      $this->assertEqual( $wordb, 'ie', "Step1a should be ie, was $wordb", 'Stemmer steps' );
653    }    }
654  }  }
655    
656    
/**
 * Unit tests for Porter Stemmer - stemming output, part 1, PECL variant.
 */
class PorterStemmerPECLOutput1UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 1 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests the first 2000 words in the file for stemming accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 0;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
677    
/**
 * Unit tests for Porter Stemmer - stemming output, part 2, PECL variant.
 */
class PorterStemmerPECLOutput2UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 2 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 2000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 2000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
698    
/**
 * Unit tests for Porter Stemmer - stemming output, part 3, PECL variant.
 */
class PorterStemmerPECLOutput3UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 3 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 4000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 4000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
719    
/**
 * Unit tests for Porter Stemmer - stemming output, part 4, PECL variant.
 */
class PorterStemmerPECLOutput4UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 4 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 6000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 6000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
740    
/**
 * Unit tests for Porter Stemmer - stemming output, part 5, PECL variant.
 */
class PorterStemmerPECLOutput5UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 5 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 8000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 8000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
761    
/**
 * Unit tests for Porter Stemmer - stemming output, part 6, PECL variant.
 */
class PorterStemmerPECLOutput6UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 6 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 10000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 10000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
782    
/**
 * Unit tests for Porter Stemmer - stemming output, part 7, PECL variant.
 */
class PorterStemmerPECLOutput7UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 7 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 12000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 12000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
803    
/**
 * Unit tests for Porter Stemmer - stemming output, part 8, PECL variant.
 */
class PorterStemmerPECLOutput8UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 8 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 14000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 14000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
824    
/**
 * Unit tests for Porter Stemmer - stemming output, part 9, PECL variant.
 */
class PorterStemmerPECLOutput9UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 9 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 16000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 16000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
845    
/**
 * Unit tests for Porter Stemmer - stemming output, part 10, PECL variant.
 */
class PorterStemmerPECLOutput10UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 10 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 18000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 18000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
866    
/**
 * Unit tests for Porter Stemmer - stemming output, part 11, PECL variant.
 */
class PorterStemmerPECLOutput11UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 11 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 20000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 20000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
887    
/**
 * Unit tests for Porter Stemmer - stemming output, part 12, PECL variant.
 */
class PorterStemmerPECLOutput12UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 12 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 22000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 22000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
/**
 * Unit tests for Porter Stemmer - stemming output, part 13, PECL variant.
 */
class PorterStemmerPECLOutput13UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 13 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 24000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 24000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
/**
 * Unit tests for Porter Stemmer - stemming output, part 14, PECL variant.
 */
class PorterStemmerPECLOutput14UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 14 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 26000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 26000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}
948    
949    ///////////////////////////////////////
/**
 * Unit tests for Porter Stemmer - stemming output, part 15, PECL variant.
 */
class PorterStemmerPECLOutput15UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Returns the name, description and group of this test case.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 15 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Tests 2000 words in the file, starting at line 28000, for accuracy.
   *
   * Passes TRUE for $use_pecl so the PECL library is requested.
   */
  function testStemmingUnitTest() {
    $first_line = 28000;
    $line_count = 2000;
    $this->_run_porterstemmer_stem_test( $first_line, $line_count, TRUE );
  }
}

Legend:
Removed from v.1.1.2.2  
changed lines
  Added in v.1.1.2.3

  ViewVC Help
Powered by ViewVC 1.1.2