| 1 |
<?php |
<?php |
| 2 |
// $Id: porterstemmer.test,v 1.1.2.1 2009/08/26 19:22:49 jhodgdon Exp $ |
// $Id: porterstemmer.test,v 1.1.2.2 2009/10/14 19:00:04 jhodgdon Exp $ |
| 3 |
|
|
| 4 |
/** |
/** |
| 5 |
* @file |
* @file |
| 15 |
*/ |
*/ |
| 16 |
class PorterStemmerOutput1UnitTest extends DrupalWebTestCase { |
class PorterStemmerOutput1UnitTest extends DrupalWebTestCase { |
| 17 |
|
|
| 18 |
|
public $has_pecl_stem = FALSE; |
| 19 |
|
|
| 20 |
public static function getInfo() { |
public static function getInfo() { |
| 21 |
return array( |
return array( |
| 22 |
'name' => t('Stemming output 1'), |
'name' => t('Stemming output 1'), |
| 27 |
|
|
| 28 |
public function setUp() { |
public function setUp() { |
| 29 |
parent::setUp('porterstemmer'); |
parent::setUp('porterstemmer'); |
| 30 |
|
|
| 31 |
|
// See if the PECL stemming library is installed |
| 32 |
|
|
| 33 |
|
$this->has_pecl_stem = FALSE; |
| 34 |
|
|
| 35 |
|
if (extension_loaded( 'stem')) { |
| 36 |
|
$this->has_pecl_stem = TRUE; |
| 37 |
|
} |
| 38 |
|
else { |
| 39 |
|
// dynamic loading of extensions is going away in PHP 6, so leave this |
| 40 |
|
// check here! |
| 41 |
|
if (function_exists( 'dl' )) { |
| 42 |
|
$this->has_pecl_stem = dl('stem.so'); |
| 43 |
|
} |
| 44 |
|
} |
| 45 |
|
|
| 46 |
|
$this->has_pecl_stem = $this->has_pecl_stem && |
| 47 |
|
function_exists('stem_english'); |
| 48 |
|
|
| 49 |
} |
} |
| 50 |
|
|
| 51 |
/** |
/** |
| 62 |
* known input/output pairs from |
* known input/output pairs from |
| 63 |
* http://snowball.tartarus.org/algorithms/english/stemmer.html |
* http://snowball.tartarus.org/algorithms/english/stemmer.html |
| 64 |
* |
* |
| 65 |
* Note that words whose input or stemmed version is less than 3 |
* Or if $use_pecl is set to TRUE, the PECL stem_english() function is |
| 66 |
* characters are always skipped. |
* used instead. |
| 67 |
* |
* |
| 68 |
* @param $skipto |
* @param $skipto |
| 69 |
* Line of file to start on (count starts at 0), not counting short ones. |
* Line of file to start on (count starts at 0), not counting short ones. |
| 70 |
* @param $runto |
* @param $runto |
| 71 |
* Number of lines to test, not counting short ones. |
* Number of lines to test, not counting short ones. |
| 72 |
|
* @param $use_pecl |
| 73 |
|
* If TRUE, use the PECL library if it is installed. If it isn't installed, |
| 74 |
|
* no tests are performed. |
| 75 |
*/ |
*/ |
| 76 |
function _run_porterstemmer_stem_test( $skipto = 0, $runto = 2000 ) { |
function _run_porterstemmer_stem_test( $skipto = 0, $runto = 2000, $use_pecl = FALSE ) { |
| 77 |
|
|
| 78 |
|
if ( $use_pecl && !$this->has_pecl_stem ) { |
| 79 |
|
$this->assertTrue( 1, "No PECL stem library found, aborting test" ); |
| 80 |
|
return; |
| 81 |
|
} |
| 82 |
|
|
| 83 |
// Open word file |
// Open word file |
| 84 |
$file = drupal_get_path('module', 'porterstemmer') . '/testwords.txt'; |
$file = drupal_get_path('module', 'porterstemmer') . '/testwords.txt'; |
| 97 |
$ran = 0; |
$ran = 0; |
| 98 |
$skipped = 0; |
$skipped = 0; |
| 99 |
|
|
| 100 |
|
$start = time(); |
| 101 |
|
$mstart = 0; |
| 102 |
|
if ( function_exists( 'microtime' )) { |
| 103 |
|
$mstart = microtime(TRUE); |
| 104 |
|
} |
| 105 |
|
|
| 106 |
while ( !feof( $handle ) && $ran < $runto ) { |
while ( !feof( $handle ) && $ran < $runto ) { |
| 107 |
// Read a line of the file, and split into words |
// Read a line of the file, and split into words |
| 108 |
$line = fgets($handle, 4096); |
$line = fgets($handle, 4096); |
| 125 |
} |
} |
| 126 |
|
|
| 127 |
// Stem the word |
// Stem the word |
| 128 |
$stem = porterstemmer_stem( $in ); |
if ( $use_pecl && $this->has_pecl_stem ) { |
| 129 |
|
$stem = stem_english( $in ); |
| 130 |
|
} |
| 131 |
|
else { |
| 132 |
|
$stem = porterstemmer_stem( $in ); |
| 133 |
|
} |
| 134 |
|
|
| 135 |
// Test correctness |
// Test correctness |
| 136 |
$this->assertEqual( $right, $stem, "Stemming $in results in $right (was $stem)", |
$this->assertEqual( $right, $stem, "Stemming $in results in $right (was $stem)", |
| 137 |
t( 'Stemming test' )); |
t( 'Stemming test' )); |
| 138 |
$ran++; |
$ran++; |
| 139 |
} |
} |
| 140 |
|
|
| 141 |
|
$start = time() - $start; |
| 142 |
|
if ( function_exists( 'microtime' )) { |
| 143 |
|
$mstart = microtime(TRUE) - $mstart; |
| 144 |
|
} |
| 145 |
|
$this->assertTrue( 1, "Elapsed time: $start seconds ($mstart)" ); |
| 146 |
|
|
| 147 |
fclose($handle); |
fclose($handle); |
| 148 |
} |
} |
| 149 |
} |
} |
| 652 |
$this->assertEqual( $wordb, 'ie', "Step1a should be ie, was $wordb", 'Stemmer steps' ); |
$this->assertEqual( $wordb, 'ie', "Step1a should be ie, was $wordb", 'Stemmer steps' ); |
| 653 |
} |
} |
| 654 |
} |
} |
| 655 |
|
|
| 656 |
|
|
| 657 |
|
/**
 * Stemming output test, part 1, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput1UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 1 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 0 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 0;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 677 |
|
|
| 678 |
|
/**
 * Stemming output test, part 2, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput2UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 2 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 2000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 2000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 698 |
|
|
| 699 |
|
/**
 * Stemming output test, part 3, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput3UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 3 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 4000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 4000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 719 |
|
|
| 720 |
|
/**
 * Stemming output test, part 4, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput4UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 4 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 6000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 6000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 740 |
|
|
| 741 |
|
/**
 * Stemming output test, part 5, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput5UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 5 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 8000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 8000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 761 |
|
|
| 762 |
|
/**
 * Stemming output test, part 6, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput6UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 6 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 10000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 10000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 782 |
|
|
| 783 |
|
/**
 * Stemming output test, part 7, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput7UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 7 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 12000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 12000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 803 |
|
|
| 804 |
|
/**
 * Stemming output test, part 8, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput8UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 8 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 14000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 14000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 824 |
|
|
| 825 |
|
/**
 * Stemming output test, part 9, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput9UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 9 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 16000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 16000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 845 |
|
|
| 846 |
|
/**
 * Stemming output test, part 10, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput10UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 10 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 18000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 18000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 866 |
|
|
| 867 |
|
/**
 * Stemming output test, part 11, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput11UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 11 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 20000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 20000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 887 |
|
|
| 888 |
|
/**
 * Stemming output test, part 12, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput12UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 12 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 22000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 22000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 908 |
|
/**
 * Stemming output test, part 13, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput13UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 13 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 24000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 24000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 928 |
|
/**
 * Stemming output test, part 14, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput14UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 14 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 26000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 26000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}
| 948 |
|
|
| 949 |
|
/////////////////////////////////////// |
| 950 |
|
/**
 * Stemming output test, part 15, run through the PECL stem library.
 *
 * Reuses the word-list runner inherited from PorterStemmerOutput1UnitTest,
 * asking it to stem with the PECL library rather than the module function.
 */
class PorterStemmerPECLOutput15UnitTest extends PorterStemmerOutput1UnitTest {

  /**
   * Describes this test case: its name, description and group.
   */
  public static function getInfo() {
    $info = array();
    $info['name'] = t('Stemming output 15 PECL');
    $info['description'] = t('Test that the stemming function returns the correct stemmed words with PECL library');
    $info['group'] = t('Porter Stemmer');
    return $info;
  }

  /**
   * Checks 2000 words, starting at line 28000 of the word file, using PECL.
   */
  function testStemmingUnitTest() {
    $skipto = 28000;
    $runto = 2000;
    $use_pecl = TRUE;
    $this->_run_porterstemmer_stem_test( $skipto, $runto, $use_pecl );
  }
}