int CPorterStemming::Stem(char *word) /* in/out: the word stemmed */ { int rule; /* which rule is fired in replacing an end */ /* Part 1: Check to ensure the word is all alphabetic */ for ( end = word; *end != '\0'; end++ ) { if ( !isalpha(*end) ) return( 0 ); else *end = tolower( *end ); } end--; /* Part 2: Run through the Porter algorithm */ (void)ReplaceEnd( word, step1a_rules ); rule = ReplaceEnd( word, step1b_rules ); if ( (106 == rule) || (107 == rule) ) (void)ReplaceEnd( word, step1b1_rules ); (void)ReplaceEnd( word, step1c_rules ); (void)ReplaceEnd( word, step2_rules ); (void)ReplaceEnd( word, step3_rules ); (void)ReplaceEnd( word, step4_rules ); (void)ReplaceEnd( word, step5a_rules ); (void)ReplaceEnd( word, step5b_rules ); /* Part 3: Return an indication of successful stemming */ return( 1 ); } /* Stem */
/*
 * Stem
 *
 * Runs the Porter stemming steps over a private copy of `inword` and
 * reports the outcome through a FUZZY_WORD obtained from
 * create_fuzzy_word( inword, 1 ).
 *
 * fi:     fuzzy object handle; not referenced by this function.
 * inword: NUL-terminated word to stem.  It is not lower-cased here, so the
 *         caller is expected to pass it already in lower case.
 *
 * Returns the FUZZY_WORD in every case.  On failure fw->error is set to:
 *   STEM_WORD_TOO_BIG - input or stem is MAXWORDLEN chars or longer, or
 *                       ReplaceEnd reported that value
 *   STEM_NOT_ALPHA    - input contains a non-alphabetic character
 *   STEM_TO_NOTHING   - the stem is shorter than two characters
 * On success fw->string_list[0] receives an estrdup()'d copy of the stem
 * and fw->free_strings is set to 1.
 */
static FUZZY_WORD *Stem( FUZZY_OBJECT *fi, const char *inword)
{
    char   *end;                 /* cursor used to scan the word */
    char    word[MAXWORDLEN+1];  /* working copy that the rules rewrite */
    int     length;
    int     rule_result;         /* which rule is fired in replacing an end */
    int     i;
    FUZZY_WORD *fw = create_fuzzy_word( inword, 1 );

    /* Make sure the word is not too large from the start. */
    if ( strlen( inword ) >= MAXWORDLEN )
    {
        fw->error = STEM_WORD_TOO_BIG;
        return fw;
    }

    /* make working copy -- length was bounded just above, so this fits */
    strcpy( word, inword );

    /* Part 1: Check to ensure the word is all alphabetic */
    /* no longer converts to lower case -- word should be lower before calling */
    for ( end = word; *end; end++ )
    {
        /*
         * isalpha() is only defined for EOF and unsigned char values.  Cast
         * through unsigned char so bytes >= 0x80 stay in range even where
         * plain char is signed (a cast to unsigned int does not do that).
         */
        if ( !isalpha( (unsigned char) *end ) )
        {
            fw->error = STEM_NOT_ALPHA;
            return fw;
        }
    }

    /* Part 2: Run through the Porter algorithm */
    for ( i = 0; i < (int)(sizeof(all_steps)/sizeof(all_steps[0])); i++ )
    {
        rule_result = ReplaceEnd( word, all_steps[i] );

        /* Rules 106 and 107 trigger the extra step 1b1 pass. */
        if ( (rule_result == 106) || (rule_result == 107) )
            rule_result = ReplaceEnd( word, step1b1_rules );

        if ( rule_result == STEM_WORD_TOO_BIG )
        {
            fw->error = rule_result;
            return fw;
        }
    }

    length = (int)strlen( word );

    /* Stem must be two chars or more in length */
    if ( length <= 1 )
    {
        fw->error = STEM_TO_NOTHING;
        return fw;
    }

    if ( length >= MAXWORDLEN )
    {
        fw->error = STEM_WORD_TOO_BIG;
        return fw;
    }

    /* Hand back a heap copy of the stem and flag it via free_strings. */
    fw->free_strings = 1;
    fw->string_list[0] = estrdup( word );

    return fw;
}