Esempio n. 1
0
/*
 * Stem: lower-case a word and reduce it to its stem, in place.
 *
 * word  in/out: NUL-terminated word; modified in place.
 *
 * Returns 0 as soon as a non-alphabetic character is found. Characters
 * before the offending one have already been lower-cased at that point and
 * no stemming rule has been applied. Returns 1 otherwise.
 */
int CPorterStemming::Stem(char *word)  /* in/out: the word stemmed */
{
	int rule;    /* which rule is fired in replacing an end */

	/* Part 1: Check to ensure the word is all alphabetic */
	for ( end = word; *end != '\0'; end++ )
	{
		/* <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a negative plain char is undefined. */
		unsigned char ch = (unsigned char)*end;

		if ( !isalpha( ch ) )
			return( 0 );

		*end = (char)tolower( ch );
	}

	/* An empty word has nothing to stem. Returning here also avoids
	 * stepping `end` to one before the start of the buffer. */
	if ( end == word )
		return( 1 );

	/* `end` is declared outside this function; leave it on the last
	 * character of the word before the rule tables are applied.
	 * NOTE(review): presumably ReplaceEnd reads it -- confirm. */
	end--;

	/*  Part 2: Run through the Porter algorithm */
	(void)ReplaceEnd( word, step1a_rules );

	/* Rules 106 and 107 of step 1b require the step 1b1 follow-up rules. */
	rule = ReplaceEnd( word, step1b_rules );
	if ( (106 == rule) || (107 == rule) )
		(void)ReplaceEnd( word, step1b1_rules );

	(void)ReplaceEnd( word, step1c_rules );

	(void)ReplaceEnd( word, step2_rules );

	(void)ReplaceEnd( word, step3_rules );

	(void)ReplaceEnd( word, step4_rules );

	(void)ReplaceEnd( word, step5a_rules );
	(void)ReplaceEnd( word, step5b_rules );

	/* Part 3: Return an indication of successful stemming */
	return( 1 );
} /* Stem */
Esempio n. 2
0
/*
 * Stem: run the Porter stemming rule tables over a copy of `inword`.
 *
 * fi      not used by this function.
 * inword  NUL-terminated word to stem; it is not modified. It is expected
 *         to be lower case already (no case conversion is done here).
 *
 * Returns the FUZZY_WORD obtained from create_fuzzy_word( inword, 1 ).
 * On failure only fw->error is set by this function:
 *   STEM_WORD_TOO_BIG  - input or stemmed word is MAXWORDLEN chars or more,
 *                        or a rule step reported that code
 *   STEM_NOT_ALPHA     - input contains a non-alphabetic character
 *   STEM_TO_NOTHING    - the stem is shorter than two characters
 * On success fw->free_strings is set to 1 and fw->string_list[0] is set to
 * an estrdup() copy of the stem.
 */
static FUZZY_WORD *Stem( FUZZY_OBJECT *fi, const char *inword)
{
    char   *end;                /* cursor used to scan the working copy */
    char    word[MAXWORDLEN+1];
    int     length;
    int     rule_result;        /* which rule is fired in replacing an end */
    int     i;

    FUZZY_WORD *fw = create_fuzzy_word( inword, 1 );

    /* Make sure the word is not too large from the start. */
    if ( strlen( inword ) >= MAXWORDLEN )
    {
        fw->error = STEM_WORD_TOO_BIG;
        return fw;
    }


    /* make working copy (safe: length was checked just above) */
    strcpy( word, inword );


    /* Part 1: Check to ensure the word is all alphabetic */
    /* no longer converts to lower case -- word should be lower before calling */

    for ( end = word; *end; end++ )
    {
        /* <ctype.h> functions require a value representable as unsigned
         * char (or EOF); casting a negative char to unsigned int does not
         * satisfy that, so convert through unsigned char instead. */
        if ( !isalpha( (unsigned char) *end ) )
        {
            fw->error = STEM_NOT_ALPHA;
            return fw;
        }
    }



    /*  Part 2: Run through the Porter algorithm */


    for (i = 0; i < (int)(sizeof(all_steps)/sizeof(all_steps[0])); i++)
    {
        rule_result = ReplaceEnd(word, all_steps[i]);

        /* Rules 106 and 107 require the step1b1 follow-up rules; the
         * follow-up's result replaces rule_result for the check below. */
        if ((rule_result == 106) || (rule_result == 107))
            rule_result = ReplaceEnd(word, step1b1_rules);

        if ( rule_result == STEM_WORD_TOO_BIG )
        {
            fw->error = rule_result;
            return fw;
        }
    }



    length = strlen( word );

    /* Stem must be two chars or more in length */
    if ( length <= 1 )
    {
        fw->error = STEM_TO_NOTHING;
        return fw;
    }


    if ( length >= MAXWORDLEN )
    {
        fw->error = STEM_WORD_TOO_BIG;
        return fw;
    }


    fw->free_strings = 1;
    fw->string_list[0] = estrdup( word );

    return fw;
}