예제 #1
0
/* Equality test for SnowballStemmer.
 *
 * Returns true when `other` is the very same object as `self`, or when
 * `other` passes the Obj_is_a(..., SNOWBALLSTEMMER) check and its language
 * string compares equal to ours via Str_Equals.  Returns false otherwise.
 */
bool
SnowStemmer_Equals_IMP(SnowballStemmer *self, Obj *other) {
    // Same object: nothing further to compare.
    if (self == (SnowballStemmer*)other) {
        return true;
    }

    // Only another SnowballStemmer can be equal.
    if (!Obj_is_a(other, SNOWBALLSTEMMER)) {
        return false;
    }

    SnowballStemmerIVARS *const mine = SnowStemmer_IVARS(self);
    SnowballStemmerIVARS *const theirs
        = SnowStemmer_IVARS((SnowballStemmer*)other);

    // The language is the only member compared here.
    return Str_Equals(theirs->language, (Obj*)mine->language)
           ? true
           : false;
}
예제 #2
0
/* Serialize this stemmer into a Hash.
 *
 * Calls the superclass Dump implementation to obtain the base Hash, then
 * stores a clone of the language string under the key "language" and
 * returns that Hash.
 */
Hash*
SnowStemmer_Dump_IMP(SnowballStemmer *self) {
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);

    // Start from whatever the parent class dumps.
    SnowStemmer_Dump_t parent_dump
        = SUPER_METHOD_PTR(SNOWBALLSTEMMER, LUCY_SnowStemmer_Dump);
    Hash *result = parent_dump(self);

    // Store a clone rather than the stemmer's own language object.
    Obj *language_copy = (Obj*)Str_Clone(ivars->language);
    Hash_Store_Utf8(result, "language", 8, language_copy);

    return result;
}
예제 #3
0
/* Destructor for SnowballStemmer.
 *
 * Deletes the underlying Snowball stemmer handle if one is set, drops the
 * reference held on the language string, and then invokes the superclass
 * destructor via SUPER_DESTROY.
 */
void
SnowStemmer_Destroy_IMP(SnowballStemmer *self) {
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    struct sb_stemmer *const stemmer
        = (struct sb_stemmer*)ivars->snowstemmer;

    if (stemmer != NULL) {
        sb_stemmer_delete(stemmer);
    }

    DECREF(ivars->language);
    SUPER_DESTROY(self, SNOWBALLSTEMMER);
}
예제 #4
0
/* Initialize a SnowballStemmer for the given language.
 *
 * Runs Analyzer_init on `self`, stores a clone of `language`, and creates a
 * Snowball stemmer from the lowercased first two code points of `language`
 * using the "UTF_8" encoding.
 *
 * Throws ERR if sb_stemmer_new() cannot supply a stemmer for the resulting
 * two-letter name.  Returns `self`.
 */
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    char lang_buf[3] = { '\0', '\0', '\0' };
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Get a Snowball stemmer.  Be case-insensitive.
    //
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so restrict case folding to ASCII code points.  Anything
    // else -- a non-ASCII character or a missing code point -- ends the
    // name early; such a name is not two ASCII letters, so the lookup below
    // is expected to fail and report the error.
    for (size_t i = 0; i < 2; i++) {
        const long code_point = (long)Str_Code_Point_At(language, i);
        if (code_point <= 0 || code_point > 127) {
            break;
        }
        lang_buf[i] = (char)tolower((int)code_point);
    }

    ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
    if (!ivars->snowstemmer) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }

    return self;
}
예제 #5
0
/* Stem every token of `inversion` in place.
 *
 * Iterates the tokens with Inversion_Next, runs each token's text through
 * the Snowball stemmer, and overwrites the token's text and length with the
 * stemmed result.  The inversion is then reset and returned with an added
 * reference (INCREF).
 *
 * Throws ERR if the stemmer returns NULL for a token (libstemmer documents
 * this as its out-of-memory signal).
 */
Inversion*
SnowStemmer_Transform_IMP(SnowballStemmer *self, Inversion *inversion) {
    Token *token;
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    struct sb_stemmer *const snowstemmer
        = (struct sb_stemmer*)ivars->snowstemmer;

    while (NULL != (token = Inversion_Next(inversion))) {
        TokenIVARS *const token_ivars = Token_IVARS(token);
        const sb_symbol *stemmed_text
            = sb_stemmer_stem(snowstemmer, (sb_symbol*)token_ivars->text,
                              token_ivars->len);
        // Don't hand a NULL source pointer to memcpy below.
        if (stemmed_text == NULL) {
            THROW(ERR, "Snowball stemmer failed to stem token (out of memory)");
        }
        size_t len = (size_t)sb_stemmer_length(snowstemmer);

        // Only reallocate when the stem is longer than the current text;
        // otherwise the existing buffer is reused.
        // NOTE(review): reuse assumes the token buffer holds at least
        // token_ivars->len + 1 bytes -- confirm against Token's allocation.
        if (len > token_ivars->len) {
            FREEMEM(token_ivars->text);
            token_ivars->text = (char*)MALLOCATE(len + 1);
        }

        // Copy len + 1 bytes so the byte following the stem comes along
        // (presumably the stemmer's NUL terminator).
        memcpy(token_ivars->text, stemmed_text, len + 1);
        token_ivars->len = len;
    }
    Inversion_Reset(inversion);
    return (Inversion*)INCREF(inversion);
}