Esempio n. 1
0
// Register `entry` with the inverter.
//
// If the entry has an analyzer, its value is run through that analyzer to
// produce a fresh Inversion.  Otherwise, if the entry is indexed or
// highlightable, a fresh Inversion is built from a single Token spanning the
// entire value.  In both cases the entry's previous Inversion reference is
// released and the new Inversion is inverted.  When neither condition holds,
// the entry's Inversion is left untouched.
//
// Finally the entry is pushed (with an added reference) onto the inverter's
// entry list, and the list is flagged as unsorted.
void
Inverter_Add_Field_IMP(Inverter *self, InverterEntry *entry) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    InverterEntryIVARS *const field = InvEntry_IVARS(entry);

    if (field->analyzer) {
        // Analyzed field: the analyzer produces the token stream.
        // The old Inversion is released before the replacement is created.
        DECREF(field->inversion);
        field->inversion = Analyzer_Transform_Text(field->analyzer,
                                                   (String*)field->value);
        Inversion_Invert(field->inversion);
    }
    else if (field->indexed || field->highlightable) {
        // Unanalyzed field: wrap the whole value in exactly one Token.
        String *text     = (String*)field->value;
        size_t  text_len = Str_Get_Size(text);
        Token  *whole    = Token_new(Str_Get_Ptr8(text), text_len, 0,
                                     text_len, 1.0f, 1);

        DECREF(field->inversion);
        field->inversion = Inversion_new(whole);
        // Drop our local reference to the seed Token now that it has been
        // handed to Inversion_new().
        DECREF(whole);

        // Nearly a no-op for a single-token Inversion.
        Inversion_Invert(field->inversion);
    }

    // Remember the entry; adding one invalidates any previous sort order.
    VA_Push(ivars->entries, INCREF(entry));
    ivars->sorted = false;
}
Esempio n. 2
0
// Tokenize the contents of `text` into a newly created Inversion and return
// it.  The Inversion starts out empty (created with a NULL seed) and is
// filled by StandardTokenizer_Tokenize_Str().
Inversion*
StandardTokenizer_transform_text(StandardTokenizer *self, CharBuf *text) {
    Inversion *result = Inversion_new(NULL);

    // NOTE(review): assumes CB_Get_Size() yields a size_t -- confirm against
    // the CharBuf header.
    char   *utf8     = (char*)CB_Get_Ptr8(text);
    size_t  utf8_len = CB_Get_Size(text);

    StandardTokenizer_Tokenize_Str(self, utf8, utf8_len, result);

    return result;
}
Esempio n. 3
0
// Run `text` through every analyzer held by this PolyAnalyzer, in order, and
// return the resulting Inversion.
//
// With no analyzers configured, the result is an Inversion built from a
// single Token spanning the entire text.  Otherwise the first analyzer
// consumes the raw text and each subsequent analyzer transforms the previous
// analyzer's output; intermediate Inversions are released as soon as they
// have been consumed.
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    VArray *const  chain     = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t chain_len = VA_Get_Size(chain);

    // Empty chain: pass the text through as one token.
    if (chain_len == 0) {
        size_t      text_len = Str_Get_Size(text);
        const char *utf8     = Str_Get_Ptr8(text);
        Token      *whole    = Token_new(utf8, text_len, 0, text_len, 1.0f, 1);
        Inversion  *single   = Inversion_new(whole);
        DECREF(whole);
        return single;
    }

    // The head of the chain works on the raw text...
    Analyzer  *head    = (Analyzer*)VA_Fetch(chain, 0);
    Inversion *current = Analyzer_Transform_Text(head, text);

    // ...and every later stage works on its predecessor's output.
    uint32_t tick = 1;
    while (tick < chain_len) {
        Analyzer  *stage    = (Analyzer*)VA_Fetch(chain, tick);
        Inversion *produced = Analyzer_Transform(stage, current);
        DECREF(current);
        current = produced;
        tick++;
    }

    return current;
}
Esempio n. 4
0
// Tokenize the UTF-8 content of `text` into a newly created Inversion and
// return it.  The Inversion starts out empty (created with a NULL seed) and
// is filled by RegexTokenizer_Tokenize_Utf8().
Inversion*
RegexTokenizer_Transform_Text_IMP(RegexTokenizer *self, String *text) {
    Inversion *result = Inversion_new(NULL);

    const char *utf8     = Str_Get_Ptr8(text);
    size_t      utf8_len = Str_Get_Size(text);

    RegexTokenizer_Tokenize_Utf8(self, utf8, utf8_len, result);

    return result;
}
// Tokenize the content of `text` into a newly created Inversion and return
// it.  The Inversion starts out empty (created with a NULL seed) and is
// filled by WhitespaceTokenizer_Tokenize_Str().
Inversion*
WhitespaceTokenizer_Transform_Text_IMP(WhitespaceTokenizer *self,
                                       String *text) {
    Inversion *result = Inversion_new(NULL);

    // The cast mirrors the tokenizer's expected (non-const) buffer argument.
    char   *utf8     = (char*)Str_Get_Ptr8(text);
    size_t  utf8_len = Str_Get_Size(text);

    WhitespaceTokenizer_Tokenize_Str(self, utf8, utf8_len, result);

    return result;
}
Esempio n. 6
0
// Re-tokenize every Token of `inversion`: the text of each incoming Token is
// fed to StandardTokenizer_Tokenize_Str(), and all resulting tokens are
// collected in one newly created Inversion, which is returned.
Inversion*
StandardTokenizer_transform(StandardTokenizer *self, Inversion *inversion) {
    Inversion *result = Inversion_new(NULL);

    // Inversion_Next() returns NULL once the input is exhausted.
    for (Token *tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        StandardTokenizer_Tokenize_Str(self, tok->text, tok->len, result);
    }

    return result;
}
// Re-tokenize every Token of `inversion`: the text of each incoming Token is
// fed to WhitespaceTokenizer_Tokenize_Str(), and all resulting tokens are
// collected in one newly created Inversion, which is returned.
Inversion*
WhitespaceTokenizer_Transform_IMP(WhitespaceTokenizer *self,
                                  Inversion *inversion) {
    Inversion *result = Inversion_new(NULL);

    // Inversion_Next() returns NULL once the input is exhausted.
    for (Token *tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        WhitespaceTokenizer_Tokenize_Str(self, Token_Get_Text(tok),
                                         Token_Get_Len(tok), result);
    }

    return result;
}
Esempio n. 8
0
// Re-tokenize every Token of `inversion`: the text of each incoming Token is
// fed to RegexTokenizer_Tokenize_Utf8(), and all resulting tokens are
// collected in one newly created Inversion, which is returned.
Inversion*
RegexTokenizer_Transform_IMP(RegexTokenizer *self, Inversion *inversion) {
    Inversion *result = Inversion_new(NULL);

    // Inversion_Next() returns NULL once the input is exhausted.
    for (Token *tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        TokenIVARS *const tok_ivars = Token_IVARS(tok);
        RegexTokenizer_Tokenize_Utf8(self, tok_ivars->text, tok_ivars->len,
                                     result);
    }

    return result;
}
Esempio n. 9
0
// Filter `inversion` against this Stopalizer's stoplist.
//
// Returns a newly created Inversion holding only those Tokens whose text has
// no entry in the stoplist hash.  Surviving Tokens are shared with the input
// rather than copied: each one gets an extra reference before being
// appended.
Inversion*
Stopalizer_transform(Stopalizer *self, Inversion *inversion)
{
    Inversion *kept = Inversion_new(NULL);
    Hash *const stopwords = self->stoplist;

    // Inversion_Next() returns NULL once the input is exhausted.
    for (Token *tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        if (Hash_Fetch_Str(stopwords, tok->text, tok->len)) {
            // Listed as a stopword: drop it.
            continue;
        }
        Inversion_Append(kept, (Token*)INCREF(tok));
    }

    return kept;
}
Esempio n. 10
0
// Filter `inversion` against this SnowballStopFilter's stoplist.
//
// Returns a newly created Inversion holding only those Tokens whose text has
// no entry in the stoplist hash.  Surviving Tokens are shared with the input
// rather than copied: each one gets an extra reference before being
// appended.
Inversion*
SnowStop_Transform_IMP(SnowballStopFilter *self, Inversion *inversion) {
    Inversion *kept = Inversion_new(NULL);
    SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
    Hash *const stopwords = ivars->stoplist;

    // Inversion_Next() returns NULL once the input is exhausted.
    for (Token *tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        TokenIVARS *const tok_ivars = Token_IVARS(tok);
        if (Hash_Fetch_Utf8(stopwords, tok_ivars->text, tok_ivars->len)) {
            // Listed as a stopword: drop it.
            continue;
        }
        Inversion_Append(kept, (Token*)INCREF(tok));
    }

    return kept;
}