Beispiel #1
0
/* Initialize a PolySearcher that aggregates several sub-searchers.
 *
 * Each element of `searchers` is certified as a SEARCHER and its Schema's
 * class is compared against the class of `schema`; a mismatch throws an ERR
 * ("Conflicting schemas").  After validation, a per-searcher start offset is
 * recorded (the running sum of the preceding searchers' Doc_Max values) and
 * the grand total is stored as this searcher's doc_max.
 *
 * @param self      The object being initialized.
 * @param schema    Schema handed to Searcher_init and used as the reference
 *                  for the class comparison.
 * @param searchers Array of sub-searchers; an INCREF'd reference is stored.
 * @return self.
 *
 * NOTE(review): doc_max is accumulated in an int32_t with no overflow check.
 */
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, VArray *searchers) {
    const uint32_t num_searchers = VA_Get_Size(searchers);

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (VArray*)INCREF(searchers);
    ivars->starts = NULL; // Safe cleanup.

    // First pass: validate every element before allocating anything, so that
    // an error raised by CERTIFY or THROW cannot leak the offsets buffer.
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(VA_Fetch(searchers, i), SEARCHER);
        Schema *candidate       = Searcher_Get_Schema(searcher);
        Class  *orig_class      = Schema_Get_Class(schema);
        Class  *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }
    }

    // Second pass: derive doc_max and relative start offsets.  Elements were
    // already certified above, so a plain cast suffices here.
    int32_t *starts_array
        = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    int32_t doc_max = 0;
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher = (Searcher*)VA_Fetch(searchers, i);
        starts_array[i] = doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    // The raw buffer is handed over to the I32Array (presumably it takes
    // ownership, per the "_steal" constructor name), so it is not freed here.
    ivars->starts  = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
/* Initialize a PhraseCompiler for the PhraseQuery `parent`.
 *
 * Looks up a Similarity for the query's field (falling back to the Schema's
 * general Similarity when the field lookup yields nothing), runs the base
 * Compiler initializer, then sums Sim_IDF over every term of the phrase and
 * derives the raw weight from that sum and the stored boost.
 *
 * @param self     The object being initialized.
 * @param parent   The PhraseQuery supplying the field and terms.
 * @param searcher Source of the Schema, Doc_Max and Doc_Freq values.
 * @param boost    Boost forwarded to Compiler_init.
 * @return self.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const query_ivars = PhraseQuery_IVARS(parent);
    Schema *schema = Searcher_Get_Schema(searcher);

    // Field-specific Similarity first; Schema-wide Similarity as fallback.
    Similarity *sim = Schema_Fetch_Sim(schema, query_ivars->field);
    Vector *terms = query_ivars->terms;
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Base-class initialization.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // The phrase's IDF is the sum of the IDF of each of its terms.
    ivars->idf = 0;
    const uint32_t num_terms = Vec_Get_Size(terms);
    uint32_t tick = 0;
    while (tick < num_terms) {
        Obj     *term    = Vec_Fetch(terms, tick);
        int32_t  max_doc = Searcher_Doc_Max(searcher);
        int32_t  freq
            = Searcher_Doc_Freq(searcher, query_ivars->field, term);
        ivars->idf += Sim_IDF(sim, freq, max_doc);
        tick++;
    }

    // Raw weight combines the summed IDF with the boost held in ivars.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
Beispiel #3
0
/* Initialize a PhraseCompiler for the PhraseQuery `parent`.
 *
 * Looks up a Similarity for the query's field (falling back to the Schema's
 * general Similarity when the field lookup yields nothing), runs the base
 * Compiler initializer, sums Sim_IDF over every term of the phrase, derives
 * the raw weight from that sum and the stored boost, and finally calls
 * PhraseCompiler_Normalize on the object.
 *
 * @param self     The object being initialized.
 * @param parent   The PhraseQuery supplying the field and terms.
 * @param searcher Source of the Schema, Doc_Max and Doc_Freq values.
 * @param boost    Boost forwarded to Compiler_init.
 * @return self.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost)
{
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent->field);
    VArray     *terms  = parent->terms;
    uint32_t    tick;
    uint32_t    num_terms;

    // Field-specific Similarity first; Schema-wide Similarity as fallback.
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Base-class initialization.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // The phrase's IDF is the sum of the IDF of each of its terms.
    self->idf = 0;
    tick      = 0;
    num_terms = VA_Get_Size(terms);
    while (tick < num_terms) {
        Obj     *term    = VA_Fetch(terms, tick);
        int32_t  max_doc = Searcher_Doc_Max(searcher);
        int32_t  freq    = Searcher_Doc_Freq(searcher, parent->field, term);
        self->idf += Sim_IDF(sim, freq, max_doc);
        tick++;
    }

    // Raw weight combines the summed IDF with the boost held on self.
    self->raw_weight = self->idf * self->boost;

    // Finish by normalizing the freshly computed weight.
    PhraseCompiler_Normalize(self);

    return self;
}
Beispiel #4
0
/* Initialize a TermCompiler for a TermQuery `parent`.
 *
 * Looks up a Similarity for the query's field (falling back to the Schema's
 * general Similarity when the field lookup yields nothing), runs the base
 * Compiler initializer, zeroes the normalization members, then computes the
 * term's IDF and the raw weight.
 *
 * @param self     The object being initialized.
 * @param parent   The parent Query; it is accessed as a TermQuery.
 * @param searcher Source of the Schema, Doc_Max and Doc_Freq values.
 * @param boost    Boost forwarded to Compiler_init.
 * @return self.
 */
TermCompiler*
TermCompiler_init(TermCompiler *self, Query *parent, Searcher *searcher,
                  float boost) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const query_ivars = TermQuery_IVARS((TermQuery*)parent);
    Schema *schema = Searcher_Get_Schema(searcher);

    // Field-specific Similarity first; Schema-wide Similarity as fallback.
    Similarity *sim = Schema_Fetch_Sim(schema, query_ivars->field);
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Base-class initialization, then reset the normalization state.
    Compiler_init((Compiler*)self, parent, searcher, sim, boost);
    ivars->normalized_weight = 0.0f;
    ivars->query_norm_factor = 0.0f;

    // Gather the collection statistics needed for the IDF.
    int32_t  max_doc = Searcher_Doc_Max(searcher);
    uint32_t freq    = Searcher_Doc_Freq(searcher, query_ivars->field,
                                         query_ivars->term);
    ivars->idf = Sim_IDF(sim, (int32_t)freq, max_doc);

    /* A document's score is roughly
     *
     *    (tf_d * idf_t / norm_d) * (tf_q * idf_t / norm_q)
     *
     * This step contributes the first idf_t together with the
     * user-supplied boost.  The second factor is folded in later by the
     * call to Normalize().  tf_d and norm_d vary per document, so only the
     * Matcher can supply them.
     */
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}