示例#1
0
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent, 
                    Searcher *searcher, float boost)
{
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent->field);
    VArray     *terms  = parent->terms;
    uint32_t i, max;

    // Try harder to find a Similarity if necessary. 
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init. 
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase. 
    self->idf = 0;
    for (i = 0, max = VA_Get_Size(terms); i < max; i++) {
        Obj *term = VA_Fetch(terms, i);
        int32_t doc_max  = Searcher_Doc_Max(searcher);
        int32_t doc_freq = Searcher_Doc_Freq(searcher, parent->field, term);
        self->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight. 
    self->raw_weight = self->idf * self->boost;

    // Make final preparations. 
    PhraseCompiler_Normalize(self);

    return self;
}
示例#2
0
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, VArray *searchers) {
    const uint32_t num_searchers = VA_Get_Size(searchers);
    int32_t *starts_array = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    int32_t  doc_max      = 0;

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (VArray*)INCREF(searchers);
    ivars->starts = NULL; // Safe cleanup.

    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(VA_Fetch(searchers, i), SEARCHER);
        Schema *candidate       = Searcher_Get_Schema(searcher);
        Class  *orig_class      = Schema_Get_Class(schema);
        Class  *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }

        // Derive doc_max and relative start offsets.
        starts_array[i] = (int32_t)doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    ivars->starts  = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars = PhraseQuery_IVARS(parent);
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent_ivars->field);
    Vector     *terms  = parent_ivars->terms;

    // Try harder to find a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase.
    ivars->idf = 0;
    for (uint32_t i = 0, max = Vec_Get_Size(terms); i < max; i++) {
        Obj     *term     = Vec_Fetch(terms, i);
        int32_t  doc_max  = Searcher_Doc_Max(searcher);
        int32_t  doc_freq = Searcher_Doc_Freq(searcher, parent_ivars->field, term);
        ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
void
TestQPSyntax_run_tests(Folder *index)
{
    u32_t i;
    TestBatch   *batch = Test_new_batch("TestQueryParserSyntax", 48, NULL);
    Searcher    *searcher     = Searcher_new((Obj*)index);
    QueryParser *qparser      = QParser_new(Searcher_Get_Schema(searcher), 
        NULL, NULL, NULL);
    QParser_Set_Heed_Colons(qparser, true);

    PLAN(batch);

    for (i = 0; leaf_test_funcs[i] != NULL; i++) {
        kino_TestQPSyntax_test_t test_func = leaf_test_funcs[i];
        TestQueryParser *test_case = test_func(i);
        Query *tree     = QParser_Tree(qparser, test_case->query_string);
        Query *expanded = QParser_Expand_Leaf(qparser, test_case->tree);
        Query *parsed   = QParser_Parse(qparser, test_case->query_string);
        Hits  *hits     = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree()    %s", test_case->query_string->ptr);
        ASSERT_TRUE(batch, Query_Equals(expanded, (Obj*)test_case->expanded),
            "expand_leaf()    %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits:    %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(expanded);
        DECREF(tree);
        DECREF(test_case);
    }

    for (i = 0; syntax_test_funcs[i] != NULL; i++) {
        kino_TestQPSyntax_test_t test_func = syntax_test_funcs[i];
        TestQueryParser *test_case = test_func(i);
        Query *tree   = QParser_Tree(qparser, test_case->query_string);
        Query *parsed = QParser_Parse(qparser, test_case->query_string);
        Hits  *hits   = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree()    %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits:    %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    batch->destroy(batch);
    DECREF(searcher);
    DECREF(qparser);
}
示例#5
0
Compiler*
Compiler_init(Compiler *self, Query *parent, Searcher *searcher,
              Similarity *sim, float boost) {
    Query_init((Query*)self, boost);
    if (!sim) {
        Schema *schema = Searcher_Get_Schema(searcher);
        sim = Schema_Get_Similarity(schema);
    }
    self->parent  = (Query*)INCREF(parent);
    self->sim     = (Similarity*)INCREF(sim);
    ABSTRACT_CLASS_CHECK(self, COMPILER);
    return self;
}
示例#6
0
文件: TermQuery.c 项目: apache/lucy
TermCompiler*
TermCompiler_init(TermCompiler *self, Query *parent, Searcher *searcher,
                  float boost) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars = TermQuery_IVARS((TermQuery*)parent);
    Schema     *schema  = Searcher_Get_Schema(searcher);
    Similarity *sim     = Schema_Fetch_Sim(schema, parent_ivars->field);

    // Try harder to get a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, parent, searcher, sim, boost);
    ivars->normalized_weight = 0.0f;
    ivars->query_norm_factor = 0.0f;

    // Derive.
    int32_t  doc_max  = Searcher_Doc_Max(searcher);
    uint32_t doc_freq = Searcher_Doc_Freq(searcher, parent_ivars->field,
                                          parent_ivars->term);
    ivars->idf = Sim_IDF(sim, (int32_t)doc_freq, doc_max);

    /* The score of any document is approximately equal to:
     *
     *    (tf_d * idf_t / norm_d) * (tf_q * idf_t / norm_q)
     *
     * Here we add in the first IDF, plus user-supplied boost.
     *
     * The second clause is factored in by the call to Normalize().
     *
     * tf_d and norm_d can only be added by the Matcher, since they are
     * per-document.
     */
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
void
TestQPLogic_run_tests()
{
    u32_t i;
    TestBatch   *batch = Test_new_batch("TestQueryParserLogic", 178, NULL);
    Folder      *folder       = S_create_index();
    Searcher    *searcher     = Searcher_new((Obj*)folder);
    QueryParser *or_parser    = QParser_new(Searcher_Get_Schema(searcher), 
        NULL, NULL, NULL);
    static  ZombieCharBuf AND = ZCB_LITERAL("AND");
    QueryParser *and_parser   = QParser_new(Searcher_Get_Schema(searcher), 
        NULL, (CharBuf*)&AND, NULL);
    QParser_Set_Heed_Colons(or_parser, true);
    QParser_Set_Heed_Colons(and_parser, true);

    PLAN(batch);

    /* Run logical tests with default boolop of OR. */
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_OR);
        Query *tree     = QParser_Tree(or_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(or_parser, test_case->query_string);
        Hits  *hits     = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree() OR   %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits: OR   %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    /* Run logical tests with default boolop of AND. */
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_AND);
        Query *tree     = QParser_Tree(and_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(and_parser, test_case->query_string);
        Hits  *hits     = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree() AND   %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits: AND   %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    /* Run tests for QParser_Prune(). */
    for (i = 0; prune_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_prune_test_t test_func = prune_test_funcs[i];
        TestQueryParser *test_case = test_func();
        CharBuf *qstring = test_case->tree 
                         ? Obj_To_String(test_case->tree)
                         : CB_new_from_trusted_utf8("(NULL)", 6);
        Query *tree = test_case->tree;
        Query *wanted = test_case->expanded;
        Query *pruned   = QParser_Prune(or_parser, tree);
        Query *expanded;
        Hits  *hits;

        ASSERT_TRUE(batch, Query_Equals(pruned, (Obj*)wanted),
            "prune()   %s", qstring->ptr);
        expanded = QParser_Expand(or_parser, pruned);
        hits = Searcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits:    %s", qstring->ptr);

        DECREF(hits);
        DECREF(expanded);
        DECREF(pruned);
        DECREF(qstring);
        DECREF(test_case);
    }

    DECREF(and_parser);
    DECREF(or_parser);
    DECREF(searcher);
    DECREF(folder);
    batch->destroy(batch);
}