コード例 #1
0
ファイル: ind.c プロジェクト: dustin/ferret
/*
 * Creates and returns a new Index.
 *
 * store      - backing store. When non-NULL a reference is taken with REF().
 *              When NULL a store is obtained from open_ram_store() and
 *              +create+ is forced to true.
 * analyzer   - analyzer to use. When non-NULL a reference is taken with
 *              REF(). When NULL mb_standard_analyzer_new(true) is used.
 * def_fields - set of fields to register with the query parser; every
 *              element is passed to qp_add_field(qp, field, true, true).
 *              May be NULL, in which case no fields are registered.
 * create     - when true, index_create() is run on the store with a
 *              FieldInfos built from STORE_YES, INDEX_YES and
 *              TERM_VECTOR_WITH_POSITIONS_OFFSETS.
 */
Index *index_new(Store *store, Analyzer *analyzer, HashSet *def_fields,
                 bool create)
{
    Index *self = ALLOC_AND_ZERO(Index);
    HashSetEntry *hse;
    /* FIXME: need to add these to the query parser */
    self->config = default_config;
    mutex_init(&self->mutex, NULL);
    self->has_writes = false;

    if (store) {
        REF(store);
        self->store = store;
    } else {
        /* No store supplied: use a RAM store, which always has to be
         * initialised, so creation is forced on. */
        self->store = open_ram_store();
        create = true;
    }

    if (analyzer) {
        self->analyzer = analyzer;
        REF(analyzer);
    } else {
        self->analyzer = mb_standard_analyzer_new(true);
    }

    if (create) {
        FieldInfos *fis = fis_new(STORE_YES, INDEX_YES,
                                  TERM_VECTOR_WITH_POSITIONS_OFFSETS);
        index_create(self->store, fis);
        /* Drop the local reference to fis once index_create() is done. */
        fis_deref(fis);
    }

    /* options */
    self->key = NULL;
    self->id_field = intern("id");
    self->def_field = intern("id");
    self->auto_flush = false;
    self->check_latest = true;

    /* An extra reference is taken before the analyzer is handed to the
     * query parser -- presumably the parser keeps (and later releases) its
     * own reference; confirm against qp_new(). */
    REF(self->analyzer);
    self->qp = qp_new(self->analyzer);

    /* Like store and analyzer, def_fields is optional: a NULL set simply
     * registers no fields instead of being dereferenced. */
    if (def_fields) {
        for (hse = def_fields->first; hse; hse = hse->next) {
            qp_add_field(self->qp, (Symbol)hse->elem, true, true);
        }
    }

    /* Index is a convenience class so set qp convenience options */
    self->qp->allow_any_fields = true;
    self->qp->clean_str = true;
    self->qp->handle_parse_errors = true;

    return self;
}
コード例 #2
0
ファイル: r_qparser.c プロジェクト: BusProject/theballot
/*
 *  call-seq:
 *     QueryParser.new(options = {}) -> QueryParser
 *
 *  Create a new QueryParser. The QueryParser is used to convert string
 *  queries into Query objects. If the single optional argument is not a
 *  Hash it is treated as the default field (or fields) and no other
 *  options are read. The options are;
 *
 *  === Options
 *
 *  :default_field::        Default: "*" (all fields). The default field to
 *                          search when no field is specified in the search
 *                          string. It can also be an array of fields.
 *  :analyzer::             Default: StandardAnalyzer. Analyzer used by the
 *                          query parser to parse query terms
 *  :wild_card_downcase::   Default: true. Specifies whether wild-card queries
 *                          should be downcased or not since they are not
 *                          passed through the parser
 *  :fields::               Default: []. Lets the query parser know what
 *                          fields are available for searching, particularly
 *                          when the "*" is specified as the search field
 *  :tokenized_fields::     Default: :fields. Lets the query parser know which
 *                          fields are tokenized so it knows which fields to
 *                          run the analyzer over.
 *  :validate_fields::      Default: false. Set to true if you want an
 *                          exception to be raised if there is an attempt to
 *                          search a non-existent field
 *  :or_default::           Default: true. Use "OR" as the default boolean
 *                          operator
 *  :default_slop::         Default: 0. Default slop to use in PhraseQuery
 *  :handle_parser_errors:: Default: true. QueryParser will quietly handle all
 *                          parsing errors internally. If you'd like to handle
 *                          them yourself, set this parameter to false.
 *  :clean_string::         Default: true. QueryParser will do a quick
 *                          once-over of the query string to make sure that
 *                          quotes and brackets match up and special
 *                          characters are escaped
 *  :max_clauses::          Default: 512. The maximum number of clauses
 *                          allowed in boolean queries and the maximum number
 *                          of terms allowed in multi, prefix, wild-card or
 *                          fuzzy queries when those queries are generated by
 *                          rewriting other queries
 */
static VALUE
frt_qp_init(int argc, VALUE *argv, VALUE self)
{
    VALUE roptions;
    VALUE rval;
    Analyzer *analyzer = NULL;
    /* True only when the optional argument was supplied AND is a Hash. */
    bool has_options = false;

    HashSet *all_fields = NULL;
    HashSet *tkz_fields = NULL;
    HashSet *def_fields = NULL;
    QParser *qp;

    if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
        if (TYPE(roptions) == T_HASH) {
            has_options = true;
            if (Qnil != (rval = rb_hash_aref(roptions, sym_default_field))) {
                def_fields = frt_get_fields(rval);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_analyzer))) {
                analyzer = frt_get_cwrapped_analyzer(rval);
            }
            /* Both sym_fields and sym_all_fields feed all_fields, with
             * sym_fields taking precedence. Resolve which value to use
             * first so that only one field set is ever built; building
             * both and overwriting the pointer would drop the first set. */
            rval = rb_hash_aref(roptions, sym_fields);
            if (Qnil == rval) {
                rval = rb_hash_aref(roptions, sym_all_fields);
            }
            if (Qnil != rval) {
                all_fields = frt_get_fields(rval);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
                tkz_fields = frt_get_fields(rval);
            }
        } else {
            /* Non-Hash argument: treat it as the default field(s). */
            def_fields = frt_get_fields(roptions);
        }
    }
    if (all_fields == NULL) {
        all_fields = hs_new_str(&free);
    }

    if (!analyzer) {
        analyzer = mb_standard_analyzer_new(true);
    }

    qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
    qp->allow_any_fields = true;
    qp->clean_str = true;
    /* NOTE(review): handle_parse_errors, wild_lower, or_default, def_slop and
     * max_clauses keep whatever qp_new() set unless overridden below. */

    /* handle options
     *
     * Gate on has_options rather than argc: roptions is only known to be a
     * Hash when has_options is set, and rb_hash_aref() must not be called on
     * a String/Symbol/Array/nil argument. */
    if (has_options) {
        if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {
            qp->handle_parse_errors = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_validate_fields))) {
            /* validate_fields is the inverse of allow_any_fields */
            qp->allow_any_fields = !RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_wild_card_downcase))) {
            qp->wild_lower = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_or_default))) {
            qp->or_default = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_default_slop))) {
            qp->def_slop = FIX2INT(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_clean_string))) {
            qp->clean_str = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_max_clauses))) {
            qp->max_clauses = FIX2INT(rval);
        }
    }
    Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);
    object_add(qp, self);
    return self;
}
コード例 #3
0
ファイル: test_q_parser.c プロジェクト: Bira/ferret
static void test_q_parser_standard_analyzer(TestCase *tc, void *data)
{
    int i;
    Analyzer *analyzer = mb_standard_analyzer_new(true);
    QParser *parser;
    QPTestPair pairs[] = {
        {"", ""},
        {"word", "word"},
        {"f1:word", "f1:word"},
        {"f1|f2:word", "f1:word f2:word"},
        {"field:word", "field:word"},
        {"\"word1 word2 word3\"", "\"word1 word2 word3\""},
        {"\"word1 2342 word3\"", "\"word1 2342 word3\""},
        {"field:\"one two three\"", "field:\"one two three\""},
        {"field:\"one 222 three\"", "field:\"one 222 three\""},
        {"field:\"one <> three\"", "field:\"one <> three\""},
        {"field:\"<> two three\"", "field:\"two three\""},
        {"field:\"one <> three <>\"", "field:\"one <> three\""},
        {"field:\"one <> <> <> three <>\"", "field:\"one <> <> <> three\""},
        {"field:\"one <> <> <> three|four|five <>\"",
            "field:\"one <> <> <> three|four|five\""},
        {"field:\"one|two three|four|five six|seven\"",
            "field:\"one|two three|four|five six|seven\""},
        {"field:\"testing|trucks\"", "field:\"testing|trucks\""},
        {"[aaa bbb]", "[aaa bbb]"},
        {"{aaa bbb]", "{aaa bbb]"},
        {"field:[aaa bbb}", "field:[aaa bbb}"},
        {"{aaa bbb}", "{aaa bbb}"},
        {"{aaa>", "{aaa>"},
        {"[aaa>", "[aaa>"},
        {"field:<aaa}", "field:<aaa}"},
        {"<aaa]", "<aaa]"},
        {">aaa", "{aaa>"},
        {">=aaa", "[aaa>"},
        {"<aaa", "<aaa}"},
        {"field:<=aaa", "field:<aaa]"},
        {"REQ one REQ two", "+one +two"},
        {"REQ one two", "+one two"},
        {"one REQ two", "one +two"},
        {"+one +two", "+one +two"},
        {"+one two", "+one two"},
        {"one +two", "one +two"},
        {"-one -two", "-one -two"},
        {"-one two", "-one two"},
        {"one -two", "one -two"},
        {"!one !two", "-one -two"},
        {"!one two", "-one two"},
        {"one !two", "one -two"},
        {"NOT one NOT two", "-one -two"},
        {"NOT one two", "-one two"},
        {"one NOT two", "one -two"},
        {"one two", "one two"},
        {"one OR two", "one two"},
        {"one AND two", "+one +two"},
        {"one two AND three", "one two +three"},
        {"one two OR three", "one two three"},
        {"one (two AND three)", "one (+two +three)"},
        {"one AND (two OR three)", "+one +(two three)"},
        {"field:(one AND (two OR three))", "+field:one +(field:two field:three)"},
        {"one AND (two OR [aaa vvv})", "+one +(two [aaa vvv})"},
        {"one AND (f1:two OR f2:three) AND four", "+one +(f1:two f2:three) +four"},
        {"one^1.2300", "one^1.23"},
        {"(one AND two)^100.23", "(+one +two)^100.23"},
        {"field:(one AND two)^100.23", "(+field:one +field:two)^100.23"},
        {"field:(one AND [aaa bbb]^23.300)^100.23",
            "(+field:one +field:[aaa bbb]^23.3)^100.23"},
        {"(REQ field:\"one two three\")^23.000", "field:\"one two three\"^23.0"},
        {"asdf~0.2", "asdf~0.2"},
        {"field:asdf~0.2", "field:asdf~0.2"},
        {"asdf~0.2^100.00", "asdf~0.2^100.0"},
        {"field:asdf~0.2^0.1", "field:asdf~0.2^0.1"},
        {"field:\"asdf <> asdf|asdf\"~4", "field:\"asdf <> asdf|asdf\"~4"},
        {"\"one two three four five\"~5", "\"one two three four five\"~5"},
        {"ab?de", "ab?de"},
        {"ab*de", "ab*de"},
        {"asdf?*?asd*dsf?asfd*asdf?", "asdf?*?asd*dsf?asfd*asdf?"},
        {"field:a* AND field:(b*)", "+field:a* +field:b*"},
        {"field:abc~ AND field:(b*)", "+field:abc~ +field:b*"},
        {"asdf?*?asd*dsf?asfd*asdf?^20.0", "asdf?*?asd*dsf?asfd*asdf?^20.0"},

        {"*:xxx", "xxx f1:xxx f2:xxx field:xxx"},
        {"f1|f2:xxx", "f1:xxx f2:xxx"},

        {"*:asd~0.2", "asd~0.2 f1:asd~0.2 f2:asd~0.2 field:asd~0.2"},
        {"f1|f2:asd~0.2", "f1:asd~0.2 f2:asd~0.2"},

        {"*:a?d*^20.0", "(a?d* f1:a?d* f2:a?d* field:a?d*)^20.0"},
        {"f1|f2:a?d*^20.0", "(f1:a?d* f2:a?d*)^20.0"},

        {"*:\"asdf <> xxx|yyy\"",
            "\"asdf <> xxx|yyy\" f1:\"asdf <> xxx|yyy\" f2:\"asdf <> xxx|yyy\" "
                "field:\"asdf <> xxx|yyy\""},
        {"f1|f2:\"asdf <> xxx|yyy\"",
            "f1:\"asdf <> xxx|yyy\" f2:\"asdf <> xxx|yyy\""},
        {"f1|f2:\"do|yyy\"", "f1:yyy f2:yyy"},
        {"f1|f2:\"asdf <> do|yyy\"",
            "f1:\"asdf <> yyy\" f2:\"asdf <> yyy\""},

        {"*:[bbb xxx]", "[bbb xxx] f1:[bbb xxx] f2:[bbb xxx] field:[bbb xxx]"},
        {"f1|f2:[bbb xxx]", "f1:[bbb xxx] f2:[bbb xxx]"},

        {"*:(xxx AND bbb)",
            "+(xxx f1:xxx f2:xxx field:xxx) +(bbb f1:bbb f2:bbb field:bbb)"},
        {"f1|f2:(xxx AND bbb)", "+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)"},
        {"ASDF?*?asd*dsf?ASFD*asdf?^20.0", "asdf?*?asd*dsf?asfd*asdf?^20.0"},
        {"ASDFasdAasAasASD~", "asdfasdaasaasasd~"},
        {"\"onewordphrase\"", "onewordphrase"},
        {"one billion eight hundred and thirty three million four hundred and "
         "eighty eight thousand two hundred and sixty three",
            "one billion eight hundred thirty three million four hundred "
            "eighty eight thousand two hundred sixty three"},
        {"f1:*", "*"},
        {"f1:*^100.0", "*^100.0"},
        {"f1:?*", "f1:?*"},
        {"*:this", ""},
        {"this-is-a-hyphenated-word", "\"thisisahyphenatedword|this is a hyphenated word\"~4"},
        {"\"the phrase and the phrase\"", "\"phrase <> <> phrase\"~3"},
        {"\"the e-mail was in the inbox\"", "\"email|e mail <> <> <> inbox\"~5"},
        {"f1:?*^100.0", "f1:?*^100.0"},
        {"f1:(a1 f2:b2 c3)", "f1:a1 f2:b2 f1:c3"}
         /*
            */
    };  
    (void)data;

    REF(analyzer);
    parser = qp_new(analyzer);
    qp_add_field(parser, I("xx"),    true,  true);
    qp_add_field(parser, I("f1"),    false, true);
    qp_add_field(parser, I("f2"),    false, true);
    qp_add_field(parser, I("field"), false, true);

    for (i = 0; i < NELEMS(pairs); i++) {
        PARSER_TEST(pairs[i].qstr, pairs[i].qres);
    }
    parser->clean_str = true;
    for (i = 0; i < NELEMS(pairs); i++) {
        PARSER_TEST(pairs[i].qstr, pairs[i].qres);
    }
    PARSER_TEST("not_field:word", "");
    qp_destroy(parser);

    /* This time let the query parser destroy the analyzer */
    parser = qp_new(analyzer);
    qp_add_field(parser, I("xx"),    true,  true);
    qp_add_field(parser, I("f1"),    false, true);
    qp_add_field(parser, I("f2"),    false, true);
    qp_add_field(parser, I("field"), false, true);

    parser->clean_str = false;
    parser->allow_any_fields = true;
    for (i = 0; i < NELEMS(pairs); i++) {
        PARSER_TEST(pairs[i].qstr, pairs[i].qres);
    }
    parser->clean_str = true;
    for (i = 0; i < NELEMS(pairs); i++) {
        PARSER_TEST(pairs[i].qstr, pairs[i].qres);
    }
    PARSER_TEST("not_field:word", "not_field:word");

    parser->wild_lower = false;
    PARSER_TEST("ASDF?*?asd*dsf?ASFD*asdf?^20.0", "ASDF?*?asd*dsf?ASFD*asdf?^20.0");
    PARSER_TEST("ASDFasdAasAasASD~", "asdfasdaasaasasd~");
    qp_destroy(parser);
}