/*
 * Runs a table of malformed query strings through a parser that has
 * handle_parse_errors switched on. Every (qstr, qres) pair is handed to
 * PARSER_TEST twice: first with clean_str exactly as qp_new() left it, then
 * again after clean_str has been set to true.
 */
static void test_qp_bad_queries(TestCase *tc, void *data)
{
    QPTestPair pairs[] = {
        {"[, ]",                 ""},
        {"::*word",              "word"},
        {"::))*&)(*^&*(",        ""},
        {"::|)*&one)(*two(*&\"", "\"one two\"~1"}
    };
    QParser *parser = qp_new(letter_analyzer_new(true));
    int pass;
    int i;

    (void)data;

    qp_add_field(parser, I("xx"),    true,  true);
    qp_add_field(parser, I("f1"),    false, true);
    qp_add_field(parser, I("f2"),    false, true);
    qp_add_field(parser, I("field"), false, true);

    parser->handle_parse_errors = true;

    for (pass = 0; pass < 2; pass++) {
        if (pass == 1) {
            /* second pass: same table, now with string cleaning on */
            parser->clean_str = true;
        }
        for (i = 0; i < NELEMS(pairs); i++) {
            PARSER_TEST(pairs[i].qstr, pairs[i].qres);
        }
    }

    qp_destroy(parser);
}
/*
 * Allocate and set up a new Index.
 *
 * store      - store backing the index. When non-NULL a reference is taken
 *              with REF(). When NULL a store is obtained from
 *              open_ram_store() and +create+ is forced to true.
 * analyzer   - analyzer for the index. When non-NULL a reference is taken
 *              with REF(). When NULL, mb_standard_analyzer_new(true) is used.
 * def_fields - set whose elements are passed (cast to Symbol) to
 *              qp_add_field() on the index's query parser. May be NULL, in
 *              which case no fields are registered with the parser.
 * create     - when true, index_create() is called on the store with a
 *              FieldInfos built from STORE_YES, INDEX_YES and
 *              TERM_VECTOR_WITH_POSITIONS_OFFSETS.
 *
 * Returns the newly allocated Index.
 */
Index *index_new(Store *store, Analyzer *analyzer, HashSet *def_fields,
                 bool create)
{
    Index *self = ALLOC_AND_ZERO(Index);
    HashSetEntry *hse;

    /* FIXME: need to add these to the query parser */
    self->config = default_config;
    mutex_init(&self->mutex, NULL);
    self->has_writes = false;

    if (store) {
        REF(store);
        self->store = store;
    }
    else {
        self->store = open_ram_store();
        create = true;
    }

    if (analyzer) {
        self->analyzer = analyzer;
        REF(analyzer);
    }
    else {
        self->analyzer = mb_standard_analyzer_new(true);
    }

    if (create) {
        FieldInfos *fis = fis_new(STORE_YES, INDEX_YES,
                                  TERM_VECTOR_WITH_POSITIONS_OFFSETS);
        index_create(self->store, fis);
        fis_deref(fis);
    }

    /* options */
    self->key = NULL;
    self->id_field = intern("id");
    self->def_field = intern("id");
    self->auto_flush = false;
    self->check_latest = true;

    /* An extra reference is taken before the analyzer is handed to the
     * query parser (presumably the parser releases one when it is
     * destroyed -- confirm against qp_destroy). */
    REF(self->analyzer);
    self->qp = qp_new(self->analyzer);

    /* store and analyzer are both optional above; treat a NULL field set
     * the same way instead of dereferencing it. */
    if (def_fields) {
        for (hse = def_fields->first; hse; hse = hse->next) {
            qp_add_field(self->qp, (Symbol)hse->elem, true, true);
        }
    }

    /* Index is a convenience class so set qp convenience options */
    self->qp->allow_any_fields = true;
    self->qp->clean_str = true;
    self->qp->handle_parse_errors = true;

    return self;
}
/*
 * Parses the same query string twice with PARSER_TEST: once with the parser
 * as qp_new() configured it, and once after use_keywords has been set to
 * false. The expected strings differ between the two runs.
 */
static void test_qp_keyword_switch(TestCase *tc, void *data)
{
    QParser *parser = qp_new(letter_analyzer_new(true));

    (void)data;

    qp_add_field(parser, I("xx"), true, true);

    /* use_keywords untouched since qp_new() */
    PARSER_TEST("REQ www (xxx AND yyy) OR NOT zzz",
                "+www (+xxx +yyy) -zzz");

    /* use_keywords explicitly disabled */
    parser->use_keywords = false;
    PARSER_TEST("REQ www (xxx AND yyy) OR NOT zzz",
                "req www (xxx and yyy) or not zzz");

    qp_destroy(parser);
}
/*
 * Checks the type field of the Query returned by qp_parse() for a handful of
 * terms containing '*' and '?'. Each parsed query is released with q_deref()
 * before the next string is parsed.
 */
static void test_qp_prefix_query(TestCase *tc, void *data)
{
    /* query string -> value expected in q->type, in the order they are run */
    struct {
        char *qstr;
        int   type;
    } cases[] = {
        {"asdg*",  PREFIX_QUERY},
        {"a?dg*",  WILD_CARD_QUERY},
        {"a*dg*",  WILD_CARD_QUERY},
        {"asdg*a", WILD_CARD_QUERY}
    };
    const int case_cnt = (int)(sizeof(cases) / sizeof(cases[0]));
    QParser *parser = qp_new(letter_analyzer_new(true));
    Query *q;
    int k;

    (void)data;

    qp_add_field(parser, I("xx"), true, true);

    for (k = 0; k < case_cnt; k++) {
        q = qp_parse(parser, cases[k].qstr);
        Aiequal(cases[k].type, q->type);
        q_deref(q);
    }

    qp_destroy(parser);
}
/*
 *  call-seq:
 *     QueryParser.new(options = {}) -> QueryParser
 *
 *  Create a new QueryParser. The QueryParser is used to convert string
 *  queries into Query objects. If the single argument is not a Hash it is
 *  used as the default field(s) and no other options are read. The options
 *  are:
 *
 *  === Options
 *
 *  :default_field::        Default: "*" (all fields). The default field to
 *                          search when no field is specified in the search
 *                          string. It can also be an array of fields.
 *  :analyzer::             Default: StandardAnalyzer. Analyzer used by the
 *                          query parser to parse query terms
 *  :wild_card_downcase::   Default: true. Specifies whether wild-card queries
 *                          should be downcased or not since they are not
 *                          passed through the parser
 *  :fields::               Default: []. Lets the query parser know what
 *                          fields are available for searching, particularly
 *                          when the "*" is specified as the search field
 *  :tokenized_fields::     Default: :fields. Lets the query parser know which
 *                          fields are tokenized so it knows which fields to
 *                          run the analyzer over.
 *  :validate_fields::      Default: false. Set to true if you want an
 *                          exception to be raised if there is an attempt to
 *                          search a non-existent field
 *  :or_default::           Default: true. Use "OR" as the default boolean
 *                          operator
 *  :default_slop::         Default: 0. Default slop to use in PhraseQuery
 *  :handle_parser_errors:: Default: true. QueryParser will quietly handle all
 *                          parsing errors internally. If you'd like to handle
 *                          them yourself, set this parameter to false.
 *  :clean_string::         Default: true. QueryParser will do a quick
 *                          once-over the query string to make sure that
 *                          quotes and brackets match up and special
 *                          characters are escaped
 *  :max_clauses::          Default: 512. The maximum number of clauses
 *                          allowed in boolean queries and the maximum number
 *                          of terms allowed in multi, prefix, wild-card or
 *                          fuzzy queries when those queries are generated by
 *                          rewriting other queries
 */
static VALUE
frt_qp_init(int argc, VALUE *argv, VALUE self)
{
    VALUE roptions;
    VALUE rval;
    Analyzer *analyzer = NULL;
    bool has_options = false;
    HashSet *all_fields = NULL;
    HashSet *tkz_fields = NULL;
    HashSet *def_fields = NULL;
    QParser *qp;

    if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
        if (TYPE(roptions) == T_HASH) {
            has_options = true;
            if (Qnil != (rval = rb_hash_aref(roptions, sym_default_field))) {
                def_fields = frt_get_fields(rval);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_analyzer))) {
                analyzer = frt_get_cwrapped_analyzer(rval);
            }
            /* sym_fields takes precedence over sym_all_fields. Only one set
             * is ever built so that the set created for sym_all_fields is
             * not overwritten (and leaked) when both keys are present. */
            rval = rb_hash_aref(roptions, sym_fields);
            if (Qnil == rval) {
                rval = rb_hash_aref(roptions, sym_all_fields);
            }
            if (Qnil != rval) {
                all_fields = frt_get_fields(rval);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
                tkz_fields = frt_get_fields(rval);
            }
        }
        else {
            /* a lone non-Hash argument names the default field(s) */
            def_fields = frt_get_fields(roptions);
        }
    }

    if (all_fields == NULL) {
        all_fields = hs_new_str(&free);
    }
    if (!analyzer) {
        analyzer = mb_standard_analyzer_new(true);
    }

    qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
    qp->allow_any_fields = true;
    qp->clean_str = true;

    /* handle options
     *
     * Guarded by has_options rather than argc: when the argument was not a
     * Hash, roptions must not be passed to rb_hash_aref(). */
    if (has_options) {
        /* NOTE(review): the rdoc above spells this option
         * :handle_parser_errors -- confirm which spelling
         * sym_handle_parse_errors is interned with. */
        if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {
            qp->handle_parse_errors = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_validate_fields))) {
            /* validating fields is the inverse of allowing any field */
            qp->allow_any_fields = !RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_wild_card_downcase))) {
            qp->wild_lower = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_or_default))) {
            qp->or_default = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_default_slop))) {
            qp->def_slop = FIX2INT(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_clean_string))) {
            qp->clean_str = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_max_clauses))) {
            qp->max_clauses = FIX2INT(rval);
        }
    }

    Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);
    object_add(qp, self);
    return self;
}
static void test_q_parser_standard_analyzer(TestCase *tc, void *data) { int i; Analyzer *analyzer = mb_standard_analyzer_new(true); QParser *parser; QPTestPair pairs[] = { {"", ""}, {"word", "word"}, {"f1:word", "f1:word"}, {"f1|f2:word", "f1:word f2:word"}, {"field:word", "field:word"}, {"\"word1 word2 word3\"", "\"word1 word2 word3\""}, {"\"word1 2342 word3\"", "\"word1 2342 word3\""}, {"field:\"one two three\"", "field:\"one two three\""}, {"field:\"one 222 three\"", "field:\"one 222 three\""}, {"field:\"one <> three\"", "field:\"one <> three\""}, {"field:\"<> two three\"", "field:\"two three\""}, {"field:\"one <> three <>\"", "field:\"one <> three\""}, {"field:\"one <> <> <> three <>\"", "field:\"one <> <> <> three\""}, {"field:\"one <> <> <> three|four|five <>\"", "field:\"one <> <> <> three|four|five\""}, {"field:\"one|two three|four|five six|seven\"", "field:\"one|two three|four|five six|seven\""}, {"field:\"testing|trucks\"", "field:\"testing|trucks\""}, {"[aaa bbb]", "[aaa bbb]"}, {"{aaa bbb]", "{aaa bbb]"}, {"field:[aaa bbb}", "field:[aaa bbb}"}, {"{aaa bbb}", "{aaa bbb}"}, {"{aaa>", "{aaa>"}, {"[aaa>", "[aaa>"}, {"field:<aaa}", "field:<aaa}"}, {"<aaa]", "<aaa]"}, {">aaa", "{aaa>"}, {">=aaa", "[aaa>"}, {"<aaa", "<aaa}"}, {"field:<=aaa", "field:<aaa]"}, {"REQ one REQ two", "+one +two"}, {"REQ one two", "+one two"}, {"one REQ two", "one +two"}, {"+one +two", "+one +two"}, {"+one two", "+one two"}, {"one +two", "one +two"}, {"-one -two", "-one -two"}, {"-one two", "-one two"}, {"one -two", "one -two"}, {"!one !two", "-one -two"}, {"!one two", "-one two"}, {"one !two", "one -two"}, {"NOT one NOT two", "-one -two"}, {"NOT one two", "-one two"}, {"one NOT two", "one -two"}, {"one two", "one two"}, {"one OR two", "one two"}, {"one AND two", "+one +two"}, {"one two AND three", "one two +three"}, {"one two OR three", "one two three"}, {"one (two AND three)", "one (+two +three)"}, {"one AND (two OR three)", "+one +(two three)"}, {"field:(one AND (two OR three))", 
"+field:one +(field:two field:three)"}, {"one AND (two OR [aaa vvv})", "+one +(two [aaa vvv})"}, {"one AND (f1:two OR f2:three) AND four", "+one +(f1:two f2:three) +four"}, {"one^1.2300", "one^1.23"}, {"(one AND two)^100.23", "(+one +two)^100.23"}, {"field:(one AND two)^100.23", "(+field:one +field:two)^100.23"}, {"field:(one AND [aaa bbb]^23.300)^100.23", "(+field:one +field:[aaa bbb]^23.3)^100.23"}, {"(REQ field:\"one two three\")^23.000", "field:\"one two three\"^23.0"}, {"asdf~0.2", "asdf~0.2"}, {"field:asdf~0.2", "field:asdf~0.2"}, {"asdf~0.2^100.00", "asdf~0.2^100.0"}, {"field:asdf~0.2^0.1", "field:asdf~0.2^0.1"}, {"field:\"asdf <> asdf|asdf\"~4", "field:\"asdf <> asdf|asdf\"~4"}, {"\"one two three four five\"~5", "\"one two three four five\"~5"}, {"ab?de", "ab?de"}, {"ab*de", "ab*de"}, {"asdf?*?asd*dsf?asfd*asdf?", "asdf?*?asd*dsf?asfd*asdf?"}, {"field:a* AND field:(b*)", "+field:a* +field:b*"}, {"field:abc~ AND field:(b*)", "+field:abc~ +field:b*"}, {"asdf?*?asd*dsf?asfd*asdf?^20.0", "asdf?*?asd*dsf?asfd*asdf?^20.0"}, {"*:xxx", "xxx f1:xxx f2:xxx field:xxx"}, {"f1|f2:xxx", "f1:xxx f2:xxx"}, {"*:asd~0.2", "asd~0.2 f1:asd~0.2 f2:asd~0.2 field:asd~0.2"}, {"f1|f2:asd~0.2", "f1:asd~0.2 f2:asd~0.2"}, {"*:a?d*^20.0", "(a?d* f1:a?d* f2:a?d* field:a?d*)^20.0"}, {"f1|f2:a?d*^20.0", "(f1:a?d* f2:a?d*)^20.0"}, {"*:\"asdf <> xxx|yyy\"", "\"asdf <> xxx|yyy\" f1:\"asdf <> xxx|yyy\" f2:\"asdf <> xxx|yyy\" " "field:\"asdf <> xxx|yyy\""}, {"f1|f2:\"asdf <> xxx|yyy\"", "f1:\"asdf <> xxx|yyy\" f2:\"asdf <> xxx|yyy\""}, {"f1|f2:\"do|yyy\"", "f1:yyy f2:yyy"}, {"f1|f2:\"asdf <> do|yyy\"", "f1:\"asdf <> yyy\" f2:\"asdf <> yyy\""}, {"*:[bbb xxx]", "[bbb xxx] f1:[bbb xxx] f2:[bbb xxx] field:[bbb xxx]"}, {"f1|f2:[bbb xxx]", "f1:[bbb xxx] f2:[bbb xxx]"}, {"*:(xxx AND bbb)", "+(xxx f1:xxx f2:xxx field:xxx) +(bbb f1:bbb f2:bbb field:bbb)"}, {"f1|f2:(xxx AND bbb)", "+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)"}, {"ASDF?*?asd*dsf?ASFD*asdf?^20.0", "asdf?*?asd*dsf?asfd*asdf?^20.0"}, 
{"ASDFasdAasAasASD~", "asdfasdaasaasasd~"}, {"\"onewordphrase\"", "onewordphrase"}, {"one billion eight hundred and thirty three million four hundred and " "eighty eight thousand two hundred and sixty three", "one billion eight hundred thirty three million four hundred " "eighty eight thousand two hundred sixty three"}, {"f1:*", "*"}, {"f1:*^100.0", "*^100.0"}, {"f1:?*", "f1:?*"}, {"*:this", ""}, {"this-is-a-hyphenated-word", "\"thisisahyphenatedword|this is a hyphenated word\"~4"}, {"\"the phrase and the phrase\"", "\"phrase <> <> phrase\"~3"}, {"\"the e-mail was in the inbox\"", "\"email|e mail <> <> <> inbox\"~5"}, {"f1:?*^100.0", "f1:?*^100.0"}, {"f1:(a1 f2:b2 c3)", "f1:a1 f2:b2 f1:c3"} /* */ }; (void)data; REF(analyzer); parser = qp_new(analyzer); qp_add_field(parser, I("xx"), true, true); qp_add_field(parser, I("f1"), false, true); qp_add_field(parser, I("f2"), false, true); qp_add_field(parser, I("field"), false, true); for (i = 0; i < NELEMS(pairs); i++) { PARSER_TEST(pairs[i].qstr, pairs[i].qres); } parser->clean_str = true; for (i = 0; i < NELEMS(pairs); i++) { PARSER_TEST(pairs[i].qstr, pairs[i].qres); } PARSER_TEST("not_field:word", ""); qp_destroy(parser); /* This time let the query parser destroy the analyzer */ parser = qp_new(analyzer); qp_add_field(parser, I("xx"), true, true); qp_add_field(parser, I("f1"), false, true); qp_add_field(parser, I("f2"), false, true); qp_add_field(parser, I("field"), false, true); parser->clean_str = false; parser->allow_any_fields = true; for (i = 0; i < NELEMS(pairs); i++) { PARSER_TEST(pairs[i].qstr, pairs[i].qres); } parser->clean_str = true; for (i = 0; i < NELEMS(pairs); i++) { PARSER_TEST(pairs[i].qstr, pairs[i].qres); } PARSER_TEST("not_field:word", "not_field:word"); parser->wild_lower = false; PARSER_TEST("ASDF?*?asd*dsf?ASFD*asdf?^20.0", "ASDF?*?asd*dsf?ASFD*asdf?^20.0"); PARSER_TEST("ASDFasdAasAasASD~", "asdfasdaasaasasd~"); qp_destroy(parser); }