static TestQueryParser* prune_test_nomatch() { Query *tree = (Query*)NoMatchQuery_new(); Query *pruned = (Query*)NoMatchQuery_new(); return TestQP_new(NULL, tree, pruned, 0); }
static TestQueryParser* prune_test_optional_not() { Query *a_leaf = make_leaf_query(NULL, "a"); Query *b_leaf = make_leaf_query(NULL, "b"); Query *not_b = make_not_query(b_leaf); Query *tree = make_poly_query(BOOLOP_OR, (Query*)INCREF(a_leaf), not_b, NULL); Query *nomatch = (Query*)NoMatchQuery_new(); Query *pruned = make_poly_query(BOOLOP_OR, a_leaf, nomatch, NULL); return TestQP_new(NULL, tree, pruned, 4); }
// Prune a query tree.
//
// A NULL query, a top-level NOTQuery, or a top-level MatchAllQuery is
// answered with a freshly created NoMatchQuery.  A PolyQuery is pruned in
// place via S_do_prune().  In every other case (including after in-place
// pruning) the caller receives a new reference to the original `query`.
Query*
QParser_Prune_IMP(QueryParser *self, Query *query) {
    const bool reject_outright = query == NULL
                                 || Query_is_a(query, NOTQUERY)
                                 || Query_is_a(query, MATCHALLQUERY);
    if (reject_outright) {
        return (Query*)NoMatchQuery_new();
    }

    if (Query_is_a(query, POLYQUERY)) {
        S_do_prune(self, query);
    }

    return (Query*)INCREF(query);
}
static TestQueryParser* prune_test_reqopt_required_not() { Query *a_leaf = make_leaf_query(NULL, "a"); Query *b_leaf = make_leaf_query(NULL, "b"); Query *not_a = make_not_query(a_leaf); Query *tree = (Query*)ReqOptQuery_new(not_a, b_leaf); Query *nomatch = (Query*)NoMatchQuery_new(); Query *pruned = (Query*)ReqOptQuery_new(nomatch, b_leaf); DECREF(nomatch); DECREF(not_a); DECREF(b_leaf); return TestQP_new(NULL, tree, pruned, 0); }
static TestQueryParser* prune_test_reqopt_optional_not() { Query *a_leaf = make_leaf_query(NULL, "a"); Query *b_leaf = make_leaf_query(NULL, "b"); Query *not_b = make_not_query(b_leaf); Query *tree = (Query*)ReqOptQuery_new(a_leaf, not_b); Query *nomatch = (Query*)NoMatchQuery_new(); Query *pruned = (Query*)ReqOptQuery_new(a_leaf, nomatch); DECREF(nomatch); DECREF(not_b); DECREF(a_leaf); return TestQP_new(NULL, tree, pruned, 4); }
// Recursively prune `query` in place.
//
//  * NOTQuery: if the negated query is not a MatchAllQuery and
//    S_has_valid_clauses() rejects it, the negated query is replaced with a
//    MatchAllQuery (no double negatives).
//  * PolyQuery: every child is pruned first.  Then, for OR and
//    RequiredOptional queries, each child rejected by S_has_valid_clauses()
//    is swapped for a NoMatchQuery; for AND queries, all children are
//    cleared when the AND query as a whole is rejected.
//  * Anything else is left untouched.
//
// `self` is only forwarded to the recursive calls.
static void
S_do_prune(QueryParser *self, Query *query) {
    if (Query_is_a(query, NOTQUERY)) {
        // Don't allow double negatives.
        NOTQuery *negation = (NOTQuery*)query;
        Query    *inner    = NOTQuery_Get_Negated_Query(negation);
        const bool needs_replacement = !Query_is_a(inner, MATCHALLQUERY)
                                       && !S_has_valid_clauses(inner);
        if (needs_replacement) {
            MatchAllQuery *everything = MatchAllQuery_new();
            NOTQuery_Set_Negated_Query(negation, (Query*)everything);
            DECREF(everything);
        }
        return;
    }

    if (!Query_is_a(query, POLYQUERY)) {
        return;
    }

    PolyQuery *poly = (PolyQuery*)query;
    Vector    *kids = PolyQuery_Get_Children(poly);

    // Recurse into every child before judging this level.
    const uint32_t num_kids = Vec_Get_Size(kids);
    for (uint32_t k = 0; k < num_kids; k++) {
        S_do_prune(self, (Query*)Vec_Fetch(kids, k));
    }

    if (PolyQuery_is_a(poly, REQUIREDOPTIONALQUERY)
        || PolyQuery_is_a(poly, ORQUERY)
       ) {
        // Don't allow 'foo OR (-bar)'.
        Vector *clauses = PolyQuery_Get_Children(poly);
        const uint32_t num_clauses = Vec_Get_Size(clauses);
        for (uint32_t k = 0; k < num_clauses; k++) {
            Query *clause = (Query*)Vec_Fetch(clauses, k);
            if (S_has_valid_clauses(clause)) {
                continue;
            }
            Vec_Store(clauses, k, (Obj*)NoMatchQuery_new());
        }
    }
    else if (PolyQuery_is_a(poly, ANDQUERY)) {
        // Don't allow '(-bar AND -baz)'.
        if (!S_has_valid_clauses((Query*)poly)) {
            Vec_Clear(PolyQuery_Get_Children(poly));
        }
    }
}
// Expand a LeafQuery into concrete per-field queries.
//
// Returns NULL when `query` is not a LeafQuery or when its text is empty.
// Otherwise the text is trimmed of surrounding whitespace (and of its
// enclosing double quotes, when it starts with one), and one query is built
// per field -- the leaf's own field if it has one, else the parser's field
// list:
//   * no analyzer available for the field: a term query on the trimmed text;
//   * otherwise the unescaped text is split by the analyzer, empty tokens
//     are discarded, and either a phrase query (quoted input or more than
//     one token) or a term query (exactly one token) is added.
//
// The result is a NoMatchQuery if nothing was produced (with its
// fails-to-match flag set to false when some analyzer yielded zero tokens),
// the sole query if exactly one was produced, or an OR query over all of
// them.
Query*
QParser_Expand_Leaf_IMP(QueryParser *self, Query *query) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);

    // Determine whether we can actually process the input.
    if (!Query_is_a(query, LEAFQUERY)) {
        return NULL;
    }
    LeafQuery *leaf     = (LeafQuery*)query;
    String    *raw_text = LeafQuery_Get_Text(leaf);
    if (!Str_Get_Size(raw_text)) {
        return NULL;
    }

    // Trim whitespace from both ends.  Quoted input always yields a
    // PhraseQuery.
    StringIterator *head = Str_Top(raw_text);
    StringIterator *tail = Str_Tail(raw_text);
    StrIter_Skip_Next_Whitespace(head);
    StrIter_Skip_Prev_Whitespace(tail);
    const bool quoted = StrIter_Starts_With_Utf8(head, "\"", 1);
    if (quoted) {
        StrIter_Advance(head, 1);
        // Strip the closing quote as well, unless it is backslash-escaped.
        if (StrIter_Ends_With_Utf8(tail, "\"", 1)
            && !StrIter_Ends_With_Utf8(tail, "\\\"", 2)
           ) {
            StrIter_Recede(tail, 1);
        }
    }
    String *source_text = StrIter_substring(head, tail);

    // Either use LeafQuery's field or default to Parser's list.
    Vector *fields = NULL;
    if (LeafQuery_Get_Field(leaf)) {
        fields = Vec_new(1);
        Vec_Push(fields, INCREF(LeafQuery_Get_Field(leaf)));
    }
    else {
        fields = (Vector*)INCREF(ivars->fields);
    }

    Schema  *schema       = ivars->schema;
    CharBuf *unescape_buf = CB_new(Str_Get_Size(source_text));
    const uint32_t num_fields = Vec_Get_Size(fields);
    Vector  *queries      = Vec_new(num_fields);
    bool     saw_no_tokens = false;

    for (uint32_t f = 0; f < num_fields; f++) {
        String *field = (String*)Vec_Fetch(fields, f);

        // A parser-level analyzer takes precedence over the schema's.
        Analyzer *analyzer = ivars->analyzer;
        if (!analyzer) {
            analyzer = Schema_Fetch_Analyzer(schema, field);
        }

        if (!analyzer) {
            // No analysis possible: match the trimmed text verbatim.
            Query *term = QParser_Make_Term_Query(self, field,
                                                  (Obj*)source_text);
            Vec_Push(queries, (Obj*)term);
            continue;
        }

        // Extract token texts.
        String *unescaped = S_unescape(self, source_text, unescape_buf);
        Vector *candidates = Analyzer_Split(analyzer, unescaped);
        const uint32_t num_candidates = Vec_Get_Size(candidates);
        Vector *tokens = Vec_new(num_candidates);

        // Filter out zero-length token texts.
        for (uint32_t t = 0; t < num_candidates; t++) {
            String *candidate = (String*)Vec_Fetch(candidates, t);
            if (Str_Get_Size(candidate)) {
                Vec_Push(tokens, INCREF(candidate));
            }
        }

        const uint32_t num_tokens = Vec_Get_Size(tokens);
        if (num_tokens == 0) {
            /* Query might include stop words.  Who knows? */
            saw_no_tokens = true;
        }

        // Add either a TermQuery or a PhraseQuery.
        if (quoted || num_tokens > 1) {
            Query *phrase = QParser_Make_Phrase_Query(self, field, tokens);
            Vec_Push(queries, (Obj*)phrase);
        }
        else if (num_tokens == 1) {
            Query *term = QParser_Make_Term_Query(self, field,
                                                  Vec_Fetch(tokens, 0));
            Vec_Push(queries, (Obj*)term);
        }

        DECREF(tokens);
        DECREF(candidates);
        DECREF(unescaped);
    }

    // Collapse the per-field queries into a single result.
    Query *retval;
    switch (Vec_Get_Size(queries)) {
        case 0:
            retval = (Query*)NoMatchQuery_new();
            if (saw_no_tokens) {
                NoMatchQuery_Set_Fails_To_Match((NoMatchQuery*)retval, false);
            }
            break;
        case 1:
            retval = (Query*)INCREF(Vec_Fetch(queries, 0));
            break;
        default:
            retval = QParser_Make_OR_Query(self, queries);
            break;
    }

    // Clean up.
    DECREF(unescape_buf);
    DECREF(queries);
    DECREF(fields);
    DECREF(source_text);
    DECREF(tail);
    DECREF(head);

    return retval;
}
// Recursively expand a query tree, dispatching on the type of `query`.
//
//  * LeafQuery: delegated to QParser_Expand_Leaf() (whose result is returned
//    as is, possibly NULL).
//  * OR/AND query: each child is expanded; NULL results and NoMatchQuery
//    results whose fails-to-match flag is false are dropped.  Zero surviving
//    children yields a new NoMatchQuery, one yields that child, and more
//    than one are installed as the new children of `query`, which is then
//    returned.
//  * NOTQuery: the negated query is expanded and re-installed; if expansion
//    yields NULL a new MatchAllQuery is returned instead.
//  * RequiredOptionalQuery: both sides are expanded.  If both survive they
//    are re-installed and `query` is returned; if only one survives it is
//    returned on its own; if neither does, a new NoMatchQuery is returned.
//  * Anything else: `query` itself.
//
// Whenever `query` or a surviving child is returned, it is returned with an
// additional reference (INCREF).
Query*
QParser_Expand_IMP(QueryParser *self, Query *query) {
    if (Query_is_a(query, LEAFQUERY)) {
        return QParser_Expand_Leaf(self, query);
    }

    if (Query_is_a(query, ORQUERY) || Query_is_a(query, ANDQUERY)) {
        PolyQuery *poly     = (PolyQuery*)query;
        Vector    *old_kids = PolyQuery_Get_Children(poly);
        const uint32_t num_old = Vec_Get_Size(old_kids);
        Vector    *kept     = Vec_new(num_old);

        for (uint32_t k = 0; k < num_old; k++) {
            Query *expanded
                = QParser_Expand(self, (Query*)Vec_Fetch(old_kids, k)); // recurse
            if (!expanded) {
                continue;
            }
            // A NoMatchQuery that doesn't "fail to match" is discarded;
            // everything else is kept.
            const bool discard
                = Query_is_a(expanded, NOMATCHQUERY)
                  && !NoMatchQuery_Get_Fails_To_Match((NoMatchQuery*)expanded);
            if (discard) {
                DECREF(expanded);
            }
            else {
                Vec_Push(kept, (Obj*)expanded);
            }
        }

        Query *result;
        switch (Vec_Get_Size(kept)) {
            case 0:
                result = (Query*)NoMatchQuery_new();
                break;
            case 1:
                result = (Query*)INCREF(Vec_Fetch(kept, 0));
                break;
            default:
                PolyQuery_Set_Children(poly, kept);
                result = (Query*)INCREF(query);
                break;
        }

        DECREF(kept);
        return result;
    }

    if (Query_is_a(query, NOTQUERY)) {
        NOTQuery *negation = (NOTQuery*)query;
        Query    *inner    = NOTQuery_Get_Negated_Query(negation);
        Query    *expanded = QParser_Expand(self, inner);
        if (!expanded) {
            return (Query*)MatchAllQuery_new();
        }
        NOTQuery_Set_Negated_Query(negation, expanded);
        DECREF(expanded);
        return (Query*)INCREF(query);
    }

    if (Query_is_a(query, REQUIREDOPTIONALQUERY)) {
        RequiredOptionalQuery *req_opt = (RequiredOptionalQuery*)query;
        Query *old_required = ReqOptQuery_Get_Required_Query(req_opt);
        Query *old_optional = ReqOptQuery_Get_Optional_Query(req_opt);

        Query *required = QParser_Expand(self, old_required);
        Query *optional = QParser_Expand(self, old_optional);

        Query *result;
        if (required && optional) {
            ReqOptQuery_Set_Required_Query(req_opt, required);
            ReqOptQuery_Set_Optional_Query(req_opt, optional);
            result = (Query*)INCREF(query);
        }
        else if (required) {
            result = (Query*)INCREF(required);
        }
        else if (optional) {
            result = (Query*)INCREF(optional);
        }
        else {
            result = (Query*)NoMatchQuery_new();
        }

        DECREF(optional);
        DECREF(required);
        return result;
    }

    // Unrecognized query types pass through untouched.
    return (Query*)INCREF(query);
}
// Combine a list of parser elements into a single Query.
//
//  * No elems: an empty OR query (default occur is SHOULD) or empty AND
//    query when `enclosed`, else a NoMatchQuery.
//  * Exactly one elem and not `enclosed`: that elem's query.
//  * Otherwise the elems are sorted into required / optional / negated bins.
//    Required and negated queries are bound into one "mandatory" query (an
//    AND query when `enclosed` or when there is more than one of them),
//    optional queries into one "optional" query (an OR query when `enclosed`
//    or when there is more than one).  If both exist, they are joined by a
//    RequiredOptional query when at least one elem was required, or by an
//    AND query when the mandatory side consists solely of negations.  If
//    only one exists, it is returned directly.
//
// Throws "Unexpected error" if no elem landed in any bin.
static Query*
S_compose_subquery(QueryParser *self, Vector *elems, bool enclosed) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    const uint32_t num_elems = Vec_Get_Size(elems);

    if (num_elems == 0) {
        // No elems means no query. Maybe the search string was something
        // like 'NOT AND'
        if (!enclosed) {
            return (Query*)NoMatchQuery_new();
        }
        if (default_occur == SHOULD) {
            return QParser_Make_OR_Query(self, NULL);
        }
        return QParser_Make_AND_Query(self, NULL);
    }

    if (num_elems == 1 && !enclosed) {
        ParserElem *only = (ParserElem*)Vec_Fetch(elems, 0);
        return (Query*)INCREF((Query*)ParserElem_As(only, QUERY));
    }

    Vector *required = Vec_new(num_elems);
    Vector *optional = Vec_new(num_elems);
    Vector *negated  = Vec_new(num_elems);

    // Demux elems into bins.  An elem matching none of the three predicates
    // is skipped.
    for (uint32_t e = 0; e < num_elems; e++) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, e);
        Vector *bin = NULL;
        if (ParserElem_Required(elem)) {
            bin = required;
        }
        else if (ParserElem_Optional(elem)) {
            bin = optional;
        }
        else if (ParserElem_Negated(elem)) {
            bin = negated;
        }
        if (bin) {
            Vec_Push(bin, INCREF(ParserElem_As(elem, QUERY)));
        }
    }

    const uint32_t num_required  = Vec_Get_Size(required);
    const uint32_t num_negated   = Vec_Get_Size(negated);
    const uint32_t num_optional  = Vec_Get_Size(optional);
    const uint32_t num_mandatory = num_required + num_negated;

    // Bind all mandatory matchers together in one Query.
    Query *req_query = NULL;
    if (num_mandatory > 0) {
        if (enclosed || num_mandatory > 1) {
            Vector *mandatory = Vec_Clone(required);
            Vec_Push_All(mandatory, negated);
            req_query = QParser_Make_AND_Query(self, mandatory);
            DECREF(mandatory);
        }
        else if (num_required) {
            req_query = (Query*)INCREF(Vec_Fetch(required, 0));
        }
        else {
            // Exactly one mandatory elem and it isn't required, so it must
            // be the lone negated one.
            req_query = (Query*)INCREF(Vec_Fetch(negated, 0));
        }
    }

    // Bind all optional matchers together in one Query.
    Query *opt_query = NULL;
    if (num_optional) {
        if (enclosed || num_optional != 1) {
            opt_query = QParser_Make_OR_Query(self, optional);
        }
        else {
            opt_query = (Query*)INCREF(Vec_Fetch(optional, 0));
        }
    }

    // Unify required and optional.
    Query *retval;
    if (req_query && opt_query) {
        if (num_required) {
            // Not just negated elems.
            retval = QParser_Make_Req_Opt_Query(self, req_query, opt_query);
        }
        else {
            // req_query has only negated queries.
            Vector *pair = Vec_new(2);
            Vec_Push(pair, INCREF(req_query));
            Vec_Push(pair, INCREF(opt_query));
            retval = QParser_Make_AND_Query(self, pair);
            DECREF(pair);
        }
    }
    else if (opt_query) {
        // Only optional elems.
        retval = (Query*)INCREF(opt_query);
    }
    else if (req_query) {
        // Only required elems.
        retval = (Query*)INCREF(req_query);
    }
    else {
        retval = NULL; // kill "uninitialized" compiler warning
        THROW(ERR, "Unexpected error");
    }

    DECREF(opt_query);
    DECREF(req_query);
    DECREF(negated);
    DECREF(optional);
    DECREF(required);

    return retval;
}
// Prune fixture with no input tree at all (NULL): the expected pruned result
// is a NoMatchQuery.
// NOTE(review): the trailing 0 passed to TestQP_new is presumably the
// expected hit count -- confirm against TestQP_new's declaration.
static TestQueryParser*
prune_test_null_querystring() {
    Query *expected = (Query*)NoMatchQuery_new();

    return TestQP_new(NULL, NULL, expected, 0);
}