// Generate proximity-boost Spans for every pair of spans that score
// against each other.  Returns a new Vector of Spans (caller takes
// ownership).
Vector*
HeatMap_Generate_Proximity_Boosts_IMP(HeatMap *self, Vector *spans) {
    Vector *boosts = Vec_new(0);
    const size_t num_spans = Vec_Get_Size(spans);

    if (num_spans > 1) {
        // Compare each span against every later span.
        for (size_t i = 0, max = num_spans - 1; i < max; i++) {
            Span *span1 = (Span*)Vec_Fetch(spans, i);
            for (size_t j = i + 1; j <= max; j++) {
                Span *span2 = (Span*)Vec_Fetch(spans, j);
                float prox_score = HeatMap_Calc_Proximity_Boost(self, span1, span2);
                if (prox_score == 0) {
                    // NOTE(review): the break presumes `spans` is sorted by
                    // offset, so once the boost decays to zero every later
                    // span is at least as far away — confirm with callers.
                    break;
                }
                else {
                    // New Span covers the range from span1's start to
                    // span2's end, weighted by the proximity score.
                    int32_t length = Span_Get_Offset(span2) - Span_Get_Offset(span1) + Span_Get_Length(span2);
                    Vec_Push(boosts, (Obj*)Span_new(Span_Get_Offset(span1), length, prox_score));
                }
            }
        }
    }

    return boosts;
}
// Mark as deleted every document in every segment which contains `term`
// within `field`.
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self, String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const size_t num_seg_readers = Vec_Get_Size(ivars->seg_readers);

    for (size_t seg_tick = 0; seg_tick < num_seg_readers; seg_tick++) {
        SegReader *reader = (SegReader*)Vec_Fetch(ivars->seg_readers, seg_tick);
        PostingListReader *plist_reader = (PostingListReader*)SegReader_Fetch(reader, Class_Get_Name(POSTINGLISTREADER));
        if (plist_reader == NULL) { continue; }
        PostingList *plist = PListReader_Posting_List(plist_reader, field, term);
        if (plist == NULL) { continue; }

        BitVector *deletions = (BitVector*)Vec_Fetch(ivars->bit_vecs, seg_tick);
        int32_t freshly_deleted = 0;
        int32_t doc_id;

        // Iterate through postings, marking each doc as deleted.  Only
        // count docs which were not already deleted.
        while (0 != (doc_id = PList_Next(plist))) {
            freshly_deleted += !BitVec_Get(deletions, (size_t)doc_id);
            BitVec_Set(deletions, (size_t)doc_id);
        }
        DECREF(plist);

        // Flag the segment as dirty if anything changed.
        if (freshly_deleted) {
            ivars->updated[seg_tick] = true;
        }
    }
}
// Mark as deleted every document in every segment which matches `query`.
void
DefDelWriter_Delete_By_Query_IMP(DefaultDeletionsWriter *self, Query *query) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Compiler *compiler = Query_Make_Compiler(query, (Searcher*)ivars->searcher, Query_Get_Boost(query), false);
    const size_t num_seg_readers = Vec_Get_Size(ivars->seg_readers);

    for (size_t seg_tick = 0; seg_tick < num_seg_readers; seg_tick++) {
        SegReader *reader = (SegReader*)Vec_Fetch(ivars->seg_readers, seg_tick);
        Matcher *matcher = Compiler_Make_Matcher(compiler, reader, false);
        if (matcher == NULL) { continue; }

        BitVector *deletions = (BitVector*)Vec_Fetch(ivars->bit_vecs, seg_tick);
        int32_t freshly_deleted = 0;
        int32_t doc_id;

        // Iterate through matches, marking each doc as deleted.  Only
        // count docs which were not already deleted.
        while (0 != (doc_id = Matcher_Next(matcher))) {
            freshly_deleted += !BitVec_Get(deletions, (size_t)doc_id);
            BitVec_Set(deletions, (size_t)doc_id);
        }
        DECREF(matcher);

        // Flag the segment as dirty if anything changed.
        if (freshly_deleted) {
            ivars->updated[seg_tick] = true;
        }
    }

    DECREF(compiler);
}
// Extend the parent class's metadata with a "files" hash describing the
// deletions file written for each updated segment.
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER, LUCY_DefDelWriter_Metadata);
    Hash *const metadata = super_meta(self);
    Hash *const files = Hash_new(0);
    const size_t num_seg_readers = Vec_Get_Size(ivars->seg_readers);

    // Record a deletion count and filename for each segment whose
    // deletions changed during this indexing session.
    for (size_t tick = 0; tick < num_seg_readers; tick++) {
        if (!ivars->updated[tick]) { continue; }
        SegReader *reader = (SegReader*)Vec_Fetch(ivars->seg_readers, tick);
        BitVector *deldocs = (BitVector*)Vec_Fetch(ivars->bit_vecs, tick);
        Hash *mini_meta = Hash_new(2);
        Hash_Store_Utf8(mini_meta, "count", 5, (Obj*)Str_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
        Hash_Store_Utf8(mini_meta, "filename", 8, (Obj*)S_del_filename(self, reader));
        Hash_Store(files, Seg_Get_Name(SegReader_Get_Segment(reader)), (Obj*)mini_meta);
    }

    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
// Return a Matcher which iterates over the deleted doc ids for
// `seg_reader`, or NULL when the segment has no deletions.  Throws if the
// SegReader is unknown to this writer.
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self, SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions = NULL;
    Segment *segment = SegReader_Get_Segment(seg_reader);
    String *seg_name = Seg_Get_Name(segment);
    // Map the segment name to its position ("tick") in our parallel arrays.
    Integer *tick_obj = (Integer*)Hash_Fetch(ivars->name_to_tick, seg_name);
    size_t tick = tick_obj ? (size_t)Int_Get_Value(tick_obj) : 0;
    SegReader *candidate = tick_obj ? (SegReader*)Vec_Fetch(ivars->seg_readers, tick) : NULL;

    if (tick_obj) {
        DeletionsReader *del_reader = (DeletionsReader*)SegReader_Obtain(candidate, Class_Get_Name(DELETIONSREADER));
        // Supply a Matcher only when the segment carries deletions — either
        // fresh ones from this session or ones already on disk.
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)Vec_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else {
        // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }

    return deletions;
}
// Write a deletions file for every segment whose deletions changed, then
// store this writer's metadata on the segment.
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (size_t i = 0, max = Vec_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)Vec_Fetch(ivars->bit_vecs, i);
            int32_t doc_max = SegReader_Doc_Max(seg_reader);
            // doc_max + 1 bits (doc ids appear to start at 1), rounded up
            // to a whole number of bytes.
            size_t byte_size = (((size_t)doc_max + 1) + 7) / 8;
            // Highest bit index covered by byte_size bytes.
            size_t new_max = byte_size * 8 - 1;
            String *filename = S_del_filename(self, seg_reader);
            OutStream *outstream = Folder_Open_Out(folder, filename);
            if (!outstream) { RETHROW(INCREF(Err_get_error())); }

            // Ensure that we have 1 bit for each doc in segment.
            BitVec_Grow(deldocs, new_max);

            // Write deletions data and clean up.
            OutStream_Write_Bytes(outstream, (char*)BitVec_Get_Raw_Bits(deldocs), byte_size);
            OutStream_Close(outstream);
            DECREF(outstream);
            DECREF(filename);
        }
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9, (Obj*)DefDelWriter_Metadata(self));
}
// Run `text` through the chain of child analyzers and return the
// resulting Inversion (caller takes ownership).
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    Vector *const analyzers = PolyAnalyzer_IVARS(self)->analyzers;
    // Vec_Get_Size() returns size_t; the previous uint32_t copy narrowed
    // the count implicitly.
    const size_t num_analyzers = Vec_Get_Size(analyzers);
    Inversion *retval;

    if (num_analyzers == 0) {
        // No child analyzers: treat the entire text as a single token.
        size_t token_len = Str_Get_Size(text);
        const char *buf = Str_Get_Ptr8(text);
        Token *seed = Token_new(buf, token_len, 0, token_len, 1.0f, 1);
        retval = Inversion_new(seed);
        DECREF(seed);
    }
    else {
        // Feed the raw text to the first analyzer, then chain the
        // resulting Inversion through the remaining analyzers.
        Analyzer *first_analyzer = (Analyzer*)Vec_Fetch(analyzers, 0);
        retval = Analyzer_Transform_Text(first_analyzer, text);
        for (size_t i = 1; i < num_analyzers; i++) {
            Analyzer *analyzer = (Analyzer*)Vec_Fetch(analyzers, i);
            Inversion *new_inversion = Analyzer_Transform(analyzer, retval);
            DECREF(retval);
            retval = new_inversion;
        }
    }

    return retval;
}
// Convert every TOKEN_STRING element into a TOKEN_QUERY element holding a
// LeafQuery, honoring any field specifier that immediately precedes it.
// Iterates backwards so in-place stores don't disturb unvisited elements.
static void
S_compose_inner_queries(QueryParser *self, Vector *elems, String *default_field) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.
    for (uint32_t i = Vec_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem = (ParserElem*)Vec_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            // With an AND-style default operator, clauses are required.
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            // Replace the string element in place with the query element.
            Vec_Store(elems, i, (Obj*)new_elem);
        }
    }
}
// Absorb an existing index into this one: validate schema compatibility,
// import its fields, and append all of its segments.  `index` may be a
// Folder or a filepath String.
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader = NULL;

    // Accept either a Folder object or a filepath string.
    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(seg_reader, Class_Get_Name(DELETIONSREADER));
            // Build a doc-id remapping that skips deleted docs and offsets
            // ids past this index's current doc count — presumably; verify
            // against DelWriter_Generate_Doc_Map's contract.
            Matcher *deletions = del_reader ? DelReader_Iterator(del_reader) : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(ivars->del_writer, deletions, SegReader_Doc_Max(seg_reader), (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
// Flatten overlapping source spans into a list of non-overlapping spans,
// accumulating the weight of every source span that covers each zone.
// Zones which receive no weight are dropped.
Vector*
HeatMap_Flatten_Spans_IMP(HeatMap *self, Vector *spans) {
    const size_t num_spans = Vec_Get_Size(spans);
    UNUSED_VAR(self);

    if (!num_spans) {
        return Vec_new(0);
    }
    else {
        // Zero-weight spans covering each zone between unique boundaries.
        Vector *flattened = S_flattened_but_empty_spans(spans);
        const size_t num_raw_flattened = Vec_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.
        //
        // NOTE(review): dest_tick only ever advances across source spans,
        // which assumes `spans` is sorted by offset — confirm with callers.
        size_t dest_tick = 0;
        for (size_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)Vec_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len = Span_Get_Length(source_span);
            int32_t source_span_end = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores.
            for (size_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span) + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Leave holes instead of spans that don't have any score.
        // (Compact the weighted spans to the front, then excise the tail.)
        dest_tick = 0;
        for (size_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)Vec_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                Vec_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        Vec_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
// Repeatedly locate the leftmost innermost paren group, parse its contents
// into a single TOKEN_QUERY element, and splice that element back into
// `elems`, until no paren groups remain.
static void
S_parse_subqueries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = Vec_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                // Keep updating `left`: the last open paren before the
                // first close paren marks the innermost group.
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)Vec_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery from the elements between the parens.
        Vector *sub_elems = Vec_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.  If a field specifier preceded the open paren, consume it
        // too by extending the replaced range one slot to the left.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)Vec_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        Vec_Excise(elems, left + 1, right - left);
        Vec_Store(elems, left, (Obj*)new_elem);
    }
}
// Initialize a QueryParser.  `analyzer` and `default_boolop` may be NULL;
// when `fields` is NULL, all indexed fields in `schema` are used.  Throws
// if `default_boolop` is neither "OR" nor "AND".
QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer, String *default_boolop, Vector *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);

    // Init.
    ivars->heed_colons = false;
    ivars->lexer = QueryLexer_new();

    // Assign.
    ivars->schema = (Schema*)INCREF(schema);
    ivars->analyzer = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop ? Str_Clone(default_boolop) : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        // Use the supplied fields, verifying that each is a String.
        ivars->fields = Vec_Clone(fields);
        for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
            CERTIFY(Vec_Fetch(fields, i), STRING);
        }
    }
    else {
        // Default to all indexed fields in the Schema.
        Vector *all_fields = Schema_All_Fields(schema);
        size_t num_fields = Vec_Get_Size(all_fields);
        ivars->fields = Vec_new(num_fields);
        for (size_t i = 0; i < num_fields; i++) {
            String *field = (String*)Vec_Fetch(all_fields, i);
            FieldType *type = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                Vec_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }
    // Sort exactly once here.  (The old code also sorted inside the
    // `if (fields)` branch, sorting user-supplied fields twice.)
    Vec_Sort(ivars->fields);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o", ivars->default_boolop);
    }

    return self;
}
// Shared initializer for ORMatcher variants: set up the HMD heap and pool,
// then prime the queue with each non-NULL child Matcher.
static ORMatcher*
S_ormatcher_init2(ORMatcher *self, ORMatcherIVARS *ivars, Vector *children, Similarity *sim) {
    // Init.
    PolyMatcher_init((PolyMatcher*)self, children, sim);
    ivars->size = 0;

    // Derive.
    ivars->max_size = (uint32_t)Vec_Get_Size(children);

    // Allocate.  max_size + 1 slots — index 0 appears to be reserved
    // (1-based heap layout; confirm against the heap routines).
    ivars->heap = (HeapedMatcherDoc**)CALLOCATE(ivars->max_size + 1, sizeof(HeapedMatcherDoc*));

    // Create a pool of HMDs.  Encourage CPU cache hits by using a single
    // allocation for all of them.
    size_t amount_to_malloc = (ivars->max_size + 1) * sizeof(HeapedMatcherDoc);
    ivars->blob = (char*)MALLOCATE(amount_to_malloc);
    ivars->pool = (HeapedMatcherDoc**)CALLOCATE(ivars->max_size + 1, sizeof(HeapedMatcherDoc*));
    // Carve the blob into per-slot HMDs; pool[0] stays NULL.
    for (uint32_t i = 1; i <= ivars->max_size; i++) {
        size_t offset = i * sizeof(HeapedMatcherDoc);
        HeapedMatcherDoc *hmd = (HeapedMatcherDoc*)(ivars->blob + offset);
        ivars->pool[i] = hmd;
    }

    // Prime queue.
    for (uint32_t i = 0; i < ivars->max_size; i++) {
        Matcher *matcher = (Matcher*)Vec_Fetch(children, i);
        if (matcher) {
            S_add_element(self, ivars, (Matcher*)INCREF(matcher), 0);
        }
    }

    return self;
}
// Initialize a PolySearcher over `searchers`, verifying that all child
// searchers share `schema`'s class and deriving per-child doc-id offsets.
// Throws on a schema class mismatch.
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, Vector *searchers) {
    // size_t avoids narrowing the value returned by Vec_Get_Size().
    const size_t num_searchers = Vec_Get_Size(searchers);
    int32_t *starts_array = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    int32_t doc_max = 0;

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (Vector*)INCREF(searchers);
    ivars->starts = NULL; // Safe cleanup.

    for (size_t i = 0; i < num_searchers; i++) {
        Searcher *searcher = (Searcher*)CERTIFY(Vec_Fetch(searchers, i), SEARCHER);
        Schema *candidate = Searcher_Get_Schema(searcher);
        Class *orig_class = Schema_Get_Class(schema);
        Class *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'", Schema_Get_Class_Name(schema), Schema_Get_Class_Name(candidate));
        }

        // Derive doc_max and relative start offsets.  (doc_max is already
        // int32_t, so the old explicit cast was redundant.)
        starts_array[i] = doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    ivars->starts = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
// Release excess memory: shrink the buffer to exactly fit its live
// elements (or free it entirely when empty), drop the scratch space, and
// recurse into child runs.
void
SortEx_Shrink_IMP(SortExternal *self) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);
    if (ivars->buf_max - ivars->buf_tick > 0) {
        size_t buf_count = SortEx_Buffer_Count(self);
        size_t size = buf_count * sizeof(Obj*);

        // Slide the live elements to the front before shrinking the
        // allocation down to exactly fit them.
        if (ivars->buf_tick > 0) {
            Obj **start = ivars->buffer + ivars->buf_tick;
            memmove(ivars->buffer, start, size);
        }

        ivars->buffer = (Obj**)REALLOCATE(ivars->buffer, size);
        ivars->buf_tick = 0;
        ivars->buf_max = buf_count;
        ivars->buf_cap = buf_count;
    }
    else {
        // Buffer holds nothing live; release it entirely.
        FREEMEM(ivars->buffer);
        ivars->buffer = NULL;
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
        ivars->buf_cap = 0;
    }
    ivars->scratch_cap = 0;
    FREEMEM(ivars->scratch);
    ivars->scratch = NULL;

    // Shrink each child run as well.
    for (uint32_t i = 0, max = Vec_Get_Size(ivars->runs); i < max; i++) {
        SortExternal *run = (SortExternal*)Vec_Fetch(ivars->runs, i);
        SortEx_Shrink(run);
    }
}
// Look up a field's name by its number.  Field number 0 is invalid and
// yields NULL, as does an unknown number.
String*
Seg_Field_Name_IMP(Segment *self, int32_t field_num) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    if (!field_num) {
        return NULL;
    }
    return (String*)Vec_Fetch(ivars->by_num, (size_t)field_num);
}
// Create all the spans needed by HeatMap_Flatten_Spans, based on the source // offsets and lengths... but leave the scores at 0. static Vector* S_flattened_but_empty_spans(Vector *spans) { const size_t num_spans = Vec_Get_Size(spans); int32_t *bounds = (int32_t*)MALLOCATE((num_spans * 2) * sizeof(int32_t)); // Assemble a list of all unique start/end boundaries. for (size_t i = 0; i < num_spans; i++) { Span *span = (Span*)Vec_Fetch(spans, i); bounds[i] = Span_Get_Offset(span); bounds[i + num_spans] = Span_Get_Offset(span) + Span_Get_Length(span); } qsort(bounds, num_spans * 2, sizeof(int32_t), S_compare_i32); size_t num_bounds = 0; int32_t last = INT32_MAX; for (size_t i = 0; i < num_spans * 2; i++) { if (bounds[i] != last) { bounds[num_bounds++] = bounds[i]; last = bounds[i]; } } // Create one Span for each zone between two bounds. Vector *flattened = Vec_new(num_bounds - 1); for (size_t i = 0; i < num_bounds - 1; i++) { int32_t start = bounds[i]; int32_t length = bounds[i + 1] - start; Vec_Push(flattened, (Obj*)Span_new(start, length, 0.0f)); } FREEMEM(bounds); return flattened; }
// Fetch the pooled InverterEntry for `field`, creating both the field
// number and the entry on first use.  Throws if the field is unknown to
// the Schema.
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, String *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);

    if (!field_num) {
        // The segment doesn't know this field yet.  If the Schema doesn't
        // either, the user never spec'd it — bail out.
        if (!Schema_Fetch_Type(schema, field)) {
            THROW(ERR, "Unknown field name: '%o'", field);
        }
        // The field is in the Schema.  Get a field num from the Segment.
        field_num = Seg_Add_Field(ivars->segment, field);
    }

    // Lazily create and cache the entry for this field number.
    InverterEntry *entry = (InverterEntry*)Vec_Fetch(ivars->entry_pool, field_num);
    if (entry == NULL) {
        entry = InvEntry_new(schema, (String*)field, field_num);
        Vec_Store(ivars->entry_pool, field_num, (Obj*)entry);
    }
    return entry;
}
// Initialize a PhraseCompiler: pick a Similarity, sum per-term IDF, and
// derive the raw weight from IDF times boost.
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent, Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars = PhraseQuery_IVARS(parent);
    Schema *schema = Searcher_Get_Schema(searcher);
    Vector *terms = parent_ivars->terms;

    // Prefer a field-specific Similarity; fall back to the Schema default.
    Similarity *sim = Schema_Fetch_Sim(schema, parent_ivars->field);
    if (sim == NULL) {
        sim = Schema_Get_Similarity(schema);
    }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Accumulate the IDF of every term in the phrase.
    ivars->idf = 0;
    const uint32_t num_terms = Vec_Get_Size(terms);
    for (uint32_t tick = 0; tick < num_terms; tick++) {
        Obj *term = Vec_Fetch(terms, tick);
        int32_t doc_max = Searcher_Doc_Max(searcher);
        int32_t doc_freq = Searcher_Doc_Freq(searcher, parent_ivars->field, term);
        ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
// Prepare the collector for a new segment: reset the threshold state,
// refresh per-rule sort caches and derived actions, then delegate to the
// parent class.
void
SortColl_Set_Reader_IMP(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);
    SortReader *sort_reader = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc = INT32_MAX;
    // When scores aren't needed, park the thresholds at NaN.
    bumped_ivars->score = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->bubble_score = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->actions = ivars->auto_actions;

    // Obtain sort caches.  Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule *rule = (SortRule*)Vec_Fetch(ivars->rules, i);
            String *field = SortRule_Get_Field(rule);
            SortCache *cache = field ? SortReader_Fetch_Sort_Cache(sort_reader, field) : NULL;
            ivars->sort_caches[i] = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            if (cache) {
                ivars->ord_arrays[i] = SortCache_Get_Ords(cache);
            }
            else {
                ivars->ord_arrays[i] = NULL;
            }
        }
    }
    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;

    // Chain up to the parent class's Set_Reader.
    SortColl_Set_Reader_t super_set_reader = (SortColl_Set_Reader_t)SUPER_METHOD_PTR(SORTCOLLECTOR, LUCY_SortColl_Set_Reader);
    super_set_reader(self, reader);
}
// Build a Compiler for this PhraseQuery.  A single-term "phrase" is
// delegated to a TermQuery's compiler for efficiency.
Compiler*
PhraseQuery_Make_Compiler_IMP(PhraseQuery *self, Searcher *searcher, float boost, bool subordinate) {
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);

    if (Vec_Get_Size(ivars->terms) != 1) {
        // Genuine multi-term phrase.
        PhraseCompiler *compiler = PhraseCompiler_new(self, searcher, boost);
        if (!subordinate) {
            PhraseCompiler_Normalize(compiler);
        }
        return (Compiler*)compiler;
    }

    // Optimize for one-term "phrases".
    Obj *term = Vec_Fetch(ivars->terms, 0);
    TermQuery *term_query = TermQuery_new(ivars->field, term);
    TermQuery_Set_Boost(term_query, ivars->boost);
    TermCompiler *term_compiler = (TermCompiler*)TermQuery_Make_Compiler(term_query, searcher, boost, subordinate);
    DECREF(term_query);
    return (Compiler*)term_compiler;
}
// Feed each sortable field of an inverted document to its SortFieldWriter,
// flushing all field writers if memory consumption crosses the threshold.
void
SortWriter_Add_Inverted_Doc_IMP(SortWriter *self, Inverter *inverter, int32_t doc_id) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    // Route each sortable field's value to the appropriate writer.
    Inverter_Iterate(inverter);
    int32_t field_num;
    while (0 != (field_num = Inverter_Next(inverter))) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Sortable(type)) { continue; }
        SortFieldWriter *field_writer = S_lazy_init_field_writer(self, field_num);
        SortFieldWriter_Add(field_writer, doc_id, Inverter_Get_Value(inverter));
    }

    // If our SortFieldWriters have collectively passed the memory threshold,
    // flush all of them, then reset the counter which tracks memory
    // consumption.
    if ((size_t)Counter_Get_Value(ivars->counter) > ivars->mem_thresh) {
        for (size_t tick = 0; tick < Vec_Get_Size(ivars->field_writers); tick++) {
            SortFieldWriter *const field_writer = (SortFieldWriter*)Vec_Fetch(ivars->field_writers, tick);
            if (field_writer) {
                SortFieldWriter_Flush(field_writer);
            }
        }
        Counter_Reset(ivars->counter);
        ivars->flush_at_finish = true;
    }
}
// Initialize a PolyLexicon by collecting a per-segment Lexicon for `field`
// from each sub-reader, then priming the queue via PolyLex_Reset.
PolyLexicon*
PolyLex_init(PolyLexicon *self, String *field, Vector *sub_readers) {
    uint32_t num_sub_readers = Vec_Get_Size(sub_readers);
    Vector *seg_lexicons = Vec_new(num_sub_readers);

    // Init.
    Lex_init((Lexicon*)self, field);
    PolyLexiconIVARS *const ivars = PolyLex_IVARS(self);
    ivars->term = NULL;
    ivars->lex_q = SegLexQ_new(num_sub_readers);

    // Gather a Lexicon from every sub-reader that supplies one.
    for (uint32_t tick = 0; tick < num_sub_readers; tick++) {
        LexiconReader *lex_reader = (LexiconReader*)Vec_Fetch(sub_readers, tick);
        if (lex_reader == NULL) { continue; }
        CERTIFY(lex_reader, LEXICONREADER);
        Lexicon *seg_lexicon = LexReader_Lexicon(lex_reader, field, NULL);
        if (seg_lexicon) {
            Vec_Push(seg_lexicons, (Obj*)seg_lexicon);
        }
    }
    ivars->seg_lexicons = seg_lexicons;

    PolyLex_Reset(self);

    return self;
}
// Normalize paren balance: drop close parens with no matching open paren,
// then append implicit close parens for any groups left open.
static void
S_balance_parens(QueryParser *self, Vector *elems) {
    UNUSED_VAR(self);
    int64_t depth = 0;

    // Scan left to right, excising any unmatched close paren.
    size_t tick = 0;
    while (tick < Vec_Get_Size(elems)) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, tick);
        uint32_t type = ParserElem_Get_Type(elem);
        if (type == TOKEN_OPEN_PAREN) {
            depth++;
        }
        else if (type == TOKEN_CLOSE_PAREN) {
            if (depth == 0) {
                // Unbalanced close paren: remove it and re-examine this slot.
                Vec_Excise(elems, tick, 1);
                continue;
            }
            depth--;
        }
        tick++;
    }

    // Close any paren groups left dangling at the end.
    while (depth--) {
        Vec_Push(elems, (Obj*)ParserElem_new(TOKEN_CLOSE_PAREN, NULL));
    }
}
// Absorb another segment's sort caches, remapping doc ids through
// `doc_map`.
void
SortWriter_Add_Segment_IMP(SortWriter *self, SegReader *reader, I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    Vector *fields = Schema_All_Fields(ivars->schema);

    // The SortReader depends only on `reader`, so fetch it once instead of
    // once per field as the old code did.
    SortReader *sort_reader = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String *field = (String*)Vec_Fetch(fields, i);
        SortCache *cache = sort_reader ? SortReader_Fetch_Sort_Cache(sort_reader, field) : NULL;
        if (cache) {
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
// Remove every element of `elems` whose parser-token type equals `type`.
static void
S_discard_elems(Vector *elems, uint32_t type) {
    // Walk backwards so excising never disturbs indices yet to be visited.
    size_t tick = Vec_Get_Size(elems);
    while (tick--) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, tick);
        if (ParserElem_Get_Type(elem) == type) {
            Vec_Excise(elems, tick, 1);
        }
    }
}
// Rewind the PolyLexicon: drain the queue, reset and re-insert every
// SegLexicon that still has terms, and clear the cached current term.
void
PolyLex_Reset_IMP(PolyLexicon *self) {
    PolyLexiconIVARS *const ivars = PolyLex_IVARS(self);
    Vector *seg_lexicons = ivars->seg_lexicons;
    uint32_t num_segs = Vec_Get_Size(seg_lexicons);
    SegLexQueue *lex_q = ivars->lex_q;

    // Drain and release anything still in the queue.
    SegLexicon *stale;
    while (NULL != (stale = (SegLexicon*)SegLexQ_Pop(lex_q))) {
        DECREF(stale);
    }

    // Re-prime the queue with every SegLexicon that yields a term.
    for (uint32_t tick = 0; tick < num_segs; tick++) {
        SegLexicon *const seg_lexicon = (SegLexicon*)Vec_Fetch(seg_lexicons, tick);
        SegLex_Reset(seg_lexicon);
        if (SegLex_Next(seg_lexicon)) {
            SegLexQ_Insert(ivars->lex_q, INCREF(seg_lexicon));
        }
    }

    // Discard the cached current term, if any.
    DECREF(ivars->term);
    ivars->term = NULL;
}
// Queue deletion of all docs containing `term` in `field`.  Full-text
// fields run the term through the field's analyzer first.  Throws if the
// field isn't indexed.
void
Indexer_Delete_By_Term_IMP(Indexer *self, String *field, Obj *term) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Schema *schema = ivars->schema;
    FieldType *type = Schema_Fetch_Type(schema, field);

    // Raise exception if the field isn't indexed.
    if (type == NULL || !FType_Indexed(type)) {
        THROW(ERR, "%o is not an indexed field", field);
    }

    if (!FType_is_a(type, FULLTEXTTYPE)) {
        // Non-full-text field: zap the term verbatim.
        DelWriter_Delete_By_Term(ivars->del_writer, field, term);
        return;
    }

    // Full-text field: analyze first, then zap the leading token (if any).
    CERTIFY(term, STRING);
    Analyzer *analyzer = Schema_Fetch_Analyzer(schema, field);
    Vector *terms = Analyzer_Split(analyzer, (String*)term);
    Obj *analyzed_term = Vec_Fetch(terms, 0);
    if (analyzed_term) {
        DelWriter_Delete_By_Term(ivars->del_writer, field, analyzed_term);
    }
    DECREF(terms);
}
// Collapse runs of "query OR query [OR query ...]" into single ORQuery
// elements, editing `elems` in place.
static void
S_compose_or_queries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    for (uint32_t i = 0; i + 2 < Vec_Get_Size(elems); i++) {
        // Look for an OR operator at position i + 1.
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i + 1);
        if (ParserElem_Get_Type(elem) == TOKEN_OR) {
            ParserElem *preceding = (ParserElem*)Vec_Fetch(elems, i);
            Vector *children = Vec_new(2);
            // Number of elements after `i` consumed by this OR chain
            // (pairs of OR token + following query).
            uint32_t num_to_zap = 0;

            // Add first clause.
            Query *preceding_query = (Query*)ParserElem_As(preceding, QUERY);
            Vec_Push(children, INCREF(preceding_query));

            // Add following clauses, walking in (OR, query) pairs.
            for (uint32_t j = i + 1, jmax = Vec_Get_Size(elems); j < jmax; j += 2, num_to_zap += 2 ) {
                ParserElem *maybe_or = (ParserElem*)Vec_Fetch(elems, j);
                ParserElem *following = (ParserElem*)Vec_Fetch(elems, j + 1);
                if (ParserElem_Get_Type(maybe_or) != TOKEN_OR) {
                    break;
                }
                else if (ParserElem_Get_Type(following) == TOKEN_QUERY) {
                    Query *next = (Query*)ParserElem_As(following, QUERY);
                    Vec_Push(children, INCREF(next));
                }
                else {
                    THROW(ERR, "Unexpected type: %u32", ParserElem_Get_Type(following));
                }
            }
            // Reuse the first clause's slot to hold the combined ORQuery.
            Query *or_query = QParser_Make_OR_Query(self, children);
            ParserElem_Set_Value(preceding, (Obj*)or_query);
            if (default_occur == MUST) {
                ParserElem_Require(preceding);
            }
            DECREF(or_query);
            DECREF(children);

            // Remove the consumed OR tokens and clause elements.
            Vec_Excise(elems, i + 1, num_to_zap);
        }
    }
}
// Propagate a normalization factor to every child Compiler.
void
PolyCompiler_Apply_Norm_Factor_IMP(PolyCompiler *self, float factor) {
    PolyCompilerIVARS *const ivars = PolyCompiler_IVARS(self);
    const size_t num_kids = Vec_Get_Size(ivars->children);
    for (size_t tick = 0; tick < num_kids; tick++) {
        Compiler *kid = (Compiler*)Vec_Fetch(ivars->children, tick);
        Compiler_Apply_Norm_Factor(kid, factor);
    }
}