Example #1
File: HeatMap.c  Project: rectang/lucy
Vector*
HeatMap_Generate_Proximity_Boosts_IMP(HeatMap *self, Vector *spans) {
    Vector *boosts = Vec_new(0);
    const size_t num_spans = Vec_Get_Size(spans);

    if (num_spans > 1) {
        for (size_t i = 0, max = num_spans - 1; i < max; i++) {
            Span *span1 = (Span*)Vec_Fetch(spans, i);

            for (size_t j = i + 1; j <= max; j++) {
                Span *span2 = (Span*)Vec_Fetch(spans, j);
                float prox_score
                    = HeatMap_Calc_Proximity_Boost(self, span1, span2);
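                // A zero boost means span2 is already too far from span1;
                // the remaining spans are farther still, so stop pairing.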
                if (prox_score == 0) {
                    break;
                }
                else {
                    int32_t length = Span_Get_Offset(span2)
                                     - Span_Get_Offset(span1)
                                     + Span_Get_Length(span2);
                    Vec_Push(boosts,
                            (Obj*)Span_new(Span_Get_Offset(span1), length,
                                           prox_score));
                }
            }
        }
    }

    return boosts;
}
Example #2
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self,
                                String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    for (size_t i = 0, max = Vec_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec = (BitVector*)Vec_Fetch(ivars->bit_vecs, i);
        PostingList *plist = plist_reader
                             ? PListReader_Posting_List(plist_reader, field, term)
                             : NULL;
        int32_t doc_id;
        int32_t num_zapped = 0;

        // Iterate through postings, marking each doc as deleted.
        if (plist) {
            while (0 != (doc_id = PList_Next(plist))) {
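                // Count the doc only if it wasn't already marked as deleted.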
                num_zapped += !BitVec_Get(bit_vec, (size_t)doc_id);
                BitVec_Set(bit_vec, (size_t)doc_id);
            }
            if (num_zapped) { ivars->updated[i] = true; }
            DECREF(plist);
        }
    }
}
Example #3
void
DefDelWriter_Delete_By_Query_IMP(DefaultDeletionsWriter *self, Query *query) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Compiler *compiler = Query_Make_Compiler(query, (Searcher*)ivars->searcher,
                                             Query_Get_Boost(query), false);

    for (size_t i = 0, max = Vec_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec = (BitVector*)Vec_Fetch(ivars->bit_vecs, i);
        Matcher *matcher = Compiler_Make_Matcher(compiler, seg_reader, false);

        if (matcher) {
            int32_t doc_id;
            int32_t num_zapped = 0;

            // Iterate through matches, marking each doc as deleted.
            while (0 != (doc_id = Matcher_Next(matcher))) {
                num_zapped += !BitVec_Get(bit_vec, (size_t)doc_id);
                BitVec_Set(bit_vec, (size_t)doc_id);
            }
            if (num_zapped) { ivars->updated[i] = true; }

            DECREF(matcher);
        }
    }

    DECREF(compiler);
}
Example #4
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash    *const metadata = super_meta(self);
    Hash    *const files    = Hash_new(0);

    for (size_t i = 0, max = Vec_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)Vec_Fetch(ivars->bit_vecs, i);
            Segment   *segment   = SegReader_Get_Segment(seg_reader);
            Hash      *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
Example #5
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions    = NULL;
    Segment *segment      = SegReader_Get_Segment(seg_reader);
    String  *seg_name     = Seg_Get_Name(segment);
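    // name_to_tick maps a segment name to its index ("tick") in seg_readers.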
    Integer *tick_obj     = (Integer*)Hash_Fetch(ivars->name_to_tick,
                                                   seg_name);
    size_t tick          = tick_obj ? (size_t)Int_Get_Value(tick_obj) : 0;
    SegReader *candidate  = tick_obj
                            ? (SegReader*)Vec_Fetch(ivars->seg_readers, tick)
                            : NULL;

    if (tick_obj) {
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, Class_Get_Name(DELETIONSREADER));
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)Vec_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }

    return deletions;
}
Example #6
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (size_t i = 0, max = Vec_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)Vec_Fetch(ivars->bit_vecs, i);
            int32_t    doc_max   = SegReader_Doc_Max(seg_reader);
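            // Round up to whole bytes so bits 0 through doc_max are covered.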
            size_t     byte_size = (((size_t)doc_max + 1) + 7) / 8;
            size_t     new_max   = byte_size * 8 - 1;
            String    *filename  = S_del_filename(self, seg_reader);
            OutStream *outstream = Folder_Open_Out(folder, filename);
            if (!outstream) { RETHROW(INCREF(Err_get_error())); }

            // Ensure that we have 1 bit for each doc in segment.
            BitVec_Grow(deldocs, new_max);

            // Write deletions data and clean up.
            OutStream_Write_Bytes(outstream,
                                  (char*)BitVec_Get_Raw_Bits(deldocs),
                                  byte_size);
            OutStream_Close(outstream);
            DECREF(outstream);
            DECREF(filename);
        }
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
Example #7
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    Vector *const   analyzers     = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t  num_analyzers = Vec_Get_Size(analyzers);
    Inversion      *retval;

    if (num_analyzers == 0) {
        size_t      token_len = Str_Get_Size(text);
        const char *buf       = Str_Get_Ptr8(text);
        Token *seed = Token_new(buf, token_len, 0, token_len, 1.0f, 1);
        retval = Inversion_new(seed);
        DECREF(seed);
    }
    else {
        Analyzer *first_analyzer = (Analyzer*)Vec_Fetch(analyzers, 0);
        retval = Analyzer_Transform_Text(first_analyzer, text);
        for (uint32_t i = 1; i < num_analyzers; i++) {
            Analyzer *analyzer = (Analyzer*)Vec_Fetch(analyzers, i);
            Inversion *new_inversion = Analyzer_Transform(analyzer, retval);
            DECREF(retval);
            retval = new_inversion;
        }
    }

    return retval;
}
Example #8
File: QueryParser.c  Project: kidaa/lucy
static void
S_compose_inner_queries(QueryParser *self, Vector *elems,
                        String *default_field) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.
    for (uint32_t i = Vec_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem
                = (ParserElem*)Vec_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem
                = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            Vec_Store(elems, i, (Obj*)new_elem);
        }
    }
}
Example #9
File: Indexer.c  Project: rectang/lucy
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
Example #10
File: HeatMap.c  Project: rectang/lucy
Vector*
HeatMap_Flatten_Spans_IMP(HeatMap *self, Vector *spans) {
    const size_t num_spans = Vec_Get_Size(spans);
    UNUSED_VAR(self);

    if (!num_spans) {
        return Vec_new(0);
    }
    else {
        Vector *flattened = S_flattened_but_empty_spans(spans);
        const size_t num_raw_flattened = Vec_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.
        size_t dest_tick = 0;
        for (size_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)Vec_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len    = Span_Get_Length(source_span);
            int32_t source_span_end    = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores.
            for (size_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span)
                                       + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Drop spans that received no score, leaving holes in the coverage
        // rather than keeping zero-weight spans.
        dest_tick = 0;
        for (size_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)Vec_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                Vec_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        Vec_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
Example #11
File: QueryParser.c  Project: kidaa/lucy
static void
S_parse_subqueries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = Vec_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)Vec_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery.
        Vector *sub_elems = Vec_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)Vec_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        Vec_Excise(elems, left + 1, right - left);
        Vec_Store(elems, left, (Obj*)new_elem);
    }
}
Example #12
File: QueryParser.c  Project: kidaa/lucy
QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer,
             String *default_boolop, Vector *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    // Init.
    ivars->heed_colons = false;
    ivars->lexer       = QueryLexer_new();

    // Assign.
    ivars->schema         = (Schema*)INCREF(schema);
    ivars->analyzer       = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop
                           ? Str_Clone(default_boolop)
                           : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        ivars->fields = Vec_Clone(fields);
        for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
            CERTIFY(Vec_Fetch(fields, i), STRING);
        }
        Vec_Sort(ivars->fields);
    }
    else {
        Vector *all_fields = Schema_All_Fields(schema);
        uint32_t num_fields = Vec_Get_Size(all_fields);
        ivars->fields = Vec_new(num_fields);
        for (uint32_t i = 0; i < num_fields; i++) {
            String *field = (String*)Vec_Fetch(all_fields, i);
            FieldType *type = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                Vec_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }
    Vec_Sort(ivars->fields);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o", ivars->default_boolop);
    }

    return self;
}
Example #13
File: ORMatcher.c  Project: apache/lucy
static ORMatcher*
S_ormatcher_init2(ORMatcher *self, ORMatcherIVARS *ivars, Vector *children,
                  Similarity *sim) {
    // Init.
    PolyMatcher_init((PolyMatcher*)self, children, sim);
    ivars->size = 0;

    // Derive.
    ivars->max_size = (uint32_t)Vec_Get_Size(children);

    // Allocate.
    ivars->heap = (HeapedMatcherDoc**)CALLOCATE(ivars->max_size + 1,
                                                sizeof(HeapedMatcherDoc*));

    // Create a pool of HMDs.  Encourage CPU cache hits by using a single
    // allocation for all of them.
    size_t amount_to_malloc = (ivars->max_size + 1) * sizeof(HeapedMatcherDoc);
    ivars->blob = (char*)MALLOCATE(amount_to_malloc);
    ivars->pool = (HeapedMatcherDoc**)CALLOCATE(ivars->max_size + 1,
                                                sizeof(HeapedMatcherDoc*));
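    // Both heap and pool are 1-based; slot 0 of each array stays unused.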
    for (uint32_t i = 1; i <= ivars->max_size; i++) {
        size_t offset = i * sizeof(HeapedMatcherDoc);
        HeapedMatcherDoc *hmd = (HeapedMatcherDoc*)(ivars->blob + offset);
        ivars->pool[i] = hmd;
    }

    // Prime queue.
    for (uint32_t i = 0; i < ivars->max_size; i++) {
        Matcher *matcher = (Matcher*)Vec_Fetch(children, i);
        if (matcher) {
            S_add_element(self, ivars, (Matcher*)INCREF(matcher), 0);
        }
    }

    return self;
}
Example #14
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, Vector *searchers) {
    const uint32_t num_searchers = Vec_Get_Size(searchers);
    int32_t *starts_array = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    int32_t  doc_max      = 0;

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (Vector*)INCREF(searchers);
    ivars->starts = NULL; // Safe cleanup.

    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(Vec_Fetch(searchers, i), SEARCHER);
        Schema *candidate       = Searcher_Get_Schema(searcher);
        Class  *orig_class      = Schema_Get_Class(schema);
        Class  *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }

        // Derive doc_max and relative start offsets.
        starts_array[i] = (int32_t)doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    ivars->starts  = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
Example #15
File: SortExternal.c  Project: kidaa/lucy
void
SortEx_Shrink_IMP(SortExternal *self) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);
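    // If live elements remain, slide them to the front of the buffer and
    // shrink the allocation to fit; otherwise release the buffer entirely.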
    if (ivars->buf_max - ivars->buf_tick > 0) {
        size_t buf_count = SortEx_Buffer_Count(self);
        size_t size        = buf_count * sizeof(Obj*);
        if (ivars->buf_tick > 0) {
            Obj **start = ivars->buffer + ivars->buf_tick;
            memmove(ivars->buffer, start, size);
        }
        ivars->buffer   = (Obj**)REALLOCATE(ivars->buffer, size);
        ivars->buf_tick = 0;
        ivars->buf_max  = buf_count;
        ivars->buf_cap  = buf_count;
    }
    else {
        FREEMEM(ivars->buffer);
        ivars->buffer   = NULL;
        ivars->buf_tick = 0;
        ivars->buf_max  = 0;
        ivars->buf_cap  = 0;
    }
    ivars->scratch_cap = 0;
    FREEMEM(ivars->scratch);
    ivars->scratch = NULL;

    for (uint32_t i = 0, max = Vec_Get_Size(ivars->runs); i < max; i++) {
        SortExternal *run = (SortExternal*)Vec_Fetch(ivars->runs, i);
        SortEx_Shrink(run);
    }
}
Example #16
File: Segment.c  Project: apache/lucy
String*
Seg_Field_Name_IMP(Segment *self, int32_t field_num) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
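    // Field number 0 is reserved to mean "no field", hence the NULL return.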
    return field_num
           ? (String*)Vec_Fetch(ivars->by_num, (size_t)field_num)
           : NULL;
}
Example #17
File: HeatMap.c  Project: rectang/lucy
// Create all the spans needed by HeatMap_Flatten_Spans, based on the source
// offsets and lengths... but leave the scores at 0.
static Vector*
S_flattened_but_empty_spans(Vector *spans) {
    const size_t num_spans = Vec_Get_Size(spans);
    int32_t *bounds = (int32_t*)MALLOCATE((num_spans * 2) * sizeof(int32_t));

    // Assemble a list of all unique start/end boundaries.
    for (size_t i = 0; i < num_spans; i++) {
        Span *span            = (Span*)Vec_Fetch(spans, i);
        bounds[i]             = Span_Get_Offset(span);
        bounds[i + num_spans] = Span_Get_Offset(span) + Span_Get_Length(span);
    }
    qsort(bounds, num_spans * 2, sizeof(int32_t), S_compare_i32);
    size_t   num_bounds = 0;
    int32_t  last       = INT32_MAX;
    for (size_t i = 0; i < num_spans * 2; i++) {
        if (bounds[i] != last) {
            bounds[num_bounds++] = bounds[i];
            last = bounds[i];
        }
    }

    // Create one Span for each zone between two bounds.
    Vector *flattened = Vec_new(num_bounds - 1);
    for (size_t i = 0; i < num_bounds - 1; i++) {
        int32_t  start   = bounds[i];
        int32_t  length  = bounds[i + 1] - start;
        Vec_Push(flattened, (Obj*)Span_new(start, length, 0.0f));
    }

    FREEMEM(bounds);
    return flattened;
}
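
The qsort comparator S_compare_i32 referenced above is not part of this listing. A minimal sketch of what such a static helper presumably looks like, assuming a plain ascending int32_t comparison (not the project's actual definition):

static int
S_compare_i32(const void *va, const void *vb) {
    int32_t a = *(const int32_t*)va;
    int32_t b = *(const int32_t*)vb;
    // Ascending order; avoids the overflow risk of returning a - b.
    return (a > b) - (a < b);
}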
Example #18
File: Inverter.c  Project: kidaa/lucy
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, String *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);
    if (!field_num) {
        // This field seems not to be in the segment yet.  Try to find it in
        // the Schema.
        if (Schema_Fetch_Type(schema, field)) {
            // The field is in the Schema.  Get a field num from the Segment.
            field_num = Seg_Add_Field(ivars->segment, field);
        }
        else {
            // We've truly failed to find the field.  The user must
            // not have spec'd it.
            THROW(ERR, "Unknown field name: '%o'", field);
        }
    }

    InverterEntry *entry
        = (InverterEntry*)Vec_Fetch(ivars->entry_pool, field_num);
    if (!entry) {
        entry = InvEntry_new(schema, (String*)field, field_num);
        Vec_Store(ivars->entry_pool, field_num, (Obj*)entry);
    }
    return entry;
}
Example #19
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars = PhraseQuery_IVARS(parent);
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent_ivars->field);
    Vector     *terms  = parent_ivars->terms;

    // Try harder to find a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase.
    ivars->idf = 0;
    for (uint32_t i = 0, max = Vec_Get_Size(terms); i < max; i++) {
        Obj     *term     = Vec_Fetch(terms, i);
        int32_t  doc_max  = Searcher_Doc_Max(searcher);
        int32_t  doc_freq = Searcher_Doc_Freq(searcher, parent_ivars->field, term);
        ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
Example #20
void
SortColl_Set_Reader_IMP(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc    = INT32_MAX;
    bumped_ivars->score  = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->bubble_score  = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->actions       = ivars->auto_actions;

    // Obtain sort caches. Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule  *rule  = (SortRule*)Vec_Fetch(ivars->rules, i);
            String    *field = SortRule_Get_Field(rule);
            SortCache *cache = field
                               ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                               : NULL;
            ivars->sort_caches[i] = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            if (cache) { ivars->ord_arrays[i] = SortCache_Get_Ords(cache); }
            else       { ivars->ord_arrays[i] = NULL; }
        }
    }
    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
    SortColl_Set_Reader_t super_set_reader
        = (SortColl_Set_Reader_t)SUPER_METHOD_PTR(SORTCOLLECTOR,
                                                  LUCY_SortColl_Set_Reader);
    super_set_reader(self, reader);
}
Example #21
Compiler*
PhraseQuery_Make_Compiler_IMP(PhraseQuery *self, Searcher *searcher,
                              float boost, bool subordinate) {
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);
    if (Vec_Get_Size(ivars->terms) == 1) {
        // Optimize for one-term "phrases".
        Obj *term = Vec_Fetch(ivars->terms, 0);
        TermQuery *term_query = TermQuery_new(ivars->field, term);
        TermCompiler *term_compiler;
        TermQuery_Set_Boost(term_query, ivars->boost);
        term_compiler
            = (TermCompiler*)TermQuery_Make_Compiler(term_query, searcher,
                                                     boost, subordinate);
        DECREF(term_query);
        return (Compiler*)term_compiler;
    }
    else {
        PhraseCompiler *compiler
            = PhraseCompiler_new(self, searcher, boost);
        if (!subordinate) {
            PhraseCompiler_Normalize(compiler);
        }
        return (Compiler*)compiler;
    }
}
Example #22
File: SortWriter.c  Project: apache/lucy
void
SortWriter_Add_Inverted_Doc_IMP(SortWriter *self, Inverter *inverter,
                                int32_t doc_id) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    int32_t field_num;

    Inverter_Iterate(inverter);
    while (0 != (field_num = Inverter_Next(inverter))) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Sortable(type)) {
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add(field_writer, doc_id,
                                Inverter_Get_Value(inverter));
        }
    }

    // If our SortFieldWriters have collectively passed the memory threshold,
    // flush all of them, then reset the counter which tracks memory
    // consumption.
    if ((size_t)Counter_Get_Value(ivars->counter) > ivars->mem_thresh) {
        for (size_t i = 0; i < Vec_Get_Size(ivars->field_writers); i++) {
            SortFieldWriter *const field_writer
                = (SortFieldWriter*)Vec_Fetch(ivars->field_writers, i);
            if (field_writer) { SortFieldWriter_Flush(field_writer); }
        }
        Counter_Reset(ivars->counter);
        ivars->flush_at_finish = true;
    }
}
Example #23
PolyLexicon*
PolyLex_init(PolyLexicon *self, String *field, Vector *sub_readers) {
    uint32_t  num_sub_readers = Vec_Get_Size(sub_readers);
    Vector   *seg_lexicons    = Vec_new(num_sub_readers);

    // Init.
    Lex_init((Lexicon*)self, field);
    PolyLexiconIVARS *const ivars = PolyLex_IVARS(self);
    ivars->term            = NULL;
    ivars->lex_q           = SegLexQ_new(num_sub_readers);

    // Derive.
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        LexiconReader *lex_reader = (LexiconReader*)Vec_Fetch(sub_readers, i);
        if (lex_reader && CERTIFY(lex_reader, LEXICONREADER)) {
            Lexicon *seg_lexicon = LexReader_Lexicon(lex_reader, field, NULL);
            if (seg_lexicon != NULL) {
                Vec_Push(seg_lexicons, (Obj*)seg_lexicon);
            }
        }
    }
    ivars->seg_lexicons  = seg_lexicons;

    PolyLex_Reset(self);

    return self;
}
Example #24
File: QueryParser.c  Project: kidaa/lucy
static void
S_balance_parens(QueryParser *self, Vector *elems) {
    UNUSED_VAR(self);
    // Count paren balance, eliminate unbalanced right parens.
    int64_t paren_depth = 0;
    size_t i = 0;
    while (i < Vec_Get_Size(elems)) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == TOKEN_OPEN_PAREN) {
            paren_depth++;
        }
        else if (ParserElem_Get_Type(elem) == TOKEN_CLOSE_PAREN) {
            if (paren_depth > 0) {
                paren_depth--;
            }
            else {
                Vec_Excise(elems, i, 1);
                continue;
            }
        }
        i++;
    }

    // Close any still-open parens by appending implicit close parens.
    while (paren_depth--) {
        ParserElem *elem = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
        Vec_Push(elems, (Obj*)elem);
    }
}
Example #25
File: SortWriter.c  Project: apache/lucy
void
SortWriter_Add_Segment_IMP(SortWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    Vector *fields = Schema_All_Fields(ivars->schema);

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String *field = (String*)Vec_Fetch(fields, i);
        SortReader *sort_reader = (SortReader*)SegReader_Fetch(
                                      reader, Class_Get_Name(SORTREADER));
        SortCache *cache = sort_reader
                           ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                           : NULL;
        if (cache) {
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
Example #26
File: QueryParser.c  Project: kidaa/lucy
static void
S_discard_elems(Vector *elems, uint32_t type) {
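    // Iterate in reverse so that excising an element doesn't shift the
    // positions of elements not yet visited.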
    for (size_t i = Vec_Get_Size(elems); i--;) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == type) { Vec_Excise(elems, i, 1); }
    }
}
Example #27
void
PolyLex_Reset_IMP(PolyLexicon *self) {
    PolyLexiconIVARS *const ivars = PolyLex_IVARS(self);
    Vector *seg_lexicons = ivars->seg_lexicons;
    uint32_t num_segs = Vec_Get_Size(seg_lexicons);
    SegLexQueue *lex_q = ivars->lex_q;

    // Empty out the queue.
    while (1) {
        SegLexicon *seg_lex = (SegLexicon*)SegLexQ_Pop(lex_q);
        if (seg_lex == NULL) { break; }
        DECREF(seg_lex);
    }

    // Fill the queue with valid SegLexicons.
    for (uint32_t i = 0; i < num_segs; i++) {
        SegLexicon *const seg_lexicon
            = (SegLexicon*)Vec_Fetch(seg_lexicons, i);
        SegLex_Reset(seg_lexicon);
        if (SegLex_Next(seg_lexicon)) {
            SegLexQ_Insert(ivars->lex_q, INCREF(seg_lexicon));
        }
    }

    if (ivars->term != NULL) {
        DECREF(ivars->term);
        ivars->term = NULL;
    }
}
Example #28
File: Indexer.c  Project: rectang/lucy
void
Indexer_Delete_By_Term_IMP(Indexer *self, String *field, Obj *term) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Schema    *schema = ivars->schema;
    FieldType *type   = Schema_Fetch_Type(schema, field);

    // Raise exception if the field isn't indexed.
    if (!type || !FType_Indexed(type)) {
        THROW(ERR, "%o is not an indexed field", field);
    }

    // Analyze term if appropriate, then zap.
    if (FType_is_a(type, FULLTEXTTYPE)) {
        CERTIFY(term, STRING);
        Analyzer *analyzer = Schema_Fetch_Analyzer(schema, field);
        Vector *terms = Analyzer_Split(analyzer, (String*)term);
        Obj *analyzed_term = Vec_Fetch(terms, 0);
        if (analyzed_term) {
            DelWriter_Delete_By_Term(ivars->del_writer, field,
                                     analyzed_term);
        }
        DECREF(terms);
    }
    else {
        DelWriter_Delete_By_Term(ivars->del_writer, field, term);
    }
}
Example #29
File: QueryParser.c  Project: kidaa/lucy
static void
S_compose_or_queries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    for (uint32_t i = 0; i + 2 < Vec_Get_Size(elems); i++) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i + 1);
        if (ParserElem_Get_Type(elem) == TOKEN_OR) {
            ParserElem   *preceding  = (ParserElem*)Vec_Fetch(elems, i);
            Vector       *children   = Vec_new(2);
            uint32_t      num_to_zap = 0;

            // Add first clause.
            Query *preceding_query = (Query*)ParserElem_As(preceding, QUERY);
            Vec_Push(children, INCREF(preceding_query));

            // Add following clauses.
            for (uint32_t j = i + 1, jmax = Vec_Get_Size(elems);
                 j < jmax;
                 j += 2, num_to_zap += 2
                ) {
                ParserElem *maybe_or  = (ParserElem*)Vec_Fetch(elems, j);
                ParserElem *following = (ParserElem*)Vec_Fetch(elems, j + 1);
                if (ParserElem_Get_Type(maybe_or) != TOKEN_OR) {
                    break;
                }
                else if (ParserElem_Get_Type(following) == TOKEN_QUERY) {
                    Query *next = (Query*)ParserElem_As(following, QUERY);
                    Vec_Push(children, INCREF(next));
                }
                else {
                    THROW(ERR, "Unexpected type: %u32",
                          ParserElem_Get_Type(following));
                }
            }
            Query *or_query = QParser_Make_OR_Query(self, children);
            ParserElem_Set_Value(preceding, (Obj*)or_query);
            if (default_occur == MUST) {
                ParserElem_Require(preceding);
            }
            DECREF(or_query);
            DECREF(children);

            Vec_Excise(elems, i + 1, num_to_zap);
        }
    }
}
Example #30
File: PolyQuery.c  Project: apache/lucy
void
PolyCompiler_Apply_Norm_Factor_IMP(PolyCompiler *self, float factor) {
    PolyCompilerIVARS *const ivars = PolyCompiler_IVARS(self);
    for (size_t i = 0, max = Vec_Get_Size(ivars->children); i < max; i++) {
        Compiler *child = (Compiler*)Vec_Fetch(ivars->children, i);
        Compiler_Apply_Norm_Factor(child, factor);
    }
}