Exemple #1
0
Obj*
TermQuery_Dump_IMP(TermQuery *self) {
    TermQueryIVARS *ivars = TermQuery_IVARS(self);
    TermQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(TERMQUERY, LUCY_TermQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    Hash_Store_Utf8(dump, "term", 4, Freezer_dump(ivars->term));
    return (Obj*)dump;
}
Exemple #2
0
PolyHighlightReader*
PolyHLReader_init(PolyHighlightReader *self, VArray *readers,
                  I32Array *offsets) {
    HLReader_init((HighlightReader*)self, NULL, NULL, NULL, NULL, -1);
    for (uint32_t i = 0, max = VA_Get_Size(readers); i < max; i++) {
        CERTIFY(VA_Fetch(readers, i), HIGHLIGHTREADER);
    }
    self->readers = (VArray*)INCREF(readers);
    self->offsets = (I32Array*)INCREF(offsets);
    return self;
}
static PhraseQuery*
S_do_init(PhraseQuery *self, String *field, Vector *terms, float boost) {
    Query_init((Query*)self, boost);
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);
    for (uint32_t i = 0, max = Vec_Get_Size(terms); i < max; i++) {
        CERTIFY(Vec_Fetch(terms, i), OBJ);
    }
    ivars->field = field;
    ivars->terms = terms;
    return self;
}
Exemple #4
0
PolyDocReader*
PolyDocReader_init(PolyDocReader *self, Vector *readers, I32Array *offsets) {
    DocReader_init((DocReader*)self, NULL, NULL, NULL, NULL, -1);
    PolyDocReaderIVARS *const ivars = PolyDocReader_IVARS(self);
    for (uint32_t i = 0, max = Vec_Get_Size(readers); i < max; i++) {
        CERTIFY(Vec_Fetch(readers, i), DOCREADER);
    }
    ivars->readers = (Vector*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
Exemple #5
0
void
BBSortEx_feed(BBSortEx *self, void *data) {
    SortEx_feed((SortExternal*)self, data);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(*(ByteBuf**)data, BYTEBUF);
    self->mem_consumed += BB_Get_Size(bytebuf);
    if (self->mem_consumed >= self->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
int32_t
StrIter_Compare_To_IMP(StringIterator *self, Obj *other) {
    StringIterator *twin = (StringIterator*)CERTIFY(other, STRINGITERATOR);
    if (self->string != twin->string) {
        THROW(ERR, "Can't compare iterators of different strings");
        UNREACHABLE_RETURN(int32_t);
    }
    if (self->byte_offset < twin->byte_offset) { return -1; }
    if (self->byte_offset > twin->byte_offset) { return 1; }
    return 0;
}
Exemple #7
0
Obj*
PolyAnalyzer_Dump_IMP(PolyAnalyzer *self) {
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
    PolyAnalyzer_Dump_t super_dump
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->analyzers) {
        Hash_Store_Utf8(dump, "analyzers", 9,
                        Freezer_dump((Obj*)ivars->analyzers));
    }
    return (Obj*)dump;
}
Obj*
ProximityQuery_Dump_IMP(ProximityQuery *self) {
    ProximityQueryIVARS *ivars = ProximityQuery_IVARS(self);
    ProximityQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(PROXIMITYQUERY, LUCY_ProximityQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    Hash_Store_Utf8(dump, "terms", 5, Freezer_dump((Obj*)ivars->terms));
    Hash_Store_Utf8(dump, "within", 6,
                    (Obj*)Str_newf("%i64", (int64_t)ivars->within));
    return (Obj*)dump;
}
Exemple #9
0
static PhraseQuery*
S_do_init(PhraseQuery *self, CharBuf *field, VArray *terms, float boost)
{
    uint32_t i, max;
    Query_init((Query*)self, boost);
    for (i = 0, max = VA_Get_Size(terms); i < max; i++) {
        CERTIFY(VA_Fetch(terms, i), OBJ);
    }
    self->field = field;
    self->terms = terms;
    return self;
}
Exemple #10
0
Obj*
LeafQuery_Dump_IMP(LeafQuery *self) {
    LeafQueryIVARS *ivars = LeafQuery_IVARS(self);
    LeafQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(LEAFQUERY, LUCY_LeafQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->field) {
        Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    }
    Hash_Store_Utf8(dump, "text", 4, Freezer_dump((Obj*)ivars->text));
    return (Obj*)dump;
}
Exemple #11
0
PolyHighlightReader*
PolyHLReader_init(PolyHighlightReader *self, Vector *readers,
                  I32Array *offsets) {
    HLReader_init((HighlightReader*)self, NULL, NULL, NULL, NULL, -1);
    PolyHighlightReaderIVARS *const ivars = PolyHLReader_IVARS(self);
    for (size_t i = 0, max = Vec_Get_Size(readers); i < max; i++) {
        CERTIFY(Vec_Fetch(readers, i), HIGHLIGHTREADER);
    }
    ivars->readers = (Vector*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
Exemple #12
0
static void
S_add_string_field(Schema *self, String *field, FieldType *type) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    StringType *string_type = (StringType*)CERTIFY(type, STRINGTYPE);
    Similarity *sim         = StringType_Make_Similarity(string_type);

    // Cache helpers.
    Hash_Store(ivars->sims, field, (Obj*)sim);

    // Store FieldType.
    Hash_Store(ivars->types, field, INCREF(type));
}
Exemple #13
0
Obj*
SnowStop_Dump_IMP(SnowballStopFilter *self) {
    SnowballStopFilterIVARS *ivars = SnowStop_IVARS(self);
    SnowStop_Dump_t super_dump
        = SUPER_METHOD_PTR(SNOWBALLSTOPFILTER, LUCY_SnowStop_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->stoplist) {
        Hash_Store_Utf8(dump, "stoplist", 8,
                        Freezer_dump((Obj*)ivars->stoplist));
    }
    return (Obj*)dump;
}
static void
test_Peek_and_Pop_All(TestBatchRunner *runner) {
    NumPriorityQueue *pq = NumPriQ_new(5);
    Float64 *val;

    S_insert_num(pq, 3);
    S_insert_num(pq, 1);
    S_insert_num(pq, 2);
    S_insert_num(pq, 20);
    S_insert_num(pq, 10);
    val = (Float64*)CERTIFY(NumPriQ_Peek(pq), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 1,
                "peek at the least item in the queue");

    VArray  *got = NumPriQ_Pop_All(pq);
    val = (Float64*)CERTIFY(VA_Fetch(got, 0), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 20, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 1), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 10, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 2), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val),  3, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 3), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val),  2, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 4), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val),  1, "pop_all");

    DECREF(got);
    DECREF(pq);
}
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash    *cf_metadata;
    Hash    *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool     offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offset, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);
    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
Exemple #16
0
static ProximityQuery*
S_do_init(ProximityQuery *self, String *field, VArray *terms, float boost,
          uint32_t within) {
    Query_init((Query*)self, boost);
    ProximityQueryIVARS *const ivars = ProximityQuery_IVARS(self);
    for (uint32_t i = 0, max = VA_Get_Size(terms); i < max; i++) {
        CERTIFY(VA_Fetch(terms, i), OBJ);
    }
    ivars->field  = field;
    ivars->terms  = terms;
    ivars->within = within;
    return self;
}
Exemple #17
0
int32_t
Blob_Compare_To_IMP(Blob *self, Obj *other) {
    Blob *twin = (Blob*)CERTIFY(other, BLOB);
    const size_t size = self->size < twin->size ? self->size : twin->size;

    int32_t comparison = memcmp(self->buf, twin->buf, size);

    if (comparison == 0 && self->size != twin->size) {
        comparison = self->size < twin->size ? -1 : 1;
    }

    return comparison;
}
Exemple #18
0
QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer,
             String *default_boolop, Vector *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    // Init.
    ivars->heed_colons = false;
    ivars->lexer       = QueryLexer_new();

    // Assign.
    ivars->schema         = (Schema*)INCREF(schema);
    ivars->analyzer       = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop
                           ? Str_Clone(default_boolop)
                           : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        ivars->fields = Vec_Clone(fields);
        for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
            CERTIFY(Vec_Fetch(fields, i), STRING);
        }
        Vec_Sort(ivars->fields);
    }
    else {
        Vector *all_fields = Schema_All_Fields(schema);
        uint32_t num_fields = Vec_Get_Size(all_fields);
        ivars->fields = Vec_new(num_fields);
        for (uint32_t i = 0; i < num_fields; i++) {
            String *field = (String*)Vec_Fetch(all_fields, i);
            FieldType *type = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                Vec_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }
    Vec_Sort(ivars->fields);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o", ivars->default_boolop);
    }

    return self;
}
Exemple #19
0
Obj*
Hash_load(Hash *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a Dump rather than an ordinary Hash. */
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (!vtable) {
            CharBuf *parent_class = VTable_find_parent_class(class_name);
            if (parent_class) {
                VTable *parent = VTable_singleton(parent_class, NULL);
                vtable = VTable_singleton(class_name, parent);
                DECREF(parent_class);
            }
            else {
                // TODO: Fix Hash_Load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (vtable) {
            Obj_Load_t load = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            }
            else if (load != (Obj_Load_t)Hash_load) { // stop inf loop
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(source->size);
    Obj *key;
    Obj *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Hash_Store(loaded, key, Obj_Load(value, value));
    }

    return (Obj*)loaded;

}
Exemple #20
0
void
BBSortEx_Feed_IMP(BBSortEx *self, Obj *item) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
    BBSortEx_Feed_t super_feed
        = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Feed);
    super_feed(self, item);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(item, BYTEBUF);
    ivars->mem_consumed += BB_Get_Size(bytebuf);
    if (ivars->mem_consumed >= ivars->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
Exemple #21
0
void
BitVec_mimic(BitVector *self, Obj *other) {
    BitVector *twin = (BitVector*)CERTIFY(other, BITVECTOR);
    const uint32_t my_byte_size = (uint32_t)ceil(self->cap / 8.0);
    const uint32_t twin_byte_size = (uint32_t)ceil(twin->cap / 8.0);
    if (my_byte_size > twin_byte_size) {
        uint32_t space = my_byte_size - twin_byte_size;
        memset(self->bits + twin_byte_size, 0, space);
    }
    else if (my_byte_size < twin_byte_size) {
        BitVec_Grow(self, twin->cap - 1);
    }
    memcpy(self->bits, twin->bits, twin_byte_size);
}
void
BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
    BlobSortEx_Feed_t super_feed
        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed);
    super_feed(self, item);

    // Flush() if necessary.
    Blob *blob = (Blob*)CERTIFY(item, BLOB);
    ivars->mem_consumed += Blob_Get_Size(blob);
    if (ivars->mem_consumed >= ivars->mem_thresh) {
        BlobSortEx_Flush(self);
    }
}
Exemple #23
0
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader, 
                  DeletionsWriter *del_writer, int64_t cutoff, bool_t optimize)
{
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) { 
        DECREF(recyclables);
        return candidates; 
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)CERTIFY(
            VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted. 
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
Exemple #24
0
PolyLexiconReader*
PolyLexReader_init(PolyLexiconReader *self, VArray *readers,
                   I32Array *offsets) {
    Schema *schema = NULL;
    for (uint32_t i = 0, max = VA_Get_Size(readers); i < max; i++) {
        LexiconReader *reader
            = (LexiconReader*)CERTIFY(VA_Fetch(readers, i), LEXICONREADER);
        if (!schema) { schema = LexReader_Get_Schema(reader); }
    }
    LexReader_init((LexiconReader*)self, schema, NULL, NULL, NULL, -1);
    PolyLexiconReaderIVARS *const ivars = PolyLexReader_IVARS(self);
    ivars->readers = (VArray*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
Exemple #25
0
Obj*
I64SortCache_value(Int64SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Integer64 *int_blank = (Integer64*)CERTIFY(blank, INTEGER64);
        InStream_Seek(self->dat_in, ord * sizeof(int64_t));
        Int64_Set_Value(int_blank, InStream_Read_I64(self->dat_in));
    }
    return blank;
}
Exemple #26
0
Obj*
F32SortCache_value(Float32SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Float32 *num_blank = (Float32*)CERTIFY(blank, FLOAT32);
        InStream_Seek(self->dat_in, ord * sizeof(float));
        Float32_Set_Value(num_blank, InStream_Read_F32(self->dat_in));
    }
    return blank;
}
Exemple #27
0
static void
S_add_text_field(Schema *self, String *field, FieldType *type) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    FullTextType *fttype    = (FullTextType*)CERTIFY(type, FULLTEXTTYPE);
    Similarity   *sim       = FullTextType_Make_Similarity(fttype);
    Analyzer     *analyzer  = FullTextType_Get_Analyzer(fttype);

    // Cache helpers.
    Hash_Store(ivars->sims, field, (Obj*)sim);
    Hash_Store(ivars->analyzers, field, INCREF(analyzer));
    S_add_unique(ivars->uniq_analyzers, (Obj*)analyzer);

    // Store FieldType.
    Hash_Store(ivars->types, field, INCREF(type));
}
Exemple #28
0
Doc*
Doc_init(Doc *self, void *fields, int32_t doc_id) {
    DocIVARS *const ivars = Doc_IVARS(self);
    Hash *hash;

    if (fields) {
        hash = (Hash *)INCREF(CERTIFY(fields, HASH));
    }
    else {
        hash = Hash_new(0);
    }
    ivars->fields = hash;
    ivars->doc_id = doc_id;

    return self;
}
Exemple #29
0
void
BitVec_Mimic_IMP(BitVector *self, Obj *other) {
    CERTIFY(other, BITVECTOR);
    BitVectorIVARS *const ivars = BitVec_IVARS(self);
    BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other);
    const size_t my_byte_size = SI_octet_size(ivars->cap);
    const size_t other_byte_size = SI_octet_size(ovars->cap);
    if (my_byte_size > other_byte_size) {
        size_t space = my_byte_size - other_byte_size;
        memset(ivars->bits + other_byte_size, 0, space);
    }
    else if (my_byte_size < other_byte_size) {
        BitVec_Grow(self, ovars->cap - 1);
    }
    memcpy(ivars->bits, ovars->bits, other_byte_size);
}
Exemple #30
0
Hash*
Hash_dump(Hash *self) {
    Hash *dump = Hash_new(self->size);
    Obj *key;
    Obj *value;

    Hash_Iterate(self);
    while (Hash_Next(self, &key, &value)) {
        // Since JSON only supports text hash keys, Dump() can only support
        // text hash keys.
        CERTIFY(key, CHARBUF);
        Hash_Store(dump, key, Obj_Dump(value));
    }

    return dump;
}