예제 #1
0
파일: TestHash.c 프로젝트: pavansondur/lucy
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash     *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray   *expected = VA_new(100);
    VArray   *keys;
    VArray   *values;

    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }

    VA_Sort(expected, NULL, NULL);

    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }

    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope)),
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
예제 #2
0
VArray*
IxManager_segreaders_to_merge(IndexManager *self, PolyReader *reader,
                              bool_t all)
{
    VArray *seg_readers = VA_Shallow_Copy(PolyReader_Get_Seg_Readers(reader));
    UNUSED_VAR(self);

    if (!all) {
        u32_t i;
        u32_t total_docs = 0;
        u32_t threshold = 0;
        const u32_t num_seg_readers = VA_Get_Size(seg_readers);

        /* Sort by ascending size in docs. */
        VA_Sort(seg_readers, S_compare_doc_count);

        /* Find sparsely populated segments. */
        for (i = 0; i < num_seg_readers; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            total_docs += SegReader_Doc_Count(seg_reader);
            if (total_docs < Math_fibonacci(i + 5)) {
                threshold = i + 1;
            }
        }
        VA_Splice(seg_readers, threshold, num_seg_readers);
    }

    return seg_readers;
}
예제 #3
0
uint32_t
Inverter_Iterate_IMP(Inverter *self) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    ivars->tick = -1;
    if (!ivars->sorted) {
        VA_Sort(ivars->entries, NULL, NULL);
        ivars->sorted = true;
    }
    return VA_Get_Size(ivars->entries);
}
예제 #4
0
파일: HeatMap.c 프로젝트: gitpan/KinoSearch
HeatMap*
HeatMap_init(HeatMap *self, VArray *spans, uint32_t window)
{
    VArray *spans_copy = VA_Shallow_Copy(spans);
    VArray *spans_plus_boosts;

    self->spans  = NULL;
    self->window = window;

    VA_Sort(spans_copy, NULL, NULL);
    spans_plus_boosts = HeatMap_Generate_Proximity_Boosts(self, spans_copy);
    VA_Push_VArray(spans_plus_boosts, spans_copy);
    VA_Sort(spans_plus_boosts, NULL, NULL);
    self->spans = HeatMap_Flatten_Spans(self, spans_plus_boosts);

    DECREF(spans_plus_boosts);
    DECREF(spans_copy);

    return self;
}
예제 #5
0
파일: TestHash.c 프로젝트: pavansondur/lucy
static void
test_stress(TestBatch *batch) {
    Hash     *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray   *expected = VA_new(1000);
    VArray   *keys;
    VArray   *values;

    for (uint32_t i = 0; i < 1000; i++) {
        CharBuf *cb = TestUtils_random_string(rand() % 1200);
        while (Hash_Fetch(hash, (Obj*)cb)) {
            DECREF(cb);
            cb = TestUtils_random_string(rand() % 1200);
        }
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }

    VA_Sort(expected, NULL, NULL);

    // Overwrite for good measure.
    for (uint32_t i = 0; i < 1000; i++) {
        CharBuf *cb = (CharBuf*)VA_Fetch(expected, i);
        Hash_Store(hash, (Obj*)cb, INCREF(cb));
    }

    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "stress Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "stress Values");

    DECREF(keys);
    DECREF(values);
    DECREF(expected);
    DECREF(hash);
}
예제 #6
0
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader, 
                  DeletionsWriter *del_writer, int64_t cutoff, bool_t optimize)
{
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) { 
        DECREF(recyclables);
        return candidates; 
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)CERTIFY(
            VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted. 
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
예제 #7
0
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);
    String *retval;

    if (!field_val || !Obj_Is_A((Obj*)field_val, STRING)) {
        retval = NULL;
    }
    else if (!Str_Get_Size(field_val)) {
        // Empty string yields empty string.
        retval = Str_new_from_trusted_utf8("", 0);
    }
    else {
        DocVector *doc_vec
            = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                     HitDoc_Get_Doc_ID(hit_doc));
        VArray *maybe_spans
            = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                       doc_vec, ivars->field);
        VArray *score_spans = maybe_spans ? maybe_spans : VA_new(0);
        VA_Sort(score_spans, NULL, NULL);
        HeatMap *heat_map
            = HeatMap_new(score_spans, (ivars->excerpt_length * 2) / 3);

        int32_t top;
        String *raw_excerpt
            = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
        String *highlighted
            = Highlighter_Highlight_Excerpt(self, score_spans, raw_excerpt,
                                            top);

        DECREF(raw_excerpt);
        DECREF(heat_map);
        DECREF(score_spans);
        DECREF(doc_vec);

        retval = highlighted;
    }

    DECREF(field_val);
    return retval;
}
예제 #8
0
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *path)
{
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    // Update path. 
    DECREF(self->path);
    if (path) {
        self->path = CB_Clone(path);
    }
    else {
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        self->path = CB_newf("snapshot_%s.json", &base36);
        DECREF(latest);
    }

    // Don't overwrite. 
    if (Folder_Exists(folder, self->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", self->path);
    }

    // Sort, then store file names. 
    VA_Sort(list, NULL, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure. 
    Hash_Store_Str(all_data, "format", 6, 
        (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_format) );
    Hash_Store_Str(all_data, "subformat", 9, 
        (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_subformat) );

    // Write out JSON-ized data to the new file. 
    Json_spew_json((Obj*)all_data, folder, self->path);

    DECREF(all_data);
}
예제 #9
0
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *filename)
{
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    /* Update filename. */
    DECREF(self->filename);
    if (filename) {
        self->filename = CB_Clone(filename);
    }
    else {
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        i32_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        CharBuf *base_36 = StrHelp_to_base36(gen);
        self->filename = CB_newf("snapshot_%o.json", base_36);
        DECREF(latest);
        DECREF(base_36);
    }

    /* Don't overwrite. */
    if (Folder_Exists(folder, self->filename)) {
        THROW("Snapshot file '%o' already exists", self->filename);
    }

    /* Sort, then store file names. */
    VA_Sort(list, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    /* Create a JSON-izable data structure. */
    Hash_Store_Str(all_data, "format", 6, 
        (Obj*)CB_newf("%i32", (i32_t)Snapshot_current_file_format) );

    /* Write out JSON-ized data to the new file. */
    Json_spew_json((Obj*)all_data, folder, self->filename);

    DECREF(all_data);
}
예제 #10
0
static void
test_List(TestBatch *batch)
{
    Folder     *folder = (Folder*)RAMFolder_new(NULL);
    FileHandle *fh;
    VArray     *list;
    CharBuf    *elem;

    Folder_MkDir(folder, &foo);
    Folder_MkDir(folder, &foo_bar);
    Folder_MkDir(folder, &foo_bar_baz);
    fh = Folder_Open_FileHandle(folder, &boffo, FH_CREATE | FH_WRITE_ONLY);
    DECREF(fh);
    fh = Folder_Open_FileHandle(folder, &banana, FH_CREATE | FH_WRITE_ONLY);
    DECREF(fh);

    list = Folder_List(folder, NULL);
    VA_Sort(list, NULL, NULL);
    TEST_INT_EQ(batch, VA_Get_Size(list), 3, "List");
    elem = (CharBuf*)DOWNCAST(VA_Fetch(list, 0), CHARBUF);
    TEST_TRUE(batch, elem && CB_Equals(elem, (Obj*)&banana), 
        "List first file");
    elem = (CharBuf*)DOWNCAST(VA_Fetch(list, 1), CHARBUF);
    TEST_TRUE(batch, elem && CB_Equals(elem, (Obj*)&boffo), 
        "List second file");
    elem = (CharBuf*)DOWNCAST(VA_Fetch(list, 2), CHARBUF);
    TEST_TRUE(batch, elem && CB_Equals(elem, (Obj*)&foo), "List dir");
    DECREF(list);

    list = Folder_List(folder, &foo_bar);
    TEST_INT_EQ(batch, VA_Get_Size(list), 1, "List subdirectory contents");
    elem = (CharBuf*)DOWNCAST(VA_Fetch(list, 0), CHARBUF);
    TEST_TRUE(batch, elem && CB_Equals(elem, (Obj*)&baz), 
        "Just the filename");
    DECREF(list);

    DECREF(folder);
}
예제 #11
0
파일: PolyReader.c 프로젝트: theory/lucy
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self              = args->self;
    PolyReaderIVARS *const ivars  = PolyReader_IVARS(self);
    VArray     *files             = Snapshot_List(ivars->snapshot);
    Folder     *folder            = PolyReader_Get_Folder(self);
    uint32_t    num_segs          = 0;
    uint64_t    latest_schema_gen = 0;
    CharBuf    *schema_file       = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (CB_Starts_With_Str(entry, "schema_", 7)
                 && CB_Ends_With_Str(entry, ".json", 5)
                ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                if (!schema_file) { schema_file = CB_Clone(entry); }
                else { CB_Mimic(schema_file, (Obj*)entry); }
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(
                               VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
            DECREF(dump);
            DECREF(schema_file);
            schema_file = NULL;
        }
        else {
            CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need to
            // retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                CharBuf *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) {
            break;
        }
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}
예제 #12
0
파일: Json.c 프로젝트: pavansondur/lucy
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t size = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            if (!(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }
        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash *hash = (Hash*)dump;
        size_t size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                DECREF(keys);
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);

        DECREF(keys);
    }

    return true;
}
예제 #13
0
void
FilePurger_purge(FilePurger *self)
{
    Lock *deletion_lock = IxManager_Make_Deletion_Lock(self->manager);

    // Obtain deletion lock, purge files, release deletion lock.
    Lock_Clear_Stale(deletion_lock);
    if (Lock_Obtain(deletion_lock)) {
        Folder  *folder    = self->folder;
        Hash    *failures  = Hash_new(0);
        VArray  *purgables;
        VArray  *snapshots;

        S_discover_unused(self, &purgables, &snapshots);

        // Attempt to delete entries -- if failure, no big deal, just try
        // again later.  Proceed in reverse lexical order so that directories
        // get deleted after they've been emptied. 
        VA_Sort(purgables, NULL, NULL);
        for (uint32_t i = VA_Get_Size(purgables); i--; ) {
            CharBuf *entry = (CharBuf*)VA_fetch(purgables, i);
            if (Hash_Fetch(self->disallowed, (Obj*)entry)) { continue; }
            if (!Folder_Delete(folder, entry)) { 
                if (Folder_Exists(folder, entry)) {
                    Hash_Store(failures, (Obj*)entry, INCREF(&EMPTY));
                }
            }
        }

        for (uint32_t i = 0, max = VA_Get_Size(snapshots); i < max; i++) {
            Snapshot *snapshot = (Snapshot*)VA_Fetch(snapshots, i);
            bool_t snapshot_has_failures = false;
            if (Hash_Get_Size(failures)) {
                // Only delete snapshot files if all of their entries were
                // successfully deleted.  
                VArray *entries = Snapshot_List(snapshot);
                for (uint32_t j = VA_Get_Size(entries); j--; ) {
                    CharBuf *entry = (CharBuf*)VA_Fetch(entries, j);
                    if (Hash_Fetch(failures, (Obj*)entry)) {
                        snapshot_has_failures = true;
                        break;
                    }
                }
                DECREF(entries);
            }
            if (!snapshot_has_failures) {
                CharBuf *snapfile = Snapshot_Get_Path(snapshot);
                Folder_Delete(folder, snapfile);
            }
        }

        DECREF(failures);
        DECREF(purgables);
        DECREF(snapshots);
        Lock_Release(deletion_lock);
    }
    else {
        WARN("Can't obtain deletion lock, skipping deletion of "
            "obsolete files");
    }

    DECREF(deletion_lock);
}