Example #1
0
/* Write `self` to `outstream`.
 *
 * Output order: the entry count (`self->size`), the number of entries whose
 * key is a CharBuf, every CharBuf-keyed entry, and finally every remaining
 * entry.  CharBuf keys go through Obj_Serialize(); all other keys and every
 * value go through FREEZE().
 */
void
Hash_serialize(Hash *self, OutStream *outstream) {
    Obj *k;
    Obj *v;
    uint32_t num_charbuf_keys = 0;

    OutStream_Write_C32(outstream, self->size);

    // CharBuf keys are the common case, so they are emitted as one group up
    // front.  Grouping them acts like run-length-encoding: the per-key class
    // name can be left out, which saves space.  First pass: size the group.
    Hash_Iterate(self);
    while (Hash_Next(self, &k, &v)) {
        if (!Obj_Is_A(k, CHARBUF)) { continue; }
        num_charbuf_keys++;
    }
    OutStream_Write_C32(outstream, num_charbuf_keys);

    // Second pass: emit the CharBuf-keyed entries.
    Hash_Iterate(self);
    while (Hash_Next(self, &k, &v)) {
        if (!Obj_Is_A(k, CHARBUF)) { continue; }
        Obj_Serialize(k, outstream);
        FREEZE(v, outstream);
    }

    // Third pass: everything else.  No attempt is made to group these keys
    // by class; each one is simply frozen.
    Hash_Iterate(self);
    while (Hash_Next(self, &k, &v)) {
        if (Obj_Is_A(k, CHARBUF)) { continue; }
        FREEZE(k, outstream);
        FREEZE(v, outstream);
    }
}
Example #2
0
/* Install `sub_readers` on the PolyReader and recompute the state derived
 * from them: `doc_max`, the `offsets` array of per-segment start positions,
 * the aggregated entries in `components`, and `del_count`.
 */
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t  reader_count = VA_Get_Size(sub_readers);
    int32_t  *seg_starts
        = (int32_t*)MALLOCATE(reader_count * sizeof(int32_t));
    Hash     *readers_by_api = Hash_new(0);

    // Drop the previous sub-readers and offsets, then take a reference to
    // the new list.  The offsets are rebuilt further down.
    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Walk the segments once, recording where each one starts, summing up
    // doc_max, and bucketing every segment's DataReaders by component key.
    ivars->doc_max = 0;
    for (uint32_t tick = 0; tick < reader_count; tick++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, tick);
        Hash *seg_components = SegReader_Get_Components(seg_reader);
        seg_starts[tick] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);

        CharBuf    *api_name;
        DataReader *seg_component;
        Hash_Iterate(seg_components);
        while (Hash_Next(seg_components, (Obj**)&api_name,
                         (Obj**)&seg_component)) {
            VArray *slots
                = (VArray*)Hash_Fetch(readers_by_api, (Obj*)api_name);
            if (!slots) {
                // First time this key is seen: create its bucket.
                slots = VA_new(reader_count);
                Hash_Store(readers_by_api, (Obj*)api_name, (Obj*)slots);
            }
            // Slot index matches the segment's position in sub_readers.
            VA_Store(slots, tick, INCREF(seg_component));
        }
    }
    ivars->offsets = I32Arr_new_steal(seg_starts, reader_count);

    // Ask the first non-null reader in each bucket to build an aggregator
    // over the whole bucket; keep whatever aggregators come back.
    CharBuf *bucket_name;
    VArray  *bucket;
    Hash_Iterate(readers_by_api);
    while (Hash_Next(readers_by_api, (Obj**)&bucket_name, (Obj**)&bucket)) {
        DataReader *representative
            = (DataReader*)CERTIFY(S_first_non_null(bucket), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(representative, bucket, ivars->offsets);
        if (!aggregator) { continue; }
        CERTIFY(aggregator, DATAREADER);
        Hash_Store(ivars->components, (Obj*)bucket_name, (Obj*)aggregator);
    }
    DECREF(readers_by_api);

    // The deletion count comes from the DeletionsReader component, if any.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    if (del_reader) {
        ivars->del_count = DelReader_Del_Count(del_reader);
    }
    else {
        ivars->del_count = 0;
    }
}
Example #3
0
/* Run the stemmer against the fixture file tests.json.
 *
 * The file is read as a Hash.  Each key is handed to SnowStemmer_new() and
 * each value is a Hash with parallel "words" and "stems" arrays.  For every
 * word, the first element returned by SnowStemmer_Split() must equal the
 * stem at the same index.
 */
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String   *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash     *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) {
        RETHROW(Err_get_error());
    }

    String *iso;
    Hash   *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        uint32_t word_count = VA_Get_Size(words);

        for (uint32_t tick = 0; tick < word_count; tick++) {
            String *word = (String*)VA_Fetch(words, tick);
            VArray *got  = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            // Pass only if a stem came back, it is a String, and it matches
            // the expected stem at this index.
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, tick)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }

        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Example #4
0
/* Return true if `other` is `self`, or is a Hash of the same size in which
 * every key of `self` maps to a value that Obj_Equals() the value stored in
 * `self`.  Return false otherwise.
 */
bool_t
Hash_equals(Hash *self, Obj *other) {
    Hash *twin = (Hash*)other;
    Obj  *key;
    Obj  *val;

    // Same object: trivially equal.
    if (twin == self) { return true; }

    // Reject non-Hashes first; only then is it safe to compare sizes.
    if (!Obj_Is_A(other, HASH) || self->size != twin->size) {
        return false;
    }

    // Sizes match, so checking every entry of self against twin suffices.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        Obj *counterpart = Hash_Fetch(twin, key);
        if (counterpart && Obj_Equals(counterpart, val)) { continue; }
        return false;
    }

    return true;
}
Example #5
0
/* Return a new VArray holding every value in `self`, in iteration order.
 * Each value is INCREF'd before being pushed.
 */
VArray*
Hash_Values_IMP(Hash *self) {
    VArray *result = VA_new(self->size);
    Obj *k;
    Obj *v;

    for (Hash_Iterate(self); Hash_Next(self, &k, &v);) {
        VA_Push(result, INCREF(v));
    }

    return result;
}
Example #6
0
/* Exercise Hash_Keys(), Hash_Values(), Hash_Iterate()/Hash_Next() and
 * Hash_Find_Key() on a Hash with 500 entries.  Each entry uses the same
 * CharBuf as both key and value, so the sorted keys and the sorted values
 * must each equal the sorted list of inserted CharBufs.
 */
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash     *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray   *expected = VA_new(100);
    VArray   *keys;
    VArray   *values;

    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }

    VA_Sort(expected, NULL, NULL);

    // Bulk accessors.  The results are sorted before comparison, so the
    // test does not depend on the order in which the Hash hands them back.
    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    // Manual iteration must yield the same keys and values; the arrays
    // cleared above are reused to collect them.
    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }

    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    // Find_Key: "40" was inserted above, "nope" never was.
    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        // Guard against a NULL result so a lookup miss is reported as a
        // failed test instead of being passed on to Obj_Equals().
        TEST_TRUE(batch, key != NULL && Obj_Equals(key, (Obj*)forty),
                  "Find_Key");
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
Example #7
0
/* Return a new VArray holding every key in `self`, in iteration order.
 * Each key is INCREF'd before being pushed.
 */
VArray*
Hash_Keys_IMP(Hash *self) {
    VArray *result = VA_new(self->size);
    Obj *k;
    Obj *v;

    for (Hash_Iterate(self); Hash_Next(self, &k, &v);) {
        VA_Push(result, INCREF(k));
    }

    return result;
}
Example #8
0
/* Rebuild an object from `dump`, which must be a Hash.
 *
 * If `dump` carries a "_class" entry holding a CharBuf, that class's Load()
 * method is used, provided it is neither the abstract Obj_load nor Hash_load
 * itself.  Otherwise the result is a new Hash whose values are the Load()ed
 * values of `dump`.  `self` is not used.
 */
Obj*
Hash_load(Hash *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // A "_class" key paired with a valid class name is taken to mean that
    // this is the output of a Dump rather than an ordinary Hash.
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (!vtable) {
            // Class not registered yet: try to create it under its parent.
            CharBuf *parent_name = VTable_find_parent_class(class_name);
            if (!parent_name) {
                // TODO: Fix Hash_Load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
            else {
                VTable *parent_vtable = VTable_singleton(parent_name, NULL);
                vtable = VTable_singleton(class_name, parent_vtable);
                DECREF(parent_name);
            }
        }

        // Hand off to the class's own Load() method.
        if (vtable) {
            Obj_Load_t load_method = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load_method == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            }
            else if (load_method != (Obj_Load_t)Hash_load) {
                // Only dispatch when it would not land back here, which
                // would loop forever.
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // Plain Hash: copy the entries across, loading each value.
    Hash *loaded = Hash_new(source->size);
    Obj *key;
    Obj *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Obj *loaded_value = Obj_Load(value, value);
        Hash_Store(loaded, key, loaded_value);
    }

    return (Obj*)loaded;
}
Example #9
0
/* Return a new Hash that maps each key of `self` to the Dump() of its
 * value.  Every key is run through CERTIFY(key, CHARBUF).
 */
Hash*
Hash_dump(Hash *self) {
    Hash *result = Hash_new(self->size);
    Obj *k;
    Obj *v;

    for (Hash_Iterate(self); Hash_Next(self, &k, &v);) {
        // JSON hash keys must be text, so Dump() is limited to text keys
        // as well.
        CERTIFY(k, CHARBUF);
        Obj *dumped_value = Obj_Dump(v);
        Hash_Store(result, k, dumped_value);
    }

    return result;
}
Example #10
0
/* Inspect the "deletions" metadata of the segment behind `reader`.  For each
 * entry in its "files" hash, find the seg reader in `ivars->seg_readers`
 * with the same segment name.  If that reader's current deletion count still
 * equals the recorded "count", set the matching `ivars->updated` flag.
 * `doc_map` is not used.
 */
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    // Nothing to do without deletions metadata.
    if (!del_meta) { return; }

    VArray *seg_readers = ivars->seg_readers;
    Hash   *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
    if (!files) { return; }

    String *target_name;
    Hash   *mini_meta;
    Hash_Iterate(files);
    while (Hash_Next(files, (Obj**)&target_name, (Obj**)&mini_meta)) {
        /* Look for the segment that these deletions apply to.  If it is no
         * longer among our seg readers, there is no risk of losing a
         * deletions file that points at it. */
        uint32_t num_candidates = VA_Get_Size(seg_readers);
        for (uint32_t tick = 0; tick < num_candidates; tick++) {
            SegReader *candidate = (SegReader*)VA_Fetch(seg_readers, tick);
            String *candidate_name
                = Seg_Get_Name(SegReader_Get_Segment(candidate));

            if (!Str_Equals(target_name, (Obj*)candidate_name)) {
                continue;
            }

            /* An unchanged count means the merge is about to remove the most
             * recent deletions file pointing at this target segment, so
             * force a new file to be written out. */
            int32_t count
                = (int32_t)Obj_To_I64(Hash_Fetch_Utf8(mini_meta, "count", 5));
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Obtain(
                      candidate, Class_Get_Name(DELETIONSREADER));
            if (count == DelReader_Del_Count(del_reader)) {
                ivars->updated[tick] = true;
            }
            break;
        }
    }
}
Example #11
0
/* Load `doc` into the Inverter.
 *
 * For each field in the doc, the matching InverterEntry is fetched and the
 * field's value is copied into the entry's pre-existing value object,
 * according to the field type's primitive ID.  The entry is then handed to
 * Inverter_Add_Field().  An unrecognized primitive ID triggers THROW.
 */
void
Inverter_invert_doc(Inverter *self, Doc *doc) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    Hash *const fields = (Hash*)Doc_Get_Fields(doc);
    uint32_t num_keys = Hash_Iterate(fields);

    // Prepare for the new doc.
    Inverter_Set_Doc(self, doc);

    // Pull out one field per pass; Hash_Iterate() told us how many to expect.
    for (; num_keys > 0; num_keys--) {
        Obj *key, *obj;
        Hash_Next(fields, &key, &obj);
        CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF);
        InverterEntry *inventry = S_fetch_entry(ivars, field);
        InverterEntryIVARS *inventry_ivars = InvEntry_IVARS(inventry);
        FieldType *type = inventry_ivars->type;

        // Copy the field value into the entry's typed value holder.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    ViewCharBuf *target
                        = (ViewCharBuf*)inventry_ivars->value;
                    CharBuf *text = (CharBuf*)CERTIFY(obj, CHARBUF);
                    ViewCB_Assign(target, text);
                    break;
                }
            case FType_BLOB: {
                    ViewByteBuf *target
                        = (ViewByteBuf*)inventry_ivars->value;
                    ByteBuf *blob = (ByteBuf*)CERTIFY(obj, BYTEBUF);
                    ViewBB_Assign(target, blob);
                    break;
                }
            case FType_INT32: {
                    Integer32 *target = (Integer32*)inventry_ivars->value;
                    int32_t i32 = (int32_t)Obj_To_I64(obj);
                    Int32_Set_Value(target, i32);
                    break;
                }
            case FType_INT64: {
                    Integer64 *target = (Integer64*)inventry_ivars->value;
                    int64_t i64 = Obj_To_I64(obj);
                    Int64_Set_Value(target, i64);
                    break;
                }
            case FType_FLOAT32: {
                    Float32 *target = (Float32*)inventry_ivars->value;
                    float f32 = (float)Obj_To_F64(obj);
                    Float32_Set_Value(target, f32);
                    break;
                }
            case FType_FLOAT64: {
                    Float64 *target = (Float64*)inventry_ivars->value;
                    double f64 = Obj_To_F64(obj);
                    Float64_Set_Value(target, f64);
                    break;
                }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        Inverter_Add_Field(self, inventry);
    }
}