Exemple #1
0
void
Inverter_Invert_Doc_IMP(Inverter *self, Doc *doc) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    Hash *const fields = (Hash*)Doc_Get_Fields(doc);

    // Prepare for the new doc.
    Inverter_Set_Doc(self, doc);

    // Extract and invert the doc's fields.
    HashIterator *iter = HashIter_new(fields);
    while (HashIter_Next(iter)) {
        String *field = HashIter_Get_Key(iter);
        Obj    *obj   = HashIter_Get_Value(iter);

        InverterEntry *inventry = S_fetch_entry(ivars, field);
        InverterEntryIVARS *inventry_ivars = InvEntry_IVARS(inventry);
        FieldType *type = inventry_ivars->type;

        // Get the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    CERTIFY(obj, STRING);
                    break;
                }
            case FType_BLOB: {
                    CERTIFY(obj, BLOB);
                    break;
                }
            case FType_INT32:
            case FType_INT64: {
                    CERTIFY(obj, INTEGER);
                    break;
                }
            case FType_FLOAT32:
            case FType_FLOAT64: {
                    CERTIFY(obj, FLOAT);
                    break;
                }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        if (inventry_ivars->value != obj) {
            DECREF(inventry_ivars->value);
            inventry_ivars->value = INCREF(obj);
        }

        Inverter_Add_Field(self, inventry);
    }
    DECREF(iter);
}
Exemple #2
0
Obj*
S_dump_hash(Hash *hash) {
    Hash *dump = Hash_new(Hash_Get_Size(hash));

    HashIterator *iter = HashIter_new(hash);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(dump, key, Freezer_dump(value));
    }
    DECREF(iter);

    return (Obj*)dump;
}
Exemple #3
0
void
Freezer_serialize_hash(Hash *hash, OutStream *outstream) {
    uint32_t hash_size = Hash_Get_Size(hash);
    OutStream_Write_C32(outstream, hash_size);

    HashIterator *iter = HashIter_new(hash);
    while (HashIter_Next(iter)) {
        String *key = HashIter_Get_Key(iter);
        Obj    *val = HashIter_Get_Value(iter);
        Freezer_serialize_string(key, outstream);
        FREEZE(val, outstream);
    }
    DECREF(iter);
}
Exemple #4
0
void
Schema_Eat_IMP(Schema *self, Schema *other) {
    if (!Schema_is_a(self, Schema_get_class(other))) {
        THROW(ERR, "%o not a descendent of %o",
              Schema_get_class_name(self), Schema_get_class_name(other));
    }

    SchemaIVARS *const ovars = Schema_IVARS(other);
    HashIterator *iter = HashIter_new(ovars->types);
    while (HashIter_Next(iter)) {
        String    *field = HashIter_Get_Key(iter);
        FieldType *type  = (FieldType*)HashIter_Get_Value(iter);
        Schema_Spec_Field(self, field, type);
    }
    DECREF(iter);
}
Exemple #5
0
Hash*
Schema_Dump_IMP(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash *type_dumps = Hash_new(Hash_Get_Size(ivars->types));

    // Record class name, store dumps of unique Analyzers.
    Hash_Store_Utf8(dump, "_class", 6,
                    (Obj*)Str_Clone(Schema_get_class_name(self)));
    Hash_Store_Utf8(dump, "analyzers", 9,
                    Freezer_dump((Obj*)ivars->uniq_analyzers));

    // Dump FieldTypes.
    Hash_Store_Utf8(dump, "fields", 6, (Obj*)type_dumps);
    HashIterator *iter = HashIter_new(ivars->types);
    while (HashIter_Next(iter)) {
        String    *field      = HashIter_Get_Key(iter);
        FieldType *type       = (FieldType*)HashIter_Get_Value(iter);
        Class     *type_class = FType_get_class(type);

        // Dump known types to simplified format.
        if (type_class == FULLTEXTTYPE) {
            FullTextType *fttype = (FullTextType*)type;
            Hash *type_dump = FullTextType_Dump_For_Schema(fttype);
            Analyzer *analyzer = FullTextType_Get_Analyzer(fttype);
            uint32_t tick
                = S_find_in_array(ivars->uniq_analyzers, (Obj*)analyzer);

            // Store the tick which references a unique analyzer.
            Hash_Store_Utf8(type_dump, "analyzer", 8,
                            (Obj*)Str_newf("%u32", tick));

            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        else if (type_class == STRINGTYPE || type_class == BLOBTYPE) {
            Hash *type_dump = FType_Dump_For_Schema(type);
            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        // Unknown FieldType type, so punt.
        else {
            Hash_Store(type_dumps, field, FType_Dump(type));
        }
    }
    DECREF(iter);

    return dump;
}
Exemple #6
0
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
Exemple #7
0
static Obj*
S_load_from_hash(Hash *dump) {
    String *class_name = (String*)Hash_Fetch_Utf8(dump, "_class", 6);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a dump() rather than an ordinary Hash.
    if (class_name && Str_is_a(class_name, STRING)) {
        Class *klass = Class_fetch_class(class_name);

        if (!klass) {
            String *parent_class_name = Class_find_parent_class(class_name);
            if (parent_class_name) {
                Class *parent = Class_singleton(parent_class_name, NULL);
                klass = Class_singleton(class_name, parent);
                DECREF(parent_class_name);
            }
            else {
                // TODO: Fix load() so that it works with ordinary hash keys
                // named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (klass) {
            return S_load_via_load_method(klass, (Obj*)dump);
        }

    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(Hash_Get_Size(dump));

    HashIterator *iter = HashIter_new(dump);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(loaded, key, Freezer_load(value));
    }
    DECREF(iter);

    return (Obj*)loaded;

}
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash    *cf_metadata;
    Hash    *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool     offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offset, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);
    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
Exemple #9
0
Schema*
Schema_Load_IMP(Schema *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "_class", 6), STRING);
    Class *klass = Class_singleton(class_name, NULL);
    Schema *loaded = (Schema*)Class_Make_Obj(klass);
    Hash *type_dumps
        = (Hash*)CERTIFY(Hash_Fetch_Utf8(source, "fields", 6), HASH);
    Vector *analyzer_dumps
        = (Vector*)CERTIFY(Hash_Fetch_Utf8(source, "analyzers", 9), VECTOR);
    Vector *analyzers
        = (Vector*)Freezer_load((Obj*)analyzer_dumps);
    UNUSED_VAR(self);

    // Start with a blank Schema.
    Schema_init(loaded);
    SchemaIVARS *const loaded_ivars = Schema_IVARS(loaded);
    Vec_Grow(loaded_ivars->uniq_analyzers, Vec_Get_Size(analyzers));

    HashIterator *iter = HashIter_new(type_dumps);
    while (HashIter_Next(iter)) {
        String *field     = HashIter_Get_Key(iter);
        Hash   *type_dump = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        String *type_str  = (String*)Hash_Fetch_Utf8(type_dump, "type", 4);
        if (type_str) {
            if (Str_Equals_Utf8(type_str, "fulltext", 8)) {
                // Replace the "analyzer" tick with the real thing.
                Obj *tick
                    = CERTIFY(Hash_Fetch_Utf8(type_dump, "analyzer", 8), OBJ);
                Analyzer *analyzer
                    = (Analyzer*)Vec_Fetch(analyzers,
                                          (uint32_t)Json_obj_to_i64(tick));
                if (!analyzer) {
                    THROW(ERR, "Can't find analyzer for '%o'", field);
                }
                Hash_Store_Utf8(type_dump, "analyzer", 8, INCREF(analyzer));
                FullTextType *type
                    = (FullTextType*)S_load_type(FULLTEXTTYPE,
                                                 (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "string", 6)) {
                StringType *type
                    = (StringType*)S_load_type(STRINGTYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "blob", 4)) {
                BlobType *type
                    = (BlobType*)S_load_type(BLOBTYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "i32_t", 5)) {
                Int32Type *type
                    = (Int32Type*)S_load_type(INT32TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "i64_t", 5)) {
                Int64Type *type
                    = (Int64Type*)S_load_type(INT64TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "f32_t", 5)) {
                Float32Type *type
                    = (Float32Type*)S_load_type(FLOAT32TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "f64_t", 5)) {
                Float64Type *type
                    = (Float64Type*)S_load_type(FLOAT64TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else {
                THROW(ERR, "Unknown type '%o' for field '%o'", type_str, field);
            }
        }
        else {
            FieldType *type
                = (FieldType*)CERTIFY(Freezer_load((Obj*)type_dump),
                                      FIELDTYPE);
            Schema_Spec_Field(loaded, field, type);
            DECREF(type);
        }
    }
    DECREF(iter);

    DECREF(analyzers);

    return loaded;
}
Exemple #10
0
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *updated_deletions = NULL;

    PolyReader *new_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *new_seg_readers
        = PolyReader_Get_Seg_Readers(new_polyreader);
    Vector *old_seg_readers
        = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers));

    for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        Hash_Store(new_segs, seg_name, INCREF(seg_reader));
    }

    for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);

        // If this segment was merged away...
        if (Hash_Fetch(ivars->doc_maps, seg_name)) {
            SegReader *new_seg_reader
                = (SegReader*)CERTIFY(
                      Hash_Fetch(new_segs, seg_name),
                      SEGREADER);
            int32_t old_del_count = SegReader_Del_Count(seg_reader);
            int32_t new_del_count = SegReader_Del_Count(new_seg_reader);
            // ... were any new deletions applied against it?
            if (old_del_count != new_del_count) {
                DeletionsReader *del_reader
                    = (DeletionsReader*)SegReader_Obtain(
                          new_seg_reader,
                          Class_Get_Name(DELETIONSREADER));
                if (!updated_deletions) {
                    updated_deletions = Hash_new(max);
                }
                Hash_Store(updated_deletions, seg_name,
                           (Obj*)DelReader_Iterator(del_reader));
            }
        }
    }

    DECREF(new_polyreader);
    DECREF(new_segs);

    if (!updated_deletions) {
        return false;
    }
    else {
        PolyReader *merge_polyreader
            = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
        Vector *merge_seg_readers
            = PolyReader_Get_Seg_Readers(merge_polyreader);
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
        Segment   *new_segment = Seg_new(new_seg_num);
        SegWriter *seg_writer  = SegWriter_new(ivars->schema, ivars->snapshot,
                                               new_segment, merge_polyreader);
        DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
        int64_t  merge_seg_num = Seg_Get_Number(ivars->segment);
        uint32_t seg_tick      = INT32_MAX;
        int32_t  offset        = INT32_MAX;

        SegWriter_Prep_Seg_Dir(seg_writer);

        for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers); i < max; i++) {
            SegReader *seg_reader
                = (SegReader*)Vec_Fetch(merge_seg_readers, i);
            if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) {
                I32Array *offsets = PolyReader_Offsets(merge_polyreader);
                seg_tick = i;
                offset = I32Arr_Get(offsets, seg_tick);
                DECREF(offsets);
            }
        }
        if (offset == INT32_MAX) { THROW(ERR, "Failed sanity check"); }

        HashIterator *iter = HashIter_new(updated_deletions);
        while (HashIter_Next(iter)) {
            String  *seg_name  = HashIter_Get_Key(iter);
            Matcher *deletions = (Matcher*)HashIter_Get_Value(iter);

            I32Array *doc_map
                = (I32Array*)CERTIFY(
                      Hash_Fetch(ivars->doc_maps, seg_name),
                      I32ARRAY);
            int32_t del;
            while (0 != (del = Matcher_Next(deletions))) {
                // Find the slot where the deleted doc resides in the
                // rewritten segment. If the doc was already deleted when we
                // were merging, do nothing.
                int32_t remapped = I32Arr_Get(doc_map, del);
                if (remapped) {
                    // It's a new deletion, so carry it forward and zap it in
                    // the rewritten segment.
                    DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset);
                }
            }
        }
        DECREF(iter);

        // Finish the segment and clean up.
        DelWriter_Finish(del_writer);
        SegWriter_Finish(seg_writer);
        DECREF(seg_writer);
        DECREF(new_segment);
        DECREF(latest_snapshot);
        DECREF(merge_polyreader);
        DECREF(updated_deletions);
    }

    return true;
}