Пример #1
0
static void
test_fields(TestBatch *batch)
{
    Segment *segment = Seg_new(1);
    ZombieCharBuf *foo = ZCB_WRAP_STR("foo",3 );
    ZombieCharBuf *bar = ZCB_WRAP_STR("bar", 3);
    ZombieCharBuf *baz = ZCB_WRAP_STR("baz", 3);
    int32_t field_num; 
    
    field_num = Seg_Add_Field(segment, (CharBuf*)foo);
    TEST_TRUE(batch, field_num == 1, 
        "Add_Field returns field number, and field numbers start at 1");
    field_num = Seg_Add_Field(segment, (CharBuf*)bar);
    TEST_TRUE(batch, field_num == 2, "add a second field");
    field_num = Seg_Add_Field(segment, (CharBuf*)foo);
    TEST_TRUE(batch, field_num == 1,
        "Add_Field returns existing field number if field is already known");

    TEST_TRUE(batch, ZCB_Equals(bar, (Obj*)Seg_Field_Name(segment, 2)),
        "Field_Name");
    TEST_TRUE(batch, Seg_Field_Name(segment, 3) == NULL, 
        "Field_Name returns NULL for unknown field number");
    TEST_TRUE(batch, Seg_Field_Num(segment, (CharBuf*)bar) == 2,
        "Field_Num");
    TEST_TRUE(batch, Seg_Field_Num(segment, (CharBuf*)baz) == 0, 
        "Field_Num returns 0 for unknown field name");

    DECREF(segment);
}
Пример #2
0
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    String    *meta;
    String    *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String    *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);

    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (String*)Seg_Fetch_Metadata_Utf8(got, "foo", 3);
    TEST_TRUE(runner,
              meta
              && Str_Is_A(meta, STRING)
              && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
Пример #3
0
static void
test_Write_File_and_Read_File(TestBatch *batch)
{
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    CharBuf   *meta;
    CharBuf   *flotsam = (CharBuf*)ZCB_WRAP_STR("flotsam", 7);
    CharBuf   *jetsam  = (CharBuf*)ZCB_WRAP_STR("jetsam", 6);

    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Str(segment, "foo", 3, (Obj*)CB_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);
    
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(batch, Seg_Get_Count(got) == Seg_Get_Count(segment), 
        "Round-trip count through file");
    TEST_TRUE(batch, 
        Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam), 
        "Round trip field names through file");
    meta = (CharBuf*)Seg_Fetch_Metadata_Str(got, "foo", 3);
    TEST_TRUE(batch, meta && CB_Is_A(meta, CHARBUF) 
        && CB_Equals_Str(meta, "bar", 3), "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
Пример #4
0
static void
test_fields(TestBatchRunner *runner) {
    Segment *segment = Seg_new(1);
    StackString *foo = SSTR_WRAP_UTF8("foo", 3);
    StackString *bar = SSTR_WRAP_UTF8("bar", 3);
    StackString *baz = SSTR_WRAP_UTF8("baz", 3);
    int32_t field_num;

    field_num = Seg_Add_Field(segment, (String*)foo);
    TEST_TRUE(runner, field_num == 1,
              "Add_Field returns field number, and field numbers start at 1");
    field_num = Seg_Add_Field(segment, (String*)bar);
    TEST_TRUE(runner, field_num == 2, "add a second field");
    field_num = Seg_Add_Field(segment, (String*)foo);
    TEST_TRUE(runner, field_num == 1,
              "Add_Field returns existing field number if field is already known");

    TEST_TRUE(runner, SStr_Equals(bar, (Obj*)Seg_Field_Name(segment, 2)),
              "Field_Name");
    TEST_TRUE(runner, Seg_Field_Name(segment, 3) == NULL,
              "Field_Name returns NULL for unknown field number");
    TEST_TRUE(runner, Seg_Field_Num(segment, (String*)bar) == 2,
              "Field_Num");
    TEST_TRUE(runner, Seg_Field_Num(segment, (String*)baz) == 0,
              "Field_Num returns 0 for unknown field name");

    DECREF(segment);
}
Пример #5
0
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, CharBuf *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);
    if (!field_num) {
        // This field seems not to be in the segment yet.  Try to find it in
        // the Schema.
        if (Schema_Fetch_Type(schema, field)) {
            // The field is in the Schema.  Get a field num from the Segment.
            field_num = Seg_Add_Field(ivars->segment, field);
        }
        else {
            // We've truly failed to find the field.  The user must
            // not have spec'd it.
            THROW(ERR, "Unknown field name: '%o'", field);
        }
    }

    InverterEntry *entry
        = (InverterEntry*)VA_Fetch(ivars->entry_pool, field_num);
    if (!entry) {
        entry = InvEntry_new(schema, (CharBuf*)field, field_num);
        VA_Store(ivars->entry_pool, field_num, (Obj*)entry);
    }
    return entry;
}
Пример #6
0
void
Indexer_add_index(Indexer *self, Obj *index)
{
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;
    
    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields. 
        Schema_Eat(schema, other_schema);

        // Add fields to Segment. 
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments. 
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(
                seg_reader, VTable_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader 
                               ? DelReader_Iterator(del_reader) 
                               : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(self->del_writer,
                deletions, SegReader_Doc_Max(seg_reader),
                (int32_t)Seg_Get_Count(self->segment) 
            );
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
Пример #7
0
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
Пример #8
0
bool
Seg_Read_File_IMP(Segment *self, Folder *folder) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    String *filename = Str_newf("%o/segmeta.json", ivars->name);
    Hash   *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash   *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(ivars->metadata);
    ivars->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Utf8(ivars->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Utf8(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Utf8(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { ivars->count = Json_obj_to_i64(count); }

    // Get list of field nums.
    Vector *source_by_num = (Vector*)Hash_Fetch_Utf8(my_metadata,
                                                     "field_names", 11);
    size_t num_fields = source_by_num ? Vec_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(ivars->by_num);
    DECREF(ivars->by_name);
    ivars->by_num  = Vec_new(num_fields);
    ivars->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (size_t i = 0; i < num_fields; i++) {
        String *name = (String*)Vec_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
Пример #9
0
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash    *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash    *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Str(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { self->count = Obj_To_I64(count); }

    // Get list of field nums.
    VArray *source_by_num = (VArray*)Hash_Fetch_Str(my_metadata,
                                                    "field_names", 11);
    uint32_t num_fields = source_by_num ? VA_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (uint32_t i = 0; i < num_fields; i++) {
        CharBuf *name = (CharBuf*)VA_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
Пример #10
0
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index, 
          IndexManager *manager, int32_t flags)
          
{
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init. 
    self->stock_doc     = Doc_new(NULL, 0);
    self->truncate      = false;
    self->optimize      = false;
    self->prepared      = false;
    self->needs_commit  = false;
    self->snapfile      = NULL;
    self->merge_lock    = NULL;

    // Assign. 
    self->folder       = folder;
    self->manager      = manager 
                       ? (IndexManager*)INCREF(manager) 
                       : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder. 
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad! 
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one. 
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied. 
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully 
                self->schema = (Schema*)CERTIFY(
                    VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty 
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice. 
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
            ? PolyReader_open((Obj*)folder, NULL, NULL)
            : PolyReader_new(schema, folder, NULL, NULL, NULL);

        if (latest_snapfile) {
            // Make sure than any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions. 
    {
        // Note: we have to feed FilePurger with the most recent snapshot file
        // now, but with the Indexer's snapshot later.
        FilePurger *file_purger 
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment. 
    {
        int64_t new_seg_num 
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of its
            // way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data 
                            ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                            : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment. 
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.  
    self->file_purger 
        = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
        self->segment, self->polyreader); 
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter. 
    self->del_writer = (DeletionsWriter*)INCREF(
        SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}
Пример #11
0
BackgroundMerger*
BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Folder *folder = S_init_folder(index);

    // Init.
    ivars->optimize      = false;
    ivars->prepared      = false;
    ivars->needs_commit  = false;
    ivars->snapfile      = NULL;
    ivars->doc_maps      = Hash_new(0);

    // Assign.
    ivars->folder = folder;
    if (manager) {
        ivars->manager = (IndexManager*)INCREF(manager);
    }
    else {
        ivars->manager = IxManager_new(NULL, NULL);
        IxManager_Set_Write_Lock_Timeout(ivars->manager, 10000);
    }
    IxManager_Set_Folder(ivars->manager, folder);

    // Obtain write lock (which we'll only hold briefly), then merge lock.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }
    S_obtain_merge_lock(self);
    if (!ivars->merge_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot.  If there's no index content, bail early.
    ivars->snapshot = Snapshot_Read_File(Snapshot_new(), folder, NULL);
    if (!Snapshot_Get_Path(ivars->snapshot)) {
        S_release_write_lock(self);
        S_release_merge_lock(self);
        return self;
    }

    // Create FilePurger. Zap detritus from previous sessions.
    ivars->file_purger = FilePurger_new(folder, ivars->snapshot, ivars->manager);
    FilePurger_Purge(ivars->file_purger);

    // Open a PolyReader, passing in the IndexManager so we get a read lock on
    // the Snapshot's files -- so that Indexers don't zap our files while
    // we're operating in the background.
    ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager);

    // Clone the PolyReader's schema.
    Obj *dump = (Obj*)Schema_Dump(PolyReader_Get_Schema(ivars->polyreader));
    ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA);
    DECREF(dump);

    // Create new Segment.
    int64_t new_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, ivars->snapshot) + 1;
    Vector *fields = Schema_All_Fields(ivars->schema);
    ivars->segment = Seg_new(new_seg_num);
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i));
    }
    DECREF(fields);

    // Our "cutoff" is the segment this BackgroundMerger will write.  Now that
    // we've determined the cutoff, write the merge data file.
    ivars->cutoff = Seg_Get_Number(ivars->segment);
    IxManager_Write_Merge_Data(ivars->manager, ivars->cutoff);

    /* Create the SegWriter but hold off on preparing the new segment
     * directory -- because if we don't need to merge any segments we don't
     * need it.  (We've reserved the dir by plopping down the merge.json
     * file.) */
    ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                      ivars->segment, ivars->polyreader);

    // Grab a local ref to the DeletionsWriter.
    ivars->del_writer
        = (DeletionsWriter*)INCREF(SegWriter_Get_Del_Writer(ivars->seg_writer));

    // Release the write lock.  Now new Indexers can start while we work in
    // the background.
    S_release_write_lock(self);

    return self;
}