Exemplo n.º 1
0
static void
test_spew_and_slurp(TestBatch *batch) {
    Obj *dump = S_make_dump();
    Folder *folder = (Folder*)RAMFolder_new(NULL);

    CharBuf *foo = (CharBuf*)ZCB_WRAP_STR("foo", 3);
    bool_t result = Json_spew_json(dump, folder, foo);
    TEST_TRUE(batch, result, "spew_json returns true on success");
    TEST_TRUE(batch, Folder_Exists(folder, foo),
              "spew_json wrote file");

    Obj *got = Json_slurp_json(folder, foo);
    TEST_TRUE(batch, got && Obj_Equals(dump, got),
              "Round trip through spew_json and slurp_json");
    DECREF(got);

    Err_set_error(NULL);
    result = Json_spew_json(dump, folder, foo);
    TEST_FALSE(batch, result, "Can't spew_json when file exists");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed spew_json sets Err_error");

    Err_set_error(NULL);
    CharBuf *bar = (CharBuf*)ZCB_WRAP_STR("bar", 3);
    got = Json_slurp_json(folder, bar);
    TEST_TRUE(batch, got == NULL,
              "slurp_json returns NULL when file doesn't exist");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed slurp_json sets Err_error");

    CharBuf *boffo = (CharBuf*)ZCB_WRAP_STR("boffo", 5);

    FileHandle *fh
        = Folder_Open_FileHandle(folder, boffo, FH_CREATE | FH_WRITE_ONLY);
    FH_Write(fh, "garbage", 7);
    DECREF(fh);

    Err_set_error(NULL);
    got = Json_slurp_json(folder, boffo);
    TEST_TRUE(batch, got == NULL,
              "slurp_json returns NULL when file doesn't contain valid JSON");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed slurp_json sets Err_error");
    DECREF(got);

    DECREF(dump);
    DECREF(folder);
}
Exemplo n.º 2
0
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word  = (String*)VA_Fetch(words, i);
            VArray *got   = SnowStemmer_Split(stemmer, word);
            String *stem  = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Exemplo n.º 3
0
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *folder  = ivars->folder;
    bool    success = false;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *iter = Str_Top(path);
    StrIter_Advance(iter, 5 + 1);
    if (!StrIter_Starts_With(iter, ivars->name)) {
        DECREF(iter);
        return false;
    }
    DECREF(iter);

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            String *pid_buf = (String*)Hash_Fetch_Utf8(hash, "pid", 3);
            String *host    = (String*)Hash_Fetch_Utf8(hash, "host", 4);
            String *name    = (String*)Hash_Fetch_Utf8(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && Str_Is_A(host, STRING)
                && Str_Equals(host, (Obj*)ivars->host)
                && name != NULL
                && Str_Is_A(name, STRING)
                && Str_Equals(name, (Obj*)ivars->name)
                && pid_buf != NULL
                && Str_Is_A(pid_buf, STRING)
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)Str_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())  // This process.
                    || (delete_other && !PID_active(pid)) // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        String *mess
                            = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}
Exemplo n.º 4
0
bool_t
LFLock_maybe_delete_file(LockFileLock *self, const CharBuf *path,
                         bool_t delete_mine, bool_t delete_other) {
    Folder *folder  = self->folder;
    bool_t  success = false;
    ZombieCharBuf *scratch = ZCB_WRAP(path);

    // Only delete locks that start with our lock name.
    CharBuf *lock_dir_name = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!ZCB_Starts_With(scratch, lock_dir_name)) {
        return false;
    }
    ZCB_Nip(scratch, CB_Get_Size(lock_dir_name) + 1);
    if (!ZCB_Starts_With(scratch, self->name)) {
        return false;
    }

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            CharBuf *pid_buf = (CharBuf*)Hash_Fetch_Str(hash, "pid", 3);
            CharBuf *host    = (CharBuf*)Hash_Fetch_Str(hash, "host", 4);
            CharBuf *name
                = (CharBuf*)Hash_Fetch_Str(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && CB_Equals(host, (Obj*)self->host)
                && name != NULL
                && CB_Equals(name, (Obj*)self->name)
                && pid_buf != NULL
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)CB_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())  // This process.
                    || (delete_other && !PID_active(pid)) // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        CharBuf *mess
                            = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}
Exemplo n.º 5
0
Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Eliminate all prior data. Pick a snapshot file.
    S_zero_out(self);
    ivars->path = (path != NULL && Str_Get_Size(path) > 0)
                  ? Str_Clone(path)
                  : IxFileNames_latest_snapshot(folder);

    if (ivars->path) {
        Hash *snap_data
            = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path), HASH);
        Obj *format_obj
            = CERTIFY(Hash_Fetch_Utf8(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Json_obj_to_i64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Utf8(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Json_obj_to_i64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32",
                  format, Snapshot_current_file_format);
        }

        // Build up list of entries.
        Vector *list = (Vector*)INCREF(CERTIFY(
                           Hash_Fetch_Utf8(snap_data, "entries", 7),
                           VECTOR));
        if (format == 1 || (format == 2 && subformat < 1)) {
            Vector *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(ivars->entries);
        for (uint32_t i = 0, max = Vec_Get_Size(list); i < max; i++) {
            String *entry
                = (String*)CERTIFY(Vec_Fetch(list, i), STRING);
            Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
Exemplo n.º 6
0
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *path)
{
    // Eliminate all prior data. Pick a snapshot file. 
    S_zero_out(self);
    self->path = path ? CB_Clone(path) : IxFileNames_latest_snapshot(folder);

    if (self->path) {
        Hash *snap_data = (Hash*)CERTIFY(
            Json_slurp_json(folder, self->path), HASH);
        Obj *format_obj = CERTIFY(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Obj_To_I64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Str(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj 
                          ? (int32_t)Obj_To_I64(subformat_obj) 
                          : 0;

        // Verify that we can read the index properly. 
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32",
                format, Snapshot_current_file_format);
        }

        // Build up list of entries. 
        VArray *list = (VArray*)CERTIFY(
            Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
        INCREF(list);
        if (format == 1 || (format == 2 && subformat < 1)) {
            VArray *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(self->entries);
        for (uint32_t i = 0, max = VA_Get_Size(list); i < max; i++) {
            CharBuf *entry = (CharBuf*)CERTIFY(
                VA_Fetch(list, i), CHARBUF);
            Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY));
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
Exemplo n.º 7
0
bool
Seg_Read_File_IMP(Segment *self, Folder *folder) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    String *filename = Str_newf("%o/segmeta.json", ivars->name);
    Hash   *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash   *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(ivars->metadata);
    ivars->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Utf8(ivars->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Utf8(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Utf8(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { ivars->count = Json_obj_to_i64(count); }

    // Get list of field nums.
    Vector *source_by_num = (Vector*)Hash_Fetch_Utf8(my_metadata,
                                                     "field_names", 11);
    size_t num_fields = source_by_num ? Vec_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(ivars->by_num);
    DECREF(ivars->by_name);
    ivars->by_num  = Vec_new(num_fields);
    ivars->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (size_t i = 0; i < num_fields; i++) {
        String *name = (String*)Vec_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
Exemplo n.º 8
0
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash    *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash    *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Str(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { self->count = Obj_To_I64(count); }

    // Get list of field nums.
    VArray *source_by_num = (VArray*)Hash_Fetch_Str(my_metadata,
                                                    "field_names", 11);
    uint32_t num_fields = source_by_num ? VA_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (uint32_t i = 0; i < num_fields; i++) {
        CharBuf *name = (CharBuf*)VA_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
Exemplo n.º 9
0
Hash*
IxManager_Read_Merge_Data_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    String *merge_json = SSTR_WRAP_C("merge.json");
    if (Folder_Exists(ivars->folder, merge_json)) {
        Hash *stuff = (Hash*)Json_slurp_json(ivars->folder, merge_json);
        if (stuff) {
            CERTIFY(stuff, HASH);
            return stuff;
        }
        else {
            return Hash_new(0);
        }
    }
    else {
        return NULL;
    }
}
Exemplo n.º 10
0
Hash*
IxManager_read_merge_data(IndexManager *self)
{
    ZombieCharBuf *merge_json = ZCB_WRAP_STR("merge.json", 10);
    if (Folder_Exists(self->folder, (CharBuf*)merge_json)) {
        Hash *stuff 
            = (Hash*)Json_slurp_json(self->folder, (CharBuf*)merge_json);
        if (stuff) {
            CERTIFY(stuff, HASH);
            return stuff;
        }
        else {
            return Hash_new(0);
        }
    }
    else {
        return NULL;
    }
}
Exemplo n.º 11
0
static void
test_normalization(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    if (modules_folder == NULL) {
        SKIP(runner, 13, "Can't locate test data");
        return;
    }

    String *path = Str_newf("unicode/utf8proc/tests.json");
    Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) {
        Hash *test = (Hash*)Vec_Fetch(tests, i);
        String *form = (String*)Hash_Fetch_Utf8(
                            test, "normalization_form", 18);
        bool case_fold = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                              test, "case_fold", 9));
        bool strip_accents = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                                  test, "strip_accents", 13));
        Normalizer *normalizer = Normalizer_new(form, case_fold, strip_accents);
        Vector *words = (Vector*)Hash_Fetch_Utf8(test, "words", 5);
        Vector *norms = (Vector*)Hash_Fetch_Utf8(test, "norms", 5);
        for (uint32_t j = 0, max = Vec_Get_Size(words); j < max; j++) {
            String *word = (String*)Vec_Fetch(words, j);
            Vector *got  = Normalizer_Split(normalizer, word);
            String *norm = (String*)Vec_Fetch(got, 0);
            TEST_TRUE(runner,
                      norm
                      && Str_is_a(norm, STRING)
                      && Str_Equals(norm, Vec_Fetch(norms, j)),
                      "Normalize %s %d %d: %s", Str_Get_Ptr8(form),
                      case_fold, strip_accents, Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(normalizer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Exemplo n.º 12
0
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash    *cf_metadata;
    Hash    *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool     offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offset, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);
    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
Exemplo n.º 13
0
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *filename)
{
    /* Eliminate all prior data. Pick a snapshot file. */
    S_zero_out(self);
    self->filename = filename 
                   ? CB_Clone(filename)
                   : IxFileNames_latest_snapshot(folder); 

    if (self->filename) {
        Hash *snap_data = (Hash*)ASSERT_IS_A(
            Json_slurp_json(folder, self->filename), HASH);
        Obj *format = ASSERT_IS_A(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);

        /* Verify that we can read the index properly. */
        if (Obj_To_I64(format) > Snapshot_current_file_format) {
            THROW("Snapshot format too recent: %i64, %i32",
                Obj_To_I64(format), Snapshot_current_file_format);
        }

        /* Build up list of entries. */
        {
            u32_t i, max;
            VArray *list = (VArray*)ASSERT_IS_A(
                Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
            Hash_Clear(self->entries);
            for (i = 0, max = VA_Get_Size(list); i < max; i++) {
                CharBuf *entry = (CharBuf*)ASSERT_IS_A(
                    VA_Fetch(list, i), CHARBUF);
                Hash_Store(self->entries, entry, INCREF(&EMPTY));
            }
        }

        DECREF(snap_data);
    }

    return self;
}
Exemplo n.º 14
0
static void
test_tokenizer(TestBatchRunner *runner) {
    StandardTokenizer *tokenizer = StandardTokenizer_new();

    String *word = SSTR_WRAP_C(
                              " ."
                              "tha\xCC\x82t's"
                              ":"
                              "1,02\xC2\xADZ4.38"
                              "\xE0\xB8\x81\xC2\xAD\xC2\xAD"
                              "\xF0\xA0\x80\x80"
                              "a"
                              "/");
    Vector *got = StandardTokenizer_Split(tokenizer, word);
    String *token = (String*)Vec_Fetch(got, 0);
    TEST_TRUE(runner,
              token
              && Str_is_a(token, STRING)
              && Str_Equals_Utf8(token, "tha\xcc\x82t's", 8),
              "Token: %s", Str_Get_Ptr8(token));
    token = (String*)Vec_Fetch(got, 1);
    TEST_TRUE(runner,
              token
              && Str_is_a(token, STRING)
              && Str_Equals_Utf8(token, "1,02\xC2\xADZ4.38", 11),
              "Token: %s", Str_Get_Ptr8(token));
    token = (String*)Vec_Fetch(got, 2);
    TEST_TRUE(runner,
              token
              && Str_is_a(token, STRING)
              && Str_Equals_Utf8(token, "\xE0\xB8\x81\xC2\xAD\xC2\xAD", 7),
              "Token: %s", Str_Get_Ptr8(token));
    token = (String*)Vec_Fetch(got, 3);
    TEST_TRUE(runner,
              token
              && Str_is_a(token, STRING)
              && Str_Equals_Utf8(token, "\xF0\xA0\x80\x80", 4),
              "Token: %s", Str_Get_Ptr8(token));
    token = (String*)Vec_Fetch(got, 4);
    TEST_TRUE(runner,
              token
              && Str_is_a(token, STRING)
              && Str_Equals_Utf8(token, "a", 1),
              "Token: %s", Str_Get_Ptr8(token));
    DECREF(got);

    FSFolder *modules_folder = TestUtils_modules_folder();
    if (modules_folder == NULL) {
        SKIP(runner, 1372, "Can't locate test data");
    }
    else {
        String *path = Str_newf("unicode/ucd/WordBreakTest.json");
        Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path);
        if (!tests) { RETHROW(Err_get_error()); }

        for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) {
            Hash *test = (Hash*)Vec_Fetch(tests, i);
            String *text = (String*)Hash_Fetch_Utf8(test, "text", 4);
            Vector *wanted = (Vector*)Hash_Fetch_Utf8(test, "words", 5);
            Vector *got = StandardTokenizer_Split(tokenizer, text);
            TEST_TRUE(runner, Vec_Equals(wanted, (Obj*)got), "UCD test #%d", i + 1);
            DECREF(got);
        }

        DECREF(tests);
        DECREF(modules_folder);
        DECREF(path);
    }

    DECREF(tokenizer);
}
Exemplo n.º 15
0
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index, 
          IndexManager *manager, int32_t flags)
          
{
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init. 
    self->stock_doc     = Doc_new(NULL, 0);
    self->truncate      = false;
    self->optimize      = false;
    self->prepared      = false;
    self->needs_commit  = false;
    self->snapfile      = NULL;
    self->merge_lock    = NULL;

    // Assign. 
    self->folder       = folder;
    self->manager      = manager 
                       ? (IndexManager*)INCREF(manager) 
                       : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder. 
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad! 
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one. 
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied. 
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully 
                self->schema = (Schema*)CERTIFY(
                    VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty 
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice. 
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
            ? PolyReader_open((Obj*)folder, NULL, NULL)
            : PolyReader_new(schema, folder, NULL, NULL, NULL);

        if (latest_snapfile) {
            // Make sure than any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions. 
    {
        // Note: we have to feed FilePurger with the most recent snapshot file
        // now, but with the Indexer's snapshot later.
        FilePurger *file_purger 
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment. 
    {
        int64_t new_seg_num 
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of its
            // way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data 
                            ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                            : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment. 
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.  
    self->file_purger 
        = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
        self->segment, self->polyreader); 
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter. 
    self->del_writer = (DeletionsWriter*)INCREF(
        SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}
Exemplo n.º 16
0
static void
test_open(TestBatchRunner *runner) {
    Folder *real_folder;
    CompoundFileReader *cf_reader;
    Hash *metadata;

    Err_set_error(NULL);
    real_folder = S_folder_with_contents();
    Folder_Delete(real_folder, cfmeta_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when cfmeta file missing");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when cfmeta file missing");
    DECREF(real_folder);

    Err_set_error(NULL);
    real_folder = S_folder_with_contents();
    Folder_Delete(real_folder, cf_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when cf.dat file missing");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when cf.dat file missing");
    DECREF(real_folder);

    Err_set_error(NULL);
    real_folder = S_folder_with_contents();
    metadata = (Hash*)Json_slurp_json(real_folder, cfmeta_file);
    Hash_Store_Utf8(metadata, "format", 6, (Obj*)Str_newf("%i32", -1));
    Folder_Delete(real_folder, cfmeta_file);
    Json_spew_json((Obj*)metadata, real_folder, cfmeta_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when format is invalid");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when format is invalid");

    Err_set_error(NULL);
    Hash_Store_Utf8(metadata, "format", 6, (Obj*)Str_newf("%i32", 1000));
    Folder_Delete(real_folder, cfmeta_file);
    Json_spew_json((Obj*)metadata, real_folder, cfmeta_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when format is too recent");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when format too recent");

    Err_set_error(NULL);
    DECREF(Hash_Delete_Utf8(metadata, "format", 6));
    Folder_Delete(real_folder, cfmeta_file);
    Json_spew_json((Obj*)metadata, real_folder, cfmeta_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when format key is missing");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when format key is missing");

    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", CFWriter_current_file_format));
    DECREF(Hash_Delete_Utf8(metadata, "files", 5));
    Folder_Delete(real_folder, cfmeta_file);
    Json_spew_json((Obj*)metadata, real_folder, cfmeta_file);
    cf_reader = CFReader_open(real_folder);
    TEST_TRUE(runner, cf_reader == NULL,
              "Return NULL when files key is missing");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Set global error when files key is missing");

    DECREF(metadata);
    DECREF(real_folder);
}
Exemplo n.º 17
0
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self              = args->self;
    PolyReaderIVARS *const ivars  = PolyReader_IVARS(self);
    VArray     *files             = Snapshot_List(ivars->snapshot);
    Folder     *folder            = PolyReader_Get_Folder(self);
    uint32_t    num_segs          = 0;
    uint64_t    latest_schema_gen = 0;
    CharBuf    *schema_file       = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (CB_Starts_With_Str(entry, "schema_", 7)
                 && CB_Ends_With_Str(entry, ".json", 5)
                ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                if (!schema_file) { schema_file = CB_Clone(entry); }
                else { CB_Mimic(schema_file, (Obj*)entry); }
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(
                               VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
            DECREF(dump);
            DECREF(schema_file);
            schema_file = NULL;
        }
        else {
            CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need to
            // retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                CharBuf *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) {
            break;
        }
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}
Exemplo n.º 18
0
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(CB_newf("Can't read '%o' in '%o'", cfmeta_file,
                                Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        self->format = format ? (int32_t)Obj_To_I64(format) : 0;
        self->records = (Hash*)INCREF(Hash_Fetch_Str(metadata, "files", 5));
        if (self->format < 1) {
            error = Err_new(CB_newf("Corrupt %o file: Missing or invalid 'format'",
                                    cfmeta_file));
        }
        else if (self->format > CFWriter_current_file_format) {
            error = Err_new(CB_newf("Unsupported compound file format: %i32 "
                                    "(current = %i32", self->format,
                                    CFWriter_current_file_format));
        }
        else if (!self->records) {
            error = Err_new(CB_newf("Corrupt %o file: missing 'files' key",
                                    cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    self->instream = Folder_Open_In(folder, cf_file);
    if (!self->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    self->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (self->format == 1) {
        VArray *files = Hash_Keys(self->records);
        ZombieCharBuf *filename = ZCB_BLANK();
        ZombieCharBuf *folder_name
            = IxFileNames_local_part(Folder_Get_Path(folder), ZCB_BLANK());
        size_t folder_name_len = ZCB_Length(folder_name);

        for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
            CharBuf *orig = (CharBuf*)VA_Fetch(files, i);
            if (CB_Starts_With(orig, (CharBuf*)folder_name)) {
                Obj *record = Hash_Delete(self->records, (Obj*)orig);
                ZCB_Assign(filename, orig);
                ZCB_Nip(filename, folder_name_len + sizeof(DIR_SEP) - 1);
                Hash_Store(self->records, (Obj*)filename, (Obj*)record);
            }
        }

        DECREF(files);
    }

    return self;
}
Exemplo n.º 19
0
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(Str_newf("Can't read '%o' in '%o'", cfmeta_file,
                                 Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        ivars->format = format ? (int32_t)Obj_To_I64(format) : 0;
        ivars->records = (Hash*)INCREF(Hash_Fetch_Utf8(metadata, "files", 5));
        if (ivars->format < 1) {
            error = Err_new(Str_newf("Corrupt %o file: Missing or invalid 'format'",
                                     cfmeta_file));
        }
        else if (ivars->format > CFWriter_current_file_format) {
            error = Err_new(Str_newf("Unsupported compound file format: %i32 "
                                     "(current = %i32", ivars->format,
                                     CFWriter_current_file_format));
        }
        else if (!ivars->records) {
            error = Err_new(Str_newf("Corrupt %o file: missing 'files' key",
                                     cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    String *cf_file = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    ivars->instream = Folder_Open_In(folder, cf_file);
    if (!ivars->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    ivars->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (ivars->format == 1) {
        Vector *files = Hash_Keys(ivars->records);
        String *folder_name = IxFileNames_local_part(Folder_Get_Path(folder));
        size_t folder_name_len = Str_Length(folder_name);

        for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
            String *orig = (String*)Vec_Fetch(files, i);
            if (Str_Starts_With(orig, folder_name)) {
                Obj *record = Hash_Delete(ivars->records, orig);
                size_t offset = folder_name_len + sizeof(CHY_DIR_SEP) - 1;
                size_t len    = Str_Length(orig) - offset;
                String *filename = Str_SubString(orig, offset, len);
                Hash_Store(ivars->records, filename, (Obj*)record);
                DECREF(filename);
            }
        }

        DECREF(folder_name);
        DECREF(files);
    }

    return self;
}