Exemplo n.º 1
0
InStream*
CFReader_local_open_in(CompoundFileReader *self, const CharBuf *name) {
    Hash *entry = (Hash*)Hash_Fetch(self->records, (Obj*)name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(self->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Str(entry, "length", 6);
        Obj *offset = Hash_Fetch_Str(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(CB_newf("Malformed entry for '%o' in '%o'",
                                          name, Folder_Get_Path(self->real_folder))));
            return NULL;
        }
        else if (CB_Get_Size(self->path)) {
            CharBuf *fullpath = CB_newf("%o/%o", self->path, name);
            InStream *instream = InStream_Reopen(self->instream, fullpath,
                                                 Obj_To_I64(offset), Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(self->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
Exemplo n.º 2
0
StringType*
StringType_load(StringType *self, Obj *dump)
{
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    VTable *vtable 
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, CHARBUF))
        ? VTable_singleton(class_name, NULL)
        : STRINGTYPE;
    StringType *loaded   = (StringType*)VTable_Make_Obj(vtable);
    Obj *boost_dump      = Hash_Fetch_Str(source, "boost", 5);
    Obj *indexed_dump    = Hash_Fetch_Str(source, "indexed", 7);
    Obj *stored_dump     = Hash_Fetch_Str(source, "stored", 6);
    Obj *sortable_dump   = Hash_Fetch_Str(source, "sortable", 8);
    UNUSED_VAR(self);

    StringType_init(loaded);
    if (boost_dump)    
        { loaded->boost    = (float)Obj_To_F64(boost_dump);     }
    if (indexed_dump)  
        { loaded->indexed  = (bool_t)Obj_To_I64(indexed_dump);  }
    if (stored_dump)   
        { loaded->stored   = (bool_t)Obj_To_I64(stored_dump);   }
    if (sortable_dump) 
        { loaded->sortable = (bool_t)Obj_To_I64(sortable_dump); }

    return loaded;
}
Exemplo n.º 3
0
bool_t
LFLock_maybe_delete_file(LockFileLock *self, const CharBuf *path,
                         bool_t delete_mine, bool_t delete_other) {
    Folder *folder  = self->folder;
    bool_t  success = false;
    ZombieCharBuf *scratch = ZCB_WRAP(path);

    // Only delete locks that start with our lock name.
    CharBuf *lock_dir_name = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!ZCB_Starts_With(scratch, lock_dir_name)) {
        return false;
    }
    ZCB_Nip(scratch, CB_Get_Size(lock_dir_name) + 1);
    if (!ZCB_Starts_With(scratch, self->name)) {
        return false;
    }

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            CharBuf *pid_buf = (CharBuf*)Hash_Fetch_Str(hash, "pid", 3);
            CharBuf *host    = (CharBuf*)Hash_Fetch_Str(hash, "host", 4);
            CharBuf *name
                = (CharBuf*)Hash_Fetch_Str(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && CB_Equals(host, (Obj*)self->host)
                && name != NULL
                && CB_Equals(name, (Obj*)self->name)
                && pid_buf != NULL
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)CB_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())  // This process.
                    || (delete_other && !PID_active(pid)) // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        CharBuf *mess
                            = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}
Exemplo n.º 4
0
Normalizer*
Normalizer_load(Normalizer *self, Obj *dump) {
    Normalizer_Load_t super_load
        = SUPER_METHOD_PTR(NORMALIZER, Lucy_Normalizer_Load);
    Normalizer *loaded = super_load(self, dump);
    Hash    *source = (Hash*)CERTIFY(dump, HASH);

    Obj *obj = Hash_Fetch_Str(source, "normalization_form", 18);
    CharBuf *form = (CharBuf*)CERTIFY(obj, CHARBUF);
    obj = Hash_Fetch_Str(source, "case_fold", 9);
    bool_t case_fold = Bool_Get_Value((BoolNum*)CERTIFY(obj, BOOLNUM));
    obj = Hash_Fetch_Str(source, "strip_accents", 13);
    bool_t strip_accents = Bool_Get_Value((BoolNum*)CERTIFY(obj, BOOLNUM));

    return Normalizer_init(loaded, form, case_fold, strip_accents);
}
Exemplo n.º 5
0
void
DefDelWriter_merge_segment(DefaultDeletionsWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Str(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Str(del_meta, "files", 5);
        if (files) {
            CharBuf *seg;
            Hash *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    CharBuf *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (CB_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(Hash_Fetch_Str(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, VTable_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
        }
    }
}
Exemplo n.º 6
0
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *path)
{
    // Eliminate all prior data. Pick a snapshot file. 
    S_zero_out(self);
    self->path = path ? CB_Clone(path) : IxFileNames_latest_snapshot(folder);

    if (self->path) {
        Hash *snap_data = (Hash*)CERTIFY(
            Json_slurp_json(folder, self->path), HASH);
        Obj *format_obj = CERTIFY(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Obj_To_I64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Str(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj 
                          ? (int32_t)Obj_To_I64(subformat_obj) 
                          : 0;

        // Verify that we can read the index properly. 
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32",
                format, Snapshot_current_file_format);
        }

        // Build up list of entries. 
        VArray *list = (VArray*)CERTIFY(
            Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
        INCREF(list);
        if (format == 1 || (format == 2 && subformat < 1)) {
            VArray *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(self->entries);
        for (uint32_t i = 0, max = VA_Get_Size(list); i < max; i++) {
            CharBuf *entry = (CharBuf*)CERTIFY(
                VA_Fetch(list, i), CHARBUF);
            Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY));
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
Exemplo n.º 7
0
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash    *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash    *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Str(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { self->count = Obj_To_I64(count); }

    // Get list of field nums.
    VArray *source_by_num = (VArray*)Hash_Fetch_Str(my_metadata,
                                                    "field_names", 11);
    uint32_t num_fields = source_by_num ? VA_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (uint32_t i = 0; i < num_fields; i++) {
        CharBuf *name = (CharBuf*)VA_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
Exemplo n.º 8
0
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, VArray *segments,
                 int32_t seg_tick)
{
    Segment *segment;
    Hash    *metadata; 
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
        segments, seg_tick);
    segment  = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors", 12);
    }
    
    // Check format. 
    if (metadata) {
        Obj     *format    = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64", 
                    Obj_To_I64(format));
            }
        }
    }


    // Open instreams. 
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }   
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }   
        }   
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Exemplo n.º 9
0
EasyAnalyzer*
EasyAnalyzer_load(EasyAnalyzer *self, Obj *dump) {
    EasyAnalyzer_Load_t super_load
        = SUPER_METHOD_PTR(EASYANALYZER, Lucy_EasyAnalyzer_Load);
    EasyAnalyzer *loaded = super_load(self, dump);
    Hash    *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *language
        = (CharBuf*)CERTIFY(Hash_Fetch_Str(source, "language", 8), CHARBUF);
    return EasyAnalyzer_init(loaded, language);
}
Exemplo n.º 10
0
Tokenizer*
Tokenizer_load(Tokenizer *self, Obj *dump)
{
    Hash *source = (Hash*)ASSERT_IS_A(dump, HASH);
    Tokenizer_load_t super_load 
        = (Tokenizer_load_t)SUPER_METHOD(&TOKENIZER, Tokenizer, Load);
    Tokenizer *loaded = super_load(self, dump);
    CharBuf *pattern 
        = (CharBuf*)ASSERT_IS_A(Hash_Fetch_Str(source, "pattern", 7), CHARBUF);
    return Tokenizer_init(loaded, pattern);
}
Exemplo n.º 11
0
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *filename)
{
    /* Eliminate all prior data. Pick a snapshot file. */
    S_zero_out(self);
    self->filename = filename 
                   ? CB_Clone(filename)
                   : IxFileNames_latest_snapshot(folder); 

    if (self->filename) {
        Hash *snap_data = (Hash*)ASSERT_IS_A(
            Json_slurp_json(folder, self->filename), HASH);
        Obj *format = ASSERT_IS_A(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);

        /* Verify that we can read the index properly. */
        if (Obj_To_I64(format) > Snapshot_current_file_format) {
            THROW("Snapshot format too recent: %i64, %i32",
                Obj_To_I64(format), Snapshot_current_file_format);
        }

        /* Build up list of entries. */
        {
            u32_t i, max;
            VArray *list = (VArray*)ASSERT_IS_A(
                Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
            Hash_Clear(self->entries);
            for (i = 0, max = VA_Get_Size(list); i < max; i++) {
                CharBuf *entry = (CharBuf*)ASSERT_IS_A(
                    VA_Fetch(list, i), CHARBUF);
                Hash_Store(self->entries, entry, INCREF(&EMPTY));
            }
        }

        DECREF(snap_data);
    }

    return self;
}
Exemplo n.º 12
0
Obj*
Hash_load(Hash *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a Dump rather than an ordinary Hash. */
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (!vtable) {
            CharBuf *parent_class = VTable_find_parent_class(class_name);
            if (parent_class) {
                VTable *parent = VTable_singleton(parent_class, NULL);
                vtable = VTable_singleton(class_name, parent);
                DECREF(parent_class);
            }
            else {
                // TODO: Fix Hash_Load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (vtable) {
            Obj_Load_t load = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            }
            else if (load != (Obj_Load_t)Hash_load) { // stop inf loop
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(source->size);
    Obj *key;
    Obj *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Hash_Store(loaded, key, Obj_Load(value, value));
    }

    return (Obj*)loaded;

}
Exemplo n.º 13
0
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder, 
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick)
{
    Hash *metadata; 
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
        seg_tick);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9);

    if (metadata) {
        CharBuf *seg_name  = Seg_Get_Name(segment);
        CharBuf *ix_file   = CB_newf("%o/documents.ix", seg_name);
        CharBuf *dat_file  = CB_newf("%o/documents.dat", seg_name);
        Obj     *format    = Hash_Fetch_Str(metadata, "format", 6);

        /* Check format. */
        if (!format) { THROW("Missing 'format' var"); }
        else {
            i64_t format_val = Obj_To_I64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW("Obsolete doc storage format %i64; "
                    "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW("Unsupported doc storage format: %i64", format_val);
            }
        }

        /* Get streams. */
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in  = Folder_Open_In(folder, ix_file);
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->ix_in || !self->dat_in) {
                CharBuf *mess = MAKE_MESS("Can't open either %o or %o",
                    ix_file, dat_file);
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                Err_throw_mess(mess);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    
    return self;
}
Exemplo n.º 14
0
Inversion*
Stopalizer_transform(Stopalizer *self, Inversion *inversion)
{
    Token *token;
    Inversion *new_inversion = Inversion_new(NULL);
    Hash *const stoplist  = self->stoplist;

    while (NULL != (token = Inversion_Next(inversion))) {
        if (!Hash_Fetch_Str(stoplist, token->text, token->len)) {
            Inversion_Append(new_inversion, (Token*)INCREF(token));
        }
    }

    return new_inversion;
}
Exemplo n.º 15
0
PolyAnalyzer*
PolyAnalyzer_load(PolyAnalyzer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyAnalyzer_Load_t super_load 
        = SUPER_METHOD_PTR(POLYANALYZER, Lucy_PolyAnalyzer_Load);
    PolyAnalyzer *loaded = super_load(self, dump);
    VArray *analyzer_dumps = (VArray*)CERTIFY(
                                 Hash_Fetch_Str(source, "analyzers", 9),
                                 VARRAY);
    VArray *analyzers = (VArray*)CERTIFY(
                            VA_Load(analyzer_dumps, (Obj*)analyzer_dumps),
                            VARRAY);
    PolyAnalyzer_init(loaded, NULL, analyzers);
    DECREF(analyzers);
    return loaded;
}
Exemplo n.º 16
0
bool_t
Tokenizer_dump_equals(Tokenizer *self, Obj *dump)
{
    Tokenizer_dump_equals_t super_dump_equals = (Tokenizer_dump_equals_t)
        SUPER_METHOD(&TOKENIZER, Tokenizer, Dump_Equals);
    if (!super_dump_equals(self, dump)) {
        return false;
    }
    else {
        Hash *source = (Hash*)ASSERT_IS_A(dump, HASH);
        CharBuf *pattern = (CharBuf*)Hash_Fetch_Str(source, "pattern", 7);
        if (!pattern) return false;
        if (!CB_Equals(self->pattern, (Obj*)pattern)) return false;
    }
    return true;
}
Exemplo n.º 17
0
FullTextType*
FullTextType_load(FullTextType *self, Obj *dump) {
    UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, CHARBUF))
          ? VTable_singleton(class_name, NULL)
          : FULLTEXTTYPE;
    FullTextType *loaded = (FullTextType*)VTable_Make_Obj(vtable);

    // Extract boost.
    Obj *boost_dump = Hash_Fetch_Str(source, "boost", 5);
    float boost = boost_dump ? (float)Obj_To_F64(boost_dump) : 1.0f;

    // Find boolean properties.
    Obj *indexed_dump = Hash_Fetch_Str(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Str(source, "stored", 6);
    Obj *sort_dump    = Hash_Fetch_Str(source, "sortable", 8);
    Obj *hl_dump      = Hash_Fetch_Str(source, "highlightable", 13);
    bool_t indexed  = indexed_dump ? Obj_To_Bool(indexed_dump) : true;
    bool_t stored   = stored_dump  ? Obj_To_Bool(stored_dump)  : true;
    bool_t sortable = sort_dump    ? Obj_To_Bool(sort_dump)    : false;
    bool_t hl       = hl_dump      ? Obj_To_Bool(hl_dump)      : false;

    // Extract an Analyzer.
    Obj *analyzer_dump = Hash_Fetch_Str(source, "analyzer", 8);
    Analyzer *analyzer = NULL;
    if (analyzer_dump) {
        if (Obj_Is_A(analyzer_dump, ANALYZER)) {
            // Schema munged the dump and installed a shared analyzer.
            analyzer = (Analyzer*)INCREF(analyzer_dump);
        }
        else if (Obj_Is_A((Obj*)analyzer_dump, HASH)) {
            analyzer = (Analyzer*)Obj_Load(analyzer_dump, analyzer_dump);
        }
    }
    CERTIFY(analyzer, ANALYZER);

    FullTextType_init(loaded, analyzer);
    DECREF(analyzer);
    if (boost_dump)   { loaded->boost         = boost;    }
    if (indexed_dump) { loaded->indexed       = indexed;  }
    if (stored_dump)  { loaded->stored        = stored;   }
    if (sort_dump)    { loaded->sortable      = sortable; }
    if (hl_dump)      { loaded->highlightable = hl;       }

    return loaded;
}
Exemplo n.º 18
0
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates)
{
    IndexManager *manager = self->manager;
    Lock *merge_lock   = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) { 
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data 
                     ? Hash_Fetch_Str(merge_data, "cutoff", 6) 
                     : NULL;

        if (cutoff) {
            CharBuf *cutoff_seg = Seg_num_to_name(Obj_To_I64(cutoff));
            if (Folder_Exists(self->folder, cutoff_seg)) {
                ZombieCharBuf *merge_json = ZCB_WRAP_STR("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(self->folder, cutoff_seg);
                CharBuf *entry = dh ? DH_Get_Entry(dh) : NULL;
                CharBuf *filepath = CB_new(32);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", filepath);
                }

                Hash_Store(candidates, (Obj*)cutoff_seg, INCREF(&EMPTY));
                Hash_Store(candidates, (Obj*)merge_json, INCREF(&EMPTY));
                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    CB_setf(filepath, "%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, (Obj*)filepath, INCREF(&EMPTY));
                }
                DECREF(filepath);
                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
Exemplo n.º 19
0
NumericType*
NumType_load(NumericType *self, Obj *dump)
{
    UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);

    // Get a VTable
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    CharBuf *type_spec  = (CharBuf*)Hash_Fetch_Str(source, "type", 4);
    VTable *vtable = NULL;
    if (class_name != NULL && Obj_Is_A((Obj*)class_name, CHARBUF)) {
        vtable = VTable_singleton(class_name, NULL);
    }
    else if (type_spec != NULL && Obj_Is_A((Obj*)type_spec, CHARBUF)) {
        if (CB_Equals_Str(type_spec, "i32_t", 5)) { 
            vtable = INT32TYPE; 
        }
        else if (CB_Equals_Str(type_spec, "i64_t", 5)) { 
            vtable = INT64TYPE; 
        }
        else if (CB_Equals_Str(type_spec, "f32_t", 5)) { 
            vtable = FLOAT32TYPE; 
        }
        else if (CB_Equals_Str(type_spec, "f64_t", 5)) { 
            vtable = FLOAT64TYPE; 
        }
        else {
            THROW(ERR, "Unrecognized type string: '%o'", type_spec);
        }
    }
    CERTIFY(vtable, VTABLE);
    NumericType *loaded = (NumericType*)VTable_Make_Obj(vtable);

    // Extract boost.
    Obj *boost_dump = Hash_Fetch_Str(source, "boost", 5);
    float boost = boost_dump ? (float)Obj_To_F64(boost_dump) : 1.0f;

    // Find boolean properties.
    Obj *indexed_dump = Hash_Fetch_Str(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Str(source, "stored", 6);
    Obj *sort_dump    = Hash_Fetch_Str(source, "sortable", 8);
    bool_t indexed  = indexed_dump ? (bool_t)Obj_To_I64(indexed_dump) : true;
    bool_t stored   = stored_dump  ? (bool_t)Obj_To_I64(stored_dump)  : true;
    bool_t sortable = sort_dump    ? (bool_t)Obj_To_I64(sort_dump)    : false;

    return NumType_init2(loaded, boost, indexed, stored, sortable);
}
Exemplo n.º 20
0
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(CB_newf("Can't read '%o' in '%o'", cfmeta_file,
                                Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        self->format = format ? (int32_t)Obj_To_I64(format) : 0;
        self->records = (Hash*)INCREF(Hash_Fetch_Str(metadata, "files", 5));
        if (self->format < 1) {
            error = Err_new(CB_newf("Corrupt %o file: Missing or invalid 'format'",
                                    cfmeta_file));
        }
        else if (self->format > CFWriter_current_file_format) {
            error = Err_new(CB_newf("Unsupported compound file format: %i32 "
                                    "(current = %i32", self->format,
                                    CFWriter_current_file_format));
        }
        else if (!self->records) {
            error = Err_new(CB_newf("Corrupt %o file: missing 'files' key",
                                    cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    self->instream = Folder_Open_In(folder, cf_file);
    if (!self->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    self->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (self->format == 1) {
        VArray *files = Hash_Keys(self->records);
        ZombieCharBuf *filename = ZCB_BLANK();
        ZombieCharBuf *folder_name
            = IxFileNames_local_part(Folder_Get_Path(folder), ZCB_BLANK());
        size_t folder_name_len = ZCB_Length(folder_name);

        for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
            CharBuf *orig = (CharBuf*)VA_Fetch(files, i);
            if (CB_Starts_With(orig, (CharBuf*)folder_name)) {
                Obj *record = Hash_Delete(self->records, (Obj*)orig);
                ZCB_Assign(filename, orig);
                ZCB_Nip(filename, folder_name_len + sizeof(DIR_SEP) - 1);
                Hash_Store(self->records, (Obj*)filename, (Obj*)record);
            }
        }

        DECREF(files);
    }

    return self;
}
Exemplo n.º 21
0
Obj*
Seg_fetch_metadata_str(Segment *self, const char *key, size_t len) {
    return Hash_Fetch_Str(self->metadata, key, len);
}
Exemplo n.º 22
0
static bool_t
S_maybe_merge(Indexer *self, VArray *seg_readers)
{
    bool_t    merge_happened  = false;
    uint32_t  num_seg_readers = VA_Get_Size(seg_readers);
    Lock     *merge_lock      = IxManager_Make_Merge_Lock(self->manager);
    bool_t    got_merge_lock  = Lock_Obtain(merge_lock);
    int64_t   cutoff;
    VArray   *to_merge;
    uint32_t  i, max;

    if (got_merge_lock) {
        self->merge_lock = merge_lock;
        cutoff = 0;
    }
    else {
        // If something else holds the merge lock, don't interfere. 
        Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
        if (merge_data) {
            Obj *cutoff_obj = Hash_Fetch_Str(merge_data, "cutoff", 6);
            if (cutoff_obj) {
                cutoff = Obj_To_I64(cutoff_obj);
            }
            else {
                cutoff = I64_MAX;
            }
            DECREF(merge_data);
        }
        else {
            cutoff = I64_MAX;
        }
        DECREF(merge_lock);
    }

    // Get a list of segments to recycle.  Validate and confirm that there are
    // no dupes in the list.
    to_merge = IxManager_Recycle(self->manager, self->polyreader, 
        self->del_writer, cutoff, self->optimize);
    {
        Hash *seen = Hash_new(VA_Get_Size(to_merge));
        for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
            SegReader *seg_reader = (SegReader*)CERTIFY(
                VA_Fetch(to_merge, i), SEGREADER);
            CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
            if (Hash_Fetch(seen, (Obj*)seg_name)) {
                DECREF(seen);
                DECREF(to_merge);
                THROW(ERR, "Recycle() tried to merge segment '%o' twice",
                    seg_name);
            }
            Hash_Store(seen, (Obj*)seg_name, INCREF(&EMPTY));
        }
        DECREF(seen);
    }

    // Consolidate segments if either sparse or optimizing forced. 
    for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(to_merge, i);
        int64_t seg_num = SegReader_Get_Seg_Num(seg_reader);
        Matcher *deletions 
            = DelWriter_Seg_Deletions(self->del_writer, seg_reader);
        I32Array *doc_map = DelWriter_Generate_Doc_Map(self->del_writer,
            deletions, SegReader_Doc_Max(seg_reader),
            (int32_t)Seg_Get_Count(self->segment) 
        );
        if (seg_num <= cutoff) {
            THROW(ERR, "Segment %o violates cutoff (%i64 <= %i64)",
                SegReader_Get_Seg_Name(seg_reader), seg_num, cutoff);
        }
        SegWriter_Merge_Segment(self->seg_writer, seg_reader, doc_map);
        merge_happened = true;
        DECREF(deletions);
        DECREF(doc_map);
    }

    // Write out new deletions. 
    if (DelWriter_Updated(self->del_writer)) {
        // Only write out if they haven't all been applied. 
        if (VA_Get_Size(to_merge) != num_seg_readers) {
            DelWriter_Finish(self->del_writer);
        }
    }

    DECREF(to_merge);
    return merge_happened;
}
Exemplo n.º 23
0
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index, 
          IndexManager *manager, int32_t flags)
          
{
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init. 
    self->stock_doc     = Doc_new(NULL, 0);
    self->truncate      = false;
    self->optimize      = false;
    self->prepared      = false;
    self->needs_commit  = false;
    self->snapfile      = NULL;
    self->merge_lock    = NULL;

    // Assign. 
    self->folder       = folder;
    self->manager      = manager 
                       ? (IndexManager*)INCREF(manager) 
                       : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder. 
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad! 
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one. 
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied. 
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully 
                self->schema = (Schema*)CERTIFY(
                    VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty 
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice. 
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
            ? PolyReader_open((Obj*)folder, NULL, NULL)
            : PolyReader_new(schema, folder, NULL, NULL, NULL);

        if (latest_snapfile) {
            // Make sure than any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions. 
    {
        // Note: we have to feed FilePurger with the most recent snapshot file
        // now, but with the Indexer's snapshot later.
        FilePurger *file_purger 
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment. 
    {
        int64_t new_seg_num 
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of its
            // way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data 
                            ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                            : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment. 
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.  
    self->file_purger 
        = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
        self->segment, self->polyreader); 
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter. 
    self->del_writer = (DeletionsWriter*)INCREF(
        SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}