Exemple #1
0
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word  = (String*)VA_Fetch(words, i);
            VArray *got   = SnowStemmer_Split(stemmer, word);
            String *stem  = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Exemple #2
0
Obj*
RangeQuery_Load_IMP(RangeQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    RangeQuery_Load_t super_load
        = SUPER_METHOD_PTR(RANGEQUERY, LUCY_RangeQuery_Load);
    RangeQuery *loaded = (RangeQuery*)super_load(self, dump);
    RangeQueryIVARS *loaded_ivars = RangeQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *lower_term = Hash_Fetch_Utf8(source, "lower_term", 10);
    if (lower_term) {
        loaded_ivars->lower_term
            = (Obj*)CERTIFY(Freezer_load(lower_term), OBJ);
    }
    Obj *upper_term = Hash_Fetch_Utf8(source, "upper_term", 10);
    if (upper_term) {
        loaded_ivars->upper_term
            = (Obj*)CERTIFY(Freezer_load(upper_term), OBJ);
    }
    Obj *include_lower
        = CERTIFY(Hash_Fetch_Utf8(source, "include_lower", 13), OBJ);
    loaded_ivars->include_lower = Json_obj_to_bool(include_lower);
    Obj *include_upper
        = CERTIFY(Hash_Fetch_Utf8(source, "include_upper", 13), OBJ);
    loaded_ivars->include_upper = Json_obj_to_bool(include_upper);
    return (Obj*)loaded;
}
Exemple #3
0
BlobType*
BlobType_Load_IMP(BlobType *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, STRING))
          ? VTable_singleton(class_name, NULL)
          : BLOBTYPE;
    BlobType *loaded     = (BlobType*)VTable_Make_Obj(vtable);
    Obj *boost_dump      = Hash_Fetch_Utf8(source, "boost", 5);
    Obj *indexed_dump    = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump     = Hash_Fetch_Utf8(source, "stored", 6);
    UNUSED_VAR(self);

    BlobType_init(loaded, false);
    BlobTypeIVARS *const loaded_ivars = BlobType_IVARS(loaded);
    if (boost_dump) {
        loaded_ivars->boost = (float)Obj_To_F64(boost_dump);
    }
    if (indexed_dump) {
        loaded_ivars->indexed = Obj_To_Bool(indexed_dump);
    }
    if (stored_dump){
        loaded_ivars->stored = Obj_To_Bool(stored_dump);
    }

    return loaded;
}
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name, Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset), Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
Exemple #5
0
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *folder  = ivars->folder;
    bool    success = false;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *iter = Str_Top(path);
    StrIter_Advance(iter, 5 + 1);
    if (!StrIter_Starts_With(iter, ivars->name)) {
        DECREF(iter);
        return false;
    }
    DECREF(iter);

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            String *pid_buf = (String*)Hash_Fetch_Utf8(hash, "pid", 3);
            String *host    = (String*)Hash_Fetch_Utf8(hash, "host", 4);
            String *name    = (String*)Hash_Fetch_Utf8(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && Str_Is_A(host, STRING)
                && Str_Equals(host, (Obj*)ivars->host)
                && name != NULL
                && Str_Is_A(name, STRING)
                && Str_Equals(name, (Obj*)ivars->name)
                && pid_buf != NULL
                && Str_Is_A(pid_buf, STRING)
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)Str_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())  // This process.
                    || (delete_other && !PID_active(pid)) // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        String *mess
                            = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}
Exemple #6
0
DefaultSortReader*
DefSortReader_init(DefaultSortReader *self, Schema *schema, Folder *folder,
                   Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    DataReader_init((DataReader*)self, schema, folder, snapshot, segments,
                    seg_tick);
    DefaultSortReaderIVARS *const ivars = DefSortReader_IVARS(self);
    Segment *segment  = DefSortReader_Get_Segment(self);
    Hash    *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "sort", 4);

    // Check format.
    ivars->format = 0;
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            ivars->format = (int32_t)Obj_To_I64(format);
            if (ivars->format < 2 || ivars->format > 3) {
                THROW(ERR, "Unsupported sort cache format: %i32",
                      ivars->format);
            }
        }
    }

    // Init.
    ivars->caches = Hash_new(0);

    // Either extract or fake up the "counts", "null_ords", and "ord_widths"
    // hashes.
    if (metadata) {
        ivars->counts
            = (Hash*)INCREF(CERTIFY(Hash_Fetch_Utf8(metadata, "counts", 6),
                                    HASH));
        ivars->null_ords = (Hash*)Hash_Fetch_Utf8(metadata, "null_ords", 9);
        if (ivars->null_ords) {
            ivars->null_ords = (Hash*)INCREF(CERTIFY(ivars->null_ords, HASH));
        }
        else {
            ivars->null_ords = Hash_new(0);
        }
        ivars->ord_widths = (Hash*)Hash_Fetch_Utf8(metadata, "ord_widths", 10);
        if (ivars->ord_widths) {
            ivars->ord_widths
                = (Hash*)INCREF(CERTIFY(ivars->ord_widths, HASH));
        }
        else {
            ivars->ord_widths = Hash_new(0);
        }
    }
    else {
        ivars->counts     = Hash_new(0);
        ivars->null_ords  = Hash_new(0);
        ivars->ord_widths = Hash_new(0);
    }

    return self;
}
Exemple #7
0
Obj*
Query_Load_IMP(Query *self, Obj *dump) {
    CHY_UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "_class", 6), STRING);
    VTable *vtable = VTable_singleton(class_name, NULL);
    Query *loaded = (Query*)VTable_Make_Obj(vtable);
    Obj *boost = CERTIFY(Hash_Fetch_Utf8(source, "boost", 5), OBJ);
    Query_IVARS(loaded)->boost = (float)Obj_To_F64(boost);
    return (Obj*)loaded;
}
Exemple #8
0
Obj*
TermQuery_Load_IMP(TermQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    TermQuery_Load_t super_load
        = SUPER_METHOD_PTR(TERMQUERY, LUCY_TermQuery_Load);
    TermQuery *loaded = (TermQuery*)super_load(self, dump);
    TermQueryIVARS *loaded_ivars = TermQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *term = CERTIFY(Hash_Fetch_Utf8(source, "term", 4), OBJ);
    loaded_ivars->term = (Obj*)CERTIFY(Freezer_load(term), OBJ);
    return (Obj*)loaded;
}
Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Eliminate all prior data. Pick a snapshot file.
    S_zero_out(self);
    ivars->path = (path != NULL && Str_Get_Size(path) > 0)
                  ? Str_Clone(path)
                  : IxFileNames_latest_snapshot(folder);

    if (ivars->path) {
        Hash *snap_data
            = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path), HASH);
        Obj *format_obj
            = CERTIFY(Hash_Fetch_Utf8(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Json_obj_to_i64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Utf8(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Json_obj_to_i64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32",
                  format, Snapshot_current_file_format);
        }

        // Build up list of entries.
        Vector *list = (Vector*)INCREF(CERTIFY(
                           Hash_Fetch_Utf8(snap_data, "entries", 7),
                           VECTOR));
        if (format == 1 || (format == 2 && subformat < 1)) {
            Vector *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(ivars->entries);
        for (uint32_t i = 0, max = Vec_Get_Size(list); i < max; i++) {
            String *entry
                = (String*)CERTIFY(Vec_Fetch(list, i), STRING);
            Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
Exemple #10
0
Obj*
ProximityQuery_Load_IMP(ProximityQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    ProximityQuery_Load_t super_load
        = SUPER_METHOD_PTR(PROXIMITYQUERY, LUCY_ProximityQuery_Load);
    ProximityQuery *loaded = (ProximityQuery*)super_load(self, dump);
    ProximityQueryIVARS *loaded_ivars = ProximityQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *terms = CERTIFY(Hash_Fetch_Utf8(source, "terms", 5), OBJ);
    loaded_ivars->terms = (VArray*)CERTIFY(Freezer_load(terms), VARRAY);
    Obj *within = CERTIFY(Hash_Fetch_Utf8(source, "within", 6), OBJ);
    loaded_ivars->within = (uint32_t)Obj_To_I64(within);
    return (Obj*)loaded;
}
Obj*
PhraseQuery_Load_IMP(PhraseQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PhraseQuery_Load_t super_load
        = SUPER_METHOD_PTR(PHRASEQUERY, LUCY_PhraseQuery_Load);
    PhraseQuery *loaded = (PhraseQuery*)super_load(self, dump);
    PhraseQueryIVARS *loaded_ivars = PhraseQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field
        = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *terms = CERTIFY(Hash_Fetch_Utf8(source, "terms", 5), OBJ);
    loaded_ivars->terms
        = (Vector*)CERTIFY(Freezer_load(terms), VECTOR);
    return (Obj*)loaded;
}
Exemple #12
0
Obj*
LeafQuery_Load_IMP(LeafQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    LeafQuery_Load_t super_load
        = SUPER_METHOD_PTR(LEAFQUERY, LUCY_LeafQuery_Load);
    LeafQuery *loaded = (LeafQuery*)super_load(self, dump);
    LeafQueryIVARS *loaded_ivars = LeafQuery_IVARS(loaded);
    Obj *field = Hash_Fetch_Utf8(source, "field", 5);
    if (field) {
        loaded_ivars->field
            = (String*)CERTIFY(Freezer_load(field), STRING);
    }
    Obj *text = CERTIFY(Hash_Fetch_Utf8(source, "text", 4), OBJ);
    loaded_ivars->text = (String*)CERTIFY(Freezer_load(text), STRING);
    return (Obj*)loaded;
}
Exemple #13
0
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
Exemple #14
0
bool
Seg_Read_File_IMP(Segment *self, Folder *folder) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    String *filename = Str_newf("%o/segmeta.json", ivars->name);
    Hash   *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash   *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(ivars->metadata);
    ivars->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Utf8(ivars->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Utf8(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Utf8(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { ivars->count = Json_obj_to_i64(count); }

    // Get list of field nums.
    Vector *source_by_num = (Vector*)Hash_Fetch_Utf8(my_metadata,
                                                     "field_names", 11);
    size_t num_fields = source_by_num ? Vec_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(ivars->by_num);
    DECREF(ivars->by_name);
    ivars->by_num  = Vec_new(num_fields);
    ivars->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (size_t i = 0; i < num_fields; i++) {
        String *name = (String*)Vec_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
SnowballStemmer*
SnowStemmer_Load_IMP(SnowballStemmer *self, Obj *dump) {
    SnowStemmer_Load_t super_load
        = SUPER_METHOD_PTR(SNOWBALLSTEMMER, LUCY_SnowStemmer_Load);
    SnowballStemmer *loaded = super_load(self, dump);
    Hash    *source = (Hash*)CERTIFY(dump, HASH);
    String *language
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "language", 8), STRING);
    return SnowStemmer_init(loaded, language);
}
Exemple #16
0
EasyAnalyzer*
EasyAnalyzer_Load_IMP(EasyAnalyzer *self, Obj *dump) {
    EasyAnalyzer_Load_t super_load
        = SUPER_METHOD_PTR(EASYANALYZER, LUCY_EasyAnalyzer_Load);
    EasyAnalyzer *loaded = super_load(self, dump);
    Hash    *source = (Hash*)CERTIFY(dump, HASH);
    String *language
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "language", 8), STRING);
    return EasyAnalyzer_init(loaded, language);
}
Exemple #17
0
RegexTokenizer*
RegexTokenizer_Load_IMP(RegexTokenizer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    RegexTokenizer_Load_t super_load
        = SUPER_METHOD_PTR(REGEXTOKENIZER, LUCY_RegexTokenizer_Load);
    RegexTokenizer *loaded = super_load(self, dump);
    String *pattern 
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "pattern", 7), STRING);
    return RegexTokenizer_init(loaded, pattern);
}
Exemple #18
0
static void
test_normalization(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    if (modules_folder == NULL) {
        SKIP(runner, 13, "Can't locate test data");
        return;
    }

    String *path = Str_newf("unicode/utf8proc/tests.json");
    Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) {
        Hash *test = (Hash*)Vec_Fetch(tests, i);
        String *form = (String*)Hash_Fetch_Utf8(
                            test, "normalization_form", 18);
        bool case_fold = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                              test, "case_fold", 9));
        bool strip_accents = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                                  test, "strip_accents", 13));
        Normalizer *normalizer = Normalizer_new(form, case_fold, strip_accents);
        Vector *words = (Vector*)Hash_Fetch_Utf8(test, "words", 5);
        Vector *norms = (Vector*)Hash_Fetch_Utf8(test, "norms", 5);
        for (uint32_t j = 0, max = Vec_Get_Size(words); j < max; j++) {
            String *word = (String*)Vec_Fetch(words, j);
            Vector *got  = Normalizer_Split(normalizer, word);
            String *norm = (String*)Vec_Fetch(got, 0);
            TEST_TRUE(runner,
                      norm
                      && Str_is_a(norm, STRING)
                      && Str_Equals(norm, Vec_Fetch(norms, j)),
                      "Normalize %s %d %d: %s", Str_Get_Ptr8(form),
                      case_fold, strip_accents, Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(normalizer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Exemple #19
0
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name  = Seg_Get_Name(segment);
        String *ix_file   = Str_newf("%o/documents.ix", seg_name);
        String *dat_file  = Str_newf("%o/documents.dat", seg_name);
        Obj     *format   = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64", format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Exemple #20
0
Obj*
PolyQuery_Load_IMP(PolyQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyQuery_Load_t super_load
        = SUPER_METHOD_PTR(POLYQUERY, LUCY_PolyQuery_Load);
    PolyQuery *loaded = (PolyQuery*)super_load(self, dump);
    Obj *children = CERTIFY(Hash_Fetch_Utf8(source, "children", 8), OBJ);
    PolyQuery_IVARS(loaded)->children
        = (Vector*)CERTIFY(Freezer_load(children), VECTOR);
    return (Obj*)loaded;
}
Exemple #21
0
HitDoc*
HitDoc_Load_IMP(HitDoc *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    HitDoc_Load_t super_load
        = SUPER_METHOD_PTR(HITDOC, LUCY_HitDoc_Load);
    HitDoc *loaded = super_load(self, dump);
    HitDocIVARS *const loaded_ivars = HitDoc_IVARS(loaded);
    Obj *score = CERTIFY(Hash_Fetch_Utf8(source, "score", 5), OBJ);
    loaded_ivars->score = (float)Obj_To_F64(score);
    return loaded;
}
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash    *cf_metadata;
    Hash    *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool     offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offset, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);
    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
Exemple #23
0
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment    = DefHLReader_Get_Segment(self);
    Hash *metadata      = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
Exemple #24
0
StringType*
StringType_Load_IMP(StringType *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, STRING))
          ? VTable_singleton(class_name, NULL)
          : STRINGTYPE;
    StringType *loaded   = (StringType*)VTable_Make_Obj(vtable);
    Obj *boost_dump      = Hash_Fetch_Utf8(source, "boost", 5);
    Obj *indexed_dump    = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump     = Hash_Fetch_Utf8(source, "stored", 6);
    Obj *sortable_dump   = Hash_Fetch_Utf8(source, "sortable", 8);
    UNUSED_VAR(self);

    float boost    = boost_dump    ? (float)Obj_To_F64(boost_dump) : 1.0f;
    bool  indexed  = indexed_dump  ? Obj_To_Bool(indexed_dump)     : true;
    bool  stored   = stored_dump   ? Obj_To_Bool(stored_dump)      : true;
    bool  sortable = sortable_dump ? Obj_To_Bool(sortable_dump)    : false;

    return StringType_init2(loaded, boost, indexed, stored, sortable);
}
Exemple #25
0
Obj*
SnowStop_Load_IMP(SnowballStopFilter *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    SnowStop_Load_t super_load
        = SUPER_METHOD_PTR(SNOWBALLSTOPFILTER, LUCY_SnowStop_Load);
    SnowballStopFilter *loaded = (SnowballStopFilter*)super_load(self, dump);
    Obj *stoplist = Hash_Fetch_Utf8(source, "stoplist", 8);
    if (stoplist) {
        SnowStop_IVARS(loaded)->stoplist
            = (Hash*)CERTIFY(Freezer_load(stoplist), HASH);
    }
    return (Obj*)loaded;
}
Exemple #26
0
PolyAnalyzer*
PolyAnalyzer_Load_IMP(PolyAnalyzer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyAnalyzer_Load_t super_load 
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Load);
    PolyAnalyzer *loaded = super_load(self, dump);
    VArray *analyzer_dumps
        = (VArray*)CERTIFY(Hash_Fetch_Utf8(source, "analyzers", 9), VARRAY);
    VArray *analyzers
        = (VArray*)CERTIFY(Freezer_load((Obj*)analyzer_dumps), VARRAY);
    PolyAnalyzer_init(loaded, NULL, analyzers);
    DECREF(analyzers);
    return loaded;
}
Exemple #27
0
Inversion*
SnowStop_Transform_IMP(SnowballStopFilter *self, Inversion *inversion) {
    Token *token;
    Inversion *new_inversion = Inversion_new(NULL);
    SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
    Hash *const stoplist  = ivars->stoplist;

    while (NULL != (token = Inversion_Next(inversion))) {
        TokenIVARS *const token_ivars = Token_IVARS(token);
        if (!Hash_Fetch_Utf8(stoplist, token_ivars->text, token_ivars->len)) {
            Inversion_Append(new_inversion, (Token*)INCREF(token));
        }
    }

    return new_inversion;
}
Exemple #28
0
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
    IndexManager *manager    = ivars->manager;
    Lock         *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data
                       ? Hash_Fetch_Utf8(merge_data, "cutoff", 6)
                       : NULL;

        if (cutoff) {
            String *cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
            if (Folder_Exists(ivars->folder, cutoff_seg)) {
                String *merge_json = SSTR_WRAP_UTF8("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(ivars->folder, cutoff_seg);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                Hash_Store(candidates, cutoff_seg, (Obj*)CFISH_TRUE);
                Hash_Store(candidates, merge_json, (Obj*)CFISH_TRUE);
                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    String *entry = DH_Get_Entry(dh);
                    String *filepath = Str_newf("%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, filepath, (Obj*)CFISH_TRUE);
                    DECREF(filepath);
                    DECREF(entry);
                }
                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
Exemple #29
0
static Obj*
S_load_from_hash(Hash *dump) {
    String *class_name = (String*)Hash_Fetch_Utf8(dump, "_class", 6);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a dump() rather than an ordinary Hash.
    if (class_name && Str_is_a(class_name, STRING)) {
        Class *klass = Class_fetch_class(class_name);

        if (!klass) {
            String *parent_class_name = Class_find_parent_class(class_name);
            if (parent_class_name) {
                Class *parent = Class_singleton(parent_class_name, NULL);
                klass = Class_singleton(class_name, parent);
                DECREF(parent_class_name);
            }
            else {
                // TODO: Fix load() so that it works with ordinary hash keys
                // named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (klass) {
            return S_load_via_load_method(klass, (Obj*)dump);
        }

    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(Hash_Get_Size(dump));

    HashIterator *iter = HashIter_new(dump);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(loaded, key, Freezer_load(value));
    }
    DECREF(iter);

    return (Obj*)loaded;

}
Exemple #30
0
FullTextType*
FullTextType_Load_IMP(FullTextType *self, Obj *dump) {
    UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    Class *klass
        = (class_name != NULL && Obj_is_a((Obj*)class_name, STRING))
          ? Class_singleton(class_name, NULL)
          : FULLTEXTTYPE;
    FullTextType *loaded = (FullTextType*)Class_Make_Obj(klass);

    // Extract boost.
    Obj *boost_dump = Hash_Fetch_Utf8(source, "boost", 5);
    float boost = boost_dump ? (float)Json_obj_to_f64(boost_dump) : 1.0f;

    // Find boolean properties.
    Obj *indexed_dump = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Utf8(source, "stored", 6);
    Obj *sort_dump    = Hash_Fetch_Utf8(source, "sortable", 8);
    Obj *hl_dump      = Hash_Fetch_Utf8(source, "highlightable", 13);
    bool indexed  = indexed_dump ? Json_obj_to_bool(indexed_dump) : true;
    bool stored   = stored_dump  ? Json_obj_to_bool(stored_dump)  : true;
    bool sortable = sort_dump    ? Json_obj_to_bool(sort_dump)    : false;
    bool hl       = hl_dump      ? Json_obj_to_bool(hl_dump)      : false;

    // Extract an Analyzer.
    Obj *analyzer_dump = Hash_Fetch_Utf8(source, "analyzer", 8);
    Analyzer *analyzer = NULL;
    if (analyzer_dump) {
        if (Obj_is_a(analyzer_dump, ANALYZER)) {
            // Schema munged the dump and installed a shared analyzer.
            analyzer = (Analyzer*)INCREF(analyzer_dump);
        }
        else if (Obj_is_a((Obj*)analyzer_dump, HASH)) {
            analyzer = (Analyzer*)Freezer_load(analyzer_dump);
        }
    }
    CERTIFY(analyzer, ANALYZER);

    FullTextType_init2(loaded, analyzer, boost, indexed, stored,
                       sortable, hl);
    DECREF(analyzer);
    return loaded;
}