Exemplo n.º 1
0
static Schema*
S_create_schema() {
    // Create a new schema.
    Schema *schema = Schema_new();

    // Create an analyzer.
    String       *language = Str_newf("en");
    EasyAnalyzer *analyzer = EasyAnalyzer_new(language);

    // Specify fields.

    FullTextType *type = FullTextType_new((Analyzer*)analyzer);

    {
        String *field_str = Str_newf("title");
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(field_str);
    }

    {
        String *field_str = Str_newf("content");
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(field_str);
    }

    DECREF(language);
    DECREF(analyzer);
    DECREF(type);
    return schema;
}
Exemplo n.º 2
0
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name, Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset), Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
Exemplo n.º 3
0
static void
S_add_document(Indexer *indexer, const char *title, const char *content) {
    Doc *doc = Doc_new(NULL, 0);

    {
        // Store 'title' field   
        String *field_str = Str_newf("title");
        String *value_str = Str_new_from_utf8(title, strlen(title));
        Doc_Store(doc, field_str, (Obj*)value_str);
        DECREF(field_str);
        DECREF(value_str);
    }

    {
        // Store 'content' field   
        String *field_str = Str_newf("content");
        String *value_str = Str_new_from_utf8(content, strlen(content));
        Doc_Store(doc, field_str, (Obj*)value_str);
        DECREF(field_str);
        DECREF(value_str);
    }

    Indexer_Add_Doc(indexer, doc, 1.0);

    DECREF(doc);
}
Exemplo n.º 4
0
Hash*
FullTextType_Dump_For_Schema_IMP(FullTextType *self) {
    FullTextTypeIVARS *const ivars = FullTextType_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash_Store_Utf8(dump, "type", 4, (Obj*)Str_newf("fulltext"));

    // Store attributes that override the defaults.
    if (ivars->boost != 1.0) {
        Hash_Store_Utf8(dump, "boost", 5, (Obj*)Str_newf("%f64", ivars->boost));
    }
    if (!ivars->indexed) {
        Hash_Store_Utf8(dump, "indexed", 7, (Obj*)CFISH_FALSE);
    }
    if (!ivars->stored) {
        Hash_Store_Utf8(dump, "stored", 6, (Obj*)CFISH_FALSE);
    }
    if (ivars->sortable) {
        Hash_Store_Utf8(dump, "sortable", 8, (Obj*)CFISH_TRUE);
    }
    if (ivars->highlightable) {
        Hash_Store_Utf8(dump, "highlightable", 13, (Obj*)CFISH_TRUE);
    }

    return dump;
}
Exemplo n.º 5
0
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (!ivars->dat_out) {
        Segment  *segment  = ivars->segment;
        Folder   *folder   = ivars->folder;
        String   *seg_name = Seg_Get_Name(segment);

        // Open outstreams.
        String *ix_file = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!ivars->ix_out) { RETHROW(INCREF(Err_get_error())); }

        String *dat_file = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!ivars->dat_out) { RETHROW(INCREF(Err_get_error())); }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }

    return ivars->dat_out;
}
Exemplo n.º 6
0
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs   = VA_Get_Size(ivars->runs);
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path  = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);

        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
Exemplo n.º 7
0
Arquivo: Num.c Projeto: hernan604/lucy
void
Bool_init_class() {
    Bool_true_singleton          = (BoolNum*)VTable_Make_Obj(BOOLNUM);
    Bool_true_singleton->value   = true;
    Bool_true_singleton->string  = Str_newf("true");
    Bool_false_singleton         = (BoolNum*)VTable_Make_Obj(BOOLNUM);
    Bool_false_singleton->value  = false;
    Bool_false_singleton->string = Str_newf("false");
}
Exemplo n.º 8
0
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name  = Seg_Get_Name(segment);
        String *ix_file   = Str_newf("%o/documents.ix", seg_name);
        String *dat_file  = Str_newf("%o/documents.dat", seg_name);
        Obj     *format   = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64", format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Exemplo n.º 9
0
void
BGMerger_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);

    // Safety check.
    if (!ivars->merge_lock) {
        THROW(ERR, "Can't call commit() more than once");
    }

    if (!ivars->prepared) {
        BGMerger_Prepare_Commit(self);
    }

    if (ivars->needs_commit) {
        bool success = false;
        String *temp_snapfile = ivars->snapfile;

        // Rename temp snapshot file.
        size_t ext_len      = sizeof(".temp") - 1;
        size_t snapfile_len = Str_Length(temp_snapfile);
        if (snapfile_len <= ext_len) {
            THROW(ERR, "Invalid snapfile name: %o", temp_snapfile);
        }
        ivars->snapfile = Str_SubString(temp_snapfile, 0,
                                       snapfile_len - ext_len);
        success = Folder_Hard_Link(ivars->folder, temp_snapfile,
                                   ivars->snapfile);
        Snapshot_Set_Path(ivars->snapshot, ivars->snapfile);
        if (!success) {
            String *mess = Str_newf("Can't create hard link from %o to %o",
                                    temp_snapfile, ivars->snapfile);
            DECREF(temp_snapfile);
            Err_throw_mess(ERR, mess);
        }
        if (!Folder_Delete(ivars->folder, temp_snapfile)) {
            String *mess = Str_newf("Can't delete %o", temp_snapfile);
            DECREF(temp_snapfile);
            Err_throw_mess(ERR, mess);
        }
        DECREF(temp_snapfile);
    }

    // Release the merge lock and remove the merge data file.
    S_release_merge_lock(self);
    IxManager_Remove_Merge_Data(ivars->manager);

    if (ivars->needs_commit) {
        // Purge obsolete files.
        FilePurger_Purge(ivars->file_purger);
    }

    // Release the write lock.
    S_release_write_lock(self);
}
Exemplo n.º 10
0
static void
test_analysis(TestBatchRunner *runner) {
    CaseFolder *case_folder = CaseFolder_new();
    String *source = Str_newf("caPiTal ofFensE");
    VArray *wanted = VA_new(1);
    VA_Push(wanted, (Obj*)Str_newf("capital offense"));
    TestUtils_test_analyzer(runner, (Analyzer*)case_folder, source, wanted,
                            "lowercase plain text");
    DECREF(wanted);
    DECREF(source);
    DECREF(case_folder);
}
Exemplo n.º 11
0
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t  field_num = Seg_Field_Num(segment, field);
    String  *seg_name  = Seg_Get_Name(segment);
    String  *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String  *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo        = TInfo_new(0);
    ivars->tick         = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    ivars->size    = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
            (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
Exemplo n.º 12
0
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment    = DefHLReader_Get_Segment(self);
    Hash *metadata      = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
Exemplo n.º 13
0
int
main(int argc, char *argv[]) {
    // Initialize the library.
    lucy_bootstrap_parcel();

    if (argc != 2) {
        S_usage_and_exit(argv[0]);
    }

    const char *query_c = argv[1];

    printf("Searching for: %s\n\n", query_c);

    String *folder   = Str_newf("%s", path_to_index);
    String *language = Str_newf("en");
    Simple *lucy     = Simple_new((Obj*)folder, language);

    String *query_str = Str_newf("%s", query_c);
    Simple_Search(lucy, query_str, 0, 10);

    String *title_str = Str_newf("title");
    String *url_str   = Str_newf("url");
    HitDoc *hit;
    int i = 1;

    // Loop over search results.
    while (NULL != (hit = Simple_Next(lucy))) {
        String *title = (String*)HitDoc_Extract(hit, title_str);
        char *title_c = Str_To_Utf8(title);

        String *url = (String*)HitDoc_Extract(hit, url_str);
        char *url_c = Str_To_Utf8(url);

        printf("Result %d: %s (%s)\n", i, title_c, url_c);

        free(url_c);
        free(title_c);
        DECREF(url);
        DECREF(title);
        DECREF(hit);
        i++;
    }

    DECREF(url_str);
    DECREF(title_str);
    DECREF(query_str);
    DECREF(lucy);
    DECREF(language);
    DECREF(folder);
    return 0;
}
Exemplo n.º 14
0
static Schema*
S_create_schema() {
    // Create a new schema.
    Schema *schema = Schema_new();

    // Create an analyzer.
    String       *language = Str_newf("en");
    EasyAnalyzer *analyzer = EasyAnalyzer_new(language);

    // Specify fields.

    {
        String *field_str = Str_newf("title");
        FullTextType *type = FullTextType_new((Analyzer*)analyzer);
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(type);
        DECREF(field_str);
    }

    {
        String *field_str = Str_newf("content");
        FullTextType *type = FullTextType_new((Analyzer*)analyzer);
        FullTextType_Set_Highlightable(type, true);
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(type);
        DECREF(field_str);
    }

    {
        String *field_str = Str_newf("url");
        StringType *type = StringType_new();
        StringType_Set_Indexed(type, false);
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(type);
        DECREF(field_str);
    }

    {
        String *field_str = Str_newf("category");
        StringType *type = StringType_new();
        StringType_Set_Stored(type, false);
        Schema_Spec_Field(schema, field_str, (FieldType*)type);
        DECREF(type);
        DECREF(field_str);
    }

    DECREF(analyzer);
    DECREF(language);
    return schema;
}
Exemplo n.º 15
0
Arquivo: Obj.c Projeto: hernan604/lucy
String*
Obj_To_String_IMP(Obj *self) {
#if (SIZEOF_PTR == 4)
    return Str_newf("%o@0x%x32", Obj_Get_Class_Name(self), self);
#elif (SIZEOF_PTR == 8)
    int64_t   iaddress   = PTR_TO_I64(self);
    uint64_t  address    = (uint64_t)iaddress;
    uint32_t  address_hi = address >> 32;
    uint32_t  address_lo = address & 0xFFFFFFFF;
    return Str_newf("%o@0x%x32%x32", Obj_Get_Class_Name(self), address_hi,
                    address_lo);
#else
  #error "Unexpected pointer size."
#endif
}
Exemplo n.º 16
0
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    ivars->flipped = true;

    // Sanity check.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }

    ivars->flipped = true;
}
Exemplo n.º 17
0
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash    *const metadata = super_meta(self);
    Hash    *const files    = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment   *segment   = SegReader_Get_Segment(seg_reader);
            Hash      *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
Exemplo n.º 18
0
static void
test_Cat(TestBatchRunner *runner) {
    String *wanted = Str_newf("a%s", smiley);
    String *source;
    String *got;

    source = S_get_str("");
    got = Str_Cat(source, wanted);
    TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat");
    DECREF(got);
    DECREF(source);

    source = S_get_str("a");
    got = Str_Cat_Utf8(source, smiley, smiley_len);
    TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat_Utf8");
    DECREF(got);
    DECREF(source);

    source = S_get_str("a");
    got = Str_Cat_Trusted_Utf8(source, smiley, smiley_len);
    TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat_Trusted_Utf8");
    DECREF(got);
    DECREF(source);

    DECREF(wanted);
}
Exemplo n.º 19
0
static void
test_Cat(TestBatchRunner *runner) {
    String  *wanted = Str_newf("a%s", smiley);
    CharBuf *got    = S_get_cb("");

    CB_Cat(got, wanted);
    TEST_TRUE(runner, S_cb_equals(got, wanted), "Cat");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Char(got, 0x263A);
    TEST_TRUE(runner, S_cb_equals(got, wanted), "Cat_Char");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Utf8(got, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(got, wanted), "Cat_Utf8");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Trusted_Utf8(got, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(got, wanted), "Cat_Trusted_Utf8");
    DECREF(got);

    DECREF(wanted);
}
Exemplo n.º 20
0
static String*
S_del_filename(DefaultDeletionsWriter *self, SegReader *target_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Segment *target_seg = SegReader_Get_Segment(target_reader);
    return Str_newf("%o/deletions-%o.bv", Seg_Get_Name(ivars->segment),
                    Seg_Get_Name(target_seg));
}
Exemplo n.º 21
0
// Create a test data structure including at least one each of Hash, Vector,
// and String.
static Obj*
S_make_dump() {
    Hash *dump = Hash_new(0);
    Hash_Store_Utf8(dump, "foo", 3, (Obj*)Str_newf("foo"));
    Hash_Store_Utf8(dump, "stuff", 5, (Obj*)Vec_new(0));
    return (Obj*)dump;
}
Exemplo n.º 22
0
void
Bool_init_class() {
    Boolean *true_obj = (Boolean*)Class_Make_Obj(BOOLEAN);
    true_obj->value   = true;
    true_obj->string  = Str_newf("true");
    if (!Atomic_cas_ptr((void**)&Bool_true_singleton, NULL, true_obj)) {
        Bool_Destroy(true_obj);
    }

    Boolean *false_obj = (Boolean*)Class_Make_Obj(BOOLEAN);
    false_obj->value   = false;
    false_obj->string  = Str_newf("false");
    if (!Atomic_cas_ptr((void**)&Bool_false_singleton, NULL, false_obj)) {
        Bool_Destroy(false_obj);
    }
}
Exemplo n.º 23
0
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    uint64_t max_gen = 0;

    if (!dh) { RETHROW(INCREF(Err_get_error())); }
    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
        DECREF(entry);
    }
    DECREF(dh);

    uint64_t new_gen = max_gen + 1;
    char  base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(new_gen, &base36);
    return Str_newf("snapshot_%s.json", &base36);
}
Exemplo n.º 24
0
Hash*
SnowStop_gen_stoplist(String *language) {
    char lang[2];
    lang[0] = tolower(Str_Code_Point_At(language, 0));
    lang[1] = tolower(Str_Code_Point_At(language, 1));
    const uint8_t **words = NULL;
    if (memcmp(lang, "da", 2) == 0)      { words = SnowStop_snow_da; }
    else if (memcmp(lang, "de", 2) == 0) { words = SnowStop_snow_de; }
    else if (memcmp(lang, "en", 2) == 0) { words = SnowStop_snow_en; }
    else if (memcmp(lang, "es", 2) == 0) { words = SnowStop_snow_es; }
    else if (memcmp(lang, "fi", 2) == 0) { words = SnowStop_snow_fi; }
    else if (memcmp(lang, "fr", 2) == 0) { words = SnowStop_snow_fr; }
    else if (memcmp(lang, "hu", 2) == 0) { words = SnowStop_snow_hu; }
    else if (memcmp(lang, "it", 2) == 0) { words = SnowStop_snow_it; }
    else if (memcmp(lang, "nl", 2) == 0) { words = SnowStop_snow_nl; }
    else if (memcmp(lang, "no", 2) == 0) { words = SnowStop_snow_no; }
    else if (memcmp(lang, "pt", 2) == 0) { words = SnowStop_snow_pt; }
    else if (memcmp(lang, "ru", 2) == 0) { words = SnowStop_snow_ru; }
    else if (memcmp(lang, "sv", 2) == 0) { words = SnowStop_snow_sv; }
    else {
        return NULL;
    }
    size_t num_stopwords = 0;
    for (uint32_t i = 0; words[i] != NULL; i++) { num_stopwords++; }
    NoCloneHash *stoplist = NoCloneHash_new(num_stopwords);
    for (uint32_t i = 0; words[i] != NULL; i++) {
        char *word = (char*)words[i];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        NoCloneHash_Store(stoplist, (Obj*)stop, (Obj*)Str_newf(""));
        DECREF(stop);
    }
    return (Hash*)stoplist;
}
Exemplo n.º 25
0
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    String    *meta;
    String    *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String    *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);

    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (String*)Seg_Fetch_Metadata_Utf8(got, "foo", 3);
    TEST_TRUE(runner,
              meta
              && Str_Is_A(meta, STRING)
              && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
Exemplo n.º 26
0
static void
S_search(IndexSearcher *searcher, const char *query) {
    printf("Searching for: %s\n", query);

    // Execute search query.
    String *query_str = Str_new_from_utf8(query, strlen(query));
    Hits   *hits      = IxSearcher_Hits(searcher, (Obj*)query_str, 0, 10, NULL);

    String *field_str = Str_newf("title");
    HitDoc *hit;
    int i = 1;

    // Loop over search results.
    while (NULL != (hit = Hits_Next(hits))) {
        String *value_str = (String*)HitDoc_Extract(hit, field_str);
        char *value = Str_To_Utf8(value_str);

        printf("Result %d: %s\n", i, value);

        free(value);
        DECREF(value_str);
        DECREF(hit);
        i++;
    }

    printf("\n");

    DECREF(query_str);
    DECREF(hits);
    DECREF(field_str);
}
Exemplo n.º 27
0
static void
test_Code_Point_At_and_From(TestBatchRunner *runner) {
    int32_t code_points[] = {
        'a', smiley_cp, smiley_cp, 'b', smiley_cp, 'c'
    };
    uint32_t num_code_points = sizeof(code_points) / sizeof(int32_t);
    String *string = Str_newf("a%s%sb%sc", smiley, smiley, smiley);
    uint32_t i;

    for (i = 0; i < num_code_points; i++) {
        uint32_t from = num_code_points - i;
        TEST_INT_EQ(runner, Str_Code_Point_At(string, i), code_points[i],
                    "Code_Point_At %ld", (long)i);
        TEST_INT_EQ(runner, Str_Code_Point_From(string, from),
                    code_points[i], "Code_Point_From %ld", (long)from);
    }

    TEST_INT_EQ(runner, Str_Code_Point_At(string, num_code_points), STR_OOB,
                "Code_Point_At %ld", (long)num_code_points);
    TEST_INT_EQ(runner, Str_Code_Point_From(string, 0), STR_OOB,
                "Code_Point_From 0");
    TEST_INT_EQ(runner, Str_Code_Point_From(string, num_code_points + 1),
                STR_OOB, "Code_Point_From %ld", (long)(num_code_points + 1));

    DECREF(string);
}
Exemplo n.º 28
0
Hash*
DataWriter_Metadata_IMP(DataWriter *self) {
    Hash *metadata = Hash_new(0);
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", DataWriter_Format(self)));
    return metadata;
}
Exemplo n.º 29
0
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word  = (String*)VA_Fetch(words, i);
            VArray *got   = SnowStemmer_Split(stemmer, word);
            String *stem  = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Exemplo n.º 30
0
static void
test_To_ByteBuf(TestBatchRunner *runner) {
    String     *string = Str_newf("foo");
    ByteBuf    *bb     = Str_To_ByteBuf(string);
    TEST_TRUE(runner, BB_Equals_Bytes(bb, "foo", 3), "To_ByteBuf");
    DECREF(bb);
    DECREF(string);
}