Esempio n. 1
0
/* Initialize a LexiconWriter.
 *
 * Runs the DataWriter initializer, copies the index and skip intervals from
 * the Architecture of the PolyReader's Schema, and sets every other member
 * to its starting value.  Returns `self`.
 */
LexiconWriter*
LexWriter_init(LexiconWriter *self, Snapshot *snapshot, Segment *segment, 
               PolyReader *polyreader)
{
    Architecture *arch
        = Schema_Get_Architecture(PolyReader_Get_Schema(polyreader));

    DataWriter_init((DataWriter*)self, snapshot, segment, polyreader);

    /* Intervals come from the Architecture. */
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);

    /* No output streams yet. */
    self->ix_out   = NULL;
    self->ixix_out = NULL;
    self->dat_out  = NULL;

    /* Counters and mode flag. */
    self->count     = 0;
    self->ix_count  = 0;
    self->temp_mode = false;

    /* Owned helper objects. */
    self->last_tinfo = TInfo_new(0,0,0,0);
    self->last_text  = CB_new(40);
    self->dat_file   = CB_new(30);
    self->ix_file    = CB_new(30);
    self->ixix_file  = CB_new(30);
    self->counts     = Hash_new(0);
    self->ix_counts  = Hash_new(0);

    /* The stepper depends on skip_interval, which was assigned above. */
    self->stepper = LexStepper_new((CharBuf*)&EMPTY, self->skip_interval);

    return self;
}
Esempio n. 2
0
// Initialize a DocVector by giving it two freshly created hashes,
// `field_bufs` and `field_vectors`.  Returns `self`.
DocVector*
DocVec_init(DocVector *self) {
    DocVectorIVARS *const members = DocVec_IVARS(self);

    members->field_bufs    = Hash_new(0);
    members->field_vectors = Hash_new(0);

    return self;
}
Esempio n. 3
0
// Build this writer's metadata: start from the superclass metadata and add
// a "files" hash.  For every segment reader flagged in `updated`, "files"
// maps the segment name to a small hash holding the deletion "count" and
// the deletions "filename".
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t parent_metadata
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash *const metadata = parent_metadata(self);
    Hash *const files    = Hash_new(0);
    const uint32_t num_readers = VA_Get_Size(ivars->seg_readers);

    for (uint32_t tick = 0; tick < num_readers; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);

        // Only segments whose deletions changed get an entry.
        if (!ivars->updated[tick]) { continue; }

        BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        Segment   *segment = SegReader_Get_Segment(seg_reader);
        Hash      *entry   = Hash_new(2);

        Hash_Store_Utf8(entry, "count", 5,
                        (Obj*)Str_newf("%u32",
                                       (uint32_t)BitVec_Count(deldocs)));
        Hash_Store_Utf8(entry, "filename", 8,
                        (Obj*)S_del_filename(self, seg_reader));
        Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)entry);
    }

    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);
    return metadata;
}
Esempio n. 4
0
/*
 * Load a hash from `file`, which is read as a flat sequence of
 * (key, value) pairs, each element being one native `int`.
 *
 * Returns:
 *   - NULL if the file cannot be opened;
 *   - the populated hash once File_read() reports end of input (0);
 *   - a fresh hash from Hash_new() if File_read() reports an error (-1).
 *
 * A pair is added only after BOTH of its elements have been read.  A
 * trailing key that has no value (input ends in the middle of a pair) is
 * dropped rather than being stored with an indeterminate value.
 *
 * NOTE(review): the descriptor returned by File_openRead() is never closed
 * in this function.  The matching close routine is not visible in this
 * excerpt, so it has not been added here -- confirm and close `fd` on every
 * exit path.
 */
Hash_T Hash_load(const char *file) {
    int fd;
    Hash_T h = NULL;

    if ((fd = File_openRead(file)) == -1) {
        return NULL;
    }

    h = Hash_new();
    for (;;) {
        int pair[2];
        int i;

        for (i = 0; i < 2; i++) {
            int got = File_read(fd, &pair[i], sizeof(int));
            if (got == -1) {
                goto err;
            }
            if (got == 0) {
                /* End of input: everything read so far is already stored. */
                return h;
            }
        }

        /* Both halves of the pair are valid at this point. */
        Hash_add(h, pair[0], pair[1]);
    }

err:
    Hash_free(&h);
    return Hash_new();
}
Esempio n. 5
0
// Initialize a LexiconWriter.
//
// Runs the DataWriter initializer, copies the index and skip intervals from
// the Schema's Architecture, and sets the remaining members to their
// starting values.  Returns `self`.
LexiconWriter*
LexWriter_init(LexiconWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader)
{
    Architecture *arch = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    // Intervals come from the Architecture.
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);

    // No output streams and no term stepper yet.
    self->ix_out       = NULL;
    self->ixix_out     = NULL;
    self->dat_out      = NULL;
    self->term_stepper = NULL;

    // Counters and mode flag.
    self->count     = 0;
    self->ix_count  = 0;
    self->temp_mode = false;

    // Owned helper objects.
    self->dat_file      = CB_new(30);
    self->ix_file       = CB_new(30);
    self->ixix_file     = CB_new(30);
    self->counts        = Hash_new(0);
    self->ix_counts     = Hash_new(0);
    self->tinfo_stepper = (TermStepper*)MatchTInfoStepper_new(schema);

    return self;
}
Esempio n. 6
0
/* Hash_new() must return a non-NULL hash for the arguments 0, 1 and 2. */
static void testNew()
{
  /* argument 0 */
  {
    Hash_T h0 = Hash_new(0);
    EXPECT_NOT_NULL(h0);
  }

  /* argument 1 */
  {
    Hash_T h1 = Hash_new(1);
    EXPECT_NOT_NULL(h1);
  }

  /* argument 2 */
  {
    Hash_T h2 = Hash_new(2);
    EXPECT_NOT_NULL(h2);
  }
}
Esempio n. 7
0
/* Hash_free() must leave the caller's handle NULL, for hashes created with
 * the arguments 0, 1 and 2. */
static void testFree()
{
  /* created with 0 */
  {
    Hash_T h0 = Hash_new(0);
    Hash_free(&h0);
    EXPECT_NULL(h0);
  }

  /* created with 1 */
  {
    Hash_T h1 = Hash_new(1);
    Hash_free(&h1);
    EXPECT_NULL(h1);
  }

  /* created with 2 */
  {
    Hash_T h2 = Hash_new(2);
    Hash_free(&h2);
    EXPECT_NULL(h2);
  }
}
Esempio n. 8
0
Hash*
FullTextType_Dump_For_Schema_IMP(FullTextType *self) {
    FullTextTypeIVARS *const ivars = FullTextType_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash_Store_Utf8(dump, "type", 4, (Obj*)Str_newf("fulltext"));

    // Store attributes that override the defaults.
    if (ivars->boost != 1.0) {
        Hash_Store_Utf8(dump, "boost", 5, (Obj*)Str_newf("%f64", ivars->boost));
    }
    if (!ivars->indexed) {
        Hash_Store_Utf8(dump, "indexed", 7, (Obj*)CFISH_FALSE);
    }
    if (!ivars->stored) {
        Hash_Store_Utf8(dump, "stored", 6, (Obj*)CFISH_FALSE);
    }
    if (ivars->sortable) {
        Hash_Store_Utf8(dump, "sortable", 8, (Obj*)CFISH_TRUE);
    }
    if (ivars->highlightable) {
        Hash_Store_Utf8(dump, "highlightable", 13, (Obj*)CFISH_TRUE);
    }

    return dump;
}
Esempio n. 9
0
/* Insert three entries into a hash created with Hash_new(0), checking
 * nElements and tableSize after each step, then free the hash. */
static void testInsert()
{
  Hash_T h0 = Hash_new(0);
  EXPECT_EQ_UINT32(0, h0->nElements);
  EXPECT_EQ_UINT32(0, h0->tableSize);

  /* 1st entry: positive integer key, string-literal value. */
  int positiveKey = 123;
  char* literalValue = "1";
  h0 = Hash_insert(h0, Atom_newFromInt64(positiveKey), literalValue);
  EXPECT_NOT_NULL(h0);
  EXPECT_EQ_UINT32(1, h0->nElements);
  EXPECT_EQ_UINT32(1, h0->tableSize);

  /* 2nd entry: negative integer key, value held in a local array. */
  int negativeKey = -27;
  char arrayValue[] = "value2";
  h0 = Hash_insert(h0, Atom_newFromInt64(negativeKey), arrayValue);
  EXPECT_NOT_NULL(h0);
  EXPECT_EQ_UINT32(2, h0->nElements);
  EXPECT_EQ_UINT32(3, h0->tableSize);

  /* 3rd entry: string key, value taken from a Str_T built from an int16. */
  char * stringKey = "abc";
  int16_t number = 1056;
  Str_T numberStr = Str_newFromInt16(number);
  h0 = Hash_insert(h0, Atom_newFromString(stringKey), Str_str(numberStr));
  EXPECT_NOT_NULL(h0);
  EXPECT_EQ_UINT32(3, h0->nElements);
  EXPECT_EQ_UINT32(3, h0->tableSize);
  Str_free(&numberStr);

  Hash_free(&h0);
  EXPECT_NULL(h0);
}
Esempio n. 10
0
// Create a test data structure including at least one each of Hash, Vector,
// and String.
static Obj*
S_make_dump() {
    Hash *dump = Hash_new(0);
    Hash_Store_Utf8(dump, "foo", 3, (Obj*)Str_newf("foo"));
    Hash_Store_Utf8(dump, "stuff", 5, (Obj*)Vec_new(0));
    return (Obj*)dump;
}
Esempio n. 11
0
/* Check that a Hash survives a Hash_Dump() / Obj_Load() round trip. */
static void
test_Dump_and_Load(TestBatch *batch) {
    Hash *hash = Hash_new(0);
    Hash_Store_Str(hash, "foo", 3,
                   (Obj*)CB_new_from_trusted_utf8("foo", 3));

    Obj  *dumped   = (Obj*)Hash_Dump(hash);
    Hash *restored = (Hash*)Obj_Load(dumped, dumped);
    TEST_TRUE(batch, Hash_Equals(hash, (Obj*)restored),
              "Dump => Load round trip");
    DECREF(dumped);
    DECREF(restored);

    /* TODO: Fix Hash_Load(), then re-enable the check below, which stores
     * a "_class" entry that does not name a real class and expects the
     * round trip to keep working.

    Hash_Store_Str(hash, "_class", 6,
        (Obj*)CB_new_from_trusted_utf8("not_a_class", 11));
    dump = (Obj*)Hash_Dump(hash);
    loaded = (Hash*)Obj_Load(dump, dump);

    TEST_TRUE(batch, Hash_Equals(hash, (Obj*)loaded),
              "Load still works with _class if it's not a real class");
    DECREF(dump);
    DECREF(loaded);

    */

    DECREF(hash);
}
Esempio n. 12
0
Hash*
DataWriter_Metadata_IMP(DataWriter *self) {
    Hash *metadata = Hash_new(0);
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", DataWriter_Format(self)));
    return metadata;
}
Esempio n. 13
0
// Create a test data structure including at least one each of Hash, VArray,
// and CharBuf.
static Obj*
S_make_dump() {
    Hash *dump = Hash_new(0);
    Hash_Store_Str(dump, "foo", 3, (Obj*)CB_newf("foo"));
    Hash_Store_Str(dump, "stuff", 5, (Obj*)VA_new(0));
    return (Obj*)dump;
}
Esempio n. 14
0
Hash*
SnowStop_gen_stoplist(String *language) {
    char lang[2];
    lang[0] = tolower(Str_Code_Point_At(language, 0));
    lang[1] = tolower(Str_Code_Point_At(language, 1));
    const uint8_t **words = NULL;
    if (memcmp(lang, "da", 2) == 0)      { words = SnowStop_snow_da; }
    else if (memcmp(lang, "de", 2) == 0) { words = SnowStop_snow_de; }
    else if (memcmp(lang, "en", 2) == 0) { words = SnowStop_snow_en; }
    else if (memcmp(lang, "es", 2) == 0) { words = SnowStop_snow_es; }
    else if (memcmp(lang, "fi", 2) == 0) { words = SnowStop_snow_fi; }
    else if (memcmp(lang, "fr", 2) == 0) { words = SnowStop_snow_fr; }
    else if (memcmp(lang, "hu", 2) == 0) { words = SnowStop_snow_hu; }
    else if (memcmp(lang, "it", 2) == 0) { words = SnowStop_snow_it; }
    else if (memcmp(lang, "nl", 2) == 0) { words = SnowStop_snow_nl; }
    else if (memcmp(lang, "no", 2) == 0) { words = SnowStop_snow_no; }
    else if (memcmp(lang, "pt", 2) == 0) { words = SnowStop_snow_pt; }
    else if (memcmp(lang, "ru", 2) == 0) { words = SnowStop_snow_ru; }
    else if (memcmp(lang, "sv", 2) == 0) { words = SnowStop_snow_sv; }
    else {
        return NULL;
    }
    size_t num_stopwords = 0;
    for (uint32_t i = 0; words[i] != NULL; i++) { num_stopwords++; }
    Hash *stoplist = Hash_new(num_stopwords);
    for (uint32_t i = 0; words[i] != NULL; i++) {
        char *word = (char*)words[i];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        Hash_Store(stoplist, stop, (Obj*)CFISH_TRUE);
        DECREF(stop);
    }
    return (Hash*)stoplist;
}
Esempio n. 15
0
// Initialize a Schema: create the analyzers/types/sims hashes and the
// uniq_analyzers vector (resized to 1), then obtain the Architecture via
// Schema_Architecture() and the Similarity via Arch_Make_Similarity().
// Returns `self`.
Schema*
Schema_init(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);

    // Lookup tables.
    ivars->analyzers = Hash_new(0);
    ivars->types     = Hash_new(0);
    ivars->sims      = Hash_new(0);

    // Vector of unique analyzers, resized to 1 right after creation.
    ivars->uniq_analyzers = Vec_new(2);
    Vec_Resize(ivars->uniq_analyzers, 1);

    // The Similarity is made by the Architecture, so arch must come first.
    ivars->arch = Schema_Architecture(self);
    ivars->sim  = Arch_Make_Similarity(ivars->arch);

    return self;
}
Esempio n. 16
0
// Dump a Schema to a Hash with three entries:
//   "_class"    - a clone of the Schema's class name
//   "analyzers" - Freezer_dump() of the unique analyzers
//   "fields"    - a Hash mapping each field name to the dump of its type
//
// FullTextType fields use the simplified per-schema dump plus an
// "analyzer" entry holding the analyzer's position in uniq_analyzers.
// StringType and BlobType fields use the simplified per-schema dump.
// Every other type falls back to FType_Dump().
Hash*
Schema_Dump_IMP(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    Hash *dump       = Hash_new(0);
    Hash *type_dumps = Hash_new(Hash_Get_Size(ivars->types));

    Hash_Store_Utf8(dump, "_class", 6,
                    (Obj*)Str_Clone(Schema_get_class_name(self)));
    Hash_Store_Utf8(dump, "analyzers", 9,
                    Freezer_dump((Obj*)ivars->uniq_analyzers));
    Hash_Store_Utf8(dump, "fields", 6, (Obj*)type_dumps);

    HashIterator *iter = HashIter_new(ivars->types);
    while (HashIter_Next(iter)) {
        String    *field = HashIter_Get_Key(iter);
        FieldType *type  = (FieldType*)HashIter_Get_Value(iter);
        Class     *klass = FType_get_class(type);
        Obj       *type_dump;

        if (klass == FULLTEXTTYPE) {
            FullTextType *fttype  = (FullTextType*)type;
            Hash         *ft_dump = FullTextType_Dump_For_Schema(fttype);
            Analyzer     *analyzer = FullTextType_Get_Analyzer(fttype);
            uint32_t      tick
                = S_find_in_array(ivars->uniq_analyzers, (Obj*)analyzer);

            // Reference the analyzer by its tick in uniq_analyzers.
            Hash_Store_Utf8(ft_dump, "analyzer", 8,
                            (Obj*)Str_newf("%u32", tick));
            type_dump = (Obj*)ft_dump;
        }
        else if (klass == STRINGTYPE || klass == BLOBTYPE) {
            type_dump = (Obj*)FType_Dump_For_Schema(type);
        }
        else {
            // Unknown FieldType, so use the generic dump.
            type_dump = FType_Dump(type);
        }

        Hash_Store(type_dumps, field, type_dump);
    }
    DECREF(iter);

    return dump;
}
Esempio n. 17
0
/* Reset a Snapshot: release the current path and entries, then leave the
 * Snapshot with no path and a freshly created entries Hash. */
static void
S_zero_out(Snapshot *self)
{
    DECREF(self->path);
    self->path = NULL;

    DECREF(self->entries);
    self->entries = Hash_new(0);
}
Esempio n. 18
0
/* Initialize a RAMFolder: run the Folder initializer, create the `elems`
 * Hash, and call S_read_fsfolder() when the folder's path is non-empty.
 * Returns `self`. */
RAMFolder*
RAMFolder_init(RAMFolder *self, const CharBuf *path)
{
    Folder_init((Folder*)self, path);
    self->elems = Hash_new(16);

    /* self->path is read here after Folder_init() has run. */
    const int has_path = CB_Get_Size(self->path) != 0;
    if (has_path) {
        S_read_fsfolder(self);
    }

    return self;
}
Esempio n. 19
0
// Reset a Snapshot: release the current path and entries, then leave the
// Snapshot with no path and a freshly created entries Hash.
static void
S_zero_out(Snapshot *self) {
    SnapshotIVARS *const members = Snapshot_IVARS(self);

    DECREF(members->path);
    members->path = NULL;

    DECREF(members->entries);
    members->entries = Hash_new(0);
}
Esempio n. 20
0
/* Reset a Snapshot: release the current filename and entries, then leave
 * the Snapshot with no filename and a freshly created entries Hash. */
static void
S_zero_out(Snapshot *self)
{
    DECREF(self->filename);
    self->filename = NULL;

    DECREF(self->entries);
    self->entries = Hash_new(0);
}
Esempio n. 21
0
/* Create a hash set whose backing hash is built from the supplied
 * `hashCode` and `equals` callbacks. */
T HashSet_new (int (*hashCode)(Poly_t), Poly_tyEquals equals)
{
  T result;

  Mem_NEW(result);

  /* The last argument is 0 because no "dup" is used for sets. */
  result->hash = Hash_new (hashCode, equals, 0);

  return result;
}
Esempio n. 22
0
// Dump a Query to a Hash with two entries: "_class", a clone of the
// object's class name, and "boost", the boost rendered with "%f64".
// The Hash is returned as an Obj*.
Obj*
Query_Dump_IMP(Query *self) {
    QueryIVARS *ivars = Query_IVARS(self);
    Hash *result = Hash_new(0);

    Obj *class_name = (Obj*)Str_Clone(Obj_Get_Class_Name((Obj*)self));
    Hash_Store_Utf8(result, "_class", 6, class_name);

    Obj *boost = (Obj*)Str_newf("%f64", (double)ivars->boost);
    Hash_Store_Utf8(result, "boost", 5, boost);

    return (Obj*)result;
}
Esempio n. 23
0
// Exercise Hash_Keys(), Hash_Values(), the Hash_Iterate()/Hash_Next()
// cursor and Hash_Find_Key() against a hash filled with 500 CharBufs built
// by CB_newf("%u32", i), each stored as both key and value.
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash     *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray   *expected = VA_new(100);
    VArray   *keys;
    VArray   *values;

    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }

    // Sort everything before comparing so that ordering inside the hash
    // does not matter.
    VA_Sort(expected, NULL, NULL);

    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");

    // Reuse the two arrays for the cursor-based pass.
    VA_Clear(keys);
    VA_Clear(values);

    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }

    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        // This statement used to end in ',' which chained it to the
        // following TEST_TRUE through the comma operator.
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
Esempio n. 24
0
// Initialize a SortWriter: run the DataWriter initializer, then set every
// SortWriter member to its starting value.  The field_writers array is
// created with room for one more than the Schema's field count.
// Returns `self`.
SortWriter*
SortWriter_init(SortWriter *self, Schema *schema, Snapshot *snapshot,
                Segment *segment, PolyReader *polyreader) {
    uint32_t num_slots = Schema_Num_Fields(schema) + 1;
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    // No temp output streams yet.
    ivars->temp_ord_out = NULL;
    ivars->temp_ix_out  = NULL;
    ivars->temp_dat_out = NULL;

    // Scalars.
    ivars->mem_thresh      = default_mem_thresh;
    ivars->flush_at_finish = false;

    // Owned containers and the memory pool.
    ivars->field_writers = VA_new(num_slots);
    ivars->counts        = Hash_new(0);
    ivars->null_ords     = Hash_new(0);
    ivars->ord_widths    = Hash_new(0);
    ivars->mem_pool      = MemPool_new(0);

    return self;
}
Esempio n. 25
0
// Initialize a SegWriter: run the DataWriter initializer, create the
// by_api Hash, the Inverter and the writers vector, then hand the writer to
// the Architecture via Arch_Init_Seg_Writer().  Returns `self`.
SegWriter*
SegWriter_init(SegWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *architecture = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    SegWriterIVARS *const members = SegWriter_IVARS(self);
    members->by_api   = Hash_new(0);
    members->inverter = Inverter_new(schema, segment);
    members->writers  = Vec_new(16);

    // Called last, once the members above are in place.
    Arch_Init_Seg_Writer(architecture, self);

    return self;
}
Esempio n. 26
0
// Install `sub_readers` as this PolyReader's sub-readers and rebuild the
// state derived from them:
//   - doc_max:    the sum of SegReader_Doc_Max() over all sub-readers;
//   - offsets:    for each sub-reader, the doc_max accumulated before it;
//   - components: for each component API found in any sub-reader, the
//                 aggregator returned by DataReader_Aggregator(), stored
//                 only when that aggregator is non-NULL;
//   - del_count:  the deletion count reported by the DELETIONSREADER
//                 component, or 0 when there is none.
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t  num_sub_readers = VA_Get_Size(sub_readers);
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    // Scratch map: component API name => VArray of per-segment DataReaders.
    Hash  *data_readers = Hash_new(0);

    // Drop the previous sub-readers and offsets before installing new ones.
    // NOTE(review): the old array is DECREF'd before the new one is
    // INCREF'd -- confirm no caller passes the PolyReader's current
    // sub_readers array back in.
    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers       = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        // This sub-reader starts where the ones before it ended.
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                // First time this API is seen: create its per-segment array.
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            // Stored at index i so the slot matches the sub-reader's tick.
            VA_Store(readers, i, INCREF(component));
        }
    }
    // Presumably the I32Array takes ownership of `starts` ("steal"); it is
    // not freed in this function -- TODO confirm.
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // Build one aggregator per API from the per-segment readers.
    CharBuf *api;
    VArray  *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        // The first non-NULL reader is the one asked to build the
        // aggregator for this API.
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Look up the deletions component (keyed by the DELETIONSREADER name)
    // to obtain the deletion count.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
Esempio n. 27
0
// Feed Json_to_json() a Hash that contains itself and check that it
// returns NULL and sets the Err error.
static void
test_max_depth(TestBatch *batch) {
    // A hash that holds a reference to itself under "circular".
    Hash *self_ref = Hash_new(0);
    Hash_Store_Str(self_ref, "circular", 8, INCREF(self_ref));

    // Clear any earlier error so the second check is meaningful.
    Err_set_error(NULL);
    CharBuf *json = Json_to_json((Obj*)self_ref);

    TEST_TRUE(batch, json == NULL,
              "to_json returns NULL when fed recursing data");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "to_json sets Err_error when fed recursing data");

    // Remove the self-reference before releasing the hash.
    DECREF(Hash_Delete_Str(self_ref, "circular", 8));
    DECREF(self_ref);
}
Esempio n. 28
0
// Write "merge.json" into the manager's folder.  The file content is a
// Hash with one entry, "cutoff", holding `cutoff` rendered with "%i64".
// Throws ERR when Json_spew_json() reports failure.
void
IxManager_Write_Merge_Data_IMP(IndexManager *self, int64_t cutoff) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    String *filename = SSTR_WRAP_C("merge.json");

    Hash *payload = Hash_new(1);
    Hash_Store_Utf8(payload, "cutoff", 6, (Obj*)Str_newf("%i64", cutoff));

    bool written = Json_spew_json((Obj*)payload, ivars->folder, filename);

    // The payload is released before the result is examined, so it is
    // freed on both the success and the failure path.
    DECREF(payload);

    if (!written) {
        THROW(ERR, "Failed to write to %o", filename);
    }
}
Esempio n. 29
0
// Feed Json_to_json() a Hash that contains itself and check that it
// returns NULL and sets the global error.
static void
test_max_depth(TestBatchRunner *runner) {
    // A hash that holds a reference to itself under "circular".
    Hash *self_ref = Hash_new(0);
    Hash_Store_Utf8(self_ref, "circular", 8, INCREF(self_ref));

    // Clear any earlier error so the second check is meaningful.
    Err_set_error(NULL);
    String *json = Json_to_json((Obj*)self_ref);

    TEST_TRUE(runner, json == NULL,
              "to_json returns NULL when fed recursing data");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "to_json sets global error when fed recursing data");

    // Remove the self-reference before releasing the hash.
    DECREF(Hash_Delete_Utf8(self_ref, "circular", 8));
    DECREF(self_ref);
}
Esempio n. 30
0
// Initialize a Segment with the given number.
//
// Throws ERR when `number` is negative.  Otherwise the segment gets fresh
// metadata/by_name hashes, a by_num array that already holds one CharBuf
// created from "" (so real field numbers start at 1), a zero count, and a
// name derived from the number.  Returns `self`.
Segment*
Seg_init(Segment *self, int64_t number) {
    if (number < 0) { THROW(ERR, "Segment number %i64 less than 0", number); }

    // Identity.
    self->number = number;
    self->count  = 0;

    // Containers.
    self->metadata = Hash_new(0);
    self->by_name  = Hash_new(0);
    self->by_num   = VA_new(2);

    // Slot 0 is a placeholder: field numbers start at 1, not 0.
    VA_Push(self->by_num, (Obj*)CB_newf(""));

    // The name is computed from the segment number.
    self->name = Seg_num_to_name(number);

    return self;
}