Example #1
0
static String*
S_del_filename(DefaultDeletionsWriter *self, SegReader *target_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Segment *target_seg = SegReader_Get_Segment(target_reader);
    return Str_newf("%o/deletions-%o.bv", Seg_Get_Name(ivars->segment),
                    Seg_Get_Name(target_seg));
}
Example #2
0
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    String    *meta;
    String    *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String    *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);

    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (String*)Seg_Fetch_Metadata_Utf8(got, "foo", 3);
    TEST_TRUE(runner,
              meta
              && Str_Is_A(meta, STRING)
              && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
Example #3
0
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (!ivars->dat_out) {
        Segment  *segment  = ivars->segment;
        Folder   *folder   = ivars->folder;
        String   *seg_name = Seg_Get_Name(segment);

        // Open outstreams.
        String *ix_file = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!ivars->ix_out) { RETHROW(INCREF(Err_get_error())); }

        String *dat_file = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!ivars->dat_out) { RETHROW(INCREF(Err_get_error())); }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }

    return ivars->dat_out;
}
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num)
{
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Open outstreams. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->dat_out)  { THROW("Can't open %o", self->dat_file); }
    if (!self->ix_out)   { THROW("Can't open %o", self->ix_file); }
    if (!self->ixix_out) { THROW("Can't open %o", self->ixix_file); }

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
Example #5
0
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions    = NULL;
    Segment *segment      = SegReader_Get_Segment(seg_reader);
    String  *seg_name     = Seg_Get_Name(segment);
    Integer32 *tick_obj   = (Integer32*)Hash_Fetch(ivars->name_to_tick,
                                                   (Obj*)seg_name);
    int32_t tick          = tick_obj ? Int32_Get_Value(tick_obj) : 0;
    SegReader *candidate  = tick_obj
                            ? (SegReader*)VA_Fetch(ivars->seg_readers, tick)
                            : NULL;

    if (tick_obj) {
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, Class_Get_Name(DELETIONSREADER));
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }

    return deletions;
}
Example #6
0
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash    *const metadata = super_meta(self);
    Hash    *const files    = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment   *segment   = SegReader_Get_Segment(seg_reader);
            Hash      *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
Example #7
0
static void
test_Write_File_and_Read_File(TestBatch *batch)
{
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    CharBuf   *meta;
    CharBuf   *flotsam = (CharBuf*)ZCB_WRAP_STR("flotsam", 7);
    CharBuf   *jetsam  = (CharBuf*)ZCB_WRAP_STR("jetsam", 6);

    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Str(segment, "foo", 3, (Obj*)CB_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);
    
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(batch, Seg_Get_Count(got) == Seg_Get_Count(segment), 
        "Round-trip count through file");
    TEST_TRUE(batch, 
        Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam), 
        "Round trip field names through file");
    meta = (CharBuf*)Seg_Fetch_Metadata_Str(got, "foo", 3);
    TEST_TRUE(batch, meta && CB_Is_A(meta, CHARBUF) 
        && CB_Equals_Str(meta, "bar", 3), "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
Example #8
0
static OutStream*
S_lazy_init(HighlightWriter *self)
{
    if (!self->dat_out) {
        Segment  *segment  = self->segment;
        Folder   *folder   = self->folder;
        CharBuf  *seg_name = Seg_Get_Name(segment);

        // Open outstreams. 
        {
            CharBuf *ix_file = CB_newf("%o/highlight.ix", seg_name);
            self->ix_out = Folder_Open_Out(folder, ix_file);
            DECREF(ix_file);
            if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
        }
        {
            CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
            self->dat_out = Folder_Open_Out(folder, dat_file);
            DECREF(dat_file);
            if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
        }

        // Go past invalid doc 0. 
        OutStream_Write_I64(self->ix_out, 0);
    }

    return self->dat_out;
}
Example #9
0
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num)
{
    Segment   *const segment  = LexWriter_Get_Segment(self);
    Folder    *const folder   = LexWriter_Get_Folder(self);
    Schema    *const schema   = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name = Seg_Get_Name(segment);
    CharBuf   *const field    = Seg_Field_Name(segment, field_num);
    FieldType *const type    = Schema_Fetch_Type(schema, field);

    // Open outstreams. 
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    self->dat_out = Folder_Open_Out(folder, self->dat_file);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
    self->ix_out = Folder_Open_Out(folder, self->ix_file);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->ixix_out) { RETHROW(INCREF(Err_get_error())); }

    // Initialize count and ix_count, term stepper and term info stepper. 
    self->count    = 0;
    self->ix_count = 0;
    self->term_stepper = FType_Make_Term_Stepper(type);
    TermStepper_Reset(self->tinfo_stepper);
}
static OutStream*
S_lazy_init(HighlightWriter *self)
{
    if (!self->dat_out) {
        Segment  *segment  = self->segment;
        Folder   *folder   = self->folder;
        Snapshot *snapshot = HLWriter_Get_Snapshot(self);
        CharBuf  *seg_name = Seg_Get_Name(segment);
        CharBuf  *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf  *dat_file = CB_newf("%o/highlight.dat", seg_name);

        /* Open outstreams. */
        Snapshot_Add_Entry(snapshot, ix_file);
        Snapshot_Add_Entry(snapshot, dat_file);
        self->ix_out  = Folder_Open_Out(folder, ix_file);
        self->dat_out = Folder_Open_Out(folder, dat_file);
        if (!self->ix_out)  { THROW("Can't open %o", ix_file); }
        if (!self->dat_out) { THROW("Can't open %o", dat_file); }
        DECREF(ix_file);
        DECREF(dat_file);

        /* Go past invalid doc 0. */
        OutStream_Write_U64(self->ix_out, 0);
    }

    return self->dat_out;
}
Example #11
0
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Segment *segment;

    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    segment = SegReader_Get_Segment(self);

    ivars->doc_max    = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name   = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num    = Seg_Get_Number(segment);
    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, Class_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;

    return self;
}
Example #12
0
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs   = VA_Get_Size(ivars->runs);
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path  = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);

        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
Example #13
0
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, VArray *segments,
                 int32_t seg_tick)
{
    Segment *segment;
    Hash    *metadata; 
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
        segments, seg_tick);
    segment  = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors", 12);
    }
    
    // Check format. 
    if (metadata) {
        Obj     *format    = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64", 
                    Obj_To_I64(format));
            }
        }
    }


    // Open instreams. 
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }   
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }   
        }   
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Example #14
0
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name  = Seg_Get_Name(segment);
        String *ix_file   = Str_newf("%o/documents.ix", seg_name);
        String *dat_file  = Str_newf("%o/documents.dat", seg_name);
        Obj     *format   = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64", format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Example #15
0
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t  field_num = Seg_Field_Num(segment, field);
    String  *seg_name  = Seg_Get_Name(segment);
    String  *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String  *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo        = TInfo_new(0);
    ivars->tick         = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    ivars->size    = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
            (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
void
HLWriter_delete_segment(HighlightWriter *self, SegReader *reader)
{
    CharBuf  *merged_seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    Snapshot *snapshot = HLWriter_Get_Snapshot(self);
    CharBuf  *ix_file  = CB_newf("%o/highlight.ix", merged_seg_name);
    CharBuf  *dat_file = CB_newf("%o/highlight.dat", merged_seg_name);
    Snapshot_Delete_Entry(snapshot, ix_file);
    Snapshot_Delete_Entry(snapshot, dat_file);
    DECREF(ix_file);
    DECREF(dat_file);
}
Example #17
0
static void
test_seg_name_and_num(TestBatchRunner *runner) {
    Segment *segment_z = Seg_new(35);
    String *seg_z_name = Seg_num_to_name(35);
    TEST_TRUE(runner, Seg_Get_Number(segment_z) == INT64_C(35), "Get_Number");
    TEST_TRUE(runner, Str_Equals_Utf8(Seg_Get_Name(segment_z), "seg_z", 5),
              "Get_Name");
    TEST_TRUE(runner, Str_Equals_Utf8(seg_z_name, "seg_z", 5),
              "num_to_name");
    DECREF(seg_z_name);
    DECREF(segment_z);
}
Example #18
0
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment    = DefHLReader_Get_Segment(self);
    Hash *metadata      = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
Example #19
0
static void
test_seg_name_and_num(TestBatch *batch)
{
    Segment *segment_z = Seg_new(35);
    CharBuf *seg_z_name = Seg_num_to_name(35);
    TEST_TRUE(batch, Seg_Get_Number(segment_z) == I64_C(35), "Get_Number");
    TEST_TRUE(batch, CB_Equals_Str(Seg_Get_Name(segment_z), "seg_z", 5), 
        "Get_Name");
    TEST_TRUE(batch, CB_Equals_Str(seg_z_name, "seg_z", 5), 
        "num_to_name");
    DECREF(seg_z_name);
    DECREF(segment_z);
}
Example #20
0
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder  *folder = PolyReader_Get_Folder(polyreader);
    CharBuf *filename
        = CB_newf("%o/postings-%i32.dat", Seg_Get_Name(segment), field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, filename);
    if (!self->outstream) { RETHROW(INCREF(Err_get_error())); }
    DECREF(filename);
    return self;
}
Example #21
0
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder, 
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick)
{
    Hash *metadata; 
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
        seg_tick);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9);

    if (metadata) {
        CharBuf *seg_name  = Seg_Get_Name(segment);
        CharBuf *ix_file   = CB_newf("%o/documents.ix", seg_name);
        CharBuf *dat_file  = CB_newf("%o/documents.dat", seg_name);
        Obj     *format    = Hash_Fetch_Str(metadata, "format", 6);

        /* Check format. */
        if (!format) { THROW("Missing 'format' var"); }
        else {
            i64_t format_val = Obj_To_I64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW("Obsolete doc storage format %i64; "
                    "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW("Unsupported doc storage format: %i64", format_val);
            }
        }

        /* Get streams. */
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in  = Folder_Open_In(folder, ix_file);
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->ix_in || !self->dat_in) {
                CharBuf *mess = MAKE_MESS("Can't open either %o or %o",
                    ix_file, dat_file);
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                Err_throw_mess(mess);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    
    return self;
}
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    ivars->flipped = true;

    // Sanity check.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }

    ivars->flipped = true;
}
Example #23
0
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder, 
              Segment *segment, const CharBuf *field)
{
    i32_t    field_num = Seg_Field_Num(segment, field);
    CharBuf *seg_name  = Seg_Get_Name(segment);
    CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    CharBuf *ix_file   = CB_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    /* Init. */
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);
    self->tick  = 0;

    /* Derive */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (!self->field_type) {
        CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    INCREF(self->field_type);
    self->ixix_in = Folder_Open_In(folder, ixix_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    if (!self->ixix_in || !self->ix_in) {
        CharBuf *mess =
             MAKE_MESS("Can't open either %o or %o", ix_file, ixix_file);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);
    self->size    = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t));
    self->offsets = (i64_t*)InStream_Buf(self->ixix_in,
        (size_t)InStream_Length(self->ixix_in));
    self->data = InStream_Buf(self->ix_in, InStream_Length(self->ix_in));
    self->limit = self->data + InStream_Length(self->ix_in);

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
Example #24
0
void
SegWriter_Delete_Segment_IMP(SegWriter *self, SegReader *reader) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    Snapshot *snapshot = SegWriter_Get_Snapshot(self);
    String   *seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));

    // Have all the sub-writers delete the segment.
    for (size_t i = 0, max = Vec_Get_Size(ivars->writers); i < max; i++) {
        DataWriter *writer = (DataWriter*)Vec_Fetch(ivars->writers, i);
        DataWriter_Delete_Segment(writer, reader);
    }
    DelWriter_Delete_Segment(ivars->del_writer, reader);

    // Remove seg directory from snapshot.
    Snapshot_Delete_Entry(snapshot, seg_name);
}
Example #25
0
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
Example #26
0
void
DefDelWriter_merge_segment(DefaultDeletionsWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Str(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Str(del_meta, "files", 5);
        if (files) {
            CharBuf *seg;
            Hash *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    CharBuf *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (CB_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(Hash_Fetch_Str(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, VTable_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
        }
    }
}
Example #27
0
// Indicate whether it is safe to build a SegLexicon using the given
// parameters. Will return false if the field is not indexed or if no terms
// are present for this field in this segment.
static bool
S_has_data(Schema *schema, Folder *folder, Segment *segment, String *field) {
    FieldType *type = Schema_Fetch_Type(schema, field);

    if (!type || !FType_Indexed(type)) {
        // If the field isn't indexed, bail out.
        return false;
    }
    else {
        // Bail out if there are no terms for this field in this segment.
        int32_t  field_num = Seg_Field_Num(segment, field);
        String  *seg_name  = Seg_Get_Name(segment);
        String  *file = Str_newf("%o/lexicon-%i32.dat", seg_name, field_num);
        bool retval = Folder_Exists(folder, file);
        DECREF(file);
        return retval;
    }
}
Example #28
0
void
SegWriter_Prep_Seg_Dir_IMP(SegWriter *self) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    Folder *folder   = SegWriter_Get_Folder(self);
    String *seg_name = Seg_Get_Name(ivars->segment);

    // Clear stale segment files from crashed indexing sessions.
    if (Folder_Exists(folder, seg_name)) {
        bool result = Folder_Delete_Tree(folder, seg_name);
        if (!result) {
            THROW(ERR, "Couldn't completely remove '%o'", seg_name);
        }
    }

    // Create the segment directory.
    bool result = Folder_MkDir(folder, seg_name);
    if (!result) { RETHROW(INCREF(Err_get_error())); }
}
void
LexWriter_delete_segment(LexiconWriter *self, SegReader *reader)
{
    Snapshot *snapshot = LexWriter_Get_Snapshot(self);
    CharBuf  *merged_seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    CharBuf  *pattern  = CB_newf("%o/lexicon", merged_seg_name);
    VArray   *files = Snapshot_List(snapshot);
    VArray   *my_old_files = VA_Grep(files, S_my_file, pattern);
    u32_t i, max;

    for (i = 0, max = VA_Get_Size(my_old_files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(my_old_files, i);
        Snapshot_Delete_Entry(snapshot, entry);
    }
    DECREF(my_old_files);
    DECREF(files);
    DECREF(pattern);
}
Example #30
0
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    SortFieldWriter *field_writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);
    if (!field_writer) {

        // Open temp files.
        if (!ivars->temp_ord_out) {
            Folder  *folder   = ivars->folder;
            CharBuf *seg_name = Seg_Get_Name(ivars->segment);
            CharBuf *path     = CB_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ord_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            CB_setf(path, "%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ix_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            CB_setf(path, "%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_dat_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            DECREF(path);
        }

        CharBuf *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment,
                                  ivars->polyreader, field, ivars->mem_pool,
                                  ivars->mem_thresh, ivars->temp_ord_out,
                                  ivars->temp_ix_out, ivars->temp_dat_out);
        VA_Store(ivars->field_writers, field_num, (Obj*)field_writer);
    }
    return field_writer;
}