示例#1
0
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs   = VA_Get_Size(ivars->runs);
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path  = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);

        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
示例#2
0
文件: DataWriter.c 项目: apache/lucy
DataWriter*
DataWriter_init(DataWriter *self, Schema *schema, Snapshot *snapshot,
                Segment *segment, PolyReader *polyreader) {
    DataWriterIVARS *const ivars = DataWriter_IVARS(self);
    ivars->snapshot   = (Snapshot*)INCREF(snapshot);
    ivars->segment    = (Segment*)INCREF(segment);
    ivars->polyreader = (PolyReader*)INCREF(polyreader);
    ivars->schema     = (Schema*)INCREF(schema);
    ivars->folder     = (Folder*)INCREF(PolyReader_Get_Folder(polyreader));
    ABSTRACT_CLASS_CHECK(self, DATAWRITER);
    return self;
}
示例#3
0
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder  *folder = PolyReader_Get_Folder(polyreader);
    CharBuf *filename
        = CB_newf("%o/postings-%i32.dat", Seg_Get_Name(segment), field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, filename);
    if (!self->outstream) { RETHROW(INCREF(Err_get_error())); }
    DECREF(filename);
    return self;
}
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    ivars->flipped = true;

    // Sanity check.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }

    ivars->flipped = true;
}
int32_t
SortFieldWriter_Finish_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);

    // Bail if there's no data.
    if (!SortFieldWriter_Peek(self)) { return 0; }

    int32_t  field_num = ivars->field_num;
    Folder  *folder    = PolyReader_Get_Folder(ivars->polyreader);
    String  *seg_name  = Seg_Get_Name(ivars->segment);

    // Open streams.
    String *ord_path = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    OutStream *ord_out = Folder_Open_Out(folder, ord_path);
    DECREF(ord_path);
    if (!ord_out) { RETHROW(INCREF(Err_get_error())); }
    OutStream *ix_out = NULL;
    if (ivars->var_width) {
        String *ix_path = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        ix_out = Folder_Open_Out(folder, ix_path);
        DECREF(ix_path);
        if (!ix_out) { RETHROW(INCREF(Err_get_error())); }
    }
    String *dat_path = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    OutStream *dat_out = Folder_Open_Out(folder, dat_path);
    DECREF(dat_path);
    if (!dat_out) { RETHROW(INCREF(Err_get_error())); }

    int32_t cardinality = S_write_files(self, ord_out, ix_out, dat_out);

    // Close streams.
    OutStream_Close(ord_out);
    if (ix_out) { OutStream_Close(ix_out); }
    OutStream_Close(dat_out);
    DECREF(dat_out);
    DECREF(ix_out);
    DECREF(ord_out);

    return cardinality;
}
示例#6
0
文件: PolyReader.c 项目: theory/lucy
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self              = args->self;
    PolyReaderIVARS *const ivars  = PolyReader_IVARS(self);
    VArray     *files             = Snapshot_List(ivars->snapshot);
    Folder     *folder            = PolyReader_Get_Folder(self);
    uint32_t    num_segs          = 0;
    uint64_t    latest_schema_gen = 0;
    CharBuf    *schema_file       = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (CB_Starts_With_Str(entry, "schema_", 7)
                 && CB_Ends_With_Str(entry, ".json", 5)
                ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                if (!schema_file) { schema_file = CB_Clone(entry); }
                else { CB_Mimic(schema_file, (Obj*)entry); }
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(
                               VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
            DECREF(dump);
            DECREF(schema_file);
            schema_file = NULL;
        }
        else {
            CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need to
            // retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                CharBuf *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) {
            break;
        }
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}