Exemplo n.º 1
0
static SortCache*
S_lazy_init_sort_cache(DefaultSortReader *self, String *field) {
    DefaultSortReaderIVARS *const ivars = DefSortReader_IVARS(self);

    // See if we have any values.
    Obj *count_obj = Hash_Fetch(ivars->counts, (Obj*)field);
    int32_t count = count_obj ? (int32_t)Obj_To_I64(count_obj) : 0;
    if (!count) { return NULL; }

    // Get a FieldType and sanity check that the field is sortable.
    Schema    *schema = DefSortReader_Get_Schema(self);
    FieldType *type   = Schema_Fetch_Type(schema, field);
    if (!type || !FType_Sortable(type)) {
        THROW(ERR, "'%o' isn't a sortable field", field);
    }

    // Open streams.
    Folder    *folder    = DefSortReader_Get_Folder(self);
    Segment   *segment   = DefSortReader_Get_Segment(self);
    String    *seg_name  = Seg_Get_Name(segment);
    int32_t    field_num = Seg_Field_Num(segment, field);
    int8_t     prim_id   = FType_Primitive_ID(type);
    bool       var_width = (prim_id == FType_TEXT || prim_id == FType_BLOB)
                           ? true
                           : false;
    String *ord_path = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    InStream *ord_in = Folder_Open_In(folder, ord_path);
    DECREF(ord_path);
    if (!ord_in) {
        THROW(ERR, "Error building sort cache for '%o': %o",
              field, Err_get_error());
    }
    InStream *ix_in = NULL;
    if (var_width) {
        String *ix_path = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        ix_in = Folder_Open_In(folder, ix_path);
        DECREF(ix_path);
        if (!ix_in) {
            THROW(ERR, "Error building sort cache for '%o': %o",
                  field, Err_get_error());
        }
    }
    String *dat_path = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    InStream *dat_in = Folder_Open_In(folder, dat_path);
    DECREF(dat_path);
    if (!dat_in) {
        THROW(ERR, "Error building sort cache for '%o': %o",
              field, Err_get_error());
    }

    Obj     *null_ord_obj = Hash_Fetch(ivars->null_ords, (Obj*)field);
    int32_t  null_ord = null_ord_obj ? (int32_t)Obj_To_I64(null_ord_obj) : -1;
    Obj     *ord_width_obj = Hash_Fetch(ivars->ord_widths, (Obj*)field);
    int32_t  ord_width = ord_width_obj
                         ? (int32_t)Obj_To_I64(ord_width_obj)
                         : S_calc_ord_width(count);
    int32_t  doc_max = (int32_t)Seg_Get_Count(segment);

    SortCache *cache = NULL;
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT:
            cache = (SortCache*)TextSortCache_new(field, type, count, doc_max,
                                                  null_ord, ord_width, ord_in,
                                                  ix_in, dat_in);
            break;
        case FType_INT32:
            cache = (SortCache*)I32SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_INT64:
            cache = (SortCache*)I64SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_FLOAT32:
            cache = (SortCache*)F32SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_FLOAT64:
            cache = (SortCache*)F64SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        default:
            THROW(ERR, "No SortCache class for %o", type);
    }
    Hash_Store(ivars->caches, (Obj*)field, (Obj*)cache);

    if (ivars->format == 2) { // bug compatibility
        SortCache_Set_Native_Ords(cache, true);
    }

    DECREF(ord_in);
    DECREF(ix_in);
    DECREF(dat_in);

    return cache;
}
static void
S_flip_run(SortFieldWriter *run, size_t sub_thresh, InStream *ord_in,
           InStream *ix_in, InStream *dat_in) {
    SortFieldWriterIVARS *const run_ivars = SortFieldWriter_IVARS(run);

    if (run_ivars->flipped) { THROW(ERR, "Can't Flip twice"); }
    run_ivars->flipped = true;

    // Get our own slice of mem_thresh.
    DECREF(run_ivars->counter);
    run_ivars->counter    = Counter_new();
    run_ivars->mem_thresh = sub_thresh;

    // Done if we already have a SortCache to read from.
    if (run_ivars->sort_cache) { return; }

    // Open the temp files for reading.
    String *seg_name  = Seg_Get_Name(run_ivars->segment);
    String *ord_alias = Str_newf("%o/sort_ord_temp-%i64-to-%i64", seg_name,
                                 run_ivars->ord_start, run_ivars->ord_end);
    InStream *ord_in_dupe
        = InStream_Reopen(ord_in, ord_alias, run_ivars->ord_start,
                          run_ivars->ord_end - run_ivars->ord_start);
    DECREF(ord_alias);
    InStream *ix_in_dupe = NULL;
    if (run_ivars->var_width) {
        String *ix_alias = Str_newf("%o/sort_ix_temp-%i64-to-%i64", seg_name,
                                    run_ivars->ix_start, run_ivars->ix_end);
        ix_in_dupe = InStream_Reopen(ix_in, ix_alias, run_ivars->ix_start,
                                     run_ivars->ix_end - run_ivars->ix_start);
        DECREF(ix_alias);
    }
    String *dat_alias = Str_newf("%o/sort_dat_temp-%i64-to-%i64", seg_name,
                                 run_ivars->dat_start, run_ivars->dat_end);
    InStream *dat_in_dupe
        = InStream_Reopen(dat_in, dat_alias, run_ivars->dat_start,
                          run_ivars->dat_end - run_ivars->dat_start);
    DECREF(dat_alias);

    // Get a SortCache.
    String *field = Seg_Field_Name(run_ivars->segment, run_ivars->field_num);
    switch (run_ivars->prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT:
            run_ivars->sort_cache = (SortCache*)TextSortCache_new(
                                  field, run_ivars->type, run_ivars->run_cardinality,
                                  run_ivars->run_max, run_ivars->null_ord,
                                  run_ivars->ord_width, ord_in_dupe,
                                  ix_in_dupe, dat_in_dupe);
            break;
        case FType_INT32:
            run_ivars->sort_cache = (SortCache*)I32SortCache_new(
                                  field, run_ivars->type, run_ivars->run_cardinality,
                                  run_ivars->run_max, run_ivars->null_ord,
                                  run_ivars->ord_width, ord_in_dupe,
                                  dat_in_dupe);
            break;
        case FType_INT64:
            run_ivars->sort_cache = (SortCache*)I64SortCache_new(
                                  field, run_ivars->type, run_ivars->run_cardinality,
                                  run_ivars->run_max, run_ivars->null_ord,
                                  run_ivars->ord_width, ord_in_dupe,
                                  dat_in_dupe);
            break;
        case FType_FLOAT32:
            run_ivars->sort_cache = (SortCache*)F32SortCache_new(
                                  field, run_ivars->type, run_ivars->run_cardinality,
                                  run_ivars->run_max, run_ivars->null_ord,
                                  run_ivars->ord_width, ord_in_dupe,
                                  dat_in_dupe);
            break;
        case FType_FLOAT64:
            run_ivars->sort_cache = (SortCache*)F64SortCache_new(
                                  field, run_ivars->type, run_ivars->run_cardinality,
                                  run_ivars->run_max, run_ivars->null_ord,
                                  run_ivars->ord_width, ord_in_dupe,
                                  dat_in_dupe);
            break;
        default:
            THROW(ERR, "No SortCache class for %o", run_ivars->type);
    }

    DECREF(ord_in_dupe);
    DECREF(ix_in_dupe);
    DECREF(dat_in_dupe);
}