Exemple #1
0
/* Serialize the column selected by ctx->current_column as a quasi-periodic
 * vector object on the table's bss stream.
 *
 * The function is written as a resumable state machine: the switch on
 * ctx->stage jumps to the label matching the last recorded stage, and several
 * of those labels live INSIDE the while loop so that execution can re-enter
 * the loop body directly. Statement order therefore matters.
 *
 * NOTE(review): TRY() is defined outside this view; it presumably returns
 * early when the wrapped call fails and otherwise stores its second argument
 * in ctx->stage -- confirm against the macro definition.
 *
 * Returns BSS_EOK when the switch runs to its end, BSS_EINTERNAL when
 * ctx->stage matches no case, the negative result of sdb_read_data() when a
 * read fails, or the non-OK status of serialize_cell_quasiperiodicvector().
 */
static bss_status_t serialize_column_quasiperiodicvector(struct sdb_table_t *tbl) {

    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    struct bsd_data_t bsd_data;
    struct sdb_column_t *column = tbl->columns + ctx->current_column;
    // Only complete rows are considered: a trailing partial row is ignored.
    int nrows = tbl->nwrittenobjects / tbl->ncolumns;
    int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row
    // Starts at 1 so that a jump straight into the loop body (resumed call)
    // treats the current object as belonging to the column.
    int incolumn = 1;

    switch (ctx->stage) {
        default: return BSS_EINTERNAL;

        case SDB_SS_MAP_LABEL_SENT:
        TRY( bss_object(bss_ctx, SDB_CLSID_QUASI_PERIODIC_VECTOR), SDB_SS_COLUMN_OBJECT_DEFINED); //TODO: use constants for class ID
        case SDB_SS_COLUMN_OBJECT_DEFINED:
        // column->arg is emitted as a double right after the object header.
        TRY( bss_double(bss_ctx, column->arg), SDB_SS_COLUMN_FACTOR_SENT);

        // Walk every object of the complete rows; only those whose position
        // modulo ncolumns equals current_column are serialized.
        while( read_ctx->nreadobjects < nobjectstoread) {
            incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column;
            // Third argument is !incolumn -- presumably a "skip, do not decode"
            // flag for objects of other columns; confirm in sdb_read_data().
            int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn);
            if( r<0)
                return r;
            // Resume points: all four stages re-enter the loop here, after the
            // read, so the object already held by read_ctx is not read again.
            case SDB_SS_COLUMN_FACTOR_SENT:
            case SDB_SS_COLUMN_START_VALUE_SENT:
            case SDB_SS_COLUMN_SENDING_CELLS:
            case SDB_SS_COLUMN_SHIFT_SENT:
            if( incolumn) {
                bss_status_t res = serialize_cell_quasiperiodicvector(ctx, column->arg);
                if( res != BSS_EOK) return res;
            }
        }
        // Set directly (not through TRY) because no bss call guards this step.
        ctx->stage = SDB_SS_COLUMN_CONTENT_SENT;
        case SDB_SS_COLUMN_CONTENT_SENT:
        // finalize the container: send the last shift count
        TRY( bss_int(bss_ctx, ctx->current_shift), SDB_SS_COLUMN_LAST_SHIFT_SENT);

        case SDB_SS_COLUMN_LAST_SHIFT_SENT:
        // close shift list
        TRY( bss_close(bss_ctx), SDB_SS_COLUMN_INNER_LIST_CLOSED);

        case SDB_SS_COLUMN_INNER_LIST_CLOSED:
        // second close -- presumably ends the enclosing vector object.
        TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED);
    }

    return BSS_EOK;
}
/* Re-read every object stored for `tbl`, replaying the per-object data
 * analysis and rebuilding the table's written-bytes / written-objects
 * counters. Reading stops at the first sdb_read_data() result that is not
 * strictly positive. */
static void sdb_restore_file_cells( sdb_table_t *tbl) {
    struct sdb_read_ctx_t reader;
    struct bsd_data_t item;

    sdb_read_init( &reader, tbl);

    for( ;;) {
        int consumed = sdb_read_data( &reader, &item, 1);
        if( consumed <= 0) {
            break;
        }

        /* Feed the analysis with the kind of object just read. */
        if( BSD_INT == item.kind) {
            sdb_analyze_integer( tbl, item.content.i);
        } else if( BSD_DOUBLE == item.kind) {
            sdb_analyze_noninteger( tbl, 1);
        } else {
            sdb_analyze_noninteger( tbl, 0);
        }

        /* Account for the object in the table totals. */
        tbl->nwrittenbytes += consumed;
        tbl->nwrittenobjects++;
    }

    sdb_read_close( &reader);
}
Exemple #3
0
/* Serialize the column selected by ctx->current_column as a plain list.
 *
 * The entire table is read in sequence; every object that belongs to the
 * column is written straight to the target list with bss_raw(), the others
 * are only passed over.
 *
 * The function is a resumable state machine keyed on ctx->stage: the switch
 * jumps to the label of the last recorded stage, including a label placed
 * inside the while loop. TRY() is defined elsewhere; as the resume comment
 * below describes, a failing call (e.g. BSS_EAGAIN) makes the function return
 * with the stage unchanged so that the same step is retried on the next call.
 *
 * Returns BSS_EOK when the column has been fully written and closed,
 * BSS_EINTERNAL when ctx->stage matches no case, or the negative result of
 * sdb_read_data() when a read fails.
 */
static bss_status_t serialize_column_list( struct sdb_table_t *tbl) {

    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    struct bsd_data_t bsd_data;
    /* Only complete rows are serialized. */
    int nrows = tbl->nwrittenobjects / tbl->ncolumns;
    int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row
    /* Must start at 1: see the resume comment inside the loop. */
    int incolumn = 1;

    switch (ctx->stage) {
        default: return BSS_EINTERNAL;

        case SDB_SS_MAP_LABEL_SENT:
        // TODO: find a smarter CTXID depending on content's type
        TRY( bss_list( bss_ctx, nrows, BS_CTXID_GLOBAL), SDB_SS_COLUMN_SENDING_CELLS);
        while( read_ctx->nreadobjects < nobjectstoread) {
            incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column;
            int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn);
            if( r<0)
                return r;
            case SDB_SS_COLUMN_SENDING_CELLS:
            if( incolumn) {
                /* bss_raw might be run more than once, it will handle cases
                 * where only part of the data has been sent transparently,
                 * thanks to bss' transaction system.
                 *
                 * If we jump here directly, because serialization has been resumed
                 * in state SDB_SS_COLUMN_SENDING_CELLS, incolumn is always true:
                 * the previous serialization attempt stopped in this state,
                 * because the call to bss_raw() below failed on BSS_EAGAIN.
                 * Therefore, read_ctx was already pointing to something to
                 * serialize, and we were "incolumn" indeed. */
                TRY( bss_raw( bss_ctx, read_ctx->bytes, read_ctx->nbytes), SDB_SS_COLUMN_SENDING_CELLS);
            }
        }
        ctx->stage = SDB_SS_COLUMN_CONTENT_SENT;
        /* fallthrough */

        /* Resume point for the final close. Without this label, a call that
         * stopped on the bss_close() below would come back with
         * ctx->stage == SDB_SS_COLUMN_CONTENT_SENT, hit `default` and report
         * BSS_EINTERNAL instead of retrying the close. */
        case SDB_SS_COLUMN_CONTENT_SENT:
        TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED);
    }
    return BSS_EOK;
}
Exemple #4
0
/* Compute the smallest serialization container using  data analysis and stored data.
 * The method is to estimate as precisely as possible final size and take the smallest one.
 * Store the result in serialization_data struct.
 */
static int compute_serialization_methods( struct sdb_table_t *tbl) {
    // Data used for 2nd pass computations.
    struct data_analysis_t {
        int vsize,dvsize, qpvsize;                            // Computed sizes.
        double dvfactor;                            // Data for DV computations.
        int qpvperiod, qpvcurrentn;                // Data for QPV computations.
        double dprevious;                            // Previous data as double.
        int iprevious;                                  // Previous data as int.
    };

    sdb_ncolumn_t nsmallest = 0;
    sdb_ncolumn_t current_smallest;
    int i;
    struct data_analysis_t *analysis_data = NULL;
    int return_code = SDB_EOK;
    struct sdb_read_ctx_t read_ctx;

    // count columns that needs serialization computation
    for( i=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) {
            // if any data is not numeric, DV and QPV are not able to serialize them
            if( !column->data_analysis.all_numeric) {
                column->data_analysis.method = SDB_SM_LIST;
            }
            // QPV period guessing is integer only, DV support floats only when factor is forced.
            else if( !column->data_analysis.all_integer &&
                    !(column->serialization_method & SDB_SM_FIXED_PRECISION)) {
                column->data_analysis.method = SDB_SM_LIST;
            }
            // otherwise the serialization method must be computed
            else {
                column->data_analysis.method = SDB_SM_SMALLEST;
                nsmallest++;
            }
        }
    }

    if( 0 == nsmallest) return SDB_EOK; // no column to analyze.

    analysis_data = malloc( sizeof(struct data_analysis_t) * nsmallest);
    if( !analysis_data) return SDB_EMEM;

    // Initialize analysis data
    for( i=0, current_smallest=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) {

            struct data_analysis_t *data = analysis_data + (current_smallest++);
            data->vsize = 0;
            data->dvfactor = (column->serialization_method & SDB_SM_FIXED_PRECISION) ?
                    column->data_analysis.original_arg : column->data_analysis.gcd;
            data->dvsize = bss_double_size(data->dvfactor);

            data->qpvperiod = round((double)column->data_analysis.delta_sum / (double)((tbl->nwrittenobjects / tbl->ncolumns) - 1));
            data->qpvsize = bss_int_size(data->qpvperiod);
        }
    }

    // read table and analyze data
    sdb_read_init( & read_ctx, tbl);
    for( i=0, current_smallest=0; i<tbl->nwrittenobjects; i++) {
        int column_index = read_ctx.nreadobjects%tbl->ncolumns;
        struct sdb_column_t *column = tbl->columns + column_index;
        bsd_data_t read_data;
        int is_smallest = SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method);

        int r = sdb_read_data( & read_ctx, & read_data, ! is_smallest);
        if( r<0) {
            return_code = r;
            goto compute_serialization_methods_exit;
        }

        if( is_smallest) {
            struct data_analysis_t *data = analysis_data + (current_smallest++ % nsmallest);
            double dvalue = (BSD_INT == read_data.type) ? ((double)read_data.content.i) : read_data.content.d;
            // This will cause errors, QPV size will be marked as wrong later.
            int ivalue = (BSD_INT == read_data.type) ? read_data.content.i : 0;

            data->vsize += read_ctx.nbytes;
            if( i<tbl->ncolumns) { // first cell
                data->dvsize += bss_double_size(dvalue);
                data->qpvsize += bss_int_size(ivalue);
                data->qpvcurrentn = 0;
            } else {
                int qpvshift = ivalue - (data->iprevious + data->qpvperiod);
                if( 0 == qpvshift) {
                    data->qpvcurrentn++;
                } else {
                    data->qpvsize += bss_int_size(qpvshift) + bss_int_size(data->qpvcurrentn);
                    data->qpvcurrentn = 0;
                }
                // this is not useful to care about corner cases here, impact on computed size, if any, is negligible
                data->dvsize += bss_int_size(floor((dvalue - data->dprevious)/data->dvfactor));
            }
            data->dprevious = dvalue;
            data->iprevious = ivalue;
        }
    }

    // Finalize computation
    for( i=0, current_smallest=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) {
            struct data_analysis_t *data = analysis_data + (current_smallest++);
            if( !column->data_analysis.all_integer) {
                data->qpvsize = INT_MAX;
            }
//#define SDB_VERBOSE_PRINT
#ifdef SDB_VERBOSE_PRINT
            printf("Data analysis results:\n"
                   " - List: size %d bytes\n"
                   " - Deltas Vector: size %d bytes; factor %f\n"
                   " - Quasi Periodic Vector: size %d bytes; period %d\n\n",
                   data->vsize, data->dvsize, data->dvfactor, data->qpvsize, data->qpvperiod);
#endif

            // See also the "--FIXME M3DA QPV" tags in stagedb.lua tests
            // (some tests has been disabled/chaged)
            if( data->qpvsize < data->dvsize && data->qpvsize < data->vsize) {
                column->arg = data->qpvperiod;
                column->data_analysis.method = SDB_SM_QUASIPERIODIC_VECTOR;
            } else if( data->dvsize < data->vsize) {
                column->arg = data->dvfactor;
                column->data_analysis.method = SDB_SM_DELTAS_VECTOR;
            } else {
                column->data_analysis.method = SDB_SM_LIST;
            }
        }
    }

compute_serialization_methods_exit:
    sdb_read_close( & read_ctx);
    free( analysis_data);
    return return_code;
}
Exemple #5
0
/* Serialize the column selected by ctx->current_column as a deltas-vector
 * object: the factor (column->arg), then a start value, then a list of
 * integer deltas between successive values.
 *
 * The function is written as a resumable state machine: the outer switch on
 * ctx->stage jumps to the label of the last recorded stage, and three of
 * those labels live INSIDE the while loop so that execution can re-enter the
 * loop body directly. A nested switch on the same stage then selects what to
 * emit for the current value. Statement order therefore matters.
 *
 * NOTE(review): TRY() is defined outside this view; it presumably returns
 * early when the wrapped call fails and otherwise stores its second argument
 * in ctx->stage -- confirm against the macro definition.
 *
 * Returns BSS_EOK when the switch runs to its end, BSS_EINTERNAL when
 * ctx->stage matches no case, BSS_EINVALID when get_bsd_value() fails, or the
 * negative result of sdb_read_data() when a read fails.
 */
static bss_status_t serialize_column_deltasvector( struct sdb_table_t *tbl) {

    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    struct bsd_data_t bsd_data;
    struct sdb_column_t *column = tbl->columns + ctx->current_column;
    // Only complete rows are considered: a trailing partial row is ignored.
    int nrows = tbl->nwrittenobjects / tbl->ncolumns;
    int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row
    // Starts at 1 so that a jump straight into the loop body (resumed call)
    // treats the current object as belonging to the column.
    int incolumn = 1;

    switch (ctx->stage) {
        default: return BSS_EINTERNAL;

        case SDB_SS_MAP_LABEL_SENT:
        TRY( bss_object(bss_ctx, SDB_CLSID_DELTAS_VECTOR), SDB_SS_COLUMN_OBJECT_DEFINED); //TODO: use constants for class ID
        case SDB_SS_COLUMN_OBJECT_DEFINED:
        // column->arg is the factor every value/delta is divided by below.
        TRY( bss_double(bss_ctx, column->arg), SDB_SS_COLUMN_FACTOR_SENT);
        while( read_ctx->nreadobjects < nobjectstoread) {
            incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column;
            // Third argument is !incolumn -- presumably a "skip, do not decode"
            // flag for objects of other columns; confirm in sdb_read_data().
            int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn);
            if( r<0)
                return r;
            // the process is roughly the same on both situations
            // Resume points: re-enter the loop after the read so the object
            // already held by read_ctx is not read again.
            case SDB_SS_COLUMN_FACTOR_SENT:
            case SDB_SS_COLUMN_SENDING_CELLS:
            case SDB_SS_COLUMN_START_VALUE_SENT:
            if( incolumn) {
                double value;
                // do NOT use bsd_data here because the process could be interrupted which would lead to uninitialized values
                if( !get_bsd_value( read_ctx, &value)) return BSS_EINVALID;

                // on first value, open the deltas list for next values
                // a sub-switch is needed because of value which must be initialized in any case
                switch (ctx->stage) {
                    case SDB_SS_COLUMN_FACTOR_SENT: {
                        // First value of the column: sent as the start value.
                        // NOTE(review): the result is held in a double but
                        // passed to bss_int() -- confirm the intended type.
                        double start = deltasvector_integer(value / column->arg, fabs(value/1e15));
                        TRY( bss_int(bss_ctx, start), SDB_SS_COLUMN_START_VALUE_SENT);
                        ctx->previous = value;
                    }
                    /* fallthrough: the deltas list is opened right after the start value */
                    case SDB_SS_COLUMN_START_VALUE_SENT:
                        // TODO: There might be a better CTXID
                        // nrows-1 entries: one delta per row after the first.
                        TRY( bss_list(bss_ctx, nrows-1, BS_CTXID_NUMBER), SDB_SS_COLUMN_SENDING_CELLS); // TODO: can be typed
                        break;
                    default: {
                        // Any later value: send its delta from ctx->previous.
                        int int_delta = deltasvector_integer((value - ctx->previous) / column->arg, fabs(value/1e15));
                        TRY( bss_int(bss_ctx, int_delta), SDB_SS_COLUMN_SENDING_CELLS);
                        // The reference value only moves when a non-zero delta
                        // was emitted.
                        if( int_delta != 0) {
                            ctx->previous = value;
                        }
                    }
                }
            }
        }
        // Set directly (not through TRY) because no bss call guards this step.
        ctx->stage = SDB_SS_COLUMN_CONTENT_SENT;
        // close the deltas list and then DV container
        case SDB_SS_COLUMN_CONTENT_SENT:
        TRY( bss_close( bss_ctx), SDB_SS_COLUMN_INNER_LIST_CLOSED);
        case SDB_SS_COLUMN_INNER_LIST_CLOSED:
        TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED);
    }
    return BSS_EOK;
}