Beispiel #1
0
/* Updates the data analysis of the column selected by
 * (nwrittenobjects % ncolumns) when a non-integer value is stored.
 * The column stops being considered integer-only; it also stops being
 * considered numeric-only as soon as `numeric` is zero.
 * Columns whose container is not SDB_SM_SMALLEST are left untouched. */
void sdb_analyze_noninteger( sdb_table_t *tbl, unsigned char numeric) {
    sdb_column_t *col = &tbl->columns[tbl->nwrittenobjects % tbl->ncolumns];

    // Analysis flags are only maintained for SDB_SM_SMALLEST columns.
    if( SDB_SM_SMALLEST != SDB_SM_CONTAINER(col->serialization_method)) {
        return;
    }

    col->data_analysis.all_integer = 0;
    col->data_analysis.all_numeric = numeric && col->data_analysis.all_numeric;
}
/* Release all resources reserved by the table and bring it back to an empty
 * state: storage is emptied, write counters are zeroed and the data analysis
 * of SDB_SM_SMALLEST columns is re-armed.
 * If there is a serialization in progress, it is canceled.
 *
 * Returns:
 *  - SDB_EOK       on success;
 *  - SDB_EBADSTATE if the table is not in SDB_ST_READING state (checked after
 *                  the optional cancellation);
 *  - SDB_EMEM      if the fresh RAM chunk cannot be allocated;
 *  - SDB_EBADFILE  if the backing file cannot be reopened.
 * In the last two cases the storage is gone, so the table is marked
 * SDB_ST_BROKEN. */
int sdb_reset( sdb_table_t *tbl) {
    int i;
    if( tbl->state == SDB_ST_SERIALIZING) sdb_serialize_cancel( tbl);
    if( tbl->state != SDB_ST_READING) return SDB_EBADSTATE;
    // TODO must be able to reset an unconfigured table

    switch( tbl->storage_kind) {
    case SDB_SK_RAM: {
        struct sdb_chunk_t *p, *q;
        struct sdb_ram_storage_t *ram = & tbl->storage.ram;

        // free the whole chunk list...
        p = ram->first_chunk;
        while( p) {
            q=p->next; BS_MEM_FREE( p); p=q;
        }

        // ...and restart from a single chunk of minimal size.
        p = BS_MEM_ALLOC( sizeof( struct sdb_chunk_t) + SDB_MIN_CHUNK_SIZE - SDB_CHUNK_SIZE);
        if( ! p) {
            // Every chunk has been freed above: do not leave pointers to
            // released memory behind in a table that is now unusable.
            ram->first_chunk = NULL;
            ram->last_chunk = NULL;
            tbl->state = SDB_ST_BROKEN;
            return SDB_EMEM;
        }
        p->next = NULL;
        ram->first_chunk = p;
        ram->last_chunk = p;
        ram->last_chunk_ptr =  & ram->last_chunk;
        ram->last_chunk_size = SDB_MIN_CHUNK_SIZE;
        break;
    }
#ifdef SDB_FILE_SUPPORT
    case SDB_SK_FILE:
        // identifier/filename is stored as the 1st conf string
        tbl->storage.file = freopen( tbl->conf_strings, "w+", tbl->storage.file); // erases content
        if( ! tbl->storage.file) {
            // freopen() closes the original stream even when it fails, so the
            // table has no usable storage anymore: flag it like the RAM case.
            tbl->state = SDB_ST_BROKEN;
            return SDB_EBADFILE;
        }
        break;
#endif
    }
    tbl->nwrittenbytes     = 0;
    tbl->nwrittenobjects   = 0;

    // reset data analysis
    for( i=0; i<tbl->ncolumns; i++) {
        sdb_column_t *c = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(c->serialization_method)) {
            c->data_analysis.delta_sum = 0;
            c->data_analysis.all_integer = 1;
            c->data_analysis.all_numeric = 1;
        }
    }

    if( tbl->bss_ctx) bss_reset( tbl->bss_ctx);
    return SDB_EOK;
}
Beispiel #3
0
/* Feeds the integer value `i` into the data analysis of the column selected
 * by (nwrittenobjects % ncolumns).
 * Nothing happens unless the column's container is SDB_SM_SMALLEST and every
 * value analyzed so far for it was an integer. */
void sdb_analyze_integer( sdb_table_t *tbl, int i) {
    sdb_column_t *col = &tbl->columns[tbl->nwrittenobjects % tbl->ncolumns];

    if( SDB_SM_SMALLEST != SDB_SM_CONTAINER(col->serialization_method)) {
        return;
    }
    if( !col->data_analysis.all_integer) {
        return;
    }

    if( tbl->nwrittenobjects >= tbl->ncolumns) {
        // a previous value exists for this column: accumulate the delta
        col->data_analysis.delta_sum += i - col->data_analysis.prev_value;
    } else {
        // first row: seed the GCD so that the computation below (if any) starts correctly
        col->data_analysis.gcd = i;
    }

    // the GCD is only tracked when the precision is not forced
    if( 0 == (col->serialization_method & SDB_SM_FIXED_PRECISION)) {
        col->data_analysis.gcd = gcd(i, col->data_analysis.gcd);
    }

    col->data_analysis.prev_value = i;
}
/* Sets the serialization method `sm` and its argument `precision` on the
 * column named `label`.
 * For an SDB_SM_SMALLEST container the data analysis of the column is
 * (re)initialized and `precision` is remembered as the original argument.
 * When the table is file-backed (and file support is compiled in),
 * sdb_restore_file_cells() is then called on the table.
 *
 * Returns SDB_EOK, or SDB_NCOLUMN_INVALID when sdb_getcolnum() reports an
 * index that is not below SDB_NCOLUMN_INVALID. */
int sdb_setcolumn(  sdb_table_t *tbl, const char *label, enum sdb_serialization_method_t sm, double precision) {
    sdb_column_t *col;
    int colnum = sdb_getcolnum( tbl, label);

    if( colnum >= SDB_NCOLUMN_INVALID) {
        return SDB_NCOLUMN_INVALID;
    }

    col = &tbl->columns[colnum];
    col->arg = precision;
    col->serialization_method = sm;

    if( SDB_SM_CONTAINER(sm) == SDB_SM_SMALLEST) {
        // start the analysis from a clean slate
        col->data_analysis.all_numeric = 1;
        col->data_analysis.all_integer = 1;
        col->data_analysis.delta_sum = 0;
        col->data_analysis.original_arg = precision;
    }

    // read existing cells if table is a file
#   ifdef SDB_FILE_SUPPORT
    if( SDB_SK_FILE == tbl->storage_kind) {
        sdb_restore_file_cells( tbl);
    }
#   endif
    return SDB_EOK;
}
Beispiel #5
0
/* Drives the serialization of the whole table through the BSS context held by
 * the table's serialization context: bss_map() is called once, then for every
 * column its label is sent with bss_string() followed by the column content,
 * and finally bss_close() is called.
 *
 * The function is written as a resumable state machine: the outer switch jumps
 * directly to the case label matching ctx->stage, and several of those labels
 * are located inside the body of the column loop. TRY() is defined outside
 * this function; presumably it returns early when the call's status is not OK
 * and otherwise stores its second argument in ctx->stage -- TODO confirm
 * against the macro definition.
 *
 * Returns BSS_EOK after serialize_close() has been called, BSS_EINTERNAL when
 * ctx->stage is not one of the handled stages, BSS_EINVALID when a column's
 * container method is not handled, or the negative value returned by
 * compute_serialization_methods(). */
static bss_status_t serialize_table( struct sdb_table_t *tbl) {
    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    switch( ctx->stage) {
        // unknown stage: nothing can be resumed
        default: return BSS_EINTERNAL;

        case SDB_SS_INITIALIZED:
        // TODO: check CTXID
        TRY( bss_map( bss_ctx, -1, BS_CTXID_GLOBAL), SDB_SS_MAP_OPENED);

        // Resolve the actual container of every SDB_SM_SMALLEST column before
        // any column is sent.
        // NOTE(review): r is an SDB_* code returned through a bss_status_t;
        // also, if TRY() above has already moved the stage forward, a later
        // call would resume past this computation -- confirm this is intended.
        int r = compute_serialization_methods(tbl);
        if( r<0) return r;

        // intentional fall through from one stage to the next, here and below
        case SDB_SS_MAP_OPENED:
        for( ctx->current_column = 0;
                ctx->current_column<tbl->ncolumns;
                ctx->current_column++) {

            // entry point used to start a column: send its label
            case SDB_SS_COLUMN_CLOSED:
            TRY( bss_string( bss_ctx,
                    tbl->conf_strings + tbl->columns[ctx->current_column].label_offset),
                    SDB_SS_MAP_LABEL_SENT);
            sdb_read_init( & ctx->read_ctx, tbl);

            // All these stages re-enter the column serializer of the current
            // column; none of them is set in this function except
            // SDB_SS_MAP_LABEL_SENT (presumably the others are set by the
            // serialize_column_*() helpers -- TODO confirm).
            case SDB_SS_MAP_LABEL_SENT:
            case SDB_SS_COLUMN_OBJECT_DEFINED:
            case SDB_SS_COLUMN_FACTOR_SENT:
            case SDB_SS_COLUMN_START_VALUE_SENT:
            case SDB_SS_COLUMN_SENDING_CELLS:
            case SDB_SS_COLUMN_CONTENT_SENT:
            case SDB_SS_COLUMN_SHIFT_SENT:
            case SDB_SS_COLUMN_LAST_SHIFT_SENT:
            case SDB_SS_COLUMN_INNER_LIST_CLOSED: {
                sdb_column_t *column = tbl->columns + ctx->current_column;
                // for SDB_SM_SMALLEST columns, use the method chosen by data analysis
                int sm = (SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) ?
                        column->data_analysis.method : column->serialization_method;
                switch(SDB_SM_CONTAINER(sm)) {
                    case SDB_SM_LIST:
                    case SDB_SM_FASTEST:
                        TRY( serialize_column_list( tbl), SDB_SS_COLUMN_CLOSED);
                        break;
                    case SDB_SM_DELTAS_VECTOR:
                        TRY( serialize_column_deltasvector( tbl), SDB_SS_COLUMN_CLOSED);
                        break;
                    case SDB_SM_QUASIPERIODIC_VECTOR:
                        TRY( serialize_column_quasiperiodicvector( tbl), SDB_SS_COLUMN_CLOSED);
                        break;
                    default:
                        return BSS_EINVALID; // bad value, not implemented, ...
                }
            }
            // column done: release the read context opened after the label was sent
            sdb_read_close( & ctx->read_ctx);

        }
        ctx->stage = SDB_SS_ALL_COLUMNS_SENT;
        case SDB_SS_ALL_COLUMNS_SENT:
        TRY( bss_close( bss_ctx), SDB_SS_MAP_CLOSED);
        case SDB_SS_MAP_CLOSED:;
    }
    // every stage completed: end the serialization
    serialize_close( tbl);
    return BSS_EOK;
}
Beispiel #6
0
/* Compute the smallest serialization container, among list, deltas vector (DV)
 * and quasi-periodic vector (QPV), for every column configured with the
 * SDB_SM_SMALLEST container, using data analysis and stored data.
 * The method is to estimate as precisely as possible the final size of each
 * candidate and take the smallest one.
 * The result is stored in column->data_analysis.method; when DV or QPV is
 * selected, the matching factor or period is stored in column->arg.
 *
 * Returns SDB_EOK on success, SDB_EMEM if the working memory cannot be
 * allocated, or the negative code returned by sdb_read_data().
 */
static int compute_serialization_methods( struct sdb_table_t *tbl) {
    // Data used for 2nd pass computations.
    struct data_analysis_t {
        int vsize,dvsize, qpvsize;                            // Computed sizes.
        double dvfactor;                            // Data for DV computations.
        int qpvperiod, qpvcurrentn;                // Data for QPV computations.
        double dprevious;                            // Previous data as double.
        int iprevious;                                  // Previous data as int.
    };

    sdb_ncolumn_t nsmallest = 0;
    sdb_ncolumn_t current_smallest;
    int i;
    int has_intervals;
    struct data_analysis_t *analysis_data = NULL;
    int return_code = SDB_EOK;
    struct sdb_read_ctx_t read_ctx;

    // count columns that needs serialization computation
    for( i=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) {
            // if any data is not numeric, DV and QPV are not able to serialize them
            if( !column->data_analysis.all_numeric) {
                column->data_analysis.method = SDB_SM_LIST;
            }
            // QPV period guessing is integer only, DV support floats only when factor is forced.
            else if( !column->data_analysis.all_integer &&
                    !(column->serialization_method & SDB_SM_FIXED_PRECISION)) {
                column->data_analysis.method = SDB_SM_LIST;
            }
            // otherwise the serialization method must be computed
            else {
                column->data_analysis.method = SDB_SM_SMALLEST;
                nsmallest++;
            }
        }
    }

    if( 0 == nsmallest) return SDB_EOK; // no column to analyze.

    analysis_data = malloc( sizeof *analysis_data * nsmallest);
    if( !analysis_data) return SDB_EMEM;

    // The QPV period is the average delta between consecutive rows, which only
    // exists when at least two complete rows are stored. Without this guard a
    // single-row table divides by zero and converts the result to int.
    // (ncolumns is non-zero here, since at least one column was counted above.)
    has_intervals = (tbl->nwrittenobjects / tbl->ncolumns) > 1;

    // Initialize analysis data
    for( i=0, current_smallest=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) {

            struct data_analysis_t *data = analysis_data + (current_smallest++);
            data->vsize = 0;
            data->dvfactor = (column->serialization_method & SDB_SM_FIXED_PRECISION) ?
                    column->data_analysis.original_arg : column->data_analysis.gcd;
            data->dvsize = bss_double_size(data->dvfactor);

            data->qpvperiod = has_intervals ?
                    (int) round((double)column->data_analysis.delta_sum / (double)((tbl->nwrittenobjects / tbl->ncolumns) - 1)) :
                    0;
            data->qpvsize = bss_int_size(data->qpvperiod);
        }
    }

    // read table and analyze data
    sdb_read_init( & read_ctx, tbl);
    for( i=0, current_smallest=0; i<tbl->nwrittenobjects; i++) {
        int column_index = read_ctx.nreadobjects%tbl->ncolumns;
        struct sdb_column_t *column = tbl->columns + column_index;
        bsd_data_t read_data;
        int is_smallest = SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method);

        // the value itself is only needed for analyzed columns
        int r = sdb_read_data( & read_ctx, & read_data, ! is_smallest);
        if( r<0) {
            return_code = r;
            goto compute_serialization_methods_exit;
        }

        if( is_smallest) {
            struct data_analysis_t *data = analysis_data + current_smallest;
            double dvalue = (BSD_INT == read_data.type) ? ((double)read_data.content.i) : read_data.content.d;
            // This will cause errors, QPV size will be marked as wrong later.
            int ivalue = (BSD_INT == read_data.type) ? read_data.content.i : 0;

            // Cycle through the analyzed columns with an explicit wrap instead
            // of an ever-growing counter reduced by a modulo: the counter only
            // has to hold a column count, whatever the number of stored cells.
            if( ++current_smallest == nsmallest) current_smallest = 0;

            data->vsize += read_ctx.nbytes;
            if( i<tbl->ncolumns) { // first cell
                data->dvsize += bss_double_size(dvalue);
                data->qpvsize += bss_int_size(ivalue);
                data->qpvcurrentn = 0;
            } else {
                int qpvshift = ivalue - (data->iprevious + data->qpvperiod);
                if( 0 == qpvshift) {
                    data->qpvcurrentn++;
                } else {
                    data->qpvsize += bss_int_size(qpvshift) + bss_int_size(data->qpvcurrentn);
                    data->qpvcurrentn = 0;
                }
                // this is not useful to care about corner cases here, impact on computed size, if any, is negligible
                // A null factor cannot be divided by; DV is ruled out for that column below.
                if( 0.0 != data->dvfactor) {
                    data->dvsize += bss_int_size(floor((dvalue - data->dprevious)/data->dvfactor));
                }
            }
            data->dprevious = dvalue;
            data->iprevious = ivalue;
        }
    }

    // Finalize computation
    for( i=0, current_smallest=0; i<tbl->ncolumns; i++) {
        struct sdb_column_t *column = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) &&
                SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) {
            struct data_analysis_t *data = analysis_data + (current_smallest++);
            // QPV handles integers only
            if( !column->data_analysis.all_integer) {
                data->qpvsize = INT_MAX;
            }
            // DV needs a usable (non-null) factor
            if( 0.0 == data->dvfactor) {
                data->dvsize = INT_MAX;
            }
//#define SDB_VERBOSE_PRINT
#ifdef SDB_VERBOSE_PRINT
            printf("Data analysis results:\n"
                   " - List: size %d bytes\n"
                   " - Deltas Vector: size %d bytes; factor %f\n"
                   " - Quasi Periodic Vector: size %d bytes; period %d\n\n",
                   data->vsize, data->dvsize, data->dvfactor, data->qpvsize, data->qpvperiod);
#endif

            // See also the "--FIXME M3DA QPV" tags in stagedb.lua tests
            // (some tests have been disabled/changed)
            if( data->qpvsize < data->dvsize && data->qpvsize < data->vsize) {
                column->arg = data->qpvperiod;
                column->data_analysis.method = SDB_SM_QUASIPERIODIC_VECTOR;
            } else if( data->dvsize < data->vsize) {
                column->arg = data->dvfactor;
                column->data_analysis.method = SDB_SM_DELTAS_VECTOR;
            } else {
                column->data_analysis.method = SDB_SM_LIST;
            }
        }
    }

compute_serialization_methods_exit:
    sdb_read_close( & read_ctx);
    free( analysis_data);
    return return_code;
}