static bss_status_t serialize_column_quasiperiodicvector(struct sdb_table_t *tbl) { struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx; struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx; struct bss_ctx_t *bss_ctx = ctx->bss_ctx; struct bsd_data_t bsd_data; struct sdb_column_t *column = tbl->columns + ctx->current_column; int nrows = tbl->nwrittenobjects / tbl->ncolumns; int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row int incolumn = 1; switch (ctx->stage) { default: return BSS_EINTERNAL; case SDB_SS_MAP_LABEL_SENT: TRY( bss_object(bss_ctx, SDB_CLSID_QUASI_PERIODIC_VECTOR), SDB_SS_COLUMN_OBJECT_DEFINED); //TODO: use constants for class ID case SDB_SS_COLUMN_OBJECT_DEFINED: TRY( bss_double(bss_ctx, column->arg), SDB_SS_COLUMN_FACTOR_SENT); while( read_ctx->nreadobjects < nobjectstoread) { incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column; int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn); if( r<0) return r; case SDB_SS_COLUMN_FACTOR_SENT: case SDB_SS_COLUMN_START_VALUE_SENT: case SDB_SS_COLUMN_SENDING_CELLS: case SDB_SS_COLUMN_SHIFT_SENT: if( incolumn) { bss_status_t res = serialize_cell_quasiperiodicvector(ctx, column->arg); if( res != BSS_EOK) return res; } } ctx->stage = SDB_SS_COLUMN_CONTENT_SENT; case SDB_SS_COLUMN_CONTENT_SENT: // finalize the container: send the last shift count TRY( bss_int(bss_ctx, ctx->current_shift), SDB_SS_COLUMN_LAST_SHIFT_SENT); case SDB_SS_COLUMN_LAST_SHIFT_SENT: // close shift list TRY( bss_close(bss_ctx), SDB_SS_COLUMN_INNER_LIST_CLOSED); case SDB_SS_COLUMN_INNER_LIST_CLOSED: TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED); } return BSS_EOK; }
/* Rebuild the table's bookkeeping by re-reading every object already stored
 * for it: each object is fed back into the data analysis, and the
 * nwrittenbytes / nwrittenobjects counters are incremented accordingly.
 *
 * Reading stops as soon as sdb_read_data() returns zero or a negative value;
 * that final value is not reported to the caller.
 */
static void sdb_restore_file_cells( sdb_table_t *tbl) {
    struct sdb_read_ctx_t rctx;
    struct bsd_data_t bsd;
    int nread;

    sdb_read_init( & rctx, tbl);
    while( (nread = sdb_read_data( & rctx, & bsd, 1)) > 0) {
        // Restore data analysis.
        // The value type is the discriminator for bsd.content; it is the
        // field compared against BSD_INT elsewhere in this file. The previous
        // code switched on bsd.kind, which is a different field.
        switch( bsd.type) {
        case BSD_INT:
            sdb_analyze_integer(tbl, bsd.content.i);
            break;
        case BSD_DOUBLE:
            sdb_analyze_noninteger(tbl, 1);
            break;
        default:
            sdb_analyze_noninteger(tbl, 0);
            break;
        }
        // A positive return value is accounted as the size of the object just read.
        tbl->nwrittenbytes += nread;
        tbl->nwrittenobjects ++;
    }
    // printf( "exited with code %d, found %d objects encoded in %d bytes\n",
    //         nread, tbl->nwrittenobjects, tbl->nwrittenbytes);
    sdb_read_close( & rctx);
}
/* Serializing a column: the entire table must be read in sequence, and each * column cell must be extracted and treated (here written straight * to the target list). */ static bss_status_t serialize_column_list( struct sdb_table_t *tbl) { struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx; struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx; struct bss_ctx_t *bss_ctx = ctx->bss_ctx; struct bsd_data_t bsd_data; int nrows = tbl->nwrittenobjects / tbl->ncolumns; int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row int incolumn = 1; switch (ctx->stage) { default: return BSS_EINTERNAL; case SDB_SS_MAP_LABEL_SENT: // TODO: find a smarter CTXID depending on content's type TRY( bss_list( bss_ctx, nrows, BS_CTXID_GLOBAL), SDB_SS_COLUMN_SENDING_CELLS); while( read_ctx->nreadobjects < nobjectstoread) { incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column; int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn); if( r<0) return r; case SDB_SS_COLUMN_SENDING_CELLS: if( incolumn) { /* bss_raw might be run more than once, it will handle cases * where only part of the data has been sent transparently, * thanks to bss' transaction system. * * If we jump here directly, because serialization has been resumed * in state SDB_SS_COLUMN_SENDING_CELLS, incolumn is always true: * the previous serialization attempt stopped in this state, * because the call to bss_raw() below failed on BSS_EAGAIN. * Therefore, read_ctx was already pointing to something to * serialize, and we were "incolumn" indeed. */ TRY( bss_raw( bss_ctx, read_ctx->bytes, read_ctx->nbytes), SDB_SS_COLUMN_SENDING_CELLS); } } ctx->stage = SDB_SS_COLUMN_CONTENT_SENT; TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED); } return BSS_EOK; }
/* Compute the smallest serialization container using data analysis and stored data. * The method is to estimate as precisely as possible final size and take the smallest one. * Store the result in serialization_data struct. */ static int compute_serialization_methods( struct sdb_table_t *tbl) { // Data used for 2nd pass computations. struct data_analysis_t { int vsize,dvsize, qpvsize; // Computed sizes. double dvfactor; // Data for DV computations. int qpvperiod, qpvcurrentn; // Data for QPV computations. double dprevious; // Previous data as double. int iprevious; // Previous data as int. }; sdb_ncolumn_t nsmallest = 0; sdb_ncolumn_t current_smallest; int i; struct data_analysis_t *analysis_data = NULL; int return_code = SDB_EOK; struct sdb_read_ctx_t read_ctx; // count columns that needs serialization computation for( i=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) { // if any data is not numeric, DV and QPV are not able to serialize them if( !column->data_analysis.all_numeric) { column->data_analysis.method = SDB_SM_LIST; } // QPV period guessing is integer only, DV support floats only when factor is forced. else if( !column->data_analysis.all_integer && !(column->serialization_method & SDB_SM_FIXED_PRECISION)) { column->data_analysis.method = SDB_SM_LIST; } // otherwise the serialization method must be computed else { column->data_analysis.method = SDB_SM_SMALLEST; nsmallest++; } } } if( 0 == nsmallest) return SDB_EOK; // no column to analyze. 
analysis_data = malloc( sizeof(struct data_analysis_t) * nsmallest); if( !analysis_data) return SDB_EMEM; // Initialize analysis data for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); data->vsize = 0; data->dvfactor = (column->serialization_method & SDB_SM_FIXED_PRECISION) ? column->data_analysis.original_arg : column->data_analysis.gcd; data->dvsize = bss_double_size(data->dvfactor); data->qpvperiod = round((double)column->data_analysis.delta_sum / (double)((tbl->nwrittenobjects / tbl->ncolumns) - 1)); data->qpvsize = bss_int_size(data->qpvperiod); } } // read table and analyze data sdb_read_init( & read_ctx, tbl); for( i=0, current_smallest=0; i<tbl->nwrittenobjects; i++) { int column_index = read_ctx.nreadobjects%tbl->ncolumns; struct sdb_column_t *column = tbl->columns + column_index; bsd_data_t read_data; int is_smallest = SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method); int r = sdb_read_data( & read_ctx, & read_data, ! is_smallest); if( r<0) { return_code = r; goto compute_serialization_methods_exit; } if( is_smallest) { struct data_analysis_t *data = analysis_data + (current_smallest++ % nsmallest); double dvalue = (BSD_INT == read_data.type) ? ((double)read_data.content.i) : read_data.content.d; // This will cause errors, QPV size will be marked as wrong later. int ivalue = (BSD_INT == read_data.type) ? 
read_data.content.i : 0; data->vsize += read_ctx.nbytes; if( i<tbl->ncolumns) { // first cell data->dvsize += bss_double_size(dvalue); data->qpvsize += bss_int_size(ivalue); data->qpvcurrentn = 0; } else { int qpvshift = ivalue - (data->iprevious + data->qpvperiod); if( 0 == qpvshift) { data->qpvcurrentn++; } else { data->qpvsize += bss_int_size(qpvshift) + bss_int_size(data->qpvcurrentn); data->qpvcurrentn = 0; } // this is not useful to care about corner cases here, impact on computed size, if any, is negligible data->dvsize += bss_int_size(floor((dvalue - data->dprevious)/data->dvfactor)); } data->dprevious = dvalue; data->iprevious = ivalue; } } // Finalize computation for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); if( !column->data_analysis.all_integer) { data->qpvsize = INT_MAX; } //#define SDB_VERBOSE_PRINT #ifdef SDB_VERBOSE_PRINT printf("Data analysis results:\n" " - List: size %d bytes\n" " - Deltas Vector: size %d bytes; factor %f\n" " - Quasi Periodic Vector: size %d bytes; period %d\n\n", data->vsize, data->dvsize, data->dvfactor, data->qpvsize, data->qpvperiod); #endif // See also the "--FIXME M3DA QPV" tags in stagedb.lua tests // (some tests has been disabled/chaged) if( data->qpvsize < data->dvsize && data->qpvsize < data->vsize) { column->arg = data->qpvperiod; column->data_analysis.method = SDB_SM_QUASIPERIODIC_VECTOR; } else if( data->dvsize < data->vsize) { column->arg = data->dvfactor; column->data_analysis.method = SDB_SM_DELTAS_VECTOR; } else { column->data_analysis.method = SDB_SM_LIST; } } } compute_serialization_methods_exit: sdb_read_close( & read_ctx); free( analysis_data); return return_code; }
static bss_status_t serialize_column_deltasvector( struct sdb_table_t *tbl) { struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx; struct sdb_read_ctx_t *read_ctx = & ctx->read_ctx; struct bss_ctx_t *bss_ctx = ctx->bss_ctx; struct bsd_data_t bsd_data; struct sdb_column_t *column = tbl->columns + ctx->current_column; int nrows = tbl->nwrittenobjects / tbl->ncolumns; int nobjectstoread = nrows * tbl->ncolumns; //TODO could optimize away end of last row int incolumn = 1; switch (ctx->stage) { default: return BSS_EINTERNAL; case SDB_SS_MAP_LABEL_SENT: TRY( bss_object(bss_ctx, SDB_CLSID_DELTAS_VECTOR), SDB_SS_COLUMN_OBJECT_DEFINED); //TODO: use constants for class ID case SDB_SS_COLUMN_OBJECT_DEFINED: TRY( bss_double(bss_ctx, column->arg), SDB_SS_COLUMN_FACTOR_SENT); while( read_ctx->nreadobjects < nobjectstoread) { incolumn = read_ctx->nreadobjects%tbl->ncolumns == ctx->current_column; int r = sdb_read_data( read_ctx, & bsd_data, ! incolumn); if( r<0) return r; // the process is roughly the same on both situations case SDB_SS_COLUMN_FACTOR_SENT: case SDB_SS_COLUMN_SENDING_CELLS: case SDB_SS_COLUMN_START_VALUE_SENT: if( incolumn) { double value; // do NOT use bsd_data here because the process could be interrupted which would lead to uninitialized values if( !get_bsd_value( read_ctx, &value)) return BSS_EINVALID; // on first value, open the deltas list for next values // a sub-switch is needed because of value which must be initialized in any case switch (ctx->stage) { case SDB_SS_COLUMN_FACTOR_SENT: { double start = deltasvector_integer(value / column->arg, fabs(value/1e15)); TRY( bss_int(bss_ctx, start), SDB_SS_COLUMN_START_VALUE_SENT); ctx->previous = value; } case SDB_SS_COLUMN_START_VALUE_SENT: // TODO: There might be a better CTXID TRY( bss_list(bss_ctx, nrows-1, BS_CTXID_NUMBER), SDB_SS_COLUMN_SENDING_CELLS); // TODO: can be typed break; default: { int int_delta = deltasvector_integer((value - ctx->previous) / column->arg, fabs(value/1e15)); TRY( 
bss_int(bss_ctx, int_delta), SDB_SS_COLUMN_SENDING_CELLS); if( int_delta != 0) { ctx->previous = value; } } } } } ctx->stage = SDB_SS_COLUMN_CONTENT_SENT; // close the deltas list and then DV container case SDB_SS_COLUMN_CONTENT_SENT: TRY( bss_close( bss_ctx), SDB_SS_COLUMN_INNER_LIST_CLOSED); case SDB_SS_COLUMN_INNER_LIST_CLOSED: TRY( bss_close( bss_ctx), SDB_SS_COLUMN_CLOSED); } return BSS_EOK; }