/*
 * Rebuild the in-memory statistics of a table from the cells it already holds.
 *
 * Every cell is read back in storage order with sdb_read_data(). For each one
 * the data-analysis state is updated (integer cells through
 * sdb_analyze_integer(), every other cell through sdb_analyze_noninteger()),
 * and tbl->nwrittenbytes / tbl->nwrittenobjects are increased by the value
 * returned by the read and by one respectively.
 *
 * The loop stops on the first non-positive return of sdb_read_data(); that
 * value is not reported to the caller.
 *
 * @param tbl table whose counters and data analysis are restored.
 */
static void sdb_restore_file_cells( sdb_table_t *tbl) {
    struct sdb_read_ctx_t rctx;
    struct bsd_data_t bsd;
    int nread;

    sdb_read_init( & rctx, tbl);

    // NOTE(review): elsewhere in this file the third argument of sdb_read_data()
    // is non-zero only for cells whose content is not used afterwards. Here the
    // content IS used; confirm that passing 1 still fills in `bsd`.
    while( (nread = sdb_read_data( & rctx, & bsd, 1)) > 0) {
        // restore data analysis
        // The data type lives in `type` (the field the rest of this file compares
        // against BSD_INT), not in `kind`.
        switch( bsd.type) {
        case BSD_INT:
            sdb_analyze_integer(tbl, bsd.content.i);
            break;
        case BSD_DOUBLE:
            // presumably the flag means "numeric" - confirm against sdb_analyze_noninteger()
            sdb_analyze_noninteger(tbl, 1);
            break;
        default:
            sdb_analyze_noninteger(tbl, 0);
            break;
        }
        tbl->nwrittenbytes += nread;
        tbl->nwrittenobjects ++;
    }
    // printf( "exited with code %d, found %d objects encoded in %d bytes\n",
    //         nread, tbl->nwrittenobjects, tbl->nwrittenbytes);
    sdb_read_close( & rctx);
}
/*
 * Serialize a whole table as a map of "column label -> column content".
 *
 * The function is a resumable state machine: it dispatches on ctx->stage, and
 * several case labels sit INSIDE the body of the column loop, so a call can
 * jump straight back into the middle of the loop with ctx->current_column
 * untouched. Statement order and label placement are therefore significant;
 * do not reorder.
 *
 * TRY() is a macro defined outside this block. NOTE(review): it presumably
 * returns early when its first argument reports a non-OK status and records
 * its second argument as the new stage otherwise, possibly using the local
 * `ctx` by name - confirm against its definition before renaming any local.
 *
 * @param tbl table to serialize; its serialization_ctx holds the stage,
 *            the current column, the read context and the bss context.
 * @return BSS_EOK once the map has been closed and serialize_close() called;
 *         BSS_EINTERNAL for an unknown stage; BSS_EINVALID for an unsupported
 *         container; a negative value from compute_serialization_methods();
 *         or whatever TRY() returns early with.
 */
static bss_status_t serialize_table( struct sdb_table_t *tbl) {
    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    switch( ctx->stage) {
    default: return BSS_EINTERNAL;
    case SDB_SS_INITIALIZED:
        // TODO: check CTXID
        TRY( bss_map( bss_ctx, -1, BS_CTXID_GLOBAL), SDB_SS_MAP_OPENED);
        // Resolve the SDB_SM_SMALLEST columns once, right after the map is opened.
        int r = compute_serialization_methods(tbl);
        if( r<0) return r;
        // intentional fall through
    case SDB_SS_MAP_OPENED:
        for( ctx->current_column = 0; ctx->current_column<tbl->ncolumns; ctx->current_column++) {
    // Entry point when the previous column is finished: the for-initializer is
    // skipped, so ctx->current_column keeps its value.
    case SDB_SS_COLUMN_CLOSED:
            // Map key: the column label, found at label_offset inside conf_strings.
            TRY( bss_string( bss_ctx, tbl->conf_strings + tbl->columns[ctx->current_column].label_offset), SDB_SS_MAP_LABEL_SENT);
            sdb_read_init( & ctx->read_ctx, tbl);
    // Every per-column stage enters here and calls the column serializer again.
    // NOTE(review): the serialize_column_* helpers presumably look at ctx->stage
    // themselves to resume where they stopped - not visible in this block.
    case SDB_SS_MAP_LABEL_SENT:
    case SDB_SS_COLUMN_OBJECT_DEFINED:
    case SDB_SS_COLUMN_FACTOR_SENT:
    case SDB_SS_COLUMN_START_VALUE_SENT:
    case SDB_SS_COLUMN_SENDING_CELLS:
    case SDB_SS_COLUMN_CONTENT_SENT:
    case SDB_SS_COLUMN_SHIFT_SENT:
    case SDB_SS_COLUMN_LAST_SHIFT_SENT:
    case SDB_SS_COLUMN_INNER_LIST_CLOSED:
            {
                sdb_column_t *column = tbl->columns + ctx->current_column;
                // A column configured as SDB_SM_SMALLEST uses the method chosen by
                // data analysis; any other column uses its configured method.
                int sm = (SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) ? column->data_analysis.method : column->serialization_method;
                switch(SDB_SM_CONTAINER(sm)) {
                case SDB_SM_LIST:
                case SDB_SM_FASTEST:
                    TRY( serialize_column_list( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                case SDB_SM_DELTAS_VECTOR:
                    TRY( serialize_column_deltasvector( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                case SDB_SM_QUASIPERIODIC_VECTOR:
                    TRY( serialize_column_quasiperiodicvector( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                default:
                    // Note: this path returns without calling sdb_read_close().
                    return BSS_EINVALID; // bad value, not implemented, ...
                }
            }
            sdb_read_close( & ctx->read_ctx);
        }
        // The stage is set by hand here: no TRY() separates the loop from the next label.
        ctx->stage = SDB_SS_ALL_COLUMNS_SENT;
    case SDB_SS_ALL_COLUMNS_SENT:
        TRY( bss_close( bss_ctx), SDB_SS_MAP_CLOSED);
    case SDB_SS_MAP_CLOSED:;
    }
    serialize_close( tbl);
    return BSS_EOK;
}
/* Compute the smallest serialization container using data analysis and stored data. * The method is to estimate as precisely as possible final size and take the smallest one. * Store the result in serialization_data struct. */ static int compute_serialization_methods( struct sdb_table_t *tbl) { // Data used for 2nd pass computations. struct data_analysis_t { int vsize,dvsize, qpvsize; // Computed sizes. double dvfactor; // Data for DV computations. int qpvperiod, qpvcurrentn; // Data for QPV computations. double dprevious; // Previous data as double. int iprevious; // Previous data as int. }; sdb_ncolumn_t nsmallest = 0; sdb_ncolumn_t current_smallest; int i; struct data_analysis_t *analysis_data = NULL; int return_code = SDB_EOK; struct sdb_read_ctx_t read_ctx; // count columns that needs serialization computation for( i=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) { // if any data is not numeric, DV and QPV are not able to serialize them if( !column->data_analysis.all_numeric) { column->data_analysis.method = SDB_SM_LIST; } // QPV period guessing is integer only, DV support floats only when factor is forced. else if( !column->data_analysis.all_integer && !(column->serialization_method & SDB_SM_FIXED_PRECISION)) { column->data_analysis.method = SDB_SM_LIST; } // otherwise the serialization method must be computed else { column->data_analysis.method = SDB_SM_SMALLEST; nsmallest++; } } } if( 0 == nsmallest) return SDB_EOK; // no column to analyze. 
analysis_data = malloc( sizeof(struct data_analysis_t) * nsmallest); if( !analysis_data) return SDB_EMEM; // Initialize analysis data for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); data->vsize = 0; data->dvfactor = (column->serialization_method & SDB_SM_FIXED_PRECISION) ? column->data_analysis.original_arg : column->data_analysis.gcd; data->dvsize = bss_double_size(data->dvfactor); data->qpvperiod = round((double)column->data_analysis.delta_sum / (double)((tbl->nwrittenobjects / tbl->ncolumns) - 1)); data->qpvsize = bss_int_size(data->qpvperiod); } } // read table and analyze data sdb_read_init( & read_ctx, tbl); for( i=0, current_smallest=0; i<tbl->nwrittenobjects; i++) { int column_index = read_ctx.nreadobjects%tbl->ncolumns; struct sdb_column_t *column = tbl->columns + column_index; bsd_data_t read_data; int is_smallest = SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method); int r = sdb_read_data( & read_ctx, & read_data, ! is_smallest); if( r<0) { return_code = r; goto compute_serialization_methods_exit; } if( is_smallest) { struct data_analysis_t *data = analysis_data + (current_smallest++ % nsmallest); double dvalue = (BSD_INT == read_data.type) ? ((double)read_data.content.i) : read_data.content.d; // This will cause errors, QPV size will be marked as wrong later. int ivalue = (BSD_INT == read_data.type) ? 
read_data.content.i : 0; data->vsize += read_ctx.nbytes; if( i<tbl->ncolumns) { // first cell data->dvsize += bss_double_size(dvalue); data->qpvsize += bss_int_size(ivalue); data->qpvcurrentn = 0; } else { int qpvshift = ivalue - (data->iprevious + data->qpvperiod); if( 0 == qpvshift) { data->qpvcurrentn++; } else { data->qpvsize += bss_int_size(qpvshift) + bss_int_size(data->qpvcurrentn); data->qpvcurrentn = 0; } // this is not useful to care about corner cases here, impact on computed size, if any, is negligible data->dvsize += bss_int_size(floor((dvalue - data->dprevious)/data->dvfactor)); } data->dprevious = dvalue; data->iprevious = ivalue; } } // Finalize computation for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); if( !column->data_analysis.all_integer) { data->qpvsize = INT_MAX; } //#define SDB_VERBOSE_PRINT #ifdef SDB_VERBOSE_PRINT printf("Data analysis results:\n" " - List: size %d bytes\n" " - Deltas Vector: size %d bytes; factor %f\n" " - Quasi Periodic Vector: size %d bytes; period %d\n\n", data->vsize, data->dvsize, data->dvfactor, data->qpvsize, data->qpvperiod); #endif // See also the "--FIXME M3DA QPV" tags in stagedb.lua tests // (some tests has been disabled/chaged) if( data->qpvsize < data->dvsize && data->qpvsize < data->vsize) { column->arg = data->qpvperiod; column->data_analysis.method = SDB_SM_QUASIPERIODIC_VECTOR; } else if( data->dvsize < data->vsize) { column->arg = data->dvfactor; column->data_analysis.method = SDB_SM_DELTAS_VECTOR; } else { column->data_analysis.method = SDB_SM_LIST; } } } compute_serialization_methods_exit: sdb_read_close( & read_ctx); free( analysis_data); return return_code; }