/* Records that the value being stored in the current column is not an integer.
 *
 * The current column is the one at index (nwrittenobjects % ncolumns).
 * Nothing happens unless that column uses the SDB_SM_SMALLEST container.
 *
 * tbl     table receiving the value.
 * numeric non-zero when the value is still numeric; zero clears the
 *         column's all_numeric flag as well. */
void sdb_analyze_noninteger( sdb_table_t *tbl, unsigned char numeric) {
    sdb_column_t *col = &tbl->columns[tbl->nwrittenobjects % tbl->ncolumns];

    if( SDB_SM_SMALLEST != SDB_SM_CONTAINER(col->serialization_method)) {
        return; // this column is not subject to data analysis
    }

    // One non-integer value is enough to rule out integer-only handling.
    col->data_analysis.all_integer = 0;
    // all_numeric survives only if it was set and this value is numeric.
    col->data_analysis.all_numeric = (numeric && col->data_analysis.all_numeric);
}
/* Empties the table: releases the stored data and resets the write counters
 * and the per-column data analysis.
 * If there is a serialization in progress, it is canceled first.
 *
 * Returns:
 *   SDB_EOK       on success;
 *   SDB_EBADSTATE if the table is not in SDB_ST_READING state (after the
 *                 optional serialization cancel);
 *   SDB_EMEM      if the fresh RAM chunk cannot be allocated; the table state
 *                 is then set to SDB_ST_BROKEN;
 *   SDB_EBADFILE  if the backing file cannot be reopened (file storage only). */
int sdb_reset( sdb_table_t *tbl) {
    int i;

    if( tbl->state == SDB_ST_SERIALIZING) sdb_serialize_cancel( tbl);
    if( tbl->state != SDB_ST_READING) return SDB_EBADSTATE; // TODO must be able to reset an unconfigured table

    switch( tbl->storage_kind) {
    case SDB_SK_RAM: {
        struct sdb_chunk_t *p, *q;
        struct sdb_ram_storage_t *ram = & tbl->storage.ram;

        // Free the whole chunk list...
        p = ram->first_chunk;
        while( p) {
            q = p->next;
            BS_MEM_FREE( p);
            p = q;
        }

        // ...and start over with a single minimal chunk.
        p = BS_MEM_ALLOC( sizeof( struct sdb_chunk_t) + SDB_MIN_CHUNK_SIZE - SDB_CHUNK_SIZE);
        if( ! p) {
            // Every chunk has been freed above: do not leave the table
            // pointing at released memory, otherwise any later walk of the
            // chunk list would be a use-after-free / double free.
            ram->first_chunk = NULL;
            ram->last_chunk = NULL;
            tbl->state = SDB_ST_BROKEN;
            return SDB_EMEM;
        }
        p->next = NULL;
        ram->first_chunk = p;
        ram->last_chunk = p;
        // NOTE(review): this stores the address of the last_chunk field itself,
        // not an address inside the new chunk -- confirm against the
        // declaration of last_chunk_ptr that this is intended.
        ram->last_chunk_ptr = & ram->last_chunk;
        ram->last_chunk_size = SDB_MIN_CHUNK_SIZE;
        break;
    }
#ifdef SDB_FILE_SUPPORT
    case SDB_SK_FILE:
        // identifier/filename is stored as the 1st conf string
        tbl->storage.file = freopen( tbl->conf_strings, "w+", tbl->storage.file); // erases content
        // NOTE(review): on failure storage.file is left NULL while the table
        // state is unchanged -- confirm callers cope with that.
        if( ! tbl->storage.file) return SDB_EBADFILE;
        break;
#endif
    }

    tbl->nwrittenbytes = 0;
    tbl->nwrittenobjects = 0;

    // reset data analysis
    for( i=0; i<tbl->ncolumns; i++) {
        sdb_column_t *c = tbl->columns + i;
        if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(c->serialization_method)) {
            c->data_analysis.delta_sum = 0;
            c->data_analysis.all_integer = 1;
            c->data_analysis.all_numeric = 1;
        }
    }

    if( tbl->bss_ctx) bss_reset( tbl->bss_ctx);
    return SDB_EOK;
}
/* Updates the running data analysis of the current column with the integer
 * value `i` that is being stored.
 *
 * The current column is the one at index (nwrittenobjects % ncolumns).
 * Nothing happens unless that column uses the SDB_SM_SMALLEST container and
 * has held only integers so far. */
void sdb_analyze_integer( sdb_table_t *tbl, int i) {
    sdb_column_t *col = &tbl->columns[tbl->nwrittenobjects % tbl->ncolumns];

    if( SDB_SM_SMALLEST != SDB_SM_CONTAINER(col->serialization_method)) return;
    if( !col->data_analysis.all_integer) return;

    if( tbl->nwrittenobjects >= tbl->ncolumns) {
        // Not the first row: accumulate the difference with the previous cell.
        col->data_analysis.delta_sum += i - col->data_analysis.prev_value;
    } else {
        // First row: seed the GCD so that the computation below (if any)
        // starts from the first value.
        col->data_analysis.gcd = i;
    }

    // The GCD is only tracked when the precision is not forced.
    if( 0 == (col->serialization_method & SDB_SM_FIXED_PRECISION)) {
        col->data_analysis.gcd = gcd(i, col->data_analysis.gcd);
    }

    col->data_analysis.prev_value = i;
}
/* Configures how the column named `label` is serialized.
 *
 * tbl       table owning the column.
 * label     column name, resolved through sdb_getcolnum().
 * sm        serialization method to apply.
 * precision stored as the column's argument; for SDB_SM_SMALLEST columns it
 *           is also kept as the original argument of the data analysis,
 *           which is reset.
 *
 * Returns SDB_EOK on success, or SDB_NCOLUMN_INVALID when the column lookup
 * yields a value >= SDB_NCOLUMN_INVALID.
 * NOTE(review): the failure value is a column-number sentinel rather than an
 * SDB_E* status code -- confirm callers expect this. */
int sdb_setcolumn( sdb_table_t *tbl, const char *label, enum sdb_serialization_method_t sm, double precision) {
    int column_index = sdb_getcolnum( tbl, label);
    sdb_column_t *column;

    if( column_index >= SDB_NCOLUMN_INVALID) {
        return SDB_NCOLUMN_INVALID;
    }

    column = &tbl->columns[column_index];
    column->serialization_method = sm;
    column->arg = precision;

    if( SDB_SM_CONTAINER(sm) == SDB_SM_SMALLEST) {
        // Start the data analysis from a clean slate.
        column->data_analysis.original_arg = precision;
        column->data_analysis.delta_sum = 0;
        column->data_analysis.all_integer = 1;
        column->data_analysis.all_numeric = 1;
    }

    // read existing cells if table is a file
#ifdef SDB_FILE_SUPPORT
    if( SDB_SK_FILE == tbl->storage_kind) {
        sdb_restore_file_cells( tbl);
    }
#endif

    return SDB_EOK;
}
/* Serializes the whole table as a map: one entry per column, keyed by the
 * column label, whose value is produced by the serializer matching the
 * column's (possibly computed) serialization method.
 *
 * The function is written as a resumable state machine: the outer switch
 * jumps to the label matching ctx->stage, including labels located inside
 * the column loop, and ctx->current_column is kept in the serialization
 * context rather than in a local variable.
 *
 * NOTE(review): TRY is a macro defined outside this block; it presumably
 * returns early when the wrapped call does not succeed and otherwise records
 * its second argument as the new stage -- confirm against its definition.
 *
 * Returns BSS_EOK once everything has been sent and the serialization context
 * closed, BSS_EINTERNAL for an unknown stage, BSS_EINVALID for an unsupported
 * serialization method, or a negative code from
 * compute_serialization_methods(). */
static bss_status_t serialize_table( struct sdb_table_t *tbl) {
    struct sdb_serialization_ctx_t *ctx = tbl->serialization_ctx;
    struct bss_ctx_t *bss_ctx = ctx->bss_ctx;
    switch( ctx->stage) {
    default: return BSS_EINTERNAL; // stage not handled by any label below
    case SDB_SS_INITIALIZED:
        // TODO: check CTXID
        TRY( bss_map( bss_ctx, -1, BS_CTXID_GLOBAL), SDB_SS_MAP_OPENED);
        // Resolve the actual method of every SDB_SM_SMALLEST column before sending any.
        int r = compute_serialization_methods(tbl);
        if( r<0) return r;
        /* fall through */
    case SDB_SS_MAP_OPENED:
        for( ctx->current_column = 0; ctx->current_column<tbl->ncolumns; ctx->current_column++) {
    /* The labels below sit inside the loop body: jumping here skips the loop
     * initialization and continues with the stored ctx->current_column. */
    case SDB_SS_COLUMN_CLOSED:
            // Map key: the column label, stored in the configuration strings.
            TRY( bss_string( bss_ctx, tbl->conf_strings + tbl->columns[ctx->current_column].label_offset), SDB_SS_MAP_LABEL_SENT);
            sdb_read_init( & ctx->read_ctx, tbl);
            /* fall through */
    /* All per-column stages enter at the same point and dispatch to the
     * column serializer selected below. */
    case SDB_SS_MAP_LABEL_SENT:
    case SDB_SS_COLUMN_OBJECT_DEFINED:
    case SDB_SS_COLUMN_FACTOR_SENT:
    case SDB_SS_COLUMN_START_VALUE_SENT:
    case SDB_SS_COLUMN_SENDING_CELLS:
    case SDB_SS_COLUMN_CONTENT_SENT:
    case SDB_SS_COLUMN_SHIFT_SENT:
    case SDB_SS_COLUMN_LAST_SHIFT_SENT:
    case SDB_SS_COLUMN_INNER_LIST_CLOSED: {
                sdb_column_t *column = tbl->columns + ctx->current_column;
                // For SDB_SM_SMALLEST columns, use the method chosen by data analysis.
                int sm = (SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) ? column->data_analysis.method : column->serialization_method;
                switch(SDB_SM_CONTAINER(sm)) {
                case SDB_SM_LIST:
                case SDB_SM_FASTEST:
                    TRY( serialize_column_list( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                case SDB_SM_DELTAS_VECTOR:
                    TRY( serialize_column_deltasvector( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                case SDB_SM_QUASIPERIODIC_VECTOR:
                    TRY( serialize_column_quasiperiodicvector( tbl), SDB_SS_COLUMN_CLOSED);
                    break;
                default: return BSS_EINVALID; // bad value, not implemented, ...
                }
            }
            sdb_read_close( & ctx->read_ctx);
        }
        ctx->stage = SDB_SS_ALL_COLUMNS_SENT;
        /* fall through */
    case SDB_SS_ALL_COLUMNS_SENT:
        // Close the map opened in the SDB_SS_INITIALIZED stage.
        TRY( bss_close( bss_ctx), SDB_SS_MAP_CLOSED);
        /* fall through */
    case SDB_SS_MAP_CLOSED:;
    }
    serialize_close( tbl);
    return BSS_EOK;
}
/* Compute the smallest serialization container using data analysis and stored data. * The method is to estimate as precisely as possible final size and take the smallest one. * Store the result in serialization_data struct. */ static int compute_serialization_methods( struct sdb_table_t *tbl) { // Data used for 2nd pass computations. struct data_analysis_t { int vsize,dvsize, qpvsize; // Computed sizes. double dvfactor; // Data for DV computations. int qpvperiod, qpvcurrentn; // Data for QPV computations. double dprevious; // Previous data as double. int iprevious; // Previous data as int. }; sdb_ncolumn_t nsmallest = 0; sdb_ncolumn_t current_smallest; int i; struct data_analysis_t *analysis_data = NULL; int return_code = SDB_EOK; struct sdb_read_ctx_t read_ctx; // count columns that needs serialization computation for( i=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method)) { // if any data is not numeric, DV and QPV are not able to serialize them if( !column->data_analysis.all_numeric) { column->data_analysis.method = SDB_SM_LIST; } // QPV period guessing is integer only, DV support floats only when factor is forced. else if( !column->data_analysis.all_integer && !(column->serialization_method & SDB_SM_FIXED_PRECISION)) { column->data_analysis.method = SDB_SM_LIST; } // otherwise the serialization method must be computed else { column->data_analysis.method = SDB_SM_SMALLEST; nsmallest++; } } } if( 0 == nsmallest) return SDB_EOK; // no column to analyze. 
analysis_data = malloc( sizeof(struct data_analysis_t) * nsmallest); if( !analysis_data) return SDB_EMEM; // Initialize analysis data for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); data->vsize = 0; data->dvfactor = (column->serialization_method & SDB_SM_FIXED_PRECISION) ? column->data_analysis.original_arg : column->data_analysis.gcd; data->dvsize = bss_double_size(data->dvfactor); data->qpvperiod = round((double)column->data_analysis.delta_sum / (double)((tbl->nwrittenobjects / tbl->ncolumns) - 1)); data->qpvsize = bss_int_size(data->qpvperiod); } } // read table and analyze data sdb_read_init( & read_ctx, tbl); for( i=0, current_smallest=0; i<tbl->nwrittenobjects; i++) { int column_index = read_ctx.nreadobjects%tbl->ncolumns; struct sdb_column_t *column = tbl->columns + column_index; bsd_data_t read_data; int is_smallest = SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method); int r = sdb_read_data( & read_ctx, & read_data, ! is_smallest); if( r<0) { return_code = r; goto compute_serialization_methods_exit; } if( is_smallest) { struct data_analysis_t *data = analysis_data + (current_smallest++ % nsmallest); double dvalue = (BSD_INT == read_data.type) ? ((double)read_data.content.i) : read_data.content.d; // This will cause errors, QPV size will be marked as wrong later. int ivalue = (BSD_INT == read_data.type) ? 
read_data.content.i : 0; data->vsize += read_ctx.nbytes; if( i<tbl->ncolumns) { // first cell data->dvsize += bss_double_size(dvalue); data->qpvsize += bss_int_size(ivalue); data->qpvcurrentn = 0; } else { int qpvshift = ivalue - (data->iprevious + data->qpvperiod); if( 0 == qpvshift) { data->qpvcurrentn++; } else { data->qpvsize += bss_int_size(qpvshift) + bss_int_size(data->qpvcurrentn); data->qpvcurrentn = 0; } // this is not useful to care about corner cases here, impact on computed size, if any, is negligible data->dvsize += bss_int_size(floor((dvalue - data->dprevious)/data->dvfactor)); } data->dprevious = dvalue; data->iprevious = ivalue; } } // Finalize computation for( i=0, current_smallest=0; i<tbl->ncolumns; i++) { struct sdb_column_t *column = tbl->columns + i; if( SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->serialization_method) && SDB_SM_SMALLEST == SDB_SM_CONTAINER(column->data_analysis.method)) { struct data_analysis_t *data = analysis_data + (current_smallest++); if( !column->data_analysis.all_integer) { data->qpvsize = INT_MAX; } //#define SDB_VERBOSE_PRINT #ifdef SDB_VERBOSE_PRINT printf("Data analysis results:\n" " - List: size %d bytes\n" " - Deltas Vector: size %d bytes; factor %f\n" " - Quasi Periodic Vector: size %d bytes; period %d\n\n", data->vsize, data->dvsize, data->dvfactor, data->qpvsize, data->qpvperiod); #endif // See also the "--FIXME M3DA QPV" tags in stagedb.lua tests // (some tests has been disabled/chaged) if( data->qpvsize < data->dvsize && data->qpvsize < data->vsize) { column->arg = data->qpvperiod; column->data_analysis.method = SDB_SM_QUASIPERIODIC_VECTOR; } else if( data->dvsize < data->vsize) { column->arg = data->dvfactor; column->data_analysis.method = SDB_SM_DELTAS_VECTOR; } else { column->data_analysis.method = SDB_SM_LIST; } } } compute_serialization_methods_exit: sdb_read_close( & read_ctx); free( analysis_data); return return_code; }