int tpl_compat (const TUPLE *tpl1, const TUPLE *tpl2) { /* --- check tuples for compatibility */ ATT *att; /* to traverse the attributes */ int i1, i2, k; /* integer values, loop variable */ float f1, f2; /* float values */ if (!tpl1) { tpl1 = tpl2; tpl2 = NULL; } if (!tpl1) return -1; /* check for at least one tuple */ for (k = as_attcnt(tpl1->attset); --k >= 0; ) { att = as_att(tpl1->attset, k); /* traverse the columns and */ switch (att_type(att)) { /* get the attribute and its type */ case AT_REAL: /* if real-valued column */ f1 = tpl1->cols[k].f; /* check for different known values */ f2 = (tpl2) ? tpl2->cols[k].f : att_inst(att)->f; if ((f1 != f2) && (f1 > NV_REAL) && (f2 > NV_REAL)) return 0; break; case AT_INT: /* if integer-valued column */ i1 = tpl1->cols[k].i; /* check for different known values */ i2 = (tpl2) ? tpl2->cols[k].i : att_inst(att)->i; if ((i1 != i2) && (i1 > NV_INT) && (i2 > NV_INT)) return 0; break; default: /* if nominal-valued column */ i1 = tpl1->cols[k].i; /* check for different known values */ i2 = (tpl2) ? tpl2->cols[k].i : att_inst(att)->i; if ((i1 != i2) && (i1 > NV_NOM) && (i2 > NV_NOM)) return 0; break; /* if different known values exist, */ } /* return 'tuples are incompatible' */ } return -1; /* return 'tuples are compatible' */ } /* tpl_compat() */
int tpl_nullcnt (const TUPLE *tpl) { /* --- count null values */ CINST *col; /* to traverse the columns */ int i; /* loop variable */ int cnt = 0; /* number of null columns */ assert(tpl); /* check the function argument */ for (col = tpl->cols +(i = as_attcnt(tpl->attset)); --i >= 0; ) { switch (att_type(as_att(tpl->attset, i))) { case AT_REAL: if ((--col)->f <= NV_REAL) cnt++; break; case AT_INT : if ((--col)->i <= NV_INT) cnt++; break; default : if ((--col)->i <= NV_NOM) cnt++; break; } /* traverse the tuple columns */ } /* and if a column is null, */ return cnt; /* increment the column counter; */ } /* tpl_nullcnt() */ /* return number of null columns */
/*----------------------------------------------------------------------
  tpl_isect: compute the intersection of two tuples, column by column.
  For each column the known value is taken; a null value in one tuple
  is filled from the other one. The result is written to the columns
  of res or, if res is NULL, to the instances of the attributes of the
  attribute set of tpl1.
  Returns -1 as soon as a column holds two different known values
  (columns with a higher index have already been written by then),
  otherwise a flag set: bit 0 (value 1) is still set if no null value
  of tpl1 had to be replaced, bit 1 (value 2) likewise for tpl2.
  Both tuples must be given and have the same number of columns
  (checked with assert).
----------------------------------------------------------------------*/
int tpl_isect (TUPLE *res, TUPLE *tpl1, const TUPLE *tpl2)
{                               /* --- compute a tuple intersection */
  ATT   *att;                   /* attribute of the current column */
  INST  *dst;                   /* destination of the result value */
  CINST *a, *b;                 /* current columns of tpl1 and tpl2 */
  int   k;                      /* column index */
  int   type;                   /* type of the current column */
  int   nullval;                /* null value of an int/nominal column */
  int   same = 3;               /* 'identical to intersection' flags */

  assert(tpl1 && tpl2           /* check the function arguments */
     && (as_attcnt(tpl1->attset) == as_attcnt(tpl2->attset)));
  for (k = as_attcnt(tpl1->attset); --k >= 0; ) {
    a    = tpl1->cols +k;       /* get the two source columns, */
    b    = tpl2->cols +k;       /* the attribute and where to */
    att  = as_att(tpl1->attset, k);     /* store the result value */
    dst  = (res) ? res->cols +k : att_inst(att);
    type = att_type(att);
    if (type == AT_REAL) {      /* -- real-valued column */
      if (a->f != b->f) {       /* if the values differ */
        if      (a->f <= NV_REAL) {     /* first value is null: */
          dst->f = b->f;        /* take the second value and note */
          same &= ~1; }         /* that tpl1 is not the result */
        else if (b->f <= NV_REAL) {     /* second value is null: */
          dst->f = a->f;        /* take the first value and note */
          same &= ~2; }         /* that tpl2 is not the result */
        else                    /* two different known values: */
          return -1; }          /* there is no intersection */
      else                      /* identical values: */
        dst->f = a->f;          /* copy either of them */
      continue;                 /* go on with the next column */
    }
    nullval = (type == AT_INT)  /* -- integer or nominal column */
            ? NV_INT : NV_NOM;  /* get the appropriate null value */
    if (a->i != b->i) {         /* if the values differ */
      if      (a->i <= nullval) {       /* first value is null: */
        dst->i = b->i;          /* take the second value and note */
        same &= ~1; }           /* that tpl1 is not the result */
      else if (b->i <= nullval) {       /* second value is null: */
        dst->i = a->i;          /* take the first value and note */
        same &= ~2; }           /* that tpl2 is not the result */
      else                      /* two different known values: */
        return -1; }            /* there is no intersection */
    else                        /* identical values: */
      dst->i = a->i;            /* copy either of them */
  }
  return same;                  /* return the identity flags */
}  /* tpl_isect() */
bool TChain::save(std::string fname, std::string group_name, size_t index, std::string dim_name, int compression, int subsample, bool converged, float lnZ) const { if((compression<0) || (compression > 9)) { std::cerr << "! Invalid gzip compression level: " << compression << std::endl; return false; } H5::Exception::dontPrint(); H5::H5File *file = H5Utils::openFile(fname); if(file == NULL) { return false; } /* try { file->unlink(group_name); } catch(...) { // pass } */ H5::Group *group = H5Utils::openGroup(file, group_name); if(group == NULL) { delete file; return false; } /* * Attributes */ // Datatype H5::CompType att_type(sizeof(TChainAttribute)); hid_t tid = H5Tcopy(H5T_C_S1); H5Tset_size(tid, H5T_VARIABLE); att_type.insertMember("dim_name", HOFFSET(TChainAttribute, dim_name), tid); //att_type.insertMember("total_weight", HOFFSET(TChainAttribute, total_weight), H5::PredType::NATIVE_FLOAT); //att_type.insertMember("ndim", HOFFSET(TChainAttribute, ndim), H5::PredType::NATIVE_UINT64); //att_type.insertMember("length", HOFFSET(TChainAttribute, length), H5::PredType::NATIVE_UINT64); // Dataspace int att_rank = 1; hsize_t att_dim = 1; H5::DataSpace att_space(att_rank, &att_dim); // Dataset //H5::Attribute att = group->createAttribute("parameter names", att_type, att_space); TChainAttribute att_data; att_data.dim_name = new char[dim_name.size()+1]; std::strcpy(att_data.dim_name, dim_name.c_str()); //att_data.total_weight = total_weight; //att_data.ndim = N; //att_data.length = length; //att.write(att_type, &att_data); delete[] att_data.dim_name; //int att_rank = 1; //hsize_t att_dim = 1; H5::DataType conv_dtype = H5::PredType::NATIVE_UCHAR; H5::DataSpace conv_dspace(att_rank, &att_dim); //H5::Attribute conv_att = H5Utils::openAttribute(group, "converged", conv_dtype, conv_dspace); //conv_att.write(conv_dtype, &converged); H5::DataType lnZ_dtype = H5::PredType::NATIVE_FLOAT; H5::DataSpace lnZ_dspace(att_rank, &att_dim); //H5::Attribute lnZ_att = 
H5Utils::openAttribute(group, "ln Z", lnZ_dtype, lnZ_dspace); //lnZ_att.write(lnZ_dtype, &lnZ); // Creation property list to be used for all three datasets H5::DSetCreatPropList plist; //plist.setDeflate(compression); // gzip compression level float fillvalue = 0; plist.setFillValue(H5::PredType::NATIVE_FLOAT, &fillvalue); H5D_layout_t layout = H5D_COMPACT; plist.setLayout(layout); /* * Choose subsample of points in chain */ size_t *el_idx = NULL; size_t *subsample_idx = NULL; if(subsample > 0) { size_t tot_weight_tmp = (size_t)ceil(total_weight); el_idx = new size_t[tot_weight_tmp]; size_t unrolled_idx = 0; size_t chain_idx = 0; std::vector<double>::const_iterator it, it_end; it_end = w.end(); for(it = w.begin(); it != it_end; ++it, chain_idx++) { for(size_t n = unrolled_idx; n < unrolled_idx + (size_t)(*it); n++) { el_idx[n] = chain_idx; } unrolled_idx += (size_t)(*it); } assert(chain_idx == length); gsl_rng *r; seed_gsl_rng(&r); subsample_idx = new size_t[tot_weight_tmp]; for(size_t i=0; i<subsample; i++) { subsample_idx[i] = el_idx[gsl_rng_uniform_int(r, tot_weight_tmp)]; } } /* * Coordinates */ // Dataspace hsize_t dim; if(subsample > 0) { dim = subsample; } else { dim = length; } // Chunking (required for compression) int rank = 2; hsize_t coord_dim[2] = {dim, N}; //if(dim < chunk) { //plist.setChunk(rank, &(coord_dim[0])); //} else { // plist.setChunk(rank, &chunk); //} H5::DataSpace x_dspace(rank, &(coord_dim[0])); // Dataset //std::stringstream x_dset_path; //x_dset_path << group_name << "/chain/coords"; std::stringstream coordname; coordname << "coords " << index; H5::DataSet* x_dataset = new H5::DataSet(group->createDataSet(coordname.str(), H5::PredType::NATIVE_FLOAT, x_dspace, plist)); // Write float *buf = new float[N*dim]; if(subsample > 0) { size_t tmp_idx; for(size_t i=0; i<subsample; i++) { tmp_idx = subsample_idx[i]; for(size_t k=0; k<N; k++) { buf[N*i+k] = x[N*tmp_idx+k]; } } } else { for(size_t i=0; i<dim; i++) { buf[i] = x[i]; } } 
x_dataset->write(buf, H5::PredType::NATIVE_FLOAT); /* * Weights */ // Dataspace if(subsample <= 0) { dim = w.size(); rank = 1; H5::DataSpace w_dspace(rank, &dim); // Dataset //std::stringstream w_dset_path; //w_dset_path << group_name << "/chain/weights"; H5::DataSet* w_dataset = new H5::DataSet(group->createDataSet("weights", H5::PredType::NATIVE_FLOAT, w_dspace, plist)); // Write if(subsample > 0) { for(size_t i=0; i<subsample; i++) { buf[i] = 1.; } } else { assert(w.size() < x.size()); for(size_t i=0; i<w.size(); i++) { buf[i] = w[i]; } } w_dataset->write(buf, H5::PredType::NATIVE_FLOAT); delete w_dataset; } /* * Probability densities */ // Dataspace rank = 1; H5::DataSpace L_dspace(rank, &dim); // Dataset //std::stringstream L_dset_path; //L_dset_path << group_name << "/chain/probs"; std::stringstream lnpname; lnpname << "ln_p " << index; H5::DataSet* L_dataset = new H5::DataSet(group->createDataSet(lnpname.str(), H5::PredType::NATIVE_FLOAT, L_dspace, plist)); // Write if(subsample > 0) { for(size_t i=0; i<subsample; i++) { buf[i] = L[subsample_idx[i]]; } } else { assert(L.size() < x.size()); for(size_t i=0; i<L.size(); i++) { buf[i] = L[i]; } } L_dataset->write(buf, H5::PredType::NATIVE_FLOAT); if(subsample > 0) { delete[] el_idx; delete[] subsample_idx; } delete[] buf; delete x_dataset; delete L_dataset; delete group; delete file; return true; }
int main (int argc, char *argv[]) { /* --- main function */ int i, k = 0; /* loop variables, counter */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_hdr = NULL; /* name of table header file */ char *fn_tab = NULL; /* name of table file */ char *fn_dom = NULL; /* name of domains file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *nullchs = NULL; /* null value characters */ char *comment = NULL; /* comment characters */ int flags = 0; /* table file read flags */ int sort = 0; /* flag for domain sorting */ int atdet = 0; /* flag for automatic type determ. */ int ivals = AS_IVALS; /* flag for numeric intervals */ int maxlen = 0; /* maximal output line length */ int attid; /* loop variable for attributes */ ATT *att; /* to traverse attributes */ clock_t t; /* timer for measurement */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print startup/usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no argument is given */ printf("usage: %s [options] " "[-d|-h hdrfile] tabfile domfile\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-s sort domains alphabetically " "(default: order of appearance)\n"); printf("-S sort domains numerically/alphabetically\n"); printf("-a automatic type determination " "(default: all nominal)\n"); printf("-i do not print intervals for numeric attributes\n"); printf("-l# output line length (default: no limit)\n"); printf("-b# blank characters (default: \" \\t\\r\")\n"); printf("-f# field separators (default: \" \\t\")\n"); printf("-r# record separators (default: \"\\n\")\n"); printf("-C# comment characters (default: \"#\")\n"); printf("-u# null value characters (default: \"?*\")\n"); printf("-n number of tuple occurrences in last 
field\n"); printf("-d use default header " "(field names = field numbers)\n"); printf("-h read table header (field names) from hdrfile\n"); printf("hdrfile file containing table header (field names)\n"); printf("tabfile table file to read " "(field names in first record)\n"); printf("domfile file to write domain descriptions to\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse the arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case 's': sort = 1; break; case 'S': sort = 2; break; case 'a': atdet = 1; break; case 'i': ivals = 0; break; case 'l': maxlen = (int)strtol(s, &s, 0); break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'u': optarg = &nullchs; break; case 'C': optarg = &comment; break; case 'n': flags |= AS_WEIGHT; break; case 'd': flags |= AS_DFLT; break; case 'h': optarg = &fn_hdr; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-option */ case 0: fn_tab = s; break; case 1: fn_dom = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if (k != 2) error(E_ARGCNT); /* check number of arguments */ if (fn_hdr && (strcmp(fn_hdr, "-") == 0)) fn_hdr = ""; /* convert "-" to "" */ if (fn_hdr) /* set header flags */ flags = AS_ATT | (flags & ~AS_DFLT); /* --- determine attributes and domains --- */ attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM); /* create an attribute set */ 
as_chars(attset, recseps, fldseps, blanks, nullchs, comment); fprintf(stderr, "\n"); /* set delimiter characters */ i = io_tab(attset, fn_hdr, fn_tab, flags, 1); if (i != 0) error(-i); /* read the table */ /* --- convert/sort domains --- */ if (atdet) { /* if automatic type determination */ for (attid = as_attcnt(attset); --attid >= 0; ) att_conv(as_att(attset, attid), AT_AUTO, NULL); } /* try to convert attributes */ if (sort) { /* if to sort domains (values) */ for (attid = as_attcnt(attset); --attid >= 0; ) { att = as_att(attset, attid); if (att_type(att) != AT_NOM) continue; att_valsort(att, (sort > 1) ? numcmp : strcmp, NULL, 0); } /* traverse nominal attributes */ } /* and sort their domains */ /* --- write output file --- */ t = clock(); /* start the timer */ if (fn_dom && *fn_dom) /* if a domain file name is given, */ out = fopen(fn_dom, "w"); /* open domain file for writing */ else { /* if no domain file name is given, */ out = stdout; fn_dom = "<stdout>"; } /* write to stdout */ fprintf(stderr, "writing %s ... ", fn_dom); if (!out) error(E_FOPEN, fn_dom); if (as_desc(attset, out, AS_TITLE|ivals, maxlen) != 0) error(E_FWRITE, fn_dom); /* write domain descriptions */ if (out != stdout) { /* if not written to stdout, */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_dom); } /* print a success message */ fprintf(stderr, "[%d attribute(s)] done", as_attcnt(attset)); fprintf(stderr, " [%.2fs].\n", SEC_SINCE(t)); /* --- clean up --- */ #ifndef NDEBUG as_delete(attset); /* delete attribute set */ #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */ } /* main() */