Пример #1
/*----------------------------------------------------------------------
  tpl_compat -- check two tuples for compatibility

  Two tuples are treated as compatible if no column holds two different
  known values, where a value counts as known if it is greater than the
  null marker of the column type (NV_REAL, NV_INT or NV_NOM).

  tpl1, tpl2  tuples to compare; either may be NULL. If exactly one is
              NULL, the other tuple is compared against the current
              instance values of its attributes (att_inst()).
  returns     -1 if the tuples are compatible (also if both are NULL),
               0 if some column holds two different known values
----------------------------------------------------------------------*/
int tpl_compat (const TUPLE *tpl1, const TUPLE *tpl2)
{                               /* --- check tuples for compatibility */
  ATT   *att;                   /* to traverse the attributes */
  int   i1, i2, k;              /* integer values, loop variable */
  float f1, f2;                 /* float values */

  if (!tpl1) { tpl1 = tpl2; tpl2 = NULL; }
  if (!tpl1) return -1;         /* check for at least one tuple */
  for (k = as_attcnt(tpl1->attset); --k >= 0; ) {
    att = as_att(tpl1->attset, k);    /* traverse the columns and */
    switch (att_type(att)) {    /* get the attribute and its type */
      case AT_REAL:             /* if real-valued column */
        f1 = tpl1->cols[k].f;   /* check for different known values */
        f2 = (tpl2) ? tpl2->cols[k].f : att_inst(att)->f;
        if ((f1 != f2) && (f1 > NV_REAL) && (f2 > NV_REAL)) return 0;
        break;                  /* do not fall through: the integer */
                                /* view of a real value is meaningless */
      case AT_INT:              /* if integer-valued column */
        i1 = tpl1->cols[k].i;   /* check for different known values */
        i2 = (tpl2) ? tpl2->cols[k].i : att_inst(att)->i;
        if ((i1 != i2) && (i1 > NV_INT)  && (i2 > NV_INT))  return 0;
        break;                  /* do not re-test against NV_NOM */
      default:                  /* if nominal-valued column */
        i1 = tpl1->cols[k].i;   /* check for different known values */
        i2 = (tpl2) ? tpl2->cols[k].i : att_inst(att)->i;
        if ((i1 != i2) && (i1 > NV_NOM)  && (i2 > NV_NOM))  return 0;
        break;                  /* if different known values exist, */
    }                           /* return 'tuples are incompatible' */
  }
  return -1;                    /* return 'tuples are compatible' */
}  /* tpl_compat() */
Пример #2
/*----------------------------------------------------------------------
  tpl_nullcnt -- count the null columns of a tuple

  A column counts as null if its value does not exceed the null marker
  of the column type (NV_REAL for real-valued, NV_INT for integer-valued
  and NV_NOM for all other columns).

  tpl      tuple to examine (must not be NULL)
  returns  the number of null columns
----------------------------------------------------------------------*/
int tpl_nullcnt (const TUPLE *tpl)
{                               /* --- count null values */
  int nulls = 0;                /* number of null columns found */
  int idx;                      /* index of the current column */

  assert(tpl);                  /* a tuple is required */
  idx = as_attcnt(tpl->attset); /* start behind the last column */
  while (--idx >= 0) {          /* and walk back to the first one */
    int type   = att_type(as_att(tpl->attset, idx));
    int isnull;                 /* whether the current column is null */

    if      (type == AT_REAL)   /* compare the column value to the */
      isnull = (tpl->cols[idx].f <= NV_REAL);
    else if (type == AT_INT)    /* null marker of the column type */
      isnull = (tpl->cols[idx].i <= NV_INT);
    else
      isnull = (tpl->cols[idx].i <= NV_NOM);
    if (isnull) nulls++;        /* count the column if it is null */
  }
  return nulls;                 /* return number of null columns */
}  /* tpl_nullcnt() */
Пример #3
/*----------------------------------------------------------------------
  tpl_isect -- compute the intersection of two tuples

  Per column: equal values are copied; if exactly one side is null
  (not greater than the type's null marker), the other side's value is
  copied; two different known values make the intersection impossible.

  res      tuple receiving the result; if NULL, the result is written
           to the attribute instances (att_inst()) instead
  tpl1     first  tuple (must not be NULL)
  tpl2     second tuple (must not be NULL, same number of columns)
  returns  -1 if some column holds two different known values (the
           destination may already have been partially written),
           otherwise a bit mask: bit 0 (value 1) is still set if no
           column had to be taken from tpl2 because tpl1 was null
           there, bit 1 (value 2) likewise for tpl2
----------------------------------------------------------------------*/
int tpl_isect (TUPLE *res, TUPLE *tpl1, const TUPLE *tpl2)
{                               /* --- compute a tuple intersection */
  ATT   *att;                   /* attribute of the current column */
  INST  *dst;                   /* destination of the current column */
  CINST *a, *b;                 /* current column of tpl1 and tpl2 */
  int   k;                      /* column index */
  int   type;                   /* type of the current column */
  int   nv;                     /* null marker for int/nominal column */
  int   flags = 3;              /* 'identical to intersection' flags */

  assert(tpl1 && tpl2           /* both tuples must be present and */
      && (as_attcnt(tpl1->attset) == as_attcnt(tpl2->attset)));
  for (k = as_attcnt(tpl1->attset); --k >= 0; ) {
    a    = tpl1->cols +k;       /* process columns from last to first */
    b    = tpl2->cols +k;
    att  = as_att(tpl1->attset, k);
    dst  = (res) ? res->cols +k : att_inst(att);
    type = att_type(att);       /* get destination and column type */

    if (type == AT_REAL) {      /* --- real-valued column */
      if (a->f == b->f) {       /* identical values: copy either */
        dst->f = a->f; continue; }
      if (a->f <= NV_REAL) {    /* first is null: take the second */
        dst->f = b->f; flags &= ~1; continue; }
      if (b->f <= NV_REAL) {    /* second is null: take the first */
        dst->f = a->f; flags &= ~2; continue; }
      return -1;                /* two different known values */
    }

    nv = (type == AT_INT) ? NV_INT : NV_NOM;
    if (a->i == b->i) {         /* --- integer or nominal column */
      dst->i = a->i; continue; }/* identical values: copy either */
    if (a->i <= nv) {           /* first is null: take the second */
      dst->i = b->i; flags &= ~1; continue; }
    if (b->i <= nv) {           /* second is null: take the first */
      dst->i = a->i; flags &= ~2; continue; }
    return -1;                  /* two different known values */
  }
  return flags;                 /* return the identity flags */
}  /* tpl_isect() */
Пример #4
bool TChain::save(std::string fname, std::string group_name, size_t index,
                  std::string dim_name, int compression, int subsample,
                  bool converged, float lnZ) const {
	if((compression<0) || (compression > 9)) {
		std::cerr << "! Invalid gzip compression level: " << compression << std::endl;
		return false;
	H5::H5File *file = H5Utils::openFile(fname);
	if(file == NULL) { return false; }
	try {
	} catch(...) {
		// pass
	H5::Group *group = H5Utils::openGroup(file, group_name);
	if(group == NULL) {
		delete file;
		return false;
	 *  Attributes
	// Datatype
	H5::CompType att_type(sizeof(TChainAttribute));
	hid_t tid = H5Tcopy(H5T_C_S1);
	H5Tset_size(tid, H5T_VARIABLE);
	att_type.insertMember("dim_name", HOFFSET(TChainAttribute, dim_name), tid);
	//att_type.insertMember("total_weight", HOFFSET(TChainAttribute, total_weight), H5::PredType::NATIVE_FLOAT);
	//att_type.insertMember("ndim", HOFFSET(TChainAttribute, ndim), H5::PredType::NATIVE_UINT64);
	//att_type.insertMember("length", HOFFSET(TChainAttribute, length), H5::PredType::NATIVE_UINT64);
	// Dataspace
	int att_rank = 1;
	hsize_t att_dim = 1;
	H5::DataSpace att_space(att_rank, &att_dim);
	// Dataset
	//H5::Attribute att = group->createAttribute("parameter names", att_type, att_space);
	TChainAttribute att_data;
	att_data.dim_name = new char[dim_name.size()+1];
	std::strcpy(att_data.dim_name, dim_name.c_str());
	//att_data.total_weight = total_weight;
	//att_data.ndim = N;
	//att_data.length = length;
	//att.write(att_type, &att_data);
	delete[] att_data.dim_name;
	//int att_rank = 1;
	//hsize_t att_dim = 1;
	H5::DataType conv_dtype = H5::PredType::NATIVE_UCHAR;
	H5::DataSpace conv_dspace(att_rank, &att_dim);
	//H5::Attribute conv_att = H5Utils::openAttribute(group, "converged", conv_dtype, conv_dspace);
	//conv_att.write(conv_dtype, &converged);
	H5::DataType lnZ_dtype = H5::PredType::NATIVE_FLOAT;
	H5::DataSpace lnZ_dspace(att_rank, &att_dim);
	//H5::Attribute lnZ_att = H5Utils::openAttribute(group, "ln Z", lnZ_dtype, lnZ_dspace);
	//lnZ_att.write(lnZ_dtype, &lnZ);
	// Creation property list to be used for all three datasets
	H5::DSetCreatPropList plist;
	//plist.setDeflate(compression);	// gzip compression level
	float fillvalue = 0;
	plist.setFillValue(H5::PredType::NATIVE_FLOAT, &fillvalue);
	H5D_layout_t layout = H5D_COMPACT;
	 *  Choose subsample of points in chain
	size_t *el_idx = NULL;
	size_t *subsample_idx = NULL;
	if(subsample > 0) {
		size_t tot_weight_tmp = (size_t)ceil(total_weight);
		el_idx = new size_t[tot_weight_tmp];
		size_t unrolled_idx = 0;
		size_t chain_idx = 0;
		std::vector<double>::const_iterator it, it_end;
		it_end = w.end();
		for(it = w.begin(); it != it_end; ++it, chain_idx++) {
			for(size_t n = unrolled_idx; n < unrolled_idx + (size_t)(*it); n++) {
				el_idx[n] = chain_idx;
			unrolled_idx += (size_t)(*it);
		assert(chain_idx == length);
		gsl_rng *r;
		subsample_idx = new size_t[tot_weight_tmp];
		for(size_t i=0; i<subsample; i++) {
			subsample_idx[i] = el_idx[gsl_rng_uniform_int(r, tot_weight_tmp)];
	 *  Coordinates
	// Dataspace
	hsize_t dim;
	if(subsample > 0) {
		dim = subsample;
	} else {
		dim = length;
	// Chunking (required for compression)
	int rank = 2;
	hsize_t coord_dim[2] = {dim, N};
	//if(dim < chunk) {
	//plist.setChunk(rank, &(coord_dim[0]));
	//} else {
	//	plist.setChunk(rank, &chunk);
	H5::DataSpace x_dspace(rank, &(coord_dim[0]));
	// Dataset
	//std::stringstream x_dset_path;
	//x_dset_path << group_name << "/chain/coords";
	std::stringstream coordname;
	coordname << "coords " << index;
	H5::DataSet* x_dataset = new H5::DataSet(group->createDataSet(coordname.str(), H5::PredType::NATIVE_FLOAT, x_dspace, plist));
	// Write
	float *buf = new float[N*dim];
	if(subsample > 0) {
		size_t tmp_idx;
		for(size_t i=0; i<subsample; i++) {
			tmp_idx = subsample_idx[i];
			for(size_t k=0; k<N; k++) {
				buf[N*i+k] = x[N*tmp_idx+k];
	} else {
		for(size_t i=0; i<dim; i++) { buf[i] = x[i]; }
	x_dataset->write(buf, H5::PredType::NATIVE_FLOAT);
	 *  Weights
	// Dataspace
	if(subsample <= 0) {
		dim = w.size();
		rank = 1;
		H5::DataSpace w_dspace(rank, &dim);
		// Dataset
		//std::stringstream w_dset_path;
		//w_dset_path << group_name << "/chain/weights";
		H5::DataSet* w_dataset = new H5::DataSet(group->createDataSet("weights", H5::PredType::NATIVE_FLOAT, w_dspace, plist));
		// Write
		if(subsample > 0) {
			for(size_t i=0; i<subsample; i++) { buf[i] = 1.; }
		} else {
			assert(w.size() < x.size());
			for(size_t i=0; i<w.size(); i++) { buf[i] = w[i]; }
		w_dataset->write(buf, H5::PredType::NATIVE_FLOAT);
		delete w_dataset;
	 *  Probability densities
	// Dataspace
	rank = 1;
	H5::DataSpace L_dspace(rank, &dim);
	// Dataset
	//std::stringstream L_dset_path;
	//L_dset_path << group_name << "/chain/probs";
	std::stringstream lnpname;
	lnpname << "ln_p " << index;
	H5::DataSet* L_dataset = new H5::DataSet(group->createDataSet(lnpname.str(), H5::PredType::NATIVE_FLOAT, L_dspace, plist));
	// Write
	if(subsample > 0) {
		for(size_t i=0; i<subsample; i++) { buf[i] = L[subsample_idx[i]]; }
	} else {
		assert(L.size() < x.size());
		for(size_t i=0; i<L.size(); i++) { buf[i] = L[i]; }
	L_dataset->write(buf, H5::PredType::NATIVE_FLOAT);
	if(subsample > 0) {
		delete[] el_idx;
		delete[] subsample_idx;
	delete[] buf;
	delete x_dataset;
	delete L_dataset;
	delete group;
	delete file;
	return true;
Пример #5
/*----------------------------------------------------------------------
  main -- determine attribute domains from a table file

  Evaluates the command line, reads the table with io_tab(), optionally
  converts attribute types (-a) and sorts nominal domains (-s/-S), and
  writes the domain descriptions with as_desc() to the domain file (or
  to standard output if the domain file name is empty).

  argc, argv  command line; called without arguments, only a usage
              message is printed
  returns     0 on success; failures are reported through error()
----------------------------------------------------------------------*/
int main (int argc, char *argv[])
{                               /* --- main function */
  int  i, k = 0;                /* loop variables, counter */
  char *s;                      /* to traverse the options */
  char **optarg = NULL;         /* option argument */
  char *fn_hdr  = NULL;         /* name of table header file */
  char *fn_tab  = NULL;         /* name of table file */
  char *fn_dom  = NULL;         /* name of domains file */
  char *blanks  = NULL;         /* blanks */
  char *fldseps = NULL;         /* field  separators */
  char *recseps = NULL;         /* record separators */
  char *nullchs = NULL;         /* null value characters */
  char *comment = NULL;         /* comment characters */
  int  flags    = 0;            /* table file read flags */
  int  sort     = 0;            /* flag for domain sorting */
  int  atdet    = 0;            /* flag for automatic type determ. */
  int  ivals    = AS_IVALS;     /* flag for numeric intervals */
  int  maxlen   = 0;            /* maximal output line length */
  int  attid;                   /* loop variable for attributes */
  ATT  *att;                    /* to traverse attributes */
  clock_t t;                    /* timer for measurement */

  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print startup/usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no argument is given */
    printf("usage: %s [options] "
                     "[-d|-h hdrfile] tabfile domfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-s       sort domains alphabetically "
                    "(default: order of appearance)\n");
    printf("-S       sort domains numerically/alphabetically\n");
    printf("-a       automatic type determination "
                    "(default: all nominal)\n");
    printf("-i       do not print intervals for numeric attributes\n");
    printf("-l#      output line length (default: no limit)\n");
    printf("-b#      blank   characters    (default: \" \\t\\r\")\n");
    printf("-f#      field   separators    (default: \" \\t\")\n");
    printf("-r#      record  separators    (default: \"\\n\")\n");
    printf("-C#      comment characters    (default: \"#\")\n");
    printf("-u#      null value characters (default: \"?*\")\n");
    printf("-n       number of tuple occurrences in last field\n");
    printf("-d       use default header "
                    "(field names = field numbers)\n");
    printf("-h       read table header (field names) from hdrfile\n");
    printf("hdrfile  file containing table header (field names)\n");
    printf("tabfile  table file to read "
                    "(field names in first record)\n");
    printf("domfile  file to write domain descriptions to\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get option argument */
    /* an option that expects a value and was given without one */
    /* takes the whole next command line argument as its value */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (1) {               /* traverse characters */
        switch (*s++) {         /* evaluate option */
          case 's': sort   = 1;                     break;
          case 'S': sort   = 2;                     break;
          case 'a': atdet  = 1;                     break;
          case 'i': ivals  = 0;                     break;
          case 'l': maxlen = (int)strtol(s, &s, 0); break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'u': optarg = &nullchs;              break;
          case 'C': optarg = &comment;              break;
          case 'n': flags |= AS_WEIGHT;             break;
          case 'd': flags |= AS_DFLT;               break;
          case 'h': optarg = &fn_hdr;               break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set option variables */
        if (!*s) break;         /* if at end of string, abort loop */
        /* the rest of this argument is the option's value */
        if (optarg) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-option */
        case  0: fn_tab = s;      break;
        case  1: fn_dom = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check option argument */
  if (k != 2) error(E_ARGCNT);  /* check number of arguments */
  if (fn_hdr && (strcmp(fn_hdr, "-") == 0))
    fn_hdr = "";                /* convert "-" to "" */
  if (fn_hdr)                   /* set header flags */
    flags = AS_ATT | (flags & ~AS_DFLT);

  /* --- determine attributes and domains --- */
  attset = as_create("domains", att_delete);
  if (!attset) error(E_NOMEM);  /* create an attribute set */
  as_chars(attset, recseps, fldseps, blanks, nullchs, comment);
  fprintf(stderr, "\n");        /* set delimiter characters */
  i = io_tab(attset, fn_hdr, fn_tab, flags, 1);
  if (i != 0) error(-i);        /* read the table */

  /* --- convert/sort domains --- */
  if (atdet) {                  /* if automatic type determination */
    for (attid = as_attcnt(attset); --attid >= 0; )
      att_conv(as_att(attset, attid), AT_AUTO, NULL);
  }                             /* try to convert attributes */
  if (sort) {                   /* if to sort domains (values) */
    for (attid = as_attcnt(attset); --attid >= 0; ) {
      att = as_att(attset, attid);
      if (att_type(att) != AT_NOM) continue;
      att_valsort(att, (sort > 1) ? numcmp : strcmp, NULL, 0);
    }                           /* traverse nominal attributes */
  }                             /* and sort their domains */

  /* --- write output file --- */
  t = clock();                  /* start the timer */
  if (fn_dom && *fn_dom)        /* if a domain file name is given, */
    out = fopen(fn_dom, "w");   /* open domain file for writing */
  else {                        /* if no domain file name is given, */
    out = stdout; fn_dom = "<stdout>"; }         /* write to stdout */
  fprintf(stderr, "writing %s ... ", fn_dom);
  if (!out) error(E_FOPEN, fn_dom);
  if (as_desc(attset, out, AS_TITLE|ivals, maxlen) != 0)
    error(E_FWRITE, fn_dom);    /* write domain descriptions */
  if (out != stdout) {          /* if not written to stdout, */
    i = fclose(out); out = NULL;/* close the output file */
    if (i != 0) error(E_FWRITE, fn_dom);
  }                             /* print a success message */
  fprintf(stderr, "[%d attribute(s)] done", as_attcnt(attset));
  fprintf(stderr, " [%.2fs].\n", SEC_SINCE(t));

  /* --- clean up --- */
  #ifndef NDEBUG                /* the attribute set is only deleted */
  as_delete(attset);            /* in debug builds */
  #endif
  #ifdef STORAGE
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */