예제 #1
0
// Print the command-line usage summary to stderr, showing the current value
// of each parameter in `par` as its default, then terminate the process with
// EXIT_FAILURE. This function never returns.
void command_line(TaxPar &par)
{
  // calc_values() runs before anything is printed (presumably so derived
  // parameters are filled in before they are shown -- confirm in TaxPar).
  par.calc_values();

  const char *ascii_default = par.ascii ? "True" : "False";

  // Transaction / taxonomy shape options. ntrans and nitems are displayed in
  // thousands, matching the units the options are entered in.
  cerr << "Command Line Options:" << endl
       << "  -ntrans number_of_transactions_in_000s (default: "
       << par.ntrans / 1000 << ")" << endl
       << "  -tlen avg_items_per_transaction (default: "
       << par.tlen << ")" << endl
       << "  -nitems number_of_different_items_in_000s (default: "
       << par.nitems / 1000 << ")" << endl
       << "  -nroots number_of_roots (default: "
       << par.nroots << ")" << endl
       << "  -nlevels number_of_different_levels (default: "
       << par.nlevels << ")" << endl
       << "  -fanout average_fanout (default: "
       << par.fanout << ")" << endl
       << "  -depth affects_average_depth_of_items_in_itemsets (default: "
       << par.depth_ratio << ")" << endl
       << endl;

  // Pattern options.
  cerr << "  -npats number_of_patterns (default: "
       << par.lits.npats << ")" << endl
       << "  -patlen avg_length_of_maximal_pattern (default: "
       << par.lits.patlen << ")" << endl
       << "  -corr correlation_between_patterns (default: "
       << par.lits.corr << ")" << endl
       << "  -conf avg_confidence_in_a_rule (default: "
       << par.lits.conf << ")" << endl
       << endl;

  // Output and miscellaneous options.
  cerr << "  -fname <filename> (write to filename.data and filename.pat)" << endl
       << "  -ascii (Write data in ASCII format; default: "
       << ascii_default << ")" << endl
       << "  -randseed # (reset seed used generate to x-acts; must be negative)" << endl
       << "  -version (to print out version info)" << endl;

  exit(EXIT_FAILURE);
}
예제 #2
0
// Generate Transactions and Taxonomy
//
// Opens the three output files named by the file-level buffers data_file,
// pat_file and tax_file (truncating them), builds a taxonomy from `par`,
// writes it out, writes the parameters and the generated patterns to the
// pattern file, and finally emits par.ntrans transactions to the data file.
// Output is ASCII when par.ascii is set, binary otherwise.
// Exits the process with status 1 if any output file cannot be opened.
void gen_taxrules(TaxPar &par)
{
  // Each transaction length is drawn as Poisson(par.tlen - 1) + 1, so it is
  // always at least 1.
  // NOTE(review): std::poisson_distribution requires mean > 0; par.tlen == 1
  // would give a mean of 0 here -- confirm the callers rule that out.
  poisson_distribution<LINT> tlen(par.tlen - 1);

  ofstream data_fp;
  ofstream pat_fp;
  ofstream tax_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  tax_fp.open(tax_file, ios::trunc);
  if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // generate taxonomy and write it to file
  // NOTE(review): tax, lits and patterns are never freed in this function.
  // Whether StringSet / StringSetIter take ownership of the pointers they are
  // handed is not visible here, so they are deliberately left as they were.
  Taxonomy *tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio);
  if (par.ascii)
    tax->write_asc(tax_fp);
  else
    tax->write(tax_fp);

  StringSet *lits = new StringSet(par.nitems, par.lits, tax);

  par.write(pat_fp);
  lits->display(pat_fp);

  StringSetIter *patterns = new StringSetIter(*lits);
  for (LINT i = 0; i < par.ntrans; i++)
    {
      Transaction *trans = mk_tran(*patterns, tlen(generator) + 1, tax);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      // Release the transaction exactly once; the previous code deleted the
      // same pointer twice, which is undefined behaviour (double free).
      delete trans;
    }

  data_fp.close();
  pat_fp.close();
  tax_fp.close();
}
예제 #3
0
void get_args(TaxPar &par, int argc, char **argv)
{
  LINT arg_pos = 2;
  
  strcpy(data_file, "data");
  strcpy(pat_file, "pat");
  strcpy(tax_file, "tax");
  while (arg_pos < argc)
    {
      if (strcmp(argv[arg_pos], "-ntrans") == 0) {
	// g++ LINT !!
	par.ntrans = LINT (1000 * atof(argv[++arg_pos]));
	cat_fname(".ntrans_", argv[arg_pos]);
	arg_pos++;
	if (par.ntrans < 1) exit_fail_with_msg("ntrans must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-tlen") == 0) {
	par.tlen = atof(argv[++arg_pos]);
	cat_fname(".tlen_", argv[arg_pos]);
	arg_pos++;
	if (par.tlen < 1) exit_fail_with_msg("tlen must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-nitems") == 0) {
	// g++ the LINT worm is back again! 
	par.nitems = LINT (1000 * atof(argv[++arg_pos]));
	cat_fname(".nitems_", argv[arg_pos]);
	arg_pos++;
	if (par.nitems < 1) exit_fail_with_msg("nitems must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-nroots") == 0) {
	par.nroots = atoi(argv[++arg_pos]);
	cat_fname(".nroots_", argv[arg_pos]);
	arg_pos++;
	if (par.nroots < 1) exit_fail_with_msg("nroots must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-nlevels") == 0) {
	par.nlevels = atof(argv[++arg_pos]);
	cat_fname(".nlevels_", argv[arg_pos]);
	arg_pos++;
	if (par.nlevels < 1) exit_fail_with_msg("nlevels must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-fanout") == 0) {
	par.fanout = atof(argv[++arg_pos]);
	cat_fname(".fanout_", argv[arg_pos]);
	arg_pos++;
	if (par.fanout < 1) exit_fail_with_msg("fanout must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-depth") == 0) {
	par.depth_ratio = atof(argv[++arg_pos]);
	cat_fname(".depth_", argv[arg_pos]);
	arg_pos++;
	if (par.depth_ratio <= 0) exit_fail_with_msg("fanout must be > 0");
	continue;
      }

      else if (strcmp(argv[arg_pos], "-npats") == 0) {
	par.lits.npats = atoi(argv[++arg_pos]);
	cat_fname(".npats_", argv[arg_pos]);
	arg_pos++;
	if (par.lits.npats < 1) exit_fail_with_msg("npats must be >= 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-patlen") == 0) {
	par.lits.patlen = atof(argv[++arg_pos]);
	cat_fname(".patlen_", argv[arg_pos]);
	arg_pos++;
	if (par.lits.patlen <= 0) exit_fail_with_msg("patlen must be > 0");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-corr") == 0) {
	par.lits.corr = atof(argv[++arg_pos]);
	cat_fname(".corr_", argv[arg_pos]);
	arg_pos++;
	continue;
      }
      else if (strcmp(argv[arg_pos], "-conf") == 0) {
	par.lits.conf = atof(argv[++arg_pos]);
	cat_fname(".conf_", argv[arg_pos]);
	arg_pos++;
	if (par.lits.conf > 1 || par.lits.conf < 0) 
	  exit_fail_with_msg("conf must be between 0 and 1");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-fname") == 0) {
        strcpy(data_file, argv[++arg_pos]);
        strcat(data_file, ".data");
        strcpy(pat_file, argv[arg_pos]);
        strcat(pat_file, ".pat");
        strcpy(tax_file, argv[arg_pos++]);
        strcat(tax_file, ".tax");
        userfile = true;
	continue;
      }
      else if (strcmp(argv[arg_pos], "-ascii") == 0) {
	par.ascii = true;
	cat_fname(".ascii", "");
	arg_pos++;
	continue;
      }
      else if (strcmp(argv[arg_pos], "-randseed") == 0) {
	par.seed = atoi(argv[++arg_pos]);
	arg_pos++;
	if (par.seed >= 0)
	  exit_fail_with_msg("randseed must be negative");
	continue;
      }
      else if (strcmp(argv[arg_pos], "-version") == 0) {
	cout << VERSION << endl;
	exit(EXIT_SUCCESS);
      }
      else {
	command_line(par);
      }
    }  // end while

  par.calc_values();
}
예제 #4
0
// Generate Transactions and Taxonomy
//
// Opens the four output files named by the file-level buffers data_file,
// pat_file, tax_file and conf_file, builds a taxonomy from `par` and writes
// it out, writes the parameters and generated patterns to the pattern file,
// then emits par.ntrans non-empty transactions to the data file. Finally a
// short summary (transaction count, item count, average transaction length)
// is written to the conf file. Output is ASCII when par.ascii is set, binary
// otherwise. Exits the process with status 1 if any file cannot be opened.
void gen_taxrules(TaxPar &par)
{
	ofstream data_fp;
	ofstream pat_fp;
	ofstream tax_fp;
	ofstream conf_fp; //added by MJZaki

	data_fp.open(data_file);
	pat_fp.open(pat_file);
	tax_fp.open(tax_file);
	conf_fp.open(conf_file); //added by MJZaki

	if (data_fp.fail() || pat_fp.fail() || tax_fp.fail() || conf_fp.fail())
	{
		cerr << "Error opening output file" << endl;
		exit(1);
	}

	// generate taxonomy and write it to file
	// NOTE(review): tax, lits and patterns are never freed in this function.
	// Whether StringSet / StringSetIter take ownership of the pointers they
	// are handed is not visible here, so they are left as they were.
	Taxonomy *tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio);
	if (par.ascii)
		tax->write_asc(tax_fp);
	else
		tax->write(tax_fp);

	// Transaction lengths are drawn as tlen() + 1. The distribution is only
	// used inside this function, so it lives on the stack instead of being
	// heap-allocated and leaked.
	PoissonDist tlen(par.tlen - 1);

	StringSet *lits = new StringSet(par.nitems, par.lits, tax);

	par.write(pat_fp);
	lits->display(pat_fp);

	StringSetIter *patterns = new StringSetIter(*lits);

	// Keep generating until par.ntrans non-empty transactions were produced;
	// an empty transaction does not count and is simply retried
	// (added by MJZaki: repeat if trans empty).
	LINT NTRANS = 0;
	while (NTRANS < par.ntrans)
	{
		Transaction *trans = mk_tran(*patterns, tlen() + 1, par.lits.npats, tax);
		if (par.ascii)
			trans->write_asc(data_fp);
		else
			trans->write(data_fp);
		if (trans->size() > 0)
			NTRANS++;
		delete trans;
	}

	data_fp.close();
	pat_fp.close();
	tax_fp.close();

	//added by MJZaki
	if (par.ascii)
	{
		conf_fp << NTRANS << "\n";
		conf_fp << par.nitems << "\n";
		conf_fp << par.tlen << "\n";
	}
	else
	{
		// Each field is written as sizeof(LINT) bytes. The average length is
		// therefore held in a LINT: writing sizeof(LINT) bytes out of an int
		// (as before) reads past the variable whenever LINT is wider than int.
		const LINT avg_len = static_cast<LINT>(par.tlen);
		conf_fp.write(reinterpret_cast<const char *>(&NTRANS), sizeof(LINT));
		conf_fp.write(reinterpret_cast<const char *>(&par.nitems), sizeof(LINT));
		conf_fp.write(reinterpret_cast<const char *>(&avg_len), sizeof(LINT));
	}
	conf_fp.close();
}