Пример #1
0
// Generate Transactions
//
// Writes the run parameters and the pattern set to pat_file, then builds
// par.ntrans transactions with mk_tran() and writes each one to data_file
// (write_asc() when par.ascii is set, write() otherwise).  Both files are
// opened with ios::trunc.  If either file cannot be opened the process is
// terminated with exit status 1.
//
void gen_rules(TransPar &par)
{
  poisson_distribution<LINT> tlen(par.tlen - 1);

  ofstream data_fp;
  ofstream pat_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  if (data_fp.fail() || pat_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // Constructed before the optional reseed below, matching the original
  // statement order.  Automatic storage so it is released on return.
  StringSet lits(par.nitems, par.lits);

  // Reset random seed generator before generating transactions
  if (par.seed < 0) generator.seed(par.seed);

  par.write(pat_fp);
  lits.display(pat_fp);

  StringSetIter patterns(lits);
  for (LINT i = 0; i < par.ntrans; i++)
    {
      // Sampled length is shifted by one (mean par.tlen - 1, then + 1).
      Transaction *trans = mk_tran(patterns, tlen(generator) + 1);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      delete trans;
    }

  data_fp.close();
  pat_fp.close();
}
Пример #2
0
// Generate Transactions and Taxonomy
//
// Builds a Taxonomy from par and writes it to tax_file, writes the run
// parameters and the pattern set to pat_file, then builds par.ntrans
// transactions with mk_tran() and writes each one to data_file.  The
// par.ascii flag selects write_asc() over write() for both the taxonomy
// and the transactions.  All three files are opened with ios::trunc; if
// any of them cannot be opened the process is terminated with exit
// status 1.
//
void gen_taxrules(TaxPar &par)
{
  poisson_distribution<LINT> tlen(par.tlen - 1);

  ofstream data_fp;
  ofstream pat_fp;
  ofstream tax_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  tax_fp.open(tax_file, ios::trunc);
  if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // generate taxonomy and write it to file
  // Declared first so it outlives the pattern set that is handed its address.
  Taxonomy tax(par.nitems, par.nroots, par.fanout, par.depth_ratio);
  if (par.ascii)
    tax.write_asc(tax_fp);
  else
    tax.write(tax_fp);

  StringSet lits(par.nitems, par.lits, &tax);

  par.write(pat_fp);
  lits.display(pat_fp);

  StringSetIter patterns(lits);
  for (LINT i = 0; i < par.ntrans; i++)
    {
      // Sampled length is shifted by one (mean par.tlen - 1, then + 1).
      Transaction *trans = mk_tran(patterns, tlen(generator) + 1, &tax);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      // Released exactly once; a second delete here would be a double free.
      delete trans;
    }

  data_fp.close();
  pat_fp.close();
  tax_fp.close();
}
Пример #3
0
// Generate Transactions and Taxonomy
//
// Builds a Taxonomy from par and writes it to tax_file, writes the run
// parameters and the pattern set to pat_file, then keeps calling mk_tran()
// until par.ntrans transactions with size() > 0 have been produced; every
// generated transaction is passed to write_asc()/write() on data_file.
// Finally a summary (transaction count, par.nitems, par.tlen) is written to
// conf_file, as text when par.ascii is set and as raw LINT values otherwise.
// If any output file cannot be opened the process is terminated with exit
// status 1.
//
void gen_taxrules(TaxPar &par)
{
	ofstream data_fp;
	ofstream pat_fp;
	ofstream tax_fp;
	ofstream conf_fp; //added by MJZaki

	data_fp.open(data_file);
	pat_fp.open(pat_file);
	tax_fp.open(tax_file);
	conf_fp.open(conf_file); //added by MJZaki

	if (data_fp.fail() || pat_fp.fail() || tax_fp.fail() || conf_fp.fail())
	{
		cerr << "Error opening output file" << endl;
		exit(1);
	}

	// generate taxonomy and write it to file
	// Automatic storage: released on return, after the objects that use it.
	Taxonomy tax(par.nitems, par.nroots, par.fanout, par.depth_ratio);
	if (par.ascii)
		tax.write_asc(tax_fp);
	else
		tax.write(tax_fp);

	// Construction order (taxonomy, length distribution, pattern set) is the
	// same as in the original code.
	PoissonDist tlen(par.tlen - 1);

	StringSet lits(par.nitems, par.lits, &tax);

	par.write(pat_fp);
	lits.display(pat_fp);

	StringSetIter patterns(lits);

	// Number of non-empty transactions produced so far.
	LINT NTRANS = 0;
	while (NTRANS < par.ntrans)
	{
		Transaction *trans = mk_tran(patterns, tlen() + 1, par.lits.npats, &tax);
		if (par.ascii)
			trans->write_asc(data_fp);
		else
			trans->write(data_fp);
		//added by MJZaki: repeat if trans empty
		if (trans->size() > 0)
			NTRANS++;
		delete trans;
	}

	data_fp.close();
	pat_fp.close();
	tax_fp.close();

	//added by MJZaki
	if (par.ascii)
	{
		conf_fp << NTRANS << "\n";
		conf_fp << par.nitems << "\n";
		conf_fp << par.tlen << "\n";
	}
	else
	{
		conf_fp.write(reinterpret_cast<const char *>(&NTRANS), sizeof(LINT));
		conf_fp.write(reinterpret_cast<const char *>(&par.nitems), sizeof(LINT));
		// The truncated length is stored in a LINT so that the sizeof(LINT)
		// bytes written below all belong to the object being written.
		LINT t = static_cast<LINT>(par.tlen);
		conf_fp.write(reinterpret_cast<const char *>(&t), sizeof(LINT));
	}
	conf_fp.close();
}
Пример #4
0
// Generate Sequences
//
// Writes the run parameters and the sequence patterns to pat_file, then keeps
// calling mk_seq() until par.ncust customer sequences that wrote at least one
// transaction have been produced.  Each sequence is written to data_file
// (write_asc() when par.ascii is set, write() otherwise); the value returned
// by the write call is recorded per customer.  par.mincustid / par.maxcustid
// are updated from every generated sequence.  Afterwards a summary is written
// to conf_file and the per-customer counts to ntpc_file, as text or as raw
// binary depending on par.ascii.  If any output file cannot be opened the
// process is terminated with exit status 1.
//
void gen_seq(SeqPar &par)
{
	ofstream data_fp;
	ofstream pat_fp;
	ofstream conf_fp; //added by MJZaki
	ofstream ntpc_fp; //added by MJZaki
#ifndef _WIN32
	srand48(0);
#else
	srand(0);
#endif

	data_fp.open(data_file);
	pat_fp.open(pat_file);
	conf_fp.open(conf_file); //added by MJZaki
	ntpc_fp.open(ntpc_file); //added by MJZaki
	LINT *NTPC = new LINT[par.ncust]; //added by MJZaki
	LINT tottrans = 0;
	if (data_fp.fail() || pat_fp.fail() || ntpc_fp.fail() || conf_fp.fail())
	{
		cerr << "Error opening output file" << endl;
		exit(1);
	}

	// Automatic storage so everything below is released on return.  The
	// construction order is the same as in the original code.
	PoissonDist slen(par.slen - 1); // Poisson Distribution
	PoissonDist tlen(par.tlen - 1); // Poisson Distribution

	StringSet lits(par.nitems, par.lits); // potentially large itemsets
	StringSet lseq(par.lits.npats, par.lseq, NULL, par.rept, par.rept_var); // potentially large sequences

	//  pat_fp << "Large Itemsets:" << endl;
	//  lits->write(pat_fp);
	//  pat_fp << endl << endl << "Sequences:" << endl;
	par.write(pat_fp);
	lseq.display(pat_fp, lits);

	StringSetIter patterns(lseq);

	// Number of customers accepted so far; also the slot in NTPC being filled.
	LINT NCUST = 0;
	while (NCUST < par.ncust)
	{
		if ((NCUST + 1) % 1000 == 0)
			cout << "DONE " << (NCUST + 1) << endl;
		CustSeq *custSeqence = mk_seq(NCUST + 1, patterns, lits, slen() + 1, tlen() + 1);
		if (custSeqence->cid < par.mincustid)
			par.mincustid = custSeqence->cid;
		if (custSeqence->cid > par.maxcustid)
			par.maxcustid = custSeqence->cid;

		if (par.ascii)
			NTPC[NCUST] = custSeqence->write_asc(data_fp);
		else
			NTPC[NCUST] = custSeqence->write(data_fp);
		tottrans += NTPC[NCUST];
		if (NTPC[NCUST] > 0)
		{
			NCUST++;//added by MJZaki: repeat if trans empty
		}
		else
		{
#ifdef _DEBUG
			// Stream output instead of printf("%d") so the message is correct
			// whatever integer type LINT is.
			cerr << "i = " << NCUST << ", NTPC[" << NCUST << "] = " << NTPC[NCUST] << ".\n";
			//cerr << custSeqence->toString();
#endif
		}
		delete custSeqence;
	}

	data_fp.close();
	pat_fp.close();

	//added by MJZaki
	if (par.ascii)
	{
		// ascii format
		conf_fp << "Number of customers in database = " << NCUST << "\n";
		conf_fp << "Number of items = " << par.nitems << "\n";
		conf_fp << "Average sequence length = " << par.slen << "\n";
		conf_fp << "Average transaction length = " << par.tlen << "\n";
		conf_fp << "Number of total transactions = " << tottrans << "\n";
		conf_fp << "minimum transaction number = " << par.mincustid << "\n";
		conf_fp << "maximum transaction number = " << par.maxcustid << "\n";
	}
	else
	{
		// binary format
		conf_fp.write((char *) &NCUST, sizeof(LINT));
		conf_fp.write((char *) &par.nitems, sizeof(LINT));
		conf_fp.write((char *) &par.slen, sizeof(FLOAT));
		conf_fp.write((char *) &par.tlen, sizeof(FLOAT));
		conf_fp.write((char *) &tottrans, sizeof(LINT));
		conf_fp.write((char *) &par.mincustid, sizeof(LINT));
		conf_fp.write((char *) &par.maxcustid, sizeof(LINT));
	}
	conf_fp.close();

	if (par.ascii)
	{
		ntpc_fp << "Number of transactions that every customer have: \n";
		for (LINT k = 0; k < NCUST; k++)
		{
			ntpc_fp << NTPC[k] << " ";
			// Line break after every 30 values.
			if ((k + 1) % 30 == 0)
				ntpc_fp << endl;
		}
		ntpc_fp << endl;
	}
	else
	{
		ntpc_fp.write((char *) NTPC, NCUST * sizeof(LINT));
	}
	ntpc_fp.close();
	delete[] NTPC;
}
Пример #5
0
// Generate Transactions
//
// Writes the run parameters and the pattern set to pat_file, then keeps
// calling mk_tran() until par.ntrans transactions with size() > 0 have been
// produced; every generated transaction is passed to write_asc()/write() on
// data_file and updates par.mintid / par.maxtid.  Finally a summary
// (transaction count, par.nitems, par.tlen, par.mintid, par.maxtid) is
// written to conf_file, as text when par.ascii is set and as raw binary
// values otherwise.  If any output file cannot be opened the process is
// terminated with exit status 1.
//
void gen_rules(TransPar &par)
{
	ofstream data_fp;
	ofstream pat_fp;
	ofstream conf_fp; //added by MJZaki

	data_fp.open(data_file);
	pat_fp.open(pat_file);
	conf_fp.open(conf_file); //added by MJZaki
	if (data_fp.fail() || pat_fp.fail() || conf_fp.fail())
	{
		cerr << "Error opening output file" << endl;
		exit(1);
	}

	// Automatic storage so the helper objects are released on return.  The
	// order (pattern set, optional reseed, length distribution) is the same
	// as in the original code.
	StringSet lits(par.nitems, par.lits);

	// Reset random seed generator for before generating transactions
	if (par.seed < 0)
		RandSeed::set_seed(par.seed);

	PoissonDist tlen(par.tlen - 1);

	par.write(pat_fp);
	lits.display(pat_fp);

	StringSetIter patterns(lits);

	// Number of non-empty transactions produced so far.
	LINT NTRANS = 0;
	//Transaction::set_print_cid(FALSE); // added by me to suppress cid
	while (NTRANS < par.ntrans)
	{
		Transaction *trans = mk_tran(patterns, tlen() + 1, par.lits.npats);
		if (trans->tid < par.mintid)
			par.mintid = trans->tid;
		if (trans->tid > par.maxtid)
			par.maxtid = trans->tid;
		if (par.ascii)
			trans->write_asc(data_fp);
		else
			trans->write(data_fp);
		//cout << "TRANS SZ " << trans->size() << endl;
		//added by MJZaki: repeat if trans empty
		if (trans->size() > 0)
			NTRANS++;
		delete trans;
	}

	data_fp.close();
	pat_fp.close();

	//added by MJZaki
	if (par.ascii)
	{
		conf_fp << NTRANS << "\n";
		conf_fp << par.nitems << "\n";
		conf_fp << par.tlen << "\n";
		conf_fp << par.mintid << "\n";
		conf_fp << par.maxtid << "\n";
	}
	else
	{
		cout << "WRITING " << NTRANS << " " << par.nitems << endl;
		conf_fp.write((char *) &NTRANS, sizeof(LINT));
		conf_fp.write((char *) &par.nitems, sizeof(LINT));
		conf_fp.write((char *) &par.tlen, sizeof(FLOAT));
		conf_fp.write((char *) &par.mintid, sizeof(LINT));
		conf_fp.write((char *) &par.maxtid, sizeof(LINT));
	}
	conf_fp.close();
}