// Generate Transactions // void gen_rules(TransPar &par) { StringSet *lits; StringSetIter *patterns; Transaction *trans; poisson_distribution<LINT> tlen(par.tlen - 1); ofstream data_fp; ofstream pat_fp; data_fp.open(data_file, ios::trunc); pat_fp.open(pat_file, ios::trunc); lits = new StringSet(par.nitems, par.lits); // Reset random seed generator before generating transactions if (par.seed < 0) generator.seed(par.seed); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); for (LINT i = 0; i < par.ntrans; i ++) { trans = mk_tran(*patterns, tlen(generator) + 1); if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); delete trans; } data_fp.close(); pat_fp.close(); }
// Generate Transactions and Taxonomy // void gen_taxrules(TaxPar &par) { Taxonomy *tax; StringSet *lits; StringSetIter *patterns; Transaction *trans; poisson_distribution<LINT> tlen(par.tlen - 1); ofstream data_fp; ofstream pat_fp; ofstream tax_fp; data_fp.open(data_file, ios::trunc); pat_fp.open(pat_file, ios::trunc); tax_fp.open(tax_file, ios::trunc); if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } // generate taxonomy and write it to file tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio); if (par.ascii) tax->write_asc(tax_fp); else tax->write(tax_fp); lits = new StringSet(par.nitems, par.lits, tax); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); for (LINT i = 0; i < par.ntrans; i ++) { trans = mk_tran(*patterns, tlen(generator) + 1, tax); if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); delete trans; delete trans; } data_fp.close(); pat_fp.close(); tax_fp.close(); }
// Generate Transactions and Taxonomy // void gen_taxrules(TaxPar &par) { Taxonomy *tax; StringSet *lits; StringSetIter *patterns; Transaction *trans; PoissonDist *tlen; ofstream data_fp; ofstream pat_fp; ofstream tax_fp; ofstream conf_fp; //added by MJZaki data_fp.open(data_file); pat_fp.open(pat_file); tax_fp.open(tax_file); conf_fp.open(conf_file); //added by MJZaki if (data_fp.fail() || pat_fp.fail() || tax_fp.fail() || conf_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } // generate taxonomy and write it to file tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio); if (par.ascii) tax->write_asc(tax_fp); else tax->write(tax_fp); tlen = new PoissonDist(par.tlen - 1); lits = new StringSet(par.nitems, par.lits, tax); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); LINT NTRANS = 0; for (LINT i = 0; i < par.ntrans; i++) { trans = mk_tran(*patterns, (*tlen)() + 1, par.lits.npats, tax); if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); if (trans->size() > 0) NTRANS++;//added by MJZaki: repeat if trans empty else i--; delete trans; } data_fp.close(); pat_fp.close(); tax_fp.close(); //added by MJZaki if (par.ascii) { conf_fp << NTRANS << "\n"; conf_fp << par.nitems << "\n"; conf_fp << par.tlen << "\n"; } else { conf_fp.write((char *) &NTRANS, sizeof(LINT)); conf_fp.write((char *) &par.nitems, sizeof(LINT)); int t = (int) par.tlen; conf_fp.write((char *) &t, sizeof(LINT)); } conf_fp.close(); }
// Generate Sequences // void gen_seq(SeqPar &par) { StringSet *lseq; // potentially large sequences StringSetIter *patterns; StringSet *lits; // potentially large itemsets CustSeq *custSeqence; // sequence parameters PoissonDist *slen; // Poisson Distribution PoissonDist *tlen; // Poisson Distribution ofstream data_fp; ofstream pat_fp; ofstream conf_fp; //added by MJZaki ofstream ntpc_fp; //added by MJZaki #ifndef _WIN32 srand48(0); #else srand(0); #endif data_fp.open(data_file); pat_fp.open(pat_file); conf_fp.open(conf_file); //added by MJZaki ntpc_fp.open(ntpc_file); //added by MJZaki LINT *NTPC = new LINT[par.ncust]; //added by MJZaki LINT tottrans = 0; if (data_fp.fail() || pat_fp.fail() || ntpc_fp.fail() || conf_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } slen = new PoissonDist(par.slen - 1); tlen = new PoissonDist(par.tlen - 1); lits = new StringSet(par.nitems, par.lits); lseq = new StringSet(par.lits.npats, par.lseq, NULL, par.rept, par.rept_var); // pat_fp << "Large Itemsets:" << endl; // lits->write(pat_fp); // pat_fp << endl << endl << "Sequences:" << endl; par.write(pat_fp); lseq->display(pat_fp, *lits); patterns = new StringSetIter(*lseq); LINT NCUST = 0; LINT i; for (i = 0; i < par.ncust; i++) { if ((i + 1) % 1000 == 0) cout << "DONE " << (i + 1) << endl; custSeqence = mk_seq(i + 1, *patterns, *lits, (*slen)() + 1, (*tlen)() + 1); if (custSeqence->cid < par.mincustid) par.mincustid = custSeqence->cid; if (custSeqence->cid > par.maxcustid) par.maxcustid = custSeqence->cid; if (par.ascii) NTPC[NCUST] = custSeqence->write_asc(data_fp); else NTPC[NCUST] = custSeqence->write(data_fp); tottrans += NTPC[NCUST]; if (NTPC[NCUST] > 0) { NCUST++;//added by MJZaki: repeat if trans empty } else { #ifdef _DEBUG fprintf(stderr, "i = %d, NTPC[%d] = %d.\n", i, NCUST, NTPC[NCUST]); //cerr << custSeqence->toString(); #endif i--; } delete custSeqence; } data_fp.close(); pat_fp.close(); //added by MJZaki if (par.ascii) { // ascii format conf_fp << "Number of customers in database = " << NCUST << "\n"; conf_fp << "Number of items = " << par.nitems << "\n"; conf_fp << "Average sequence length = " << par.slen << "\n"; conf_fp << "Average transaction length = " << par.tlen << "\n"; conf_fp << "Number of total transactions = " << tottrans << "\n"; conf_fp << "minimum transaction number = " << par.mincustid << "\n"; conf_fp << "maximum transaction number = " << par.maxcustid << "\n"; } else { // binary format conf_fp.write((char *) &NCUST, sizeof(LINT)); conf_fp.write((char *) &par.nitems, sizeof(LINT)); conf_fp.write((char *) &par.slen, sizeof(FLOAT)); conf_fp.write((char *) &par.tlen, sizeof(FLOAT)); conf_fp.write((char *) &tottrans, sizeof(LINT)); conf_fp.write((char *) &par.mincustid, sizeof(LINT)); conf_fp.write((char *) &par.maxcustid, sizeof(LINT)); } conf_fp.close(); if (par.ascii) { ntpc_fp << "Number of transactions that every customer have: \n"; for (i = 0; i < NCUST; i++) { ntpc_fp << NTPC[i] << " "; if ((i+1) % 30 == 0) ntpc_fp << endl; } ntpc_fp << endl; } else { ntpc_fp.write((char *) NTPC, NCUST * sizeof(LINT)); } ntpc_fp.close(); delete[] NTPC; }
// Generate Transactions // void gen_rules(TransPar &par) { StringSet *lits; StringSetIter *patterns; Transaction *trans; PoissonDist *tlen; ofstream data_fp; ofstream pat_fp; ofstream conf_fp; //added by MJZaki data_fp.open(data_file); pat_fp.open(pat_file); conf_fp.open(conf_file); //added by MJZaki if (data_fp.fail() || pat_fp.fail() || conf_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } lits = new StringSet(par.nitems, par.lits); // Reset random seed generator for before generating transactions if (par.seed < 0) RandSeed::set_seed(par.seed); tlen = new PoissonDist(par.tlen - 1); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); LINT NTRANS = 0; //Transaction::set_print_cid(FALSE); // added by me to suppress cid for (LINT i = 0; i < par.ntrans; i++) { trans = mk_tran(*patterns, (*tlen)() + 1, par.lits.npats); if (trans->tid < par.mintid) par.mintid = trans->tid; if (trans->tid > par.maxtid) par.maxtid = trans->tid; if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); //cout << "TRANS SZ " << trans->size() << endl; if (trans->size() > 0) NTRANS++; //added by MJZaki: repeat if trans empty else i--; delete trans; } data_fp.close(); pat_fp.close(); //added by MJZaki if (par.ascii) { conf_fp << NTRANS << "\n"; conf_fp << par.nitems << "\n"; conf_fp << par.tlen << "\n"; conf_fp << par.mintid << "\n"; conf_fp << par.maxtid << "\n"; } else { cout << "WRITING " << NTRANS << " " << par.nitems << endl; conf_fp.write((char *) &NTRANS, sizeof(LINT)); conf_fp.write((char *) &par.nitems, sizeof(LINT)); conf_fp.write((char *) &par.tlen, sizeof(FLOAT)); conf_fp.write((char *) &par.mintid, sizeof(LINT)); conf_fp.write((char *) &par.maxtid, sizeof(LINT)); } conf_fp.close(); }