/**
 * @brief Sort the elements of a Taxonomy by name, optionally descending into each element.
 *
 * @param tax            Taxonomy whose elements are reordered via its sort() member.
 * @param recursive      If true, every element of @p tax is sorted in the same way, at all depths.
 * @param case_sensitive If true, names are compared as they are; otherwise both names are
 *                       passed through utils::to_lower() before being compared.
 */
void sort_by_name( Taxonomy& tax, bool recursive, bool case_sensitive )
{
    // Strict "less than" on the raw names.
    auto less_exact = []( Taxon const& a, Taxon const& b ) {
        return a.name() < b.name();
    };

    // Strict "less than" on the lower-cased names.
    auto less_folded = []( Taxon const& a, Taxon const& b ) {
        return utils::to_lower( a.name() ) < utils::to_lower( b.name() );
    };

    // Reorder this level with the comparator that matches the requested mode.
    if( case_sensitive ) {
        tax.sort( less_exact );
    } else {
        tax.sort( less_folded );
    }

    // Nothing more to do unless the caller asked for the whole hierarchy.
    if( ! recursive ) {
        return;
    }
    for( auto& sub : tax ) {
        sort_by_name( sub, true, case_sensitive );
    }
}
// Generate Transactions and Taxonomy // void gen_taxrules(TaxPar &par) { Taxonomy *tax; StringSet *lits; StringSetIter *patterns; Transaction *trans; poisson_distribution<LINT> tlen(par.tlen - 1); ofstream data_fp; ofstream pat_fp; ofstream tax_fp; data_fp.open(data_file, ios::trunc); pat_fp.open(pat_file, ios::trunc); tax_fp.open(tax_file, ios::trunc); if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } // generate taxonomy and write it to file tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio); if (par.ascii) tax->write_asc(tax_fp); else tax->write(tax_fp); lits = new StringSet(par.nitems, par.lits, tax); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); for (LINT i = 0; i < par.ntrans; i ++) { trans = mk_tran(*patterns, tlen(generator) + 1, tax); if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); delete trans; delete trans; } data_fp.close(); pat_fp.close(); tax_fp.close(); }
/**
 * @brief Check whether every Taxon visited in a Taxonomy has a distinct id.
 *
 * Every Taxon that preorder_for_each() hands to the callback has its id() recorded
 * in a set; an id that is already present marks the taxonomy as non-unique.
 *
 * @param tax Taxonomy to inspect.
 * @return true if no id was seen twice, false otherwise.
 */
bool has_unique_ids( Taxonomy const& tax )
{
    std::unordered_set<std::string> seen_ids;
    bool all_unique = true;

    // insert() reports through `.second` whether the id was new. The traversal
    // itself is not aborted on the first repeat; only the flag is flipped.
    auto record_id = [&]( Taxon const& taxon ) {
        bool const is_new = seen_ids.insert( taxon.id() ).second;
        if( ! is_new ) {
            all_unique = false;
        }
    };

    preorder_for_each( tax, record_id );
    return all_unique;
}
/**
 * @brief Count all elements of a Taxonomy, including those nested at any depth.
 *
 * @param tax Taxonomy to count.
 * @return tax.size() plus the recursive totals of every element of @p tax.
 */
size_t total_taxa_count( Taxonomy const& tax )
{
    // Start with the elements directly contained here ...
    size_t total = tax.size();

    // ... and add everything contained below each of them.
    for( auto const& child : tax ) {
        total += total_taxa_count( child );
    }
    return total;
}
/**
 * @brief Call clear_children() on everything that sits `level` steps below @p tax.
 *
 * With level 0, clear_children() is called on @p tax itself. Otherwise the
 * function recurses into every element of @p tax with the level reduced by one.
 *
 * @param tax   Taxonomy to modify.
 * @param level Number of levels to descend before clearing.
 */
void remove_taxa_at_level( Taxonomy& tax, size_t level )
{
    // Reached the requested depth: drop what is below and stop.
    if( level == 0 ) {
        tax.clear_children();
        return;
    }

    // Not there yet: descend one level in every element.
    for( auto& child : tax ) {
        remove_taxa_at_level( child, level - 1 );
    }
}
/**
 * @brief Count the elements of a Taxonomy separately for each nesting level.
 *
 * @param tax Taxonomy to count.
 * @return Vector whose entry 0 is tax.size(), and whose entry i+1 is the sum of
 *         entry i over the results for all elements of @p tax. Empty if
 *         @p tax has size 0.
 */
std::vector< size_t > taxa_count_levels( Taxonomy const& tax )
{
    std::vector< size_t > counts;

    // An empty taxonomy contributes no levels at all.
    if( tax.size() == 0 ) {
        return counts;
    }

    // Level 0: the elements directly contained in this taxonomy.
    counts.push_back( tax.size() );

    for( auto const& child : tax ) {
        auto const below = taxa_count_levels( child );

        // The child's level i is our level i+1; grow the result if the child is deeper
        // than anything seen so far.
        size_t const needed = below.size() + 1;
        if( counts.size() < needed ) {
            counts.resize( needed, 0 );
        }
        for( size_t depth = 0; depth < below.size(); ++depth ) {
            counts[ depth + 1 ] += below[ depth ];
        }
    }
    return counts;
}
/**
 * @brief Count the elements found exactly `level` steps below @p tax.
 *
 * @param tax   Taxonomy to count in.
 * @param level Number of levels to descend; 0 yields tax.size().
 * @return Sum of size() over everything reached after descending @p level levels.
 */
size_t taxa_count_at_level( Taxonomy const& tax, size_t level )
{
    // At the requested depth, the answer is simply the number of elements here.
    if( level == 0 ) {
        return tax.size();
    }

    // Otherwise add up the answers of all elements, one level further down.
    size_t total = 0;
    for( auto const& child : tax ) {
        total += taxa_count_at_level( child, level - 1 );
    }
    return total;
}
// Generate Transactions and Taxonomy // void gen_taxrules(TaxPar &par) { Taxonomy *tax; StringSet *lits; StringSetIter *patterns; Transaction *trans; PoissonDist *tlen; ofstream data_fp; ofstream pat_fp; ofstream tax_fp; ofstream conf_fp; //added by MJZaki data_fp.open(data_file); pat_fp.open(pat_file); tax_fp.open(tax_file); conf_fp.open(conf_file); //added by MJZaki if (data_fp.fail() || pat_fp.fail() || tax_fp.fail() || conf_fp.fail()) { cerr << "Error opening output file" << endl; exit(1); } // generate taxonomy and write it to file tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio); if (par.ascii) tax->write_asc(tax_fp); else tax->write(tax_fp); tlen = new PoissonDist(par.tlen - 1); lits = new StringSet(par.nitems, par.lits, tax); par.write(pat_fp); lits->display(pat_fp); patterns = new StringSetIter(*lits); LINT NTRANS = 0; for (LINT i = 0; i < par.ntrans; i++) { trans = mk_tran(*patterns, (*tlen)() + 1, par.lits.npats, tax); if (par.ascii) trans->write_asc(data_fp); else trans->write(data_fp); if (trans->size() > 0) NTRANS++;//added by MJZaki: repeat if trans empty else i--; delete trans; } data_fp.close(); pat_fp.close(); tax_fp.close(); //added by MJZaki if (par.ascii) { conf_fp << NTRANS << "\n"; conf_fp << par.nitems << "\n"; conf_fp << par.tlen << "\n"; } else { conf_fp.write((char *) &NTRANS, sizeof(LINT)); conf_fp.write((char *) &par.nitems, sizeof(LINT)); int t = (int) par.tlen; conf_fp.write((char *) &t, sizeof(LINT)); } conf_fp.close(); }