Exemple #1
0
/**
 * @brief Sort the children of a Taxonomy by their names.
 *
 * The ordering is delegated to `tax.sort()` with a comparator on `Taxon::name()`.
 *
 * @param tax            Taxonomy whose children are sorted in place.
 * @param recursive      If true, every child is sorted the same way, down all levels.
 * @param case_sensitive If true, names are compared as they are; otherwise both names
 *                       are passed through `utils::to_lower()` before being compared.
 */
void sort_by_name( Taxonomy& tax, bool recursive, bool case_sensitive )
{
    // Sort this level, picking the comparator that matches the requested case handling.
    if( case_sensitive ) {
        tax.sort( []( Taxon const& a, Taxon const& b ) {
            return a.name() < b.name();
        });
    } else {
        tax.sort( []( Taxon const& a, Taxon const& b ) {
            return utils::to_lower( a.name() ) < utils::to_lower( b.name() );
        });
    }

    // Nothing more to do unless the lower levels were requested as well.
    if( ! recursive ) {
        return;
    }
    for( auto& sub : tax ) {
        sort_by_name( sub, true, case_sensitive );
    }
}
Exemple #2
0
// Generate Transactions and Taxonomy
//
// Opens data_file, pat_file and tax_file (truncating them) and terminates the
// program with exit status 1 if any of them cannot be opened.
// Then:
//   1. builds a Taxonomy from par and writes it to the taxonomy file,
//   2. builds the StringSet of patterns, and writes par followed by the
//      patterns to the pattern file,
//   3. creates par.ntrans transactions via mk_tran() and writes each one to
//      the data file.
// par.ascii selects the write_asc() variants over the write() variants.
// The length argument given to mk_tran() is a draw from a Poisson
// distribution with mean (par.tlen - 1), plus one.
//
void gen_taxrules(TaxPar &par)
{
  Taxonomy *tax;
  StringSet *lits;
  StringSetIter *patterns;
  Transaction *trans;
  poisson_distribution<LINT> tlen(par.tlen - 1);

  ofstream data_fp;
  ofstream pat_fp;
  ofstream tax_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  tax_fp.open(tax_file, ios::trunc);
  if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // generate taxonomy and write it to file
  tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio);
  if (par.ascii)
    tax->write_asc(tax_fp);
  else
    tax->write(tax_fp);

  lits = new StringSet(par.nitems, par.lits, tax);

  par.write(pat_fp);
  lits->display(pat_fp);

  patterns = new StringSetIter(*lits);
  for (LINT i = 0; i < par.ntrans; i ++)
    {
      trans = mk_tran(*patterns, tlen(generator) + 1, tax);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      // Free the transaction exactly once: deleting the same pointer a
      // second time is undefined behavior.
      delete trans;
    }

  // NOTE(review): tax, lits and patterns are never deleted in this function.
  // Whether they can be released here depends on the ownership rules of
  // StringSet / StringSetIter, which are not visible from this function --
  // confirm before adding deletes.

  data_fp.close();
  pat_fp.close();
  tax_fp.close();
}
Exemple #3
0
/**
 * @brief Check whether all ids in a Taxonomy are distinct.
 *
 * Every Taxon handed to the callback by `preorder_for_each()` contributes its `id()`
 * to a set of already seen ids.
 *
 * @param tax Taxonomy to inspect.
 * @return `true` if no id was encountered more than once, `false` otherwise.
 */
bool has_unique_ids( Taxonomy const& tax )
{
    std::unordered_set<std::string> seen_ids;
    bool all_unique = true;

    // insert() reports through `.second` whether the id was new, so a failed
    // insertion means that the id has been seen before.
    auto record_id = [&]( Taxon const& taxon ){
        if( ! seen_ids.insert( taxon.id() ).second ) {
            all_unique = false;
        }
    };
    preorder_for_each( tax, record_id );

    return all_unique;
}
Exemple #4
0
/**
 * @brief Count the taxa on all levels of a Taxonomy.
 *
 * @param tax Taxonomy to count.
 * @return `tax.size()` plus the recursively computed totals of all of its children.
 */
size_t total_taxa_count( Taxonomy const& tax )
{
    // First gather what lies below the children ...
    size_t below = 0;
    for( auto const& child : tax ) {
        below += total_taxa_count( child );
    }

    // ... then add the size of this level itself.
    return tax.size() + below;
}
Exemple #5
0
/**
 * @brief Clear the children of all taxa that sit a given number of levels below `tax`.
 *
 * @param tax   Taxonomy to modify in place.
 * @param level Number of levels to descend before calling `clear_children()`.
 *              With 0, `tax.clear_children()` is called directly.
 */
void remove_taxa_at_level( Taxonomy& tax, size_t level )
{
    // Target depth reached: drop everything below this element.
    if( level == 0 ) {
        tax.clear_children();
        return;
    }

    // Otherwise go one level further down in each child.
    size_t const remaining = level - 1;
    for( auto& child : tax ) {
        remove_taxa_at_level( child, remaining );
    }
}
Exemple #6
0
/**
 * @brief Count the taxa per level of a Taxonomy.
 *
 * @param tax Taxonomy to count.
 * @return A vector in which entry 0 is `tax.size()` and entry `i + 1` is the sum of
 *         the entries `i` of the results for all children. The vector is empty if
 *         `tax.size()` is 0.
 */
std::vector< size_t > taxa_count_levels( Taxonomy const& tax )
{
    std::vector< size_t > counts;
    if( tax.size() == 0 ) {
        return counts;
    }

    // Level 0 is this Taxonomy itself.
    counts.push_back( tax.size() );

    for( auto const& sub : tax ) {
        auto const sub_counts = taxa_count_levels( sub );

        // The levels of a child are shifted down by one relative to this Taxonomy,
        // so make room for them if this child reaches deeper than any one before.
        size_t const needed = sub_counts.size() + 1;
        if( needed > counts.size() ) {
            counts.resize( needed, 0 );
        }

        for( size_t lvl = 0; lvl < sub_counts.size(); ++lvl ) {
            counts[ lvl + 1 ] += sub_counts[ lvl ];
        }
    }
    return counts;
}
Exemple #7
0
/**
 * @brief Count the taxa at a given level of a Taxonomy.
 *
 * @param tax   Taxonomy to count.
 * @param level Number of levels to descend; 0 refers to `tax` itself.
 * @return `tax.size()` for level 0, otherwise the sum of the counts at `level - 1`
 *         over all children of `tax`.
 */
size_t taxa_count_at_level( Taxonomy const& tax, size_t level )
{
    // Requested level reached: its count is the size of this element.
    if( level == 0 ) {
        return tax.size();
    }

    // Otherwise accumulate over the children, one level further down.
    size_t total = 0;
    size_t const remaining = level - 1;
    for( auto const& child : tax ) {
        total += taxa_count_at_level( child, remaining );
    }
    return total;
}
Exemple #8
0
// Generate Transactions and Taxonomy
//
// Opens data_file, pat_file, tax_file and conf_file and terminates the program
// with exit status 1 if any of them cannot be opened.
// Then:
//   1. builds a Taxonomy from par and writes it to the taxonomy file,
//   2. builds the StringSet of patterns, and writes par followed by the
//      patterns to the pattern file,
//   3. creates transactions via mk_tran() and writes them to the data file
//      until par.ntrans transactions with size() > 0 have been produced,
//   4. writes the number of non-empty transactions, par.nitems and par.tlen
//      to the configuration file.
// par.ascii selects text output (write_asc / operator<<) over binary output.
//
void gen_taxrules(TaxPar &par)
{
	Taxonomy *tax;
	StringSet *lits;
	StringSetIter *patterns;
	Transaction *trans;

	ofstream data_fp;
	ofstream pat_fp;
	ofstream tax_fp;
	ofstream conf_fp; //added by MJZaki

	data_fp.open(data_file);
	pat_fp.open(pat_file);
	tax_fp.open(tax_file);
	conf_fp.open(conf_file); //added by MJZaki

	if (data_fp.fail() || pat_fp.fail() || tax_fp.fail() || conf_fp.fail())
	{
		cerr << "Error opening output file" << endl;
		exit(1);
	}

	// generate taxonomy and write it to file
	tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio);
	if (par.ascii)
		tax->write_asc(tax_fp);
	else
		tax->write(tax_fp);

	// Constructed at the same point as before (after the taxonomy), but as an
	// automatic object so that it is released when the function returns.
	PoissonDist tlen(par.tlen - 1);

	lits = new StringSet(par.nitems, par.lits, tax);

	par.write(pat_fp);
	lits->display(pat_fp);

	patterns = new StringSetIter(*lits);
	LINT NTRANS = 0;
	for (LINT i = 0; i < par.ntrans; i++)
	{
		trans = mk_tran(*patterns, tlen() + 1, par.lits.npats, tax);
		if (par.ascii)
			trans->write_asc(data_fp);
		else
			trans->write(data_fp);
		if (trans->size() > 0)
			NTRANS++;//added by MJZaki: repeat if trans empty
		else
			i--; // an empty transaction does not count; redo this iteration
		delete trans;
	}

	// NOTE(review): tax, lits and patterns are never deleted in this function.
	// Whether they can be released here depends on the ownership rules of
	// StringSet / StringSetIter, which are not visible from this function --
	// confirm before adding deletes.

	data_fp.close();
	pat_fp.close();
	tax_fp.close();

	//added by MJZaki
	if (par.ascii)
	{
		conf_fp << NTRANS << "\n";
		conf_fp << par.nitems << "\n";
		conf_fp << par.tlen << "\n";
	}
	else
	{
		conf_fp.write(reinterpret_cast<const char *>(&NTRANS), sizeof(LINT));
		conf_fp.write(reinterpret_cast<const char *>(&par.nitems), sizeof(LINT));
		// The value is written as sizeof(LINT) bytes, so it has to be stored
		// in a LINT: writing sizeof(LINT) bytes from an int reads past the
		// variable whenever LINT is wider than int.
		LINT t = (LINT) par.tlen;
		conf_fp.write(reinterpret_cast<const char *>(&t), sizeof(LINT));
	}
	conf_fp.close();
}