/// Prints every entry of `folder` stored under `key`, one entry per line,
/// formatted as "<key>\t<value>".
///
/// @param folder multimap to search.
/// @param key    NUL-terminated key to look up. A null pointer is treated as
///               "no key": nothing is printed.
void umm_out_key(unordered_multimap<string, string> &folder, char *key)
{
    // equal_range() implicitly builds a std::string from `key`; doing that
    // from a null pointer is undefined behaviour, so reject it up front.
    if (key == nullptr) {
        return;
    }

    auto its = folder.equal_range(key);           // [first, second) spans all values of this key
    for (auto it = its.first; it != its.second; ++it) {
        cout << it->first << '\t' << it->second << endl;
    }
}
Пример #2
0
// Collects the blocks that belong to the loop whose back edges target
// `backEdgeDestination`, and derives the loop's entry and exit blocks.
//
//   destToOrigin - multimap from a back-edge destination to the origins of its back edges.
//   members      - receives every block found to be part of the loop.
//   entries      - receives each member that has a predecessor outside `members`.
//   exits        - receives each successor of a member that is itself outside `members`.
//
// Every (backEdgeDestination, member) pair is also recorded in `loopMembers`.
void SESELoop::buildLoopMemberSet(BasicBlock& backEdgeDestination, const unordered_multimap<BasicBlock*, BasicBlock*>& destToOrigin, unordered_set<BasicBlock*>& members, unordered_set<BasicBlock*>& entries, unordered_set<BasicBlock*>& exits)
{
	// The origins of all back edges into the destination are the sinks of the path search.
	unordered_set<BasicBlock*> backEdgeOrigins;
	const auto origins = destToOrigin.equal_range(&backEdgeDestination);
	auto origin = origins.first;
	while (origin != origins.second)
	{
		backEdgeOrigins.insert(origin->second);
		++origin;
	}

	// Every block on a path from the destination to one of those origins is an initial member.
	auto paths = findPathsToSinkNodes(&backEdgeDestination, backEdgeOrigins);
	for (const auto& nodesOnPath : paths)
	{
		members.insert(nodesOnPath.begin(), nodesOnPath.end());
	}

	// The path search does not follow back edges, so blocks of a nested cycle would be missed.
	// Pull in whatever has already been recorded in loopMembers under each current member.
	// (The membership refinement step from the "No More Gotos" paper is not used here.)
	unordered_set<BasicBlock*> nestedMembers;
	for (BasicBlock* current : members)
	{
		const auto recorded = loopMembers.equal_range(current);
		for (auto inner = recorded.first; inner != recorded.second; ++inner)
		{
			nestedMembers.insert(inner->second);
		}
	}
	members.insert(nestedMembers.begin(), nestedMembers.end());

	// Record the final membership and classify the loop boundary.
	for (BasicBlock* current : members)
	{
		loopMembers.insert({&backEdgeDestination, current});

		// A member reached from a non-member is an entry.
		for (BasicBlock* before : predecessors(current))
		{
			if (members.find(before) == members.end())
			{
				entries.insert(current);
			}
		}

		// A non-member reached from a member is an exit.
		for (BasicBlock* after : successors(current))
		{
			if (members.find(after) == members.end())
			{
				exits.insert(after);
			}
		}
	}
}
Пример #3
0
 /// Depth-first walk over the `father` multimap starting at `start`.
 ///
 /// `path` accumulates the nodes visited so far. Whenever the walk reaches
 /// `end`, the accumulated nodes are appended to `ret` in reverse visiting
 /// order (so each stored path begins with `end`). `path` is restored to its
 /// incoming contents before the function returns.
 ///
 /// `dict` is only forwarded to the recursive calls; it is not read here.
 void getPath(string &start, string &end, unordered_set<string> &dict,
               unordered_multimap<string, string> &father, vector<vector<string>> &ret, vector<string> &path)
 {
     path.push_back(start);

     if (start == end) {
         // Target reached: store the walk back-to-front and unwind.
         ret.emplace_back(path.rbegin(), path.rend());
         path.pop_back();
         return;
     }

     // Otherwise continue through every value recorded under `start`.
     const auto parents = father.equal_range(start);
     auto cursor = parents.first;
     while (cursor != parents.second) {
         getPath(cursor->second, end, dict, father, ret, path);
         ++cursor;
     }

     path.pop_back();
 }
// Counts matching value pairs between two multimaps.
// For every (key, value) entry of map1, each value stored in map2 under the same key is
// compared with isCorrect(value from map1, value from map2); the return value is the number
// of comparisons for which isCorrect returned true.
uint64_t sketchUnorderedComparisonError(const unordered_multimap<string, string>& map1, const unordered_multimap<string, string>& map2){
	uint64_t matches = 0;
	for (const auto& entry : map1){
		// All entries of map2 sharing this entry's key.
		const auto candidates = map2.equal_range(entry.first);
		for (auto candidate = candidates.first; candidate != candidates.second; ++candidate){
			if (isCorrect(entry.second, candidate->second)){
				++matches;
			}
		}
	}
	return matches;
}
// Returns the percentage (0-100) of k-mers of `seq` that have a match in `genomicKmers`.
//
// Each k-mer is split at offset `nuc`: its first `nuc` characters are the lookup key, and the
// remainder is compared with isCorrect() against every value stored under that key. A k-mer
// counts once as soon as any comparison succeeds.
//
//   k            - k-mer length.
//   seq          - sequence whose k-mers are examined.
//   genomicKmers - multimap from k-mer prefix to the value handed to isCorrect().
//   nuc          - number of leading characters of a k-mer used as the key.
//
// Returns 0 when `seq` is shorter than `k` (there are no k-mers to examine).
double percentStrandedErrors(uint64_t k, const string& seq, const unordered_multimap<string, string>& genomicKmers, char nuc){
	// Without this guard the unsigned expression seq.size()-k+1 wraps around; for
	// seq.size()==k-1 it wraps to exactly 0 and the final division yields NaN.
	if(seq.size()<k){
		return 0;
	}
	const uint64_t kmerCount(seq.size()-k+1);
	double inter(0);
	for(uint64_t i(0); i<kmerCount; ++i){
		const string kmer(seq.substr(i,k));
		auto range(genomicKmers.equal_range(kmer.substr(0,nuc)));
		for(auto it(range.first); it!=range.second; ++it){
			if(isCorrect(kmer.substr(nuc),it->second)){
				++inter;
				break;	// one match is enough for this k-mer
			}
		}
	}
	return 100*inter/kmerCount;
}
Пример #6
0
//
// Maps every edge of a gene tree onto the best matching edge(s) of a reference tree and
// writes the result to a file.
//
// Steps performed, in order:
//   1. Read the second line of `treefile`, take the text between its first and last double
//      quote as a tree string and parse it with PhyloTree::readTree.
//   2. For every node of that tree, extract an edge number from its name (the text between
//      "{" and the last "}", or the whole name when there is no "{") and strip the "{...}"
//      suffix from the name.
//   3. Build graphs for the read tree and for `reftree` (make_graph) and enumerate their
//      splits (enumerate_splits).
//   4. For every edge of the gene tree graph, score it against every edge of the reference
//      graph and write one line "<edge number>\t<reference node name>" to `output_fname`
//      for each reference edge tied for the best score, but only when the number of tied
//      edges is below `placement_limit` (declared outside this function).
//
// Parameters:
//   reftree      - reference tree; its node names are the keys looked up in `gene_map`.
//   treefile     - path of the file holding the tree to read (comments below assume jplace format).
//   gene_map     - multimap from a reference tree node name to names looked up among the
//                  leaves of the read tree.
//   output_fname - path of the output file.
//
// Returns early, after printing a message to cerr, when `treefile` cannot be opened.
//
void reconcile( PhyloTree< TreeNode >& reftree, string treefile, unordered_multimap<string, string>& gene_map, string output_fname ){
	// read ref tree
	ifstream treein(treefile.c_str());
	if(!treein.is_open()){
		cerr << "Unable to read file " << treefile << endl;
		return;
	}

//
// read a tree with edge numberings from pplacer
// assume jplace format with treestring on second line
//
	string line;
	string treestring;
	// the first line is read and discarded; the second one carries the tree
	getline( treein, line );
	getline( treein, treestring );
	// keep only what lies between the first and the last double quote
	// NOTE(review): neither the getline results nor the find/rfind results are checked
	size_t qpos = treestring.find("\"");
	size_t rqpos = treestring.rfind("\"");
	treestring = treestring.substr( qpos + 1, rqpos - qpos - 1);
	stringstream treestr(treestring);
//	cout << "Trying to read " << treestring << endl;

	PhyloTree< TreeNode > tree;
	tree.readTree( treestr );
	cout << "The read tree has " << tree.size() << " nodes\n";
//
// remove edge numbers
// assume jplace format
//
	// edgenum_map: node index in `tree` -> edge number parsed from the node name
	std::unordered_map<int,int> edgenum_map;
	for(int i=0; i<tree.size(); i++){
		size_t atpos = tree[i].name.find("{");
		size_t ratpos = tree[i].name.rfind("}");
		int edgenum = -1;		
		if( atpos == string::npos ){
			// no "{...}" part: the whole name is parsed as the edge number
			edgenum = atoi(tree[i].name.c_str());
		}else{
			// edge number sits between "{" and the last "}"; the name keeps what precedes "{"
			edgenum = atoi(tree[i].name.substr(atpos+1, ratpos - atpos - 1).c_str());
//			cerr << "node " << i << " edgenum is " << tree[i].name.substr(atpos+1, ratpos - atpos - 1) << " name is " << tree[i].name.substr(0, atpos) << endl;
			tree[i].name = tree[i].name.substr(0, atpos);
		}
//		cerr << "mapping " << i << " to " << edgenum << "\n";
		edgenum_map.insert(make_pair(i,edgenum));
	}
//	cerr << "Done removing edge numbers\n";

//
// construct boost graphs of the trees
//
	// pg: graph of the tree read from file (the gene tree)
	PhyloGraph pg;	
	make_graph( tree, pg );

	// refpg: graph of the reference tree
	PhyloGraph refpg;	
	make_graph( reftree, refpg );

//
// Phase 3: construct map to reference tree
//
// a) cut gene tree on each edge
// b) compute splits at cut point
// c) cut species tree on each edge
// d) determine which species tree split matches the gene tree split best
// e) write out the split match


// plan for later...
// c) compute PD on either side of cut point
// d) logical AND splits with reftree splits
// e) compute minimum spanning tree among remaining nodes
// f) compute PD of minimum spanning trees
// 	
	vector< boost::dynamic_bitset<> > pg_splitlist;
	vector<Vertex> pg_vertex_map;
	enumerate_splits( pg, pg_splitlist, pg_vertex_map );
	cout << "Done with gene tree splits\n";
	vector< boost::dynamic_bitset<> > ref_splitlist;
	vector<Vertex> ref_vertex_map;
	enumerate_splits( refpg, ref_splitlist, ref_vertex_map );

	// need a mapping from vertex numbers in refpg to vertex numbers in pg
	cout << "Making gene tree map\n";
	// gtmap: name of each childless node of `tree` -> its node index
	unordered_map< string, int > gtmap;
	for(int i=0; i<tree.size(); i++){
		if(tree[i].children.size()==0){
			gtmap.insert(make_pair(tree[i].name, i));
		}
	}
	cout << gtmap.size() << " genes mapped\n";
	
	cout << "Making species to gene tree map\n";
	vector< vector< int > > species_to_gene_map;	// maps split IDs in species tree to split IDs in gene tree
	for(int i=0; i<refpg.V; i++){
		// vertices whose ref_vertex_map entry is -1 are skipped
		// (presumably vertices without a split id -- TODO confirm against enumerate_splits)
		if(ref_vertex_map[i]==-1)
			continue;
		// which genes does this species contain?
		pair< unordered_multimap<string,string>::iterator, unordered_multimap<string,string>::iterator> iter;
		iter = gene_map.equal_range(reftree[i].name);
		vector<int> curmap;
		if(iter.first ==iter.second){
			// reported but not fatal: an empty list is still pushed for this species below
			cerr << "Error no mapping found for " << reftree[i].name << endl;
		}
		for(; iter.first !=iter.second; iter.first++){			
			// NOTE(review): gtmap[...] default-inserts index 0 when the gene name is not a key
			// of gtmap, so an unknown gene silently resolves to node 0 -- confirm this is intended
			if( pg_vertex_map[ gtmap[iter.first->second] ] == -1 )
				continue;
			curmap.push_back( pg_vertex_map[ gtmap[iter.first->second] ] );
//			cout << "mapped ref " << reftree[i].name << "\t" << ref_vertex_map[i] << " to " << curmap.back() << endl;
//			cout << "reverse map to " << tree[gtmap[iter.first->second]].name << " and " << other_map[tree[gtmap[iter.first->second]].name] << endl;
		}
		// add a list of gene vertices for this species
		species_to_gene_map.push_back(curmap);
	}
	cout << species_to_gene_map.size() << " species mapped\n";
	// NOTE(review): indexes ref_splitlist[0] without checking that the list is non-empty
	cout << "rs.size() " << ref_splitlist[0].size() << endl;

	cout << "Finding best edges\n";
	ofstream mapout(output_fname.c_str());
	for( size_t i=0; i < pg.E; i++ ){
		// for each gene tree edge, calculate mapping quality between this edge and reftree edges
		double scoresum = 0;
		double bestscore = 0;
		vector<double> maxscores;	// best score of edge i against each reference edge j
//		cout << "ts1.count()\t" << pg_splitlist[i].count() << endl;
		if(pg_splitlist[i].count() == 1){
			// qq is only referenced by the commented-out debug line below;
			// nothing computed in this branch is used afterwards
			size_t f = pg_splitlist[i].find_first();
			int qq=0;
			for(int abc=-1; abc<(int)f; qq++)
				if(pg_vertex_map[qq]!=-1)
					abc++;
//			cout << "gene tree " << other_map[ tree[qq].name ] << " treenode " << qq << " split id " << f << " edge " << i << endl;
		}

		// treesplit1 is the split of edge i, treesplit2 its bitwise complement
		boost::dynamic_bitset<> treesplit1 = pg_splitlist[i];
		boost::dynamic_bitset<> treesplit2 = pg_splitlist[i];
		treesplit2.flip();

		for( size_t j=0; j < refpg.E; j++ ){
			// logical AND
			// refsplit1 is the split of reference edge j, refsplit2 its complement;
			// both are passed through normalize_split before being compared
			boost::dynamic_bitset<> refsplit1 = ref_splitlist[j];
			boost::dynamic_bitset<> refsplit2 = ref_splitlist[j];
			refsplit2.flip();
//			cout << "rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl;
			normalize_split( refsplit1, species_to_gene_map, pg_splitlist[i].size() );
			normalize_split( refsplit2, species_to_gene_map, pg_splitlist[i].size() );
//			cout << "normalized rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl;
			
			boost::dynamic_bitset<> and11 = treesplit1 & refsplit1;
			boost::dynamic_bitset<> and21 = treesplit2 & refsplit1;
			boost::dynamic_bitset<> and12 = treesplit1 & refsplit2;
			boost::dynamic_bitset<> and22 = treesplit2 & refsplit2;
			// each score is the fraction of a gene tree side that is also set in a reference side;
			// a1122score averages the same-orientation pairing, a1212score the crossed pairing
			// NOTE(review): a side with count() == 0 makes the division 0/0 -- confirm it cannot occur
			double a11score = (double)and11.count() / (double)treesplit1.count();
			double a22score = (double)and22.count() / (double)treesplit2.count();
			double a1122score = (a11score + a22score) / 2.0;
			double a12score = (double)and12.count() / (double)treesplit1.count();
			double a21score = (double)and21.count() / (double)treesplit2.count();
			double a1212score = (a12score + a21score) / 2.0;
			// both averages are raised to the 100th power before the better one is kept
			a1212score = pow( a1212score, 100.0 );
			a1122score = pow( a1122score, 100.0 );
			maxscores.push_back( max(a1122score, a1212score));
			scoresum += maxscores.back();
			bestscore = max(maxscores.back(), bestscore);
		}
		// count the number of nodes with the max score. if it is more than a threshold, ignore this node since it is too hard to reconcile
		int place_count = 0;
		for(size_t j=0; j<maxscores.size(); j++){
			if(maxscores[j] < bestscore)
				continue;
			place_count++;
		}
		if(place_count < placement_limit ){
			// emit one line per reference edge tied for the best score
			for(size_t j=0; j<maxscores.size(); j++){
				if(maxscores[j] < bestscore)
					continue;
				string refnodename = reftree[ refpg.edge_array[j].first ].name;
	//			cout << "gene tree edge " << i << " linking " << other_map[tree[pg.edge_array[i].first].name] << " best reftree edge " << refnodename << endl; 
	//			cout << "found edge " << pg.edge_array[i].first << "\n";
				mapout << edgenum_map[pg.edge_array[i].first] << "\t" << refnodename << endl;
			}
		}
//		if(pg_splitlist[i].count() == 1)
//			return;
	}
}
Пример #7
0
Файл: main.cpp Проект: yzhu29/ca
/// Performs one access to the cache stored in the global multimap `L2`.
///
/// `L2` holds at most two entries per `index`. An access is a hit when an
/// entry already stored under `index` has a `tag` equal to the `tag` argument.
///
/// - Hit: the matching entry gets `lru = 0`, the other entry (if any) gets
///   `lru = 1`, and `hit_count2` is incremented.
/// - Miss with fewer than two entries: `d` is inserted, an existing entry gets
///   `lru = 1`, and `miss_count2` and `valid_cacheline2` are incremented.
/// - Miss with two entries: the first entry is erased if its `lru` is 1,
///   otherwise the second; the survivor gets `lru = 1`, `d` is inserted, and
///   `miss_count2` and `evict_count2` are incremented.
///
/// Finding more than two entries under `index` prints a message and calls
/// `exit(1)`.
///
/// @param index key under which the line is stored in `L2`.
/// @param tag   tag compared against the stored entries.
/// @param d     entry inserted on a miss.
/// @return true on a hit, false on a miss.
bool cache_access2 (int index, int tag, Data d) {
    pair<int, Data> incoming(index, d);
    const int resident = L2.count(index);

    if (resident > 2) {
        cout << "Containing more than 2 cachelines" <<endl;
        exit(1);
    }

    // Nothing stored under this index yet: plain fill.
    if (resident <= 0) {
        L2.insert(incoming);
        miss_count2++;
        valid_cacheline2++;
        return false;
    }

    auto first = L2.equal_range(index).first;

    if (resident == 1) {
        if (first->second.tag == tag) {
            // Hit on the only entry.
            first->second.lru = 0;
            hit_count2++;
            return true;
        }
        // Miss: a slot is still free, so nothing is evicted.
        first->second.lru = 1;
        miss_count2++;
        L2.insert(incoming);
        valid_cacheline2++;
        return false;
    }

    // Exactly two entries stored under this index.
    auto second = first;
    ++second;

    if (first->second.tag == tag) {
        first->second.lru = 0;
        second->second.lru = 1;
        hit_count2++;
        return true;
    }
    if (second->second.tag == tag) {
        first->second.lru = 1;
        second->second.lru = 0;
        hit_count2++;
        return true;
    }

    // Miss with both slots taken: pick the entry to replace.
    const bool replaceFirst = (first->second.lru == 1);
    auto keep = replaceFirst ? second : first;
    auto drop = replaceFirst ? first : second;

    keep->second.lru = 1;
    miss_count2++;
    evict_count2++;
    L2.erase(drop);
    L2.insert(incoming);
    return false;
}