/// Print every entry of `folder` stored under `key`, one per line, as
/// "key<TAB>value". Nothing is printed when the key is absent.
///
/// @param folder  multimap to search
/// @param key     NUL-terminated key; a null pointer is treated as "no key"
///                and prints nothing
void umm_out_key(unordered_multimap<string, string> &folder, char *key) {
    // Constructing a std::string from a null pointer is undefined behaviour,
    // so bail out before the implicit conversion below can happen.
    if (key == nullptr) {
        return;
    }

    const string lookup(key);
    auto its = folder.equal_range(lookup);  // [first, second) spans the key's values
    for (auto it = its.first; it != its.second; ++it) {
        cout << it->first << '\t' << it->second << endl;
    }
}
void SESELoop::buildLoopMemberSet(BasicBlock& backEdgeDestination, const unordered_multimap<BasicBlock*, BasicBlock*>& destToOrigin, unordered_set<BasicBlock*>& members, unordered_set<BasicBlock*>& entries, unordered_set<BasicBlock*>& exits) { // Build paths to back-edge start nodes. unordered_set<BasicBlock*> sinkNodeSet; auto range = destToOrigin.equal_range(&backEdgeDestination); for (auto iter = range.first; iter != range.second; iter++) { sinkNodeSet.insert(iter->second); } auto pathsToBackNodes = findPathsToSinkNodes(&backEdgeDestination, sinkNodeSet); // Build initial loop membership set for (const auto& path : pathsToBackNodes) { members.insert(path.begin(), path.end()); } // The path-to-sink-nodes algorithm won't follow back edges. Because of that, if the cycle contains a // sub-cycle, we need to add its member nodes. This is probably handled by the loop membership refinement // step from the "No More Gotos" paper, but as noted below, we don't use that step. unordered_set<BasicBlock*> newMembers; for (BasicBlock* bb : members) { auto range = loopMembers.equal_range(bb); for (auto iter = range.first; iter != range.second; iter++) { newMembers.insert(iter->second); } } members.insert(newMembers.begin(), newMembers.end()); for (BasicBlock* member : members) { loopMembers.insert({&backEdgeDestination, member}); for (BasicBlock* pred : predecessors(member)) { if (members.count(pred) == 0) { entries.insert(member); } } for (BasicBlock* succ : successors(member)) { if (members.count(succ) == 0) { exits.insert(succ); } } } }
/// Depth-first walk from `start` along the `father` links, collecting every
/// chain that reaches `end`.
///
/// @param start   node the walk is currently at
/// @param end     node that terminates a chain
/// @param dict    unused by this function; kept for the existing call sites
/// @param father  maps a node to each node the walk may step to next
/// @param ret     receives one vector per completed chain, stored in reverse
///                walk order (i.e. beginning with `end`)
/// @param path    scratch stack of the nodes visited so far; it is left
///                exactly as it was on entry
void getPath(string &start, string &end, unordered_set<string> &dict, unordered_multimap<string, string> &father, vector<vector<string>> &ret, vector<string> &path) {
    path.push_back(start);

    if (start != end) {
        // Not there yet: branch into every recorded link of this node.
        auto links = father.equal_range(start);
        while (links.first != links.second) {
            getPath(links.first->second, end, dict, father, ret, path);
            ++links.first;
        }
    } else {
        // Reached the target: store the walk back-to-front.
        ret.emplace_back(path.rbegin(), path.rend());
    }

    path.pop_back();
}
/// Counts the pairs (entry of `map1`, entry of `map2`) that share a key and
/// whose values are accepted by `isCorrect(value1, value2)`.
///
/// Every entry of `map1` is compared against every entry of `map2` stored
/// under the same key, so duplicate keys contribute once per combination.
///
/// @return the number of accepted pairs
uint64_t sketchUnorderedComparisonError(const unordered_multimap<string, string>& map1, const unordered_multimap<string, string>& map2){
	uint64_t accepted = 0;
	for (const auto& entry : map1) {
		const auto sameKey = map2.equal_range(entry.first);
		for (auto other = sameKey.first; other != sameKey.second; ++other) {
			if (isCorrect(entry.second, other->second)) {
				++accepted;
			}
		}
	}
	return accepted;
}
/// Percentage of the k-mers of `seq` that have at least one accepted match in
/// `genomicKmers`.
///
/// Each k-mer is split at position `nuc`: its first `nuc` characters are the
/// lookup key, and the remainder is compared with `isCorrect` against every
/// value stored under that key. A k-mer counts once as soon as one value is
/// accepted.
///
/// @param k             k-mer length
/// @param seq           sequence to scan
/// @param genomicKmers  key -> values index to look the k-mer prefixes up in
/// @param nuc           split position inside the k-mer (used as a length).
///                      NOTE(review): a value larger than `k`, or negative,
///                      makes `substr` throw std::out_of_range as soon as a
///                      prefix matches — confirm callers keep 0 <= nuc <= k.
/// @return 100 * (matching k-mers) / (number of k-mers); 0 when `seq` is
///         shorter than `k`
double percentStrandedErrors(uint64_t k, const string& seq, const unordered_multimap<string, string>& genomicKmers, char nuc){
	// With fewer than k characters there is no k-mer at all. Without this guard
	// the unsigned k-mer count below wraps around, and for seq.size() == k-1 it
	// is exactly 0, which turned the result into 0/0 (NaN).
	if(k>seq.size()){
		return 0;
	}
	const uint64_t kmerCount(seq.size()-k+1);

	double inter(0);
	for(uint64_t i(0); i<kmerCount; ++i){
		const string kmer(seq.substr(i,k));
		auto range(genomicKmers.equal_range(kmer.substr(0,nuc)));
		if(range.first==range.second){
			continue;
		}
		// Only built when there is something to compare against, so a bad
		// `nuc` still throws under the same conditions as before.
		const string suffix(kmer.substr(nuc));
		for(auto it(range.first); it!=range.second; ++it){
			if(isCorrect(suffix,it->second)){
				++inter;
				break;
			}
		}
	}
	return double(100*inter/kmerCount);
}
/// Maps every edge of a gene tree onto the best-matching edge(s) of a
/// reference (species) tree and writes the result to a file.
///
/// The gene tree is parsed from the second line of `treefile`. For each gene
/// tree edge, the split it induces is scored against the split of every
/// reference tree edge. If fewer than `placement_limit` (defined outside this
/// function) reference edges tie for the best score, one line
/// "<edge number>\t<reference node name>" is written per tying edge.
///
/// @param reftree       reference tree; indexed by vertex to obtain node names
/// @param treefile      path of the file holding the gene tree (jplace-style)
/// @param gene_map      reference node name -> gene names; looked up once per
///                      reference vertex
/// @param output_fname  path of the mapping file to write
///
/// Progress is reported on cout, problems on cerr. The function returns early,
/// without writing anything, when `treefile` cannot be opened.
void reconcile( PhyloTree< TreeNode >& reftree, string treefile, unordered_multimap<string, string>& gene_map, string output_fname ){
    // read ref tree
    ifstream treein(treefile.c_str());
    if(!treein.is_open()){
        cerr << "Unable to read file " << treefile << endl;
        return;
    }
    //
    // read a tree with edge numberings from pplacer
    // assume jplace format with treestring on second line
    //
    string line;
    string treestring;
    getline( treein, line );       // first line is read and discarded
    getline( treein, treestring );
    // Keep only the text between the first and the last double quote.
    // If the line has no quote at all, both positions are npos and the
    // arithmetic below wraps so that the whole line is kept.
    size_t qpos = treestring.find("\"");
    size_t rqpos = treestring.rfind("\"");
    treestring = treestring.substr( qpos + 1, rqpos - qpos - 1);
    stringstream treestr(treestring);
//    cout << "Trying to read " << treestring << endl;
    PhyloTree< TreeNode > tree;
    tree.readTree( treestr );
    cout << "The read tree has " << tree.size() << " nodes\n";
    //
    // remove edge numbers
    // assume jplace format
    //
    // edgenum_map: gene tree node index -> edge number parsed from the node name
    std::unordered_map<int,int> edgenum_map;
    for(int i=0; i<tree.size(); i++){
        size_t atpos = tree[i].name.find("{");
        size_t ratpos = tree[i].name.rfind("}");
        int edgenum = -1;
        if( atpos == string::npos ){
            // no "{...}" suffix: the whole name is taken as the edge number
            edgenum = atoi(tree[i].name.c_str());
        }else{
            // the number sits between "{" and the last "}"; strip it from the name
            edgenum = atoi(tree[i].name.substr(atpos+1, ratpos - atpos - 1).c_str());
//            cerr << "node " << i << " edgenum is " << tree[i].name.substr(atpos+1, ratpos - atpos - 1) << " name is " << tree[i].name.substr(0, atpos) << endl;
            tree[i].name = tree[i].name.substr(0, atpos);
        }
//        cerr << "mapping " << i << " to " << edgenum << "\n";
        edgenum_map.insert(make_pair(i,edgenum));
    }
//    cerr << "Done removing edge numbers\n";
    //
    // construct boost graphs of the trees
    //
    PhyloGraph pg;      // graph of the gene tree read above
    make_graph( tree, pg );
    PhyloGraph refpg;   // graph of the reference tree
    make_graph( reftree, refpg );
    //
    // Phase 3: construct map to reference tree
    //
    // a) cut gene tree on each edge
    // b) compute splits at cut point
    // c) cut species tree on each edge
    // d) determine which species tree split matches the gene tree split best
    // e) write out the split match

    // plan for later...
    // c) compute PD on either side of cut point
    // d) logical AND splits with reftree splits
    // e) compute minimum spanning tree among remaining nodes
    // f) compute PD of minimum spanning trees
    //
    // NOTE(review): enumerate_splits is defined elsewhere; from its use below it
    // presumably fills one bitset per edge and a per-vertex map in which -1
    // marks vertices without a split ID — confirm against its definition.
    vector< boost::dynamic_bitset<> > pg_splitlist;
    vector<Vertex> pg_vertex_map;
    enumerate_splits( pg, pg_splitlist, pg_vertex_map );
    cout << "Done with gene tree splits\n";
    vector< boost::dynamic_bitset<> > ref_splitlist;
    vector<Vertex> ref_vertex_map;
    enumerate_splits( refpg, ref_splitlist, ref_vertex_map );
    // need a mapping from vertex numbers in refpg to vertex numbers in pg
    cout << "Making gene tree map\n";
    // gtmap: name of a childless gene tree node -> its node index
    unordered_map< string, int > gtmap;
    for(int i=0; i<tree.size(); i++){
        if(tree[i].children.size()==0){
            gtmap.insert(make_pair(tree[i].name, i));
        }
    }
    cout << gtmap.size() << " genes mapped\n";
    cout << "Making species to gene tree map\n";
    vector< vector< int > > species_to_gene_map; // maps split IDs in species tree to split IDs in gene tree
    for(int i=0; i<refpg.V; i++){
        if(ref_vertex_map[i]==-1) continue;   // skip reference vertices whose map entry is -1
        // which genes does this species contain?
        pair< unordered_multimap<string,string>::iterator, unordered_multimap<string,string>::iterator> iter;
        iter = gene_map.equal_range(reftree[i].name);
        vector<int> curmap;
        if(iter.first ==iter.second){
            // reported but not fatal: an empty list is still pushed below
            cerr << "Error no mapping found for " << reftree[i].name << endl;
        }
        for(; iter.first !=iter.second; iter.first++){
            // NOTE(review): gtmap[...] inserts a 0 entry for a gene name that is
            // not in the gene tree, so such a gene is silently treated as node 0
            // — confirm this is intended.
            if( pg_vertex_map[ gtmap[iter.first->second] ] == -1 ) continue;
            curmap.push_back( pg_vertex_map[ gtmap[iter.first->second] ] );
//            cout << "mapped ref " << reftree[i].name << "\t" << ref_vertex_map[i] << " to " << curmap.back() << endl;
//            cout << "reverse map to " << tree[gtmap[iter.first->second]].name << " and " << other_map[tree[gtmap[iter.first->second]].name] << endl;
        }
        // add a list of gene vertices for this species
        species_to_gene_map.push_back(curmap);
    }
    cout << species_to_gene_map.size() << " species mapped\n";
    // NOTE(review): indexes element 0 without checking that ref_splitlist is non-empty.
    cout << "rs.size() " << ref_splitlist[0].size() << endl;
    cout << "Finding best edges\n";
    // NOTE(review): the stream is not checked for a failed open.
    ofstream mapout(output_fname.c_str());
    for( size_t i=0; i < pg.E; i++ ){
        // for each gene tree edge, calculate mapping quality between this edge and reftree edges
        double scoresum = 0;      // accumulated below but never read in this function
        double bestscore = 0;
        vector<double> maxscores; // maxscores[j]: score of gene edge i against reference edge j
//        cout << "ts1.count()\t" << pg_splitlist[i].count() << endl;
        if(pg_splitlist[i].count() == 1){
            // Leftover from debugging: advances qq past the f-th entry of
            // pg_vertex_map that is not -1. The result is only used by the
            // commented-out print below.
            size_t f = pg_splitlist[i].find_first();
            int qq=0;
            for(int abc=-1; abc<(int)f; qq++)
                if(pg_vertex_map[qq]!=-1) abc++;
//            cout << "gene tree " << other_map[ tree[qq].name ] << " treenode " << qq << " split id " << f << " edge " << i << endl;
        }
        // the two sides of the gene tree split
        boost::dynamic_bitset<> treesplit1 = pg_splitlist[i];
        boost::dynamic_bitset<> treesplit2 = pg_splitlist[i];
        treesplit2.flip();
        for( size_t j=0; j < refpg.E; j++ ){
            // logical AND
            // the two sides of the reference split
            boost::dynamic_bitset<> refsplit1 = ref_splitlist[j];
            boost::dynamic_bitset<> refsplit2 = ref_splitlist[j];
            refsplit2.flip();
//            cout << "rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl;
            // NOTE(review): normalize_split is defined elsewhere; presumably it
            // re-expresses the reference split in gene tree split IDs so that it
            // can be ANDed with treesplit1/2 — confirm.
            normalize_split( refsplit1, species_to_gene_map, pg_splitlist[i].size() );
            normalize_split( refsplit2, species_to_gene_map, pg_splitlist[i].size() );
//            cout << "normalized rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl;
            boost::dynamic_bitset<> and11 = treesplit1 & refsplit1;
            boost::dynamic_bitset<> and21 = treesplit2 & refsplit1;
            boost::dynamic_bitset<> and12 = treesplit1 & refsplit2;
            boost::dynamic_bitset<> and22 = treesplit2 & refsplit2;
            // Fraction of each gene tree side covered by a reference side, for
            // both ways of pairing the sides (1-1/2-2 and 1-2/2-1).
            // NOTE(review): a side with count() == 0 makes these divisions by zero.
            double a11score = (double)and11.count() / (double)treesplit1.count();
            double a22score = (double)and22.count() / (double)treesplit2.count();
            double a1122score = (a11score + a22score) / 2.0;
            double a12score = (double)and12.count() / (double)treesplit1.count();
            double a21score = (double)and21.count() / (double)treesplit2.count();
            double a1212score = (a12score + a21score) / 2.0;
            // raising to the 100th power strongly penalises anything below 1
            a1212score = pow( a1212score, 100.0 );
            a1122score = pow( a1122score, 100.0 );
            maxscores.push_back( max(a1122score, a1212score));
            scoresum += maxscores.back();
            bestscore = max(maxscores.back(), bestscore);
        }
        // count the number of nodes with the max score.  if it is more than a threshold, ignore this node since it is too hard to reconcile
        int place_count = 0;
        for(size_t j=0; j<maxscores.size(); j++){
            if(maxscores[j] < bestscore) continue;
            place_count++;
        }
        if(place_count < placement_limit ){
            // one output line per reference edge that ties for the best score
            for(size_t j=0; j<maxscores.size(); j++){
                if(maxscores[j] < bestscore) continue;
                string refnodename = reftree[ refpg.edge_array[j].first ].name;
//                cout << "gene tree edge " << i << " linking " << other_map[tree[pg.edge_array[i].first].name] << " best reftree edge " << refnodename << endl;
//                cout << "found edge " << pg.edge_array[i].first << "\n";
                mapout << edgenum_map[pg.edge_array[i].first] << "\t" << refnodename << endl;
            }
        }
//        if(pg_splitlist[i].count() == 1)
//            return;
    }
}
bool cache_access2 (int index, int tag, Data d) { bool hit = 1; pair<int, Data> mypair(index, d); int count = L2.count(index); // Miss if (count <= 0) { L2.insert(mypair); miss_count2++; hit = 0; valid_cacheline2++; } else if (count == 1) { auto range = L2.equal_range(index); auto it = range.first; Data d1 = it->second; // Hit if (d1.tag == tag) { it->second.lru = 0; //cout << "Hit" << endl; hit_count2++; } // Miss else { it->second.lru = 1; //cout << "Miss" << endl; miss_count2++; hit = 0; L2.insert(mypair); valid_cacheline2++; } } else if (count == 2) { auto range = L2.equal_range(index); auto it = range.first; auto it_1 = range.first; auto it_2 = ++it; Data d1 = it_1->second; Data d2 = it_2->second; // Hit if (d1.tag == tag) { it_1->second.lru = 0; it_2->second.lru = 1; //cout << "Hit" << endl; hit_count2++; } else if (d2.tag == tag) { it_1->second.lru = 1; it_2->second.lru = 0; //cout << "Hit" << endl; hit_count2++; } else if (d1.lru == 1) { it_2->second.lru = 1; //cout << "Miss" << endl; miss_count2++; evict_count2++; hit = 0; L2.erase(it_1); L2.insert(mypair); } else { it_1->second.lru = 1; //cout << "Miss" << endl; miss_count2++; evict_count2++; hit = 0; L2.erase(it_2); L2.insert(mypair); } } else { cout << "Containing more than 2 cachelines" <<endl; exit(1); } return hit; }