/// Dumps the contents of `folder` to standard output.
///
/// Prints a "Folder contains:" header line, then one " key:value" line per
/// entry, in the container's own iteration order.
void cbn_consoleOutput(unordered_multimap<string, string> &folder) {
    std::cout << "Folder contains:" << endl;
    for (const auto &entry : folder) {
        std::cout << " " << entry.first << ":" << entry.second;
        std::cout << std::endl;
    }
}
void evict() { /* We are being evicted. Print our stats, update waste maps and clear. */ if(WANT_RAW_OUTPUT) { cout << bytesUsed->count() << "\t" << timesReusedBeforeEvicted << "\t" << accessSite << "[" << varInfo << "]\t" << "0x" << hex << address << dec << endl; } if(timesReusedBeforeEvicted == 0) { zeroReuseMap.insert(pair<string, ZeroReuseRecord> (accessSite, ZeroReuseRecord(varInfo, address))); } if((float)(bytesUsed->count()) / (float)lineSize < LOW_UTIL_THRESHOLD) { lowUtilMap.insert(pair<string, LowUtilRecord> (accessSite, LowUtilRecord(varInfo, address, bytesUsed->count()))); } address = 0; tag = 0; accessSite = ""; varInfo = ""; timesReusedBeforeEvicted = 0; bytesUsed->reset(); }
/**
 * Inserts a value to the collection.
 *
 * The value is appended to `elements`, and `map` gains an entry from the value
 * to the index it occupies in `elements`.
 *
 * @param val value to add (duplicates are allowed).
 * @return true if the collection did not already contain the specified element.
 */
bool insert(int val) {
    // Only presence matters here. find() stops at the first match, whereas
    // count() visits every duplicate of val.
    const bool wasAbsent = (map.find(val) == map.end());

    // elements.size() is read before push_back, so it is the slot val will occupy.
    map.insert(make_pair(val, elements.size()));
    elements.push_back(val);
    return wasAbsent;
}
void eraseIf(unordered_multimap<K, V, H, E, A>& unordered, EraseIfFn const& f) { for (typename unordered_multimap<K, V, H, E, A>::iterator i = unordered.begin(), e = unordered.end(); i != e;) // no ++i - intentional if (f(*i)) i = unordered.erase(i); else ++i; }
void eraseIfVal(unordered_multimap<K, V, H, E, A>& umap, EraseIfFn const& f) { for (typename unordered_multimap<K, V, H, E, A>::iterator i = umap.begin(), e = umap.end(); i != e;) // no ++i - intentional if (f(i->second)) i = umap.erase(i); else ++i; }
/// Prints the contents of `folder` bucket by bucket.
///
/// After a "folder buckets contain:" header, each bucket gets one line of the
/// form "bucket #<i> contains:" followed by " key:value" for every entry
/// stored in that bucket.
void umm_buckets(unordered_multimap<string, string> &folder) {
    std::cout << "folder buckets contain:\n";
    unsigned bucket = 0;
    while (bucket < folder.bucket_count()) {
        std::cout << "bucket #" << bucket << " contains:";
        for (auto entry = folder.begin(bucket); entry != folder.end(bucket); ++entry) {
            std::cout << " " << entry->first << ":" << entry->second;
        }
        std::cout << std::endl;
        ++bucket;
    }
}
// This relies on the hash operations being O(1).
//
// Counts how many consecutive integers, starting at `value`, are present as
// keys in `hash`, stepping upward when `ascending` is true and downward
// otherwise. Every key that is counted is removed from `hash`.
int consecutives(unordered_multimap<int, int> &hash, int value, bool ascending) {
    const int step = ascending ? 1 : -1;
    int run = 0;
    // erase() reports how many entries it removed, so it doubles as the
    // presence test. The original author flags the removal as very important
    // (presumably so a key is never counted again by a later call).
    while (hash.erase(value) > 0) {
        ++run;
        value += step;
    }
    return run;
}
// Adds `val` to `hashes` under `key`, unless an entry with the same key and an
// identical digest is already stored.
//
//   key - hash value the entry is indexed by.
//   val - entry to store; its `digest` field is what identifies a duplicate.
//
// When PERFORMANCE_STATISTICS is enabled, `totalh` counts every call and
// `conflict` counts the calls rejected as duplicates.
inline void insert_hash(hash_t key,hashtgt &val){
#if PERFORMANCE_STATISTICS
    ++totalh;
#endif
    // Restrict the scan to the entries that actually share this key. Iterating
    // from find(key) to hashes.end() would also walk entries of unrelated keys.
    auto sameKey=hashes.equal_range(key);
    for(auto it=sameKey.first;it!=sameKey.second;++it){
        if(memcmp(val.digest,it->second.digest,sizeof(val.digest))==0){
#if PERFORMANCE_STATISTICS
            ++conflict;
#endif
            //already have the same one inserted!
            return;
        }
    }
    hashes.insert(make_pair(key,val));
}
// Counts, over every (key, value) entry of map1, the entries of map2 stored
// under the same key whose value is accepted by isCorrect(map1 value, map2 value).
// Returns the total number of accepted pairs.
uint64_t sketchUnorderedComparisonError(const unordered_multimap<string, string>& map1, const unordered_multimap<string, string>& map2){
	uint64_t accepted(0);
	for (const auto& entry : map1){
		const auto candidates = map2.equal_range(entry.first);
		for (auto cand = candidates.first; cand != candidates.second; ++cand){
			if(isCorrect(entry.second, cand->second)){
				++accepted;
			}
		}
	}
	return accepted;
}
/// Prints every entry of `folder` stored under `key`, one per line, as
/// "<key>\t<value>". Nothing is printed when the key is absent.
void umm_out_key(unordered_multimap<string, string> &folder, char *key) {
    const auto matches = folder.equal_range(key);  // [first, second) spans the key's values
    auto cursor = matches.first;
    while (cursor != matches.second) {
        cout << cursor->first << '\t' << cursor->second << endl;
        ++cursor;
    }
}
void SESELoop::buildLoopMemberSet(BasicBlock& backEdgeDestination, const unordered_multimap<BasicBlock*, BasicBlock*>& destToOrigin, unordered_set<BasicBlock*>& members, unordered_set<BasicBlock*>& entries, unordered_set<BasicBlock*>& exits) { // Build paths to back-edge start nodes. unordered_set<BasicBlock*> sinkNodeSet; auto range = destToOrigin.equal_range(&backEdgeDestination); for (auto iter = range.first; iter != range.second; iter++) { sinkNodeSet.insert(iter->second); } auto pathsToBackNodes = findPathsToSinkNodes(&backEdgeDestination, sinkNodeSet); // Build initial loop membership set for (const auto& path : pathsToBackNodes) { members.insert(path.begin(), path.end()); } // The path-to-sink-nodes algorithm won't follow back edges. Because of that, if the cycle contains a // sub-cycle, we need to add its member nodes. This is probably handled by the loop membership refinement // step from the "No More Gotos" paper, but as noted below, we don't use that step. unordered_set<BasicBlock*> newMembers; for (BasicBlock* bb : members) { auto range = loopMembers.equal_range(bb); for (auto iter = range.first; iter != range.second; iter++) { newMembers.insert(iter->second); } } members.insert(newMembers.begin(), newMembers.end()); for (BasicBlock* member : members) { loopMembers.insert({&backEdgeDestination, member}); for (BasicBlock* pred : predecessors(member)) { if (members.count(pred) == 0) { entries.insert(member); } } for (BasicBlock* succ : successors(member)) { if (members.count(succ) == 0) { exits.insert(succ); } } } }
// Depth-first walk along the `father` links from `start` towards `end`.
//
// `path` holds the nodes visited so far. Each time `end` is reached, the
// current path is appended to `ret` in reverse order, so it reads from `end`
// back to the original start. `path` is restored before returning. `dict` is
// only forwarded to the recursive calls.
void getPath(string &start, string &end, unordered_set<string> &dict, unordered_multimap<string, string> &father, vector<vector<string>> &ret, vector<string> &path) {
    path.push_back(start);
    if (start != end) {
        // Not there yet: recurse into every recorded father of this node.
        const auto parents = father.equal_range(start);
        for (auto link = parents.first; link != parents.second; ++link) {
            getPath(link->second, end, dict, father, ret, path);
        }
    } else {
        ret.push_back(vector<string>(path.rbegin(), path.rend()));
    }
    path.pop_back();
}
// Returns the percentage (0..100) of k-mers of `seq` that have at least one
// accepted entry in `genomicKmers`.
//
//   k            - k-mer length.
//   seq          - sequence whose k-mers are examined (every offset 0..size-k).
//   genomicKmers - lookup table keyed by the first `nuc` characters of a k-mer.
//   nuc          - used as a length: the k-mer prefix of that many characters
//                  is the lookup key, and the remainder is compared against
//                  each candidate's value with isCorrect().
//
// A sequence shorter than k contains no k-mer, so 0 is returned.
double percentStrandedErrors(uint64_t k, const string& seq, const unordered_multimap<string, string>& genomicKmers, char nuc){
	// Without this guard seq.size()-k+1 wraps around, and is exactly 0
	// (giving 0/0 = NaN) when seq.size() == k-1.
	if(seq.size()<k){
		return 0;
	}
	const uint64_t kmerCount(seq.size()-k+1);
	double inter(0);
	for(uint64_t i(0); i<kmerCount; ++i){
		const string kmer(seq.substr(i,k));
		auto range(genomicKmers.equal_range(kmer.substr(0,nuc)));
		if(range.first==range.second){
			continue;
		}
		// Only built when there is a candidate to compare against, as before.
		const string suffix(kmer.substr(nuc));
		for(auto it(range.first); it!=range.second; ++it){
			if(isCorrect(suffix,it->second)){
				// One accepted candidate is enough for this k-mer.
				++inter;
				break;
			}
		}
	}
	return double(100*inter/kmerCount);
}
// reconcile: matches every edge of a gene tree against the edges of a reference tree and writes the
// best-matching reference node name(s) for each gene-tree edge to a file.
//
// Parameters:
//   reftree      - reference tree; its node names are the keys looked up in gene_map.
//   treefile     - path of the gene-tree file. The first line is skipped and the tree string is taken from
//                  between the first and last double quote on the second line (the comments in the body
//                  say this is the jplace format written by pplacer).
//   gene_map     - multimap from reference-tree node name to gene names; each gene name is looked up as a
//                  gene-tree leaf name.
//   output_fname - path of the output file; one "<edge number>\t<reference node name>" line per match.
//
// Steps visible in the body:
//   1. Open treefile; on failure print a message to cerr and return.
//   2. Parse the tree and print its node count.
//   3. For every node, extract the edge number from the "{...}" part of its name (or parse the whole name
//      with atoi when there is no "{"), truncate the name at "{", and record node index -> edge number in
//      edgenum_map.
//   4. Build graphs for both trees with make_graph and enumerate their splits with enumerate_splits.
//   5. Build gtmap (gene-tree leaf name -> node index), then for every reference vertex whose
//      ref_vertex_map entry is not -1 collect the gene-tree vertex ids reachable through gene_map.
//   6. For every gene-tree edge, score it against every reference edge: both splits and their complements
//      are intersected, the two overlap fractions of each pairing are averaged and raised to the 100th
//      power, and the larger of the two pairings is kept.
//   7. If the number of reference edges tied at the best score is below placement_limit (declared outside
//      this function), write one output line per tied edge.
//
// NOTE(review): gtmap[...] uses operator[], so a gene name missing from gtmap is inserted and mapped to
//   node 0 instead of being reported - confirm this is intended.
// NOTE(review): the scores divide by treesplit1.count() and treesplit2.count(); a split that is all zeros
//   or all ones would divide by zero - confirm enumerate_splits never produces one.
// NOTE(review): scoresum is accumulated but never read here, and the block guarded by
//   pg_splitlist[i].count() == 1 only advances local counters whose only use is in commented-out output.
void reconcile( PhyloTree< TreeNode >& reftree, string treefile, unordered_multimap<string, string>& gene_map, string output_fname ){ // read ref tree ifstream treein(treefile.c_str()); if(!treein.is_open()){ cerr << "Unable to read file " << treefile << endl; return; } // // read a tree with edge numberings from pplacer // assume jplace format with treestring on second line // string line; string treestring; getline( treein, line ); getline( treein, treestring ); size_t qpos = treestring.find("\""); size_t rqpos = treestring.rfind("\""); treestring = treestring.substr( qpos + 1, rqpos - qpos - 1); stringstream treestr(treestring); // cout << "Trying to read " << treestring << endl; PhyloTree< TreeNode > tree; tree.readTree( treestr ); cout << "The read tree has " << tree.size() << " nodes\n"; // // remove edge numbers // assume jplace format // std::unordered_map<int,int> edgenum_map; for(int i=0; i<tree.size(); i++){ size_t atpos = tree[i].name.find("{"); size_t ratpos = tree[i].name.rfind("}"); int edgenum = -1; if( atpos == string::npos ){ edgenum = atoi(tree[i].name.c_str()); }else{ edgenum = atoi(tree[i].name.substr(atpos+1, ratpos - atpos - 1).c_str()); // cerr << "node " << i << " edgenum is " << tree[i].name.substr(atpos+1, ratpos - atpos - 1) << " name is " << tree[i].name.substr(0, atpos) << endl; tree[i].name = tree[i].name.substr(0, atpos); } // cerr << "mapping " << i << " to " << edgenum << "\n"; edgenum_map.insert(make_pair(i,edgenum)); } // cerr << "Done removing edge numbers\n"; // // construct boost graphs of the trees // PhyloGraph pg; make_graph( tree, pg ); PhyloGraph refpg; make_graph( reftree, refpg ); // // Phase 3: construct map to reference tree // // a) cut gene tree on each edge // b) compute splits at cut point // c) cut species tree on each edge // d) determine which species tree split matches the gene tree split best // e) write out the split match // plan for later... 
// c) compute PD on either side of cut point // d) logical AND splits with reftree splits // e) compute minimum spanning tree among remaining nodes // f) compute PD of minimum spanning trees // vector< boost::dynamic_bitset<> > pg_splitlist; vector<Vertex> pg_vertex_map; enumerate_splits( pg, pg_splitlist, pg_vertex_map ); cout << "Done with gene tree splits\n"; vector< boost::dynamic_bitset<> > ref_splitlist; vector<Vertex> ref_vertex_map; enumerate_splits( refpg, ref_splitlist, ref_vertex_map ); // need a mapping from vertex numbers in refpg to vertex numbers in pg cout << "Making gene tree map\n"; unordered_map< string, int > gtmap; for(int i=0; i<tree.size(); i++){ if(tree[i].children.size()==0){ gtmap.insert(make_pair(tree[i].name, i)); } } cout << gtmap.size() << " genes mapped\n"; cout << "Making species to gene tree map\n"; vector< vector< int > > species_to_gene_map; // maps split IDs in species tree to split IDs in gene tree for(int i=0; i<refpg.V; i++){ if(ref_vertex_map[i]==-1) continue; // which genes does this species contain? 
pair< unordered_multimap<string,string>::iterator, unordered_multimap<string,string>::iterator> iter; iter = gene_map.equal_range(reftree[i].name); vector<int> curmap; if(iter.first ==iter.second){ cerr << "Error no mapping found for " << reftree[i].name << endl; } for(; iter.first !=iter.second; iter.first++){ if( pg_vertex_map[ gtmap[iter.first->second] ] == -1 ) continue; curmap.push_back( pg_vertex_map[ gtmap[iter.first->second] ] ); // cout << "mapped ref " << reftree[i].name << "\t" << ref_vertex_map[i] << " to " << curmap.back() << endl; // cout << "reverse map to " << tree[gtmap[iter.first->second]].name << " and " << other_map[tree[gtmap[iter.first->second]].name] << endl; } // add a list of gene vertices for this species species_to_gene_map.push_back(curmap); } cout << species_to_gene_map.size() << " species mapped\n"; cout << "rs.size() " << ref_splitlist[0].size() << endl; cout << "Finding best edges\n"; ofstream mapout(output_fname.c_str()); for( size_t i=0; i < pg.E; i++ ){ // for each reftree edge, calculate mapping quality between this edge and reftree edges double scoresum = 0; double bestscore = 0; vector<double> maxscores; // cout << "ts1.count()\t" << pg_splitlist[i].count() << endl; if(pg_splitlist[i].count() == 1){ size_t f = pg_splitlist[i].find_first(); int qq=0; for(int abc=-1; abc<(int)f; qq++) if(pg_vertex_map[qq]!=-1) abc++; // cout << "gene tree " << other_map[ tree[qq].name ] << " treenode " << qq << " split id " << f << " edge " << i << endl; } boost::dynamic_bitset<> treesplit1 = pg_splitlist[i]; boost::dynamic_bitset<> treesplit2 = pg_splitlist[i]; treesplit2.flip(); for( size_t j=0; j < refpg.E; j++ ){ // logical AND boost::dynamic_bitset<> refsplit1 = ref_splitlist[j]; boost::dynamic_bitset<> refsplit2 = ref_splitlist[j]; refsplit2.flip(); // cout << "rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl; normalize_split( refsplit1, species_to_gene_map, pg_splitlist[i].size() ); normalize_split( 
refsplit2, species_to_gene_map, pg_splitlist[i].size() ); // cout << "normalized rs1.count() " << refsplit1.count() << "\trs2.count() " << refsplit2.count() << endl; boost::dynamic_bitset<> and11 = treesplit1 & refsplit1; boost::dynamic_bitset<> and21 = treesplit2 & refsplit1; boost::dynamic_bitset<> and12 = treesplit1 & refsplit2; boost::dynamic_bitset<> and22 = treesplit2 & refsplit2; double a11score = (double)and11.count() / (double)treesplit1.count(); double a22score = (double)and22.count() / (double)treesplit2.count(); double a1122score = (a11score + a22score) / 2.0; double a12score = (double)and12.count() / (double)treesplit1.count(); double a21score = (double)and21.count() / (double)treesplit2.count(); double a1212score = (a12score + a21score) / 2.0; a1212score = pow( a1212score, 100.0 ); a1122score = pow( a1122score, 100.0 ); maxscores.push_back( max(a1122score, a1212score)); scoresum += maxscores.back(); bestscore = max(maxscores.back(), bestscore); } // count the number of nodes with the max score. if it is more than a threshold, ignore this node since it is too hard to reconcile int place_count = 0; for(size_t j=0; j<maxscores.size(); j++){ if(maxscores[j] < bestscore) continue; place_count++; } if(place_count < placement_limit ){ for(size_t j=0; j<maxscores.size(); j++){ if(maxscores[j] < bestscore) continue; string refnodename = reftree[ refpg.edge_array[j].first ].name; // cout << "gene tree edge " << i << " linking " << other_map[tree[pg.edge_array[i].first].name] << " best reftree edge " << refnodename << endl; // cout << "found edge " << pg.edge_array[i].first << "\n"; mapout << edgenum_map[pg.edge_array[i].first] << "\t" << refnodename << endl; } } // if(pg_splitlist[i].count() == 1) // return; } }
// Scan: walks `len` bytes of input and emits the diff stream through Coder. Wherever the current
// bsb-byte window (bsb = 1<<max_bs) matches an entry of `hashes`, the entry's offset is written with
// Coder::writeoff and the position advances by bsb; otherwise a literal byte is written with
// Coder::writebyte. The stream is finished with Coder::writeclose().
//
//   more - callback (buffer, start, length, buffer size) that loads more input into `input`; its
//          return value is added to the read position `fpos`.
//   len  - total number of bytes to process.
//
// `input` is a buffer of `preread` (5 MiB) bytes that is always indexed modulo `preread`. `a` and `b`
// hold a checksum of the current window and form the lookup key (b<<16)+a. A candidate is confirmed by
// comparing MD5 digests; the digest of the window is computed at most once per outer iteration. When
// WITH_FIRST16_BYTE_CHECK is set, candidates whose first16 bytes differ are skipped first (only when
// the 16 bytes do not wrap around the buffer).
//
// NOTE(review): the candidate loop runs from hashes.find(key) to hashes.end(), not to the end of the
//   key's equal_range, so it can also visit entries stored under other keys - confirm.
// NOTE(review): `input` is allocated with new[] and no matching delete[] is visible in this function.
// NOTE(review): `(int)((len/(1024*1024))&0x8fffffff);` is an expression statement with no effect, and
//   after_a_match is re-declared as false on every outer iteration, so writesize appears to always be 1.
// NOTE(review): the inner `int bs`/`int bsb` declarations shadow the outer ones with the same values.
// NOTE(review): in this collapsed form it is unclear whether the `for(;scan<...)` header following
//   "//(32 bits trap)" is live code or part of that comment; the brace count (together with the later
//   "//}") suggests it is commented out - confirm against the formatted file before editing.
/*Scan and generate the diff file more(buf,start,len,buffsize): read more date from input */ void Scan(function<offset_t(byte*,int64_t,offset_t,offset_t)> more,int64_t len){ int64_t fpos=0; const int preread=5*1024*1024; byte * input=new byte[preread]; { int toread=(int32_t)min(preread,len-fpos); toread=max(bsbmax,toread); fpos+=more(input,0,toread,preread); } int bs=max_bs; int bsb=1<<bs; hash_t a=1,b=0; for(int di=0;di<bsb;++di){ a=(a+input[di%preread])%hash_magic; b=(b+a)%hash_magic; } offset_t written=0; int64_t written64=0; while(written64<len){ int64_t fileleft=len-fpos; //preread when reach 10*bsbmax byte from fpos if(fileleft>0 && written64+10*bsbmax>fpos){ int toread=(int)min(preread-20*bsbmax,fileleft); fpos+=more(input,fpos,toread,preread); } bool find=false; unsigned char digest[16]; bool digestgen=false; bool after_a_match=false; for(auto it=hashes.find((b<<16)+a);it!=hashes.end();++it){ #if WITH_FIRST16_BYTE_CHECK { int s0=written%preread; int e0=(s0+sizeof(it->second.first16))%preread; //test only for this case if(s0<e0){ if(memcmp(it->second.first16, &input[s0], sizeof(it->second.first16))!=0){ continue; } } } #endif if(!digestgen){ digestgen=true; MD5_CTX context; MD5_Init(&context); { int s0=written%preread; int e0=(s0+bsb)%preread; if(s0<e0){ MD5_Update(&context, &input[s0], bsb); }else{ MD5_Update(&context, &input[s0], preread-s0); MD5_Update(&context, &input[0], e0); } } MD5_Final(digest, &context); } if(memcmp(it->second.digest,digest,sizeof(digest))==0){ find=true; Coder::writeoff(it->second.offsetx); written+=bsb; written64+=bsb; a=1,b=0; for(int di=0;di<bsb;++di){ a=(a+input[(written+di)%preread])%hash_magic; b=(b+a)%hash_magic; } after_a_match=true; break; }else{ } } if(!find){ int writesize=1; if(after_a_match){ (int)((len/(1024*1024))&0x8fffffff); writesize=rand()%writesize+1; } after_a_match=false; for(int i=0;i<writesize && written64<len;++i){ byte wbyte=input[written%preread]; Coder::writebyte(wbyte); ++written; ++written64; offset_t 
scan=written+bsb-1; //(32 bits trap) for(;scan<written+bsbmax && scan<len;++scan){ //for [scan-bsb+1,scan] int bs=max_bs; int bsb=1<<bs; hash_t olda=a; hash_t oldb=b; a=(olda+hash_magic-wbyte+input[scan%preread])%hash_magic; b=(oldb +hash_magic-(wbyte*bsb)%hash_magic +a-1 )%hash_magic; //} } } } Coder::writeclose(); }
// encode: produces a diff of the new file against the old file.
//
//   noldfile  - path of the original file, which is indexed.
//   nnewfile  - path of the new file, which is scanned against the index.
//   ndifffile - path of the diff file to write.
//
// Returns 0 on success, or -1 when any of the three files cannot be opened or Coder::writeinit fails.
//
// Steps visible in the body:
//   1. Measure the old file, size the `hashes` table from its length, and index the file in 5 MiB
//      chunks: each chunk is zero-padded up to a multiple of (1<<max_bs) and handed to MakeAdler32Index
//      together with its file offset. Progress is printed to cerr.
//   2. Measure the new file and round its length up to a multiple of (1<<max_bs).
//   3. Call Coder::writeinit(newlen) and start a thread running CompressInc(Coder::pipe,&difffile).
//   4. Run Scan over the aligned length with a callback that prints progress, reads from the new file
//      into the buffer at position start%bufsize (wrapping at bufsize), zero-fills whatever lies past
//      the end of the file, and returns the requested length.
//   5. Join the thread, close Coder::pipe and print the final progress line.
//
// NOTE(review): tellg().seekpos(), sprintf_s and _TCHAR look MSVC-specific; seekpos() is not a member of
//   the standard stream position type - confirm the intended toolchain.
// NOTE(review): the zero-padding loop writes up to index toread_aligned-1 of oldbuf; that stays inside
//   the buffer only if blocksize is a multiple of (1<<max_bs) - confirm.
// NOTE(review): on the writeinit failure path the function returns with the streams still open (they are
//   closed by their destructors); nothing is written to the diff file in that case.
/* Encode to diff */ int encode( _TCHAR* noldfile, _TCHAR* nnewfile, _TCHAR* ndifffile){ ifstream oldfile(noldfile,ios::binary); ifstream newfile(nnewfile,ios::binary); ofstream difffile(ndifffile,ios::binary); if(!oldfile || !newfile || !difffile)return -1; oldfile.seekg(0,oldfile.end); int64_t oldlen=oldfile.tellg().seekpos(); oldfile.seekg(0,oldfile.beg); cerr<<"Original file length:"<<oldlen<<endl; //Index the old file { //if(oldlen>20*1024*1024)oldlen=20*1024*1024; int bs=max_bs; int bsb=1<<bs; size_t reserve=(size_t)(oldlen/bsb)*4; //cout<<"reserve for "<<reserve<<endl; hashes.rehash(reserve); const int blocksize=5*1024*1024; byte* oldbuf=new byte[blocksize]; int64_t read=0; while(read<oldlen){ int64_t left=oldlen-read; int toread=(int)min(blocksize,left); oldfile.read((char*)oldbuf,toread); /*if(read==0){ memcpy(back,&oldbuf[0xa92000],0x1000); }*/ int toread_aligned=(toread+(1<<max_bs)-1)/(1<<max_bs)*(1<<max_bs); //padding 0 for(int i=toread;i<toread_aligned;++i){ oldbuf[i]=0; } MakeAdler32Index(oldbuf,toread_aligned,read); read+=toread; double per=read*100.0/oldlen; char buf[1024]; sprintf_s(buf,"\rIndexing ... %2.4lf%% [%.0lf/%.0lf]",per,(double)read,(double)oldlen); cerr<<buf; } delete[] oldbuf; cerr<<'\n'; } //Scan the new file, newfile.seekg(0,newfile.end); int64_t newlen=newfile.tellg().seekpos(); newfile.seekg(0,newfile.beg); int64_t newlen_aligned=(newlen+(int64_t)(1<<max_bs)-(int64_t)1)/(1<<max_bs)*(1<<max_bs); if(!Coder::writeinit(newlen)){ cerr<<"Failed to write init!"<<endl; return -1; } function<void()> threadf=[&](){ CompressInc(Coder::pipe,&difffile); }; thread compresst(threadf); //newlen=newlen_aligned=3u*1024u*1024u*1024u; //will roll back, but will aligned to 4 bytes, so it's OK Scan([&](byte* data,int64_t start,offset_t len,offset_t bufsize)->offset_t{ char buf[1024]; sprintf_s(buf,"\nScanning ... 
P: %2.4lf%%[%.0lfK/%.0lfK] C:%2.4lf%% O:%.0lfK" ,(double)start*100.0/(double)newlen,(double)start/1024.0,(double)newlen/1024.0 ,(double)size_uncompressed*100.0/((double)start+1),(double)size_compressed/1024.0 ); cerr<<buf; size_t toread=(int32_t)min(len,(newlen-start)); { int s0=start%bufsize; int e0=(s0+toread)%bufsize; if(e0>s0){ newfile.read((char*)data+s0,e0-s0); }else{ newfile.read((char*)data+s0,bufsize-s0); newfile.read((char*)data,e0); } } //padding 0 if(toread<len){ int s1=(start+toread)%bufsize; int e1=(start+len)%bufsize; if(e1>s1){ memset((char*)data+s1,0,e1-s1); }else{ memset((char*)data+s1,0,bufsize-s1); memset((char*)data,0,e1); } } return len; },newlen_aligned); compresst.join(); Coder::pipe.Close(); char buf[1024]; sprintf_s(buf,"\nScanning ... P: %2.4lf%%[%.0lfK/%.0lfK] C:%2.4lf%% O:%.0lfK" ,(double)100.0,(double)newlen/1024.0,(double)newlen/1024.0 ,(double)size_uncompressed*100.0/((double)newlen),(double)size_compressed/1024.0 ); cerr<<buf<<endl; //TODO: make a final check of the generated file to make sure it's ok return 0; }
/// Non-member swap for unordered_multimap: exchanges the contents of `m1` and
/// `m2` by forwarding to the container's member swap.
void swap(unordered_multimap<K, T, H, P, A> &m1,
          unordered_multimap<K, T, H, P, A> &m2)
{
    m1.swap(m2);
}
/**
 * Removes a value from the collection.
 *
 * One occurrence of `val` is removed. The last entry of `elements` is moved
 * into the vacated slot, and the `map` entry that pointed at that last slot is
 * updated to the new index.
 *
 * @param val value to remove.
 * @return true if the collection contained the specified element.
 */
bool remove(int val) {
    auto victim = map.find(val);
    if (victim == map.end()) {
        return false;
    }

    // Read everything needed from the entry before erasing it; the iterator
    // must not be used afterwards.
    const int pos = victim->second;
    map.erase(victim);

    const int lastIndex = static_cast<int>(elements.size()) - 1;
    const int last = elements.back();
    elements.pop_back();

    // If the removed slot was the last one there is nothing to move, and
    // writing elements[pos] would be one past the end.
    if (pos != lastIndex) {
        elements[pos] = last;
        // Re-point the entry of `last` that referred to the old final slot.
        // Only entries stored under that key need to be examined.
        auto sameValue = map.equal_range(last);
        for (auto it = sameValue.first; it != sameValue.second; ++it) {
            if (it->second == lastIndex) {
                it->second = pos;
                break;
            }
        }
    }
    return true;
}
/// Non-member swap overload: exchanges the contents of `__x` and `__y` by
/// forwarding to the container's member swap.
inline void
swap(unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc, __cache_hash_code>& __x,
     unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc, __cache_hash_code>& __y)
{
    __x.swap(__y);
}
bool cache_access2 (int index, int tag, Data d) { bool hit = 1; pair<int, Data> mypair(index, d); int count = L2.count(index); // Miss if (count <= 0) { L2.insert(mypair); miss_count2++; hit = 0; valid_cacheline2++; } else if (count == 1) { auto range = L2.equal_range(index); auto it = range.first; Data d1 = it->second; // Hit if (d1.tag == tag) { it->second.lru = 0; //cout << "Hit" << endl; hit_count2++; } // Miss else { it->second.lru = 1; //cout << "Miss" << endl; miss_count2++; hit = 0; L2.insert(mypair); valid_cacheline2++; } } else if (count == 2) { auto range = L2.equal_range(index); auto it = range.first; auto it_1 = range.first; auto it_2 = ++it; Data d1 = it_1->second; Data d2 = it_2->second; // Hit if (d1.tag == tag) { it_1->second.lru = 0; it_2->second.lru = 1; //cout << "Hit" << endl; hit_count2++; } else if (d2.tag == tag) { it_1->second.lru = 1; it_2->second.lru = 0; //cout << "Hit" << endl; hit_count2++; } else if (d1.lru == 1) { it_2->second.lru = 1; //cout << "Miss" << endl; miss_count2++; evict_count2++; hit = 0; L2.erase(it_1); L2.insert(mypair); } else { it_1->second.lru = 1; //cout << "Miss" << endl; miss_count2++; evict_count2++; hit = 0; L2.erase(it_2); L2.insert(mypair); } } else { cout << "Containing more than 2 cachelines" <<endl; exit(1); } return hit; }