int main(int argc, char **argv) { if (argc < 2 || argc > 3) { usage(argv[0]); return 1; } bool show_detail = false; if (argc == 3) { show_detail = true; } ifstream is; Block bl; uint64_t size = 0, nblocks = 0; /* uint64_t new_data = 0, l1_data = 0, l2_data = 0, l3_data = 0, l3_cache = 0; uint64_t gb = 1024 * 1024 * 1024; */ is.open(argv[1], std::ios_base::in | std::ios_base::binary); if (!is.is_open()) { cout << "open failed: " << argv[1]; return 0; } while(bl.FromStream(is)) { size += bl.size_; nblocks++; /* switch (bl.file_id_) { case 0: new_data += bl.size_; break; case IN_PARENT: l1_data += bl.size_; break; case IN_CDS: l3_data += bl.size_; break; case IN_DIRTY_SEG: l2_data += bl.size_; break; case IN_CDS_CACHE: l3_data += bl.size_; l3_cache += bl.size_; break; default: break; } */ if (show_detail) cout << bl.ToString(); if (bl.size_ == 0 || bl.size_ > 16384) { cout << "corrupted block info: " << bl.ToString() << endl; } } cout << argv[1] << ": " << nblocks << " blocks, " << size << " bytes, " << /* (float)new_data/gb << "new, " << (float)l1_data/gb << " l1, " << (float)l2_data/gb << " l2, " << (float)l3_data/gb << " l3, " (float)l3_cache/gb << " l3_cache" << */ endl; is.close(); exit(0); }
int main( int argc, char** argv) { if (argc != 3 && argc != 4) { cout << "Usage: " <<argv[0] << " store_file current_trace parent_trace" <<endl; return 0; } DOMConfigurator::configure("Log4cxxConfig.xml"); Block blk; map<Checksum, SizeCount> ublks; map<uint64_t, uint64_t> hit_lens; map<uint64_t, uint64_t> miss_lens; map<uint64_t, uint64_t> seg_hit_lens; map<uint64_t, uint64_t> seg_miss_lens; // read unique block map, use file_id field to store count ifstream is(argv[1], ios_base::in | ios_base::binary); while (blk.FromStream(is)) { ublks[blk.cksum_].ref_count_ = blk.file_id_; ublks[blk.cksum_].size_ = blk.size_; } is.close(); ofstream os(argv[1], ios_base::out | ios_base::binary | ios_base::trunc); // open trace files bool has_parent = false; ifstream cis, pis; cis.open(argv[2], ios_base::in | ios_base::binary); if (argc == 4) { has_parent = true; pis.open(argv[3], ios_base::in | ios_base::binary); } Segment cur_seg, par_seg; uint64_t n_misses = 0, n_hits = 0; uint64_t n_seg_misses = 0, n_seg_hits = 0; map<Checksum, SizeCount>::iterator it; while (cur_seg.LoadFixSize(cis)) { if (has_parent) { // duplicate segment if (par_seg.LoadFixSize(pis) && cur_seg == par_seg) { save_counter(n_seg_hits, n_seg_misses, seg_miss_lens); continue; } } save_counter(n_seg_misses, n_seg_hits, seg_hit_lens); for (size_t i = 0; i < cur_seg.blocklist_.size(); ++i) { it = ublks.find(cur_seg.blocklist_[i].cksum_); if (it == ublks.end()) { // miss ublks[cur_seg.blocklist_[i].cksum_].ref_count_ ++; ublks[cur_seg.blocklist_[i].cksum_].size_ = cur_seg.blocklist_[i].size_; save_counter(n_misses, n_hits, hit_lens); } else { // hit it->second.ref_count_ ++; save_counter(n_hits, n_misses, miss_lens); } } } // save perfect dedup results back to disk for (it = ublks.begin(); it != ublks.end(); it++) { blk.cksum_ = it->first; blk.size_ = it->second.size_; blk.file_id_ = it->second.ref_count_; blk.offset_ = 0; blk.ToStream(os); } map<uint64_t, uint64_t>::iterator cit; ofstream hit_output("./hit.out", ios_base::out | ios_base::app); ofstream miss_output("./miss.out", ios_base::out | ios_base::app); ofstream seg_hit_output("./seg_hit.out", ios_base::out | ios_base::app); ofstream seg_miss_output("./seg_miss.out", ios_base::out | ios_base::app); hit_output << "snapshot: " << argv[2] << endl; miss_output << "snapshot: " << argv[2] << endl; seg_hit_output << "snapshot: " << argv[2] << endl; seg_miss_output << "snapshot: " << argv[2] << endl; if (has_parent) { hit_output << "parent: " << argv[3] << endl; miss_output << "parent: " << argv[3] << endl; seg_hit_output << "parent: " << argv[3] << endl; seg_miss_output << "parent: " << argv[3] << endl; } for (cit = hit_lens.begin(); cit != hit_lens.end(); cit ++) hit_output << cit->first << ", " << cit->second << endl; for (cit = miss_lens.begin(); cit != miss_lens.end(); cit ++) miss_output << cit->first << ", " << cit->second << endl; for (cit = seg_hit_lens.begin(); cit != seg_hit_lens.end(); cit ++) seg_hit_output << cit->first << ", " << cit->second << endl; for (cit = seg_miss_lens.begin(); cit != seg_miss_lens.end(); cit ++) seg_miss_output << cit->first << ", " << cit->second << endl; hit_output.close(); miss_output.close(); seg_hit_output.close(); seg_miss_output.close(); os.close(); cis.close(); if (pis.is_open()) pis.close(); exit(0); }
int main( int argc, char** argv) { if (argc != 3 && argc != 4) { cout << "Usage: " <<argv[0] << " store_file current_trace parent_trace" <<endl; return 0; } DOMConfigurator::configure("Log4cxxConfig.xml"); LoggerPtr logger = Logger::getLogger("Benchamrk"); Block blk; map<Checksum, SizeCount> ublks; // read unique block map, use file_id field to store count ifstream is(argv[1], ios_base::in | ios_base::binary); while (blk.FromStream(is)) { ublks[blk.cksum_].ref_count_ = blk.file_id_; ublks[blk.cksum_].size_ = blk.size_; } is.close(); ofstream os(argv[1], ios_base::out | ios_base::binary | ios_base::trunc); // open trace files bool has_parent = false; ifstream cis, pis; cis.open(argv[2], ios_base::in | ios_base::binary); if (argc == 4) { has_parent = true; pis.open(argv[3], ios_base::in | ios_base::binary); } Segment cur_seg, par_seg; uint64_t num_queries = 0; map<Checksum, SizeCount>::iterator it; while (cur_seg.LoadFixSize(cis)) { if (has_parent) { // duplicate segment if (par_seg.LoadFixSize(pis) && cur_seg == par_seg) { continue; } } TIMER_START(); num_queries += cur_seg.blocklist_.size(); for (size_t i = 0; i < cur_seg.blocklist_.size(); ++i) { it = ublks.find(cur_seg.blocklist_[i].cksum_); if (it == ublks.end()) { // miss ublks[cur_seg.blocklist_[i].cksum_].ref_count_ ++; ublks[cur_seg.blocklist_[i].cksum_].size_ = cur_seg.blocklist_[i].size_; } else { // hit ++ it->second.ref_count_; } } TIMER_STOP(); } TIMER_PRINT(); LOG4CXX_INFO(logger, "Num of items: " << ublks.size() << " , Num of queries: " << num_queries); // save perfect dedup results back to disk for (it = ublks.begin(); it != ublks.end(); it++) { blk.cksum_ = it->first; blk.size_ = it->second.size_; blk.file_id_ = it->second.ref_count_; blk.offset_ = 0; blk.ToStream(os); } os.close(); cis.close(); if (pis.is_open()) pis.close(); exit(0); }
int main(int argc, char** argv) { if (argc < 3 || argc > 5) { usage(argv[0]); return 1; } string cds_name = argv[1]; string list_fname = argv[2]; string fprefix, fsuffix; if (argc > 3) { fprefix = argv[3]; } if (argc > 4) { fsuffix = argv[4]; } // prepare CDS std::vector<Block> cds; ifstream cds_ifs; cds_ifs.open(cds_name.c_str(), ios::in | ios::binary); Block blk; while (blk.FromStream(cds_ifs)) { cds.push_back(blk); } cds_ifs.close(); sort(cds.begin(), cds.end()); cout << "CDS file " << cds_name << " is loaded and sorted, it has " << cds.size() << " objects" << endl; // open all snapshot traces vector<ifstream*> trace_inputs; ifstream ss_ifs; ss_ifs.open(list_fname.c_str(), ios::in); while (ss_ifs.good()) { // open all the snapshots for that VM string ss_fname; std::getline(ss_ifs, ss_fname); if (ss_fname.length() == 0) { continue; } string trace_fname = fprefix + ss_fname + fsuffix; trace_inputs.push_back(new ifstream(trace_fname.c_str(), ios::in | ios::binary)); } ss_ifs.close(); // dedup each snapshot with its parent, count the new data brought by each snapshot int num_ss = trace_inputs.size(); Segment* segs = new Segment[num_ss]; DedupCounter* new_data = new DedupCounter[num_ss]; cout << "processing " << num_ss << " snapshots in " << list_fname << endl; bool finished = false; while (!finished) { for (int j = 0; j < num_ss; j++) { // TODO: add snapshot in reverse order segs[j].LoadFixSize(*trace_inputs[j]); // level 1 if (j > 0 && segs[j] == segs[j-1]) { // detected at level 1 } else { // level 2 // sort previous segment if (j != 0) { sort(segs[j-1].blocklist_.begin(), segs[j-1].blocklist_.end()); } for (vector<Block>::iterator it = segs[j].blocklist_.begin(); it != segs[j].blocklist_.end(); ++it) { if (j != 0 && binary_search(segs[j-1].blocklist_.begin(), segs[j-1].blocklist_.end(), *it)) { // detected at level 2 } else { // level 3 if (binary_search(cds.begin(), cds.end(), *it)) { // detected at level 3 } else { // new_data[j].num_bytes_ += it->size_; new_data[j].num_blocks_ += 1; } } } } } // are we done? finished = true; for (int j = 0; j < num_ss; j++) { if (segs[j].blocklist_.size() != 0) { finished = false; } } } // print the new data brought by each snapshot, then reset the inputs for (int j = 0; j < num_ss; j++) { cout << "snapshot" << j << " add: " << new_data[j].num_blocks_ << " blocks, " << new_data[j].num_bytes_ << " bytes" << endl; trace_inputs[j]->clear(); trace_inputs[j]->seekg(0, ios::beg); } // estimate the total number of items for bloom filter uint64_t begin = trace_inputs[0]->tellg(); trace_inputs[0]->seekg(0, ios::end); uint64_t end = trace_inputs[0]->tellg(); trace_inputs[0]->clear(); trace_inputs[0]->seekg(0, ios::beg); uint64_t num_items = 2 * (end - begin) / 36; cout << "create bloom filter with these settings: " << num_items << " items, " << BLOOM_FILTER_FP_RATE << " rate, " << BLOOM_FILTER_NUM_FUNCS << " functions" << endl; DedupCounter* del_data = new DedupCounter[num_ss]; BloomFilter<Checksum> filter(num_items, BLOOM_FILTER_FP_RATE, kBloomFilterFunctions, BLOOM_FILTER_NUM_FUNCS); for (int j = 0; j < num_ss - 1; j++) { // add snapshot into bloom filter trace_inputs[j]->clear(); trace_inputs[j]->seekg(0, ios::beg); while (blk.FromStream(*trace_inputs[j])) { filter.AddElement(blk.cksum_); } // check the next snapshot as if it's going to be deleted while (blk.FromStream(*trace_inputs[j + 1])) { if (filter.Exist(blk.cksum_)) { // exist means keep it } else { // this one can be safely deleted if it's not in CDS if (!binary_search(cds.begin(), cds.end(), blk)) { del_data[j + 1].num_blocks_ += 1; del_data[j + 1].num_bytes_ += blk.size_; } } } } for (int j = 0; j < num_ss; j++) { cout << "snapshot" << j << " del: " << del_data[j].num_blocks_ << " blocks, " << del_data[j].num_bytes_ << " bytes" << endl; } // clean up delete[] segs; delete[] new_data; delete[] del_data; for (size_t j = 0; j < trace_inputs.size(); ++j) { trace_inputs[j]->close(); delete trace_inputs[j]; } return 0; }