Esempio n. 1
0
// Summarise a block-trace file: count the blocks it contains, add up their
// sizes, and report any block whose recorded size looks implausible.
//
// Usage: <prog> trace_file [detail]
//   argv[1]  trace file, read in binary mode through Block::FromStream.
//   argv[2]  optional; its mere presence (any value) turns on a dump of
//            every block via Block::ToString.
//
// Returns 0 on success, 1 on bad usage or when the trace cannot be opened.
int main(int argc, char **argv)
{
    if (argc < 2 || argc > 3) {
        usage(argv[0]);
        return 1;
    }

    // Any third argument enables the per-block dump.
    const bool show_detail = (argc == 3);

    // Sizes outside (0, kMaxBlockSize] are reported as corrupted.
    const int kMaxBlockSize = 16384;

    ifstream is(argv[1], std::ios_base::in | std::ios_base::binary);
    if (!is.is_open()) {
        // Terminate the line and signal failure to the caller, in line with
        // the usage error above.
        cout << "open failed: " << argv[1] << endl;
        return 1;
    }

    Block bl;
    uint64_t size = 0;
    uint64_t nblocks = 0;
    /*
    Disabled per-level accounting, kept for reference:
    uint64_t new_data = 0, l1_data = 0, l2_data = 0, l3_data = 0, l3_cache = 0;
    uint64_t gb = 1024 * 1024 * 1024;
    */

    while (bl.FromStream(is)) {
        size += bl.size_;
        ++nblocks;
        /*
        switch (bl.file_id_) {
        case 0:
            new_data += bl.size_;
            break;
        case IN_PARENT:
            l1_data += bl.size_;
            break;
        case IN_CDS:
            l3_data += bl.size_;
            break;
        case IN_DIRTY_SEG:
            l2_data += bl.size_;
            break;
        case IN_CDS_CACHE:
            l3_data += bl.size_;
            l3_cache += bl.size_;
            break;
        default:
            break;
        }
        */
        if (show_detail)
            cout << bl.ToString();

        if (bl.size_ == 0 || bl.size_ > kMaxBlockSize) {
            cout << "corrupted block info: " << bl.ToString() << endl;
        }
    }

    // The trailing ", " is kept so the summary line stays byte-identical for
    // anything that parses it; the disabled per-level fields used to follow.
    cout << argv[1] << ": " <<
        nblocks << " blocks, " <<
        size << " bytes, " <<
        /*
        (float)new_data/gb << "new, " <<
        (float)l1_data/gb << " l1, " <<
        (float)l2_data/gb << " l2, " <<
        (float)l3_data/gb << " l3, "
        (float)l3_cache/gb << " l3_cache" <<
        */
        endl;

    // Returning (rather than exit()) lets the stream destructor close the file.
    return 0;
}
Esempio n. 2
0
int main( int argc, char** argv)
{
    if (argc != 3 && argc != 4) {
        cout << "Usage: " <<argv[0] << " store_file current_trace parent_trace" <<endl;
        return 0;
    }

    DOMConfigurator::configure("Log4cxxConfig.xml");

    Block blk;
    map<Checksum, SizeCount> ublks;
    map<uint64_t, uint64_t> hit_lens;
    map<uint64_t, uint64_t> miss_lens;
    map<uint64_t, uint64_t> seg_hit_lens;
    map<uint64_t, uint64_t> seg_miss_lens;
    // read unique block map, use file_id field to store count
    ifstream is(argv[1], ios_base::in | ios_base::binary);
    while (blk.FromStream(is)) {
        ublks[blk.cksum_].ref_count_ = blk.file_id_;
        ublks[blk.cksum_].size_ = blk.size_;
    }
    is.close();
    ofstream os(argv[1], ios_base::out | ios_base::binary | ios_base::trunc);

    // open trace files
    bool has_parent = false;
    ifstream cis, pis;
    cis.open(argv[2], ios_base::in | ios_base::binary);
    if (argc == 4) {
        has_parent = true;
        pis.open(argv[3], ios_base::in | ios_base::binary);
    }

    Segment cur_seg, par_seg;
    uint64_t n_misses = 0, n_hits = 0;
    uint64_t n_seg_misses = 0, n_seg_hits = 0;
    map<Checksum, SizeCount>::iterator it;

    while (cur_seg.LoadFixSize(cis)) {
        if (has_parent) {	// duplicate segment
            if (par_seg.LoadFixSize(pis) && cur_seg == par_seg) {
                save_counter(n_seg_hits, n_seg_misses, seg_miss_lens);
                continue;
            }
        }
        save_counter(n_seg_misses, n_seg_hits, seg_hit_lens);

        for (size_t i = 0; i < cur_seg.blocklist_.size(); ++i) {
            it = ublks.find(cur_seg.blocklist_[i].cksum_);
            if (it == ublks.end()) {	// miss
                ublks[cur_seg.blocklist_[i].cksum_].ref_count_ ++;
                ublks[cur_seg.blocklist_[i].cksum_].size_ = cur_seg.blocklist_[i].size_;
                save_counter(n_misses, n_hits, hit_lens);
            }
            else {	// hit
                it->second.ref_count_ ++;
                save_counter(n_hits, n_misses, miss_lens);
            }
        }
    }

    // save perfect dedup results back to disk
    for (it = ublks.begin(); it != ublks.end(); it++) {
        blk.cksum_ = it->first;
        blk.size_ = it->second.size_;
        blk.file_id_ = it->second.ref_count_;
        blk.offset_ = 0;
        blk.ToStream(os);
    }

    map<uint64_t, uint64_t>::iterator cit;
    ofstream hit_output("./hit.out", ios_base::out | ios_base::app);
    ofstream miss_output("./miss.out", ios_base::out | ios_base::app);
    ofstream seg_hit_output("./seg_hit.out", ios_base::out | ios_base::app);
    ofstream seg_miss_output("./seg_miss.out", ios_base::out | ios_base::app);
    hit_output << "snapshot: " << argv[2] << endl;
    miss_output <<  "snapshot: " << argv[2] << endl;
    seg_hit_output << "snapshot: " << argv[2] << endl;
    seg_miss_output <<  "snapshot: " << argv[2] << endl;
    if (has_parent) {
        hit_output << "parent: " << argv[3] << endl;
        miss_output <<  "parent: " << argv[3] << endl;
        seg_hit_output << "parent: " << argv[3] << endl;
        seg_miss_output <<  "parent: " << argv[3] << endl;
    }
    for (cit = hit_lens.begin(); cit != hit_lens.end(); cit ++)
        hit_output << cit->first << ", " << cit->second << endl;
    for (cit = miss_lens.begin(); cit != miss_lens.end(); cit ++)
        miss_output << cit->first << ", " << cit->second << endl;
    for (cit = seg_hit_lens.begin(); cit != seg_hit_lens.end(); cit ++)
        seg_hit_output << cit->first << ", " << cit->second << endl;
    for (cit = seg_miss_lens.begin(); cit != seg_miss_lens.end(); cit ++)
        seg_miss_output << cit->first << ", " << cit->second << endl;
    hit_output.close();
    miss_output.close();
    seg_hit_output.close();
    seg_miss_output.close();
    os.close();
    cis.close();
    if (pis.is_open())
        pis.close();
    exit(0);
}
Esempio n. 3
0
int main( int argc, char** argv)
{
    if (argc != 3 && argc != 4) {
        cout << "Usage: " <<argv[0] << " store_file current_trace parent_trace" <<endl;
        return 0;
    }

    DOMConfigurator::configure("Log4cxxConfig.xml");
    LoggerPtr logger = Logger::getLogger("Benchamrk");
    Block blk;
    map<Checksum, SizeCount> ublks;

    // read unique block map, use file_id field to store count
    ifstream is(argv[1], ios_base::in | ios_base::binary);
    while (blk.FromStream(is)) {
        ublks[blk.cksum_].ref_count_ = blk.file_id_;
        ublks[blk.cksum_].size_ = blk.size_;
    }
    is.close();
    ofstream os(argv[1], ios_base::out | ios_base::binary | ios_base::trunc);

    // open trace files
    bool has_parent = false;
    ifstream cis, pis;
    cis.open(argv[2], ios_base::in | ios_base::binary);
    if (argc == 4) {
        has_parent = true;
        pis.open(argv[3], ios_base::in | ios_base::binary);
    }

    Segment cur_seg, par_seg;
    uint64_t num_queries = 0;
    map<Checksum, SizeCount>::iterator it;
    while (cur_seg.LoadFixSize(cis)) {
        if (has_parent) {	// duplicate segment
            if (par_seg.LoadFixSize(pis) && cur_seg == par_seg) {
                continue;
            }
        }
        TIMER_START();
        num_queries += cur_seg.blocklist_.size();
        for (size_t i = 0; i < cur_seg.blocklist_.size(); ++i) {
            it = ublks.find(cur_seg.blocklist_[i].cksum_);
            if (it == ublks.end()) {	// miss
                ublks[cur_seg.blocklist_[i].cksum_].ref_count_ ++;
                ublks[cur_seg.blocklist_[i].cksum_].size_ = cur_seg.blocklist_[i].size_;
            }
            else {	// hit
                ++ it->second.ref_count_;
            }
        }
        TIMER_STOP();
    }

    TIMER_PRINT();
    LOG4CXX_INFO(logger, "Num of items: " << ublks.size() << " , Num of queries: " << num_queries);
    // save perfect dedup results back to disk
    for (it = ublks.begin(); it != ublks.end(); it++) {
        blk.cksum_ = it->first;
        blk.size_ = it->second.size_;
        blk.file_id_ = it->second.ref_count_;
        blk.offset_ = 0;
        blk.ToStream(os);
    }

    os.close();
    cis.close();
    if (pis.is_open())
        pis.close();
    exit(0);
}
Esempio n. 4
0
int main(int argc, char** argv)
{
    if (argc < 3 || argc > 5) {
        usage(argv[0]);
        return 1;
    }

    string cds_name = argv[1];
    string list_fname = argv[2];
    string fprefix, fsuffix;
    if (argc > 3) {
        fprefix = argv[3];
    }
    if (argc > 4) {
        fsuffix = argv[4];
    }

    // prepare CDS
    std::vector<Block> cds;
    ifstream cds_ifs;
    cds_ifs.open(cds_name.c_str(), ios::in | ios::binary);
    Block blk;
    while (blk.FromStream(cds_ifs)) {
        cds.push_back(blk);
    }
    cds_ifs.close();
    sort(cds.begin(), cds.end());
    cout << "CDS file " << cds_name 
         << " is loaded and sorted, it has " 
         << cds.size() << " objects" << endl;

    // open all snapshot traces
    vector<ifstream*> trace_inputs;
    ifstream ss_ifs;
    ss_ifs.open(list_fname.c_str(), ios::in);
    while (ss_ifs.good()) {
        // open all the snapshots for that VM
        string ss_fname;
        std::getline(ss_ifs, ss_fname);
        if (ss_fname.length() == 0) {
            continue;
        }
        string trace_fname = fprefix + ss_fname + fsuffix;
        trace_inputs.push_back(new ifstream(trace_fname.c_str(), ios::in | ios::binary));
    }
    ss_ifs.close();
        
    // dedup each snapshot with its parent, count the new data brought by each snapshot
    int num_ss = trace_inputs.size();
    Segment* segs = new Segment[num_ss];
    DedupCounter* new_data = new DedupCounter[num_ss];
    cout << "processing " << num_ss << " snapshots in " << list_fname << endl;
    bool finished = false;
    while (!finished) {
        for (int j = 0; j < num_ss; j++) {	// TODO: add snapshot in reverse order
            segs[j].LoadFixSize(*trace_inputs[j]);
            // level 1
            if (j > 0 && segs[j] == segs[j-1]) {
                // detected at level 1
            }
            else {
                // level 2
                // sort previous segment
                if (j != 0) {
                    sort(segs[j-1].blocklist_.begin(), segs[j-1].blocklist_.end());
                }
                for (vector<Block>::iterator it = segs[j].blocklist_.begin(); 
                     it != segs[j].blocklist_.end(); ++it) {
                    if (j != 0 
                        && binary_search(segs[j-1].blocklist_.begin(), 
                                         segs[j-1].blocklist_.end(), 
                                         *it)) {
                        // detected at level 2
                    }
                    else {
                        // level 3
                        if (binary_search(cds.begin(), cds.end(), *it)) {
                            // detected at level 3
                        }
                        else {
                            // 
                            new_data[j].num_bytes_ += it->size_;
                            new_data[j].num_blocks_ += 1;
                        }
                    }
                }
            }
        }
        // are we done?
        finished = true;
        for (int j = 0; j < num_ss; j++) {
            if (segs[j].blocklist_.size() != 0) {
                finished = false;
            }
        }
    }

    // print the new data brought by each snapshot, then reset the inputs
    for (int j = 0; j < num_ss; j++) {
        cout << "snapshot" << j << " add: "
             << new_data[j].num_blocks_ << " blocks, "
             << new_data[j].num_bytes_ << " bytes" << endl;
        trace_inputs[j]->clear();
        trace_inputs[j]->seekg(0, ios::beg);
    }

    // estimate the total number of items for bloom filter
    uint64_t begin = trace_inputs[0]->tellg();
    trace_inputs[0]->seekg(0, ios::end);
    uint64_t end = trace_inputs[0]->tellg();
    trace_inputs[0]->clear();
    trace_inputs[0]->seekg(0, ios::beg);
    uint64_t num_items = 2 * (end - begin) / 36;
    cout << "create bloom filter with these settings: " 
         << num_items << " items, "
         << BLOOM_FILTER_FP_RATE << " rate, "
         << BLOOM_FILTER_NUM_FUNCS << " functions" << endl;

    DedupCounter* del_data = new DedupCounter[num_ss];
    BloomFilter<Checksum> filter(num_items, 
                                 BLOOM_FILTER_FP_RATE, 
                                 kBloomFilterFunctions, 
                                 BLOOM_FILTER_NUM_FUNCS);
    for (int j = 0; j < num_ss - 1; j++) {
        // add snapshot into bloom filter
        trace_inputs[j]->clear();
        trace_inputs[j]->seekg(0, ios::beg);
        while (blk.FromStream(*trace_inputs[j])) {
            filter.AddElement(blk.cksum_);
        }
        // check the next snapshot as if it's going to be deleted
        while (blk.FromStream(*trace_inputs[j + 1])) {
            if (filter.Exist(blk.cksum_)) {
                // exist means keep it
            }
            else {
                // this one can be safely deleted if it's not in CDS
                if (!binary_search(cds.begin(), cds.end(), blk)) {
                    del_data[j + 1].num_blocks_ += 1;
                    del_data[j + 1].num_bytes_ += blk.size_;
                }
            }
        }
    }

    for (int j = 0; j < num_ss; j++) {
        cout << "snapshot" << j << " del: "
             << del_data[j].num_blocks_ << " blocks, "
             << del_data[j].num_bytes_ << " bytes" << endl;
    }

    // clean up
    delete[] segs;
    delete[] new_data;
    delete[] del_data;
    for (size_t j = 0; j < trace_inputs.size(); ++j) {
        trace_inputs[j]->close();
        delete trace_inputs[j];
    }
    return 0;
}