Exemplo n.º 1
0
void cp5(char* src_file, char* dst_file)
{
    Checksum summer;
    unordered_map<uint32_t, ChunkInfo*> sum_offset;
    list<ChunkInfo*> file_meta;
    int32_t src_size = FileUtils::get_file_size(src_file);
    int32_t count = src_size / CHUNK_SIZE;
    char* buf = NULL;
    int length = CHUNK_SIZE;
    int i = 0;
    for (; i < count; ++i) {
        FileUtils::read(&buf, i * CHUNK_SIZE, length, src_file);
        ChunkInfo* chunk_info = new ChunkInfo();
        chunk_info->from_ = true;
        chunk_info->offset_ = i * CHUNK_SIZE;
        chunk_info->length_ = CHUNK_SIZE;
        string* strong_sum = summer.StrongSum(buf, length);
        chunk_info->strong_sum_ = strong_sum;
        sum_offset[summer.Sum1(buf, length)] = chunk_info;
        file_meta.push_back(chunk_info);
        delete [] buf;
    }
    
    int32_t last = src_size % CHUNK_SIZE;
    if (last > 0) {
        ChunkInfo* chunk_info = new ChunkInfo();
        chunk_info->from_ = true;
        chunk_info->offset_ = i * CHUNK_SIZE;
        chunk_info->length_ = last;
        file_meta.push_back(chunk_info);
    }

    for (auto iter = sum_offset.begin(); iter != sum_offset.end(); ++iter) {
        WLOG(DEBUG, "sum : %u, off: %d len: %d", iter->first,
                iter->second->offset_, iter->second->length_);
    }
    int32_t dst_size = FileUtils::get_file_size(dst_file);
    int32_t len = CHUNK_SIZE;
    FileUtils::read(&buf, 0, len, dst_file);
    int32_t curr = CHUNK_SIZE;
    summer.Init(buf, CHUNK_SIZE);
    auto iter = sum_offset.find(summer.tmp_sum());
    int32_t total_left = dst_size - CHUNK_SIZE;
    if (iter != sum_offset.end()) {
        WLOG(INFO, "%s and %s first chunk is the same", src_file, dst_file);
        string* strong_sum = summer.StrongSum(buf, CHUNK_SIZE);
        if (*strong_sum == *(iter->second->strong_sum_)) {
            iter->second->from_ = false;
            iter->second->offset_ = 0;
        }
        WLOG(INFO, "md5 sum of src chunk %s", iter->second->strong_sum_->c_str());
        WLOG(INFO, "md5 sum of dst chunk %s", strong_sum->c_str());
        WLOG(INFO, "%d is replaced with %d", iter->second->offset_, 0);
    }
    delete [] buf;
    clock_t start, end;
    start = clock();
    int32_t sum_time = 0;
    while (true) {
        WLOG(DEBUG, "total_left: %d", total_left);
        if (total_left <= 0) break;
        int32_t buf_len = SCAN_BUFFER_SIZE;
        if (buf_len > total_left) {
            buf_len = total_left;
        }
        int32_t read_len = buf_len + CHUNK_SIZE;
        FileUtils::read(&buf, curr - CHUNK_SIZE, read_len, dst_file);
        for (int i = CHUNK_SIZE; i < read_len; ++i) {
            uint32_t new_sum = summer.Update(buf[i]);
            auto iter = sum_offset.find(summer.tmp_sum());
            if (iter != sum_offset.end()) {
                ++sum_time;
                string* strong_sum = summer.StrongSum(buf + i - CHUNK_SIZE + 1, CHUNK_SIZE);
                WLOG(INFO, "md5 sum of src chunk %s", iter->second->strong_sum_->c_str());
                WLOG(INFO, "md5 sum of dst chunk %s", strong_sum->c_str());
                WLOG(INFO, "%d is replaced with %d, curr: %d",
                        iter->second->offset_, curr - 1 * CHUNK_SIZE + 1, curr);
                if (*strong_sum == *(iter->second->strong_sum_)) {
                    iter->second->from_ = false;
                    iter->second->offset_ = curr - 1 * CHUNK_SIZE + 1;
                }
            }
            ++curr;
        }
        delete [] buf;
        total_left -= buf_len;
    }
    end = clock();
    WLOG(NOTICE, "find cost %f s, sum time:%d", (double)(end - start) / CLOCKS_PER_SEC, sum_time);

    construct_file(src_file, dst_file, file_meta, "client_data/test.dat");
}