void make_trace(char* path) { init_jcr(path); sds trace_file = sdsnew(path); char *p = trace_file + sdslen(trace_file) - 1; while (*p == '/') --p; *(p + 1) = 0; sdsupdatelen(trace_file); trace_file = sdscat(trace_file, ".trace"); NOTICE("output to %s", trace_file); start_read_phase(); start_chunk_phase(); start_hash_phase(); unsigned char code[41]; FILE *fp = fopen(trace_file, "w"); while (1) { struct chunk *c = sync_queue_pop(hash_queue); if (c == NULL) { break; } if (CHECK_CHUNK(c, CHUNK_FILE_START)) { destor_log(DESTOR_NOTICE, c->data); fprintf(fp, "file start %zd\n", strlen(c->data)); fprintf(fp, "%s\n", c->data); } else if (CHECK_CHUNK(c, CHUNK_FILE_END)) { fprintf(fp, "file end\n"); } else { hash2code(c->fp, code); code[40] = 0; fprintf(fp, "%s %d\n", code, c->size); } free_chunk(c); } fprintf(fp, "stream end"); fclose(fp); }
void do_backup(char *path) { init_recipe_store(); init_container_store(); init_index(); init_backup_jcr(path); NOTICE("\n\n==== backup begin ===="); TIMER_DECLARE(1); TIMER_BEGIN(1); time_t start = time(NULL); if (destor.simulation_level == SIMULATION_ALL) { start_read_trace_phase(); } else { start_read_phase(); start_chunk_phase(); start_hash_phase(); } start_dedup_phase(); start_rewrite_phase(); start_filter_phase(); do{ sleep(5); /*time_t now = time(NULL);*/ fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\r", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); }while(jcr.status == JCR_STATUS_RUNNING || jcr.status != JCR_STATUS_DONE); fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\n", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); if (destor.simulation_level == SIMULATION_ALL) { stop_read_trace_phase(); } else { stop_read_phase(); stop_chunk_phase(); stop_hash_phase(); } stop_dedup_phase(); stop_rewrite_phase(); stop_filter_phase(); TIMER_END(1, jcr.total_time); close_index(); close_container_store(); close_recipe_store(); update_backup_version(jcr.bv); free_backup_version(jcr.bv); printf("\n\njob id: %" PRId32 "\n", jcr.id); printf("index method: %d.(Remark 0: NO; 1: DDFS; 2: Extreme binning; 3: Silo; 4: Sparse; 5: Sampled; 6: Block; 7: Learn)\n", destor.index_specific); printf("sampling method: %d (%d) (Remark 1:Random; 2: Min; 3: Uniform; 4: Optimized_min)\n", destor.index_sampling_method[0], destor.index_sampling_method[1]); printf("segment method: %d (%d) (Remark 0: Fixed; 1: Content; 2: File)\n", destor.index_segment_algorithm[0], destor.index_segment_algorithm[1]); printf("prefetch # of segments: %d (Remark 1 for sparse index)\n", destor.index_segment_prefech); printf("segment selection method: %d (%d)(Remark 0: Base; 1: Top; 2: Mix)\n", destor.index_segment_selection_method[0], destor.index_segment_selection_method[1]); printf("backup path: 
%s\n", jcr.path); printf("number of files: %d\n", jcr.file_num); printf("number of chunks: %" PRId32 " (%" PRId64 " bytes on average)\n", jcr.chunk_num, jcr.data_size / jcr.chunk_num); printf("number of unique chunks: %" PRId32 "\n", jcr.unique_chunk_num); printf("total size(B): %" PRId64 "\n", jcr.data_size); printf("stored data size(B): %" PRId64 "\n", jcr.unique_data_size + jcr.rewritten_chunk_size); printf("deduplication ratio: %.4f, %.4f\n", jcr.data_size != 0 ? (jcr.data_size - jcr.unique_data_size - jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.data_size / (double) (jcr.unique_data_size + jcr.rewritten_chunk_size)); printf("total time(s): %.3f\n", jcr.total_time / 1000000); printf("the index memory footprint (B): %" PRId32 "\n", destor.index_memory_footprint); printf("throughput(MB/s): %.2f\n", (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); printf("number of zero chunks: %" PRId32 "\n", jcr.zero_chunk_num); printf("size of zero chunks: %" PRId64 "\n", jcr.zero_chunk_size); printf("number of rewritten chunks: %" PRId32 "\n", jcr.rewritten_chunk_num); printf("size of rewritten chunks: %" PRId64 "\n", jcr.rewritten_chunk_size); printf("rewritten rate in size: %.3f\n", jcr.rewritten_chunk_size / (double) jcr.data_size); destor.data_size += jcr.data_size; destor.stored_data_size += jcr.unique_data_size + jcr.rewritten_chunk_size; destor.chunk_num += jcr.chunk_num; destor.stored_chunk_num += jcr.unique_chunk_num + jcr.rewritten_chunk_num; destor.zero_chunk_num += jcr.zero_chunk_num; destor.zero_chunk_size += jcr.zero_chunk_size; destor.rewritten_chunk_num += jcr.rewritten_chunk_num; destor.rewritten_chunk_size += jcr.rewritten_chunk_size; printf("read_time : %.3fs, %.2fMB/s\n", jcr.read_time / 1000000, jcr.data_size * 1000000 / jcr.read_time / 1024 / 1024); printf("chunk_time : %.3fs, %.2fMB/s\n", jcr.chunk_time / 1000000, jcr.data_size * 1000000 / jcr.chunk_time / 1024 / 1024); printf("hash_time : %.3fs, %.2fMB/s\n", 
jcr.hash_time / 1000000, jcr.data_size * 1000000 / jcr.hash_time / 1024 / 1024); printf("dedup_time : %.3fs, %.2fMB/s\n", jcr.dedup_time / 1000000, jcr.data_size * 1000000 / jcr.dedup_time / 1024 / 1024); printf("rewrite_time : %.3fs, %.2fMB/s\n", jcr.rewrite_time / 1000000, jcr.data_size * 1000000 / jcr.rewrite_time / 1024 / 1024); printf("filter_time : %.3fs, %.2fMB/s\n", jcr.filter_time / 1000000, jcr.data_size * 1000000 / jcr.filter_time / 1024 / 1024); printf("write_time : %.3fs, %.2fMB/s\n", jcr.write_time / 1000000, jcr.data_size * 1000000 / jcr.write_time / 1024 / 1024); //double seek_time = 0.005; //5ms //double bandwidth = 120 * 1024 * 1024; //120MB/s /* double index_lookup_throughput = jcr.data_size / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth) / 1024 / 1024; double write_data_throughput = 1.0 * jcr.data_size * bandwidth / (jcr->unique_chunk_num) / 1024 / 1024; double index_read_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth); double index_write_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_write_times * seek_time + index_write_entry_counter * 24 / bandwidth);*/ /* double estimated_throughput = write_data_throughput; if (estimated_throughput > index_read_throughput) estimated_throughput = index_read_throughput;*/ /*if (estimated_throughput > index_write_throughput) estimated_throughput = index_write_throughput;*/ char logfile[] = "backup.log"; FILE *fp = fopen(logfile, "a"); /* * job id, * the size of backup * accumulative consumed capacity, * deduplication rate, * rewritten rate, * total container number, * sparse container number, * inherited container number, * 4 * index overhead (4 * int) * throughput, */ fprintf(fp, "%" PRId32 " %" PRId64 " %" PRId64 " %.4f %.4f %" PRId32 " %" PRId32 " %" PRId32 " %" PRId32" %" PRId32 " %" PRId32" %" PRId32" %.2f\n", jcr.id, jcr.data_size, destor.stored_data_size, jcr.data_size != 0 ? 
(jcr.data_size - jcr.rewritten_chunk_size - jcr.unique_data_size)/(double) (jcr.data_size) : 0, jcr.data_size != 0 ? (double) (jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.total_container_num, jcr.sparse_container_num, jcr.inherited_sparse_num, index_overhead.lookup_requests, index_overhead.lookup_requests_for_unique, index_overhead.update_requests, index_overhead.read_prefetching_units, (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); fclose(fp); }
/*
 * Run a backup job described by `jcr`.
 *
 * Starts the pipeline phases (the read-trace phase replaces read/chunk/hash
 * when simulation_level is SIMULATION_ALL), then consumes finger chunks
 * until STREAM_END: each one is fed to the container usage monitor and
 * appended to the job volume, and runs of consecutive chunks from the same
 * container are merged into a single seed record. Afterwards the container
 * statistics are stored in `jcr`, the recipe of every processed file is
 * appended to the job volume, the phases are stopped and the monitors are
 * released.
 *
 * Returns 0 on success, or FAILURE when a recipe cannot be appended. On
 * failure the phases are still stopped and the monitors freed; only the CFL
 * report is skipped.
 */
int backup(Jcr* jcr) {
	int failed = 0;

	fingerchunk_queue = sync_queue_new(-1);
	ContainerUsageMonitor* usage_monitor = container_usage_monitor_new();
	cfl_monitor = cfl_monitor_new(read_cache_size);

	if (simulation_level == SIMULATION_ALL) {
		start_read_trace_phase(jcr);
	} else {
		start_read_phase(jcr);
		start_chunk_phase(jcr);
		start_hash_phase(jcr);
	}
	start_segment_phase(jcr);
	start_filter_phase(jcr);
	start_append_phase(jcr);

	ContainerId seed_id = -1;
	int32_t seed_len = 0;
	FingerChunk* fchunk = NULL;

	for (int signal = recv_fingerchunk(&fchunk); signal != STREAM_END;
			signal = recv_fingerchunk(&fchunk)) {
		container_usage_monitor_update(usage_monitor, fchunk->container_id,
				&fchunk->fingerprint, fchunk->length);
		jvol_append_fingerchunk(jcr->job_volume, fchunk);

		/* The container changed: flush the seed accumulated so far. */
		if (seed_id != -1 && seed_id != fchunk->container_id) {
			jvol_append_seed(jcr->job_volume, seed_id, seed_len);
			seed_len = 0;
		}
		/* merge sequential accesses */
		seed_id = fchunk->container_id;
		seed_len += fchunk->length;

		free(fchunk);
	}

	if (seed_len > 0)
		jvol_append_seed(jcr->job_volume, seed_id, seed_len);
	sync_queue_free(fingerchunk_queue, NULL);

	jcr->sparse_container_num = g_hash_table_size(usage_monitor->sparse_map);
	jcr->total_container_num = g_hash_table_size(usage_monitor->dense_map)
			+ jcr->sparse_container_num;

	/* A negative result from the monitor print is retried until it succeeds. */
	while ((jcr->inherited_sparse_num = container_usage_monitor_print(
			usage_monitor, jcr->job_id, jcr->historical_sparse_containers))
			< 0) {
		dprint("retry!");
	}

	/* store recipes of processed file */
	for (int i = 0; i < jcr->file_num; ++i) {
		Recipe *recipe = (Recipe*) sync_queue_pop(jcr->completed_files_queue);
		recipe->fileindex = i;
		if (jvol_append_meta(jcr->job_volume, recipe) != SUCCESS) {
			printf("%s, %d: some errors happened in appending recipe!\n",
					__FILE__, __LINE__);
			/* Release the recipe and fall through to the common cleanup
			 * instead of returning with phases and monitors still alive. */
			recipe_free(recipe);
			failed = 1;
			break;
		}
		jcr->chunk_num += recipe->chunknum;
		recipe_free(recipe);
	}

	stop_append_phase();
	stop_filter_phase();
	stop_segment_phase();
	if (simulation_level == SIMULATION_ALL) {
		stop_read_trace_phase(jcr);
	} else {
		stop_hash_phase();
		stop_chunk_phase();
		stop_read_phase();
	}

	container_usage_monitor_free(usage_monitor);
	if (!failed)
		print_cfl(cfl_monitor);
	cfl_monitor_free(cfl_monitor);

	return failed ? FAILURE : 0;
}