void do_restore(int revision, char *path) { init_recipe_store(); init_container_store(); init_restore_jcr(revision, path); destor_log(DESTOR_NOTICE, "job id: %d", jcr.id); destor_log(DESTOR_NOTICE, "backup path: %s", jcr.bv->path); destor_log(DESTOR_NOTICE, "restore to: %s", jcr.path); restore_chunk_queue = sync_queue_new(100); restore_recipe_queue = sync_queue_new(100); TIMER_DECLARE(1); TIMER_BEGIN(1); puts("==== restore begin ===="); pthread_t recipe_t, read_t; pthread_create(&recipe_t, NULL, read_recipe_thread, NULL); if (destor.restore_cache[0] == RESTORE_CACHE_LRU) { destor_log(DESTOR_NOTICE, "restore cache is LRU"); pthread_create(&read_t, NULL, lru_restore_thread, NULL); } else if (destor.restore_cache[0] == RESTORE_CACHE_OPT) { destor_log(DESTOR_NOTICE, "restore cache is OPT"); pthread_create(&read_t, NULL, optimal_restore_thread, NULL); } else if (destor.restore_cache[0] == RESTORE_CACHE_ASM) { destor_log(DESTOR_NOTICE, "restore cache is ASM"); pthread_create(&read_t, NULL, assembly_restore_thread, NULL); } else { fprintf(stderr, "Invalid restore cache.\n"); exit(1); } write_restore_data(); assert(sync_queue_size(restore_chunk_queue) == 0); assert(sync_queue_size(restore_recipe_queue) == 0); free_backup_version(jcr.bv); TIMER_END(1, jcr.total_time); puts("==== restore end ===="); printf("job id: %d\n", jcr.id); printf("restore path: %s\n", jcr.path); printf("number of files: %d\n", jcr.file_num); printf("number of chunks: %d\n", jcr.chunk_num); printf("total size(B): %ld\n", jcr.data_size); printf("total time(s): %.3f\n", jcr.total_time / 1000000); printf("throughput(MB/s): %.2f\n", jcr.data_size * 1000000 / (1024.0 * 1024 * jcr.total_time)); printf("speed factor: %.2f\n", jcr.data_size / (1024.0 * 1024 * jcr.read_container_num)); printf("read_recipe_time : %.3fs, %.2fMB/s\n", jcr.read_recipe_time / 1000000, jcr.data_size * 1000000 / jcr.read_recipe_time / 1024 / 1024); printf("read_chunk_time : %.3fs, %.2fMB/s\n", jcr.read_chunk_time / 1000000, jcr.data_size * 1000000 / jcr.read_chunk_time / 1024 / 1024); printf("write_chunk_time : %.3fs, %.2fMB/s\n", jcr.write_chunk_time / 1000000, jcr.data_size * 1000000 / jcr.write_chunk_time / 1024 / 1024); char logfile[] = "restore.log"; FILE *fp = fopen(logfile, "a"); /* * job id, * chunk num, * data size, * actually read container number, * speed factor, * throughput */ fprintf(fp, "%d %lld %d %.4f %.4f\n", jcr.id, jcr.data_size, jcr.read_container_num, jcr.data_size / (1024.0 * 1024 * jcr.read_container_num), jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); fclose(fp); close_container_store(); close_recipe_store(); }
void do_backup(char *path) { init_recipe_store(); init_container_store(); init_index(); init_backup_jcr(path); NOTICE("\n\n==== backup begin ===="); TIMER_DECLARE(1); TIMER_BEGIN(1); time_t start = time(NULL); if (destor.simulation_level == SIMULATION_ALL) { start_read_trace_phase(); } else { start_read_phase(); start_chunk_phase(); start_hash_phase(); } start_dedup_phase(); start_rewrite_phase(); start_filter_phase(); do{ sleep(5); /*time_t now = time(NULL);*/ fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\r", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); }while(jcr.status == JCR_STATUS_RUNNING || jcr.status != JCR_STATUS_DONE); fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\n", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); if (destor.simulation_level == SIMULATION_ALL) { stop_read_trace_phase(); } else { stop_read_phase(); stop_chunk_phase(); stop_hash_phase(); } stop_dedup_phase(); stop_rewrite_phase(); stop_filter_phase(); TIMER_END(1, jcr.total_time); close_index(); close_container_store(); close_recipe_store(); update_backup_version(jcr.bv); free_backup_version(jcr.bv); printf("\n\njob id: %" PRId32 "\n", jcr.id); printf("index method: %d.(Remark 0: NO; 1: DDFS; 2: Extreme binning; 3: Silo; 4: Sparse; 5: Sampled; 6: Block; 7: Learn)\n", destor.index_specific); printf("sampling method: %d (%d) (Remark 1:Random; 2: Min; 3: Uniform; 4: Optimized_min)\n", destor.index_sampling_method[0], destor.index_sampling_method[1]); printf("segment method: %d (%d) (Remark 0: Fixed; 1: Content; 2: File)\n", destor.index_segment_algorithm[0], destor.index_segment_algorithm[1]); printf("prefetch # of segments: %d (Remark 1 for sparse index)\n", destor.index_segment_prefech); printf("segment selection method: %d (%d)(Remark 0: Base; 1: Top; 2: Mix)\n", destor.index_segment_selection_method[0], destor.index_segment_selection_method[1]); printf("backup path: %s\n", jcr.path); printf("number of files: %d\n", jcr.file_num); printf("number of chunks: %" PRId32 " (%" PRId64 " bytes on average)\n", jcr.chunk_num, jcr.data_size / jcr.chunk_num); printf("number of unique chunks: %" PRId32 "\n", jcr.unique_chunk_num); printf("total size(B): %" PRId64 "\n", jcr.data_size); printf("stored data size(B): %" PRId64 "\n", jcr.unique_data_size + jcr.rewritten_chunk_size); printf("deduplication ratio: %.4f, %.4f\n", jcr.data_size != 0 ? (jcr.data_size - jcr.unique_data_size - jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.data_size / (double) (jcr.unique_data_size + jcr.rewritten_chunk_size)); printf("total time(s): %.3f\n", jcr.total_time / 1000000); printf("the index memory footprint (B): %" PRId32 "\n", destor.index_memory_footprint); printf("throughput(MB/s): %.2f\n", (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); printf("number of zero chunks: %" PRId32 "\n", jcr.zero_chunk_num); printf("size of zero chunks: %" PRId64 "\n", jcr.zero_chunk_size); printf("number of rewritten chunks: %" PRId32 "\n", jcr.rewritten_chunk_num); printf("size of rewritten chunks: %" PRId64 "\n", jcr.rewritten_chunk_size); printf("rewritten rate in size: %.3f\n", jcr.rewritten_chunk_size / (double) jcr.data_size); destor.data_size += jcr.data_size; destor.stored_data_size += jcr.unique_data_size + jcr.rewritten_chunk_size; destor.chunk_num += jcr.chunk_num; destor.stored_chunk_num += jcr.unique_chunk_num + jcr.rewritten_chunk_num; destor.zero_chunk_num += jcr.zero_chunk_num; destor.zero_chunk_size += jcr.zero_chunk_size; destor.rewritten_chunk_num += jcr.rewritten_chunk_num; destor.rewritten_chunk_size += jcr.rewritten_chunk_size; printf("read_time : %.3fs, %.2fMB/s\n", jcr.read_time / 1000000, jcr.data_size * 1000000 / jcr.read_time / 1024 / 1024); printf("chunk_time : %.3fs, %.2fMB/s\n", jcr.chunk_time / 1000000, jcr.data_size * 1000000 / jcr.chunk_time / 1024 / 1024); printf("hash_time : %.3fs, %.2fMB/s\n", jcr.hash_time / 1000000, jcr.data_size * 1000000 / jcr.hash_time / 1024 / 1024); printf("dedup_time : %.3fs, %.2fMB/s\n", jcr.dedup_time / 1000000, jcr.data_size * 1000000 / jcr.dedup_time / 1024 / 1024); printf("rewrite_time : %.3fs, %.2fMB/s\n", jcr.rewrite_time / 1000000, jcr.data_size * 1000000 / jcr.rewrite_time / 1024 / 1024); printf("filter_time : %.3fs, %.2fMB/s\n", jcr.filter_time / 1000000, jcr.data_size * 1000000 / jcr.filter_time / 1024 / 1024); printf("write_time : %.3fs, %.2fMB/s\n", jcr.write_time / 1000000, jcr.data_size * 1000000 / jcr.write_time / 1024 / 1024); //double seek_time = 0.005; //5ms //double bandwidth = 120 * 1024 * 1024; //120MB/s /* double index_lookup_throughput = jcr.data_size / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth) / 1024 / 1024; double write_data_throughput = 1.0 * jcr.data_size * bandwidth / (jcr->unique_chunk_num) / 1024 / 1024; double index_read_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth); double index_write_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_write_times * seek_time + index_write_entry_counter * 24 / bandwidth);*/ /* double estimated_throughput = write_data_throughput; if (estimated_throughput > index_read_throughput) estimated_throughput = index_read_throughput;*/ /*if (estimated_throughput > index_write_throughput) estimated_throughput = index_write_throughput;*/ char logfile[] = "backup.log"; FILE *fp = fopen(logfile, "a"); /* * job id, * the size of backup * accumulative consumed capacity, * deduplication rate, * rewritten rate, * total container number, * sparse container number, * inherited container number, * 4 * index overhead (4 * int) * throughput, */ fprintf(fp, "%" PRId32 " %" PRId64 " %" PRId64 " %.4f %.4f %" PRId32 " %" PRId32 " %" PRId32 " %" PRId32" %" PRId32 " %" PRId32" %" PRId32" %.2f\n", jcr.id, jcr.data_size, destor.stored_data_size, jcr.data_size != 0 ? (jcr.data_size - jcr.rewritten_chunk_size - jcr.unique_data_size)/(double) (jcr.data_size) : 0, jcr.data_size != 0 ? (double) (jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.total_container_num, jcr.sparse_container_num, jcr.inherited_sparse_num, index_overhead.lookup_requests, index_overhead.lookup_requests_for_unique, index_overhead.update_requests, index_overhead.read_prefetching_units, (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); fclose(fp); }
/* * We assume a FIFO order of deleting backup, namely the oldest backup is deleted first. */ void do_delete(int jobid) { GHashTable *invalid_containers = trunc_manifest(jobid); init_index(); init_recipe_store(); /* Delete the invalid entries in the key-value store */ if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){ init_container_store(); struct backupVersion* bv = open_backup_version(jobid); /* The entries pointing to Invalid Containers are invalid. */ GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, invalid_containers); while(g_hash_table_iter_next(&iter, &key, &value)){ containerid id = *(containerid*)key; NOTICE("Reclaim container %lld", id); struct containerMeta* cm = retrieve_container_meta_by_id(id); container_meta_foreach(cm, delete_an_entry, &id); free_container_meta(cm); } bv->deleted = 1; update_backup_version(bv); free_backup_version(bv); close_container_store(); }else if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){ /* Ideally, the entries pointing to segments in backup versions of a 'bv_num' less than 'jobid' are invalid. */ /* (For simplicity) Since a FIFO order is given, we only need to remove the IDs exactly matched 'bv_num'. */ struct backupVersion* bv = open_backup_version(jobid); struct segmentRecipe* sr; while((sr=read_next_segment(bv))){ segment_recipe_foreach(sr, delete_an_entry, &sr->id); } bv->deleted = 1; update_backup_version(bv); free_backup_version(bv); }else{ WARNING("Invalid index type"); exit(1); } close_recipe_store(); close_index(); char logfile[] = "delete.log"; FILE *fp = fopen(logfile, "a"); /* * ID of the job we delete, * number of live containers, * memory footprint */ fprintf(fp, "%d %d %d\n", jobid, destor.live_container_num, destor.index_memory_footprint); fclose(fp); /* record the IDs of invalid containers */ sds didfilepath = sdsdup(destor.working_directory); char s[128]; sprintf(s, "recipes/delete_%d.id", jobid); didfilepath = sdscat(didfilepath, s); FILE* didfile = fopen(didfilepath, "w"); if(didfile){ GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, invalid_containers); while(g_hash_table_iter_next(&iter, &key, &value)){ containerid id = *(containerid*)key; fprintf(didfile, "%lld\n", id); } fclose(didfile); } g_hash_table_destroy(invalid_containers); }