static void* sha1_thread(void* arg) { char code[41]; while (1) { struct chunk* c = sync_queue_pop(chunk_queue); if (c == NULL) { sync_queue_term(hash_queue); break; } if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) { sync_queue_push(hash_queue, c); continue; } TIMER_DECLARE(1); TIMER_BEGIN(1); SHA_CTX ctx; SHA_Init(&ctx); SHA_Update(&ctx, c->data, c->size); SHA_Final(c->fp, &ctx); TIMER_END(1, jcr.hash_time); hash2code(c->fp, code); code[40] = 0; VERBOSE("Hash phase: %ldth chunk identified by %s", chunk_num++, code); sync_queue_push(hash_queue, c); } return NULL; }
/*
 * Receive the next FingerChunk from fingerchunk_queue.
 * A chunk whose length equals STREAM_END is the end-of-stream sentinel:
 * it is freed, *fc is cleared, and STREAM_END is returned. Otherwise *fc
 * receives the chunk (ownership passes to the caller) and SUCCESS is
 * returned.
 */
static int recv_fingerchunk(FingerChunk **fc) {
	FingerChunk *next = (FingerChunk*) sync_queue_pop(fingerchunk_queue);
	if (next->length != STREAM_END) {
		*fc = next;
		return SUCCESS;
	}
	/* Sentinel: release it and signal the end of the stream. */
	free(next);
	*fc = 0;
	return STREAM_END;
}
/*
 * Restore-phase container reader with an LRU cache.
 *
 * Pops recipe chunks from restore_recipe_queue, fetches the container
 * (or, in simulation mode, only the container metadata) that holds each
 * chunk, and pushes the restored data chunk to restore_chunk_queue.
 * File start/end markers are forwarded unchanged. On a NULL pop the
 * downstream queue is terminated and the cache is released.
 *
 * Note the asymmetry: in simulation mode no data chunk is pushed
 * downstream (only cache/miss accounting is performed), while in real
 * mode a copy obtained via get_chunk_in_container() is pushed and the
 * recipe chunk itself is always freed.
 */
static void* lru_restore_thread(void *arg) {
	struct lruCache *cache;
	if (destor.simulation_level >= SIMULATION_RESTORE)
		/* Simulation: cache container metadata only (no chunk payloads). */
		cache = new_lru_cache(destor.restore_cache[1], free_container_meta,
				lookup_fingerprint_in_container_meta);
	else
		cache = new_lru_cache(destor.restore_cache[1], free_container,
				lookup_fingerprint_in_container);

	struct chunk* c;
	while ((c = sync_queue_pop(restore_recipe_queue))) {

		if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) {
			/* Pass file boundaries straight through to the writer. */
			sync_queue_push(restore_chunk_queue, c);
			continue;
		}

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		if (destor.simulation_level >= SIMULATION_RESTORE) {
			struct containerMeta *cm = lru_cache_lookup(cache, &c->fp);
			if (!cm) {
				VERBOSE("Restore cache: container %lld is missed", c->id);
				cm = retrieve_container_meta_by_id(c->id);
				/* NOTE(review): lookup happens inside assert(); it is
				 * compiled out under NDEBUG — confirm this is intended. */
				assert(lookup_fingerprint_in_container_meta(cm, &c->fp));
				lru_cache_insert(cache, cm, NULL, NULL);
				jcr.read_container_num++;
			}

			TIMER_END(1, jcr.read_chunk_time);
		} else {
			struct container *con = lru_cache_lookup(cache, &c->fp);
			if (!con) {
				VERBOSE("Restore cache: container %lld is missed", c->id);
				con = retrieve_container_by_id(c->id);
				lru_cache_insert(cache, con, NULL, NULL);
				jcr.read_container_num++;
			}
			/* Copy of the chunk owned by the container; pushed downstream. */
			struct chunk *rc = get_chunk_in_container(con, &c->fp);
			assert(rc);
			TIMER_END(1, jcr.read_chunk_time);
			sync_queue_push(restore_chunk_queue, rc);
		}

		free_chunk(c);
	}

	sync_queue_term(restore_chunk_queue);

	free_lru_cache(cache);

	return NULL;
}
/*
 * Dedup phase: groups incoming chunks into segments and performs the
 * segment-level index lookup that marks duplicate chunks.
 *
 * Chunks come from hash_queue, or from trace_queue when running a full
 * simulation. A NULL chunk signals end-of-stream; it is still passed to
 * segmenting() so the final partially-filled segment gets flushed before
 * the loop exits and dedup_queue is terminated.
 */
void *dedup_thread(void *arg) {
	struct segment* s = NULL;
	while (1) {
		struct chunk *c = NULL;
		if (destor.simulation_level != SIMULATION_ALL)
			c = sync_queue_pop(hash_queue);
		else
			/* Trace-driven simulation bypasses read/chunk/hash phases. */
			c = sync_queue_pop(trace_queue);

		/* Add the chunk to the segment. */
		s = segmenting(c);
		if (!s)
			continue;
		/* segmenting success */
		if (s->chunk_num > 0) {
			VERBOSE("Dedup phase: the %lldth segment of %lld chunks", segment_num++,
					s->chunk_num);
			/* Each duplicate chunk will be marked. */
			pthread_mutex_lock(&index_lock.mutex);
			while (index_lookup(s) == 0) {
				/* Index buffer full: wait for the filter phase to drain it. */
				pthread_cond_wait(&index_lock.cond, &index_lock.mutex);
			}
			pthread_mutex_unlock(&index_lock.mutex);
		} else {
			VERBOSE("Dedup phase: an empty segment");
		}
		/* Send chunks in the segment to the next phase.
		 * The segment will be cleared. */
		send_segment(s);

		free_segment(s);
		s = NULL;

		if (c == NULL)
			break;
	}

	sync_queue_term(dedup_queue);

	return NULL;
}
void make_trace(char* path) { init_jcr(path); sds trace_file = sdsnew(path); char *p = trace_file + sdslen(trace_file) - 1; while (*p == '/') --p; *(p + 1) = 0; sdsupdatelen(trace_file); trace_file = sdscat(trace_file, ".trace"); NOTICE("output to %s", trace_file); start_read_phase(); start_chunk_phase(); start_hash_phase(); unsigned char code[41]; FILE *fp = fopen(trace_file, "w"); while (1) { struct chunk *c = sync_queue_pop(hash_queue); if (c == NULL) { break; } if (CHECK_CHUNK(c, CHUNK_FILE_START)) { destor_log(DESTOR_NOTICE, c->data); fprintf(fp, "file start %zd\n", strlen(c->data)); fprintf(fp, "%s\n", c->data); } else if (CHECK_CHUNK(c, CHUNK_FILE_END)) { fprintf(fp, "file end\n"); } else { hash2code(c->fp, code); code[40] = 0; fprintf(fp, "%s %d\n", code, c->size); } free_chunk(c); } fprintf(fp, "stream end"); fclose(fp); }
/*
 * Append worker: drains container_buffer, writing each container to disk.
 * We must ensure a container is either still in the buffer or already
 * written to disk, so the container is only popped (and freed) AFTER
 * write_container() completes.
 */
static void* append_thread(void *arg) {
	for (;;) {
		struct container *head = sync_queue_get_top(container_buffer);
		if (head == NULL)
			break;

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);
		write_container(head);
		TIMER_END(1, jcr.write_time);

		/* Remove from the buffer only once the container is durable. */
		sync_queue_pop(container_buffer);
		free_container(head);
	}
	return NULL;
}
void write_restore_data() { char *p, *q; q = jcr.path + 1;/* ignore the first char*/ /* * recursively make directory */ while ((p = strchr(q, '/'))) { if (*p == *(p - 1)) { q++; continue; } *p = 0; if (access(jcr.path, 0) != 0) { mkdir(jcr.path, S_IRWXU | S_IRWXG | S_IRWXO); } *p = '/'; q = p + 1; } struct chunk *c = NULL; FILE *fp = NULL; while ((c = sync_queue_pop(restore_chunk_queue))) { TIMER_DECLARE(1); TIMER_BEGIN(1); if (CHECK_CHUNK(c, CHUNK_FILE_START)) { NOTICE("Restoring: %s", c->data); sds filepath = sdsdup(jcr.path); filepath = sdscat(filepath, c->data); int len = sdslen(jcr.path); char *q = filepath + len; char *p; while ((p = strchr(q, '/'))) { if (*p == *(p - 1)) { q++; continue; } *p = 0; if (access(filepath, 0) != 0) { mkdir(filepath, S_IRWXU | S_IRWXG | S_IRWXO); } *p = '/'; q = p + 1; } if (destor.simulation_level == SIMULATION_NO) { assert(fp == NULL); fp = fopen(filepath, "w"); } sdsfree(filepath); } else if (CHECK_CHUNK(c, CHUNK_FILE_END)) { if (fp) fclose(fp); fp = NULL; } else { assert(destor.simulation_level == SIMULATION_NO); VERBOSE("Restoring %d bytes", c->size); fwrite(c->data, c->size, 1, fp); } free_chunk(c); TIMER_END(1, jcr.write_chunk_time); } }
/*
 * Drive a complete backup job.
 *
 * Starts the pipeline phases (read/chunk/hash — or trace replay in full
 * simulation — then segment, filter, append), consumes the resulting
 * FingerChunk stream, and records per-container usage plus "seeds"
 * (runs of consecutive chunks stored in the same container) into the
 * job volume. Finally it appends one recipe per processed file and
 * stops all phases in reverse order.
 *
 * Returns 0 on success, FAILURE if appending a recipe fails.
 * NOTE(review): on the FAILURE path the phases are not stopped and the
 * monitors are not freed — confirm this early return is intended.
 */
int backup(Jcr* jcr) {
	fingerchunk_queue = sync_queue_new(-1);
	ContainerUsageMonitor* usage_monitor = container_usage_monitor_new();
	cfl_monitor = cfl_monitor_new(read_cache_size);

	if (simulation_level == SIMULATION_ALL) {
		start_read_trace_phase(jcr);
	} else {
		start_read_phase(jcr);
		start_chunk_phase(jcr);
		start_hash_phase(jcr);
	}
	start_segment_phase(jcr);
	start_filter_phase(jcr);
	start_append_phase(jcr);

	/* Current seed: a maximal run of fingerchunks sharing one container. */
	ContainerId seed_id = -1;
	int32_t seed_len = 0;
	FingerChunk* fchunk = NULL;
	int signal = recv_fingerchunk(&fchunk);
	while (signal != STREAM_END) {
		container_usage_monitor_update(usage_monitor, fchunk->container_id,
				&fchunk->fingerprint, fchunk->length);
		jvol_append_fingerchunk(jcr->job_volume, fchunk);

		/* Container changed: flush the finished seed. */
		if (seed_id != -1 && seed_id != fchunk->container_id) {
			jvol_append_seed(jcr->job_volume, seed_id, seed_len);
			seed_len = 0;
		}
		/* merge sequential accesses */
		seed_id = fchunk->container_id;
		seed_len += fchunk->length;

		free(fchunk);
		signal = recv_fingerchunk(&fchunk);
	}
	/* Flush the final seed, if any. */
	if (seed_len > 0)
		jvol_append_seed(jcr->job_volume, seed_id, seed_len);

	sync_queue_free(fingerchunk_queue, NULL);

	jcr->sparse_container_num = g_hash_table_size(usage_monitor->sparse_map);
	jcr->total_container_num = g_hash_table_size(usage_monitor->dense_map)
			+ jcr->sparse_container_num;
	/* container_usage_monitor_print() returns < 0 on failure; retry. */
	while ((jcr->inherited_sparse_num = container_usage_monitor_print(
			usage_monitor, jcr->job_id, jcr->historical_sparse_containers)) < 0) {
		dprint("retry!");
	}

	/* store recipes of processed file */
	int i = 0;
	for (; i < jcr->file_num; ++i) {
		Recipe *recipe = (Recipe*) sync_queue_pop(jcr->completed_files_queue);
		recipe->fileindex = i;
		if (jvol_append_meta(jcr->job_volume, recipe) != SUCCESS) {
			printf("%s, %d: some errors happened in appending recipe!\n",
					__FILE__, __LINE__);
			return FAILURE;
		}
		jcr->chunk_num += recipe->chunknum;
		recipe_free(recipe);
	}

	/* Stop phases in reverse order of startup. */
	stop_append_phase();
	stop_filter_phase();
	stop_segment_phase();
	if (simulation_level == SIMULATION_ALL) {
		stop_read_trace_phase(jcr);
	} else {
		stop_hash_phase();
		stop_chunk_phase();
		stop_read_phase();
	}

	container_usage_monitor_free(usage_monitor);
	print_cfl(cfl_monitor);
	cfl_monitor_free(cfl_monitor);
	return 0;
}
/*
 * When a container buffer is full, we push it into container_queue.
 *
 * Filter phase: consumes segments (delimited by CHUNK_SEGMENT_START/END
 * markers) from rewrite_queue and, for every data chunk, decides under the
 * index lock whether it must be written to the open container — either
 * because it is unique, or because it is a fragmented duplicate (sparse,
 * or out-of-order and not restore-cached) whose rewrite was not denied.
 * It then appends the segment's recipe entries to the backup volume and
 * updates the fingerprint index (per-container features for physical
 * locality, per-segment features for logical locality).
 */
static void* filter_thread(void *arg) {
	int enable_rewrite = 1;
	struct fileRecipeMeta* r = NULL;

	while (1) {
		struct chunk* c = sync_queue_pop(rewrite_queue);

		if (c == NULL)
			/* backup job finish */
			break;

		/* reconstruct a segment */
		struct segment* s = new_segment();

		/* segment head */
		assert(CHECK_CHUNK(c, CHUNK_SEGMENT_START));
		free_chunk(c);

		/* Collect chunks until the segment tail marker. */
		c = sync_queue_pop(rewrite_queue);
		while (!(CHECK_CHUNK(c, CHUNK_SEGMENT_END))) {
			g_sequence_append(s->chunks, c);
			if (!CHECK_CHUNK(c, CHUNK_FILE_START) && !CHECK_CHUNK(c, CHUNK_FILE_END))
				s->chunk_num++;
			c = sync_queue_pop(rewrite_queue);
		}
		free_chunk(c);

		/* For self-references in a segment.
		 * If we find an early copy of the chunk in this segment has been rewritten,
		 * the rewrite request for it will be denied to avoid repeat rewriting. */
		GHashTable *recently_rewritten_chunks = g_hash_table_new_full(g_int64_hash,
				g_fingerprint_equal, NULL, free_chunk);
		GHashTable *recently_unique_chunks = g_hash_table_new_full(g_int64_hash,
				g_fingerprint_equal, NULL, free_chunk);

		pthread_mutex_lock(&index_lock.mutex);

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);
		/* This function will check the fragmented chunks
		 * that would be rewritten later.
		 * If we find an early copy of the chunk in earlier segments,
		 * has been rewritten,
		 * the rewrite request for it will be denied. */
		index_check_buffer(s);

		GSequenceIter *iter = g_sequence_get_begin_iter(s->chunks);
		GSequenceIter *end = g_sequence_get_end_iter(s->chunks);
		for (; iter != end; iter = g_sequence_iter_next(iter)) {
			c = g_sequence_get(iter);

			/* File boundary markers take no part in filtering. */
			if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END))
				continue;

			VERBOSE("Filter phase: %dth chunk in %s container %lld", chunk_num,
					CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER) ? "out-of-order" : "", c->id);

			/* Cache-Aware Filter */
			if (destor.rewrite_enable_cache_aware && restore_aware_contains(c->id)) {
				assert(c->id != TEMPORARY_ID);
				VERBOSE("Filter phase: %dth chunk is cached", chunk_num);
				SET_CHUNK(c, CHUNK_IN_CACHE);
			}

			/* A cfl-switch for rewriting out-of-order chunks. */
			if (destor.rewrite_enable_cfl_switch) {
				double cfl = restore_aware_get_cfl();
				if (enable_rewrite && cfl > destor.rewrite_cfl_require) {
					VERBOSE("Filter phase: Turn OFF the (out-of-order) rewrite switch of %.3f",
							cfl);
					enable_rewrite = 0;
				} else if (!enable_rewrite && cfl < destor.rewrite_cfl_require) {
					VERBOSE("Filter phase: Turn ON the (out-of-order) rewrite switch of %.3f",
							cfl);
					enable_rewrite = 1;
				}
			}

			/* Resolve a duplicate of a chunk written earlier in this job:
			 * its container id becomes the id of that earlier copy. */
			if(CHECK_CHUNK(c, CHUNK_DUPLICATE) && c->id == TEMPORARY_ID){
				struct chunk* ruc = g_hash_table_lookup(recently_unique_chunks, &c->fp);
				assert(ruc);
				c->id = ruc->id;
			}
			struct chunk* rwc = g_hash_table_lookup(recently_rewritten_chunks, &c->fp);
			if(rwc){
				c->id = rwc->id;
				SET_CHUNK(c, CHUNK_REWRITE_DENIED);
			}

			/* A fragmented chunk will be denied if it has been rewritten recently */
			if (!CHECK_CHUNK(c, CHUNK_DUPLICATE)
					|| (!CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)
							&& (CHECK_CHUNK(c, CHUNK_SPARSE)
									|| (enable_rewrite
											&& CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)
											&& !CHECK_CHUNK(c, CHUNK_IN_CACHE))))) {
				/*
				 * If the chunk is unique, or be fragmented and not denied,
				 * we write it to a container.
				 * Fragmented indicates: sparse, or out of order and not in cache,
				 */
				if (storage_buffer.container_buffer == NULL){
					storage_buffer.container_buffer = create_container();
					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY)
						storage_buffer.chunks = g_sequence_new(free_chunk);
				}

				/* Full container: flush it (and its features) first. */
				if (container_overflow(storage_buffer.container_buffer, c->size)) {

					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
						/*
						 * TO-DO
						 * Update_index for physical locality
						 */
						GHashTable *features = sampling(storage_buffer.chunks,
								g_sequence_get_length(storage_buffer.chunks));
						index_update(features, get_container_id(storage_buffer.container_buffer));
						g_hash_table_destroy(features);
						g_sequence_free(storage_buffer.chunks);
						storage_buffer.chunks = g_sequence_new(free_chunk);
					}
					/* Exclude the async write from the filter timer. */
					TIMER_END(1, jcr.filter_time);
					write_container_async(storage_buffer.container_buffer);
					TIMER_BEGIN(1);
					storage_buffer.container_buffer = create_container();
				}

				if(add_chunk_to_container(storage_buffer.container_buffer, c)){
					/* Remember this write under its fingerprint so later
					 * copies in this job can reuse the container id. */
					struct chunk* wc = new_chunk(0);
					memcpy(&wc->fp, &c->fp, sizeof(fingerprint));
					wc->id = c->id;
					if (!CHECK_CHUNK(c, CHUNK_DUPLICATE)) {
						jcr.unique_chunk_num++;
						jcr.unique_data_size += c->size;
						g_hash_table_insert(recently_unique_chunks, &wc->fp, wc);
						VERBOSE("Filter phase: %dth chunk is recently unique, size %d",
								chunk_num, g_hash_table_size(recently_unique_chunks));
					} else {
						jcr.rewritten_chunk_num++;
						jcr.rewritten_chunk_size += c->size;
						g_hash_table_insert(recently_rewritten_chunks, &wc->fp, wc);
					}

					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
						struct chunk* ck = new_chunk(0);
						memcpy(&ck->fp, &c->fp, sizeof(fingerprint));
						g_sequence_append(storage_buffer.chunks, ck);
					}

					VERBOSE("Filter phase: Write %dth chunk to container %lld",
							chunk_num, c->id);
				}else{
					VERBOSE("Filter phase: container %lld already has this chunk", c->id);
					assert(destor.index_category[0] != INDEX_CATEGORY_EXACT
							|| destor.rewrite_algorithm[0]!=REWRITE_NO);
				}

			}else{
				/* NOTE(review): chunk_num is printed with %lld here but %d
				 * above — the format/type mismatch should be confirmed. */
				if(CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)){
					VERBOSE("Filter phase: %lldth fragmented chunk is denied", chunk_num);
				}else if (CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)) {
					VERBOSE("Filter phase: %lldth chunk in out-of-order container %lld is already cached",
							chunk_num, c->id);
				}
			}

			assert(c->id != TEMPORARY_ID);

			/* Collect historical information. */
			har_monitor_update(c->id, c->size);

			/* Restore-aware */
			restore_aware_update(c->id, c->size);

			chunk_num++;
		}

		int full = index_update_buffer(s);

		/* Write a SEGMENT_BEGIN */
		segmentid sid = append_segment_flag(jcr.bv, CHUNK_SEGMENT_START, s->chunk_num);

		/* Write recipe */
		iter = g_sequence_get_begin_iter(s->chunks);
		end = g_sequence_get_end_iter(s->chunks);
		for (; iter != end; iter = g_sequence_iter_next(iter)) {
			c = g_sequence_get(iter);
			if(r == NULL){
				/* First chunk of a file must be its start marker. */
				assert(CHECK_CHUNK(c,CHUNK_FILE_START));
				r = new_file_recipe_meta(c->data);
			}else if(!CHECK_CHUNK(c,CHUNK_FILE_END)){
				struct chunkPointer cp;
				cp.id = c->id;
				assert(cp.id>=0);
				memcpy(&cp.fp, &c->fp, sizeof(fingerprint));
				cp.size = c->size;
				append_n_chunk_pointers(jcr.bv, &cp ,1);
				r->chunknum++;
				r->filesize += c->size;

				jcr.chunk_num++;
				jcr.data_size += c->size;
			}else{
				assert(CHECK_CHUNK(c,CHUNK_FILE_END));
				append_file_recipe_meta(jcr.bv, r);
				free_file_recipe_meta(r);
				r = NULL;

				jcr.file_num++;
			}
		}

		/* Write a SEGMENT_END */
		append_segment_flag(jcr.bv, CHUNK_SEGMENT_END, 0);

		if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){
			/*
			 * TO-DO
			 * Update_index for logical locality
			 */
			s->features = sampling(s->chunks, s->chunk_num);

			if(destor.index_category[0] == INDEX_CATEGORY_EXACT){
				/*
				 * For exact deduplication,
				 * unique fingerprints are inserted.
				 */
				VERBOSE("Filter phase: add %d unique fingerprints to %d features",
						g_hash_table_size(recently_unique_chunks),
						g_hash_table_size(s->features));
				GHashTableIter iter;
				gpointer key, value;
				g_hash_table_iter_init(&iter, recently_unique_chunks);
				while(g_hash_table_iter_next(&iter, &key, &value)){
					struct chunk* uc = value;
					fingerprint *ft = malloc(sizeof(fingerprint));
					memcpy(ft, &uc->fp, sizeof(fingerprint));
					g_hash_table_insert(s->features, ft, NULL);
				}

				/*
				 * OPTION:
				 * It is still an open problem whether we need to update
				 * rewritten fingerprints.
				 * It would increase index update overhead, while the benefit
				 * remains unclear.
				 * More experiments are required.
				 */
				VERBOSE("Filter phase: add %d rewritten fingerprints to %d features",
						g_hash_table_size(recently_rewritten_chunks),
						g_hash_table_size(s->features));
				g_hash_table_iter_init(&iter, recently_rewritten_chunks);
				while(g_hash_table_iter_next(&iter, &key, &value)){
					struct chunk* uc = value;
					fingerprint *ft = malloc(sizeof(fingerprint));
					memcpy(ft, &uc->fp, sizeof(fingerprint));
					g_hash_table_insert(s->features, ft, NULL);
				}
			}
			index_update(s->features, sid);
		}

		free_segment(s);

		/* Wake the dedup thread if the index buffer has drained. */
		if(index_lock.wait_threshold > 0 && full == 0){
			pthread_cond_broadcast(&index_lock.cond);
		}
		TIMER_END(1, jcr.filter_time);
		pthread_mutex_unlock(&index_lock.mutex);

		g_hash_table_destroy(recently_rewritten_chunks);
		g_hash_table_destroy(recently_unique_chunks);
	}

	/* Flush the last, partially-filled container. */
	if (storage_buffer.container_buffer
			&& !container_empty(storage_buffer.container_buffer)){
		if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
			/*
			 * TO-DO
			 * Update_index for physical locality
			 */
			GHashTable *features = sampling(storage_buffer.chunks,
					g_sequence_get_length(storage_buffer.chunks));
			index_update(features, get_container_id(storage_buffer.container_buffer));
			g_hash_table_destroy(features);
			g_sequence_free(storage_buffer.chunks);
		}
		write_container_async(storage_buffer.container_buffer);
	}

	/* All files done */
	jcr.status = JCR_STATUS_DONE;

	return NULL;
}