static void* read_recipe_thread(void *arg) { int i, j, k; for (i = 0; i < jcr.bv->number_of_files; i++) { TIMER_DECLARE(1); TIMER_BEGIN(1); struct recipeMeta *r = read_next_recipe_meta(jcr.bv); struct chunk *c = new_chunk(sdslen(r->filename) + 1); strcpy(c->data, r->filename); SET_CHUNK(c, CHUNK_FILE_START); TIMER_END(1, jcr.read_recipe_time); sync_queue_push(restore_recipe_queue, c); jcr.file_num++; for (j = 0; j < r->chunknum; j++) { TIMER_DECLARE(1); TIMER_BEGIN(1); struct chunkPointer* cp = read_next_n_chunk_pointers(jcr.bv, 1, &k); struct chunk* c = new_chunk(0); memcpy(&c->fp, &cp->fp, sizeof(fingerprint)); c->size = cp->size; c->id = cp->id; TIMER_END(1, jcr.read_recipe_time); jcr.data_size += c->size; jcr.chunk_num++; sync_queue_push(restore_recipe_queue, c); free(cp); } c = new_chunk(0); SET_CHUNK(c, CHUNK_FILE_END); sync_queue_push(restore_recipe_queue, c); free_recipe_meta(r); } sync_queue_term(restore_recipe_queue); return NULL; }
static void* sha1_thread(void* arg) { char code[41]; while (1) { struct chunk* c = sync_queue_pop(chunk_queue); if (c == NULL) { sync_queue_term(hash_queue); break; } if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) { sync_queue_push(hash_queue, c); continue; } TIMER_DECLARE(1); TIMER_BEGIN(1); SHA_CTX ctx; SHA_Init(&ctx); SHA_Update(&ctx, c->data, c->size); SHA_Final(c->fp, &ctx); TIMER_END(1, jcr.hash_time); hash2code(c->fp, code); code[40] = 0; VERBOSE("Hash phase: %ldth chunk identified by %s", chunk_num++, code); sync_queue_push(hash_queue, c); } return NULL; }
/*
 * LRU-cache restore thread.
 *
 * Consumes the chunk stream from restore_recipe_queue and resolves each
 * data chunk against an LRU cache of containers (or container metadata in
 * simulation mode). File boundary markers pass straight through.
 *
 * In simulation mode (>= SIMULATION_RESTORE) only container metadata is
 * cached and no data chunk is pushed downstream — write_restore_data()
 * never writes payloads in simulation, so this is intentional.
 * In real mode the full container is cached and the resolved chunk (with
 * data) is pushed to restore_chunk_queue.
 */
static void* lru_restore_thread(void *arg) {
	struct lruCache *cache;
	/* Cache element type depends on mode: containerMeta vs full container. */
	if (destor.simulation_level >= SIMULATION_RESTORE)
		cache = new_lru_cache(destor.restore_cache[1], free_container_meta, lookup_fingerprint_in_container_meta);
	else
		cache = new_lru_cache(destor.restore_cache[1], free_container, lookup_fingerprint_in_container);

	struct chunk* c;
	while ((c = sync_queue_pop(restore_recipe_queue))) {
		/* Boundary markers are forwarded untouched. */
		if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) {
			sync_queue_push(restore_chunk_queue, c);
			continue;
		}

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		if (destor.simulation_level >= SIMULATION_RESTORE) {
			struct containerMeta *cm = lru_cache_lookup(cache, &c->fp);
			if (!cm) {
				/* Cache miss: fetch metadata and count the container read. */
				VERBOSE("Restore cache: container %lld is missed", c->id);
				cm = retrieve_container_meta_by_id(c->id);
				/* The recipe claims this fingerprint lives in container c->id. */
				assert(lookup_fingerprint_in_container_meta(cm, &c->fp));
				lru_cache_insert(cache, cm, NULL, NULL);
				jcr.read_container_num++;
			}
			TIMER_END(1, jcr.read_chunk_time);
			/* Simulation: no data chunk is produced downstream. */
		} else {
			struct container *con = lru_cache_lookup(cache, &c->fp);
			if (!con) {
				VERBOSE("Restore cache: container %lld is missed", c->id);
				con = retrieve_container_by_id(c->id);
				lru_cache_insert(cache, con, NULL, NULL);
				jcr.read_container_num++;
			}
			/* rc is a fresh chunk with payload; ownership passes downstream. */
			struct chunk *rc = get_chunk_in_container(con, &c->fp);
			assert(rc);
			TIMER_END(1, jcr.read_chunk_time);
			sync_queue_push(restore_chunk_queue, rc);
		}
		/* The recipe chunk itself is consumed here. */
		free_chunk(c);
	}

	sync_queue_term(restore_chunk_queue);
	free_lru_cache(cache);
	return NULL;
}
void test_data(int socket,char *filename) { TIMER_DECLARE(start,end); TIMER_DECLARE(Rstart,Rend); TIMER_DECLARE(Sstart,Send); char buf[SOCKET_BUF_SIZE+1] = {0}; int readlen=0; double total_time=0; double send_time=0; double read_time=0; double total_len=0; int fd=-1; if ((fd=open(filename, O_RDONLY)) < 0) { printf("%s,%d open file error!\n",__FILE__,__LINE__); return; } TIMER_START(start); while(1){ TIMER_START(Rstart); if((readlen=readn(fd, buf, SOCKET_BUF_SIZE))<=0) break; TIMER_END(Rend); TIMER_DIFF(read_time,Rstart,Rend); TIMER_START(Sstart); bnet_send(socket,buf,readlen); TIMER_END(Send); TIMER_DIFF(send_time,Sstart,Send); total_len+=readlen; } TIMER_END(end); TIMER_DIFF(total_time,start,end); close(fd); close(socket); printf("read time=%.4f %.4fMB/s\n",read_time,total_len/read_time/1036288); printf("send time=%.4f %.4fMB/s\n",send_time,total_len/send_time/1036288); printf("total time=%.4f %.4fMB/s\n", total_time,total_len/total_time/1036288); }
/*
 * Timer worker thread (sleep-loop implementation).
 *
 * Sleeps for the timer's initial delay, then repeatedly runs timer->task
 * and sleeps for the remainder of the period (period minus the task's
 * measured execution time). Loops while the period is nonzero
 * (TIMER_NE_CONST) and honours cancellation between steps: via
 * pthread_testcancel() on unix, via the timerIsCanceled() flag on
 * Windows/Cygwin.
 *
 * NOTE(review): delay/period field access (.tv_nsec vs .tv_usec vs scalar)
 * is selected by HAVE_STRUCT_TIMESPEC / HAVE_STRUCT_TIMEVAL — the exact
 * unit of pthreadSleep's second argument differs per configuration.
 */
static void * timerThread(void *_data) {
	TIMER_DECLARE(period);
	Timer *timer = (Timer *) _data;

	/* Initial one-shot delay before the first task invocation. */
#if defined(HAVE_STRUCT_TIMESPEC)
	pthreadSleep(timer->delay.tv_sec, timer->delay.tv_nsec);
#elif defined(HAVE_STRUCT_TIMEVAL)
	pthreadSleep(timer->delay.tv_sec, timer->delay.tv_usec);
#else
	pthreadSleep(timer->delay, 0);
#endif

	do {
		if (timer->task == NULL)
			break;

		/* Start measuring the task's execution time. */
		TIMER_START(period);

		/* Cancellation check before running the task. */
#ifdef __unix__
		pthread_testcancel();
#endif
#if defined(__WIN32__) || defined(__CYGWIN__)
		if (timerIsCanceled(timer))
			break;
#endif

		(*timer->task)(timer);

		/* Cancellation check after the task as well. */
#ifdef __unix__
		pthread_testcancel();
#endif
#if defined(__WIN32__) || defined(__CYGWIN__)
		if (timerIsCanceled(timer))
			break;
#endif

		/* Elapsed task time goes into TIMER_DIFF_VAR(period); sleep only
		 * for the remainder of the configured period. */
		TIMER_DIFF(period);
		period = timer->period;
		CLOCK_SUB(&period, &TIMER_DIFF_VAR(period));

#if defined(HAVE_STRUCT_TIMESPEC)
		pthreadSleep(period.tv_sec, period.tv_nsec);
#elif defined(HAVE_STRUCT_TIMEVAL)
		pthreadSleep(period.tv_sec, period.tv_usec);
#else
		pthreadSleep(period, 0);
#endif
	} while (TIMER_NE_CONST(timer->period, 0, 0));

	return NULL;
}
/*
 * Timer worker thread (condition-variable implementation).
 *
 * Waits on timer->cv with an absolute deadline. ETIMEDOUT means the
 * period elapsed: run timer->task and compute the next deadline as
 * (period - task execution time). Any other wake-up (signal on the cv,
 * spurious success returning 0 exits the while condition) or a NULL task
 * ends the loop. A zero period (compared against time_zero) makes the
 * timer one-shot.
 *
 * The timer mutex is held for the whole lifetime of the loop;
 * pthread_cond_timedwait releases it while waiting, as per POSIX.
 */
static void * timerThread(void *_data) {
	int error;
	TIMER_DECLARE(period);
	struct timespec abstime, delay;
	Timer *timer = (Timer *) _data;

	PTHREAD_MUTEX_LOCK(&timer->mutex);

	/* Set initial delay. */
	delay = *(struct timespec *) &timer->delay;
#if !defined(HAVE_CLOCK_GETTIME) && defined(HAVE_GETTIMEOFDAY)
	/* Without clock_gettime the stored value is in microseconds;
	 * scale up to nanoseconds for the timespec deadline. */
	delay.tv_nsec *= 1000;
#endif
	timespecSetAbstime(&abstime, &delay);

	while ((error = pthread_cond_timedwait(&timer->cv, &timer->mutex, &abstime)) != 0) {
		/* Only a timeout triggers the task; a signal or error stops us. */
		if (error != ETIMEDOUT || timer->task == NULL)
			break;

		TIMER_START(period);
		(*timer->task)(timer);
		pthread_testcancel();

		/* Zero period => one-shot timer. */
		if (memcmp(&timer->period, &time_zero, sizeof (time_zero)) == 0)
			break;

		/* Compute execution time of task. */
		TIMER_DIFF(period);

		/* Compute remainder of period. */
		period = timer->period;
		CLOCK_SUB(&period, &TIMER_DIFF_VAR(period));
#if !defined(HAVE_CLOCK_GETTIME) && defined(HAVE_GETTIMEOFDAY)
		period.tv_usec *= 1000;
#endif
		/* Set end of next iteration. */
		timespecSetAbstime(&abstime, (struct timespec *) &period);
	}

	PTHREAD_MUTEX_UNLOCK(&timer->mutex);
#if defined(__WIN32__) || defined(__CYGWIN__)
	pthread_exit(NULL);
#endif
	return NULL;
}
/*
 * Container writer thread.
 *
 * Drains container_buffer, persisting each container with
 * write_container(). The container is peeked (sync_queue_get_top) and
 * only popped AFTER the write returns, so at every instant a container
 * is either still visible in the buffer or already on disk — never
 * lost in between. A NULL top terminates the thread.
 */
static void* append_thread(void *arg) {
	for (;;) {
		struct container *con = sync_queue_get_top(container_buffer);
		if (!con)
			break;

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);
		write_container(con);
		TIMER_END(1, jcr.write_time);

		/* Remove from the buffer only once it is safely on disk. */
		sync_queue_pop(container_buffer);
		free_container(con);
	}
	return NULL;
}
/*
 * HAR (History-Aware Rewriting) monitor hook.
 *
 * Accumulates `size` bytes of referenced data against container `id` in
 * container_utilization_monitor, creating the record the first time a
 * container is seen. Time spent here is charged to the rewrite phase.
 */
void har_monitor_update(containerid id, int32_t size) {
	TIMER_DECLARE(1);
	TIMER_BEGIN(1);

	struct containerRecord *rec =
			g_hash_table_lookup(container_utilization_monitor, &id);
	if (!rec) {
		rec = (struct containerRecord*) malloc(sizeof(struct containerRecord));
		rec->cid = id;
		rec->size = 0;
		/* The key points into the record itself (&rec->cid), so it remains
		 * valid for as long as the record lives in the table. */
		g_hash_table_insert(container_utilization_monitor, &rec->cid, rec);
	}
	rec->size += size;

	TIMER_END(1, jcr.rewrite_time);
}
static void* read_trace_thread(void *argv) { FILE *trace_file = fopen(jcr.path, "r"); char line[128]; while (1) { TIMER_DECLARE(1); TIMER_BEGIN(1); fgets(line, 128, trace_file); TIMER_END(1, jcr.read_time); if (strcmp(line, "stream end") == 0) { sync_queue_term(trace_queue); break; } struct chunk* c; TIMER_BEGIN(1), assert(strncmp(line, "file start ", 11) == 0); int filenamelen; sscanf(line, "file start %d", &filenamelen); /* An additional '\n' is read */ c = new_chunk(filenamelen + 2); fgets(c->data, filenamelen + 2, trace_file); c->data[filenamelen] = 0; VERBOSE("Reading: %s", c->data); SET_CHUNK(c, CHUNK_FILE_START); TIMER_END(1, jcr.read_time); sync_queue_push(trace_queue, c); TIMER_BEGIN(1); fgets(line, 128, trace_file); while (strncmp(line, "file end", 8) != 0) { c = new_chunk(0); char code[41]; strncpy(code, line, 40); code2hash(code, c->fp); c->size = atoi(line + 41); jcr.chunk_num++; jcr.data_size += c->size; TIMER_END(1, jcr.read_time); sync_queue_push(trace_queue, c); TIMER_BEGIN(1), fgets(line, 128, trace_file); } c = new_chunk(0); SET_CHUNK(c, CHUNK_FILE_END); sync_queue_push(trace_queue, c); jcr.file_num++; } fclose(trace_file); return NULL; }
/*
 * Runs a complete backup job over `path`.
 *
 * Pipeline: read -> chunk -> hash (or a single trace-read phase under
 * SIMULATION_ALL) -> dedup -> rewrite -> filter. The main thread then
 * polls job progress every 5 seconds until the filter phase marks
 * jcr.status = JCR_STATUS_DONE, stops all phases, prints a detailed
 * report to stdout and appends one summary line to "backup.log".
 *
 * NOTE(review): the poll condition `RUNNING || != DONE` is effectively
 * `status != JCR_STATUS_DONE`; the first clause is redundant.
 * NOTE(review): `start` (time of day) is captured but never used.
 * NOTE(review): divisions by jcr.chunk_num / jcr.total_time / per-phase
 * times assume they are nonzero — a zero-data job prints inf/nan.
 */
void do_backup(char *path) {

	init_recipe_store();
	init_container_store();
	init_index();

	init_backup_jcr(path);

	NOTICE("\n\n==== backup begin ====");

	TIMER_DECLARE(1);
	TIMER_BEGIN(1);

	time_t start = time(NULL);
	if (destor.simulation_level == SIMULATION_ALL) {
		/* Full simulation: chunks come from a pre-recorded trace. */
		start_read_trace_phase();
	} else {
		start_read_phase();
		start_chunk_phase();
		start_hash_phase();
	}
	start_dedup_phase();
	start_rewrite_phase();
	start_filter_phase();

	/* Progress loop: the filter thread flips jcr.status to DONE. */
	do{
		sleep(5);
		/*time_t now = time(NULL);*/
		fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\r",
				jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num);
	}while(jcr.status == JCR_STATUS_RUNNING || jcr.status != JCR_STATUS_DONE);
	fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\n",
			jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num);

	if (destor.simulation_level == SIMULATION_ALL) {
		stop_read_trace_phase();
	} else {
		stop_read_phase();
		stop_chunk_phase();
		stop_hash_phase();
	}
	stop_dedup_phase();
	stop_rewrite_phase();
	stop_filter_phase();

	TIMER_END(1, jcr.total_time);

	close_index();
	close_container_store();
	close_recipe_store();

	update_backup_version(jcr.bv);
	free_backup_version(jcr.bv);

	/* Human-readable job report. */
	printf("\n\njob id: %" PRId32 "\n", jcr.id);
	printf("index method: %d.(Remark 0: NO; 1: DDFS; 2: Extreme binning; 3: Silo; 4: Sparse; 5: Sampled; 6: Block; 7: Learn)\n", destor.index_specific);
	printf("sampling method: %d (%d) (Remark 1:Random; 2: Min; 3: Uniform; 4: Optimized_min)\n", destor.index_sampling_method[0], destor.index_sampling_method[1]);
	printf("segment method: %d (%d) (Remark 0: Fixed; 1: Content; 2: File)\n", destor.index_segment_algorithm[0], destor.index_segment_algorithm[1]);
	printf("prefetch # of segments: %d (Remark 1 for sparse index)\n", destor.index_segment_prefech);
	printf("segment selection method: %d (%d)(Remark 0: Base; 1: Top; 2: Mix)\n", destor.index_segment_selection_method[0], destor.index_segment_selection_method[1]);
	printf("backup path: %s\n", jcr.path);
	printf("number of files: %d\n", jcr.file_num);
	printf("number of chunks: %" PRId32 " (%" PRId64 " bytes on average)\n", jcr.chunk_num, jcr.data_size / jcr.chunk_num);
	printf("number of unique chunks: %" PRId32 "\n", jcr.unique_chunk_num);
	printf("total size(B): %" PRId64 "\n", jcr.data_size);
	printf("stored data size(B): %" PRId64 "\n", jcr.unique_data_size + jcr.rewritten_chunk_size);
	printf("deduplication ratio: %.4f, %.4f\n",
			jcr.data_size != 0 ?
					(jcr.data_size - jcr.unique_data_size - jcr.rewritten_chunk_size)
							/ (double) (jcr.data_size) : 0,
			jcr.data_size / (double) (jcr.unique_data_size + jcr.rewritten_chunk_size));
	printf("total time(s): %.3f\n", jcr.total_time / 1000000);
	printf("the index memory footprint (B): %" PRId32 "\n", destor.index_memory_footprint);
	printf("throughput(MB/s): %.2f\n",
			(double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time));
	printf("number of zero chunks: %" PRId32 "\n", jcr.zero_chunk_num);
	printf("size of zero chunks: %" PRId64 "\n", jcr.zero_chunk_size);
	printf("number of rewritten chunks: %" PRId32 "\n", jcr.rewritten_chunk_num);
	printf("size of rewritten chunks: %" PRId64 "\n", jcr.rewritten_chunk_size);
	printf("rewritten rate in size: %.3f\n", jcr.rewritten_chunk_size / (double) jcr.data_size);

	/* Accumulate this job's stats into the global destor totals. */
	destor.data_size += jcr.data_size;
	destor.stored_data_size += jcr.unique_data_size + jcr.rewritten_chunk_size;

	destor.chunk_num += jcr.chunk_num;
	destor.stored_chunk_num += jcr.unique_chunk_num + jcr.rewritten_chunk_num;
	destor.zero_chunk_num += jcr.zero_chunk_num;
	destor.zero_chunk_size += jcr.zero_chunk_size;
	destor.rewritten_chunk_num += jcr.rewritten_chunk_num;
	destor.rewritten_chunk_size += jcr.rewritten_chunk_size;

	/* Per-phase timings (times are in microseconds). */
	printf("read_time : %.3fs, %.2fMB/s\n", jcr.read_time / 1000000,
			jcr.data_size * 1000000 / jcr.read_time / 1024 / 1024);
	printf("chunk_time : %.3fs, %.2fMB/s\n", jcr.chunk_time / 1000000,
			jcr.data_size * 1000000 / jcr.chunk_time / 1024 / 1024);
	printf("hash_time : %.3fs, %.2fMB/s\n", jcr.hash_time / 1000000,
			jcr.data_size * 1000000 / jcr.hash_time / 1024 / 1024);
	printf("dedup_time : %.3fs, %.2fMB/s\n", jcr.dedup_time / 1000000,
			jcr.data_size * 1000000 / jcr.dedup_time / 1024 / 1024);
	printf("rewrite_time : %.3fs, %.2fMB/s\n", jcr.rewrite_time / 1000000,
			jcr.data_size * 1000000 / jcr.rewrite_time / 1024 / 1024);
	printf("filter_time : %.3fs, %.2fMB/s\n", jcr.filter_time / 1000000,
			jcr.data_size * 1000000 / jcr.filter_time / 1024 / 1024);
	printf("write_time : %.3fs, %.2fMB/s\n", jcr.write_time / 1000000,
			jcr.data_size * 1000000 / jcr.write_time / 1024 / 1024);

	//double seek_time = 0.005; //5ms
	//double bandwidth = 120 * 1024 * 1024; //120MB/s

	/*	double index_lookup_throughput = jcr.data_size
	 / (index_read_times * seek_time
	 + index_read_entry_counter * 24 / bandwidth) / 1024 / 1024;

	 double write_data_throughput = 1.0 * jcr.data_size * bandwidth
	 / (jcr->unique_chunk_num) / 1024 / 1024;
	 double index_read_throughput = 1.0 * jcr.data_size / 1024 / 1024
	 / (index_read_times * seek_time
	 + index_read_entry_counter * 24 / bandwidth);
	 double index_write_throughput = 1.0 * jcr.data_size / 1024 / 1024
	 / (index_write_times * seek_time
	 + index_write_entry_counter * 24 / bandwidth);*/

	/*	double estimated_throughput = write_data_throughput;
	 if (estimated_throughput > index_read_throughput)
	 estimated_throughput = index_read_throughput;*/
	/*if (estimated_throughput > index_write_throughput)
	 estimated_throughput = index_write_throughput;*/

	char logfile[] = "backup.log";
	FILE *fp = fopen(logfile, "a");
	/*
	 * job id,
	 * the size of backup
	 * accumulative consumed capacity,
	 * deduplication rate,
	 * rewritten rate,
	 * total container number,
	 * sparse container number,
	 * inherited container number,
	 * 4 * index overhead (4 * int)
	 * throughput,
	 */
	fprintf(fp, "%" PRId32 " %" PRId64 " %" PRId64 " %.4f %.4f %" PRId32 " %" PRId32 " %" PRId32 " %" PRId32" %" PRId32 " %" PRId32" %" PRId32" %.2f\n",
			jcr.id,
			jcr.data_size,
			destor.stored_data_size,
			jcr.data_size != 0 ?
					(jcr.data_size - jcr.rewritten_chunk_size - jcr.unique_data_size)/(double) (jcr.data_size) : 0,
			jcr.data_size != 0 ?
					(double) (jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0,
			jcr.total_container_num,
			jcr.sparse_container_num,
			jcr.inherited_sparse_num,
			index_overhead.lookup_requests,
			index_overhead.lookup_requests_for_unique,
			index_overhead.update_requests,
			index_overhead.read_prefetching_units,
			(double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time));
	fclose(fp);
}
/*
 * Reads container `cid` from storage and inserts it into the cache.
 *
 * When data caching is disabled, or under high simulation levels, only
 * container metadata is read. If the LRU insert evicts a container, all
 * of the evictee's fingerprints are unlinked from cc->map (a table of
 * fingerprint -> GSequence of containers, sorted by container_cmp_des);
 * then the new container's fingerprints are linked in.
 *
 * Returns the cached container, or NULL if nothing could be read (e.g.
 * the container was appended so recently that it is not on disk yet).
 */
Container *container_cache_insert_container(ContainerCache *cc, ContainerId cid) {
	/* read container */
	Container *container = 0;
	TIMER_DECLARE(b, e);
	TIMER_BEGIN(b);
	if (cc->enable_data) {
		if (simulation_level >= SIMULATION_RECOVERY) {
			container = read_container_meta_only(cid);
		} else {
			container = read_container(cid);
		}
	} else {
		container = read_container_meta_only(cid);
	}
	TIMER_END(read_container_time, b, e);

	/* If this container is newly appended,
	 * maybe we can read nothing. */
	if (container == NULL) {
		return NULL;
	}

	/* insert */
	Container *evictor = lru_cache_insert(cc->lru_cache, container);

	/* evict */
	if (evictor) {
		int32_t chunknum = container_get_chunk_num(evictor);
		Fingerprint *fingers = container_get_all_fingers(evictor);
		int i = 0;
		/* evict all fingers of evictor from map */
		for (; i < chunknum; ++i) {
			GSequence* container_list = g_hash_table_lookup(cc->map, &fingers[i]);
			/* remove the specified container from list */
			/* g_sequence_lookup requires the list to be sorted by
			 * container_cmp_des, which is how entries are inserted below. */
			GSequenceIter *iter = g_sequence_lookup(container_list, evictor, container_cmp_des, NULL);
			if (iter)
				g_sequence_remove(iter);
			else
				dprint("Error! The sequence does not contain the container.");
			/* Drop the map entry once no cached container holds this finger. */
			if (g_sequence_get_length(container_list) == 0) {
				g_hash_table_remove(cc->map, &fingers[i]);
			}
		}
		free(fingers);
		/* Log the evictee's utilization for fragmentation analysis. */
		if (fragment_stream)
			fprintf(fragment_stream, "%.4f\n", 1.0 * evictor->used_size / CONTAINER_SIZE);
		container_free_full(evictor);
	}

	/* insert */
	int32_t num = container_get_chunk_num(container);
	Fingerprint *nfingers = container_get_all_fingers(container);
	int i = 0;
	for (; i < num; ++i) {
		GSequence* container_list = g_hash_table_lookup(cc->map, &nfingers[i]);
		if (container_list == 0) {
			container_list = g_sequence_new(NULL);
			/* The map owns a private copy of the fingerprint key. */
			Fingerprint *finger = (Fingerprint *) malloc(sizeof(Fingerprint));
			memcpy(finger, &nfingers[i], sizeof(Fingerprint));
			g_hash_table_insert(cc->map, finger, container_list);
		}
		g_sequence_insert_sorted(container_list, container, container_cmp_des, NULL);
	}
	free(nfingers);

	return container;
}
/*
 * Runs a complete restore of backup `revision` into directory `path`.
 *
 * Spawns the recipe-reading thread and one restore-cache thread (LRU,
 * OPT or ASM per destor.restore_cache[0]); the calling thread acts as
 * the final consumer via write_restore_data(). When that returns, both
 * queues must be drained (asserted). Prints a report and appends a
 * summary line to "restore.log".
 *
 * NOTE(review): recipe_t/read_t are never joined — write_restore_data()
 * returning implies the producers terminated their queues, but the
 * thread handles themselves are leaked. Confirm intentional.
 * NOTE(review): speed factor divides by jcr.read_container_num; a job
 * that read no containers prints inf.
 */
void do_restore(int revision, char *path) {

	init_recipe_store();
	init_container_store();

	init_restore_jcr(revision, path);

	destor_log(DESTOR_NOTICE, "job id: %d", jcr.id);
	destor_log(DESTOR_NOTICE, "backup path: %s", jcr.bv->path);
	destor_log(DESTOR_NOTICE, "restore to: %s", jcr.path);

	restore_chunk_queue = sync_queue_new(100);
	restore_recipe_queue = sync_queue_new(100);

	TIMER_DECLARE(1);
	TIMER_BEGIN(1);

	puts("==== restore begin ====");

	pthread_t recipe_t, read_t;
	pthread_create(&recipe_t, NULL, read_recipe_thread, NULL);

	/* Select the restore-cache strategy. */
	if (destor.restore_cache[0] == RESTORE_CACHE_LRU) {
		destor_log(DESTOR_NOTICE, "restore cache is LRU");
		pthread_create(&read_t, NULL, lru_restore_thread, NULL);
	} else if (destor.restore_cache[0] == RESTORE_CACHE_OPT) {
		destor_log(DESTOR_NOTICE, "restore cache is OPT");
		pthread_create(&read_t, NULL, optimal_restore_thread, NULL);
	} else if (destor.restore_cache[0] == RESTORE_CACHE_ASM) {
		destor_log(DESTOR_NOTICE, "restore cache is ASM");
		pthread_create(&read_t, NULL, assembly_restore_thread, NULL);
	} else {
		fprintf(stderr, "Invalid restore cache.\n");
		exit(1);
	}

	/* The main thread consumes restore_chunk_queue and writes files. */
	write_restore_data();

	assert(sync_queue_size(restore_chunk_queue) == 0);
	assert(sync_queue_size(restore_recipe_queue) == 0);

	free_backup_version(jcr.bv);

	TIMER_END(1, jcr.total_time);
	puts("==== restore end ====");

	/* Job report (times are in microseconds). */
	printf("job id: %d\n", jcr.id);
	printf("restore path: %s\n", jcr.path);
	printf("number of files: %d\n", jcr.file_num);
	printf("number of chunks: %d\n", jcr.chunk_num);
	printf("total size(B): %ld\n", jcr.data_size);
	printf("total time(s): %.3f\n", jcr.total_time / 1000000);
	printf("throughput(MB/s): %.2f\n",
			jcr.data_size * 1000000 / (1024.0 * 1024 * jcr.total_time));
	printf("speed factor: %.2f\n",
			jcr.data_size / (1024.0 * 1024 * jcr.read_container_num));

	printf("read_recipe_time : %.3fs, %.2fMB/s\n", jcr.read_recipe_time / 1000000,
			jcr.data_size * 1000000 / jcr.read_recipe_time / 1024 / 1024);
	printf("read_chunk_time : %.3fs, %.2fMB/s\n", jcr.read_chunk_time / 1000000,
			jcr.data_size * 1000000 / jcr.read_chunk_time / 1024 / 1024);
	printf("write_chunk_time : %.3fs, %.2fMB/s\n", jcr.write_chunk_time / 1000000,
			jcr.data_size * 1000000 / jcr.write_chunk_time / 1024 / 1024);

	char logfile[] = "restore.log";
	FILE *fp = fopen(logfile, "a");
	/*
	 * job id,
	 * chunk num,
	 * data size,
	 * actually read container number,
	 * speed factor,
	 * throughput
	 */
	fprintf(fp, "%d %lld %d %.4f %.4f\n",
			jcr.id,
			jcr.data_size,
			jcr.read_container_num,
			jcr.data_size / (1024.0 * 1024 * jcr.read_container_num),
			jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time));
	fclose(fp);

	close_container_store();
	close_recipe_store();
}
/*
 * Final consumer of the restore pipeline.
 *
 * First creates every directory along jcr.path (mkdir -p style, by
 * temporarily NUL-terminating at each '/'); then pops chunks from
 * restore_chunk_queue: CHUNK_FILE_START creates the file's directories
 * and opens it, data chunks are appended, CHUNK_FILE_END closes it.
 * Under simulation no file is opened and no data chunk arrives
 * (asserted). Write time is charged to jcr.write_chunk_time.
 *
 * NOTE(review): fopen() result is not checked; if the open fails, the
 * later fwrite() passes a NULL stream — worth hardening.
 */
void write_restore_data() {

	char *p, *q;
	q = jcr.path + 1;/* ignore the first char*/
	/*
	 * recursively make directory
	 */
	while ((p = strchr(q, '/'))) {
		/* Skip consecutive '/' characters. */
		if (*p == *(p - 1)) {
			q++;
			continue;
		}
		/* Temporarily terminate the path at this component. */
		*p = 0;
		if (access(jcr.path, 0) != 0) {
			mkdir(jcr.path, S_IRWXU | S_IRWXG | S_IRWXO);
		}
		*p = '/';
		q = p + 1;
	}

	struct chunk *c = NULL;
	FILE *fp = NULL;

	while ((c = sync_queue_pop(restore_chunk_queue))) {
		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		if (CHECK_CHUNK(c, CHUNK_FILE_START)) {
			NOTICE("Restoring: %s", c->data);

			/* Full target path = restore root + recorded file name. */
			sds filepath = sdsdup(jcr.path);
			filepath = sdscat(filepath, c->data);

			/* Create intermediate directories of the file's own path
			 * (same temporary-NUL technique as above). */
			int len = sdslen(jcr.path);
			char *q = filepath + len;
			char *p;
			while ((p = strchr(q, '/'))) {
				if (*p == *(p - 1)) {
					q++;
					continue;
				}
				*p = 0;
				if (access(filepath, 0) != 0) {
					mkdir(filepath, S_IRWXU | S_IRWXG | S_IRWXO);
				}
				*p = '/';
				q = p + 1;
			}

			if (destor.simulation_level == SIMULATION_NO) {
				/* The previous file must have been closed by FILE_END. */
				assert(fp == NULL);
				fp = fopen(filepath, "w");
			}

			sdsfree(filepath);
		} else if (CHECK_CHUNK(c, CHUNK_FILE_END)) {
			if (fp)
				fclose(fp);
			fp = NULL;
		} else {
			/* Data chunks only exist when not simulating. */
			assert(destor.simulation_level == SIMULATION_NO);
			VERBOSE("Restoring %d bytes", c->size);
			fwrite(c->data, c->size, 1, fp);
		}

		free_chunk(c);
		TIMER_END(1, jcr.write_chunk_time);
	}
}
/*
 * When a container buffer is full, we push it into container_queue.
 */
/*
 * Filter phase: the last deduplication stage.
 *
 * Consumes segments (CHUNK_SEGMENT_START ... CHUNK_SEGMENT_END) from
 * rewrite_queue. Under the index lock it: resolves denied/duplicate
 * rewrite decisions, writes unique or rewritten chunks into the open
 * container (flushing full containers asynchronously), appends the
 * file recipes, updates the fingerprint index per locality category,
 * and feeds the HAR/restore-aware monitors. Sets jcr.status to
 * JCR_STATUS_DONE when the stream ends.
 */
static void* filter_thread(void *arg) {
	int enable_rewrite = 1;
	struct fileRecipeMeta* r = NULL;

	while (1) {
		struct chunk* c = sync_queue_pop(rewrite_queue);

		if (c == NULL)
			/* backup job finish */
			break;

		/* reconstruct a segment */
		struct segment* s = new_segment();

		/* segment head */
		assert(CHECK_CHUNK(c, CHUNK_SEGMENT_START));
		free_chunk(c);

		/* Collect everything up to the segment tail; only real data
		 * chunks count toward s->chunk_num. */
		c = sync_queue_pop(rewrite_queue);
		while (!(CHECK_CHUNK(c, CHUNK_SEGMENT_END))) {
			g_sequence_append(s->chunks, c);
			if (!CHECK_CHUNK(c, CHUNK_FILE_START) && !CHECK_CHUNK(c, CHUNK_FILE_END))
				s->chunk_num++;
			c = sync_queue_pop(rewrite_queue);
		}
		free_chunk(c);

		/* For self-references in a segment.
		 * If we find an early copy of the chunk in this segment has been rewritten,
		 * the rewrite request for it will be denied to avoid repeat rewriting. */
		GHashTable *recently_rewritten_chunks = g_hash_table_new_full(g_int64_hash, g_fingerprint_equal, NULL, free_chunk);
		GHashTable *recently_unique_chunks = g_hash_table_new_full(g_int64_hash, g_fingerprint_equal, NULL, free_chunk);

		pthread_mutex_lock(&index_lock.mutex);

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);
		/* This function will check the fragmented chunks
		 * that would be rewritten later.
		 * If we find an early copy of the chunk in earlier segments,
		 * has been rewritten,
		 * the rewrite request for it will be denied. */
		index_check_buffer(s);

		GSequenceIter *iter = g_sequence_get_begin_iter(s->chunks);
		GSequenceIter *end = g_sequence_get_end_iter(s->chunks);
		for (; iter != end; iter = g_sequence_iter_next(iter)) {
			c = g_sequence_get(iter);

			if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END))
				continue;

			VERBOSE("Filter phase: %dth chunk in %s container %lld", chunk_num,
					CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER) ? "out-of-order" : "", c->id);

			/* Cache-Aware Filter */
			if (destor.rewrite_enable_cache_aware && restore_aware_contains(c->id)) {
				assert(c->id != TEMPORARY_ID);
				VERBOSE("Filter phase: %dth chunk is cached", chunk_num);
				SET_CHUNK(c, CHUNK_IN_CACHE);
			}

			/* A cfl-switch for rewriting out-of-order chunks. */
			if (destor.rewrite_enable_cfl_switch) {
				double cfl = restore_aware_get_cfl();
				if (enable_rewrite && cfl > destor.rewrite_cfl_require) {
					VERBOSE("Filter phase: Turn OFF the (out-of-order) rewrite switch of %.3f", cfl);
					enable_rewrite = 0;
				} else if (!enable_rewrite && cfl < destor.rewrite_cfl_require) {
					VERBOSE("Filter phase: Turn ON the (out-of-order) rewrite switch of %.3f", cfl);
					enable_rewrite = 1;
				}
			}

			/* A duplicate with TEMPORARY_ID refers to an earlier copy in
			 * this same segment; resolve its container id from that copy. */
			if(CHECK_CHUNK(c, CHUNK_DUPLICATE) && c->id == TEMPORARY_ID){
				struct chunk* ruc = g_hash_table_lookup(recently_unique_chunks, &c->fp);
				assert(ruc);
				c->id = ruc->id;
			}
			struct chunk* rwc = g_hash_table_lookup(recently_rewritten_chunks, &c->fp);
			if(rwc){
				c->id = rwc->id;
				SET_CHUNK(c, CHUNK_REWRITE_DENIED);
			}

			/* A fragmented chunk will be denied if it has been rewritten recently */
			if (!CHECK_CHUNK(c, CHUNK_DUPLICATE)
					|| (!CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)
							&& (CHECK_CHUNK(c, CHUNK_SPARSE)
									|| (enable_rewrite && CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)
											&& !CHECK_CHUNK(c, CHUNK_IN_CACHE))))) {
				/*
				 * If the chunk is unique, or be fragmented and not denied,
				 * we write it to a container.
				 * Fragmented indicates: sparse, or out of order and not in cache,
				 */
				if (storage_buffer.container_buffer == NULL){
					storage_buffer.container_buffer = create_container();
					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY)
						storage_buffer.chunks = g_sequence_new(free_chunk);
				}

				/* Container full: index its features, flush it, start fresh.
				 * The flush is excluded from jcr.filter_time. */
				if (container_overflow(storage_buffer.container_buffer, c->size)) {

					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
						/*
						 * TO-DO
						 * Update_index for physical locality
						 */
						GHashTable *features = sampling(storage_buffer.chunks, g_sequence_get_length(storage_buffer.chunks));
						index_update(features, get_container_id(storage_buffer.container_buffer));
						g_hash_table_destroy(features);
						g_sequence_free(storage_buffer.chunks);
						storage_buffer.chunks = g_sequence_new(free_chunk);
					}
					TIMER_END(1, jcr.filter_time);
					write_container_async(storage_buffer.container_buffer);
					TIMER_BEGIN(1);
					storage_buffer.container_buffer = create_container();
				}

				if(add_chunk_to_container(storage_buffer.container_buffer, c)){

					/* Record this write so later copies in the segment can
					 * resolve their container id / be denied a rewrite. */
					struct chunk* wc = new_chunk(0);
					memcpy(&wc->fp, &c->fp, sizeof(fingerprint));
					wc->id = c->id;
					if (!CHECK_CHUNK(c, CHUNK_DUPLICATE)) {
						jcr.unique_chunk_num++;
						jcr.unique_data_size += c->size;
						g_hash_table_insert(recently_unique_chunks, &wc->fp, wc);
						VERBOSE("Filter phase: %dth chunk is recently unique, size %d", chunk_num,
								g_hash_table_size(recently_unique_chunks));
					} else {
						jcr.rewritten_chunk_num++;
						jcr.rewritten_chunk_size += c->size;
						g_hash_table_insert(recently_rewritten_chunks, &wc->fp, wc);
					}

					if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
						struct chunk* ck = new_chunk(0);
						memcpy(&ck->fp, &c->fp, sizeof(fingerprint));
						g_sequence_append(storage_buffer.chunks, ck);
					}

					VERBOSE("Filter phase: Write %dth chunk to container %lld", chunk_num, c->id);
				}else{
					VERBOSE("Filter phase: container %lld already has this chunk", c->id);
					assert(destor.index_category[0] != INDEX_CATEGORY_EXACT
							|| destor.rewrite_algorithm[0]!=REWRITE_NO);
				}

			}else{
				if(CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)){
					VERBOSE("Filter phase: %lldth fragmented chunk is denied", chunk_num);
				}else if (CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)) {
					VERBOSE("Filter phase: %lldth chunk in out-of-order container %lld is already cached", chunk_num, c->id);
				}
			}

			/* By now every chunk must have a real container id. */
			assert(c->id != TEMPORARY_ID);

			/* Collect historical information. */
			har_monitor_update(c->id, c->size);
			/* Restore-aware */
			restore_aware_update(c->id, c->size);

			chunk_num++;
		}

		int full = index_update_buffer(s);

		/* Write a SEGMENT_BEGIN */
		segmentid sid = append_segment_flag(jcr.bv, CHUNK_SEGMENT_START, s->chunk_num);

		/* Write recipe */
		iter = g_sequence_get_begin_iter(s->chunks);
		end = g_sequence_get_end_iter(s->chunks);
		for (; iter != end; iter = g_sequence_iter_next(iter)) {
			c = g_sequence_get(iter);

			if(r == NULL){
				/* A new file must open with its FILE_START marker;
				 * the marker's payload is the file name. */
				assert(CHECK_CHUNK(c,CHUNK_FILE_START));
				r = new_file_recipe_meta(c->data);
			}else if(!CHECK_CHUNK(c,CHUNK_FILE_END)){
				struct chunkPointer cp;
				cp.id = c->id;
				assert(cp.id>=0);
				memcpy(&cp.fp, &c->fp, sizeof(fingerprint));
				cp.size = c->size;
				append_n_chunk_pointers(jcr.bv, &cp ,1);
				r->chunknum++;
				r->filesize += c->size;

				jcr.chunk_num++;
				jcr.data_size += c->size;
			}else{
				assert(CHECK_CHUNK(c,CHUNK_FILE_END));
				append_file_recipe_meta(jcr.bv, r);
				free_file_recipe_meta(r);
				r = NULL;

				jcr.file_num++;
			}
		}

		/* Write a SEGMENT_END */
		append_segment_flag(jcr.bv, CHUNK_SEGMENT_END, 0);

		if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){
			/*
			 * TO-DO
			 * Update_index for logical locality
			 */
			s->features = sampling(s->chunks, s->chunk_num);
			if(destor.index_category[0] == INDEX_CATEGORY_EXACT){
				/*
				 * For exact deduplication,
				 * unique fingerprints are inserted.
				 */
				VERBOSE("Filter phase: add %d unique fingerprints to %d features",
						g_hash_table_size(recently_unique_chunks),
						g_hash_table_size(s->features));
				GHashTableIter iter;
				gpointer key, value;
				g_hash_table_iter_init(&iter, recently_unique_chunks);
				while(g_hash_table_iter_next(&iter, &key, &value)){
					struct chunk* uc = value;
					fingerprint *ft = malloc(sizeof(fingerprint));
					memcpy(ft, &uc->fp, sizeof(fingerprint));
					g_hash_table_insert(s->features, ft, NULL);
				}

				/*
				 * OPTION:
				 * It is still an open problem whether we need to update
				 * rewritten fingerprints.
				 * It would increase index update overhead, while the benefit
				 * remains unclear.
				 * More experiments are required.
				 */
				VERBOSE("Filter phase: add %d rewritten fingerprints to %d features",
						g_hash_table_size(recently_rewritten_chunks),
						g_hash_table_size(s->features));
				g_hash_table_iter_init(&iter, recently_rewritten_chunks);
				while(g_hash_table_iter_next(&iter, &key, &value)){
					struct chunk* uc = value;
					fingerprint *ft = malloc(sizeof(fingerprint));
					memcpy(ft, &uc->fp, sizeof(fingerprint));
					g_hash_table_insert(s->features, ft, NULL);
				}
			}
			index_update(s->features, sid);
		}

		free_segment(s);

		/* Wake dedup threads blocked on a previously-full index buffer. */
		if(index_lock.wait_threshold > 0 && full == 0){
			pthread_cond_broadcast(&index_lock.cond);
		}
		TIMER_END(1, jcr.filter_time);
		pthread_mutex_unlock(&index_lock.mutex);

		g_hash_table_destroy(recently_rewritten_chunks);
		g_hash_table_destroy(recently_unique_chunks);
	}

	/* Flush the final, partially-filled container. */
	if (storage_buffer.container_buffer
			&& !container_empty(storage_buffer.container_buffer)){
		if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
			/*
			 * TO-DO
			 * Update_index for physical locality
			 */
			GHashTable *features = sampling(storage_buffer.chunks, g_sequence_get_length(storage_buffer.chunks));
			index_update(features, get_container_id(storage_buffer.container_buffer));
			g_hash_table_destroy(features);
			g_sequence_free(storage_buffer.chunks);
		}
		write_container_async(storage_buffer.container_buffer);
	}

	/* All files done */
	jcr.status = JCR_STATUS_DONE;
	return NULL;
}
void backup_formal(int fd,char *msg){ JCR *jcr=NULL; char fileset[256]={0}; char *buf=(char *)calloc(1,SOCKET_BUF_SIZE+21); int len; int index=1; char vol_name[FILE_NAME_LEN]; int vol_fd; Recipe *rp=NULL; FingerChunk *fc=NULL; char *p=NULL; int64_t rwlen=0; jobcount_init(); jcr=jcr_new(); jcr->dataSocket=fd; memset(vol_name,0,FILE_NAME_LEN); strcpy(vol_name,BackupVolPath); strcat(vol_name,"data_vol"); vol_fd=open(vol_name,O_RDWR| O_CREAT,00644); if(vol_fd<0){ err_msg1("can't open file"); goto FAIL; } printf("%s %d vol_name:%s\n",__FILE__,__LINE__,vol_name); rwlen=lseek(vol_fd,0,SEEK_END); TIMER_DECLARE(gstart,gend); TIMER_DECLARE(wstart,wend); TIMER_START(gstart); if(sscanf(msg,backup_cmd,fileset)!=1){ // backup cmd goto FAIL; } jcr->jobv=jobv_new(fileset); jcr->nJobId=jcr->jobv->nJobId; printf("===========backup start==============\n"); printf("%s,%d pathname:%s \n", __FILE__,__LINE__,fileset); while(bnet_recv(jcr->dataSocket,buf,&len)!=ERROR){ //文件名 if(len==STREAM_END){ printf("%s %d backup is over\n",__FILE__,__LINE__); break; } //printf("\033[40;32m recv file: %s (%d) \033[0m\n",buf,len); rp=recipe_new(); memcpy(rp->filename,buf,len); rp->fileindex=index++; while(bnet_recv(jcr->dataSocket,buf,&len)>0){ /*format: fingerprintf data data dta..*/ //printf("\033[40;32m recv: file data (%d) \033[0m\n",len); fc=fingerchunk_new(buf,0); fc->offset=rwlen; fc->length=len-sizeof(Fingerprint); check_data(fc->fingerprint,buf+sizeof(Fingerprint),fc->length); TIMER_START(wstart); if(writen(vol_fd,buf+sizeof(Fingerprint),fc->length)!=fc->length) err_msg1("wrintn wrong"); TIMER_END(wend); TIMER_DIFF(jcr->writeDataTime,wstart,wend); rwlen+=fc->length; jcr->nChunkCount++; jcr->nSize+=fc->length; recipe_append_fingerchunk(rp,fc); } jcr->nFileCount++; if(G_VERBOSE) printf("receive file %s OK, total: %d\n",rp->filename,jcr->nFileCount); jobv_insert_recipe(jcr->jobv, rp); rp=NULL; } FAIL: bnet_send(fd,"OK",2); // 发送备份成功信息 TIMER_END(gend); TIMER_DIFF(jcr->recvTime,gstart,gend); 
printf("============back over===============\n"); printf("total time:%.4f %.4f MB/s\n",jcr->recvTime,jcr->nSize*1.0/jcr->recvTime/1036288.0); printf("write time:%.4f %.4f MB/s\n",jcr->writeDataTime,jcr->nSize*1.0/jcr->writeDataTime/1036288.0); printf("chunk count:%d\n",jcr->nChunkCount); printf("file count:%d\n",jcr->nFileCount); if(rp){ recipe_free(rp); } jobv_destroy(jcr->jobv); jcr_free(jcr); jobcount_close(); close(vol_fd); }