Beispiel #1
0
static void* sha1_thread(void* arg) {
	char code[41];
	while (1) {
		struct chunk* c = sync_queue_pop(chunk_queue);

		if (c == NULL) {
			sync_queue_term(hash_queue);
			break;
		}

		if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) {
			sync_queue_push(hash_queue, c);
			continue;
		}

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);
		SHA_CTX ctx;
		SHA_Init(&ctx);
		SHA_Update(&ctx, c->data, c->size);
		SHA_Final(c->fp, &ctx);
		TIMER_END(1, jcr.hash_time);

		hash2code(c->fp, code);
		code[40] = 0;
		VERBOSE("Hash phase: %ldth chunk identified by %s", chunk_num++, code);

		sync_queue_push(hash_queue, c);
	}
	return NULL;
}
Beispiel #2
0
static int recv_fingerchunk(FingerChunk **fc) {
	FingerChunk* fchunk = (FingerChunk*) sync_queue_pop(fingerchunk_queue);
	if (fchunk->length == STREAM_END) {
		free(fchunk);
		*fc = 0;
		return STREAM_END;
	}
	*fc = fchunk;
	return SUCCESS;
}
Beispiel #3
0
static void* lru_restore_thread(void *arg) {
	struct lruCache *cache;
	if (destor.simulation_level >= SIMULATION_RESTORE)
		cache = new_lru_cache(destor.restore_cache[1], free_container_meta,
				lookup_fingerprint_in_container_meta);
	else
		cache = new_lru_cache(destor.restore_cache[1], free_container,
				lookup_fingerprint_in_container);

	struct chunk* c;
	while ((c = sync_queue_pop(restore_recipe_queue))) {

		if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END)) {
			sync_queue_push(restore_chunk_queue, c);
			continue;
		}

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		if (destor.simulation_level >= SIMULATION_RESTORE) {
			struct containerMeta *cm = lru_cache_lookup(cache, &c->fp);
			if (!cm) {
				VERBOSE("Restore cache: container %lld is missed", c->id);
				cm = retrieve_container_meta_by_id(c->id);
				assert(lookup_fingerprint_in_container_meta(cm, &c->fp));
				lru_cache_insert(cache, cm, NULL, NULL);
				jcr.read_container_num++;
			}

			TIMER_END(1, jcr.read_chunk_time);
		} else {
			struct container *con = lru_cache_lookup(cache, &c->fp);
			if (!con) {
				VERBOSE("Restore cache: container %lld is missed", c->id);
				con = retrieve_container_by_id(c->id);
				lru_cache_insert(cache, con, NULL, NULL);
				jcr.read_container_num++;
			}
			struct chunk *rc = get_chunk_in_container(con, &c->fp);
			assert(rc);
			TIMER_END(1, jcr.read_chunk_time);
			sync_queue_push(restore_chunk_queue, rc);
		}

		free_chunk(c);
	}

	sync_queue_term(restore_chunk_queue);

	free_lru_cache(cache);

	return NULL;
}
Beispiel #4
0
void *dedup_thread(void *arg) {
    struct segment* s = NULL;
    while (1) {
        struct chunk *c = NULL;
        if (destor.simulation_level != SIMULATION_ALL)
            c = sync_queue_pop(hash_queue);
        else
            c = sync_queue_pop(trace_queue);

        /* Add the chunk to the segment. */
        s = segmenting(c);
        if (!s)
            continue;
        /* segmenting success */
        if (s->chunk_num > 0) {
            VERBOSE("Dedup phase: the %lldth segment of %lld chunks", segment_num++,
                    s->chunk_num);
            /* Each duplicate chunk will be marked. */
            pthread_mutex_lock(&index_lock.mutex);
            while (index_lookup(s) == 0) {
                pthread_cond_wait(&index_lock.cond, &index_lock.mutex);
            }
            pthread_mutex_unlock(&index_lock.mutex);
        } else {
            VERBOSE("Dedup phase: an empty segment");
        }
        /* Send chunks in the segment to the next phase.
         * The segment will be cleared. */
        send_segment(s);

        free_segment(s);
        s = NULL;

        if (c == NULL)
            break;
    }

    sync_queue_term(dedup_queue);

    return NULL;
}
Beispiel #5
0
void make_trace(char* path) {
	init_jcr(path);

	sds trace_file = sdsnew(path);

	char *p = trace_file + sdslen(trace_file) - 1;
	while (*p == '/')
		--p;
	*(p + 1) = 0;
	sdsupdatelen(trace_file);

	trace_file = sdscat(trace_file, ".trace");
	NOTICE("output to %s", trace_file);

	start_read_phase();
	start_chunk_phase();
	start_hash_phase();

	unsigned char code[41];

	FILE *fp = fopen(trace_file, "w");
	while (1) {
		struct chunk *c = sync_queue_pop(hash_queue);

		if (c == NULL) {
			break;
		}

		if (CHECK_CHUNK(c, CHUNK_FILE_START)) {
			destor_log(DESTOR_NOTICE, c->data);
			fprintf(fp, "file start %zd\n", strlen(c->data));
			fprintf(fp, "%s\n", c->data);

		} else if (CHECK_CHUNK(c, CHUNK_FILE_END)) {
			fprintf(fp, "file end\n");
		} else {
			hash2code(c->fp, code);
			code[40] = 0;
			fprintf(fp, "%s %d\n", code, c->size);
		}
		free_chunk(c);
	}

	fprintf(fp, "stream end");
	fclose(fp);

}
Beispiel #6
0
/*
 * We must ensure a container is either in the buffer or written to disks.
 */
static void* append_thread(void *arg) {

	while (1) {
		struct container *c = sync_queue_get_top(container_buffer);
		if (c == NULL)
			break;

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		write_container(c);

		TIMER_END(1, jcr.write_time);

		sync_queue_pop(container_buffer);

		free_container(c);
	}

	return NULL;
}
Beispiel #7
0
void write_restore_data() {

	char *p, *q;
	q = jcr.path + 1;/* ignore the first char*/
	/*
	 * recursively make directory
	 */
	while ((p = strchr(q, '/'))) {
		if (*p == *(p - 1)) {
			q++;
			continue;
		}
		*p = 0;
		if (access(jcr.path, 0) != 0) {
			mkdir(jcr.path, S_IRWXU | S_IRWXG | S_IRWXO);
		}
		*p = '/';
		q = p + 1;
	}

	struct chunk *c = NULL;
	FILE *fp = NULL;

	while ((c = sync_queue_pop(restore_chunk_queue))) {

		TIMER_DECLARE(1);
		TIMER_BEGIN(1);

		if (CHECK_CHUNK(c, CHUNK_FILE_START)) {
			NOTICE("Restoring: %s", c->data);

			sds filepath = sdsdup(jcr.path);
			filepath = sdscat(filepath, c->data);

			int len = sdslen(jcr.path);
			char *q = filepath + len;
			char *p;
			while ((p = strchr(q, '/'))) {
				if (*p == *(p - 1)) {
					q++;
					continue;
				}
				*p = 0;
				if (access(filepath, 0) != 0) {
					mkdir(filepath, S_IRWXU | S_IRWXG | S_IRWXO);
				}
				*p = '/';
				q = p + 1;
			}

			if (destor.simulation_level == SIMULATION_NO) {
				assert(fp == NULL);
				fp = fopen(filepath, "w");
			}

			sdsfree(filepath);

		} else if (CHECK_CHUNK(c, CHUNK_FILE_END)) {
			if (fp)
				fclose(fp);
			fp = NULL;
		} else {
			assert(destor.simulation_level == SIMULATION_NO);
			VERBOSE("Restoring %d bytes", c->size);
			fwrite(c->data, c->size, 1, fp);
		}

		free_chunk(c);

		TIMER_END(1, jcr.write_chunk_time);
	}

}
Beispiel #8
0
int backup(Jcr* jcr) {

	fingerchunk_queue = sync_queue_new(-1);
	ContainerUsageMonitor* usage_monitor = container_usage_monitor_new();
	cfl_monitor = cfl_monitor_new(read_cache_size);

	if (simulation_level == SIMULATION_ALL) {
		start_read_trace_phase(jcr);
	} else {
		start_read_phase(jcr);
		start_chunk_phase(jcr);
		start_hash_phase(jcr);
	}
	start_segment_phase(jcr);
	start_filter_phase(jcr);
	start_append_phase(jcr);

	ContainerId seed_id = -1;
	int32_t seed_len = 0;
	FingerChunk* fchunk = NULL;
	int signal = recv_fingerchunk(&fchunk);
	while (signal != STREAM_END) {
		container_usage_monitor_update(usage_monitor, fchunk->container_id,
				&fchunk->fingerprint, fchunk->length);
		jvol_append_fingerchunk(jcr->job_volume, fchunk);

		if (seed_id != -1 && seed_id != fchunk->container_id) {
			jvol_append_seed(jcr->job_volume, seed_id, seed_len);
			seed_len = 0;
		}
		/* merge sequential accesses */
		seed_id = fchunk->container_id;
		seed_len += fchunk->length;

		free(fchunk);
		signal = recv_fingerchunk(&fchunk);
	}

	if (seed_len > 0)
		jvol_append_seed(jcr->job_volume, seed_id, seed_len);
	sync_queue_free(fingerchunk_queue, NULL);

	jcr->sparse_container_num = g_hash_table_size(usage_monitor->sparse_map);
	jcr->total_container_num = g_hash_table_size(usage_monitor->dense_map)
			+ jcr->sparse_container_num;

	while ((jcr->inherited_sparse_num = container_usage_monitor_print(
			usage_monitor, jcr->job_id, jcr->historical_sparse_containers)) < 0) {
		dprint("retry!");
	}

	/* store recipes of processed file */
	int i = 0;
	for (; i < jcr->file_num; ++i) {
		Recipe *recipe = (Recipe*) sync_queue_pop(jcr->completed_files_queue);
		recipe->fileindex = i;
		if (jvol_append_meta(jcr->job_volume, recipe) != SUCCESS) {
			printf("%s, %d: some errors happened in appending recipe!\n",
			__FILE__, __LINE__);
			return FAILURE;
		}
		jcr->chunk_num += recipe->chunknum;
		recipe_free(recipe);
	}

	stop_append_phase();
	stop_filter_phase();
	stop_segment_phase();
	if (simulation_level == SIMULATION_ALL) {
		stop_read_trace_phase(jcr);
	} else {
		stop_hash_phase();
		stop_chunk_phase();
		stop_read_phase();
	}

	container_usage_monitor_free(usage_monitor);
	print_cfl(cfl_monitor);
	cfl_monitor_free(cfl_monitor);

	return 0;
}
Beispiel #9
0
/*
 * When a container buffer is full, we push it into container_queue.
 */
static void* filter_thread(void *arg) {
    int enable_rewrite = 1;
    struct fileRecipeMeta* r = NULL;

    while (1) {
        struct chunk* c = sync_queue_pop(rewrite_queue);

        if (c == NULL)
            /* backup job finish */
            break;

        /* reconstruct a segment */
        struct segment* s = new_segment();

        /* segment head */
        assert(CHECK_CHUNK(c, CHUNK_SEGMENT_START));
        free_chunk(c);

        c = sync_queue_pop(rewrite_queue);
        while (!(CHECK_CHUNK(c, CHUNK_SEGMENT_END))) {
            g_sequence_append(s->chunks, c);
            if (!CHECK_CHUNK(c, CHUNK_FILE_START)
                    && !CHECK_CHUNK(c, CHUNK_FILE_END))
                s->chunk_num++;

            c = sync_queue_pop(rewrite_queue);
        }
        free_chunk(c);

        /* For self-references in a segment.
         * If we find an early copy of the chunk in this segment has been rewritten,
         * the rewrite request for it will be denied to avoid repeat rewriting. */
        GHashTable *recently_rewritten_chunks = g_hash_table_new_full(g_int64_hash,
        		g_fingerprint_equal, NULL, free_chunk);
        GHashTable *recently_unique_chunks = g_hash_table_new_full(g_int64_hash,
        			g_fingerprint_equal, NULL, free_chunk);

        pthread_mutex_lock(&index_lock.mutex);

        TIMER_DECLARE(1);
        TIMER_BEGIN(1);
        /* This function will check the fragmented chunks
         * that would be rewritten later.
         * If we find an early copy of the chunk in earlier segments,
         * has been rewritten,
         * the rewrite request for it will be denied. */
        index_check_buffer(s);

    	GSequenceIter *iter = g_sequence_get_begin_iter(s->chunks);
    	GSequenceIter *end = g_sequence_get_end_iter(s->chunks);
        for (; iter != end; iter = g_sequence_iter_next(iter)) {
            c = g_sequence_get(iter);

    		if (CHECK_CHUNK(c, CHUNK_FILE_START) || CHECK_CHUNK(c, CHUNK_FILE_END))
    			continue;

            VERBOSE("Filter phase: %dth chunk in %s container %lld", chunk_num,
                    CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER) ? "out-of-order" : "", c->id);

            /* Cache-Aware Filter */
            if (destor.rewrite_enable_cache_aware && restore_aware_contains(c->id)) {
                assert(c->id != TEMPORARY_ID);
                VERBOSE("Filter phase: %dth chunk is cached", chunk_num);
                SET_CHUNK(c, CHUNK_IN_CACHE);
            }

            /* A cfl-switch for rewriting out-of-order chunks. */
            if (destor.rewrite_enable_cfl_switch) {
                double cfl = restore_aware_get_cfl();
                if (enable_rewrite && cfl > destor.rewrite_cfl_require) {
                    VERBOSE("Filter phase: Turn OFF the (out-of-order) rewrite switch of %.3f",
                            cfl);
                    enable_rewrite = 0;
                } else if (!enable_rewrite && cfl < destor.rewrite_cfl_require) {
                    VERBOSE("Filter phase: Turn ON the (out-of-order) rewrite switch of %.3f",
                            cfl);
                    enable_rewrite = 1;
                }
            }

            if(CHECK_CHUNK(c, CHUNK_DUPLICATE) && c->id == TEMPORARY_ID){
            	struct chunk* ruc = g_hash_table_lookup(recently_unique_chunks, &c->fp);
            	assert(ruc);
            	c->id = ruc->id;
            }
            struct chunk* rwc = g_hash_table_lookup(recently_rewritten_chunks, &c->fp);
            if(rwc){
            	c->id = rwc->id;
            	SET_CHUNK(c, CHUNK_REWRITE_DENIED);
            }

            /* A fragmented chunk will be denied if it has been rewritten recently */
            if (!CHECK_CHUNK(c, CHUNK_DUPLICATE) || (!CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)
            		&& (CHECK_CHUNK(c, CHUNK_SPARSE)
                    || (enable_rewrite && CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)
                        && !CHECK_CHUNK(c, CHUNK_IN_CACHE))))) {
                /*
                 * If the chunk is unique, or be fragmented and not denied,
                 * we write it to a container.
                 * Fragmented indicates: sparse, or out of order and not in cache,
                 */
                if (storage_buffer.container_buffer == NULL){
                	storage_buffer.container_buffer = create_container();
                	if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY)
                		storage_buffer.chunks = g_sequence_new(free_chunk);
                }

                if (container_overflow(storage_buffer.container_buffer, c->size)) {

                    if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
                        /*
                         * TO-DO
                         * Update_index for physical locality
                         */
                        GHashTable *features = sampling(storage_buffer.chunks,
                        		g_sequence_get_length(storage_buffer.chunks));
                        index_update(features, get_container_id(storage_buffer.container_buffer));
                        g_hash_table_destroy(features);
                        g_sequence_free(storage_buffer.chunks);
                        storage_buffer.chunks = g_sequence_new(free_chunk);
                    }
                    TIMER_END(1, jcr.filter_time);
                    write_container_async(storage_buffer.container_buffer);
                    TIMER_BEGIN(1);
                    storage_buffer.container_buffer = create_container();
                }

                if(add_chunk_to_container(storage_buffer.container_buffer, c)){

                	struct chunk* wc = new_chunk(0);
                	memcpy(&wc->fp, &c->fp, sizeof(fingerprint));
                	wc->id = c->id;
                	if (!CHECK_CHUNK(c, CHUNK_DUPLICATE)) {
                		jcr.unique_chunk_num++;
                		jcr.unique_data_size += c->size;
                		g_hash_table_insert(recently_unique_chunks, &wc->fp, wc);
                    	VERBOSE("Filter phase: %dth chunk is recently unique, size %d", chunk_num,
                    			g_hash_table_size(recently_unique_chunks));
                	} else {
                		jcr.rewritten_chunk_num++;
                		jcr.rewritten_chunk_size += c->size;
                		g_hash_table_insert(recently_rewritten_chunks, &wc->fp, wc);
                	}

                	if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
                		struct chunk* ck = new_chunk(0);
                		memcpy(&ck->fp, &c->fp, sizeof(fingerprint));
                		g_sequence_append(storage_buffer.chunks, ck);
                	}

                	VERBOSE("Filter phase: Write %dth chunk to container %lld",
                			chunk_num, c->id);
                }else{
                	VERBOSE("Filter phase: container %lld already has this chunk", c->id);
            		assert(destor.index_category[0] != INDEX_CATEGORY_EXACT
            				|| destor.rewrite_algorithm[0]!=REWRITE_NO);
                }

            }else{
                if(CHECK_CHUNK(c, CHUNK_REWRITE_DENIED)){
                    VERBOSE("Filter phase: %lldth fragmented chunk is denied", chunk_num);
                }else if (CHECK_CHUNK(c, CHUNK_OUT_OF_ORDER)) {
                    VERBOSE("Filter phase: %lldth chunk in out-of-order container %lld is already cached",
                            chunk_num, c->id);
                }
            }

            assert(c->id != TEMPORARY_ID);

            /* Collect historical information. */
            har_monitor_update(c->id, c->size);

            /* Restore-aware */
            restore_aware_update(c->id, c->size);

            chunk_num++;
        }

        int full = index_update_buffer(s);

        /* Write a SEGMENT_BEGIN */
        segmentid sid = append_segment_flag(jcr.bv, CHUNK_SEGMENT_START, s->chunk_num);

        /* Write recipe */
    	iter = g_sequence_get_begin_iter(s->chunks);
    	end = g_sequence_get_end_iter(s->chunks);
        for (; iter != end; iter = g_sequence_iter_next(iter)) {
            c = g_sequence_get(iter);

        	if(r == NULL){
        		assert(CHECK_CHUNK(c,CHUNK_FILE_START));
        		r = new_file_recipe_meta(c->data);
        	}else if(!CHECK_CHUNK(c,CHUNK_FILE_END)){
        		struct chunkPointer cp;
        		cp.id = c->id;
        		assert(cp.id>=0);
        		memcpy(&cp.fp, &c->fp, sizeof(fingerprint));
        		cp.size = c->size;
        		append_n_chunk_pointers(jcr.bv, &cp ,1);
        		r->chunknum++;
        		r->filesize += c->size;

    	    	jcr.chunk_num++;
	    	    jcr.data_size += c->size;

        	}else{
        		assert(CHECK_CHUNK(c,CHUNK_FILE_END));
        		append_file_recipe_meta(jcr.bv, r);
        		free_file_recipe_meta(r);
        		r = NULL;

	            jcr.file_num++;
        	}
        }

       	/* Write a SEGMENT_END */
       	append_segment_flag(jcr.bv, CHUNK_SEGMENT_END, 0);

        if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){
             /*
              * TO-DO
              * Update_index for logical locality
              */
            s->features = sampling(s->chunks, s->chunk_num);
         	if(destor.index_category[0] == INDEX_CATEGORY_EXACT){
         		/*
         		 * For exact deduplication,
         		 * unique fingerprints are inserted.
         		 */
         		VERBOSE("Filter phase: add %d unique fingerprints to %d features",
         				g_hash_table_size(recently_unique_chunks),
         				g_hash_table_size(s->features));
         		GHashTableIter iter;
         		gpointer key, value;
         		g_hash_table_iter_init(&iter, recently_unique_chunks);
         		while(g_hash_table_iter_next(&iter, &key, &value)){
         			struct chunk* uc = value;
         			fingerprint *ft = malloc(sizeof(fingerprint));
         			memcpy(ft, &uc->fp, sizeof(fingerprint));
         			g_hash_table_insert(s->features, ft, NULL);
         		}

         		/*
         		 * OPTION:
         		 * 	It is still an open problem whether we need to update
         		 * 	rewritten fingerprints.
         		 * 	It would increase index update overhead, while the benefit
         		 * 	remains unclear.
         		 * 	More experiments are required.
         		 */
         		VERBOSE("Filter phase: add %d rewritten fingerprints to %d features",
         				g_hash_table_size(recently_rewritten_chunks),
         				g_hash_table_size(s->features));
         		g_hash_table_iter_init(&iter, recently_rewritten_chunks);
         		while(g_hash_table_iter_next(&iter, &key, &value)){
         			struct chunk* uc = value;
         			fingerprint *ft = malloc(sizeof(fingerprint));
         			memcpy(ft, &uc->fp, sizeof(fingerprint));
         			g_hash_table_insert(s->features, ft, NULL);
         		}
         	}
         	index_update(s->features, sid);
         }

        free_segment(s);

        if(index_lock.wait_threshold > 0 && full == 0){
        	pthread_cond_broadcast(&index_lock.cond);
        }
        TIMER_END(1, jcr.filter_time);
        pthread_mutex_unlock(&index_lock.mutex);

        g_hash_table_destroy(recently_rewritten_chunks);
        g_hash_table_destroy(recently_unique_chunks);

    }

    if (storage_buffer.container_buffer
    		&& !container_empty(storage_buffer.container_buffer)){
        if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
            /*
             * TO-DO
             * Update_index for physical locality
             */
        	GHashTable *features = sampling(storage_buffer.chunks,
        			g_sequence_get_length(storage_buffer.chunks));
        	index_update(features, get_container_id(storage_buffer.container_buffer));
        	g_hash_table_destroy(features);
        	g_sequence_free(storage_buffer.chunks);
        }
        write_container_async(storage_buffer.container_buffer);
    }

    /* All files done */
    jcr.status = JCR_STATUS_DONE;
    return NULL;
}