/*
 * For every chunk whose reference count lies in [lb, rb], print the gap
 * between each pair of consecutive entries of its reference list (one gap
 * per line on stdout), then report the mean gap on stderr.
 *
 * Always returns 0.
 */
int distance_between_all_chunk_references(unsigned int lb, unsigned int rb){
    struct chunk_rec r;
    int64_t gap_total = 0;
    int gap_count = 0;

    init_iterator("CHUNK");
    memset(&r, 0, sizeof(r));

    while(iterate_chunk(&r, 0) == 0){
        int previous;
        int idx;

        /* Chunks outside the requested reference-count window are skipped. */
        if(r.rcount < lb || r.rcount > rb)
            continue;

        previous = r.list[0];
        for(idx = 1; idx < r.rcount; idx++){
            const int gap = r.list[idx] - previous;

            /* Consecutive entries are asserted to be strictly increasing. */
            assert(gap > 0);
            fprintf(stdout, "%d\n", gap);

            gap_total += gap;
            gap_count++;
            previous = r.list[idx];
        }
    }

    fprintf(stderr, "avg. = %10.2f\n", 1.0*gap_total/gap_count);

    close_iterator();

    return 0;
}
Beispiel #2
0
/*
 * Chunk-level model without deduplication.
 *
 * In weighted mode every reference of every chunk contributes its size,
 * weighted by the number of 1024-byte lines the chunk occupies (rounded up);
 * the weighted mean is written to stderr.  In unweighted mode only the
 * banner lines are printed.  `path` and `count` are not used.
 *
 * NOTE(review): the original header said "fixed-sized file system block of
 * 8 KB if weighted", but the code rounds to 1024-byte units -- confirm which
 * one is intended.
 */
void chunk_nodedup_simd_trace(char **path, int count,  int weighted){
	if (!weighted) {
		fprintf(stderr, "CHUNK:NO DEDUP:NOT WEIGHTED\n");
		printf("CHUNK:NO DEDUP:NOT WEIGHTED\n");
	} else {
		struct chunk_rec chunk;
		int64_t weighted_sum = 0;
		int64_t line_total = 0;

		fprintf(stderr, "CHUNK:NO DEDUP:WEIGHTED\n");
		printf("CHUNK:NO DEDUP:WEIGHTED\n");
		init_iterator("CHUNK");

		memset(&chunk, 0, sizeof(chunk));

		/* USE part */
		while (iterate_chunk(&chunk, 0) == 0) {
			const int64_t size = chunk.csize;
			int ref;

			/* One contribution per reference of the chunk. */
			for (ref = 0; ref < chunk.rcount; ref++) {
				const int lines_no = (chunk.csize+1023)/1024;

				weighted_sum += size*lines_no;
				line_total += lines_no;
			}
		}

		fprintf(stderr, "%.6f\n", 1.0*weighted_sum/line_total);

		close_iterator();
	}

	fprintf(stderr, "No trace for this model; only for test\n");
}
Beispiel #3
0
/*
 * Report three average chunk sizes on stderr:
 *   nodedup - every reference counted (csize * rcount over rcount),
 *   removed - only the references beyond the first (rcount - 1 per chunk),
 *   stored  - each chunk counted exactly once.
 */
void avg_chunksize(){
    struct chunk_rec r;
    int64_t bytes_all = 0, refs_all = 0;
    int64_t bytes_dup = 0, refs_dup = 0;
    int64_t bytes_unique = 0, chunks_unique = 0;

    init_iterator("CHUNK");
    memset(&r, 0, sizeof(r));

    while(iterate_chunk(&r, 0) == 0){
        /* Widen csize first so the products are computed in 64 bits. */
        bytes_all += (int64_t)r.csize * r.rcount;
        refs_all += r.rcount;

        bytes_dup += (int64_t)r.csize * (r.rcount - 1);
        refs_dup += r.rcount - 1;

        bytes_unique += r.csize;
        chunks_unique++;
    }

    close_iterator();

    fprintf(stderr, "nodedup = %10.2f\n", 1.0*bytes_all/refs_all);
    fprintf(stderr, "removed = %10.2f\n", 1.0*bytes_dup/refs_dup);
    fprintf(stderr, "stored = %10.2f\n", 1.0*bytes_unique/chunks_unique);
}
/*
 * Begin an edge-of-vert iteration: after init_iterator(), both the first and
 * the next edge are set to the vertex's edge, if it has one.
 */
void bmiter__edge_of_vert_begin(BMIter *iter)
{
	init_iterator(iter);

	/* A vertex without an edge leaves the edge cursors untouched. */
	if (!iter->vdata->e)
		return;

	iter->e_next = iter->vdata->e;
	iter->e_first = iter->vdata->e;
}
/*
 * Begin a face-of-edge iteration: after init_iterator(), both loop cursors
 * are set to the edge's loop, if it has one.
 */
void bmiter__face_of_edge_begin(BMIter *iter)
{
	init_iterator(iter);

	/* An edge without a loop leaves the loop cursors untouched. */
	if (!iter->edata->l)
		return;

	iter->l_next = iter->edata->l;
	iter->l_first = iter->edata->l;
}
Beispiel #6
0
/*
 * Group all non-empty files by their `hash` field and print every group that
 * survives the only_one_item() filter: one "HASH" line per group followed by
 * one "FILE" line per member.
 *
 * Always returns 0.
 */
int collect_identical_files(){
    struct file_rec r;
    GHashTable* by_hash;
    GHashTableIter iter;
    gpointer key, value;
    char suffix[8];

    init_iterator("FILE");
    memset(&r, 0, sizeof(r));

    /* Keys point into the file_list values; free_file_list releases values. */
    by_hash = g_hash_table_new_full(g_int_hash, hash_equal, NULL, free_file_list);

    while(iterate_file(&r) == 0){
        struct file_list* group;
        struct file_item* entry;

        /* Empty files are ignored. */
        if(r.fsize <= 0)
            continue;

        group = g_hash_table_lookup(by_hash, r.hash);
        if(group == NULL){
            /* First file with this hash: open a new group. */
            group = malloc(sizeof(struct file_list));
            group->head = NULL;
            memcpy(group->hash, r.hash, sizeof(r.hash));
            g_hash_table_insert(by_hash, group->hash, group);
        }

        /* The file name is stored inline after the item header. */
        entry = malloc(sizeof(*entry) + strlen(r.fname) + 1);
        entry->fid = r.fid;
        entry->fsize = r.fsize;
        strcpy(entry->fname, r.fname);
        group->head = g_list_prepend(group->head, entry);
    }

    close_iterator();

    g_hash_table_foreach_remove(by_hash, only_one_item, NULL);

    g_hash_table_iter_init(&iter, by_hash);
    while(g_hash_table_iter_next(&iter, &key, &value)){
        struct file_list* group = value;
        GList* elem;

        printf("HASH %d ", g_list_length(group->head));
        print_hash(group->hash, 10);

        elem = g_list_first(group->head);
        do{
            struct file_item* entry = elem->data;

            /* Normalise the suffix before printing it. */
            parse_file_suffix(entry->fname, suffix, sizeof(suffix));
            if(strncmp(suffix, "edu,", 4) == 0)
                strcpy(suffix, "edu,?");
            else if(suffix[0] == '\0')
                strcpy(suffix, ".None");

            printf("FILE %d %" PRId64 " %s %s\n", entry->fid, entry->fsize,
                    entry->fname, suffix);

            elem = g_list_next(elem);
        }while(elem != NULL);
    }

    g_hash_table_destroy(by_hash);

    return 0;
}
/*
 * Begin a loops-of-edge iteration: both loop cursors start at the edge's loop.
 */
void bmiter__loops_of_edge_begin(BMIter *iter)
{
	/* Read the loop before init_iterator() runs; the original author noted
	 * (without knowing why) that init_iterator() sets ldata. */
	BMLoop *const start = iter->edata->l;

	init_iterator(iter);

	iter->l_next = start;
	iter->l_first = start;
}
/*
 * Begin a loop-of-vert iteration.  iter->count receives the result of
 * bmesh_disk_facevert_count() (0 when the vertex has no edge); when it is
 * non-zero the first edge and first loop for the vertex are looked up and
 * both cursor pairs are set to them.
 */
void bmiter__loop_of_vert_begin(BMIter *iter)
{
	init_iterator(iter);

	iter->count = iter->vdata->e ? bmesh_disk_facevert_count(iter->vdata) : 0;
	if (!iter->count)
		return;

	iter->e_first = bmesh_disk_faceedge_find_first(iter->vdata->e, iter->vdata);
	iter->e_next = iter->e_first;

	iter->l_first = bmesh_radial_faceloop_find_first(iter->e_first->l, iter->vdata);
	iter->l_next = iter->l_first;
}
Beispiel #9
0
/*
 * Begin a face-of-vert iteration.  iter->count receives the result of
 * bmesh_disk_facevert_count() (0 when the vertex has no edge); when it is
 * non-zero the first edge and first loop for the vertex are looked up and
 * both cursor pairs are set to them.
 */
void bmiter__face_of_vert_begin(BMIter *iter)
{
	init_iterator(iter);

	iter->count = iter->vdata->e ? bmesh_disk_facevert_count(iter->vdata) : 0;
	if (!iter->count)
		return;

	iter->firstedge = bmesh_disk_faceedge_find_first(iter->vdata->e, iter->vdata);
	iter->nextedge = iter->firstedge;

	iter->firstloop = bmesh_radial_faceloop_find_first(iter->firstedge->l, iter->vdata);
	iter->nextloop = iter->firstloop;
}
/*
 * Begin a loops-of-loop iteration: the starting loop is remembered as
 * l_first and the cursor is placed on its radial neighbour, or on NULL when
 * the loop is its own radial neighbour.
 */
void bmiter__loops_of_loop_begin(BMIter *iter)
{
	/* Read ldata before init_iterator() runs; the original author noted
	 * (without knowing why) that init_iterator() sets ldata. */
	BMLoop *const start = iter->ldata;

	init_iterator(iter);

	iter->l_first = start;
	iter->l_next = (start->radial_next == start) ? NULL : start->radial_next;
}
Beispiel #11
0
/* Emit the DSUI header for one log: first the time state (log_time_state),
 * then one namespace event for every entry of the ip_names table.  Nothing
 * beyond the time state is written when the table is empty. */
static void write_header(struct logging_thread *log)
{
	hashtable_itr_t cursor;

	log_time_state(log);

	if (hashtable_count(ip_names) == 0)
		return;

	init_iterator(&cursor, ip_names);

	/* The table is non-empty here, so the first value is fetched before
	 * the iterator is advanced. */
	for (;;) {
		struct datastream_ip *entry = hashtable_iterator_value(&cursor);

		write_namespace_event(entry->ip, log);

		if (!hashtable_iterator_advance(&cursor))
			break;
	}
}
Beispiel #12
0
/** Enable all the instrumentation points DSUI knows about for
  * a particular datastream.
  *
  * Every entry of the ip_names table is passed to __dsui_enable_ip() and
  * the number of entries processed is logged via dprintf().
  *
  * @param ds  datastream to enable the instrumentation points for
  */
void dsui_enable_all_ips(dsui_stream_t ds)
{
	int ctr = 0;

	hashtable_itr_t itr;

	/* The do/while below reads the first value before advancing, so it
	 * must not run on an empty table; the other ip_names/logging_threads
	 * walkers in this file guard the same way. */
	if (hashtable_count(ip_names)) {
		init_iterator(&itr, ip_names);

		do {
			struct datastream_ip *ip = hashtable_iterator_value(&itr);
			ctr++;
			__dsui_enable_ip(ds, ip, NULL);
		} while (hashtable_iterator_advance(&itr));
	}

	dprintf("Enabled %d entities for datastream [%d]\n",
			ctr, ds);
}
Beispiel #13
0
/** Register an instrumentation point with DSUI. This accomplishes
 * several things:
 * 1) The IP is inserted into the global instrumentation point
 *    hash table so it can be looked up by name
 * 2) The numerical ID of the instrumentation point is assigned.
 *    Entities are logged by ID, with the name and other metadata
 *    stored in the previously logged namespace information.
 *
 * If DSUI dicovers that an instrumentation point is already registered
 * under the same name, then it assumes that this IP is another instance
 * of the same point, and modifies the IP's pointers so that it refers
 * to the same namespace/state information as the previous IP.
 *
 * If there are any open log files, a namespace event for this IP
 * will be written to each of them.
 *
 * You must hold the DSUI write-lock when calling this function */
static void __dsui_register_ip(struct datastream_ip *ip)
{
	struct datastream_ip_data *ipdata = ip->ip;
	char *ipname;
	struct datastream_ip *v;
	hashtable_itr_t itr;

	if (ipdata->id) {
		bprintf("Already Registered %s/%s/%d\n", ipdata->group,
			ipdata->name, ipdata->id);
		return;
	}

	ipname = malloc(strlen(ipdata->group) + strlen(ipdata->name) + 2);
	sprintf(ipname, "%s/%s", ipdata->group, ipdata->name);
	v = hashtable_search(ip_names, ipname);
	if (v) {
		//dprintf("%s encountered more than once mapping %p->%p\n",
		//		ipname, ip, v);
		ip->ip = v->ip;
		ip->next = &v->ip->next;
		ip->id = &v->ip->id;
		v->ip->line = -1;
		free(ipname);
		return;
	}

	ipdata->id = starting_id++;
	hashtable_insert(ip_names, ipname, ip);
	//dprintf("Registered %s/%s/%d \n", ipdata->group,
	//		ipdata->name, ipdata->id);

	/* if this ip was registered when there are already active
	 * logging threads, we need to write namespace information
	 * for this ip */

	if (!hashtable_count(logging_threads)) {
		return;
	}
	init_iterator(&itr, logging_threads);
	do {
		struct logging_thread *log;
		log = hashtable_iterator_value(&itr);
		write_namespace_event(ipdata, log);
	} while (hashtable_iterator_advance(&itr));
}
Beispiel #14
0
/*
 * Report chunks that are shared between files with different minhash values.
 *
 * For each chunk with more than one reference and more than one file, the
 * file records are loaded; if any two neighbouring records differ in
 * minhash, the chunk and all of its files are printed and the chunk is
 * counted.  The total is written to stderr.
 *
 * NOTE(review): the records filled in by search_file() are not released
 * here, whereas analyze_references_source() frees files[i].fname -- confirm
 * whether that is needed in this function as well.
 */
void collect_distinct_files(){
    struct chunk_rec r;
    int shared_chunks = 0;

    init_iterator("CHUNK");
    memset(&r, 0, sizeof(r));

    while(iterate_chunk(&r, 1) == 0){
        int k;
        int differs = 0;
        char suffix[8];

        if(r.rcount <= 1 || r.fcount <= 1)
            continue;

        struct file_rec files[r.fcount];
        memset(files, 0, sizeof(files));

        /* File ids follow the rcount reference entries in r.list. */
        for(k = 0; k < r.fcount; k++){
            files[k].fid = r.list[r.rcount + k];
            search_file(&files[k]);
        }

        /* Stop at the first neighbouring pair whose minhash differs. */
        for(k = 1; k < r.fcount && !differs; k++){
            differs = memcmp(files[k].minhash, files[k-1].minhash,
                    sizeof(files[k].minhash)) != 0;
        }
        if(!differs)
            continue;

        printf("CHUK %d ", r.fcount);
        print_hash(r.hash, 10);
        for(k = 0; k < r.fcount; k++){
            parse_file_suffix(files[k].fname, suffix, sizeof(suffix));
            if(strncmp(suffix, "edu,", 4) == 0)
                strcpy(suffix, "edu,?");
            else if(suffix[0] == '\0')
                strcpy(suffix, ".None");

            printf("FILE %d %" PRId64 " %s %s ", files[k].fid, files[k].fsize,
                    files[k].fname, suffix);
            print_hash(files[k].minhash, 10);
        }

        shared_chunks++;
    }
    fprintf(stderr, "%d chunks are shared between distinct files\n", shared_chunks);

    close_iterator();

}
Beispiel #15
0
/*
 * Initialise iterator `i` through class `c` and, recursively, its super
 * classes (super class first).
 *
 * Each class has its init_class hook run once (tracked by `inited`).  The
 * result of the class's own `init` is returned.  GRIB_INTERNAL_ERROR is
 * returned when `c` is NULL or the class has no `init`; a failing super
 * class initialisation is returned as-is.
 */
static int init_iterator(grib_iterator_class* c,grib_iterator* i, grib_handle *h, grib_arguments* args)
{
    grib_iterator_class *parent;
    int status;

    if(!c)
        return GRIB_INTERNAL_ERROR;

    parent = c->super ? *(c->super) : NULL;

    /* One-time, per-class set-up. */
    if(!c->inited)
    {
        if(c->init_class)
            c->init_class(c);
        c->inited = 1;
    }

    status = parent ? init_iterator(parent,i,h,args) : GRIB_SUCCESS;
    if(status != GRIB_SUCCESS)
        return status;

    if(c->init)
        return c->init(i,h, args);

    return GRIB_INTERNAL_ERROR;
}
Beispiel #16
0
/** returns a configfile linked list of active logging thread filenames.
 * These are the keys in the logging_threads hashtable. */
list_t *get_dsui_output_filenames()
{
	list_t *f = create_list();
	struct logging_thread *log;
	hashtable_itr_t itr;

	km_rdwr_rlock(&dsui_rwlock);

	if (!hashtable_count(logging_threads)) {
		goto out;
	}

	init_iterator(&itr, logging_threads);
	do {
		log = hashtable_iterator_value(&itr);
		list_append(f, encap_string(log->filename));
	} while (hashtable_iterator_advance(&itr));
out:
	km_rdwr_runlock(&dsui_rwlock);
	return f;
}
Beispiel #17
0
/* Close every datastream slot (0 .. MAX_DS-1), then close and free each
 * entry of the logging_threads table while removing it from the table. */
static void __dsui_cleanup() {
	hashtable_itr_t cursor;
	int slot;

	dprintf("called\n");

	for (slot = 0; slot < MAX_DS; slot++)
		__dsui_close_datastream(slot);

	if (hashtable_count(logging_threads) == 0)
		return;

	init_iterator(&cursor, logging_threads);
	for (;;) {
		struct logging_thread *thread = hashtable_iterator_value(&cursor);

		close_logging_thread(thread);
		free(thread);

		/* Removing the current entry also moves the iterator on. */
		if (!hashtable_iterator_remove(&cursor))
			break;
	}
}
Beispiel #18
0
/*
 * For every chunk whose reference count lies in [lb, rb], print the ratio
 * fcount / rcount on stdout; afterwards print the mean ratio on stderr.
 */
void get_file2ref_ratio(unsigned int lb, unsigned int rb){
    struct chunk_rec r;
    float ratio_total = 0;
    int selected = 0;

    init_iterator("CHUNK");
    memset(&r, 0, sizeof(r));

    while(iterate_chunk(&r, 0) == 0){
        double ratio;

        if(r.rcount < lb || r.rcount > rb)
            continue;

        ratio = 1.0*r.fcount/r.rcount;
        fprintf(stdout, "%10.5f\n", ratio);

        /* The running total is kept in single precision. */
        ratio_total += ratio;
        selected++;
    }

    fprintf(stderr, "avg: %10.5f\n", ratio_total/selected);

    close_iterator();

}
Beispiel #19
0
/*
 * For every chunk whose reference count lies in [lb, rb] and that has at
 * least two references, print the distance between its first two references
 * (r.list[1] - r.list[0]) on stdout; afterwards print the mean distance on
 * stderr.
 *
 * Chunks with fewer than two references are skipped: r.list[1] is not a
 * reference for them, so there is no distance to measure.  When no chunk
 * qualifies, the reported average is 0.00.
 *
 * Always returns 0.
 */
int distance_between_first_two_chunk_references(unsigned int lb, unsigned int rb){
    init_iterator("CHUNK");

    struct chunk_rec r;
    memset(&r, 0, sizeof(r));

    int64_t sum = 0;
    int count = 0;

    while(iterate_chunk(&r, 0) == 0){
        if(r.rcount < lb || r.rcount > rb)
            continue;

        /* Only list[0 .. rcount-1] are references; with lb <= 1 a
         * single-reference chunk would otherwise be measured against an
         * entry that is not a reference. */
        if(r.rcount < 2)
            continue;

        assert(r.list[1] > r.list[0]);
        fprintf(stdout, "%d\n", r.list[1] - r.list[0]);
        sum += r.list[1] - r.list[0];
        count++;
    }

    /* Avoid 0/0 when nothing was selected. */
    fprintf(stderr, "avg. = %10.2f\n", count ? 1.0*sum/count : 0.0);
    close_iterator();
    return 0;
}
Beispiel #20
0
/*
 * Print one restore-progress line (to stdout and stderr) for every whole
 * percent step that `progress` has reached, up to and including step 99.
 * `*step` is advanced past the steps that were printed.
 */
static void emit_restore_progress(int progress, int *step, double restored_fraction)
{
	while (progress >= *step && *step <= 99) {
		printf("%.6f\n", restored_fraction);
		fprintf(stderr, "%.6f\n", restored_fraction);
		(*step)++;
	}
}

/*  
 * Chunk level, without file semantics 
 * Dedup
 * (no trace for chunk-level no-dedup model)
 *
 * Phase 1 ("USE part") walks every chunk record, prints it through
 * print_a_chunk() and accumulates the physical size (each chunk once), the
 * logical size (csize * rcount) and the reference total, then prints the
 * summary figures.
 *
 * Phase 2 ("RAID failure part") replays a restore: first the chunks listed
 * in `pophashfile` (optional; 20-byte hashes), then every chunk of the
 * `count` hash files in `path`, each chunk only once.  Whenever the restored
 * physical bytes reach another whole percent of the physical size (1..99),
 * the restored fraction is printed: logical bytes when `weighted`, otherwise
 * references.  The output is framed by the lines "0" and "1.0".
 *
 * path         hash files to replay
 * count        number of entries in `path`
 * weighted     non-zero selects the byte-weighted variant
 * pophashfile  optional file of 20-byte hashes restored first; may be NULL
 *
 * A hash file that cannot be opened or read terminates the process.  A pop
 * hash file that cannot be opened is reported on stderr and skipped.
 */
void chunk_dedup_simd_trace(char **path, int count, int weighted, char *pophashfile)
{
	if (weighted) {
		fprintf(stderr, "CHUNK:DEDUP:WEIGHTED\n");
		printf("CHUNK:DEDUP:WEIGHTED\n");
	} else {
		fprintf(stderr, "CHUNK:DEDUP:NOT WEIGHTED\n");
		printf("CHUNK:DEDUP:NOT WEIGHTED\n");
	}

	init_iterator("CHUNK");

	struct chunk_rec chunk;
	memset(&chunk, 0, sizeof(chunk));

	int64_t psize = 0;
	int64_t lsize = 0;
	int64_t total_chunks = 0;
	/* USE part */
	int64_t sum4mean = 0;
	int64_t count4mean = 0;
	while (iterate_chunk(&chunk, 0) == 0) {

		int64_t sum = chunk.csize;
		sum *= chunk.rcount;

		lsize += sum;
		psize += chunk.csize;

		total_chunks += chunk.rcount;

		if (weighted) {
			sum4mean += sum * chunk.csize;
			count4mean += chunk.csize;
			print_a_chunk(chunk.csize, sum);
		} else {
			sum4mean += sum; 
			count4mean += chunk.csize;
			print_a_chunk(chunk.csize, chunk.rcount);
		}
	}

	printf("%.6f\n", 1.0*lsize/psize);
	fprintf(stderr, "D/F = %.4f, total_chunks = %"PRId64"\n", 1.0*lsize/psize, 
			total_chunks);
	fprintf(stderr, "mean = %.4f, per DF = %.6f\n", 1.0*sum4mean/count4mean, 
			1.0*sum4mean*psize/count4mean/lsize);

	close_iterator();

	struct hashfile_handle *handle;
	const struct chunk_info *ci;

	int64_t restore_logical_bytes = 0;
	int64_t restore_physical_bytes = 0;
	int64_t restore_chunks = 0;
	/* Set of hashes already restored; keys are owned by the table. */
	GHashTable* chunks = g_hash_table_new_full(g_int_hash, hash20_equal, free, 
			NULL);

	/* RAID Failure part */
	/* 1 - 99 */
	int step = 1;
	/* All chunks lost */
	puts("0");

	if (pophashfile) {
		int popfd = open(pophashfile, O_RDONLY);
		if (popfd < 0) {
			/* Previously the failure was silent: read() on the bad
			 * descriptor simply ended the loop. */
			fprintf(stderr, "Cannot open pop hash file %s: %d!\n",
					pophashfile, errno);
		} else {
			char pophashbuf[20];
			while (read(popfd, pophashbuf, 20) == 20) {
				char *pophash = malloc(20);
				memcpy(pophash, pophashbuf, 20);

				/* restoring a pop chunk */
				/* NOTE(review): chunk.hashlen is not set on this
				 * path, unlike the hash-file path below -- confirm
				 * search_chunk() does not need it here. */
				memcpy(chunk.hash, pophash, 20);

				/* Keep the lookup outside assert() so that it still
				 * runs when NDEBUG removes the assertion. */
				int found = search_chunk(&chunk);
				assert(found);
				(void)found;

				int64_t sum = chunk.csize;
				sum *= chunk.rcount;
				restore_chunks += chunk.rcount;

				restore_physical_bytes += chunk.csize;
				restore_logical_bytes += sum;

				int progress = restore_physical_bytes * 100/psize;
				emit_restore_progress(progress, &step, weighted
						? 1.0*restore_logical_bytes/lsize
						: 1.0*restore_chunks/total_chunks);

				assert(!g_hash_table_contains(chunks, pophash));
				g_hash_table_insert(chunks, pophash, NULL);
			}
			close(popfd);
		}
	}

	int pc = 0;
	for (; pc < count; pc++) {
		handle = hashfile_open(path[pc]);

		if (!handle) {
			fprintf(stderr, "Error opening hash file: %d!", errno);
			exit(-1);
		}


		while (1) {
			int ret = hashfile_next_file(handle);
			if (ret < 0) {
				fprintf(stderr,
						"Cannot get next file from a hashfile: %d!\n",
						errno);
				exit(-1);
			}
			if (ret == 0)
				break;

			while (1) {
				ci = hashfile_next_chunk(handle);
				if (!ci) /* exit the loop if it was the last chunk */
					break;

				/* Lookup key = raw hash followed by the chunk size. */
				int hashsize = hashfile_hash_size(handle)/8;
				int chunksize = ci->size;
				memcpy(chunk.hash, ci->hash, hashsize);
				memcpy(&chunk.hash[hashsize], &chunksize, sizeof(chunksize));
				chunk.hashlen = hashfile_hash_size(handle)/8 + sizeof(chunksize);

				if (!g_hash_table_contains(chunks, chunk.hash)) {
					/* Keep the lookup outside assert() so that it
					 * still runs when NDEBUG removes the assertion. */
					int found = search_chunk(&chunk);
					assert(found);
					(void)found;

					int64_t sum = chunk.csize;
					sum *= chunk.rcount;
					restore_chunks += chunk.rcount;

					restore_physical_bytes += chunk.csize;
					restore_logical_bytes += sum;

					int progress = restore_physical_bytes * 100/psize;
					emit_restore_progress(progress, &step, weighted
							? 1.0*restore_logical_bytes/lsize
							: 1.0*restore_chunks/total_chunks);

					char* hash = malloc(20);
					memcpy(hash, chunk.hash, 20);
					g_hash_table_insert(chunks, hash, NULL);
				}
			}
		}
		hashfile_close(handle);
	}
	g_hash_table_destroy(chunks);

	puts("1.0");
}
Beispiel #21
0
/*
 * Intra-file redundancy.
 *
 * For every chunk with more references than files, the entries that follow
 * the rcount references in r.list are scanned; each position whose entry
 * equals its predecessor is charged to that file id (one more redundant
 * chunk, csize more redundant bytes).  Every file collected this way is
 * printed, followed by the number of such files on stderr.
 */
void collect_intra_redundant_files(){
    struct chunk_rec r;
    struct file_rec f;
    GHashTable *by_fid;
    GHashTableIter iter;
    gpointer key, value;

    init_iterator("CHUNK");

    memset(&r, 0, sizeof(r));
    memset(&f, 0, sizeof(f));

    /* Keys point at the fid stored inside each value; values are freed. */
    by_fid = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, free);

    while(iterate_chunk(&r, 0) == 0){
        int *flist;
        int pos;

        if(r.rcount <= r.fcount)
            continue;

        flist = &r.list[r.rcount];
        for(pos = 1; pos < r.rcount; pos++){
            struct intra_redundant_file *entry;

            if(flist[pos] != flist[pos-1])
                continue;

            /* An intra-file redundancy */
            entry = g_hash_table_lookup(by_fid, &flist[pos]);
            if(entry == NULL){
                /* First hit for this file: load its record and open an entry. */
                f.fid = flist[pos];
                int ret = search_file(&f);
                assert(ret == 1);

                entry = malloc(sizeof(struct intra_redundant_file));
                entry->fid = f.fid;
                entry->fsize = f.fsize;
                memcpy(entry->minhash, f.minhash, sizeof(f.minhash));

                parse_file_suffix(f.fname, entry->suffix, sizeof(entry->suffix));
                if(strncmp(entry->suffix, "edu,", 4) == 0)
                    strcpy(entry->suffix, "edu,?");
                else if(entry->suffix[0] == '\0')
                    strcpy(entry->suffix, ".None");

                entry->cnum = 0;
                entry->csize = 0;

                g_hash_table_insert(by_fid, &entry->fid, entry);
            }

            entry->cnum++;
            entry->csize += r.csize;
        }
    }

    close_iterator();

    g_hash_table_iter_init(&iter, by_fid);
    while(g_hash_table_iter_next(&iter, &key, &value)){
        struct intra_redundant_file *entry = value;

        printf("FILE %d %" PRId64 " %s %d %d ", entry->fid, entry->fsize, entry->suffix,
                entry->cnum, entry->csize);
        print_hash(entry->minhash, 10);
    }

    fprintf(stderr, "%d intra-redundant files\n", g_hash_table_size(by_fid));

    g_hash_table_destroy(by_fid);

}
Beispiel #22
0
/*
 * Break down the redundant references of all chunks whose reference count
 * lies in [lb, rb] by where the redundancy comes from, and print the six
 * fractions (header on stderr, values on stdout).
 *
 * Per chunk, rcount - 1 references are counted in total and rcount - fcount
 * of them as intra-file.  Each file but the last is then compared with all
 * later files of the chunk and classified by the strongest relation found:
 * identical hash, equal minhash, equal maxhash, equal suffix, or none.
 */
void analyze_references_source(unsigned int lb, unsigned int rb){
    /* Relations ordered from weakest to strongest. */
    enum { REL_DISTINCT, REL_SUFFIX, REL_SIMILAR, REL_MIN_SIMILAR, REL_IDENTICAL };

    struct chunk_rec r;
    int total_references = 0;
    int intra_file = 0;
    int identical_file = 0;
    int min_similar_file = 0;
    int similar_file = 0;
    int same_suffix_file = 0;
    int distinct_file = 0;

    init_iterator("CHUNK");
    memset(&r, 0, sizeof(r));

    while(iterate_chunk(&r, 1) == 0){
        int a, b;

        if(r.rcount < lb || r.rcount > rb)
            continue;

        total_references += r.rcount - 1;
        intra_file += r.rcount - r.fcount;

        /* A single file has nothing to be compared with. */
        if(r.fcount == 1)
            continue;

        struct file_rec files[r.fcount];
        memset(files, 0, sizeof(files));

        /* File ids follow the rcount reference entries in r.list. */
        for(a = 0; a < r.fcount; a++){
            files[a].fid = r.list[r.rcount + a];
            search_file(&files[a]);
        }

        /* analyze files */
        for(a = 0; a < r.fcount-1; a++){
            int best = REL_DISTINCT;

            for(b = a+1; b < r.fcount; b++){
                int rel = REL_DISTINCT;

                if(memcmp(files[a].hash, files[b].hash,
                            sizeof(files[a].hash)) == 0){
                    rel = REL_IDENTICAL;
                }else if(memcmp(files[a].minhash, files[b].minhash,
                            sizeof(files[a].minhash)) == 0){
                    rel = REL_MIN_SIMILAR;
                }else if(memcmp(files[a].maxhash, files[b].maxhash,
                            sizeof(files[a].maxhash)) == 0){
                    rel = REL_SIMILAR;
                }else{
                    char suf1[8];
                    char suf2[8];
                    parse_file_suffix(files[a].fname, suf1, sizeof(suf1));
                    parse_file_suffix(files[b].fname, suf2, sizeof(suf2));
                    if(strcmp(suf1, suf2) == 0)
                        rel = REL_SUFFIX;
                }

                if(rel > best)
                    best = rel;

                /* Nothing can beat an identical file; stop looking. */
                if(best == REL_IDENTICAL)
                    break;
            }

            switch(best){
                case REL_IDENTICAL:   identical_file++;   break;
                case REL_MIN_SIMILAR: min_similar_file++; break;
                case REL_SIMILAR:     similar_file++;     break;
                case REL_SUFFIX:      same_suffix_file++; break;
                default:              distinct_file++;    break;
            }
        }

        for(a = 0; a < r.fcount; a++)
            free(files[a].fname);
    }

    close_iterator();
    assert(total_references == intra_file + identical_file + min_similar_file
            + similar_file + same_suffix_file + distinct_file);

    fprintf(stderr, "%8s %8s %8s %8s %8s %8s\n", "Intra", "Ident", "Min", "+Max", "Suffix", "Dist");
    fprintf(stdout, "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n",
            1.0*intra_file/total_references,
            1.0*identical_file/total_references,
            1.0*min_similar_file/total_references,
            1.0*similar_file/total_references,
            1.0*same_suffix_file/total_references,
            1.0*distinct_file/total_references);
}
Beispiel #23
0
/*
 * Group all non-empty files by their `minhash` field and print every group
 * that survives the only_one_item() filter and is not made up solely of
 * identical files: one "HASH" line per group followed by one "FILE" line per
 * member.  Bin and empty-file statistics go to stderr.
 *
 * Always returns 0.
 */
int collect_similar_files(){
    struct file_rec r;
    GHashTable* bins;
    GHashTableIter iter;
    gpointer key, value;
    char suffix[8];
    int empty_files = 0;
    int iden_count = 0;

    init_iterator("FILE");
    memset(&r, 0, sizeof(r));

    /* Keys point into the file_list values; free_file_list releases values. */
    bins = g_hash_table_new_full(g_int_hash, hash_equal, NULL, free_file_list);

    while(iterate_file(&r) == 0){
        struct file_list* bin;
        struct file_item* entry;

        if(r.fsize <= 0){
            empty_files++;
            continue;
        }

        bin = g_hash_table_lookup(bins, r.minhash);
        if(bin == NULL){
            /* First file with this minhash: open a new bin. */
            bin = malloc(sizeof(struct file_list));
            bin->head = NULL;
            memcpy(bin->hash, r.minhash, sizeof(r.minhash));
            g_hash_table_insert(bins, bin->hash, bin);
        }

        /* The file name is stored inline after the item header. */
        entry = malloc(sizeof(*entry) + strlen(r.fname) + 1);
        entry->fid = r.fid;
        entry->fsize = r.fsize;
        memcpy(entry->hash, r.hash, sizeof(r.hash));
        strcpy(entry->fname, r.fname);
        bin->head = g_list_prepend(bin->head, entry);
    }

    close_iterator();

    fprintf(stderr, "totally %d bins, %d empty files\n", g_hash_table_size(bins), empty_files);
    g_hash_table_foreach_remove(bins, only_one_item, NULL);

    g_hash_table_iter_init(&iter, bins);
    while(g_hash_table_iter_next(&iter, &key, &value)){
        struct file_list* bin = value;
        GList* elem;

        /* excluding the bins of all identical files */
        if(all_files_are_identical(bin)){
            iden_count++;
            continue;
        }

        printf("HASH %d ", g_list_length(bin->head));
        print_hash(bin->hash, 10);

        elem = g_list_first(bin->head);
        do{
            struct file_item* entry = elem->data;

            /* Normalise the suffix before printing it. */
            parse_file_suffix(entry->fname, suffix, sizeof(suffix));
            if(strncmp(suffix, "edu,", 4) == 0)
                strcpy(suffix, "edu,?");
            else if(suffix[0] == '\0')
                strcpy(suffix, ".None");

            printf("FILE %d %" PRId64 " %s %s ", entry->fid, entry->fsize,
                    entry->fname, suffix);
            print_hash(entry->hash, 10);

            elem = g_list_next(elem);
        }while(elem != NULL);
    }

    fprintf(stderr, "%d bins (size > 1), from %d of which all files are identical\n", g_hash_table_size(bins), iden_count);
    g_hash_table_destroy(bins);

    return 0;
}
/*
 * Begin a vert-of-edge iteration: calls init_iterator() and zeroes
 * iter->count.
 */
void bmiter__vert_of_edge_begin(BMIter *iter)
{
	init_iterator(iter);

	/* Counting starts from zero. */
	iter->count = 0;
}
/*
 * Begin a loop-of-face iteration: both loop cursors start at
 * BM_FACE_FIRST_LOOP() of the face in iter->pdata.
 */
void bmiter__loop_of_face_begin(BMIter *iter)
{
	init_iterator(iter);

	iter->l_next = BM_FACE_FIRST_LOOP(iter->pdata);
	iter->l_first = iter->l_next;
}
Beispiel #26
0
// Point this iterator at `test_set` (stored by address, not copied) and
// then call init_iterator().
void TestSetIterator::set_test_set(ETestSet &test_set)
{
    ETestSet *const target = &test_set;

    this->test_set = target;
    init_iterator();
}
Beispiel #27
0
/*
 * Initialise iterator `i` for handle `h` by running init_iterator() on the
 * iterator's own class (i->cclass); the result of that call is returned.
 */
int grib_iterator_init(grib_iterator* i, grib_handle *h, grib_arguments* args)
{
    const int status = init_iterator(i->cclass, i, h, args);

    return status;
}
Beispiel #28
0
/*
 * Begin an edge-of-face iteration: both loop cursors start at
 * BM_FACE_FIRST_LOOP() of the face in iter->pdata.
 */
void bmiter__edge_of_face_begin(BMIter *iter)
{
	init_iterator(iter);

	iter->nextloop = BM_FACE_FIRST_LOOP(iter->pdata);
	iter->firstloop = iter->nextloop;
}
Beispiel #29
0
/**
 * Construct a non-persistent iterator over `jiterable`: m_persistent is set
 * to false and m_jiterator is initialised from
 * init_iterator(jiterable, false).
 */
Iterator::Iterator(jobject jiterable)
    : m_persistent(false),
      m_jiterator(init_iterator(jiterable, false))
{}
Beispiel #30
0
/**
 * Construct a persistent iterator over `jiterable`: m_persistent is set to
 * true and m_jiterator is initialised from init_iterator(jiterable, true).
 *
 * The unnamed bool parameter is never read; it only distinguishes this
 * overload from the single-argument constructor.
 */
Iterator::Iterator(jobject jiterable, bool)
    : m_persistent(true),
      m_jiterator(init_iterator(jiterable, true))
{}