Beispiel #1
0
/*
 * Creates a new hashtable with DESIRED buckets.  If DESIRED < 1 a default
 * size of 65537 buckets is chosen.  If MAXMB > 0, it sets a limit on the
 * maximum memory consumption of the hashtable, measured in megabytes.  When
 * the limit is reached, previous entries are overwritten.
 *
 * Returns the new table, or NULL if any allocation fails; nothing is leaked
 * on failure.
 */
struct hashtable *new_hashtable(int desired, long maxmb)
{
    struct hashtable *t = calloc(1, sizeof *t);

    if (!t) return NULL;

    if (desired < 1) desired = 65537;

    t->n = desired;
    /* Bucket heads and per-bucket counters, both zero-initialised. */
    t->e = calloc(t->n, sizeof *t->e);
    t->num = calloc(t->n, sizeof *t->num);
    if (!t->e || !t->num)
        goto fail;

    /* maxmem stays 0 (from calloc) when no limit was requested. */
    if (maxmb > 0) {
        t->maxmem = maxmb * 1024 * 1024;
    }
    /* Baseline is sampled after the bucket arrays have been allocated. */
    t->startmem = get_mem_usage(getpid());

    return t;
fail:
    /* free(NULL) is a no-op, so partially built tables are handled too. */
    free(t->e);
    free(t->num);
    free(t);
    return NULL;
}
Beispiel #2
0
/*
 * Writes a one-line summary of table T to stream F: total number of
 * entries, the memory taken by the entries versus the process memory growth
 * since the table's recorded baseline, and the minimum / average / maximum
 * bucket length.
 *
 * The "VM efficiency" percentage is reported as 0.0 when the measured growth
 * is not positive, instead of dividing by zero.
 */
void hash_stats(struct hashtable *t, FILE *f)
{
    int i;
    int numentries = 0;
    int maxbucket, minbucket, len;
    int mb, memused;
    double efficiency;

    minbucket = maxbucket = t->num[0];

    for (i=0; i<t->n; i++) {
        len = t->num[i];
        numentries += len;
        if (len > maxbucket)
            maxbucket = len;
        if (len < minbucket)
            minbucket = len;
    }

    /* Payload size of all entries, in MiB (integer division truncates). */
    mb = sizeof(struct hashentry) * numentries / 1024 / 1024;
    memused = get_mem_usage(getpid()) - t->startmem;

    /* Avoid inf/nan (or a negative ratio) when no growth was measured. */
    efficiency = memused > 0 ? mb * 100. / memused : 0.0;

    fprintf(f, "hash: %d entries (%d MiB / %d MiB, %.1f%% VM efficency), bucket min/avg/max %d/%.1f/%d\n",
            numentries, mb, memused, efficiency,
            minbucket, numentries * 1. / t->n, maxbucket);
}
Beispiel #3
0
/*
 * PMI key/value performance benchmark.
 *
 * Every rank publishes key_count "local" and/or "remote" keys of key_size
 * ints, commits and fences, then fetches and verifies the keys of every
 * other rank, timing each phase.  The per-rank results are then published
 * through the same PMI interface and rank 0 prints averages and min/max
 * values to stderr.
 *
 * All per-operation averages are reported as 0 when the corresponding
 * operation counter is 0, so that no NaN ends up in the published values or
 * in the min/max search performed by rank 0.
 */
int main(int argc, char **argv)
{
    char *key_name;
    int *key_val;
    int rank, nproc;
    int cnt;
    int *local_ranks, local_cnt;
    int *remote_ranks = NULL, remote_cnt;
    double start, total_start, get_loc_time = 0, get_rem_time = 0, put_loc_time = 0,
           put_rem_time = 0, commit_time = 0, fence_time = 0, init_time = 0, total_time = 0;
    int get_loc_cnt = 0, get_rem_cnt = 0, put_loc_cnt = 0, put_rem_cnt = 0;
    double mem_pss = 0.0, mem_rss = 0.0;
    char have_shmem;
    size_t shmem_job_info, shmem_all;

    parse_options(argc, argv);

    total_start = GET_TS;
    start = GET_TS;
    pmi_init(&rank, &nproc);
    init_time += GET_TS - start;

    pmi_get_local_ranks(&local_ranks, &local_cnt);
    remote_cnt = nproc - local_cnt;
    if( remote_cnt ){
        remote_ranks = calloc(remote_cnt, sizeof(int));
        if( NULL == remote_ranks ){
            fprintf(stderr, "%d: failed to allocate the remote rank list\n", rank);
            abort();
        }
        fill_remote_ranks(local_ranks, local_cnt, remote_ranks, nproc);
    }

    pmi_get_shmem_size(&have_shmem, &shmem_job_info);

    /*
     * Make sure that no other rank started publishing keys in the dstore
     * before we finished with shmem size screening
     */
    pmi_fence( 0 );

    if( 0 == rank && debug_on ){
        int i;
        fprintf(stderr,"%d: local ranks: ", rank);
        for(i = 0; i < local_cnt; i++){
            fprintf(stderr,"%d ", local_ranks[i]);
        }
        fprintf(stderr,"\n");
        fflush(stderr);
    }

    /* ---- put phase: publish this rank's keys ---- */
    key_val = calloc(key_size, sizeof(int));
    if( NULL == key_val && key_size > 0 ){
        fprintf(stderr, "%d: failed to allocate the key value buffer\n", rank);
        abort();
    }
    for (cnt=0; cnt < key_count; cnt++) {
        int i;
        if( local_cnt > 0 ){
            (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt);
            for(i=0; i < key_size; i++){
                key_val[i] = rank * rank_shift + cnt;
            }
            put_loc_cnt++;
            start = GET_TS;
            pmi_put_key_loc(key_name, key_val, key_size);
            put_loc_time += GET_TS - start;
            free(key_name);
        }
        if( remote_cnt > 0 ){
            (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt);
            for(i=0; i < key_size; i++){
                key_val[i] = rank * rank_shift + cnt;
            }
            put_rem_cnt++;
            start = GET_TS;
            pmi_put_key_rem(key_name, key_val, key_size);
            put_rem_time += GET_TS - start;
            free(key_name);
        }
    }
    free(key_val);

    start = GET_TS;
    pmi_commit();
    commit_time += GET_TS - start;

    start = GET_TS;
    pmi_fence( !direct_modex );
    fence_time += GET_TS - start;

    /* ---- get phase: fetch and verify the keys of all other ranks ---- */
    for (cnt=0; cnt < key_count; cnt++) {
        int i;

        for(i = 0; i < remote_cnt; i++){
            int peer = remote_ranks[i], j;
            int *peer_val, peer_size;
            (void)asprintf(&key_name, "KEY-%d-remote-%d", peer, cnt);

            start = GET_TS;
            pmi_get_key_rem(peer, key_name, &peer_val, &peer_size);
            get_rem_time += GET_TS - start;
            get_rem_cnt++;

            if( key_size != peer_size ){
                fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
                        peer, key_name, key_size, peer_size);
                abort();
            }

            for(j=0; j < key_size; j++){
                if( peer_val[j] != peer * rank_shift + cnt ){
                    fprintf(stderr, "%d: error in key %s value (byte %d)\n",
                            peer, key_name, j);
                    abort();
                }
            }
            free(key_name);
            free(peer_val);
        }

        /* same check for the keys published by the node-local ranks */
        for(i = 0; i < local_cnt; i++){
            int peer = local_ranks[i], j;
            int *peer_val, peer_size;
            (void)asprintf(&key_name, "KEY-%d-local-%d", peer, cnt);

            start = GET_TS;
            pmi_get_key_loc(peer, key_name, &peer_val, &peer_size);
            get_loc_time += GET_TS - start;
            get_loc_cnt++;

            if( key_size != peer_size ){
                fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
                        peer, key_name, key_size, peer_size);
                abort();
            }

            for(j=0; j < key_size; j++){
                if( peer_val[j] != peer * rank_shift + cnt ){
                    fprintf(stderr, "%d: error in key %s value (byte %d)\n",
                            peer, key_name, j);
                    abort();
                }
            }
            free(key_name);
            free(peer_val);
        }
    }

    total_time = GET_TS - total_start;

    if (0 != get_mem_usage(&mem_pss, &mem_rss)) {
        fprintf(stderr, "Rank %d: error get memory usage\n", rank);
        abort();
    }

    /*
     * Per-operation averages.  A counter may legitimately be 0 (e.g. no
     * remote ranks on a single-node run); report 0 rather than NaN/inf.
     */
    int get_all_cnt = get_loc_cnt + get_rem_cnt;
    double avg_get_loc = get_loc_cnt ? get_loc_time / get_loc_cnt : 0;
    double avg_get_rem = get_rem_cnt ? get_rem_time / get_rem_cnt : 0;
    double avg_get_all = get_all_cnt ? (get_loc_time + get_rem_time) / get_all_cnt : 0;
    double avg_put_loc = put_loc_cnt ? put_loc_time / put_loc_cnt : 0;
    double avg_put_rem = put_rem_cnt ? put_rem_time / put_rem_cnt : 0;

    if( debug_on ){
        fprintf(stderr,"%d: get: total %lf avg loc %lf rem %lf all %lf ; put: %lf %lf commit: %lf fence %lf\n",
                rank, (get_loc_time + get_rem_time),
                avg_get_loc, avg_get_rem,
                avg_get_all,
                avg_put_loc, avg_put_rem,
                commit_time, fence_time);
    }

    pmi_get_shmem_size(&have_shmem, &shmem_all);
    /*
     * The barrier ensures that all procs finished key fetching
     * we had issues with dstor/lockless case evaluation
     */
    pmi_fence( 0 );

    /* Out of the perf path - send our results to rank 0 using same PMI */
    char key[128];
    sprintf(key, "PMIX_PERF_get_total_time.%d", rank);
    pmi_put_double(key, get_rem_time + get_loc_time);

    sprintf(key, "PMIX_PERF_get_loc_time.%d", rank);
    pmi_put_double(key, avg_get_loc);

    sprintf(key, "PMIX_PERF_get_rem_time.%d", rank);
    pmi_put_double(key, avg_get_rem);

    sprintf(key, "PMIX_PERF_get_time.%d", rank);
    pmi_put_double(key, avg_get_all);

    sprintf(key, "PMIX_PERF_put_loc_time.%d", rank);
    pmi_put_double(key, avg_put_loc);

    sprintf(key, "PMIX_PERF_put_rem_time.%d", rank);
    pmi_put_double(key, avg_put_rem);

    sprintf(key, "PMIX_PERF_commit_time.%d", rank);
    pmi_put_double(key, commit_time);

    sprintf(key, "PMIX_PERF_fence_time.%d", rank);
    pmi_put_double(key, fence_time);

    sprintf(key, "PMIX_PERF_init_time.%d", rank);
    pmi_put_double(key, init_time);

    sprintf(key, "PMIX_PERF_total_time.%d", rank);
    pmi_put_double(key, total_time);

    sprintf(key, "PMIX_PERF_mem_pss.%d", rank);
    pmi_put_double(key, mem_pss);

    sprintf(key, "PMIX_PERF_mem_rss.%d", rank);
    pmi_put_double(key, mem_rss);

    pmi_commit();
    pmi_fence( 1 );

    /* ---- rank 0 collects everybody's numbers and prints the report ---- */
    if( rank == 0 ){
        double  cum_get_total_time = 0,
                cum_get_loc_time = 0,
                cum_get_rem_time = 0,
                cum_get_time = 0,
                cum_put_total_time = 0,
                cum_put_loc_time = 0,
                cum_put_rem_time = 0,
                cum_commit_time = 0,
                cum_fence_time = 0,
                cum_init_time = 0,
                cum_total_time = 0,
                cum_mem_pss = 0.0;

        /* min/max searches are seeded with rank 0's own (NaN-free) values */
        double  min_get_loc_time = avg_get_loc,
                max_get_loc_time = avg_get_loc,
                min_get_rem_time = avg_get_rem,
                max_get_rem_time = avg_get_rem,
                min_init_time = init_time,
                max_init_time = init_time,
                min_total_time = total_time,
                max_total_time = total_time,
                min_mem_pss = mem_pss,
                max_mem_pss = 0.0;

        int min_get_loc_idx = 0, max_get_loc_idx = 0;
        int min_get_rem_idx = 0, max_get_rem_idx = 0;

        char c_get_ltime[128], c_get_rtime[128], c_get_ttime[128];
        char c_put_ltime[128], c_put_rtime[128];
        int i;
        for(i = 0; i < nproc; i++){
            double val;
            sprintf(key, "PMIX_PERF_get_total_time.%d", i);
            cum_get_total_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_get_loc_time.%d", i);
            val = pmi_get_double(i, key);
            cum_get_loc_time += val;
            if( min_get_loc_time > val ){
                min_get_loc_time = val;
                min_get_loc_idx = i;
            }
            if( max_get_loc_time < val ){
                max_get_loc_time = val;
                max_get_loc_idx = i;
            }

            sprintf(key, "PMIX_PERF_get_rem_time.%d", i);
            val = pmi_get_double(i, key);
            cum_get_rem_time += val;
            if( min_get_rem_time > val ){
                min_get_rem_time = val;
                min_get_rem_idx = i;
            }
            if( max_get_rem_time < val ){
                max_get_rem_time = val;
                max_get_rem_idx = i;
            }

            sprintf(key, "PMIX_PERF_get_time.%d", i);
            cum_get_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_put_loc_time.%d", i);
            cum_put_loc_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_put_rem_time.%d", i);
            cum_put_rem_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_commit_time.%d", i);
            cum_commit_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_fence_time.%d", i);
            cum_fence_time += pmi_get_double(i, key);

            sprintf(key, "PMIX_PERF_init_time.%d", i);
            val = pmi_get_double(i, key);
            cum_init_time += val;
            if (min_init_time > val) {
                min_init_time = val;
            }
            if (max_init_time < val) {
                max_init_time = val;
            }

            sprintf(key, "PMIX_PERF_total_time.%d", i);
            val = pmi_get_double(i, key);
            cum_total_time += val;
            if (min_total_time > val) {
                min_total_time = val;
            }
            if (max_total_time < val) {
                max_total_time = val;
            }

            sprintf(key, "PMIX_PERF_mem_pss.%d", i);
            val = pmi_get_double(i, key);
            cum_mem_pss += val;
            if (min_mem_pss > val) {
                min_mem_pss = val;
            }
            if (max_mem_pss < val) {
                max_mem_pss = val;
            }
        }

        /* Columns for operations this rank never performed are dashed out */
        if( get_loc_cnt ){
            sprintf(c_get_ltime,"%lf", cum_get_loc_time / nproc);
        } else {
            sprintf(c_get_ltime,"--------");
        }
        if( get_rem_cnt ){
            sprintf(c_get_rtime,"%lf", cum_get_rem_time / nproc);
        } else {
            sprintf(c_get_rtime,"--------");
        }

        if( get_loc_cnt + get_rem_cnt ){
            sprintf(c_get_ttime,"%lf", cum_get_time / nproc);
        } else {
            sprintf(c_get_ttime,"--------");
        }

        if( put_loc_cnt ){
            sprintf(c_put_ltime,"%lf", cum_put_loc_time / nproc);
            cum_put_total_time += cum_put_loc_time;
        } else {
            sprintf(c_put_ltime,"--------");
        }
        if( put_rem_cnt ){
            sprintf(c_put_rtime,"%lf", cum_put_rem_time / nproc);
            cum_put_total_time += cum_put_rem_time;
        } else {
            sprintf(c_put_rtime,"--------");
        }

        fprintf(stderr,"init: %lf; put: %lf; commit: %lf; fence: %lf; get: %lf; total: %lf\n",
                cum_init_time / nproc,
                cum_put_total_time / nproc,
                cum_commit_time / nproc, cum_fence_time / nproc,
                cum_get_total_time / nproc,
                cum_total_time / nproc);
        fprintf(stderr,"init:          max %lf min %lf\n",  max_init_time, min_init_time);
        fprintf(stderr,"put:           loc %s rem %s\n", c_put_ltime, c_put_rtime);
        fprintf(stderr,"get:           loc %s rem %s all %s\n", c_get_ltime, c_get_rtime, c_get_ttime);
        fprintf(stderr,"get:           min loc %lf rem %lf (loc: %d, rem: %d)\n",
                min_get_loc_time, min_get_rem_time, min_get_loc_idx, min_get_rem_idx);
        fprintf(stderr,"get:           max loc %lf rem %lf (loc: %d, rem: %d)\n",
                max_get_loc_time, max_get_rem_time, max_get_loc_idx, max_get_rem_idx);
        fprintf(stderr,"total:         max %lf min %lf\n", max_total_time, min_total_time);
        fprintf(stderr,"mem:           loc %0.2lf avg %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n",
                mem_pss, cum_mem_pss / nproc, min_mem_pss, max_mem_pss, cum_mem_pss);
        if( have_shmem ) {
            fprintf(stderr,"shmem:         job_info: %0.2lf total %0.2lf Kb\n",
                    (double)shmem_job_info / 1024, (double)shmem_all / 1024);
        }
    }

    /* remote_ranks was allocated here; free(NULL) is fine when unused */
    free(remote_ranks);

    pmi_fini();

    return 0;
}
Beispiel #4
0
/**
 * Read a directory, build a suffix tree for each file in it and save the
 * measured file information in entries
 * @param folder the folder where the files are
 * @param passed VAR param number of passed tests
 * @param failed VAR param number of failed tests
 * @param log handed on to suffixtree_create
 * @return number of files recorded or 0 on failure
 */
static int read_dir( char *folder, int *passed, int *failed, plugin_log *log )
{
    int n_files = 0;
    DIR *dir;
    struct dirent *ent;
    if ((dir = opendir(folder)) != NULL) 
    {
        while ((ent = readdir(dir)) != NULL) 
        {
            int flen;
            int old_passed = *passed;
            // set when the scan must stop, after this file's resources
            // have been released
            int stop = 0;
            if ( strcmp(ent->d_name,".")!=0&&strcmp(ent->d_name,"..")!=0 )
            {
                char *path = create_path(folder,ent->d_name);
                char *txt = read_file( path, &flen );
                if ( txt == NULL )
                {
                    // unreadable file ends the scan; don't leak the path
                    free( path );
                    break;
                }
                else
                {
                    // memory and time are sampled around the conversion
                    // and the tree construction
                    long mem2,mem1 = get_mem_usage();
                    int64_t time2,time1 = epoch_time();
                    int ulen = measure_from_encoding( txt, flen, "utf-8" );
                    if ( ulen > 0 )
                    {
                        UChar *dst = calloc( ulen+1, sizeof(UChar) );
                        if ( dst != NULL )
                        {
                            int res = convert_from_encoding( txt, flen, dst, 
                                ulen+1, "utf-8" );
                            if ( res )
                            {
                                suffixtree *tree = suffixtree_create( dst, 
                                    ulen, log );
                                if ( tree != NULL )
                                {
                                    mem2 = get_mem_usage();
                                    time2 = epoch_time();
                                    entry *e = calloc( 1, sizeof(entry) );
                                    if ( e != NULL )
                                    {
                                        e->file = strdup(ent->d_name);
                                        e->space = mem2-mem1;
                                        e->time = time2-time1;
                                        e->size = flen;
                                        append_entry( e );
                                        (*passed)++;
                                        n_files++;
                                    }
                                    else
                                    {
                                        n_files = 0;
                                        dispose_entries();
                                        fprintf(stderr,
                                            "test: failed to allocate entry\n");
                                        // fall through the cleanup below
                                        // instead of leaking tree/dst/txt/path
                                        stop = 1;
                                    }
                                    suffixtree_dispose( tree );
                                }
                            }
                            free(dst);
                        }
                    }
                    free( txt );
                }
                if ( !stop && *passed == old_passed )
                {
                    (*failed)++;
                    fprintf(stderr,"suffixtree: failed to create tree %s\n",path);
                }
                free( path );
                if ( stop )
                    break;
            }
        }
        closedir( dir );
    }
    else
        fprintf(stderr,"test: failed to open directory %s\n",folder);
    return n_files;
}
/*
 * Candidate filter driver.
 *
 * Loads the sequences from one or two input files (plus an optional file of
 * repeated mers), then walks the comparison space in rectangles of
 * rectangle_size sequences: for each rectangle the mer table is loaded,
 * candidates are generated and written to the output stream, and the tables
 * are freed again before moving on.
 */
int main(int argc, char **argv)
{
	FILE *input;
	FILE *repeats = 0;
	FILE *output;

	int start_x, end_x, start_y, end_y;

	debug_config(progname);
	get_options(argc, argv, progname);

	unsigned long start_mem, cand_mem, table_mem;

	input = fopen(sequence_filename, "r");
	if(!input) fatal("couldn't open %s: %s\n",sequence_filename,strerror(errno));

	if(repeat_filename) {
		repeats = fopen(repeat_filename, "r");
		if(!repeats) fatal("couldn't open %s: %s\n",repeat_filename,strerror(errno));
	}

	if(output_filename) {
		output = fopen(output_filename, "w");
		// Fail early instead of handing a NULL stream to the writers below.
		if(!output) fatal("couldn't open %s: %s\n",output_filename,strerror(errno));
	} else {
		output = stdout;
	}

	// Data is in the form:
	// >id metadata
	// data
	// >id metadata
	// data
	// >>
	// ...

	set_k(kmer_size);
	set_window_size(window_size);

	// If we only give one file, do an all vs. all
	// on them.
	if(!second_sequence_filename) {
		num_seqs = load_seqs(input);
		start_x = 0;
		end_x = num_seqs;
		start_y = 0;
		end_y = num_seqs;
	}
	// If we had two files, do not compare ones from
	// the same file to each other.
	else {
		FILE *input2 = fopen(second_sequence_filename, "r");
		if(!input2) {
			fprintf(stderr, "Could not open file %s for reading.\n", second_sequence_filename);
			exit(1);
		}
		num_seqs = load_seqs_two_files(input, &end_x, input2, &end_y);
		// The second file is only needed for loading; close it like the first.
		fclose(input2);
		start_x = 0;
		start_y = end_x;
		debug(D_DEBUG,"First file contains %d sequences, stored from (%d,%d].\n", end_x, start_x, end_x);
		debug(D_DEBUG,"Second file contains %d sequences, stored from (%d,%d].\n", end_y-end_x, start_y, end_y);
	}
	fclose(input);

	debug(D_DEBUG,"Loaded %d sequences\n",num_seqs);

	init_cand_table(num_seqs * 5);
	init_mer_table(num_seqs * 5);

	if(repeats) {
		int repeat_count = init_repeat_mer_table(repeats, 2000000, 0);
		fclose(repeats);
		debug(D_DEBUG,"Loaded %d repeated mers\n", repeat_count);
	}

	if(rectangle_size == -1) {
		// Do get_mem_avail*0.95 to leave some memory for overhead
		rectangle_size = DYNAMIC_RECTANGLE_SIZE(max_mem_kb);
		debug(D_DEBUG,"Mem avail: %lu, rectangle size: %d\n",(unsigned long)MEMORY_FOR_MERS(max_mem_kb), rectangle_size);
	}

	int curr_start_x = start_x;
	int curr_start_y = start_y;

	candidate_t *output_list = 0;
	int num_in_list;

	// Outer loop walks the y range, inner loop the x range, one rectangle
	// of at most rectangle_size sequences per side at a time.
	while(curr_start_y < end_y) {
		while(curr_start_x < end_x) {
			int rect_end_x = MIN(curr_start_x + rectangle_size, end_x);
			int rect_end_y = MIN(curr_start_y + rectangle_size, end_y);

			if(start_x == start_y) {
				debug(D_DEBUG,"Loading mer table (%d,%d)\n", curr_rect_x, curr_rect_y);
			} else {
				debug(D_DEBUG,"Loading mer table for [%d,%d) and [%d,%d)\n",curr_start_x, rect_end_x, curr_start_y, rect_end_y);
			}

			start_mem = get_mem_usage();

			load_mer_table_subset(curr_start_x, rect_end_x, curr_start_y, rect_end_y, (curr_start_x == curr_start_y));

			table_mem = get_mem_usage();

			debug(D_DEBUG,"Finished loading, now generating candidates\n");
			debug(D_DEBUG,"Memory used: %lu\n", table_mem - start_mem);

			generate_candidates();
			cand_mem = get_mem_usage();

			debug(D_DEBUG,"Total candidates generated: %llu\n", (long long unsigned int) total_cand);
			debug(D_DEBUG,"Candidate memory used: %lu\n", cand_mem - table_mem);

			output_list = retrieve_candidates(&num_in_list);
			output_candidate_list(output, output_list, num_in_list);
			free(output_list);
			fflush(output);

			debug(D_DEBUG,"Now freeing\n");

			free_cand_table();
			free_mer_table();

			debug(D_DEBUG,"Successfully output and freed!\n");

			curr_rect_x++;
			curr_start_x += rectangle_size;
		}
		curr_rect_y++;
		curr_start_y += rectangle_size;
		curr_rect_x = curr_rect_y;
		// All-vs-all (start_y == 0): restart x at the current y position.
		// Two-file mode: restart x from the beginning of the first file.
		if(start_y == 0) {
			curr_start_x = curr_start_y;
		} else {
			curr_start_x = start_x;
		}
	}

	fclose(output);

	return 0;
}