int main(int argc, char **argv) { int rc; char *key_name; int *key_val; int rank, nproc; int cnt; int *local_ranks, local_cnt; int *remote_ranks, remote_cnt; double start, total_start, get_loc_time = 0, get_rem_time = 0, put_loc_time = 0, put_rem_time = 0, commit_time = 0, fence_time = 0, init_time = 0, total_time = 0; int get_loc_cnt = 0, get_rem_cnt = 0, put_loc_cnt = 0, put_rem_cnt = 0; double mem_pss = 0.0, mem_rss = 0.0; char have_shmem; size_t shmem_job_info, shmem_all; parse_options(argc, argv); total_start = GET_TS; start = GET_TS; pmi_init(&rank, &nproc); init_time += GET_TS - start; pmi_get_local_ranks(&local_ranks, &local_cnt); remote_cnt = nproc - local_cnt; if( remote_cnt ){ remote_ranks = calloc(remote_cnt, sizeof(int)); fill_remote_ranks(local_ranks, local_cnt, remote_ranks, nproc); } pmi_get_shmem_size(&have_shmem, &shmem_job_info); /* * Make sure that no other rank started publishing keys in the dstore * before we finished with shmem size screening */ pmi_fence( 0 ); if( 0 == rank && debug_on ){ int i; fprintf(stderr,"%d: local ranks: ", rank); for(i = 0; i < local_cnt; i++){ fprintf(stderr,"%d ", local_ranks[i]); } fprintf(stderr,"\n"); fflush(stderr); } key_val = calloc(key_size, sizeof(int)); for (cnt=0; cnt < key_count; cnt++) { int i; if( local_cnt > 0 ){ (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt); for(i=0; i < key_size; i++){ key_val[i] = rank * rank_shift + cnt; } put_loc_cnt++; start = GET_TS; pmi_put_key_loc(key_name, key_val, key_size); put_loc_time += GET_TS - start; free(key_name); } if( remote_cnt > 0 ){ (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt); for(i=0; i < key_size; i++){ key_val[i] = rank * rank_shift + cnt; } put_rem_cnt++; start = GET_TS; pmi_put_key_rem(key_name, key_val, key_size); put_rem_time += GET_TS - start; free(key_name); } } free(key_val); start = GET_TS; pmi_commit(); commit_time += GET_TS - start; start = GET_TS; pmi_fence( !direct_modex ); fence_time += GET_TS - start; for (cnt=0; cnt < key_count; cnt++) { int i; for(i = 0; i < remote_cnt; i++){ int rank = remote_ranks[i], j; int *key_val, key_size_new; double start; (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt); start = GET_TS; pmi_get_key_rem(rank, key_name, &key_val, &key_size_new); get_rem_time += GET_TS - start; get_rem_cnt++; if( key_size != key_size_new ){ fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n", rank, key_name, key_size, key_size_new); abort(); } for(j=0; j < key_size; j++){ if( key_val[j] != rank * rank_shift + cnt ){ fprintf(stderr, "%d: error in key %s value (byte %d)\n", rank, key_name, j); abort(); } } free(key_name); free(key_val); } // check the returned data for(i = 0; i < local_cnt; i++){ int rank = local_ranks[i], j; int *key_val, key_size_new; double start; (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt); start = GET_TS; pmi_get_key_loc(rank, key_name, &key_val, &key_size_new); get_loc_time += GET_TS - start; get_loc_cnt++; if( key_size != key_size_new ){ fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n", rank, key_name, key_size, key_size_new); abort(); } for(j=0; j < key_size; j++){ if( key_val[j] != rank * rank_shift + cnt ){ fprintf(stderr, "%d: error in key %s value (byte %d)", rank, key_name, j); abort(); } } free(key_name); free(key_val); } } total_time = GET_TS - total_start; if (0 != get_mem_usage(&mem_pss, &mem_rss)) { fprintf(stderr, "Rank %d: error get memory usage", rank); abort(); } if( debug_on ){ fprintf(stderr,"%d: get: total %lf avg loc %lf rem %lf all %lf ; put: %lf %lf commit: %lf fence %lf\n", rank, (get_loc_time + get_rem_time), get_loc_time/get_loc_cnt, get_rem_time/get_rem_cnt, (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt), put_loc_time/put_loc_cnt, put_rem_time/put_rem_cnt, commit_time, fence_time); } pmi_get_shmem_size(&have_shmem, &shmem_all); /* * The barrier ensures that all procs finished key fetching * we had issues with dstor/lockless case evaluation */ pmi_fence( 0 ); /* Out of the perf path - send our results to rank 0 using same PMI */ char key[128]; sprintf(key, "PMIX_PERF_get_total_time.%d", rank); pmi_put_double(key, get_rem_time + get_loc_time); sprintf(key, "PMIX_PERF_get_loc_time.%d", rank); pmi_put_double(key, get_loc_cnt ? get_loc_time/get_loc_cnt : 0 ); sprintf(key, "PMIX_PERF_get_rem_time.%d", rank); pmi_put_double(key, get_rem_cnt ? get_rem_time/get_rem_cnt : 0); sprintf(key, "PMIX_PERF_get_time.%d", rank); pmi_put_double(key, (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt) ); sprintf(key, "PMIX_PERF_put_loc_time.%d", rank); pmi_put_double(key, put_loc_cnt ? put_loc_time / put_loc_cnt : 0); sprintf(key, "PMIX_PERF_put_rem_time.%d", rank); pmi_put_double(key, put_rem_cnt ? put_rem_time / put_rem_cnt : 0); sprintf(key, "PMIX_PERF_commit_time.%d", rank); pmi_put_double(key, commit_time); sprintf(key, "PMIX_PERF_fence_time.%d", rank); pmi_put_double(key, fence_time); sprintf(key, "PMIX_PERF_init_time.%d", rank); pmi_put_double(key, init_time); sprintf(key, "PMIX_PERF_total_time.%d", rank); pmi_put_double(key, total_time); sprintf(key, "PMIX_PERF_mem_pss.%d", rank); pmi_put_double(key, mem_pss); sprintf(key, "PMIX_PERF_mem_rss.%d", rank); pmi_put_double(key, mem_rss); pmi_commit(); pmi_fence( 1 ); if( rank == 0 ){ double cum_get_total_time = 0, cum_get_loc_time = 0, cum_get_rem_time = 0, cum_get_time = 0, cum_put_total_time = 0, cum_put_loc_time = 0, cum_put_rem_time = 0, cum_commit_time = 0, cum_fence_time = 0, cum_init_time = 0, cum_total_time = 0, cum_mem_pss = 0.0; double min_get_loc_time = get_loc_time / get_loc_cnt, max_get_loc_time = get_loc_time / get_loc_cnt, min_get_rem_time = get_rem_time / get_rem_cnt, max_get_rem_time = get_rem_time / get_rem_cnt, min_init_time = init_time, max_init_time = init_time, min_total_time = total_time, max_total_time = total_time, min_mem_pss = mem_pss, max_mem_pss = 0.0; int min_get_loc_idx = 0, max_get_loc_idx = 0; int min_get_rem_idx = 0, max_get_rem_idx = 0; char c_get_ltime[128], c_get_rtime[128], c_get_ttime[128]; char c_put_ltime[128], c_put_rtime[128]; int i; for(i = 0; i < nproc; i++){ double val; sprintf(key, "PMIX_PERF_get_total_time.%d", i); cum_get_total_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_get_loc_time.%d", i); val = pmi_get_double(i, key); cum_get_loc_time += val; if( min_get_loc_time > val ){ min_get_loc_time = val; min_get_loc_idx = i; } if( max_get_loc_time < val ){ max_get_loc_time = val; max_get_loc_idx = i; } sprintf(key, "PMIX_PERF_get_rem_time.%d", i); val = pmi_get_double(i, key); cum_get_rem_time += val; if( min_get_rem_time > val ){ min_get_rem_time = val; min_get_rem_idx = i; } if( max_get_rem_time < val ){ max_get_rem_time = val; max_get_rem_idx = i; } sprintf(key, "PMIX_PERF_get_time.%d", i); cum_get_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_put_loc_time.%d", i); cum_put_loc_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_put_rem_time.%d", i); cum_put_rem_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_commit_time.%d", i); cum_commit_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_fence_time.%d", i); cum_fence_time += pmi_get_double(i, key); sprintf(key, "PMIX_PERF_init_time.%d", i); val = pmi_get_double(i, key); cum_init_time += val; if (min_init_time > val) { min_init_time = val; } if (max_init_time < val) { max_init_time = val; } sprintf(key, "PMIX_PERF_total_time.%d", i); val = pmi_get_double(i, key); cum_total_time += val; if (min_total_time > val) { min_total_time = val; } if (max_total_time < val) { max_total_time = val; } sprintf(key, "PMIX_PERF_mem_pss.%d", i); val = pmi_get_double(i, key); cum_mem_pss += val; if (min_mem_pss > val) { min_mem_pss = val; } if (max_mem_pss < val) { max_mem_pss = val; } } if( get_loc_cnt ){ sprintf(c_get_ltime,"%lf", cum_get_loc_time / nproc); } else { sprintf(c_get_ltime,"--------"); } if( get_rem_cnt ){ sprintf(c_get_rtime,"%lf", cum_get_rem_time / nproc); } else { sprintf(c_get_rtime,"--------"); } if( get_loc_cnt + get_rem_cnt ){ sprintf(c_get_ttime,"%lf", cum_get_time / nproc); } else { sprintf(c_get_ttime,"--------"); } if( put_loc_cnt ){ sprintf(c_put_ltime,"%lf", cum_put_loc_time / nproc); cum_put_total_time += cum_put_loc_time; } else { sprintf(c_put_ltime,"--------"); } if( put_rem_cnt ){ sprintf(c_put_rtime,"%lf", cum_put_rem_time / nproc); cum_put_total_time += cum_put_rem_time; } else { sprintf(c_put_rtime,"--------"); } fprintf(stderr,"init: %lf; put: %lf; commit: %lf; fence: %lf; get: %lf; total: %lf\n", cum_init_time / nproc, cum_put_total_time / nproc, cum_commit_time / nproc, cum_fence_time / nproc, cum_get_total_time / nproc, cum_total_time / nproc); fprintf(stderr,"init: max %lf min %lf\n", max_init_time, min_init_time); fprintf(stderr,"put: loc %s rem %s\n", c_put_ltime, c_put_rtime); fprintf(stderr,"get: loc %s rem %s all %s\n", c_get_ltime, c_get_rtime, c_get_ttime); fprintf(stderr,"get: min loc %lf rem %lf (loc: %d, rem: %d)\n", min_get_loc_time, min_get_rem_time, min_get_loc_idx, min_get_rem_idx); fprintf(stderr,"get: max loc %lf rem %lf (loc: %d, rem: %d)\n", max_get_loc_time, max_get_rem_time, max_get_loc_idx, max_get_rem_idx); fprintf(stderr,"total: max %lf min %lf\n", max_total_time, min_total_time); fprintf(stderr,"mem: loc %0.2lf avg %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n", mem_pss, cum_mem_pss / nproc, min_mem_pss, max_mem_pss, cum_mem_pss); if( have_shmem ) { fprintf(stderr,"shmem: job_info: %0.2lf total %0.2lf Kb\n", (double)shmem_job_info / 1024, (double)shmem_all / 1024); } /* debug printout *//* for(i = 0; i < nproc; i++){ double val; printf("%d: ", i); sprintf(key, "PMIX_PERF_get_loc_time.%d", i); printf("local = %lf ", pmi_get_double(i, key)); sprintf(key, "PMIX_PERF_get_rem_time.%d", i); printf("remote = %lf\n", pmi_get_double(i, key)); } */ } pmi_fini(); return 0; }
int main (int argc, char *argv[]) { struct context ctx; struct pmi_simple_ops ops = { .kvs_put = s_kvs_put, .kvs_get = s_kvs_get, .barrier = s_barrier, }; pmi_t *cli; int spawned = -1, initialized = -1; int rank = -1, size = -1; int universe_size = -1; int name_len = -1, key_len = -1, val_len = -1; char *name = NULL, *val = NULL, *val2 = NULL; plan (NO_PLAN); if (!(ctx.kvs = zhash_new ())) oom (); ctx.size = 1; ctx.barrier = 0; ok (socketpair (PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ctx.fds) == 0, "socketpair returned client,server file descriptors"); ctx.pmi = pmi_simple_server_create (&ops, 42, ctx.size, "bleepgorp", &ctx); ok (ctx.pmi != NULL, "created simple pmi server context"); ctx.buflen = pmi_simple_server_get_maxrequest (ctx.pmi); ctx.buf = xzmalloc (ctx.buflen); ok (pthread_create (&ctx.t, NULL, server_thread, &ctx) == 0, "pthread_create successfully started server"); ok ((cli = pmi_create_simple (ctx.fds[0], 0, ctx.size)) != NULL, "pmi_create_simple OK"); ok (pmi_initialized (cli, &initialized) == PMI_SUCCESS && initialized == 0, "pmi_initialized OK, initialized=0"); ok (pmi_init (cli, &spawned) == PMI_SUCCESS && spawned == 0, "pmi_init OK, spawned=0"); ok (pmi_initialized (cli, &initialized) == PMI_SUCCESS && initialized == 1, "pmi_initialized OK, initialized=1"); /* retrieve basic params */ ok (pmi_get_size (cli, &size) == PMI_SUCCESS && size == 1, "pmi_get_size OK, size=%d", size); ok (pmi_get_rank (cli, &rank) == PMI_SUCCESS && rank == 0, "pmi_get_rank OK, rank=%d", rank); ok (pmi_get_universe_size (cli, &universe_size) == PMI_SUCCESS && universe_size == size, "pmi_get_universe_size OK, universe_size=%d", universe_size); ok (pmi_kvs_get_name_length_max (cli, &name_len) == PMI_SUCCESS && name_len > 0, "pmi_kvs_get_name_length_max OK, name_len=%d", name_len); ok (pmi_kvs_get_key_length_max (cli, &key_len) == PMI_SUCCESS && key_len > 0, "pmi_kvs_get_key_length_max OK, key_len=%d", key_len); ok (pmi_kvs_get_value_length_max (cli, &val_len) == PMI_SUCCESS && val_len > 0, "pmi_kvs_get_value_length_max OK, val_len=%d", val_len); name = xzmalloc (name_len); ok (pmi_kvs_get_my_name (cli, name, name_len) == PMI_SUCCESS && strlen (name) > 0, "pmi_kvs_get_my_name OK, name=%s", name); /* put foo=bar / commit / barier / get foo */ ok (pmi_kvs_put (cli, name, "foo", "bar") == PMI_SUCCESS, "pmi_kvs_put foo=bar OK"); ok (pmi_kvs_commit (cli, name) == PMI_SUCCESS, "pmi_kvs_commit OK"); ok (pmi_barrier (cli) == PMI_SUCCESS, "pmi_barrier OK"); val = xzmalloc (val_len); ok (pmi_kvs_get (cli, name, "foo", val, val_len) == PMI_SUCCESS && !strcmp (val, "bar"), "pmi_kvs_get foo OK, val=%s", val); /* put long=... / get long */ val2 = xzmalloc (val_len); memset (val2, 'x', val_len - 1); ok (pmi_kvs_put (cli, name, "long", val2) == PMI_SUCCESS, "pmi_kvs_put long=xxx... OK"); memset (val, 'y', val_len); /* not null terminated */ ok (pmi_kvs_get (cli, name, "long", val, val_len) == PMI_SUCCESS && strnlen (val2, val_len) < val_len && strcmp (val, val2) == 0, "pmi_kvs_get long OK, val=xxx..."); ok (pmi_finalize (cli) == PMI_SUCCESS, "pmi_finalize OK"); ok (pthread_join (ctx.t, NULL) == 0, "pthread join successfully reaped server"); free (name); free (val); free (val2); pmi_destroy (cli); if (ctx.pmi) pmi_simple_server_destroy (ctx.pmi); close (ctx.fds[0]); close (ctx.fds[1]); zhash_destroy (&ctx.kvs); done_testing (); return 0; }