/*
 * Autotune the local barrier for `handle`.
 *
 * Every barrier routine (except SMP_COLL_BARRIER_COND_VAR, which is skipped)
 * is timed for each power-of-two radix from 2 up to handle->THREADS; the
 * pthread routine is only timed at radix 2.  Each candidate is installed with
 * smp_coll_set_barrier_routine_with_root() and run
 * GASNET_COLL_TUNE_SMP_BARRIER_ITER times (1000 is passed as the default to
 * gasneti_getenv_int_withdefault).  Only the thread with MYTHREAD == 0 records
 * a winner; the recorded configuration is then installed by every caller.
 *
 * handle: collective handle whose barrier implementation is being tuned.
 */
void smp_coll_tune_barrier(smp_coll_t handle) {
  /* Winning configuration.  These are function-level statics, so the values
   * written by thread 0 are the ones read by the final install call below. */
  static int best_barrier_radix = 2;
  static int best_barrier_routine = SMP_COLL_BARRIER_DISSEM_ATOMIC;
  static int best_root = 0;

  const int reps =
      gasneti_getenv_int_withdefault("GASNET_COLL_TUNE_SMP_BARRIER_ITER", 1000, 0);
  const int is_lead = (handle->MYTHREAD == 0);
  double fastest_ns = 1e20;
  int root;

#if VERBOSE_TUNING
  if (is_lead) fprintf(stderr, "starting autotuning of local barrier\n");
#endif

  /* Only root 0 is explored. */
  for (root = 0; root < 1; root++) {
    int routine;

    if (is_lead && VERBOSE_TUNING) fprintf(stderr, "ROOT: %d\n", root);

    for (routine = 0; routine < SMP_COLL_NUM_BARR_ROUTINES; routine++) {
      int radix;

      if (routine == SMP_COLL_BARRIER_COND_VAR) continue;
      if (is_lead && VERBOSE_TUNING) fprintf(stderr, "\t routine: %d\n", (int)routine);

      for (radix = 2; radix <= handle->THREADS; radix *= 2) {
        gasnett_tick_t t0, t1;
        double ns_per_barrier;
        int rep;

        /* These routines are only measured at radix 2. */
        if (radix > 2 &&
            (routine == SMP_COLL_BARRIER_COND_VAR || routine == SMP_COLL_BARRIER_PTHREAD)) {
          continue;
        }
        if (is_lead && VERBOSE_TUNING) fprintf(stderr, "\t\t radix: %d\n", radix);

        smp_coll_set_barrier_routine_with_root(handle, (smp_coll_barrier_routine_t)routine,
                                               radix, root);

        t0 = gasnett_ticks_now();
        for (rep = 0; rep < reps; rep++) {
          smp_coll_barrier(handle, 0);
        }
        t1 = gasnett_ticks_now();
        smp_coll_safe_barrier(handle, 0);

        ns_per_barrier = (double)gasnett_ticks_to_ns(t1 - t0) / reps;
        if (is_lead && ns_per_barrier < fastest_ns) {
          best_barrier_radix = radix;
          best_barrier_routine = routine;
          fastest_ns = ns_per_barrier;
          best_root = root;
        }

        smp_coll_reset_all_flags(handle);
        /*run a root agnostic barrier*/
        smp_coll_safe_barrier(handle, 0);
      }
    }
  }

#if VERBOSE_TUNING
  if (is_lead)
    fprintf(stderr, "setting best barrier: routine: %d radix: %d root: %d time: %g ns\n",
            best_barrier_routine, best_barrier_radix, best_root, fastest_ns);
#endif

  smp_coll_set_barrier_routine_with_root(handle,
                                         (smp_coll_barrier_routine_t)best_barrier_routine,
                                         best_barrier_radix, best_root);
}
void run() { unsigned i; gasnett_tick_t start, end; hashtable_create(TABLE_SIZE); BARRIER(); start = gasnett_ticks_now(); for (i = 0; i < WRITES_PER_PROC; ++i) { hashtable_insert(grt_id * WRITES_PER_PROC + i); } end = gasnett_ticks_now(); unsigned time = ((unsigned) gasnett_ticks_to_us(end - start)); printf("processor %u: insertion time=%f us\n", grt_id, (double) time); fflush(stdout); grt_write(0, time, ×[grt_id]); BARRIER(); /* sanity check */ for (i = 0; i < WRITES_PER_PROC; ++i) { grt_word_t num = grt_id * WRITES_PER_PROC + i; grt_bool_t found = hashtable_find(num); if (!found) { fprintf(stderr, "processor %d: expected to find %d\n", grt_id, (int) num); } } BARRIER(); if (grt_id == 0) { time = 0, max_time = 0; for (i = 0; i < grt_num_procs; ++i) { gasnett_tick_t this_time = times[i]; time += this_time; if (this_time >= max_time) max_time = this_time; } time_per_op = ((float) time) / NUM_WRITES; printf("total CPU time=%f us\n", (double) time); printf("time per operation=%f us\n", time_per_op); printf("max time=%f us\n", (double) max_time); } BARRIER(); hashtable_destroy(); BARRIER(); }
/*
 * Busy-waits (yielding the CPU each pass) for a pseudo-random duration.
 *
 * The duration in microseconds is sleep_min_us plus rand() modulo the width
 * of the [sleep_min_us, sleep_max_us) interval.
 *
 * tdata: per-thread data; only tdata->tid is read, for the trace messages.
 */
void test_sleep(threaddata_t *tdata) {
  unsigned usecs = (unsigned) sleep_min_us;

  /* A zero-width interval would make the modulo below a division by zero
   * (undefined behaviour); in that case sleep for exactly sleep_min_us. */
  if (sleep_max_us != sleep_min_us) {
    usecs += (rand() % (sleep_max_us - sleep_min_us));
  }

  ACTION_PRINTF("tid=%3d> sleeping %.3f millisecs", tdata->tid, usecs/1000.0);

  {
    /* Spin on the tick counter until the deadline passes. */
    uint64_t goal = gasnett_ticks_to_us(gasnett_ticks_now()) + usecs;
    while (gasnett_ticks_to_us(gasnett_ticks_now()) < goal) {
      gasnett_sched_yield();
    }
  }

  ACTION_PRINTF("tid=%3d> awaking", tdata->tid);
}
void run() { unsigned i, time; gasnett_tick_t start, end; hashset_create(params[HASHSET_SIZE], params[ON_PTHREAD]); BARRIER(); start = gasnett_ticks_now(); for (i = 0; i < MY_NUM_OPS; ++i) { if (put_flags[i] == GRT_TRUE) { hashset_insert(numbers[i]); } else { hashset_find(numbers[i]); } } end = gasnett_ticks_now(); time = ((unsigned) gasnett_ticks_to_us(end - start)); //printf("processor %u: execution time=%f us\n", // grt_id, (double) time); fflush(stdout); grt_write(0, time, ×[grt_id]); BARRIER(); if (grt_id == 0) { time = 0, max_time = 0; for (i = 0; i < grt_num_procs; ++i) { gasnett_tick_t this_time = times[i]; time += this_time; if (this_time >= max_time) max_time = this_time; } time_per_op = ((float) time) / params[NUM_OPS]; printf("total CPU time=%f us\n", (double) time); printf("time per operation=%f us\n", time_per_op); printf("max time=%f us\n", (double) max_time); } BARRIER(); hashset_destroy(); BARRIER(); }
/* ------------------------------------------------------------------------------------ */ void doit1(void) { GASNET_BEGIN_FUNCTION(); { int i; for (i=0; i<8; i++) handles[i] = GASNET_INVALID_HANDLE; } TEST_SECTION_BEGIN(); TIME_OPERATION("Tester overhead", {}); TIME_OPERATION("gasnett_ticks_now()", { timertemp = gasnett_ticks_now(); });
void run() { unsigned i, time; gasnett_tick_t start, end; hash_map_create(params[HASHMAP_SIZE], (grt_bool_t) params[ON_PTHREAD]); grt_barrier(); #ifdef LOCKS grt_lock_state_t state; #endif for (i = 0; i < MY_NUM_OPS; ++i) { grt_word_t key = keys[i], val = values[i]; #ifdef LOCKS hash_t hash = compute_hash(key); hash_map_lock(hash.proc, hash.offset, WRITE, &state); #endif hash_map_insert(key, val); #ifdef LOCKS hash_map_unlock(hash.proc, hash.offset); #endif } BARRIER(); start = gasnett_ticks_now(); #ifdef LOCKS grt_lock_state_t state1, state2; #endif for (i = 0; i < MY_NUM_OPS; ++i) { unsigned idx = grt_random_next() * MY_NUM_OPS; grt_word_t key1 = keys[i]; unsigned second_idx = grt_random_next() * MY_NUM_OPS; grt_word_t key2 = keys[second_idx]; #ifdef LOCKS lock(key1, key2, &state1, &state2); #endif grt_word_t val1, val2; #ifndef LOCKS #ifndef NOLOCKS stm_start(grt_id); #endif #endif grt_bool_t found1 = hash_map_find(key1, &val1); grt_bool_t found2 = hash_map_find(key2, &val2); hash_map_insert(key1, val2); hash_map_insert(key2, val1); #ifndef LOCKS #ifndef NOLOCKS stm_commit(grt_id); #endif #endif #if LOCKS unlock(key1, key2); #endif } end = gasnett_ticks_now(); time = ((unsigned) gasnett_ticks_to_us(end - start)); printf("processor %u: execution time=%f us\n", grt_id, (double) time); fflush(stdout); grt_write(0, time, ×[grt_id]); BARRIER(); if (grt_id == 0) { time = 0, max_time = 0; for (i = 0; i < grt_num_procs; ++i) { gasnett_tick_t this_time = times[i]; time += this_time; if (this_time >= max_time) max_time = this_time; } time_per_op = ((float) time) / params[NUM_OPS]; printf("total CPU time=%f us\n", (double) time); printf("time per operation=%f us\n", time_per_op); printf("max time=%f us\n", (double) max_time); } BARRIER(); hash_map_destroy(); BARRIER(); }
int main(int argc, char **argv) { int iters = 0; int arg; void *alloc = NULL; int firstlastmode = 0; int fullduplexmode = 0; int crossmachinemode = 0; int singlesender = 0; int help = 0; /* call startup */ GASNET_Safe(gasnet_init(&argc, &argv)); /* parse arguments */ arg = 1; while (argc > arg) { if (!strcmp(argv[arg], "-in")) { insegment = 1; ++arg; } else if (!strcmp(argv[arg], "-out")) { insegment = 0; ++arg; } else if (!strcmp(argv[arg], "-sl")) { ++arg; if (argc > arg) { stridelevels = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-mincontig")) { ++arg; if (argc > arg) { min_contig = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-maxcontig")) { ++arg; if (argc > arg) { max_contig = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-contigfactor")) { ++arg; if (argc > arg) { contigfactor = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-mindata")) { ++arg; if (argc > arg) { min_payload = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-maxdata")) { ++arg; if (argc > arg) { max_payload = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-datafactor")) { ++arg; if (argc > arg) { datafactor = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-densitysteps")) { ++arg; if (argc > arg) { densitysteps = atoi(argv[arg]); arg++; } else help = 1; } else if (!strcmp(argv[arg], "-f")) { firstlastmode = 1; ++arg; } else if (!strcmp(argv[arg], "-c")) { crossmachinemode = 1; ++arg; } else if (!strcmp(argv[arg], "-a")) { fullduplexmode = 1; ++arg; } else if (!strcmp(argv[arg], "-p")) { dogets = 0; doputs = 1; ++arg; } else if (!strcmp(argv[arg], "-g")) { dogets = 1; doputs = 0; ++arg; } else if (!strcmp(argv[arg], "-r")) { remotecontig = 1; ++arg; } else if (!strcmp(argv[arg], "-l")) { localcontig = 1; ++arg; } else if (argv[arg][0] == '-') { help = 1; ++arg; } else break; } if (argc > arg) { iters = 
atoi(argv[arg]); arg++; } if (!iters) iters = 1000; if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); arg++; } if (min_contig && max_contig && min_contig > max_contig) { ERR("min_contig > max_contig"); help = 1; } if (min_payload && max_payload && min_payload > max_payload) { ERR("min_payload > max_payload"); help = 1; } if (min_payload && min_contig && min_payload < min_contig) { ERR("min_payload < min_contig"); help = 1; } if (max_contig && max_payload && max_contig > max_payload) { ERR("max_contig > max_payload"); help = 1; } if (contigfactor < 2) { ERR("contigfactor < 2"); help = 1; } if (datafactor < 2) { ERR("datafactor < 2"); help = 1; } if (!max_payload) max_payload = 2*1024*1024; /* 2 MB default */ #ifdef GASNET_SEGMENT_EVERYTHING maxsz = gasnet_getMaxGlobalSegmentSize(); #else maxsz = 16*1024*1024; #endif max_payload = (int)MIN(maxsz, max_payload); maxsz = MIN(((uint64_t)max_payload) * densitysteps,maxsz); if (!min_contig) min_contig = 8; if (!max_contig) max_contig = MIN(256*1024,max_payload); if (!min_payload) min_payload = min_contig; GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET)); test_init("testvisperf",1, "[options] (iters) (test_sections)\n" " -p/-g selects puts only or gets only (default is both).\n" " -r/-l selects remotely contiguous or locally contiguous (default is neither).\n" " -mindata/-maxdata <sz> \n" " selects sz as min/max data payload per operation.\n" " -mincontig/-maxcontig <sz> \n" " selects sz as min/max contig size.\n" " -datafactor/-contigfactor <f> \n" " selects f as growth factor for data/contig sizes.\n" " -densitysteps <d> \n" " selects d density steps, inclusive from 100%..100/d%\n" " -sl <n> selects n striding levels (default is 2).\n" " -in/-out selects whether the initiator-side\n" " memory is in the GASNet segment or not (default is not).\n" " -a enables full-duplex mode, where all nodes send.\n" " -c enables cross-machine pairing, default is nearest neighbor.\n" " -f enables 'first/last' 
mode, where the first/last\n" " nodes communicate with each other, while all other nodes sit idle."); if (help || argc > arg) test_usage(); /* get SPMD info */ myproc = gasnet_mynode(); numprocs = gasnet_nodes(); if (!firstlastmode) { /* Only allow 1 or even number for numprocs */ if (numprocs > 1 && numprocs % 2 != 0) { MSG0("WARNING: This test requires a unary or even number of nodes. Test skipped.\n"); gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */ } } /* Setting peer thread rank */ if (firstlastmode) { peerproc = (myproc == 0 ? numprocs-1 : 0); iamsender = (fullduplexmode ? myproc == 0 || myproc == numprocs-1 : myproc == 0); } else if (numprocs == 1) { peerproc = 0; iamsender = 1; } else if (crossmachinemode) { if (myproc < numprocs / 2) { peerproc = myproc + numprocs/2; iamsender = 1; } else { peerproc = myproc - numprocs/2; iamsender = fullduplexmode; } } else { peerproc = (myproc % 2) ? (myproc - 1) : (myproc + 1); iamsender = (fullduplexmode || myproc % 2 == 0); } singlesender = (numprocs == 1) || ((numprocs == 2 || firstlastmode) && !fullduplexmode); Rbase = TEST_SEG(peerproc); if (insegment) { Lbase = TEST_SEG(myproc); } else { alloc = test_calloc(maxsz+PAGESZ,1); /* use calloc to prevent valgrind warnings */ Lbase = alignup_ptr(alloc, PAGESZ); /* ensure page alignment of base */ } assert(((uintptr_t)Lbase) % PAGESZ == 0); if (myproc == 0) { MSG0("Running %i iterations of %s%s%snon-contiguous put/get%s%s\n local data %s-segment for sizes: %i...%i\n", iters, (firstlastmode ? "first/last " : ""), (fullduplexmode ? "full-duplex ": ""), (crossmachinemode ? "cross-machine ": ""), (remotecontig?"(remotely-contiguous)":""), (localcontig?"(locally-contiguous)":""), insegment ? 
"in" : "out", min_payload, max_payload); printf("rows are databytes/op : bandwidth values in MB/s\n"); } BARRIER(); { int contigsz; int rawdatasz; int isget; test_vis_t viscat; for (viscat = TEST_V; viscat <= TEST_S; viscat++) { for (isget = 0; isget < 2; isget++) { if (TEST_SECTION_BEGIN_ENABLED()) { if (isget && !dogets) continue; if (!isget && !doputs) continue; if (!dovis[viscat]) continue; for (contigsz = min_contig; contigsz <= max_contig; contigsz *= contigfactor) { int di; size_t lastdatasz = 0; if (contigsz > max_payload) continue; if (!myproc) { printf("\n%c: %s %s CONTIGSZ = %i\n", TEST_SECTION_NAME(), visdesc[(int)viscat], (isget?"GET":"PUT"), contigsz); printf(" density:"); for (di = 0; di < densitysteps; di++) { printf("%8i%%", (int)((densitysteps-di)*100.0/densitysteps)); } printf("\n"); } for (rawdatasz = min_payload; rawdatasz <= max_payload; rawdatasz *= datafactor) { char mystr[255]; size_t datasz = aligndown(rawdatasz,contigsz); if (datasz == lastdatasz) continue; lastdatasz = datasz; if (singlesender) snprintf(mystr, sizeof(mystr), "%8i: ", (int)datasz); else snprintf(mystr, sizeof(mystr), "P%i: %6i: ", myproc, (int)datasz); for (di = 0; di < densitysteps; di++) { gasnett_tick_t begin=0, end=0; size_t Lcnt = (localcontig ? 1 : datasz/contigsz); size_t Rcnt = (remotecontig? 
1 : datasz/contigsz); size_t Lsz = datasz/Lcnt; size_t Rsz = datasz/Rcnt; void **Lilist = NULL; void **Rilist = NULL; gasnet_memvec_t *Lvlist = NULL; gasnet_memvec_t *Rvlist = NULL; size_t *Lstrides = NULL; size_t *Rstrides = NULL; size_t *LRcount = NULL; size_t stride = contigsz*(((double)densitysteps)/(densitysteps-di)); if (stride * MAX(Lcnt,Rcnt) > maxsz) { strcat(mystr," - "); continue; } if (iamsender) { /* setup metadata */ switch (viscat) { case TEST_V: Lvlist = make_vlist(Lbase, stride, Lcnt, Lsz); Rvlist = make_vlist(Rbase, stride, Rcnt, Rsz); break; case TEST_I: Lilist = make_ilist(Lbase, stride, Lcnt, Lsz); Rilist = make_ilist(Rbase, stride, Rcnt, Rsz); break; case TEST_S: { size_t chunkcnt = datasz/contigsz; int dim; Lstrides = test_malloc(sizeof(size_t)*stridelevels); Rstrides = test_malloc(sizeof(size_t)*stridelevels); LRcount = test_malloc(sizeof(size_t)*(stridelevels+1)); LRcount[0] = contigsz; Lstrides[0] = (localcontig ? contigsz : stride); Rstrides[0] = (remotecontig ? 
contigsz : stride); for (dim = 1; dim < stridelevels; dim++) { size_t factor = 1, fi; for (fi = 1; fi <= chunkcnt/(2*(stridelevels-dim)); fi++) /* choose a reasonable factor */ if (chunkcnt/fi*fi == chunkcnt) factor = fi; LRcount[dim] = factor; chunkcnt /= factor; Lstrides[dim] = LRcount[dim]*Lstrides[dim-1]; Rstrides[dim] = LRcount[dim]*Rstrides[dim-1]; } LRcount[stridelevels] = chunkcnt; { size_t tmp = 1; for (dim = 0; dim <= stridelevels; dim++) tmp *= LRcount[dim]; assert(tmp == datasz); } break; } } } #define DOIT(iters) do { \ int i; \ switch (viscat) { \ case TEST_V: \ for (i = 0; i < iters; i++) { \ if (isget) gasnet_getv_nbi_bulk(Lcnt,Lvlist,peerproc,Rcnt,Rvlist); \ else gasnet_putv_nbi_bulk(peerproc,Rcnt,Rvlist,Lcnt,Lvlist); \ } \ break; \ case TEST_I: \ for (i = 0; i < iters; i++) { \ if (isget) gasnet_geti_nbi_bulk(Lcnt,Lilist,Lsz,peerproc,Rcnt,Rilist,Rsz); \ else gasnet_puti_nbi_bulk(peerproc,Rcnt,Rilist,Rsz,Lcnt,Lilist,Lsz); \ } \ break; \ case TEST_S: \ for (i = 0; i < iters; i++) { \ if (isget) gasnet_gets_nbi_bulk(Lbase,Lstrides,peerproc,Rbase,Rstrides, \ LRcount,stridelevels); \ else gasnet_puts_nbi_bulk(peerproc,Rbase,Rstrides,Lbase,Lstrides, \ LRcount,stridelevels); \ } \ break; \ } \ gasnet_wait_syncnbi_all(); \ } while (0) if (iamsender) DOIT(1); /* pay some warm-up costs */ BARRIER(); if (iamsender) { begin = gasnett_ticks_now(); DOIT(iters); end = gasnett_ticks_now(); } BARRIER(); if (iamsender) { char tmp[80]; double secs = gasnett_ticks_to_ns(end - begin)/1.0E9; double dataMB = ((double)datasz) * iters / (1024*1024); snprintf(tmp, sizeof(tmp), " %8.3f", dataMB / secs); strcat(mystr, tmp); } if (Lilist) test_free(Lilist); if (Rilist) test_free(Rilist); if (Lvlist) test_free(Lvlist); if (Rvlist) test_free(Rvlist); if (Lstrides) test_free(Lstrides); if (Rstrides) test_free(Rstrides); if (LRcount) test_free(LRcount); } if (iamsender) { printf("%s\n", mystr); fflush(stdout); } BARRIER(); } } } } } } BARRIER(); if (alloc) test_free(alloc); 
gasnet_exit(0); return 0; }
/* Out-of-line wrapper around gasnett_ticks_now(); kept as a separate function
 * to avoid excessive inlining. */
gasnett_tick_t ticktime(void) {
  gasnett_tick_t now = gasnett_ticks_now();
  return now;
}