/*
 * Perform one gasnet_get() of a randomly chosen length.
 *
 * The destination is this thread's entry in tt_addr_map, the source is the
 * peer thread's entry in tt_addr_map, and the target node is looked up in
 * tt_thread_map using the peer's thread id.  Candidate lengths are drawn
 * from RANDOM_SIZE() and redrawn until one does not exceed
 * TEST_SEGZ_PER_THREAD.
 */
void test_get(threaddata_t *tdata)
{
  int partner = tdata->tid_peer;
  int partner_node = tt_thread_map[partner];
  void *dst = tt_addr_map[tdata->tid];
  void *src = tt_addr_map[partner];
  int nbytes;

  /* Rejection-sample a transfer size that fits the per-thread limit. */
  for (;;) {
    nbytes = RANDOM_SIZE();
    if (!(nbytes > TEST_SEGZ_PER_THREAD)) {
      break;
    }
  }

  ACTION_PRINTF("tid=%3d> get (%p,%8d) <- tid=%3d,node=%d,addr=%p",
                tdata->tid, dst, nbytes, partner, partner_node, src);

  gasnet_get(dst, partner_node, src, nbytes);
}
int main(int argc, char **argv) { struct delay_s { int64_t delay_us; int64_t delay_loops; } delay_params; int mynode, nodes, iters=0; int64_t start,total,delay_us,baseline_us; int64_t min_time, max_time, avg_time; int64_t delay_loops = 0; int j, i = 0; int pause_len; int pollcnt = 0; GASNET_Safe(gasnet_init(&argc, &argv)); GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET)); test_init("testbarrierlate",1,"(iters) (pollcnt)"); mynode = gasnet_mynode(); nodes = gasnet_nodes(); if (argc > 1) iters = atoi(argv[1]); if (!iters) iters = 10000; if (argc > 2) pollcnt = atoi(argv[2]); if (argc > 3) test_usage(); if (mynode == 0) { printf("Running barrier late arrival test with %i iterations, pollcnt=%i...\n",iters, pollcnt); fflush(stdout); } BARRIER(); /* warmup */ for (i=0; i < MIN(100,iters/100); i++) { gasnet_barrier_notify(0, GASNET_BARRIERFLAG_ANONYMOUS); GASNET_Safe(gasnet_barrier_wait(0, GASNET_BARRIERFLAG_ANONYMOUS)); } BARRIER(); start = TIME(); for (i=0; i < iters; i++) { gasnet_barrier_notify(0, GASNET_BARRIERFLAG_ANONYMOUS); GASNET_Safe(gasnet_barrier_wait(0, GASNET_BARRIERFLAG_ANONYMOUS)); } baseline_us = TIME() - start; BARRIER(); if (mynode == 0) { printf("Total time: %8.3f sec Avg Anon. Barrier latency: %8.3f us\n", ((float)baseline_us)/1000000, ((float)baseline_us)/iters); fflush(stdout); } /* Calibrate a delay loop. Given "iters" and "delay_us", we determine the argument * we need when calling test_delay() iters times, to get a _total_ delay no less than * delay_us. The value of delay_us is overwritten with the achieved delay. * We calibrate the delay on exactly one node to avoid spoiling timings on * overcommitted CPUs. Nodes not performing the calibration sleep for at least twice * the time we are calibrating for. (No way to be sure this is enough, since * calibration is iterative.) 
*/ BARRIER(); pause_len = 1 + 4 * (baseline_us + 999999)/1000000; if (mynode == 0) { struct delay_s *p = (struct delay_s *)TEST_MYSEG(); start = TIME(); printf("Calibrating delay loop (expect at least a %d sec pause)...\n", pause_len); fflush(stdout); p->delay_us = 2*baseline_us; /* delay at least two full barrier times */ p->delay_loops = test_calibrate_delay(iters, pollcnt, &(p->delay_us)); } else { sleep(pause_len); } BARRIER(); gasnet_get(&delay_params, 0, TEST_SEG(0), sizeof(struct delay_s)); delay_us = delay_params.delay_us; delay_loops = delay_params.delay_loops; if (mynode == 0) { printf("Calibration complete (actual pause = %5.3f sec).\n", (float)((TIME()-start)/1000000.0)); printf("Ideal loop time = %8.3f sec.\n", (float)(delay_us)/1000000.0); fflush(stdout); } /* Take turns being late to notify * We insert a delay before the _notify() on one node. * This simulates a load imbalance between barriers. * The time reported is how much the barrier costs excluding the delay. * This reported time will often be less than the full barrier because * some progress was made by the other nodes. */ avg_time = 0; max_time = 0; min_time = (int64_t)1 << 62; /* good enough */ for (j=0; j < nodes; j++) { BARRIER(); start = TIME(); for (i=0; i < iters; i++) { if (j == mynode) { test_delay(delay_loops, pollcnt); } gasnet_barrier_notify(0, GASNET_BARRIERFLAG_ANONYMOUS); GASNET_Safe(gasnet_barrier_wait(0, GASNET_BARRIERFLAG_ANONYMOUS)); } total = TIME() - start; if (mynode == 0) { printf("Total time: %8.3f sec Late-notify test on node %d\n", ((float)total)/1000000, j); fflush(stdout); } total -= delay_us; avg_time += total; min_time = MIN(min_time, total); max_time = MAX(max_time, total); } avg_time /= nodes; if (mynode == 0) { printf("Total difference: %8.3f sec Late notify() Anon. 
Barrier net latency, minimum: %8.3f us (%6.2f%%)\n", ((float)min_time)/1000000, ((float)min_time)/iters, ((float)min_time * 100.)/baseline_us); printf("Total difference: %8.3f sec Late notify() Anon. Barrier net latency, maximum: %8.3f us (%6.2f%%)\n", ((float)max_time)/1000000, ((float)max_time)/iters, ((float)max_time * 100.)/baseline_us); printf("Total difference: %8.3f sec Late notify() Anon. Barrier net latency, average: %8.3f us (%6.2f%%)\n", ((float)avg_time)/1000000, ((float)avg_time)/iters, ((float)avg_time * 100.)/baseline_us); fflush(stdout); } /* Take turns being late to wait * We insert a delay between the _notify() and _wait() on one node. * This simulates a load imbalance between barrier notify and wait. * The time reported is how much the barrier costs excluding the delay. * This reported time will often be less than the full barrier because * some progress was made by the other nodes. */ avg_time = 0; max_time = 0; min_time = (int64_t)1 << 62; /* good enough */ for (j=0; j < nodes; j++) { BARRIER(); start = TIME(); for (i=0; i < iters; i++) { gasnet_barrier_notify(0, GASNET_BARRIERFLAG_ANONYMOUS); if (j == mynode) { test_delay(delay_loops, pollcnt); } GASNET_Safe(gasnet_barrier_wait(0, GASNET_BARRIERFLAG_ANONYMOUS)); } total = TIME() - start; if (mynode == 0) { printf("Total time: %8.3f sec Late-wait test on node %d\n", ((float)total)/1000000, j); fflush(stdout); } total -= delay_us; avg_time += total; min_time = MIN(min_time, total); max_time = MAX(max_time, total); } avg_time /= nodes; if (mynode == 0) { printf("Total difference: %8.3f sec Late wait() Anon. Barrier net latency, minimum: %8.3f us (%6.2f%%)\n", ((float)min_time)/1000000, ((float)min_time)/iters, ((float)min_time * 100.)/baseline_us); printf("Total difference: %8.3f sec Late wait() Anon. Barrier net latency, maximum: %8.3f us (%6.2f%%)\n", ((float)max_time)/1000000, ((float)max_time)/iters, ((float)max_time * 100.)/baseline_us); printf("Total difference: %8.3f sec Late wait() Anon. 
Barrier net latency, average: %8.3f us (%6.2f%%)\n", ((float)avg_time)/1000000, ((float)avg_time)/iters, ((float)avg_time * 100.)/baseline_us); fflush(stdout); } BARRIER(); MSG("done."); gasnet_exit(0); return 0; }
int main(int argc, char **argv) { int outer_iterations = 0; int inner_iterations = 0; int seedoffset = 0; int numprocs, myproc, peerproc; int sender_p; char *shadow_region_1, *shadow_region_2; int i,j; char *local_base, *target_base; /* call startup */ GASNET_Safe(gasnet_init(&argc, &argv)); /* get SPMD info */ myproc = gasnet_mynode(); numprocs = gasnet_nodes(); if (argc > 1) segsize = atoi(argv[1]); if (!segsize) segsize = 1024*1000; if (argc > 2) outer_iterations = atoi(argv[2]); if (!outer_iterations) outer_iterations = 10; if (argc > 3) inner_iterations = atoi(argv[3]); if (!inner_iterations) inner_iterations = 10; if (argc > 4) seedoffset = atoi(argv[4]); GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ, TEST_MINHEAPOFFSET)); test_init("testslice",0, "(segsize) (iterations) (# of sizes per iteration) (seed)"); /* parse arguments */ if (argc > 5) test_usage(); if(numprocs & 1) { MSG0("WARNING: This test requires an even number of nodes. Test skipped.\n"); gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */ } sender_p = !(myproc & 1); peerproc = myproc ^ 1; if (seedoffset == 0) { seedoffset = (((unsigned int)TIME()) & 0xFFFF); TEST_BCAST(&seedoffset, 0, &seedoffset, sizeof(&seedoffset)); } TEST_SRAND(myproc+seedoffset); MSG0("Running with segment size = %d outer iterations=%d inner iterations=%d seed=%d", segsize,outer_iterations, inner_iterations, seedoffset); BARRIER(); /* Allocate two shadow regions the same size as the segment */ shadow_region_1 = (char *) test_malloc(segsize); shadow_region_2 = (char *) test_malloc(segsize); /* Fill up the shadow region with random data */ for(i=0; i < segsize; i++) { shadow_region_1[i] = (char) TEST_RAND(0,255); } memset(shadow_region_2,0,segsize); /* Big loop performing the following */ for(i=0; i < outer_iterations; i++) { if(sender_p) { /* Pick a starting point anywhere in the segment */ int starting_point = TEST_RAND(0,(segsize-1)); local_base = TEST_SEG(myproc); target_base = 
TEST_SEG(peerproc); for(j=0; j < inner_iterations; j++) { /* Pick a length */ int len = TEST_RAND(1,segsize-starting_point); int remote_starting_point = TEST_RAND(0,segsize-len); int local_starting_point_1 = TEST_RAND(0,segsize-len); int local_starting_point_2 = TEST_RAND(0,segsize-len); /* Perform operations */ /* Out of segment put from shadow_region 1 to remote */ gasnet_put(peerproc,target_base+remote_starting_point,shadow_region_1 + starting_point,len); /* In segment get from remote to local segment */ gasnet_get(local_base+local_starting_point_1,peerproc,target_base+remote_starting_point,len); /* Verify */ assert_eq(shadow_region_1 + starting_point, local_base + local_starting_point_1, len,starting_point,i,j,"Out of segment put + in segment get"); /* Out of segment get from remote to shadow_region_2 (starting from 0) */ gasnet_get(shadow_region_2+local_starting_point_2,peerproc,target_base+remote_starting_point,len); /* Verify */ assert_eq(shadow_region_2+local_starting_point_2, shadow_region_1 + starting_point, len,starting_point,i,j,"Out of segment get"); } TEST_PROGRESS_BAR(i,outer_iterations); } BARRIER(); } if(sender_p && !failures) { MSG("testslice PASSED"); } gasnet_exit(0); return 0; }