void test_pairs(int th_idx) { int rem_th, rem_proc; int i, j; void *src, *dst; rem_th = pairs[TH_ME]; rem_proc = TH2PROC(rem_th); prndbg(th_idx, "test_pair: %d<->%d(%d)\n", TH_ME, rem_th, rem_proc); MT_BARRIER(); #if 0 print_array(th_idx, "before", &AELEM(ptrs2[TH_ME],rem_th,0,0), ASIZExITERS); #endif for (i = 0; i < iters; i++) { /* src - addr of my thread block on remote proc/thread */ src = &AELEM(ptrs1[rem_th],TH_ME,i,0); /* src - addr of remote thread block on my proc/thread */ dst = &AELEM(ptrs2[TH_ME],rem_th,i,0); /* get from my pair */ assert(!ARMCI_Get(src, dst, ASIZE_BYTES, rem_proc)); } MT_BARRIER(); #if 0 print_array(th_idx, "rcvd", &AELEM(ptrs2[TH_ME],rem_th,0,0), ASIZExITERS); #endif /* check results */ check_result(&AELEM(ptrs2[TH_ME],rem_th,0,0), rem_th); }
/*
 * Fill the array at ptr with its initial pattern and dump it via print_array().
 *
 * Every element (t, it, e), for t < th_size, it < iters and e < asize, is set
 * to AELEM_VAL(TH_ME, it, e); the stored value does not depend on t.
 *
 *  th_idx -- index of the calling thread; forwarded to print_array()
 *  ptr    -- array to initialize, accessed through the AELEM() macro
 */
void init_array(int th_idx, void *ptr)
{
    int t, it, e;

    for (t = 0; t < th_size; t++) {
        for (it = 0; it < iters; it++) {
            for (e = 0; e < asize; e++) {
                AELEM(ptr, t, it, e) = AELEM_VAL(TH_ME, it, e);
            }
        }
    }
    /*AELEM(ptr, i, j) = THREAD_OFF*TH_ME + ITER_OFF*i + ELEM_INC*(j+1);*/

    print_array(th_idx, "initialized", ptr);

    /* disabled inline dump of the freshly initialized array */
#if 0
# if 1
    for (t = 0, cbufl = 0; t < th_size; t++) {
        for (it = 0; it < iters; it++) {
            cbufl += sprintf(cbuf+cbufl, "(%d,%d)%p:", t, it,
                             &(((atype_t *)ptr)[t*ASIZExITERS+it*asize]));
            for (e = 0; e < asize; e++)
                cbufl += sprintf(cbuf+cbufl, " %.4f",
                                 ((atype_t *)ptr)[t*ASIZExITERS+it*asize+e]);
            cbufl += sprintf(cbuf + cbufl, "\n");
        }
        cbufl += sprintf(cbuf + cbufl, "\n");
    }
# else
    for (t = 0, cbufl = 0; t < (th_size*iters*asize); t++)
        cbufl += sprintf(cbuf+cbufl, " %.4f", ((atype_t*)ptr)[t]);
# endif
    prndbg(th_idx, "initialized:\n%s\n", cbuf);
#endif
}
/*
 * Store 0.0 into every element of the array at ptr, covering th_size blocks
 * of iters rows with asize elements each.
 *
 *  th_idx -- index of the calling thread (not referenced directly here)
 *  ptr    -- array to clear, accessed through the AELEM() macro
 */
void zero_array(int th_idx, void *ptr)
{
    int blk, row, elem;

    for (blk = 0; blk < th_size; blk++) {
        for (row = 0; row < iters; row++) {
            for (elem = 0; elem < asize; elem++)
                AELEM(ptr, blk, row, elem) = 0.0;
        }
    }
}
#ifdef DEBUG
/*
 * Compute the new write offset into a cap-byte text buffer after an
 * snprintf() call that returned n. Requires used < cap.
 *
 * The result never exceeds cap - 1, so "buffer + result" always leaves room
 * for the terminating NUL and "cap - result" is never zero.
 */
static size_t tbuf_advance(size_t used, size_t cap, int n)
{
    if (n < 0)
        return used;        /* output error: treat as nothing appended */
    if ((size_t)n >= cap - used)
        return cap - 1;     /* output was truncated: buffer is now full */
    return used + (size_t)n;
}
#endif

/*
 * Debug dump of a whole array through prndbg(); compiles to an empty function
 * unless DEBUG is defined.
 *
 * For every (i, j) row the output holds the row's indices and address
 * followed by its asize values; thread blocks are separated by a blank line.
 * If ASIZExITERSxTH_BYTES exceeds half of TBUFSIZE, only a
 * "array is too big to print" notice is emitted.
 *
 *  th_idx -- index of the calling thread; forwarded to prndbg()
 *  msg    -- label printed in front of the dump
 *  array  -- array to print, accessed through the AELEM() macro
 */
void print_array(int th_idx, char *msg, atype_t *array)
{
#ifdef DEBUG
    int i, j, k;
    size_t used = 0;
    char tbuf[TBUFSIZE];

    if (ASIZExITERSxTH_BYTES > TBUFSIZE / 2) {
        prndbg(th_idx, "%s:\n%s\n", msg, "array is too big to print");
        return; /* do not format an array that cannot fit into tbuf */
    }

    tbuf[0] = '\0';
    for (i = 0; i < th_size; i++) {
        for (j = 0; j < iters; j++) {
            /* every append is bounded by the space left in tbuf */
            used = tbuf_advance(used, sizeof tbuf,
                                snprintf(tbuf + used, sizeof tbuf - used,
                                         "(%d,%d)%p:", i, j,
                                         (void *)&AELEM(array, i, j, 0)));
            for (k = 0; k < asize; k++)
                used = tbuf_advance(used, sizeof tbuf,
                                    snprintf(tbuf + used, sizeof tbuf - used,
                                             " %.4f", AELEM(array, i, j, k)));
            used = tbuf_advance(used, sizeof tbuf,
                                snprintf(tbuf + used, sizeof tbuf - used, "\n"));
        }
        used = tbuf_advance(used, sizeof tbuf,
                            snprintf(tbuf + used, sizeof tbuf - used, "\n"));
    }
    tbuf[used] = '\0'; /* keep tbuf terminated even if an append failed */

    prndbg(th_idx, "%s:\n%s\n", msg, tbuf);
#endif
}
/* test Put/Get/Acc sequence regardless of communication pattern * tgt -- remote target for put/get/acc (none if -1) * rmt -- list of remote thread that put/acc to here (correctness is cheked here) * rmt_cnt -- # of threads in rmt */ void test_PutGetAcc(int th_idx, int tgt, int *rmt, int rmt_cnt) { /* a - local thread, b - remote thread */ int a, b, b_proc, stride[2], count[2]; int i, j; void *src, *dst; #ifdef DEBUG for (i = 0, cbufl = 0; i < rmt_cnt; i++) cbufl += sprintf(cbuf+cbufl, " %d", rmt[i]); prndbg(th_idx, "test_PutGetAcc: put/acc to %d, get from %d, check put/acc from %s\n", tgt, tgt, rmt_cnt ? cbuf : "none"); #endif a = TH_ME; stride[0] = ASIZE_BYTES; count[0] = ASIZE_BYTES; count[1] = 1; /* init arrays */ init_array(th_idx, ptrs1[TH_ME]); init_array(th_idx, ptrs2[TH_ME]); MT_BARRIER(); /* put - put a.ptrs1[b] into b.ptrs2[a] */ if (tgt != -1) { b = tgt; b_proc = TH2PROC(b); for (i = 0; i < iters; i++) { src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */ dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */ // assert(!ARMCI_Put(src, dst, ASIZE_BYTES, b_proc)); assert(!ARMCI_PutS(src, stride, dst, stride, count, 1, b_proc)); } ARMCI_Fence(b_proc); } MT_BARRIER(); print_array(th_idx, "PUT:ptrs1[TH_ME]", ptrs1[TH_ME]); print_array(th_idx, "PUT:ptrs2[TH_ME]", ptrs2[TH_ME]); MT_BARRIER(); /* chk put(s) from b(s): a.ptrs2[b] */ for (j = 0; j < rmt_cnt; j++) { b = rmt[j]; b_proc = TH2PROC(b); check_PutGetAcc(th_idx, b, PUT, &AELEM(ptrs2[a], b, 0, 0)); } //return; // REMOVE WHEN DONE /* init arrays */ init_array(th_idx, ptrs1[TH_ME]); init_array(th_idx, ptrs2[TH_ME]); MT_BARRIER(); /* get - get b.ptrs1[a] into a.ptrs2[b] */ if (tgt != -1) { b = tgt; b_proc = TH2PROC(b); for (i = 0; i < iters; i++) { src = &AELEM(ptrs1[b], a, i, 0); /* b.ptrs1[a] */ dst = &AELEM(ptrs2[a], b, i, 0); /* a.ptrs2[b] */ assert(!ARMCI_GetS(src, stride, dst, stride, count, 1, b_proc)); } } print_array(th_idx, "GET:ptrs1[TH_ME]", ptrs1[TH_ME]); print_array(th_idx, "GET:ptrs2[TH_ME]", 
ptrs2[TH_ME]); MT_BARRIER(); /* chk get from b: a.ptrs2[b] */ if (tgt != -1) { check_PutGetAcc(th_idx, b, GET, &AELEM(ptrs2[a], b, 0, 0)); } #if 1 /* init arrays */ init_array(th_idx, ptrs1[TH_ME]); init_array(th_idx, ptrs2[TH_ME]); MT_BARRIER(); /* acc - acc a.ptrs1[b] * scale + b.ptrs2[a] into b.ptrs2[a] */ if (tgt != -1) { b = tgt; b_proc = TH2PROC(b); for (i = 0; i < iters; i++) { src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */ dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */ assert(!ARMCI_AccS(ARMCI_ACC_DBL,&scale,src,stride,dst,stride,count,1,b_proc)); } ARMCI_Fence(b_proc); } MT_BARRIER(); print_array(th_idx, "ACC:ptrs1[TH_ME]", ptrs1[TH_ME]); print_array(th_idx, "ACC:ptrs2[TH_ME]", ptrs2[TH_ME]); MT_BARRIER(); /* chk acc(s) from b(s): a.ptrs2[b] */ for (j = 0; j < rmt_cnt; j++) { b = rmt[j]; b_proc = TH2PROC(b); check_PutGetAcc(th_idx, b, ACC, &AELEM(ptrs2[a], b, 0, 0)); } #endif MT_BARRIER(); }