uint64_t accumulate (size_t sndlen, int mytask, int origin, int target) { TRACE_ERR((stderr, "(%u) Do test ... sndlen = %zu\n", mytask, sndlen)); unsigned i; uint64_t t1 = GetTimeBase(); double scale = 1.0; if (mytask == origin) { for (i = 0; i < ITERATIONS; i++) { //fprintf(stderr, "(%u) Starting Iteration %d of size %zu dstaddr %p\n", mytask, i, sndlen, rcvbuf); A1_Get ( target, srcbuf, dstbuf, sndlen * sizeof(double) ); A1_Flush (target); } } A1_AllFence(); MPI_Barrier(MPI_COMM_WORLD); uint64_t t2 = GetTimeBase(); return ((t2 - t1) / ITERATIONS); }
double time_get(double *src_buf, double *dst_buf, int chunk, int loop, int proc, int levels) { int i, bal = 0; int stride[2]; int count[2]; int stride_levels = levels; double *tmp_buf, *tmp_buf_ptr; double start_time, stop_time, total_time = 0; stride[0] = SIZE * sizeof(double); count[0] = chunk * sizeof(double); count[1] = chunk; if(CHECK_RESULT) { tmp_buf = (double *)malloc(SIZE * SIZE * sizeof(double)); assert(tmp_buf != NULL); fill_array(tmp_buf, SIZE*SIZE, proc); tmp_buf_ptr = tmp_buf; } start_time = TIMER(); for(i=0; i<loop; i++) { if(levels) A1_GetS(proc, stride_levels, count, src_buf, stride, dst_buf, stride); else A1_Get(proc, src_buf, dst_buf, count[0]); if(CHECK_RESULT) { sprintf(check_type, "A1_GetS:"); check_result(tmp_buf_ptr, dst_buf, stride, count, stride_levels); } /* prepare next src and dst ptrs: avoid cache locality */ if(bal == 0) { src_buf += 128; dst_buf += 128; if(CHECK_RESULT) tmp_buf_ptr += 128; bal = 1; } else { src_buf -= 128; dst_buf -= 128; if(CHECK_RESULT) tmp_buf_ptr -= 128; bal = 0; } } stop_time = TIMER(); total_time = (stop_time - start_time); if(CHECK_RESULT) free(tmp_buf); if(total_time == 0.0){ total_time=0.000001; /* workaround for inaccurate timers */ warn_accuracy++; } return(total_time/loop); }