bool run () { bool passed = true; /* create vector with random numbers */ const size_t N = 100; std::vector<size_t> array(N); std::vector<atomic<size_t>> prefix_sum(N); for (size_t j=0; j<N; j++) array[j] = rand() % 10; /* dry run only counts */ ParallelPrefixSumState<size_t> state; size_t S0 = parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t sum) -> size_t { size_t s = 0; for (size_t i=r.begin(); i<r.end(); i++) s += array[i]; return s; }, [](size_t v0, size_t v1) { return v0+v1; }); /* final run calculates prefix sum */ size_t S1 = parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t sum) -> size_t { size_t s = 0; for (size_t i=r.begin(); i<r.end(); i++) { prefix_sum[i].store(sum+s); s += array[i]; } return s; }, [](size_t v0, size_t v1) { return v0+v1; }); /* check calculated prefix sum */ size_t sum=0; for (size_t i=0; i<N; sum+=array[i++]) { passed &= (prefix_sum[i] == sum); } passed &= (S0 == sum); passed &= (S1 == sum); return passed; }
/* Benchmark plus correctness check for the array based parallel_prefix_sum.
   For a growing sequence of array sizes it fills a source array with random
   values, runs the prefix sum M times, prints the measured throughput, and
   compares the destination array and the returned totals with a serial scan.
   Prints "[passed]" or "[failed]" and returns the overall result. */
bool operator() ()
{
  printf("%s::%s ... ",TOSTRING(isa),name);
  fflush(stdout);

  bool passed = true;
  const size_t M = 10; /* number of timed repetitions per array size */

  for (size_t N=10; N<10000000; N*=2.1f)
  {
    /* random input; the reference total wraps around in 32 bits */
    std::vector<uint32_t> src(N);
    uint32_t expected_total = 0;
    for (uint32_t& value : src)
    {
      value = rand();
      expected_total += value;
    }

    /* zeroed destination buffer */
    std::vector<uint32_t> dst(N);
    memset(dst.data(),0,N*sizeof(uint32_t));

    /* timed section: M prefix sums over the same input */
    const double t_begin = getSeconds();
    for (size_t rep=0; rep<M; rep++)
    {
      const uint32_t total = parallel_prefix_sum(src,dst,N);
      if (expected_total != total)
        passed = false;
    }
    const double t_end = getSeconds();

    /* array size and millions of processed elements per second */
    printf("%zu/%3.2fM ",N,1E-6*double(N*M)/(t_end-t_begin));

    /* dst[i] has to equal the sum of src[0..i-1], truncated to 32 bits */
    size_t running = 0;
    for (size_t i=0; i<N; i++)
    {
      if (static_cast<uint32_t>(running) != dst[i])
        passed = false;
      running += src[i];
    }
  }

  /* output if test passed or not */
  if (!passed)
    printf("[failed]\n");
  else
    printf("[passed]\n");

  return passed;
}