bool run_add() { constexpr size_t N = 64; std::vector<T> host_input(N); std::vector<T> host_expected(N); for (int i = 0; i < N; ++i) { host_input[i] = (T)i; host_expected[i] = host_input[i] + host_input[i]; } T* input1; hipMalloc(&input1, N * sizeof(T)); hipMemcpy(input1, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice); T* input2; hipMalloc(&input2, N * sizeof(T)); hipMemcpy(input2, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice); constexpr unsigned int blocks = 1; constexpr unsigned int threads_per_block = 1; hipLaunchKernelGGL(add<T>, dim3(blocks), dim3(threads_per_block), 0, 0, input1, input2, N); hipMemcpy(host_input.data(), input1, host_input.size()*sizeof(T), hipMemcpyDeviceToHost); bool equal = true; for (int i = 0; i < N; i++) { equal &= (host_input[i] == host_expected[i]); } return equal; }
static void test_scan_values(viennacl::vector<ScalarType> const & input, viennacl::vector<ScalarType> & result, bool is_inclusive_scan) { std::vector<ScalarType> host_input(input.size()); std::vector<ScalarType> host_result(result.size()); viennacl::copy(input, host_input); viennacl::copy(result, host_result); ScalarType sum = 0; if (is_inclusive_scan) { for(viennacl::vcl_size_t i = 0; i < input.size(); i++) { sum += host_input[i]; host_input[i] = sum; } } else { for(viennacl::vcl_size_t i = 0; i < input.size(); i++) { ScalarType tmp = host_input[i]; host_input[i] = sum; sum += tmp; } } for(viennacl::vcl_size_t i = 0; i < input.size(); i++) { if (host_input[i] != host_result[i]) { std::cout << "Fail at vector index " << i << std::endl; std::cout << " result[" << i << "] = " << host_result[i] << std::endl; std::cout << " Reference = " << host_input[i] << std::endl; if (i > 0) { std::cout << " previous result[" << i-1 << "] = " << host_result[i-1] << std::endl; std::cout << " previous Reference = " << host_input[i-1] << std::endl; } exit(EXIT_FAILURE); } } std::cout << "PASSED!" << std::endl; }