// Runs the magnet::CL::scan<T> functor in place over a device buffer holding
// the values 1, 2, ..., N and validates the result with testOutput().
//
// Progress and the verdict are written to std::cout.
// Returns true when the check FAILED and false when it passed.
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
  const cl_uint elementCount = 1024 * 2 + 15;

  std::vector<T> hostInput(elementCount);
  const size_t byteCount = sizeof(T) * hostInput.size();

  std::cout << "##Testing scan for " << hostInput.size()
            << " elements and type "
            << magnet::CL::detail::traits<T>::kernel_type();

  // Element k holds k + 1.
  for (size_t idx = 0; idx < hostInput.size(); ++idx)
    hostInput[idx] = idx + 1;

  // Device buffer allocated with CL_MEM_ALLOC_HOST_PTR and initialised from
  // the host vector via CL_MEM_COPY_HOST_PTR.
  cl::Buffer deviceBuffer(context,
                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                          byteCount, &hostInput[0]);

  magnet::CL::scan<T> scanner;
  scanner.build(queue, context);
  // The same buffer is passed as both arguments, so the scan result replaces
  // the buffer's original contents.
  scanner(deviceBuffer, deviceBuffer);

  // Blocking read (CL_TRUE) of the result back to the host.
  std::vector<T> hostOutput(elementCount);
  queue.enqueueReadBuffer(deviceBuffer, CL_TRUE, 0, byteCount, &hostOutput[0]);

  const bool passed = testOutput(hostInput, hostOutput);
  const char* verdict = passed ? " PASSED" : " FAILED";
  std::cout << verdict << std::endl;

  return !passed;
}
// Sorts a reverse-ordered array of 2048 elements on the device with
// magnet::CL::bitonicSort<T> and validates the result with testOutput().
//
// Writes a header line to std::cout; raises via M_throw() if testOutput()
// rejects the sorted data.
void runTestType(cl::Context context, cl::CommandQueue queue)
{
  const cl_uint elementCount = 2 << 10;

  std::vector<T> hostInput(elementCount);
  const size_t byteCount = sizeof(T) * hostInput.size();

  std::cout << "##Testing bitonic sort for " << hostInput.size()
            << " elements and type "
            << magnet::CL::detail::traits<T>::kernel_type()
            << std::endl;

  // Keys start in descending order: N-1, N-2, ..., 0.
  for (size_t idx = 0; idx < hostInput.size(); ++idx)
    hostInput[idx] = hostInput.size() - idx - 1;

  // Device buffer allocated with CL_MEM_ALLOC_HOST_PTR and initialised from
  // the host vector via CL_MEM_COPY_HOST_PTR.
  cl::Buffer deviceBuffer(context,
                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                          byteCount, &hostInput[0]);

  magnet::CL::bitonicSort<T> sorter;
  sorter.build(queue, context);
  sorter(deviceBuffer);

  // Blocking read (CL_TRUE) of the sorted keys back to the host.
  std::vector<T> hostOutput(elementCount);
  queue.enqueueReadBuffer(deviceBuffer, CL_TRUE, 0, byteCount, &hostOutput[0]);

  if (testOutput(hostInput, hostOutput))
    return;

  M_throw() << "Incorrect output for size " << hostInput.size()
            << " and type "
            << magnet::CL::detail::traits<T>::kernel_type();
}
// Exercises magnet::CL::radixSortAMD<T> twice on a reverse-ordered key array:
//   1. keys only, sorted in place;
//   2. keys together with a cl_uint payload, both sorted in place.
//
// A PASSED/FAILED verdict for each check is written to std::cout.
// Returns true if ANY of the checks failed, false if all of them passed.
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
  cl_uint size = (1 << 16) + 16384;

  std::vector<T> input(size);

  std::cout << "##Testing AMD radix sort for " << input.size()
            << " elements and type "
            << magnet::CL::detail::traits<T>::kernel_type();

  // Keys start in descending order: size-1, size-2, ..., 0.
  for (size_t i = 0; i < input.size(); ++i)
    input[i] = input.size() - i - 1;

  // Create the key buffer using pinned memory, initialised from the host vector.
  cl::Buffer bufferIn(context,
                      CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                      sizeof(T) * input.size(), &input[0]);

  magnet::CL::radixSortAMD<T> radixSortFunctor;
  radixSortFunctor.build(queue, context);

  // ---- Pass 1: keys only (same buffer used as source and destination) ----
  radixSortFunctor(bufferIn, bufferIn);

  std::vector<T> output(size);
  queue.enqueueReadBuffer(bufferIn, CL_TRUE, 0, input.size() * sizeof(T), &output[0]);

  bool failed = !testOutput(input, output);
  std::cout << " key(only) " << (failed ? "FAILED" : "PASSED") << ", ";

  // ---- Pass 2: keys plus payload ----
  // Restore the unsorted keys on the device (pass 1 sorted them in place).
  queue.enqueueWriteBuffer(bufferIn, CL_TRUE, 0, input.size() * sizeof(T), &input[0]);

  // Payload: element i initially carries the value i.
  std::vector<cl_uint> data(size);
  for (size_t i = 0; i < input.size(); ++i)
    data[i] = i;

  cl::Buffer dataIn(context,
                    CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                    sizeof(cl_uint) * data.size(), &data[0]);

  radixSortFunctor(bufferIn, dataIn, bufferIn, dataIn);

  // Read back BOTH buffers. The keys must be fetched again here: without this
  // read, `output` would still hold the result of pass 1 and the key check
  // below could never detect a failure of the key/data sort.
  queue.enqueueReadBuffer(bufferIn, CL_TRUE, 0, input.size() * sizeof(T), &output[0]);
  queue.enqueueReadBuffer(dataIn, CL_TRUE, 0, data.size() * sizeof(cl_uint), &data[0]);

  bool keyfail = !testOutput(input, output);
  std::cout << " key " << (keyfail ? "FAILED" : "PASSED");

  // The payload at position i is expected to be size-1-i, i.e. the original
  // index of the key that the reversed input places at sorted position i.
  bool datafail = false;
  for (size_t i = 0; i < input.size(); ++i)
    if (data[i] != input.size() - 1 - i)
      {
        datafail = true;
        break; // one mismatch is enough to fail the check
      }

  std::cout << " data " << (datafail ? "FAILED" : "PASSED") << std::endl;

  return failed || keyfail || datafail;
}
// Exercises the generic magnet::CL::sort<T> functor on a reverse-ordered key
// array: first keys only, then keys together with a cl_uint payload.
//
// The header line (printed after the first sort) names the algorithm reported
// by getMode(); an unrecognised mode raises via M_throw().
// A PASSED/FAILED verdict for each check is written to std::cout.
// Returns true if any check failed, false otherwise.
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
  const cl_uint elementCount = 64 * 256;

  std::vector<T> keys(elementCount);
  const size_t keyBytes = keys.size() * sizeof(T);

  // Keys start in descending order: N-1, N-2, ..., 0.
  for (size_t idx = 0; idx < keys.size(); ++idx)
    keys[idx] = keys.size() - idx - 1;

  // Key buffer allocated with CL_MEM_ALLOC_HOST_PTR and initialised from the
  // host vector via CL_MEM_COPY_HOST_PTR.
  cl::Buffer keyBuffer(context,
                       CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                       keyBytes, &keys[0]);

  magnet::CL::sort<T> sorter;
  sorter.build(queue, context);

  // ---- Pass 1: keys only ----
  sorter(keyBuffer);

  // Report which backend the functor says it is using.
  std::cout << "##Testing generic sort (";

  const char* algorithmName = 0;
  switch (sorter.getMode())
    {
    case magnet::CL::sort<T>::CPU:
      algorithmName = "HeapSort";
      break;
    case magnet::CL::sort<T>::NVIDIA:
      algorithmName = "radixNVIDIA";
      break;
    case magnet::CL::sort<T>::AMD:
      algorithmName = "radixAMD";
      break;
    default:
      M_throw() << "Could not determine which sorting algorithm is being used";
    }
  std::cout << algorithmName;

  std::cout << ") for " << keys.size() << " elements and type "
            << magnet::CL::detail::traits<T>::kernel_type();

  // Blocking read (CL_TRUE) of the sorted keys back to the host.
  std::vector<T> sortedKeys(elementCount);
  queue.enqueueReadBuffer(keyBuffer, CL_TRUE, 0, keyBytes, &sortedKeys[0]);

  const bool failed = !testOutput(keys, sortedKeys);
  std::cout << " key(only) " << (failed ? "FAILED" : "PASSED") << ", ";

  // ---- Pass 2: keys plus payload ----
  // Restore the unsorted keys on the device before sorting again.
  queue.enqueueWriteBuffer(keyBuffer, CL_TRUE, 0, keyBytes, &keys[0]);

  // Payload: element k initially carries the value k.
  std::vector<cl_uint> payload(elementCount);
  for (size_t idx = 0; idx < keys.size(); ++idx)
    payload[idx] = idx;

  const size_t payloadBytes = payload.size() * sizeof(cl_uint);
  cl::Buffer payloadBuffer(context,
                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                           payloadBytes, &payload[0]);

  sorter(keyBuffer, payloadBuffer);

  queue.enqueueReadBuffer(payloadBuffer, CL_TRUE, 0, payloadBytes, &payload[0]);

  // NOTE(review): the key check for this pass is switched off (it used to be
  // !testOutput(input, output)) and the keys are not read back after this
  // sort, so "key" always reports PASSED. Confirm whether this is intended.
  const bool keyfail = false;
  std::cout << " key " << (keyfail ? "FAILED" : "PASSED");

  // The payload at position k is expected to be N-1-k.
  bool datafail = false;
  for (size_t idx = 0; idx < keys.size() && !datafail; ++idx)
    datafail = (payload[idx] != keys.size() - 1 - idx);

  std::cout << " data " << (datafail ? "FAILED" : "PASSED") << std::endl;

  return failed || keyfail || datafail;
}