int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::vector<int> v1 = generate_random_vector<int>(PERF_N / 2); std::vector<int> v2 = generate_random_vector<int>(PERF_N / 2); std::vector<int> v3(PERF_N); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); boost::compute::vector<int> gpu_v1(PERF_N / 2, context); boost::compute::vector<int> gpu_v2(PERF_N / 2, context); boost::compute::vector<int> gpu_v3(PERF_N, context); boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue); boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::merge(gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::vector<int> check_v3(PERF_N); boost::compute::copy(gpu_v3.begin(), gpu_v3.end(), check_v3.begin(), queue); queue.finish(); std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); if(!ok){ std::cerr << "ERROR: merged ranges different" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector<int> v1(std::floor(PERF_N / 2.0)); std::vector<int> v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector<int> gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector<int> gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector<int> gpu_v3(PERF_N, context); boost::compute::vector<int>::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_intersection( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; }