int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // create vector on the device compute::vector<unsigned int> vector(PERF_N, context); // create mersenne twister engine compute::mt19937 rng(queue); // generate random numbers perf_timer t; t.start(); rng.generate(vector.begin(), vector.end(), queue); queue.finish(); t.stop(); std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // device iterator boost::compute::vector<int>::iterator device_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_result_it = boost::compute::find(device_vector.begin(), device_vector.end(), wanted, queue); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if found index is correct by comparing it with std::find() result size_t host_result_index = std::distance(host_vector.begin(), std::find(host_vector.begin(), host_vector.end(), wanted)); size_t device_result_index = device_result_it.get_index(); if(device_result_index != host_result_index){ std::cout << "ERROR: " << "device_result_index (" << device_result_index << ") " << "!= " << "host_result_index (" << host_result_index << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::reverse( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector<int> v1(std::floor(PERF_N / 2.0)); std::vector<int> v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); std::vector<int> v3(PERF_N); std::vector<int>::iterator v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); v3_end = std::set_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // result std::vector<int>::iterator host_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if(host_result_it != host_vector.end()){ std::cout << "ERROR: " << "host_result_iterator != " << "host_vector.end()" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = ParallelSum<int>(&host_vector[0], host_vector.size()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); if(sum != host_sum){ std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; ::cl::Device device = bolt::cl::control::getDefault().getDevice(); std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl; // create host vector std::vector<int> h_vec = generate_random_vector<int>(PERF_N); // create device vector bolt::cl::device_vector<int> d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // transfer data to the device bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin()); t.start(); bolt::cl::sort(d_vec.begin(), d_vec.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; }
// Benchmark: std::includes() on two identical sorted host vectors.
// Prints the problem size and the minimum time over PERF_TRIALS runs.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // first input, filled by rand_int
    std::vector<int> v1(PERF_N);
    std::generate(v1.begin(), v1.end(), rand_int);

    // second input is an exact copy of the first
    std::vector<int> v2(v1);

    // std::includes requires both ranges to be sorted
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // The result is written to a volatile so the compiler has to keep the
    // call inside the timed region; a discarded result of a side-effect-free
    // algorithm may otherwise be optimized away.
    volatile bool is_subset = false;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        is_subset = std::includes(
            v1.begin(), v1.end(),
            v2.begin(), v2.end()
        );
        t.stop();
    }
    (void) is_subset;

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_keys(PERF_N); std::generate(host_keys.begin(), host_keys.end(), rand); std::vector<long> host_values(PERF_N); std::copy(host_keys.begin(), host_keys.end(), host_values.begin()); // create vector on the device and copy the data boost::compute::vector<int> device_keys(PERF_N, context); boost::compute::vector<long> device_values(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_keys.begin(), host_keys.end(), device_keys.begin(), queue ); boost::compute::copy( host_values.begin(), host_values.end(), device_values.begin(), queue ); t.start(); // sort vector boost::compute::sort_by_key( device_keys.begin(), device_keys.end(), device_values.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify keys are sorted if(!boost::compute::is_sorted(device_keys.begin(), device_keys.end(), queue)){ std::cout << "ERROR: is_sorted() returned false for the keys" << std::endl; return -1; } // verify values are sorted if(!boost::compute::is_sorted(device_values.begin(), device_values.end(), queue)){ std::cout << "ERROR: is_sorted() returned false for the values" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector<int>::iterator max = device_vector.begin(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); max = boost::compute::max_element( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } int device_max = max.read(queue); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << device_max << std::endl; // verify max is correct int host_max = *std::max_element(host_vector.begin(), host_vector.end()); if(device_max != host_max){ std::cout << "ERROR: " << "device_max (" << device_max << ") " << "!= " << "host_max (" << host_max << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector<int> v1(std::floor(PERF_N / 2.0)); std::vector<int> v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector<int> gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector<int> gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector<int> gpu_v3(PERF_N, context); boost::compute::vector<int>::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_intersection( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::vector<int> v1 = generate_random_vector<int>(PERF_N / 2); std::vector<int> v2 = generate_random_vector<int>(PERF_N / 2); std::vector<int> v3(PERF_N); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); boost::compute::vector<int> gpu_v1(PERF_N / 2, context); boost::compute::vector<int> gpu_v2(PERF_N / 2, context); boost::compute::vector<int> gpu_v3(PERF_N, context); boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue); boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::merge(gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::vector<int> check_v3(PERF_N); boost::compute::copy(gpu_v3.begin(), gpu_v3.end(), check_v3.begin(), queue); queue.finish(); std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); if(!ok){ std::cerr << "ERROR: merged ranges different" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; std::vector<int> h1(PERF_N); std::vector<int> h2(PERF_N); std::generate(h1.begin(), h1.end(), rand_int); std::generate(h2.begin(), h2.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> d1(PERF_N, context); boost::compute::vector<int> d2(PERF_N, context); boost::compute::copy(h1.begin(), h1.end(), d1.begin(), queue); boost::compute::copy(h2.begin(), h2.end(), d2.begin(), queue); int product = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); product = boost::compute::inner_product( d1.begin(), d1.end(), d2.begin(), int(0), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify product is correct int host_product = std::inner_product( h1.begin(), h1.end(), h2.begin(), int(0) ); if(product != host_product){ std::cout << "ERROR: " << "device_product (" << product << ") " << "!= " << "host_product (" << host_product << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<float> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_float); // create vector on the device and copy the data boost::compute::vector<float> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector perf_timer t; t.start(); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::reverse(host_vector.begin(), host_vector.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl; // create device vector (filled with zeros) bolt::cl::device_vector<int> d_vec(PERF_N, 0); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); bolt::cl::fill(d_vec.begin(), d_vec.end(), int(trial)); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); compute::vector<compute::uint_> vector(PERF_N, context); compute::default_random_engine rng(queue); compute::uniform_int_distribution<compute::uint_> dist(0, 1); perf_timer t; t.start(); dist.generate(vector.begin(), vector.end(), rng, queue); queue.finish(); t.stop(); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::vector<int> device_res(PERF_N,context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::partial_sum( device_vector.begin(), device_vector.end(), device_res.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify sum is correct std::partial_sum( host_vector.begin(), host_vector.end(), host_vector.begin() ); int device_sum = device_res.back(); int host_sum = host_vector.back(); if(device_sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << device_sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of keys and random values std::vector<int> host_keys(PERF_N); std::vector<int> host_values(PERF_N); std::generate(host_keys.begin(), host_keys.end(), UniqueKey); std::generate(host_values.begin(), host_values.end(), rand_int); // create vectors for keys and values on the device and copy the data boost::compute::vector<int> device_keys(PERF_N, context); boost::compute::vector<int> device_values(PERF_N,context); boost::compute::copy( host_keys.begin(), host_keys.end(), device_keys.begin(), queue ); boost::compute::copy( host_values.begin(), host_values.end(), device_values.begin(), queue ); // vectors for the results boost::compute::vector<int> device_keys_results(PERF_N, context); boost::compute::vector<int> device_values_results(PERF_N,context); typedef boost::compute::vector<int>::iterator iterType; std::pair<iterType, iterType> result( device_keys_results.begin(), device_values_results.begin() ); // reduce by key perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++) { t.start(); result = boost::compute::reduce_by_key(device_keys.begin(), device_keys.end(), device_values.begin(), device_keys_results.begin(), device_values_results.begin(), queue); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; size_t result_size = std::distance(device_keys_results.begin(), result.first); if(result_size != static_cast<size_t>(host_keys[PERF_N-1] + 1)) { std::cout << "ERROR: " << "wrong number of keys" << result_size << "\n" << (host_keys[PERF_N-1] + 1) << std::endl; return -1; } return 0; }