int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::reverse( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main() { // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // generate random data on the host std::vector<float> host_vector(10000); std::generate(host_vector.begin(), host_vector.end(), rand); // create a vector on the device compute::vector<float> device_vector(host_vector.size(), context); // transfer data from the host to the device compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // calculate the square-root of each element in-place compute::transform( device_vector.begin(), device_vector.end(), device_vector.begin(), compute::sqrt<float>(), queue ); // copy values back to the host compute::copy( device_vector.begin(), device_vector.end(), host_vector.begin(), queue ); return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // device iterator boost::compute::vector<int>::iterator device_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_result_it = boost::compute::find(device_vector.begin(), device_vector.end(), wanted, queue); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if found index is correct by comparing it with std::find() result size_t host_result_index = std::distance(host_vector.begin(), std::find(host_vector.begin(), host_vector.end(), wanted)); size_t device_result_index = device_result_it.get_index(); if(device_result_index != host_result_index){ std::cout << "ERROR: " << "device_result_index (" << device_result_index << ") " << "!= " << "host_result_index (" << host_result_index << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { size_t size = 1000; if(argc >= 2){ size = boost::lexical_cast<size_t>(argv[1]); } std::cout << "size: " << size << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue( context, device, boost::compute::command_queue::enable_profiling ); // create vector of random numbers on the host std::vector<int> host_vector(size); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device and copy the data boost::compute::vector<int> device_vector(size, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector boost::compute::timer t(queue); int sum = boost::compute::accumulate(device_vector.begin(), device_vector.end(), int(0), queue); std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl; // verify sum is correct int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); if(sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector<int>::iterator max = device_vector.begin(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); max = boost::compute::max_element( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } int device_max = max.read(queue); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << device_max << std::endl; // verify max is correct int host_max = *std::max_element(host_vector.begin(), host_vector.end()); if(device_max != host_max){ std::cout << "ERROR: " << "device_max (" << device_max << ") " << "!= " << "host_max (" << host_max << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { size_t size = 1000; if(argc >= 2){ size = boost::lexical_cast<size_t>(argv[1]); } std::cout << "size: " << size << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue( context, device, boost::compute::command_queue::enable_profiling ); // create vector of random numbers on the host std::vector<unsigned int> host_vector(size); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device and copy the data boost::compute::vector<unsigned int> device_vector(size, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector boost::compute::timer t(queue); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<float> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_float); // create vector on the device and copy the data boost::compute::vector<float> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector perf_timer t; t.start(); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; }
/// Lists the OpenCL devices of type DEVICE_TYPE available on a platform.
///
/// Uses the usual two-step clGetDeviceIDs() protocol: the first call asks
/// only for the device count, the second fills a buffer of that size.
///
/// @param platformId  Platform whose devices are queried.
/// @return The device ids written by clGetDeviceIDs(), or an empty
///         device_vector when either query fails or no device is reported.
///         Failures are logged to std::cerr; nothing is thrown by this code.
device_vector getDevices(cl_platform_id const& platformId)
{
    // Step 1: how many matching devices are there?
    cl_uint num_devices = 0;
    if (CL_SUCCESS != clGetDeviceIDs(platformId, DEVICE_TYPE, 0, nullptr, &num_devices))
    {
        std::cerr << "Failed to get number of devices." << std::endl;
        return device_vector();
    }

    // Nothing to fetch. Calling clGetDeviceIDs() again with num_entries == 0
    // is an invalid request (CL_INVALID_VALUE) and would only log a
    // misleading failure message before returning the same empty result.
    if (num_devices == 0)
    {
        return device_vector();
    }

    // Step 2: fetch exactly num_devices ids into a buffer of that size.
    device_vector devices(num_devices);
    if (CL_SUCCESS != clGetDeviceIDs(platformId, DEVICE_TYPE, num_devices, devices.data(), nullptr))
    {
        std::cerr << "clGetDeviceIDs failed." << std::endl;
        return device_vector();
    }

    return devices;
}
int main() { // create data array on host int host_data[] = { 1, 3, 5, 7, 9 }; // create vector on device compute::vector<int> device_vector(5); // copy from host to device compute::copy(host_data, host_data + 5, device_vector.begin()); // create vector on host std::vector<int> host_vector(5); // copy data back to host compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin()); return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::vector<int> device_res(PERF_N,context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::partial_sum( device_vector.begin(), device_vector.end(), device_res.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify sum is correct std::partial_sum( host_vector.begin(), host_vector.end(), host_vector.begin() ); int device_sum = device_res.back(); int host_sum = host_vector.back(); if(device_sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << device_sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; }