int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::reverse( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = ParallelSum<int>(&host_vector[0], host_vector.size()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); if(sum != host_sum){ std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl; return -1; } return 0; }
int main() { // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // generate random data on the host std::vector<float> host_vector(10000); std::generate(host_vector.begin(), host_vector.end(), rand); // create a vector on the device compute::vector<float> device_vector(host_vector.size(), context); // transfer data from the host to the device compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // calculate the square-root of each element in-place compute::transform( device_vector.begin(), device_vector.end(), device_vector.begin(), compute::sqrt<float>(), queue ); // copy values back to the host compute::copy( device_vector.begin(), device_vector.end(), host_vector.begin(), queue ); return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // device iterator boost::compute::vector<int>::iterator device_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_result_it = boost::compute::find(device_vector.begin(), device_vector.end(), wanted, queue); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if found index is correct by comparing it with std::find() result size_t host_result_index = std::distance(host_vector.begin(), std::find(host_vector.begin(), host_vector.end(), wanted)); size_t device_result_index = device_result_it.get_index(); if(device_result_index != host_result_index){ std::cout << "ERROR: " << "device_result_index (" << device_result_index << ") " << "!= " << "host_result_index (" << host_result_index << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // result std::vector<int>::iterator host_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if(host_result_it != host_vector.end()){ std::cout << "ERROR: " << "host_result_iterator != " << "host_vector.end()" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { size_t size = 1000; if(argc >= 2){ size = boost::lexical_cast<size_t>(argv[1]); } std::cout << "size: " << size << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue( context, device, boost::compute::command_queue::enable_profiling ); // create vector of random numbers on the host std::vector<int> host_vector(size); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device and copy the data boost::compute::vector<int> device_vector(size, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector boost::compute::timer t(queue); int sum = boost::compute::accumulate(device_vector.begin(), device_vector.end(), int(0), queue); std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl; // verify sum is correct int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); if(sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector<int>::iterator max = device_vector.begin(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); max = boost::compute::max_element( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } int device_max = max.read(queue); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << device_max << std::endl; // verify max is correct int host_max = *std::max_element(host_vector.begin(), host_vector.end()); if(device_max != host_max){ std::cout << "ERROR: " << "device_max (" << device_max << ") " << "!= " << "host_max (" << host_max << ")" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { size_t size = 1000; if(argc >= 2){ size = boost::lexical_cast<size_t>(argv[1]); } std::cout << "size: " << size << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue( context, device, boost::compute::command_queue::enable_profiling ); // create vector of random numbers on the host std::vector<unsigned int> host_vector(size); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device and copy the data boost::compute::vector<unsigned int> device_vector(size, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector boost::compute::timer t(queue); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<float> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_float); // create vector on the device and copy the data boost::compute::vector<float> device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector perf_timer t; t.start(); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::reverse(host_vector.begin(), host_vector.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
// this example demonstrates how to sort a vector of ints on the GPU int main() { // fix random number generator : do not give always the same numbers srand(time(NULL)); // create vector of random values on the host std::vector<int> host_vector(10); std::generate(host_vector.begin(), host_vector.end(), rand_int); // print out input vector std::cout << "input: [ "; for(size_t i = 0; i < host_vector.size(); i++){ std::cout << host_vector[i]; if(i != host_vector.size() - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; // transfer the values to the device compute::vector<int> device_vector = host_vector; // sort the values on the device compute::sort(device_vector.begin(), device_vector.end()); // transfer the values back to the host compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin()); // print out the sorted vector std::cout << "output: [ "; for(size_t i = 0; i < host_vector.size(); i++){ std::cout << host_vector[i]; if(i != host_vector.size() - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; return 0; }
int main() { // generate random data on the host std::vector<float> host_vector(10000); std::generate(host_vector.begin(), host_vector.end(), rand); // create a vector on the device and transfer data from the host boost::compute::vector<float> device_vector = host_vector; // calculate sqrt of each element in-place boost::compute::transform(device_vector.begin(), device_vector.end(), device_vector.begin(), boost::compute::sqrt<float>()); // copy values back to the host boost::compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin()); return 0; }
int main() { // create data array on host int host_data[] = { 1, 3, 5, 7, 9 }; // create vector on device compute::vector<int> device_vector(5); // copy from host to device compute::copy(host_data, host_data + 5, device_vector.begin()); // create vector on host std::vector<int> host_vector(5); // copy data back to host compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin()); return 0; }
/* Handle a single received RPC, selected by comparing rcv.name against the
 * request names listed below.
 *
 * Note that the lock must already be acquired upon entry. This is necessary
 * because the caller often peeks at the header and delegates control to here
 * when it doesn't specifically handle the current RPC.
 * The lock must be unlocked before returning; every branch below, including
 * the fallback for unrecognised names, calls lock.unlock() itself.
 *
 * rcv:  the received request; its arguments are read with rcv.read() and
 *       bulk payloads with rcv.read_buffer().
 * lock: the already-held lock described above. */
void process(RPCReceive& rcv, thread_scoped_lock &lock)
{
    if(rcv.name == "mem_alloc") {
        MemoryType type;
        network_device_memory mem;
        device_ptr client_pointer;

        rcv.read(mem);
        rcv.read(type);

        lock.unlock();

        /* the device_pointer sent by the client is used as the lookup key */
        client_pointer = mem.device_pointer;

        /* create a memory buffer for the device buffer */
        size_t data_size = mem.memory_size();
        DataVector &data_v = data_vector_insert(client_pointer, data_size);

        /* a zero-sized request gets a null data pointer instead of &data_v[0] */
        if(data_size)
            mem.data_pointer = (device_ptr)&(data_v[0]);
        else
            mem.data_pointer = 0;

        /* perform the allocation on the actual device */
        device->mem_alloc(mem, type);

        /* store a mapping to/from client_pointer and real device pointer */
        pointer_mapping_insert(client_pointer, mem.device_pointer);
    }
    else if(rcv.name == "mem_copy_to") {
        network_device_memory mem;

        rcv.read(mem);
        lock.unlock();

        device_ptr client_pointer = mem.device_pointer;

        DataVector &data_v = data_vector_find(client_pointer);

        size_t data_size = mem.memory_size();

        /* get pointer to memory buffer for device buffer */
        mem.data_pointer = (device_ptr)&data_v[0];

        /* copy data from network into memory buffer
         * (note: this read happens after the lock was released above) */
        rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);

        /* translate the client pointer to a real device pointer */
        mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

        /* copy the data from the memory buffer to the device buffer */
        device->mem_copy_to(mem);
    }
    else if(rcv.name == "mem_copy_from") {
        network_device_memory mem;
        int y, w, h, elem;

        rcv.read(mem);
        rcv.read(y);
        rcv.read(w);
        rcv.read(h);
        rcv.read(elem);

        device_ptr client_pointer = mem.device_pointer;
        mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

        DataVector &data_v = data_vector_find(client_pointer);

        mem.data_pointer = (device_ptr)&(data_v[0]);

        device->mem_copy_from(mem, y, w, h, elem);

        size_t data_size = mem.memory_size();

        /* reply with the buffer contents; the lock stays held until the
         * whole reply has been written */
        RPCSend snd(socket, &error_func, "mem_copy_from");
        snd.write();
        snd.write_buffer((uint8_t*)mem.data_pointer, data_size);
        lock.unlock();
    }
    else if(rcv.name == "mem_zero") {
        network_device_memory mem;

        rcv.read(mem);
        lock.unlock();

        device_ptr client_pointer = mem.device_pointer;
        mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

        DataVector &data_v = data_vector_find(client_pointer);

        mem.data_pointer = (device_ptr)&(data_v[0]);

        device->mem_zero(mem);
    }
    else if(rcv.name == "mem_free") {
        network_device_memory mem;
        device_ptr client_pointer;

        rcv.read(mem);
        lock.unlock();

        client_pointer = mem.device_pointer;

        /* NOTE(review): the "_erase" variant presumably also drops the
         * client-pointer bookkeeping; confirm against its definition */
        mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);

        device->mem_free(mem);
    }
    else if(rcv.name == "const_copy_to") {
        string name_string;
        size_t size;

        rcv.read(name_string);
        rcv.read(size);

        /* temporary host buffer for the payload; it is read before the lock
         * is released */
        vector<char> host_vector(size);
        rcv.read_buffer(&host_vector[0], size);
        lock.unlock();

        device->const_copy_to(name_string.c_str(), &host_vector[0], size);
    }
    else if(rcv.name == "tex_alloc") {
        network_device_memory mem;
        string name;
        InterpolationType interpolation;
        bool periodic;
        device_ptr client_pointer;

        rcv.read(name);
        rcv.read(mem);
        rcv.read(interpolation);
        rcv.read(periodic);
        lock.unlock();

        client_pointer = mem.device_pointer;

        size_t data_size = mem.memory_size();

        DataVector &data_v = data_vector_insert(client_pointer, data_size);

        /* a zero-sized request gets a null data pointer instead of &data_v[0] */
        if(data_size)
            mem.data_pointer = (device_ptr)&(data_v[0]);
        else
            mem.data_pointer = 0;

        /* read the texture payload (note: after the lock was released above) */
        rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);

        device->tex_alloc(name.c_str(), mem, interpolation, periodic);

        /* store a mapping to/from client_pointer and real device pointer */
        pointer_mapping_insert(client_pointer, mem.device_pointer);
    }
    else if(rcv.name == "tex_free") {
        network_device_memory mem;
        device_ptr client_pointer;

        rcv.read(mem);
        lock.unlock();

        client_pointer = mem.device_pointer;

        mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);

        device->tex_free(mem);
    }
    else if(rcv.name == "load_kernels") {
        bool experimental;
        rcv.read(experimental);

        bool result;
        result = device->load_kernels(experimental);

        /* send the result back; the lock is held for the whole exchange,
         * including the load_kernels() call */
        RPCSend snd(socket, &error_func, "load_kernels");
        snd.add(result);
        snd.write();
        lock.unlock();
    }
    else if(rcv.name == "task_add") {
        DeviceTask task;

        rcv.read(task);
        lock.unlock();

        /* translate every non-null client pointer in the task to a real
         * device pointer */
        if(task.buffer)
            task.buffer = device_ptr_from_client_pointer(task.buffer);

        if(task.rgba_half)
            task.rgba_half = device_ptr_from_client_pointer(task.rgba_half);

        if(task.rgba_byte)
            task.rgba_byte = device_ptr_from_client_pointer(task.rgba_byte);

        if(task.shader_input)
            task.shader_input = device_ptr_from_client_pointer(task.shader_input);

        if(task.shader_output)
            task.shader_output = device_ptr_from_client_pointer(task.shader_output);

        /* route the task's callbacks to this server's member functions */
        task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
        task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
        task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
        task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
        task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);

        device->task_add(task);
    }
    else if(rcv.name == "task_wait") {
        /* the lock is released while device->task_wait() runs and
         * blocked_waiting is set for the duration of that call */
        lock.unlock();

        blocked_waiting = true;
        device->task_wait();
        blocked_waiting = false;

        /* re-acquire the lock only for sending the completion notice */
        lock.lock();
        RPCSend snd(socket, &error_func, "task_wait_done");
        snd.write();
        lock.unlock();
    }
    else if(rcv.name == "task_cancel") {
        lock.unlock();
        device->task_cancel();
    }
    else if(rcv.name == "acquire_tile") {
        /* not handled here: the request name and the tile read from the
         * request are appended to acquire_queue
         * NOTE(review): presumably consumed by the tile callbacks; confirm */
        AcquireEntry entry;
        entry.name = rcv.name;
        rcv.read(entry.tile);
        acquire_queue.push_back(entry);
        lock.unlock();
    }
    else if(rcv.name == "acquire_tile_none") {
        /* queued like "acquire_tile", but no tile is read */
        AcquireEntry entry;
        entry.name = rcv.name;
        acquire_queue.push_back(entry);
        lock.unlock();
    }
    else if(rcv.name == "release_tile") {
        /* queued by name only; no tile is read here */
        AcquireEntry entry;
        entry.name = rcv.name;
        acquire_queue.push_back(entry);
        lock.unlock();
    }
    else {
        /* unknown request: report it and still honour the unlock contract */
        cout << "Error: unexpected RPC receive call \"" + rcv.name + "\"\n";
        lock.unlock();
    }
}
/* Handle a single received RPC, selected by comparing rcv.name against the
 * request names listed below. Names that match none of the branches are
 * ignored.
 *
 * Bookkeeping used by the branches:
 *   mem_data  host-side byte buffers, keyed by the pointer sent by the client
 *   ptr_map   client pointer -> real device pointer
 *   ptr_imap  real device pointer -> client pointer
 *
 * rcv: the received request; its arguments are read with rcv.read() and bulk
 *      payloads with rcv.read_buffer(). */
void process(RPCReceive& rcv)
{
    // fprintf(stderr, "receive process %s\n", rcv.name.c_str());

    if(rcv.name == "mem_alloc") {
        MemoryType type;
        network_device_memory mem;
        device_ptr remote_pointer;

        rcv.read(mem);
        rcv.read(type);

        /* todo: CPU needs mem.data_pointer */

        remote_pointer = mem.device_pointer;

        /* (re)create the host-side buffer backing this allocation */
        mem_data[remote_pointer] = vector<uint8_t>();
        mem_data[remote_pointer].resize(mem.memory_size());

        /* a zero-sized request gets a null data pointer instead of &buffer[0] */
        if(mem.memory_size())
            mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);
        else
            mem.data_pointer = 0;

        device->mem_alloc(mem, type);

        /* record the mapping in both directions */
        ptr_map[remote_pointer] = mem.device_pointer;
        ptr_imap[mem.device_pointer] = remote_pointer;
    }
    else if(rcv.name == "mem_copy_to") {
        network_device_memory mem;

        rcv.read(mem);

        device_ptr remote_pointer = mem.device_pointer;
        mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

        /* network -> host buffer -> device */
        rcv.read_buffer((uint8_t*)mem.data_pointer, mem.memory_size());

        mem.device_pointer = ptr_map[remote_pointer];

        device->mem_copy_to(mem);
    }
    else if(rcv.name == "mem_copy_from") {
        network_device_memory mem;
        int y, w, h, elem;

        rcv.read(mem);
        rcv.read(y);
        rcv.read(w);
        rcv.read(h);
        rcv.read(elem);

        device_ptr remote_pointer = mem.device_pointer;
        mem.device_pointer = ptr_map[remote_pointer];
        mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

        device->mem_copy_from(mem, y, w, h, elem);

        /* reply with the buffer contents */
        RPCSend snd(socket);
        snd.write();
        snd.write_buffer((uint8_t*)mem.data_pointer, mem.memory_size());
    }
    else if(rcv.name == "mem_zero") {
        network_device_memory mem;

        rcv.read(mem);

        device_ptr remote_pointer = mem.device_pointer;
        mem.device_pointer = ptr_map[mem.device_pointer];
        mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

        device->mem_zero(mem);
    }
    else if(rcv.name == "mem_free") {
        network_device_memory mem;
        device_ptr remote_pointer;

        rcv.read(mem);

        remote_pointer = mem.device_pointer;
        mem.device_pointer = ptr_map[mem.device_pointer];

        /* drop both mapping directions and the host-side buffer */
        ptr_map.erase(remote_pointer);
        ptr_imap.erase(mem.device_pointer);
        mem_data.erase(remote_pointer);

        device->mem_free(mem);
    }
    else if(rcv.name == "const_copy_to") {
        string name_string;
        size_t size;

        rcv.read(name_string);
        rcv.read(size);

        /* temporary host buffer for the payload */
        vector<char> host_vector(size);
        rcv.read_buffer(&host_vector[0], size);

        device->const_copy_to(name_string.c_str(), &host_vector[0], size);
    }
    else if(rcv.name == "tex_alloc") {
        network_device_memory mem;
        string name;
        bool interpolation;
        bool periodic;
        device_ptr remote_pointer;

        rcv.read(name);
        rcv.read(mem);
        rcv.read(interpolation);
        rcv.read(periodic);

        remote_pointer = mem.device_pointer;

        /* (re)create the host-side buffer backing this texture */
        mem_data[remote_pointer] = vector<uint8_t>();
        mem_data[remote_pointer].resize(mem.memory_size());

        /* a zero-sized request gets a null data pointer instead of &buffer[0] */
        if(mem.memory_size())
            mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);
        else
            mem.data_pointer = 0;

        rcv.read_buffer((uint8_t*)mem.data_pointer, mem.memory_size());

        device->tex_alloc(name.c_str(), mem, interpolation, periodic);

        /* record the mapping in both directions */
        ptr_map[remote_pointer] = mem.device_pointer;
        ptr_imap[mem.device_pointer] = remote_pointer;
    }
    else if(rcv.name == "tex_free") {
        network_device_memory mem;
        device_ptr remote_pointer;

        rcv.read(mem);

        remote_pointer = mem.device_pointer;
        mem.device_pointer = ptr_map[mem.device_pointer];

        /* drop both mapping directions and the host-side buffer, exactly as
         * "mem_free" does. The reverse entry lives in ptr_imap (it is keyed
         * by the real device pointer); erasing that key from ptr_map instead
         * would leave a stale ptr_imap entry behind and could remove an
         * unrelated client mapping that happens to share the value. */
        ptr_map.erase(remote_pointer);
        ptr_imap.erase(mem.device_pointer);
        mem_data.erase(remote_pointer);

        device->tex_free(mem);
    }
    else if(rcv.name == "task_add") {
        DeviceTask task;

        rcv.read(task);

        /* translate every non-null client pointer in the task to a real
         * device pointer */
        if(task.buffer) task.buffer = ptr_map[task.buffer];
        if(task.rgba_byte) task.rgba_byte = ptr_map[task.rgba_byte];
        if(task.rgba_half) task.rgba_half = ptr_map[task.rgba_half];
        if(task.shader_input) task.shader_input = ptr_map[task.shader_input];
        if(task.shader_output) task.shader_output = ptr_map[task.shader_output];

        /* route the task's callbacks to this server's member functions */
        task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
        task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
        task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
        task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
        task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);

        device->task_add(task);
    }
    else if(rcv.name == "task_wait") {
        device->task_wait();

        /* tell the client that waiting has finished */
        RPCSend snd(socket, "task_wait_done");
        snd.write();
    }
    else if(rcv.name == "task_cancel") {
        device->task_cancel();
    }
}
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector<int> device_vector(PERF_N, context); boost::compute::vector<int> device_res(PERF_N,context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::partial_sum( device_vector.begin(), device_vector.end(), device_res.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify sum is correct std::partial_sum( host_vector.begin(), host_vector.end(), host_vector.begin() ); int device_sum = device_res.back(); int host_sum = host_vector.back(); if(device_sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << device_sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; }