void DeviceContext::SetProgram() { ocl_program_ = RegisterKernels( &(viennacl::ocl::get_context(static_cast<uint64_t>(id_)))); }
void Caffe::SetDevices(std::vector<int> device_ids) { Get().device_contexts_.clear(); #ifdef USE_GREENTEA Get().ocl_programs_.clear(); #endif int cuda_device_count = 0; #ifdef USE_CUDA cudaGetDeviceCount(&cuda_device_count); #endif // USE_CUDA for (int i = 0; i < cuda_device_count; ++i) { Get().device_contexts_.emplace_back( DeviceContext(i, Backend::BACKEND_CUDA)); for (int j = 0; j < device_ids.size(); ++j) { if (device_ids[j] == i) { Caffe::GetDeviceContext(i)->Init(); } } #ifdef USE_GREENTEA // Dummy to have same vector size as device contexts viennacl::ocl::program program; Get().ocl_programs_.push_back(program); #endif // USE_GREENTEA } // Initialize GreenTea devices #ifdef USE_GREENTEA int greentea_device_count = 0; typedef std::vector<viennacl::ocl::platform> platforms_type; platforms_type platforms = viennacl::ocl::get_platforms(); std::vector<std::tuple<viennacl::ocl::platform, viennacl::ocl::device>> platform_devices; // Loop through devices for (std::size_t platform_id = 0; platform_id < platforms.size(); ++platform_id) { typedef std::vector<viennacl::ocl::device> devices_type; devices_type devices = platforms[platform_id].devices(CL_DEVICE_TYPE_ALL); for (std::size_t device_id = 0; device_id < devices.size(); ++device_id) { platform_devices.push_back( std::make_tuple(platforms[platform_id], devices[device_id])); Get().device_contexts_.emplace_back( DeviceContext(cuda_device_count + greentea_device_count, Backend::BACKEND_OpenCL)); // Check if this device is really used and initialize bool is_used = false; for (int i = 0; i < device_ids.size(); ++i) { int device_id = device_ids[i]; if (device_id == cuda_device_count + greentea_device_count) { // Setup actual context and compile kernels for this device viennacl::ocl::setup_context( device_id, std::get<1>(platform_devices[greentea_device_count])); viennacl::ocl::context &ctx = viennacl::ocl::get_context( static_cast<uint64_t>(device_id)); viennacl::ocl::program & program = RegisterKernels(&ctx); Get().ocl_programs_.push_back(program); // viennacl::ocl::switch_context(device_id); // viennacl::ocl::switch_device(std::get<1> // (platform_devices[device_id - cuda_device_count])); // Add defined number of queues for (int q = 0; q < GREENTEA_QUEUE_COUNT - 1; ++q) { ctx.add_queue(ctx.current_device()); } Caffe::GetDeviceContext(device_id)->Init(); is_used = true; } } // Device not used, dummy if (!is_used) { viennacl::ocl::program program; Get().ocl_programs_.push_back(program); } greentea_device_count++; } } #endif // USE_GREENTEA }