void Caffe::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. if(device_id >= 0) { CUDA_CHECK(cudaSetDevice(device_id)); } else { //negative means pick first avail cudaSetValidDevices(NULL,0); } if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); }
/**
 * Build the list of CUDA devices to use and create a context (plus constant
 * memory) on each of them.
 *
 * When ION_COMPILE_CUDA is not defined the function does nothing and returns
 * false.
 *
 * Device selection:
 *   - use_all_gpus != 0: every device ordinal reported by cudaGetDeviceCount
 *     is appended to `valid_devices`.
 *   - otherwise: only devices whose compute capability equals the highest one
 *     found AND whose total global memory exceeds 2.5 GiB are appended.
 *
 * The resulting list is handed to cudaSetValidDevices, then
 * InitConstantMemoryOnGpu is called for each entry (last to first). Entries
 * for which that call throws cudaException are erased from `valid_devices`.
 *
 * @param use_gpu_acceleration  If false, returns false immediately.
 * @param valid_devices         In/out: device ordinals are appended; entries
 *                              whose context creation fails are removed.
 * @param use_all_gpus          Non-zero selects every device; zero applies the
 *                              compute-capability / memory filter.
 * @param numBkgWorkers_gpu     Out: set to the size of `valid_devices` and
 *                              decremented for each device that is removed.
 *                              Left untouched on the early-return paths.
 * @return true if at least one device remains usable, false otherwise.
 */
bool configureGpu(bool use_gpu_acceleration, std::vector<int> &valid_devices,
                  int use_all_gpus, int &numBkgWorkers_gpu)
{
#ifdef ION_COMPILE_CUDA
  // Minimum total global memory (2.5 * 1024^3 bytes) for a device to be
  // accepted by the filtered (non "use all") selection below.
  const unsigned long long gpu_mem = 2.5 * 1024 * 1024 * 1024;

  if (!use_gpu_acceleration)
    return false;

  // Get number of GPUs in system
  int num_gpus = 0;
  cudaError_t err = cudaGetDeviceCount( &num_gpus );

  if (err != cudaSuccess) {
    printf("CUDA: No GPU device available. Defaulting to CPU only computation\n");
    return false;
  }

  if ( use_all_gpus ) {
    // Add all GPUs to the valid device list
    for ( int dev = 0; dev < num_gpus; dev++ )
      valid_devices.push_back(dev);
  }
  else {
    // Only add the highest compute devices to the compute list
    int version = 0;
    int major = 0;
    int minor = 0;
    cudaDeviceProp dev_props;

    // Iterate over GPUs to find the highest compute device
    for ( int dev = 0; dev < num_gpus; dev++ ) {
      // Skip devices whose properties cannot be read; dev_props would
      // otherwise hold uninitialised or stale data.
      err = cudaGetDeviceProperties( &dev_props, dev );
      if (err != cudaSuccess) {
        printf("CUDA %d: Could not query device properties (%s), skipping device\n",
               dev, cudaGetErrorString(err));
        continue;
      }
      if ( (dev_props.major*10) + dev_props.minor > version ) {
        version = (dev_props.major*10) + dev_props.minor;
        major = dev_props.major;
        minor = dev_props.minor;
      }
    }

    // Keep every device matching that compute capability with enough memory
    for ( int dev = 0; dev < num_gpus; dev++ ) {
      if (cudaGetDeviceProperties(&dev_props, dev) != cudaSuccess)
        continue;  // failure already reported in the loop above
      if (dev_props.major == major && dev_props.minor == minor) {
        if (dev_props.totalGlobalMem > gpu_mem) {
          valid_devices.push_back(dev);
        }
      }
    }
  }

  // Set the number of GPU workers and tell CUDA about our list of valid devices
  if (valid_devices.size() > 0) {
    numBkgWorkers_gpu = int(valid_devices.size());
    err = cudaSetValidDevices( &valid_devices[0], int( valid_devices.size() ) );
    if (err != cudaSuccess) {
      // Best effort: report and continue, each device is still initialised
      // explicitly by id below.
      printf("CUDA: Warning: could not set valid device list (%s)\n",
             cudaGetErrorString(err));
    }
  }
  else {
    printf("CUDA: No GPU device available. Defaulting to CPU only computation\n");
    return false;
  }

  PoissonCDFApproxMemo poiss_cache;
  poiss_cache.Allocate (MAX_POISSON_TABLE_COL,MAX_POISSON_TABLE_ROW,POISSON_TABLE_STEP);
  poiss_cache.GenerateValues(); // fill out my table

  // Walk the list back to front so erasing entry i does not shift the
  // indices that are still to be visited.
  for (int i = int(valid_devices.size()) - 1; i >= 0; i--) {
    try {
      //cudaSetDevice(valid_devices[i]);
      cout << "CUDA "<< valid_devices[i] << ": Creating Context and Constant memory on device with id: "<< valid_devices[i]<< endl;
      InitConstantMemoryOnGpu(valid_devices[i],poiss_cache);
    }
    catch(cudaException &e) {
      cout << "CUDA "<< valid_devices[i] << ": Context could not be created. removing device with id: "<< valid_devices[i] << " from valid device list" << endl;
      valid_devices.erase (valid_devices.begin()+i);
      numBkgWorkers_gpu -= 1;
      if(numBkgWorkers_gpu == 0)
        cout << "CUDA: no context could be created, defaulting to CPU only execution" << endl;
    }
  }

  if(numBkgWorkers_gpu == 0)
    return false;

  return true;

#else
  // Built without CUDA support: always CPU only.
  return false;
#endif
}
/*
 * WINAPI entry point that forwards to the native cudaSetValidDevices().
 *
 * Emits a trace line, then passes device_arr and len through unchanged and
 * returns the native call's status code as-is.
 */
cudaError_t WINAPI wine_cudaSetValidDevices( int *device_arr, int len )
{
    cudaError_t status;

    WINE_TRACE("\n");
    status = cudaSetValidDevices( device_arr, len );
    return status;
}