GPUParams<Dtype>::GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device)
    : Params<Dtype>(root_solver) {
  // Builds the two flat device buffers (data_ and diff_) on GPU `device`.
  //
  //   root_solver  solver whose net supplies the learnable parameter blobs
  //   device       CUDA device ordinal on which both buffers are allocated
  //
  // The CUDA device that is current on entry is current again on return.
  // Any failing CUDA runtime call is reported through CUDA_CHECK.

  // Remember the caller's device so it can be restored at the end.
  int caller_device;
  CUDA_CHECK(cudaGetDevice(&caller_device));

  // All allocations and fills below are issued against `device`.
  CUDA_CHECK(cudaSetDevice(device));

  // data_: size_ elements of Dtype, then run apply_buffers with the `copy`
  // operation over the root solver's learnable parameter blobs.
  CUDA_CHECK(cudaMalloc(&data_, size_ * sizeof(Dtype)));
  const vector<Blob<Dtype>*>& root_params =
      root_solver->net()->learnable_params();
  apply_buffers(root_params, data_, size_, copy);

  // diff_: same element count as data_, filled with Dtype(0).
  CUDA_CHECK(cudaMalloc(&diff_, size_ * sizeof(Dtype)));
  caffe_gpu_set(size_, Dtype(0), diff_);

  // Hand the caller back the device it had selected.
  CUDA_CHECK(cudaSetDevice(caller_device));
}
// Applies this object's device buffers to the learnable parameters of
// `solver`'s net: first data_ with the replace_gpu operation, then diff_
// with the replace_gpu_diff operation.
// NOTE(review): presumably this makes each blob's data/diff refer to a slice
// of the corresponding flat buffer -- confirm against apply_buffers.
void GPUParams<Dtype>::configure(Solver<Dtype>* solver) const {
  const vector<Blob<Dtype>*>& learnable =
      solver->net()->learnable_params();

  apply_buffers(learnable, data_, size_, replace_gpu);
  apply_buffers(learnable, diff_, size_, replace_gpu_diff);
}