Esempio n. 1
0
// For internal use only.
// If currently uninitialized, allocate GPU memory directly.
// If the data is currently on the CPU, allocate GPU memory and copy the data over.
// If the data is already on the GPU, do nothing.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  switch (head_) {
  case UNINITIALIZED:
    // Record the active CUDA device so we know where the buffer lives.
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    // Allocate the buffer on the device.
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    // Zero-fill the fresh allocation.
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;
    own_gpu_data_ = true;  // we allocated it, so we are responsible for freeing it
    break;
  case HEAD_AT_CPU:
    // Lazily allocate device memory on the first CPU -> GPU sync.
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    // Upload the host data; both copies now agree.
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
    break;
  case HEAD_AT_GPU:
  case SYNCED:
    // Device copy is already up to date; nothing to do.
    break;
  }
#else
  NO_GPU;  // built without GPU support: the NO_GPU macro flags the misuse
#endif
}
Esempio n. 2
0
// A write through mutable_gpu_data() must move the head to HEAD_AT_GPU, and a
// subsequent cpu_data() read must sync the bytes back and leave the head SYNCED.
TEST_F(SyncedMemoryTest, TestGPUWrite) {
  SyncedMemory mem(10);

  // Read the CPU-side view and verify every byte equals the expected fill value.
  auto check_cpu_bytes = [&mem](char expected) {
    const char* bytes = static_cast<const char*>(mem.cpu_data());
    for (int i = 0; i < mem.size(); ++i) {
      EXPECT_EQ(bytes[i], expected);
    }
  };

  // First round: fill with 1 on the GPU, then read back on the CPU.
  void* gpu_data = mem.mutable_gpu_data();
  EXPECT_EQ(mem.head(), SyncedMemory::HEAD_AT_GPU);
  caffe_gpu_memset(mem.size(), 1, gpu_data);
  check_cpu_bytes(1);
  EXPECT_EQ(mem.head(), SyncedMemory::SYNCED);

  // Second round: overwrite with 2 and confirm the CPU copy follows again.
  gpu_data = mem.mutable_gpu_data();
  EXPECT_EQ(mem.head(), SyncedMemory::HEAD_AT_GPU);
  caffe_gpu_memset(mem.size(), 2, gpu_data);
  check_cpu_bytes(2);
  EXPECT_EQ(mem.head(), SyncedMemory::SYNCED);
}
Esempio n. 3
0
// Ensure the data resides on the GPU, allocating and/or uploading as needed.
// UNINITIALIZED -> allocate + zero; HEAD_AT_CPU -> (allocate if needed) + copy;
// HEAD_AT_GPU / SYNCED -> already current, no work.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  if (head_ == UNINITIALIZED) {
    // First touch: allocate device memory and zero-fill it.
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;
  } else if (head_ == HEAD_AT_CPU) {
    // Newest copy is on the host: allocate lazily, then upload it.
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    }
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
  }
  // head_ == HEAD_AT_GPU or SYNCED: device copy already up to date.
#else
  NO_GPU;
#endif
}
Esempio n. 4
0
// Ensure the data resides on the device, dispatching on the configured backend
// (CUDA via cudaMalloc/caffe_gpu_*, or OpenCL via ViennaCL/Greentea buffers).
// UNINITIALIZED -> allocate + zero; HEAD_AT_CPU -> (allocate if needed) + upload.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  switch (head_) {
    case UNINITIALIZED: {
      if (device_context_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
        // CUDA path: allocate, account for the memory, and zero-fill.
        CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
        device_context_->IncreaseMemoryUsage(size_);
        caffe_gpu_memset(size_, 0, gpu_ptr_);
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        // OpenCL path: drain the queue before touching buffer state.
        viennacl::ocl::context ctx = viennacl::ocl::get_context(
            device_context_->id());
        ctx.get_queue().finish();
        cl_int err;
        if (ctx.devices()[0].type() == CL_DEVICE_TYPE_CPU) {
          // On a CPU OpenCL device, host-allocated memory avoids an extra copy.
          cl_gpu_mem_ = clCreateBuffer(ctx.handle().get(),
                     CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                     size_, nullptr, &err);
        } else {
          cl_gpu_mem_ = clCreateBuffer(ctx.handle().get(), CL_MEM_READ_WRITE,
                                       size_, nullptr, &err);
        }
        CHECK_EQ(0, err) << "OpenCL buffer allocation of size "
                        << size_ << " failed.";
        device_context_->IncreaseMemoryUsage(size_);
        // Zero-fill the new buffer, then expose it through the generic pointer.
        int alpha = 0;
        greentea_memset(device_context_->id(), size_, alpha, cl_gpu_mem_, 0);
        gpu_ptr_ = reinterpret_cast<void*>(cl_gpu_mem_);
        ctx.get_queue().finish();
#endif  // USE_GREENTEA
      }
      head_ = HEAD_AT_GPU;
      break;
    }
    case HEAD_AT_CPU: {
      if (device_context_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
        // Lazily allocate device memory on the first CPU -> GPU sync.
        if (gpu_ptr_ == nullptr) {
          CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
          device_context_->IncreaseMemoryUsage(size_);
        }
        caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        viennacl::ocl::context ctx = viennacl::ocl::get_context(
            device_context_->id());
        ctx.get_queue().finish();
        if (gpu_ptr_ == nullptr) {
          // Same lazy allocation as above, OpenCL flavor.
          cl_int err;
          if (ctx.devices()[0].type() == CL_DEVICE_TYPE_CPU) {
            cl_gpu_mem_ = clCreateBuffer(
                ctx.handle().get(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                size_, nullptr, &err);
          } else {
            cl_gpu_mem_ = clCreateBuffer(ctx.handle().get(), CL_MEM_READ_WRITE,
                                         size_, nullptr, &err);
          }
          CHECK_EQ(0, err) << "OpenCL buffer allocation of size "
                          << size_ << " failed.";
          device_context_->IncreaseMemoryUsage(size_);
          gpu_ptr_ = reinterpret_cast<void*>(cl_gpu_mem_);
          ctx.get_queue().finish();
        }
        // Upload the host data; finish() makes the copy visible before we return.
        greentea_gpu_memcpy(size_, cpu_ptr_, (cl_mem) gpu_ptr_, 0, &ctx);
        ctx.get_queue().finish();
#endif  // USE_GREENTEA
      }
      head_ = SYNCED;
      break;
    }
    case HEAD_AT_GPU:
    case SYNCED:
      // Device copy already current; nothing to do.
      break;
  }
#else
  NO_GPU;
#endif
}