Ejemplo n.º 1
0
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_)CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_)CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
//  cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
//  cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO)<<"set descr";
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Ejemplo n.º 2
0
void cuda_initialize() {
    CUDA_CHECK(cudaStreamCreate(&g_context.stream));
    CUBLAS_CHECK(cublasCreate_v2(&g_context.cublas_handle));
    CUBLAS_CHECK(cublasSetStream(g_context.cublas_handle, g_context.stream));
    // CUDNN_CHECK(cudnnCreate(&g_context.cudnn_handle));
    // CUDNN_CHECK(cudnnSetStream(g_context.cudnn_handle, g_context.stream));
}
Ejemplo n.º 3
0
Caffe::Properties::Properties() :
      init_time_(std::time(nullptr)),
      main_thread_id_(std::this_thread::get_id()),
      caffe_version_(AS_STRING(CAFFE_VERSION)) {
#ifndef CPU_ONLY
  int count = 0;
  CUDA_CHECK(cudaGetDeviceCount(&count));
  compute_capabilities_.resize(count);
  cudaDeviceProp device_prop;
  for (int gpu = 0; gpu < compute_capabilities_.size(); ++gpu) {
    CUDA_CHECK(cudaGetDeviceProperties(&device_prop, gpu));
    compute_capabilities_[gpu] = device_prop.major * 100 + device_prop.minor;
    DLOG(INFO) << "GPU " << gpu << " '" << device_prop.name << "' has compute capability "
        << device_prop.major << "." << device_prop.minor;
  }
#ifdef USE_CUDNN
  cudnn_version_ =
      AS_STRING(CUDNN_MAJOR) "." AS_STRING(CUDNN_MINOR) "." AS_STRING(CUDNN_PATCHLEVEL);
#else
  cudnn_version_ = "USE_CUDNN is not defined";
#endif
  int cublas_version = 0;
  CUBLAS_CHECK(cublasGetVersion(Caffe::cublas_handle(), &cublas_version));
  cublas_version_ = std::to_string(cublas_version);

  int cuda_version = 0;
  CUDA_CHECK(cudaRuntimeGetVersion(&cuda_version));
  cuda_version_ = std::to_string(cuda_version);

  int cuda_driver_version = 0;
  CUDA_CHECK(cudaDriverGetVersion(&cuda_driver_version));
  cuda_driver_version_ = std::to_string(cuda_driver_version);
#endif
}
Ejemplo n.º 4
0
void Context::Init(int device_id) {
  device_id_ = device_id;

  SwitchDevice();

#if defined(USE_CUDA)
  if (blas_handle_ == nullptr) {
    CUBLAS_CHECK(cublasCreate((cublasHandle_t*)&blas_handle_));
    CHECK_NOTNULL(blas_handle_);
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ == nullptr) {
    CUDNN_CHECK(cudnnCreate((cudnnHandle_t*)&cudnn_handle_));
    CHECK_NOTNULL(cudnn_handle_);
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ == nullptr) {
    CHECK_EQ(nnp_initialize(), nnp_status_success);
    nnpack_handle_ = pthreadpool_create(0);
    CHECK_NOTNULL(nnpack_handle_);
  }
#endif
}
Ejemplo n.º 5
0
void caffe_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const double alpha, const double* A, const double* x,
    const double beta, double* y) {
  cublasOperation_t cuTransA =
      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
  CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
      A, N, x, 1, &beta, y, 1));
}
Ejemplo n.º 6
0
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(device_id));
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Ejemplo n.º 7
0
Caffe::~Caffe() {
  if (cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(cusparse_descr_));
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(cusparse_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
Ejemplo n.º 8
0
void caffe_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const float alpha, const float* A, const float* x,
    const float beta, float* y) {
  cublasOperation_t cuTransA =
      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
  CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
      A, N, x, 1, &beta, y, 1));
}
Ejemplo n.º 9
0
Caffe::~Caffe()
{
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_)
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  if (vsl_stream_)
    VSL_CHECK(vslDeleteStream(&vsl_stream_));
}
Ejemplo n.º 10
0
Caffe::Caffe()
  : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
  curand_generator_(NULL), vsl_stream_(NULL)
{
  CUBLAS_CHECK(cublasCreate(&cublas_handle_));
  //TODO: original caffe code has bug here!
  CURAND_CHECK(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL));
  VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
Ejemplo n.º 11
0
void Engine::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Ejemplo n.º 12
0
Caffe::~Caffe() {
  // Make sure all device contexts and
  // dependent memory blocks are freed properly
  device_contexts_.clear();
#ifdef USE_CUDA
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
#endif  // USE_CUDA
}
Ejemplo n.º 13
0
void Caffe::SetSlaveDevice(const int slave_device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == slave_device_id) {
    return;
  }
  if (Get().slave_cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().slave_cublas_handle_));
  if (Get().slave_curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().slave_curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(slave_device_id));
  CUDA_CHECK(cudaStreamCreate (&Get().slave_cu_stream_));
  CUBLAS_CHECK(cublasCreate(&Get().slave_cublas_handle_));
  CUBLAS_CHECK(cublasSetStream(Get().slave_cublas_handle_, Get().slave_cu_stream_));
  CURAND_CHECK(curandCreateGenerator(&Get().slave_curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().slave_curand_generator_,
      cluster_seedgen()));
  Get().slave_device_id_ = slave_device_id;
  CUDA_CHECK(cudaSetDevice(current_device));
  Caffe::set_gpu_mode(Caffe::MASTER_SLAVE);
}
Ejemplo n.º 14
0
void Caffe::SetDevice(const int device_id) {
  std::vector<int> devices;
  devices.push_back(device_id);
  Caffe::SetDevices(devices);

  Get().default_device_context_ = GetDeviceContext(device_id);

  if (Get().default_device_context_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
    int current_device;
    CUDA_CHECK(cudaGetDevice(&current_device));
    if (current_device == device_id) {
      return;
    }
// The call to cudaSetDevice must come before any calls to Get, which
// may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
    if (Get().cublas_handle_)
      CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
    if (Get().curand_generator_) {
      CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
    }
    CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
    CURAND_CHECK(
        curandCreateGenerator(&Get().curand_generator_,
                              CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(
        curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                           cluster_seedgen()));
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
#ifdef USE_CLBLAS
    clblasSetup();
#endif  // USE_CLBLAS
#endif  // USE_GREENTEA
  }
}
Ejemplo n.º 15
0
Caffe::~Caffe() {
  for (vector<cublasHandle_t>& group_cublas_handles : cublas_handles_) {
    for (cublasHandle_t h : group_cublas_handles) {
      if (h) {
        CUBLAS_CHECK(cublasDestroy(h));
      }
    }
  }
  for_each(curand_generators_.begin(), curand_generators_.end(), [](curandGenerator_t h) {
    if (h) {
      CURAND_CHECK(curandDestroyGenerator(h));
    }
  });
}
Ejemplo n.º 16
0
void caffe_gpu_geam<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N,
    const float alpha, const float* A, const float* B, const float beta,
	float* C){
		// Note that cublas follows fortran order.
  int lda = (TransA == CblasNoTrans) ? N : M;
  int ldb = (TransB == CblasNoTrans) ? N : M;
  cublasOperation_t cuTransA =
      (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
  cublasOperation_t cuTransB =
      (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
  CUBLAS_CHECK(cublasSgeam(Caffe::get_current_cublas_handle(), cuTransB, cuTransA,
      N, M, &alpha, B, ldb, &beta,  A, lda, C, N));
}
Ejemplo n.º 17
0
void caffe_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const double alpha, const double* A, const double* B, const double beta,
    double* C) {
  // Note that cublas follows fortran order.
  int lda = (TransA == CblasNoTrans) ? K : M;
  int ldb = (TransB == CblasNoTrans) ? N : K;
  cublasOperation_t cuTransA =
      (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
  cublasOperation_t cuTransB =
      (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
  CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
      N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
Ejemplo n.º 18
0
cublasHandle_t Caffe::device_cublas_handle(int group) {
  std::lock_guard<std::mutex> lock(cublas_mutex_);
  vector<cublasHandle_t>& group_cublas_handles = cublas_handles_[current_device()];
  if (group + 1 > group_cublas_handles.size()) {
    group_cublas_handles.resize(group + 1);
  }
  cublasHandle_t& cublas_handle = group_cublas_handles[group];
  if (!cublas_handle) {
    // Try to create a cublas handler, and report an error if failed (but we will
    // keep the program running as one might just want to run CPU code).
    if (cublasCreate(&cublas_handle) != CUBLAS_STATUS_SUCCESS) {
      LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
    }
    CUBLAS_CHECK(cublasSetStream(cublas_handle, device_pstream(group)->get()));
  }
  return cublas_handle;
}
Ejemplo n.º 19
0
void caffe_gpu_gemm_batched<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const float alpha, const float** A, const float** B, const float beta,
    float** C,
	int batch_count){
	
	// Note that cublas follows fortran order.
	int lda = (TransA == CblasNoTrans) ? K : M;
	int ldb = (TransB == CblasNoTrans) ? N : K;
	cublasOperation_t cuTransA =
      (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
	cublasOperation_t cuTransB =
      (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
			
	CUBLAS_CHECK(cublasSgemmBatched(Caffe::get_current_cublas_handle(), cuTransB, cuTransA,
      N, M, K, &alpha, B, ldb, A, lda, &beta, C, N,
	  batch_count));
}
Ejemplo n.º 20
0
void Context::Clear() {
#if defined(USE_CUDA)
  if (blas_handle_ != nullptr) {
    CUBLAS_CHECK(cublasDestroy(cublasHandle_t(blas_handle_)));
    blas_handle_ = nullptr;
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ != nullptr) {
    CUDNN_CHECK(cudnnDestroy(cudnnHandle_t(cudnn_handle_)));
    cudnn_handle_ = nullptr;
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ != nullptr) {
    CHECK_EQ(nnp_deinitialize(), nnp_status_success);
    pthreadpool_destroy(pthreadpool_t(nnpack_handle_));
    nnpack_handle_ = nullptr;
  }
#endif
}
Ejemplo n.º 21
0
void caffe_gpu_scal<float>(const int N, const float alpha, float *X) {
  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}
Ejemplo n.º 22
0
void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
                             double* y) {
  CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1));
  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1));
}
Ejemplo n.º 23
0
void caffe_gpu_scale<float>(const int n, const float alpha, const float *x,
                            float* y) {
  CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1));
  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1));
}
Ejemplo n.º 24
0
void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
  CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
}
Ejemplo n.º 25
0
void caffe_gpu_asum<float>(const int n, const float* x, float* y) {
  CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y));
}
Ejemplo n.º 26
0
void caffe_gpu_dot<double>(const int n, const double* x, const double* y,
    double * out) {
  CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}
Ejemplo n.º 27
0
void caffe_gpu_scal<double>(const int N, const double alpha, double *X) {
  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}
Ejemplo n.º 28
0
Engine::~Engine() {
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
Ejemplo n.º 29
0
void caffe_gpu_dot<float>(const int n, const float* x, const float* y,
    float* out) {
  CUBLAS_CHECK(cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}
Ejemplo n.º 30
0
void caffe_gpu_axpy<double>(const int N, const double alpha, const double* X,
    double* Y) {
  CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
}