コード例 #1
0
ファイル: common.cpp プロジェクト: ZhouYuSong/caffe-pruned
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_)CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_)CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
//  cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
//  cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO)<<"set descr";
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
コード例 #2
0
void CudaUtil::cublasClose(cublasHandle_t handle)
{
	cublasStatus_t status = cublasDestroy(handle);
	if (status != CUBLAS_STATUS_SUCCESS) {
		throw CudaException("CUBALS destroy error");
	}
}
コード例 #3
0
ファイル: cublas_level_1.cpp プロジェクト: xflying777/OpenAcc
void gpu_cublas1(double *A, double *B, double *C, double *D, double *r, double *nrmC, int N, int N2)
{
	#pragma acc data present(A, B, C, D)
	{
		#pragma acc host_data use_device(A, B, C, D)
		{
			cublasHandle_t handle;
			cublasCreate(&handle);
			const double alpha = 1.0;
			const double beta = 0.0;
			cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N);
			printf(" gpu gemm success \n");
			cublasDdot(handle, N2, C, 1, B, 1, r);
			printf(" gpu dot success \n");
			*r = -1.0 * *r;
			cublasDaxpy(handle, N2, r, B, 1, C, 1);
			printf(" gpu axpy success \n");
			cublasDnrm2(handle, N2, C, 1, nrmC);
			printf(" gpu nrm2 success \n");
			cublasDcopy(handle, N2, C, 1, D, 1);
			printf(" gpu copy success \n");
			*nrmC = 1.0 / *nrmC;
			cublasDscal(handle, N2, nrmC, D, 1);
			printf(" gpu scal success \n");
			cublasDestroy(handle);
			printf(" gpu destroy success \n");
		}
	}
}
コード例 #4
0
ファイル: common.cpp プロジェクト: ZhouYuSong/caffe-pruned
Caffe::~Caffe() {
  if (cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(cusparse_descr_));
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(cusparse_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
コード例 #5
0
ファイル: device.cpp プロジェクト: AI42/minerva
GpuDevice::Impl::~Impl() {
  ActivateDevice();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle[i]));
    CUDA_CALL(cudaStreamDestroy(stream[i]));
  }
}
コード例 #6
0
		cuda_running_configuration::~cuda_running_configuration()
		{
			if (cublas_handle)
				cublasDestroy(cublas_handle);
			if (cusparse_handle)
				cusparseDestroy(cusparse_handle);
			cudaDeviceReset();
		}
コード例 #7
0
ファイル: bigSizeTensors.cpp プロジェクト: thomas-hoer/cuTT
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
	cublasHandle_t handle;
	cublasCreate(&handle);
	type result=0;

	sTensorGPU temp1 = emptyTensor(size*size,2);
	sTensorGPU temp2 = emptyTensor(size*size*2,3);
	cudaEvent_t start;
	cudaEventCreate(&start);
	cudaEvent_t stop;
	cudaEventCreate(&stop);

	//printf("Start contractTT\n");

	cudaEventRecord(start, NULL);
	int indA = TT1[0].size[0];
	int indB = TT2[0].size[0];

	sTensorCPU tt1start = copyToCPU(TT1[0]);
	sTensorCPU tt2start = copyToCPU(TT2[0]);
	sTensorCPU tt1end = copyToCPU(TT1[n - 1]);
	sTensorCPU tt2end = copyToCPU( TT2[n - 1]);


	for (int i = 0; i < indA; i++){
		TT1[0] = prepareTensorStart(tt1start, i);
		TT1[n - 1] = prepareTensorEnd(tt1end, i);
		for (int j = 0; j < indB; j++){
			TT2[0] = prepareTensorStart(tt2start, j);
			TT2[n - 1] = prepareTensorEnd(tt2end, j);
			contractTensor(handle, TT1[0], TT2[0], temp1);
			for (int i = 1; i < n; i++){
				contractTensor(handle, temp1, TT1[i], temp2);
				contractTensor(handle, temp2, TT2[i], temp1, 2);
			}
			type add = 0;
			cudaMemcpy(&add, temp1.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
			//printf("%e ", add);
			result += add;
		}
	}
	cudaEventRecord(stop, NULL);
	cudaEventSynchronize(stop);
	
	float msecTotal = 0.0f;
	cudaEventElapsedTime(&msecTotal, start, stop);
	printf("Time: %.3fms\n", msecTotal);
	printf("Ops: %.0f\n", bops);
	double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
	printf("Perf= %.2f GFlop/s\n", gigaFlops);

	cublasDestroy(handle);
	cudaDeviceReset();

	printf("%.5e \n", result);
	exit(0);
}
コード例 #8
0
ファイル: common.cpp プロジェクト: xiaobinxu/recaffe
Caffe::~Caffe()
{
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_)
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  if (vsl_stream_)
    VSL_CHECK(vslDeleteStream(&vsl_stream_));
}
コード例 #9
0
ファイル: device.cpp プロジェクト: Exlsunshine/minerva
GpuDevice::~GpuDevice() {
  CUDA_CALL(cudaSetDevice(device_));
  pool_.WaitForAllFinished();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle_[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle_[i]));
    CUDA_CALL(cudaStreamDestroy(stream_[i]));
  }
  delete data_store_;
}
コード例 #10
0
ファイル: common.cpp プロジェクト: aharrison24/caffe
Caffe::~Caffe() {
  // Make sure all device contexts and
  // dependent memory blocks are freed properly
  device_contexts_.clear();
#ifdef USE_CUDA
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
#endif  // USE_CUDA
}
コード例 #11
0
ファイル: micronn.c プロジェクト: microo8/micronn
uint micronn_free(micronn* net)
{
    uint i;
    for(i = 0; i <= net->nhidden; i++) {
        micronn_matrix_free(net->weights[i]);
    }
    free(net->chidden);
    free(net->weights);
    cublasDestroy(net->handle);
    free(net);
    return 1;
};
コード例 #12
0
ファイル: gmres_part.cpp プロジェクト: xflying777/OpenAcc
void dot_gpu(double *x, double *y, double *result, int N)
{
	#pragma acc data present(x, y)
	{
		#pragma acc host_data use_device(x, y)
		{
			cublasHandle_t h;
			cublasCreate(&h);
			cublasDdot(h, N, x, 1, y, 1, result);
			cublasDestroy(h);
		}
	}
}
コード例 #13
0
ファイル: cuda_manager.cpp プロジェクト: bsumirak/ugcore
CUDAManager::~CUDAManager()
{
	if(m_tempBuffer)
		cudaFree(m_tempBuffer);
	cudaFree(m_tempRetBuffer);

#ifdef USE_CUSPARSE
	if(cusparseHandle) cusparseDestroy(cusparseHandle);
#endif
    if(cublasHandle) cublasDestroy(cublasHandle);
	cudaDeviceReset();    
	cout << "Cleaned up CUDA." << endl;
}
コード例 #14
0
ファイル: gmres_part.cpp プロジェクト: xflying777/OpenAcc
void norm_gpu(double *x, double *norm, int N)
{
	#pragma acc data present(x)
	{
		#pragma acc host_data use_device(x)
		{
			cublasHandle_t h;
			cublasCreate(&h);
			cublasDnrm2(h, N, x, 1, norm);
			cublasDestroy(h);
		}
	}
}
コード例 #15
0
ファイル: blasx_tile_resource.c プロジェクト: 529038378/BLASX
void blasx_resource_dest(int GPUs, cublasHandle_t* handles, cudaStream_t* streams, cudaEvent_t* events, float** C_dev)
{
    int GPU_id;
    for (GPU_id = 0; GPU_id < GPUs; GPU_id++) {
        cudaSetDevice( GPU_id );
        int i = 0;
        for (i = 0; i < STREAMNUM; i++) {
            cudaStreamDestroy( streams[i+GPU_id*STREAMNUM] );
            cudaEventDestroy( events[i+GPU_id*STREAMNUM] );
        }
        for (i = 0; i < STREAMNUM*2; i++) cudaFree( C_dev[i+GPU_id*STREAMNUM*2] );
        cublasDestroy( handles[GPU_id] );
    }
}
コード例 #16
0
Caffe::~Caffe() {
  for (vector<cublasHandle_t>& group_cublas_handles : cublas_handles_) {
    for (cublasHandle_t h : group_cublas_handles) {
      if (h) {
        CUBLAS_CHECK(cublasDestroy(h));
      }
    }
  }
  for_each(curand_generators_.begin(), curand_generators_.end(), [](curandGenerator_t h) {
    if (h) {
      CURAND_CHECK(curandDestroyGenerator(h));
    }
  });
}
コード例 #17
0
ファイル: THCGeneral.c プロジェクト: ibcn-cloudlet/cutorch
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->cudaHostAllocator);
  free(state->deviceProperties);

  int deviceCount = 0;
  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));
  THCudaCheck(cudaGetDeviceCount(&deviceCount));

  /* cleanup p2p access state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    free(state->p2pAccessEnabled[dev]);
  }
  free(state->p2pAccessEnabled);

  /* cleanup per-device state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    THCudaCheck(cudaSetDevice(dev));
    /* Free Torch-defined streams (0 is the default stream) */
    for (int stream = 1; stream <= state->numUserStreams; ++stream) {
      THCudaCheck(cudaStreamDestroy(
                    THCState_getDeviceStream(state, dev, stream)));
    }
    /* Free Torch-defined handles (0 is NULL for consistency with streams API) */
    for (int handle = 1; handle <= state->numUserBlasHandles; ++handle) {
      THCublasCheck(cublasDestroy(
                      THCState_getDeviceBlasHandle(state, dev, handle)));
    }
    /* Free per-stream scratch space; starts at 0 because there is space for
       the default stream as well*/
    for (int stream = 0; stream <= state->numUserStreams; ++stream) {
      THCudaCheck(THCudaFree(state, THCState_getDeviceScratchSpace(state, dev, stream)));
    }

    free(state->resourcesPerDevice[dev].streams);
    free(state->resourcesPerDevice[dev].blasHandles);
    free(state->resourcesPerDevice[dev].devScratchSpacePerStream);
  }
  free(state->resourcesPerDevice);
  state->cudaDeviceAllocator.shutdown(state->cudaDeviceAllocator.state);
  THCThreadLocal_free(state->currentPerDeviceStream);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  THCudaCheck(cudaSetDevice(prevDev));
}
コード例 #18
0
ファイル: cublas_level_1.cpp プロジェクト: xflying777/OpenAcc
void cublas_gemm(const double *A, const double *B, double *C, int N)
{
	#pragma acc data present(A, B, C)
	{
		#pragma acc host_data use_device(A, B, C)
		{
			cublasHandle_t h;
			cublasCreate(&h);
			const double alpha = 1.0;
			const double beta = 0.0;
			cublasDgemm(h, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N);
			cublasDestroy(h);
		}
	}
}
コード例 #19
0
ファイル: dgemv_test2.cpp プロジェクト: xflying777/OpenAcc
void gemv_gpu(double *a, double *b, double *c1, int N)
{
	#pragma acc data present(a, b, c1)
	{
		#pragma acc host_data use_device(a, b, c1)
		{
			cublasHandle_t handle;
			cublasCreate(&handle);
			const double alpha = 1.0;
			const double beta = 0.0;
			cublasDgemv(handle, CUBLAS_OP_T, N, N, &alpha, a, N, b, 1, &beta, c1, 1);
			cublasDestroy(handle);
		}
	} // end pragma data
}
コード例 #20
0
ファイル: gmres_part.cpp プロジェクト: xflying777/OpenAcc
// Note : cublasDgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, n,n,n, &alpha, A, n, B, n, &beta, C, n)
// means matrix C = B * A
void cublas_gemm(int n, double *c, double *b, double *a )
{
	#pragma acc data present(a, b, c)
	{
		#pragma acc host_data use_device(a, b, c)
		{
			cublasHandle_t handle;
			cublasCreate(&handle);
			const double alpha = 1.0;
			const double beta = 0.0;
			cublasDgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, n,n,n, &alpha, a, n, b, n, &beta, c, n);
			cublasDestroy(handle);
		}
	}
}
コード例 #21
0
ファイル: THCGeneral.cpp プロジェクト: HustlehardInc/pytorch
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->deviceProperties);

  int deviceCount = 0;
  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));
  THCudaCheck(cudaGetDeviceCount(&deviceCount));

  /* cleanup p2p access state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    free(state->p2pAccessEnabled[dev]);
  }
  free(state->p2pAccessEnabled);

  /* cleanup per-device state */
  for (int dev = 0; dev < deviceCount; ++dev) {
    THCudaCheck(cudaSetDevice(dev));
    THCCudaResourcesPerDevice* res = &(state->resourcesPerDevice[dev]);
    /* Free user defined BLAS handles */
    for (int i = 0; i < res->numBlasHandles; ++i) {
      THCublasCheck(cublasDestroy(res->blasHandles[i]));
    }
    /* Free user defined sparse handles */
    for (int i = 0; i < res->numSparseHandles; ++i) {
      THCusparseCheck(cusparseDestroy(res->sparseHandles[i]));
    }

    free(res->blasHandles);
    free(res->sparseHandles);
    THCStream_free((THCStream*)THCThreadLocal_get(state->currentStreams[dev]));
    THCThreadLocal_free(state->currentStreams[dev]);
  }
  free(state->resourcesPerDevice);
  if (state->cudaDeviceAllocator->emptyCache) {
    state->cudaDeviceAllocator->emptyCache(state->cudaDeviceAllocator->state);
  }
  if (state->cudaHostAllocator == &THCCachingHostAllocator) {
    THCCachingHostAllocator_emptyCache();
  }
  free(state->currentStreams);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  THCudaCheck(cudaSetDevice(prevDev));
}
コード例 #22
0
ファイル: backsolver.cpp プロジェクト: xflying777/OpenAcc
// Solve A * x = b in GPU.
void cublas_backsolver(double *A, double *x, double *b, int N)
{
	#pragma acc data present(A, x, b)
	{
		#pragma host_data use_device(A, x, b)
		{
			cublasHandle_t h;
			cublasCreate(&h);
			cublasDcopy(h, N, b, 1, x, 1);
//			printf(" cublasDcopy success. \n");
			cublasDtrsv(h, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, N, A, N, x, 1);
//			printf(" cublasDtrsv success. \n");
			cublasDestroy(h);
		}
	}
}
コード例 #23
0
ファイル: common.cpp プロジェクト: FuchenUSTC/caffe-c3d
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(device_id));
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
コード例 #24
0
// Multiply the arrays A and B on GPU and save the result in C
// C(m,n) = A(m,k) * B(k,n)
void gpu_blas_mmul(const double *A, const double *B, double *C, const int m, const int k, const int n) {
     int lda=m,ldb=k,ldc=m;
     const double alf = 1;
     const double bet = 0;
     const double *alpha = &alf;
     const double *beta = &bet;
 
     // Create a handle for CUBLAS
     cublasHandle_t handle;
     cublasCreate(&handle);
 
     // Do the actual multiplication
     cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 
     // Destroy the handle
     cublasDestroy(handle);
 }
コード例 #25
0
static void teardown(void *c) {
  cuda_context *ctx = (cuda_context *)c;
  blas_handle *handle = (blas_handle *)ctx->blas_handle;

  if (ctx->blas_handle == NULL)
    return;

  cuda_enter(ctx);
  cublasDestroy(handle->h);
  GpuKernel_clear(&handle->sgemvBH_N_a1_b1_small);
  GpuKernel_clear(&handle->sgemvBH_T_a1_b1_small);
  GpuKernel_clear(&handle->dgemvBH_N_a1_b1_small);
  GpuKernel_clear(&handle->dgemvBH_T_a1_b1_small);
  GpuKernel_clear(&handle->sgerBH_gen_small);
  GpuKernel_clear(&handle->dgerBH_gen_small);
  cuda_exit(ctx);
  free(ctx->blas_handle);
  ctx->blas_handle = NULL;
}
コード例 #26
0
ファイル: common.cpp プロジェクト: airxcy/ITF_Tracker
void Engine::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
コード例 #27
0
ファイル: matrix_norm.cpp プロジェクト: xflying777/OpenAcc
double cublas_gemm_norm(const double *A, const double *B, double *C, int N)
{
	double *norm;
	norm = (double *) malloc(1*sizeof(double));

        #pragma acc data present(A, B, C) copyout(norm[0])
        {
                #pragma acc host_data use_device(A, B, C)
                {
                        cublasHandle_t h;
                        cublasCreate(&h);
                        const double alpha = 1.0;
                        const double beta = 0.0;
                        cublasDgemm(h, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N);
						cublasDnrm2(h, N*N, C, 1, norm);
                        cublasDestroy(h);
                }
        }
	return *norm;
}
コード例 #28
0
void Caffe::SetSlaveDevice(const int slave_device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == slave_device_id) {
    return;
  }
  if (Get().slave_cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().slave_cublas_handle_));
  if (Get().slave_curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().slave_curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(slave_device_id));
  CUDA_CHECK(cudaStreamCreate (&Get().slave_cu_stream_));
  CUBLAS_CHECK(cublasCreate(&Get().slave_cublas_handle_));
  CUBLAS_CHECK(cublasSetStream(Get().slave_cublas_handle_, Get().slave_cu_stream_));
  CURAND_CHECK(curandCreateGenerator(&Get().slave_curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().slave_curand_generator_,
      cluster_seedgen()));
  Get().slave_device_id_ = slave_device_id;
  CUDA_CHECK(cudaSetDevice(current_device));
  Caffe::set_gpu_mode(Caffe::MASTER_SLAVE);
}
コード例 #29
0
ファイル: common.cpp プロジェクト: aharrison24/caffe
void Caffe::SetDevice(const int device_id) {
  std::vector<int> devices;
  devices.push_back(device_id);
  Caffe::SetDevices(devices);

  Get().default_device_context_ = GetDeviceContext(device_id);

  if (Get().default_device_context_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
    int current_device;
    CUDA_CHECK(cudaGetDevice(&current_device));
    if (current_device == device_id) {
      return;
    }
// The call to cudaSetDevice must come before any calls to Get, which
// may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
    if (Get().cublas_handle_)
      CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
    if (Get().curand_generator_) {
      CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
    }
    CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
    CURAND_CHECK(
        curandCreateGenerator(&Get().curand_generator_,
                              CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(
        curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                           cluster_seedgen()));
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
#ifdef USE_CLBLAS
    clblasSetup();
#endif  // USE_CLBLAS
#endif  // USE_GREENTEA
  }
}
コード例 #30
0
ファイル: context.cpp プロジェクト: junluan/shadow
void Context::Clear() {
#if defined(USE_CUDA)
  if (blas_handle_ != nullptr) {
    CUBLAS_CHECK(cublasDestroy(cublasHandle_t(blas_handle_)));
    blas_handle_ = nullptr;
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ != nullptr) {
    CUDNN_CHECK(cudnnDestroy(cudnnHandle_t(cudnn_handle_)));
    cudnn_handle_ = nullptr;
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ != nullptr) {
    CHECK_EQ(nnp_deinitialize(), nnp_status_success);
    pthreadpool_destroy(pthreadpool_t(nnpack_handle_));
    nnpack_handle_ = nullptr;
  }
#endif
}