/*
 * Return the cuBLAS handle associated with the device index reported by
 * cuda_get_device().
 *
 * Handles are created on first use and cached in a 16-slot static table,
 * one slot per device index, so repeated calls for the same device return
 * the same handle.
 *
 * NOTE(review): the device index is used as a table index without a range
 * check, and the status returned by cublasCreate() is not inspected.
 */
cublasHandle_t blas_handle()
{
    static cublasHandle_t handles[16];
    static int created[16] = {0};
    const int dev = cuda_get_device();

    /* Fast path: the handle for this device already exists. */
    if (created[dev]) {
        return handles[dev];
    }

    cublasCreate(&handles[dev]);
    created[dev] = 1;
    return handles[dev];
}
/*
 * Ask cuRAND (curandGenerateUniform) to write n floats into x_gpu.
 *
 * A pseudo-random generator is created lazily for the device index reported
 * by cuda_get_device() and cached in a 16-slot static table. Each generator
 * is seeded once, at creation, from time(0).
 *
 * x_gpu: destination buffer handed to cuRAND (presumably device memory,
 *        going by the name - confirm against callers).
 * n:     number of floats to generate.
 *
 * After generation the most recent CUDA error is passed to check_error().
 *
 * NOTE(review): the cuRAND status codes are not inspected, and the device
 * index is not range-checked against the 16 available slots.
 */
void cuda_random(float *x_gpu, size_t n)
{
    static int seeded[16] = {0};
    static curandGenerator_t generators[16];
    const int dev = cuda_get_device();

    if (!seeded[dev]) {
        curandGenerator_t *slot = &generators[dev];

        curandCreateGenerator(slot, CURAND_RNG_PSEUDO_DEFAULT);
        curandSetPseudoRandomGeneratorSeed(*slot, time(0));
        seeded[dev] = 1;
    }

    curandGenerateUniform(generators[dev], x_gpu, n);
    check_error(cudaPeekAtLastError());
}
cublasHandle_t blas_handle() { static int init[16] = { 0 }; static cublasHandle_t handle[16]; int i = cuda_get_device(); if (!init[i]) { cublasCreate(&handle[i]); init[i] = 1; //use tensor cores printf("Tensor cores %d, handle %d\n", use_tensor_cores, handle[i]); if (use_tensor_cores == 0){ cublasSetMathMode(handle[i], CUBLAS_DEFAULT_MATH); }else if(use_tensor_cores == 1){ cublasSetMathMode(handle[i], CUBLAS_TENSOR_OP_MATH); } } return handle[i]; }
/**
 * Tcl command handler for "cuda".
 *
 * The first argument after the command name selects the subcommand:
 *   - list               : delegates to list_gpus(interp); takes no arguments.
 *   - setdevice <devnr>  : validates the device with cuda_check_gpu() and
 *                          then selects it with cuda_set_device().
 *   - getdevice          : appends the value of cuda_get_device() to the
 *                          interpreter result; takes no arguments.
 *
 * When the CUDA feature is not compiled in, the command always fails with
 * "Feature CUDA required!".
 *
 * @return TCL_OK on success; TCL_ERROR otherwise, with a message appended
 *         to the interpreter result.
 *
 * NOTE(review): the comment previously attached to this function read
 * "returns 1 if and only if the GPU with the given id is usable for CUDA
 * computations. Only devices with compute capability of 1.1 or higher are
 * ok, since atomic operations are required for CUDA-LB." That describes a
 * device-usability check rather than this command handler; it presumably
 * belongs to cuda_check_gpu() - confirm and move it there.
 */
int tclcommand_cuda(ClientData data, Tcl_Interp *interp, int argc, char **argv)
{
#ifndef CUDA
    Tcl_AppendResult(interp, "Feature CUDA required!", (char *)NULL);
    return TCL_ERROR;
#else
    if (argc <= 1) {
        Tcl_AppendResult(interp, "too few arguments to the cuda command", (char *)NULL);
        return TCL_ERROR;
    }

    /* Skip the command name; from here on argv[0] is the subcommand. */
    argc--;
    argv++;

    /* The subcommand tests must stay in this order (list, setdevice, getdevice). */
    if (ARG0_IS_S("list")) {
        if (argc == 1) {
            return list_gpus(interp);
        }
        Tcl_AppendResult(interp, "cuda list takes no arguments", (char *)NULL);
        return TCL_ERROR;
    }

    if (ARG0_IS_S("setdevice")) {
        int device;

        if (argc <= 1 || !ARG1_IS_I(device)) {
            Tcl_AppendResult(interp, "expected: cuda setdevice <devnr>", (char *)NULL);
            return TCL_ERROR;
        }
        if (cuda_check_gpu(device) == ES_ERROR) {
            Tcl_AppendResult(interp, "GPU not present or compute model not sufficient", (char *)NULL);
            return TCL_ERROR;
        }
        if (cuda_set_device(device) != ES_OK) {
            Tcl_AppendResult(interp, cuda_error, (char *)NULL);
            return TCL_ERROR;
        }
        return TCL_OK;
    }

    if (ARG0_IS_S("getdevice")) {
        char text[TCL_INTEGER_SPACE];
        int device;

        if (argc != 1) {
            Tcl_AppendResult(interp, "cuda getdevice takes no arguments", (char *)NULL);
            return TCL_ERROR;
        }

        device = cuda_get_device();
        /* A negative value signals failure; the reason is taken from cuda_error. */
        if (device < 0) {
            Tcl_AppendResult(interp, cuda_error, (char *)NULL);
            return TCL_ERROR;
        }

        sprintf(text, "%d", device);
        Tcl_AppendResult(interp, text, (char *)NULL);
        return TCL_OK;
    }

    Tcl_AppendResult(interp, "unknown subcommand \"", argv[0], "\"", (char *)NULL);
    return TCL_ERROR;
#endif /* defined(CUDA) */
}