Example no. 1
0
cublasHandle_t blas_handle()
{
    static int init[16] = {0};
    static cublasHandle_t handle[16];
    int i = cuda_get_device();
    if(!init[i]) {
        cublasCreate(&handle[i]);
        init[i] = 1;
    }
    return handle[i];
}
Example no. 2
0
/** Fills x_gpu (device pointer) with n uniformly distributed floats via
	cuRAND.  A generator is created lazily per device slot (up to 16 devices,
	indexed by cuda_get_device()) and seeded once from time(0).
	NOTE(review): the curandCreateGenerator / curandSetPseudoRandomGeneratorSeed /
	curandGenerateUniform status returns are all ignored; only the CUDA runtime
	error state is inspected afterwards — a curand failure may pass silently.
	NOTE(review): seeding with time(0) makes runs non-reproducible; confirm
	this is intended.
	NOTE(review): not thread-safe — concurrent first calls can race on init[i]. */
void cuda_random(float *x_gpu, size_t n) {
	static curandGenerator_t gen[16];
	static int init[16] = { 0 };
	int i = cuda_get_device();
	if (!init[i]) {
		// Lazily create and seed the generator for this device's slot.
		curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT);
		curandSetPseudoRandomGeneratorSeed(gen[i], time(0));
		init[i] = 1;
	}
	curandGenerateUniform(gen[i], x_gpu, n);
	// Peek (not Get): report any pending CUDA error without clearing it.
	check_error(cudaPeekAtLastError());
}
Example no. 3
0
/** Returns a lazily-created, per-device cuBLAS handle.
	One handle is cached per device slot (up to 16 devices), selected by the
	currently active device (cuda_get_device()).  On first creation the math
	mode is configured from the global use_tensor_cores flag
	(0 = CUBLAS_DEFAULT_MATH, 1 = CUBLAS_TENSOR_OP_MATH; other values leave
	the mode untouched).
	NOTE(review): cublasCreate/cublasSetMathMode statuses are ignored — confirm
	failure handling is acceptable here.
	NOTE(review): not thread-safe — concurrent first calls can race on init[i]. */
cublasHandle_t blas_handle() {
	static int init[16] = { 0 };
	static cublasHandle_t handle[16];
	int i = cuda_get_device();
	if (!init[i]) {
		cublasCreate(&handle[i]);
		init[i] = 1;

		// use tensor cores
		// BUGFIX: cublasHandle_t is an opaque pointer type; printing it with
		// %d is undefined behavior and truncates on 64-bit. Print with %p.
		printf("Tensor cores %d, handle %p\n", use_tensor_cores, (void*)handle[i]);
		if (use_tensor_cores == 0){
			cublasSetMathMode(handle[i], CUBLAS_DEFAULT_MATH);
		}else if(use_tensor_cores == 1){
			cublasSetMathMode(handle[i], CUBLAS_TENSOR_OP_MATH);
		}

	}
	return handle[i];
}
Example no. 4
0
/** Implements the Tcl "cuda" command with three subcommands:
      cuda list                -- list available GPUs (delegates to list_gpus)
      cuda setdevice <devnr>   -- validate and select GPU <devnr>
      cuda getdevice           -- append the current GPU id to the result
    Returns TCL_OK on success, TCL_ERROR (with an explanatory message
    appended to the interpreter result) otherwise.  When compiled without
    the CUDA feature, always fails with "Feature CUDA required!".
    NOTE(review): the previous header comment described cuda_check_gpu()
    ("returns 1 iff the GPU is usable"), not this function; rewritten to
    match the code below. */
int tclcommand_cuda(ClientData data, Tcl_Interp *interp,
		    int argc, char **argv)
{
#ifndef CUDA
    Tcl_AppendResult(interp, "Feature CUDA required!", (char *)NULL);
    return TCL_ERROR;
#else
  if (argc <= 1) {
    Tcl_AppendResult(interp, "too few arguments to the cuda command", (char *)NULL);
    return TCL_ERROR;
  }
  /* shift past the command name so argv[0] is the subcommand */
  argc--; argv++;
  
  if (ARG0_IS_S("list")) {
    if (argc != 1) {
      Tcl_AppendResult(interp, "cuda list takes no arguments", (char *)NULL);
      return TCL_ERROR;
    }
    return list_gpus(interp);
  }
  else if (ARG0_IS_S("setdevice")) {
    int dev;
    if (argc <= 1 || !ARG1_IS_I(dev)) {
      Tcl_AppendResult(interp, "expected: cuda setdevice <devnr>", (char *)NULL);
      return TCL_ERROR;
    }
    /* reject devices that are absent or below the required compute model */
    if (cuda_check_gpu(dev) == ES_ERROR) {
      Tcl_AppendResult(interp, "GPU not present or compute model not sufficient", (char *)NULL);
      return TCL_ERROR;
    }
    if (cuda_set_device(dev) == ES_OK) {
      return TCL_OK;
    }
    else {
      /* cuda_error is a message set by the failing cuda_* call */
      Tcl_AppendResult(interp, cuda_error, (char *)NULL);
      return TCL_ERROR;
    }
  }
  else if (ARG0_IS_S("getdevice")) {
    if (argc != 1) {
      Tcl_AppendResult(interp, "cuda getdevice takes no arguments", (char *)NULL);
      return TCL_ERROR;
    }
    int dev = cuda_get_device();
    /* negative device id signals an error reported via cuda_error */
    if (dev >= 0) {
      char buffer[TCL_INTEGER_SPACE];
      sprintf(buffer, "%d", dev);
      Tcl_AppendResult(interp, buffer, (char *)NULL);
      return TCL_OK;
    }
    else {
      Tcl_AppendResult(interp, cuda_error, (char *)NULL);
      return TCL_ERROR;
    }
  }
  else {
    Tcl_AppendResult(interp, "unknown subcommand \"", argv[0], "\"", (char *)NULL);
    return TCL_ERROR;
  }
#endif /* defined(CUDA) */
}