/// indicate that scalar values are passed as reference on device inline void set_scalar_host() { MGPU_CUDA_BLAS_CALL(cublasSetPointerMode(handle_, CUBLAS_POINTER_MODE_HOST)); }
static int setup(void *c) { cuda_context *ctx = (cuda_context *)c; blas_handle *handle; const char *tmp[2]; cublasStatus_t err; int e; int types[10]; if (ctx->blas_handle != NULL) return GA_NO_ERROR; handle = calloc(1, sizeof(*handle)); if (handle == NULL) return GA_MEMORY_ERROR; cuda_enter(ctx); err = cublasCreate(&handle->h); if (err != CUBLAS_STATUS_SUCCESS) { cuda_exit(ctx); free(handle); return GA_BLAS_ERROR; } err = cublasSetStream(handle->h, ctx->s); if (err != CUBLAS_STATUS_SUCCESS) { e = GA_BLAS_ERROR; goto e1; } cublasSetPointerMode(handle->h, CUBLAS_POINTER_MODE_HOST); cublasSetAtomicsMode(handle->h, CUBLAS_ATOMICS_ALLOWED); types[0] = GA_BUFFER; types[1] = GA_SIZE; types[2] = GA_BUFFER; types[3] = GA_SIZE; types[4] = GA_BUFFER; types[5] = GA_SIZE; types[6] = GA_SIZE; types[7] = GA_SIZE; types[8] = GA_SIZE; e = GpuKernel_init(&handle->sgemvBH_N_a1_b1_small, &cuda_ops, ctx, 1, &code_sgemvBH_N_a1_b1_small, NULL, "sgemv", 9, types, 0, NULL); if (e != GA_NO_ERROR) goto e1; e = GpuKernel_init(&handle->sgemvBH_T_a1_b1_small, &cuda_ops, ctx, 1, &code_sgemvBH_T_a1_b1_small, NULL, "sgemv", 9, types, 0, NULL); if (e != GA_NO_ERROR) goto e2; tmp[0] = atomicadd_double; tmp[1] = code_dgemvBH_N_a1_b1_small; e = GpuKernel_init(&handle->dgemvBH_N_a1_b1_small, &cuda_ops, ctx, 2, tmp, NULL, "dgemv", 9, types, GA_USE_DOUBLE, NULL); if (e != GA_NO_ERROR) goto e3; tmp[0] = atomicadd_double; tmp[1] = code_dgemvBH_T_a1_b1_small; e = GpuKernel_init(&handle->dgemvBH_T_a1_b1_small, &cuda_ops, ctx, 2, tmp, NULL, "dgemv", 9, types, GA_USE_DOUBLE, NULL); if (e != GA_NO_ERROR) goto e4; types[0] = GA_BUFFER; types[1] = GA_SIZE; types[2] = GA_BUFFER; types[3] = GA_SIZE; types[4] = GA_FLOAT; types[5] = GA_BUFFER; types[6] = GA_SIZE; types[7] = GA_SIZE; types[8] = GA_SIZE; types[9] = GA_SIZE; e = GpuKernel_init(&handle->sgerBH_gen_small, &cuda_ops, ctx, 1, &code_sgerBH_gen_small, NULL, "_sgerBH_gen_small", 10, types, 0, NULL); if (e != GA_NO_ERROR) goto e5; types[4] = GA_DOUBLE; tmp[0] = atomicadd_double; tmp[1] = code_dgerBH_gen_small; e = GpuKernel_init(&handle->dgerBH_gen_small, &cuda_ops, ctx, 2, tmp, NULL, "_dgerBH_gen_small", 10, types, GA_USE_DOUBLE, NULL); if (e != GA_NO_ERROR) goto e6; ctx->blas_handle = handle; cuda_exit(ctx); return GA_NO_ERROR; e6: GpuKernel_clear(&handle->sgerBH_gen_small); e5: GpuKernel_clear(&handle->dgemvBH_T_a1_b1_small); e4: GpuKernel_clear(&handle->dgemvBH_N_a1_b1_small); e3: GpuKernel_clear(&handle->sgemvBH_T_a1_b1_small); e2: GpuKernel_clear(&handle->sgemvBH_N_a1_b1_small); e1: cublasDestroy(handle->h); cuda_exit(ctx); free(handle); return e; }
/// indicate that scalar values are passed as reference on host inline void set_scalar_device() { MGPU_CUDA_BLAS_CALL(cublasSetPointerMode(handle_, CUBLAS_POINTER_MODE_DEVICE)); }