Example #1
0
File: blas.hpp Project: SuHui/mgpu
 /// indicate that scalar values are passed as reference on device
 inline void set_scalar_host()
 {
   MGPU_CUDA_BLAS_CALL(cublasSetPointerMode(handle_,
     CUBLAS_POINTER_MODE_HOST));
 }
static int setup(void *c) {
  cuda_context *ctx = (cuda_context *)c;
  blas_handle *handle;
  const char *tmp[2];
  cublasStatus_t err;
  int e;
  int types[10];

  if (ctx->blas_handle != NULL)
    return GA_NO_ERROR;

  handle = calloc(1, sizeof(*handle));
  if (handle == NULL)
    return GA_MEMORY_ERROR;

  cuda_enter(ctx);
  err = cublasCreate(&handle->h);
  if (err != CUBLAS_STATUS_SUCCESS) {
    cuda_exit(ctx);
    free(handle);
    return GA_BLAS_ERROR;
  }

  err = cublasSetStream(handle->h, ctx->s);
  if (err != CUBLAS_STATUS_SUCCESS) {
    e = GA_BLAS_ERROR;
    goto e1;
  }

  cublasSetPointerMode(handle->h, CUBLAS_POINTER_MODE_HOST);
  cublasSetAtomicsMode(handle->h, CUBLAS_ATOMICS_ALLOWED);

  types[0] = GA_BUFFER;
  types[1] = GA_SIZE;
  types[2] = GA_BUFFER;
  types[3] = GA_SIZE;
  types[4] = GA_BUFFER;
  types[5] = GA_SIZE;
  types[6] = GA_SIZE;
  types[7] = GA_SIZE;
  types[8] = GA_SIZE;
  e = GpuKernel_init(&handle->sgemvBH_N_a1_b1_small, &cuda_ops, ctx, 1, &code_sgemvBH_N_a1_b1_small, NULL, "sgemv", 9, types, 0, NULL);
  if (e != GA_NO_ERROR) goto e1;
  e = GpuKernel_init(&handle->sgemvBH_T_a1_b1_small, &cuda_ops, ctx, 1, &code_sgemvBH_T_a1_b1_small, NULL, "sgemv", 9, types, 0, NULL);
  if (e != GA_NO_ERROR) goto e2;
  tmp[0] = atomicadd_double;
  tmp[1] = code_dgemvBH_N_a1_b1_small;
  e = GpuKernel_init(&handle->dgemvBH_N_a1_b1_small, &cuda_ops, ctx, 2, tmp, NULL, "dgemv", 9, types, GA_USE_DOUBLE, NULL);
  if (e != GA_NO_ERROR) goto e3;
  tmp[0] = atomicadd_double;
  tmp[1] = code_dgemvBH_T_a1_b1_small;
  e = GpuKernel_init(&handle->dgemvBH_T_a1_b1_small, &cuda_ops, ctx, 2, tmp, NULL, "dgemv", 9, types, GA_USE_DOUBLE, NULL);
  if (e != GA_NO_ERROR) goto e4;

  types[0] = GA_BUFFER;
  types[1] = GA_SIZE;
  types[2] = GA_BUFFER;
  types[3] = GA_SIZE;
  types[4] = GA_FLOAT;
  types[5] = GA_BUFFER;
  types[6] = GA_SIZE;
  types[7] = GA_SIZE;
  types[8] = GA_SIZE;
  types[9] = GA_SIZE;
  e = GpuKernel_init(&handle->sgerBH_gen_small, &cuda_ops, ctx, 1, &code_sgerBH_gen_small, NULL, "_sgerBH_gen_small", 10, types, 0, NULL);
  if (e != GA_NO_ERROR) goto e5;
  types[4] = GA_DOUBLE;
  tmp[0] = atomicadd_double;
  tmp[1] = code_dgerBH_gen_small;
  e = GpuKernel_init(&handle->dgerBH_gen_small, &cuda_ops, ctx, 2, tmp, NULL, "_dgerBH_gen_small", 10, types, GA_USE_DOUBLE, NULL);
  if (e != GA_NO_ERROR) goto e6;

  ctx->blas_handle = handle;

  cuda_exit(ctx);

  return GA_NO_ERROR;

 e6:
  GpuKernel_clear(&handle->sgerBH_gen_small);
 e5:
  GpuKernel_clear(&handle->dgemvBH_T_a1_b1_small);
 e4:
  GpuKernel_clear(&handle->dgemvBH_N_a1_b1_small);
 e3:
  GpuKernel_clear(&handle->sgemvBH_T_a1_b1_small);
 e2:
  GpuKernel_clear(&handle->sgemvBH_N_a1_b1_small);
 e1:
  cublasDestroy(handle->h);
  cuda_exit(ctx);
  free(handle);
  return e;
}
Example #3
0
File: blas.hpp Project: SuHui/mgpu
 /// indicate that scalar values are passed as reference on host
 inline void set_scalar_device()
 {
   MGPU_CUDA_BLAS_CALL(cublasSetPointerMode(handle_,
     CUBLAS_POINTER_MODE_DEVICE));
 }