Example #1
0
/*
 * Drop one reference to `ctx`; when the count reaches zero, tear the
 * context down: BLAS handle, pinned error buffer, stream, freelist
 * blocks and (unless the DONTFREE flag is set) the CUDA context itself.
 */
static void cuda_free_ctx(cuda_context *ctx) {
  gpuarray_blas_ops *blas_ops;
  gpudata *next, *curr;

  ASSERT_CTX(ctx);
  ctx->refcnt--;
  if (ctx->refcnt == 0) {
    assert(ctx->enter == 0 && "Context was active when freed!");
    if (ctx->blas_handle != NULL) {
      ctx->err = cuda_property(ctx, NULL, NULL, GA_CTX_PROP_BLAS_OPS,
                               &blas_ops);
      /* Only tear down on a successful lookup: on failure `blas_ops`
       * is left uninitialized and dereferencing it is undefined. */
      if (ctx->err == GA_NO_ERROR)
        blas_ops->teardown(ctx);
    }
    /* errbuf->ptr is pinned host memory, hence cuMemFreeHost. */
    cuMemFreeHost((void *)ctx->errbuf->ptr);
    deallocate(ctx->errbuf);

    cuStreamDestroy(ctx->s);

    /* Clear out the freelist */
    for (curr = ctx->freeblocks; curr != NULL; curr = next) {
      next = curr->next;  /* save before deallocate() invalidates curr */
      cuMemFree(curr->ptr);
      deallocate(curr);
    }

    if (!(ctx->flags & DONTFREE))
      cuCtxDestroy(ctx->ctx);
    cache_destroy(ctx->extcopy_cache);
    CLEAR(ctx);
    free(ctx);
  }
}
/*
 * Drop one reference to `ctx`; on the last reference, release the BLAS
 * handle, the stream, the CUDA context (unless DONTFREE) and the
 * extcopy kernel cache, then free the structure.
 */
static void cuda_free_ctx(cuda_context *ctx) {
  gpuarray_blas_ops *blas_ops;

  ASSERT_CTX(ctx);
  ctx->refcnt--;
  if (ctx->refcnt == 0) {
    if (ctx->blas_handle != NULL) {
      ctx->err = cuda_property(ctx, NULL, NULL, GA_CTX_PROP_BLAS_OPS,
                               &blas_ops);
      /* Skip teardown if the lookup failed: `blas_ops` would be an
       * uninitialized pointer and calling through it is undefined. */
      if (ctx->err == GA_NO_ERROR)
        blas_ops->teardown(ctx);
    }
    cuStreamDestroy(ctx->s);
    if (!(ctx->flags & DONTFREE))
      cuCtxDestroy(ctx->ctx);
    cache_free(ctx->extcopy_cache);
    CLEAR(ctx);
    free(ctx);
  }
}
/*
 * Drop one reference to `ctx`; on the last reference, release the BLAS
 * handle, error buffer, stream, CUDA context (unless DONTFREE) and the
 * extcopy kernel cache, then free the structure.
 */
static void cuda_free_ctx(cuda_context *ctx) {
  gpuarray_blas_ops *blas_ops;

  ASSERT_CTX(ctx);
  ctx->refcnt--;
  if (ctx->refcnt == 0) {
    assert(ctx->enter == 0 && "Context was active when freed!");
    if (ctx->blas_handle != NULL) {
      ctx->err = cuda_property(ctx, NULL, NULL, GA_CTX_PROP_BLAS_OPS,
                               &blas_ops);
      /* Only call teardown on success; on failure `blas_ops` is
       * uninitialized and must not be dereferenced. */
      if (ctx->err == GA_NO_ERROR)
        blas_ops->teardown(ctx);
    }
    /* Bump the refcount so that freeing the error buffer (which holds
     * a context reference) cannot re-enter this destructor. */
    ctx->refcnt = 2; /* Prevent recursive calls */
    cuda_free(ctx->errbuf);
    cuStreamDestroy(ctx->s);
    if (!(ctx->flags & DONTFREE))
      cuCtxDestroy(ctx->ctx);
    cache_free(ctx->extcopy_cache);
    CLEAR(ctx);
    free(ctx);
  }
}
Example #4
0
/*
 * Launch an elementwise copy kernel from `input` (element type `intype`,
 * byte offset `ioff`, shape/strides a_dims/a_str over a_nd dimensions)
 * to `output` (outtype/ooff/b_dims/b_str over b_nd dimensions).
 *
 * Generated kernels are cached in ctx->extcopy_cache, keyed on a copy
 * of the full argument description (extcopy_args).  Failure to insert
 * into the cache is non-fatal: the freshly generated kernel is still
 * used for this call.  Returns a GA_* error code (GA_NO_ERROR on
 * success, including the empty-input case).
 */
static int cuda_extcopy(gpudata *input, size_t ioff, gpudata *output,
                        size_t ooff, int intype, int outtype,
                        unsigned int a_nd, const size_t *a_dims,
                        const ssize_t *a_str, unsigned int b_nd,
                        const size_t *b_dims, const ssize_t *b_str) {
  cuda_context *ctx = input->ctx;
  void *args[2];
  int res = GA_SYS_ERROR;
  unsigned int i;
  size_t nEls = 1, ls, gs;
  gpukernel *k;
  extcopy_args a, *aa;

  ASSERT_BUF(input);
  ASSERT_BUF(output);
  /* Both buffers must live in the same CUDA context. */
  if (input->ctx != output->ctx)
    return GA_INVALID_ERROR;

  /* Total element count is the product of the input dimensions. */
  for (i = 0; i < a_nd; i++) {
    nEls *= a_dims[i];
  }
  /* Nothing to copy for empty arrays. */
  if (nEls == 0) return GA_NO_ERROR;

  /* Build the cache key on the stack; the dims/strides pointers still
   * alias the caller's arrays at this point. */
  a.ind = a_nd;
  a.ond = b_nd;
  a.itype = intype;
  a.otype = outtype;
  a.ioff = ioff;
  a.ooff = ooff;
  a.idims = a_dims;
  a.odims = b_dims;
  a.istr = a_str;
  a.ostr = b_str;

  k = cache_get(ctx->extcopy_cache, &a);
  if (k == NULL) {
    /* Cache miss: generate a new kernel for this argument shape. */
    res = gen_extcopy_kernel(&a, input->ctx, &k, nEls);
    if (res != GA_NO_ERROR)
      return res;

    /* Cache the kernel */
    /* The cache needs a deep copy of the key: memdup the struct and
     * each dims/strides array so it outlives this call.  Any memdup
     * failure just skips caching (goto done) — `k` is still usable. */
    aa = memdup(&a, sizeof(a));
    if (aa == NULL) goto done;
    aa->idims = memdup(a_dims, a_nd*sizeof(size_t));
    aa->odims = memdup(b_dims, b_nd*sizeof(size_t));
    aa->istr = memdup(a_str, a_nd*sizeof(ssize_t));
    aa->ostr = memdup(b_str, b_nd*sizeof(ssize_t));
    if (aa->idims == NULL || aa->odims == NULL ||
        aa->istr == NULL || aa->ostr == NULL) {
      extcopy_free(aa);
      goto done;
    }
    /* One ref is given to the cache, we manage the other */
    cuda_retainkernel(k);
    cache_add(ctx->extcopy_cache, aa, k);
  } else {
    /* This is our reference */
    cuda_retainkernel(k);
  }
done:

  /* Cheap kernel scheduling */
  /* Pick the largest legal block size and derive the grid size from it
   * (ceil-div of nEls by ls). */
  res = cuda_property(NULL, NULL, k, GA_KERNEL_PROP_MAXLSIZE, &ls);
  if (res != GA_NO_ERROR) goto fail;

  gs = ((nEls-1) / ls) + 1;
  args[0] = input;
  args[1] = output;
  res = cuda_callkernel(k, 1, &ls, &gs, 0, args);

fail:
  /* We free our reference here */
  cuda_freekernel(k);
  return res;
}