/*
 * Make `sz` bytes of `src`, starting `offset` bytes into it, available in
 * the context passed as `dst_c` (cast to a cuda_context pointer).
 *
 * Outcomes:
 *  - Same context, `may_share` non-zero and `offset` == 0: `src` itself is
 *    returned after cuda_retain(src); no copy is made.  Note that `sz` is
 *    not consulted for this decision.
 *  - Same context otherwise: a buffer is obtained from cuda_alloc() in that
 *    context and filled with cuMemcpyDtoDAsync() on the stream `ctx->s`.
 *  - Different contexts: a buffer is obtained from cuda_alloc() in the
 *    destination context and filled with cuMemcpyPeerAsync() on the stream
 *    `dst_ctx->mem_s`.
 *
 * Both copies use the asynchronous driver entry points; nothing in this
 * function waits on the host for the copy to finish.
 *
 * Returns the resulting buffer, or NULL when the allocation fails or the
 * copy call does not return CUDA_SUCCESS.  In the latter case the driver
 * status is left in the source context's `err` field and the freshly
 * allocated buffer is released with cuda_free().
 */
static gpudata *cuda_transfer(gpudata *src, size_t offset, size_t sz,
                              void *dst_c, int may_share) {
  cuda_context *ctx = src->ctx;
  cuda_context *dst_ctx = (cuda_context *)dst_c;
  gpudata *dst;

  ASSERT_BUF(src);
  ASSERT_CTX(ctx);
  ASSERT_CTX(dst_ctx);

  if (ctx != dst_ctx) {
    /* Source and destination live in different contexts: peer copy. */
    dst = cuda_alloc(dst_ctx, sz, NULL, 0, NULL);
    if (dst == NULL)
      return NULL;

    cuda_enter(ctx);

    /* NOTE(review): cuda_waits()/cuda_records() are defined elsewhere;
     * presumably they order the copy against earlier use of each buffer
     * on the given stream and then register this access - confirm. */
    cuda_waits(src, CUDA_WAIT_READ, dst_ctx->mem_s);
    cuda_waits(dst, CUDA_WAIT_WRITE, dst_ctx->mem_s);

    ctx->err = cuMemcpyPeerAsync(dst->ptr, dst->ctx->ctx,
                                 src->ptr + offset, src->ctx->ctx,
                                 sz, dst_ctx->mem_s);
    if (ctx->err == CUDA_SUCCESS) {
      cuda_records(dst, CUDA_WAIT_WRITE, dst_ctx->mem_s);
      cuda_records(src, CUDA_WAIT_READ, dst_ctx->mem_s);
      cuda_exit(ctx);
      return dst;
    }

    /* Copy was rejected: drop the new buffer, then leave the context. */
    cuda_free(dst);
    cuda_exit(ctx);
    return NULL;
  }

  /* From here on both buffers belong to the same context. */
  if (may_share && offset == 0) {
    /* Caller accepts aliasing and wants the buffer from its start:
     * hand back the source with one more reference. */
    cuda_retain(src);
    return src;
  }

  dst = cuda_alloc(ctx, sz, NULL, 0, NULL);
  if (dst == NULL)
    return NULL;

  cuda_enter(ctx);

  /* NOTE(review): cuda_wait()/cuda_record() are defined elsewhere;
   * presumably the single-context counterparts of cuda_waits()/
   * cuda_records() - confirm. */
  cuda_wait(src, CUDA_WAIT_READ);
  cuda_wait(dst, CUDA_WAIT_WRITE);

  ctx->err = cuMemcpyDtoDAsync(dst->ptr, src->ptr + offset, sz, ctx->s);
  if (ctx->err == CUDA_SUCCESS) {
    cuda_record(src, CUDA_WAIT_READ);
    cuda_record(dst, CUDA_WAIT_WRITE);
    cuda_exit(ctx);
    return dst;
  }

  /* Copy was rejected: leave the context, then drop the new buffer. */
  cuda_exit(ctx);
  cuda_free(dst);
  return NULL;
}
Example #2
0
/*
 * R-callable wrapper around the CUDA driver call cuMemcpyPeerAsync().
 *
 * Argument unpacking, as done below:
 *   r_dstDevice, r_srcDevice  - first element of REAL(), stored as CUdeviceptr
 *   r_ByteCount               - first element of REAL(), stored as size_t
 *   r_dstContext, r_srcContext, r_hStream
 *                             - passed through getRReference() and cast to
 *                               CUcontext / CUstream
 *
 * The CUresult returned by the driver is handed to Renum_convert_CUresult()
 * and that value is returned to R.  No validation of the inputs is done
 * here.
 * NOTE(review): getRReference() and Renum_convert_CUresult() are defined
 * elsewhere; presumably they unwrap an R external reference and build an R
 * representation of the status enum - confirm.
 */
SEXP R_auto_cuMemcpyPeerAsync(SEXP r_dstDevice, SEXP r_dstContext, SEXP r_srcDevice, SEXP r_srcContext, SEXP r_ByteCount, SEXP r_hStream)
{
    /* Unpack the R arguments in signature order. */
    CUdeviceptr dst_ptr = REAL(r_dstDevice)[0];
    CUcontext dst_context = (CUcontext) getRReference(r_dstContext);
    CUdeviceptr src_ptr = REAL(r_srcDevice)[0];
    CUcontext src_context = (CUcontext) getRReference(r_srcContext);
    size_t n_bytes = REAL(r_ByteCount)[0];
    CUstream stream = (CUstream) getRReference(r_hStream);

    /* Issue the copy and report the driver status back to R. */
    CUresult status = cuMemcpyPeerAsync(dst_ptr, dst_context,
                                        src_ptr, src_context,
                                        n_bytes, stream);

    return Renum_convert_CUresult(status);
}