/*
 * Produce a buffer in context `dst_c` holding `sz` bytes of `src`, starting
 * `offset` bytes into it.
 *
 * - Same context, `may_share` non-zero and `offset == 0`: no copy is made;
 *   `src` is retained and returned as-is (`sz` is not consulted here).
 * - Same context otherwise: a new buffer is allocated and filled with
 *   cuMemcpyDtoDAsync on the context's stream `s`.
 * - Different contexts: a new buffer is allocated in the destination context
 *   and filled with cuMemcpyPeerAsync on the destination context's `mem_s`
 *   stream.
 *
 * Returns the resulting buffer, or NULL if the allocation or the copy call
 * fails. On a copy failure the CUDA status is left in the source context's
 * `err` field and the freshly allocated buffer is released.
 */
static gpudata *cuda_transfer(gpudata *src, size_t offset, size_t sz,
                              void *dst_c, int may_share) {
  cuda_context *src_ctx = src->ctx;
  cuda_context *target = (cuda_context *)dst_c;
  gpudata *out;
  int same_ctx;

  ASSERT_BUF(src);
  ASSERT_CTX(src_ctx);
  ASSERT_CTX(target);

  same_ctx = (src_ctx == target);

  /* Sharing is only offered inside one context and from the buffer start. */
  if (same_ctx && may_share && offset == 0) {
    cuda_retain(src);
    return src;
  }

  /* When same_ctx holds, target and src_ctx are the same context. */
  out = cuda_alloc(target, sz, NULL, 0, NULL);
  if (out == NULL)
    return NULL;

  /* Both copy flavours run with the source context entered. */
  cuda_enter(src_ctx);

  if (same_ctx) {
    /* presumably orders the copy after pending work on both buffers */
    cuda_wait(src, CUDA_WAIT_READ);
    cuda_wait(out, CUDA_WAIT_WRITE);

    src_ctx->err = cuMemcpyDtoDAsync(out->ptr, src->ptr + offset, sz,
                                     src_ctx->s);
    if (src_ctx->err != CUDA_SUCCESS) {
      /* This path leaves the context before releasing the buffer. */
      cuda_exit(src_ctx);
      cuda_free(out);
      return NULL;
    }

    cuda_record(src, CUDA_WAIT_READ);
    cuda_record(out, CUDA_WAIT_WRITE);
  } else {
    /* Same wait/copy/record sequence, but against the destination's mem_s. */
    cuda_waits(src, CUDA_WAIT_READ, target->mem_s);
    cuda_waits(out, CUDA_WAIT_WRITE, target->mem_s);

    src_ctx->err = cuMemcpyPeerAsync(out->ptr, out->ctx->ctx,
                                     src->ptr + offset, src_ctx->ctx,
                                     sz, target->mem_s);
    if (src_ctx->err != CUDA_SUCCESS) {
      /* This path releases the buffer before leaving the context. */
      cuda_free(out);
      cuda_exit(src_ctx);
      return NULL;
    }

    cuda_records(out, CUDA_WAIT_WRITE, target->mem_s);
    cuda_records(src, CUDA_WAIT_READ, target->mem_s);
  }

  cuda_exit(src_ctx);
  return out;
}
/*
 * Wrap an existing CUcontext in a newly malloc'ed cuda_context.
 *
 * Fills in the bookkeeping fields, runs detect_arch() into `bin_id`,
 * allocates the extcopy cache, creates stream `s`, tags the structure and
 * allocates an 8-byte `errbuf` initialised from a zero int64_t.
 *
 * ctx:   driver context handle stored in the new structure.
 * flags: stored verbatim in the `flags` field.
 *
 * Returns the new cuda_context (as void *), or NULL if any step fails; on
 * failure everything acquired so far is released. CUDA status codes are
 * written to `err`, which is declared outside this function.
 *
 * NOTE(review): only stream `s` is created here, while cuda_transfer() in
 * this file also uses the `mem_s` member -- confirm where that is set up.
 */
void *cuda_make_ctx(CUcontext ctx, int flags) {
  int64_t zero = 0;      /* initial contents handed to cuda_alloc */
  int alloc_status = 0;  /* error code reported by cuda_alloc */
  int have_cache = 0;    /* extcopy_cache needs cache_free on failure */
  int have_stream = 0;   /* stream s needs cuStreamDestroy on failure */
  cuda_context *c;

  c = malloc(sizeof(*c));
  if (c == NULL)
    return NULL;

  c->ctx = ctx;
  c->err = CUDA_SUCCESS;
  c->blas_handle = NULL;
  c->refcnt = 1;
  c->flags = flags;
  c->enter = 0;

  if (detect_arch(ARCH_PREFIX, c->bin_id, &err))
    goto fail;

  c->extcopy_cache = cache_alloc(64, 32);
  if (c->extcopy_cache == NULL)
    goto fail;
  have_cache = 1;

  err = cuStreamCreate(&c->s, 0);
  if (err != CUDA_SUCCESS)
    goto fail;
  have_stream = 1;

  /* The context has to be tagged before cuda_alloc is called on it. */
  TAG_CTX(c);
  c->errbuf = cuda_alloc(c, 8, &zero, GA_BUFFER_INIT, &alloc_status);
  if (alloc_status != GA_NO_ERROR) {
    /* Surface the context's own CUDA status before it is freed. */
    err = c->err;
    goto fail;
  }

  /*
   * Undo one count so that errbuf does not form a reference loop with the
   * context (presumably cuda_alloc took a reference on it -- TODO confirm).
   */
  c->refcnt--;
  return c;

fail:
  /* Release in the same order as the original: cache, stream, struct. */
  if (have_cache)
    cache_free(c->extcopy_cache);
  if (have_stream)
    cuStreamDestroy(c->s);
  free(c);
  return NULL;
}