/*
 * Single-precision rank-1 update (A += alpha * X * Y^T), dispatched to the
 * BLAS backend of the context that owns X.  Arguments mirror the standard
 * SGER interface, with buffer-offset pairs instead of raw pointers.
 * Returns the backend's error code.
 */
int gpublas_sger(cb_order order, size_t M, size_t N, float alpha,
                 gpudata *X, size_t offX, int incX,
                 gpudata *Y, size_t offY, int incY,
                 gpudata *A, size_t offA, size_t lda) {
  gpucontext *ctx = gpudata_context(X);
  return ctx->blas_ops->sger(order, M, N, alpha,
                             X, offX, incX,
                             Y, offY, incY,
                             A, offA, lda);
}
/*
 * Double-precision matrix-matrix multiply (C = alpha*op(A)*op(B) + beta*C),
 * dispatched to the BLAS backend of the context that owns A.  Arguments
 * mirror the standard DGEMM interface, with buffer-offset pairs instead of
 * raw pointers.  Returns the backend's error code.
 */
int gpublas_dgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                  size_t M, size_t N, size_t K, double alpha,
                  gpudata *A, size_t offA, size_t lda,
                  gpudata *B, size_t offB, size_t ldb, double beta,
                  gpudata *C, size_t offC, size_t ldc) {
  gpucontext *ctx = gpudata_context(A);
  return ctx->blas_ops->dgemm(order, transA, transB, M, N, K, alpha,
                              A, offA, lda,
                              B, offB, ldb, beta,
                              C, offC, ldc);
}
/*
 * Copy `src` into `dst`, converting between element types via a generated
 * elementwise kernel ("dst = src").  Kernels are memoized per (itype, otype)
 * pair in the context's two-queue cache, which is created lazily on first
 * use.  Both arrays must live in the same context.
 *
 * Returns GA_NO_ERROR on success; GA_INVALID_ERROR on context mismatch,
 * GA_MEMORY_ERROR / GA_MISC_ERROR on allocation or cache failures.
 *
 * Fix vs. previous version: on the cache_twoq / cache_add failure paths the
 * freshly built key (`aa`) and kernel (`k`) were leaked; they are now
 * released with the same deallocators the cache itself would have used.
 */
static int ga_extcopy(GpuArray *dst, const GpuArray *src) {
  struct extcopy_args a, *aa;
  gpucontext *ctx = gpudata_context(dst->data);
  GpuElemwise *k = NULL;
  void *args[2];

  if (ctx != gpudata_context(src->data))
    return GA_INVALID_ERROR;

  a.itype = src->typecode;
  a.otype = dst->typecode;
  if (ctx->extcopy_cache != NULL)
    k = cache_get(ctx->extcopy_cache, &a);

  if (k == NULL) {
    gpuelemwise_arg gargs[2];
    gargs[0].name = "src";
    gargs[0].typecode = src->typecode;
    gargs[0].flags = GE_READ;
    gargs[1].name = "dst";
    gargs[1].typecode = dst->typecode;
    gargs[1].flags = GE_WRITE;
    k = GpuElemwise_new(ctx, "", "dst = src", 2, gargs, 0, 0);
    if (k == NULL)
      return GA_MISC_ERROR;

    aa = memdup(&a, sizeof(a));
    if (aa == NULL) {
      GpuElemwise_free(k);
      return GA_MEMORY_ERROR;
    }

    /* Lazily create the per-context cache on first extcopy. */
    if (ctx->extcopy_cache == NULL)
      ctx->extcopy_cache = cache_twoq(4, 8, 8, 2, extcopy_eq, extcopy_hash,
                                      extcopy_free,
                                      (cache_freev_fn)GpuElemwise_free,
                                      ctx->err);
    if (ctx->extcopy_cache == NULL) {
      /* Cache creation failed: nothing took ownership of aa or k. */
      extcopy_free(aa);
      GpuElemwise_free(k);
      return GA_MISC_ERROR;
    }

    if (cache_add(ctx->extcopy_cache, aa, k) != 0) {
      /* NOTE(review): assumes cache_add does not take ownership on
       * failure — confirm against the cache implementation. */
      extcopy_free(aa);
      GpuElemwise_free(k);
      return GA_MISC_ERROR;
    }
  }

  args[0] = (void *)src;
  args[1] = (void *)dst;
  return GpuElemwise_call(k, args, GE_BROADCAST);
}
/*
 * Double-precision matrix-vector multiply (Y = alpha*op(A)*X + beta*Y),
 * dispatched to the BLAS backend of the context that owns A.  Arguments
 * mirror the standard DGEMV interface, with buffer-offset pairs instead of
 * raw pointers.  Returns the backend's error code.
 */
int gpublas_dgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                  double alpha, gpudata *A, size_t offA, size_t lda,
                  gpudata *X, size_t offX, int incX, double beta,
                  gpudata *Y, size_t offY, int incY) {
  gpucontext *ctx = gpudata_context(A);
  return ctx->blas_ops->dgemv(order, transA, M, N, alpha,
                              A, offA, lda,
                              X, offX, incX, beta,
                              Y, offY, incY);
}
/*
 * Batched double-precision rank-1 update: one DGER per entry of the
 * pointer/offset arrays, dispatched to the BLAS backend of the context that
 * owns x[0].  An empty batch is a successful no-op.  Returns the backend's
 * error code.
 */
int gpublas_dgerBatch(cb_order order, size_t M, size_t N, double alpha,
                      gpudata **x, size_t *offX, size_t incX,
                      gpudata **y, size_t *offY, size_t incY,
                      gpudata **A, size_t *offA, size_t lda,
                      size_t batchCount, int flags) {
  gpucontext *ctx;

  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = gpudata_context(x[0]);
  return ctx->blas_ops->dgerBatch(order, M, N, alpha,
                                  x, offX, incX,
                                  y, offY, incY,
                                  A, offA, lda,
                                  batchCount, flags);
}
/*
 * Batched double-precision matrix-vector multiply: one DGEMV per entry of
 * the pointer/offset arrays, dispatched to the BLAS backend of the context
 * that owns A[0].  An empty batch is a successful no-op.  Returns the
 * backend's error code.
 */
int gpublas_dgemvBatch(cb_order order, cb_transpose transA,
                       size_t M, size_t N, double alpha,
                       gpudata **A, size_t *offA, size_t lda,
                       gpudata **x, size_t *offX, size_t incX, double beta,
                       gpudata **y, size_t *offY, size_t incY,
                       size_t batchCount, int flags) {
  gpucontext *ctx;

  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = gpudata_context(A[0]);
  return ctx->blas_ops->dgemvBatch(order, transA, M, N, alpha,
                                   A, offA, lda,
                                   x, offX, incX, beta,
                                   y, offY, incY,
                                   batchCount, flags);
}
/*
 * Batched double-precision matrix-matrix multiply: one DGEMM per entry of
 * the pointer/offset arrays, dispatched to the BLAS backend of the context
 * that owns A[0].  An empty batch is a successful no-op.  Returns the
 * backend's error code.
 */
int gpublas_dgemmBatch(cb_order order, cb_transpose transA,
                       cb_transpose transB, size_t M, size_t N, size_t K,
                       double alpha,
                       gpudata **A, size_t *offA, size_t lda,
                       gpudata **B, size_t *offB, size_t ldb, double beta,
                       gpudata **C, size_t *offC, size_t ldc,
                       size_t batchCount, int flags) {
  gpucontext *ctx;

  /* The backend dgemmBatch entry point takes no flags, so any flag bit
   * here is an invalid request rather than something to forward. */
  if (flags != 0)
    return GA_INVALID_ERROR;
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = gpudata_context(A[0]);
  return ctx->blas_ops->dgemmBatch(order, transA, transB, M, N, K, alpha,
                                   A, offA, lda,
                                   B, offB, ldb, beta,
                                   C, offC, ldc,
                                   batchCount);
}
gpucontext *GpuArray_context(const GpuArray *a) { return gpudata_context(a->data); }
const char *GpuArray_error(const GpuArray *a, int err) { return gpucontext_error(gpudata_context(a->data), err); }