/*
 * Double-precision GER routed through CLBlast.
 *
 * Enqueues CLBlastDger on the command queue of X's context, passing the
 * caller's buffers, offsets and strides through unchanged.
 *
 * Returns GA_BLAS_ERROR if CLBlast reports anything other than kSuccess,
 * GA_NO_ERROR otherwise.
 */
static int dger(cb_order order, size_t M, size_t N, double alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda)
{
  StatusCode status;
  cl_event ev;
  cl_ctx *ctx = X->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * presumably ARRAY_FINI refers to the local `ev` by name, so that
   * identifier must not be renamed. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  status = CLBlastDger(convO(order), M, N, alpha,
                       X->buf, offX, incX,
                       Y->buf, offY, incY,
                       A->buf, offA, lda,
                       &ctx->q, &ev);
  if (status != kSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  /* Drop the local reference to the completion event. */
  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
/*
 * Batched single-precision GEMM on top of CLBlast.
 *
 * Issues one CLBlastSgemm call per batch entry, using A[i]/B[i]/C[i] with
 * their respective offsets offA[i]/offB[i]/offC[i].  All calls are enqueued
 * on the queue of the context that owns A[0].
 *
 * An empty batch (batchCount == 0) is a no-op and does not touch A[0].
 *
 * Returns GA_BLAS_ERROR as soon as one call does not report kSuccess
 * (remaining entries are not processed), GA_NO_ERROR otherwise.
 */
static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, float alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      float beta,
                      gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount)
{
  cl_ctx *ctx;
  cl_event ev;
  size_t i;
  StatusCode err;

  /* Nothing to do; also avoids reading A[0] from an empty array. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file. */
    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);

    /* The output matrix must use its own offset (offC[i]); the previous
     * code passed offB[i] here, writing the result at the wrong place. */
    err = CLBlastSgemm(convO(order), convT(transA), convT(transB),
                       M, N, K, alpha,
                       A[i]->buf, offA[i], lda,
                       B[i]->buf, offB[i], ldb,
                       beta,
                       C[i]->buf, offC[i], ldc,
                       &ctx->q, &ev);
    if (err != kSuccess)
      return GA_BLAS_ERROR;

    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Single-precision GEMV routed through CLBlast.
 *
 * Enqueues CLBlastSgemv on the command queue of A's context with the
 * buffers, offsets, leading dimension and strides supplied by the caller.
 *
 * Returns GA_NO_ERROR when CLBlast reports kSuccess and GA_BLAS_ERROR for
 * any other status.
 */
static int sgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX,
                 float beta,
                 gpudata *Y, size_t offY, int incY)
{
  cl_event ev;
  StatusCode status;
  cl_ctx *ctx = A->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  status = CLBlastSgemv(convO(order), convT(transA), M, N, alpha,
                        A->buf, offA, lda,
                        X->buf, offX, incX,
                        beta,
                        Y->buf, offY, incY,
                        &ctx->q, &ev);
  if (status != kSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Single-precision GEMM routed through CLBlast.
 *
 * Enqueues CLBlastSgemm on the command queue of A's context.  Buffers,
 * offsets and leading dimensions are forwarded as given.
 *
 * Returns GA_BLAS_ERROR when the CLBlast status differs from kSuccess,
 * GA_NO_ERROR otherwise.
 */
static int sgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                 size_t M, size_t N, size_t K, float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *B, size_t offB, size_t ldb,
                 float beta,
                 gpudata *C, size_t offC, size_t ldc)
{
  cl_event ev;
  StatusCode status;
  cl_ctx *ctx = A->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(B);
  ARRAY_INIT(C);

  status = CLBlastSgemm(convO(order), convT(transA), convT(transB),
                        M, N, K, alpha,
                        A->buf, offA, lda,
                        B->buf, offB, ldb,
                        beta,
                        C->buf, offC, ldc,
                        &ctx->q, &ev);
  if (status != kSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(A);
  ARRAY_FINI(B);
  ARRAY_FINI(C);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision DOT routed through CLBlast.
 *
 * Enqueues CLBlastDdot on the command queue of X's context.  Z (at offZ)
 * is passed as the first buffer argument, followed by X and Y with their
 * offsets and strides.
 *
 * Returns GA_BLAS_ERROR when the CLBlast status differs from kSuccess,
 * GA_NO_ERROR otherwise.
 */
static int ddot(size_t N,
                gpudata *X, size_t offX, size_t incX,
                gpudata *Y, size_t offY, size_t incY,
                gpudata *Z, size_t offZ)
{
  cl_event ev;
  StatusCode status;
  cl_ctx *ctx = X->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(Z);

  status = CLBlastDdot(N,
                       Z->buf, offZ,
                       X->buf, offX, incX,
                       Y->buf, offY, incY,
                       &ctx->q, &ev);
  if (status != kSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(Z);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Single-precision GER routed through clBLAS.
 *
 * Enqueues clblasSger on one command queue (that of X's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Returns GA_BLAS_ERROR when clBLAS does not report clblasSuccess,
 * GA_NO_ERROR otherwise.
 */
static int sger(cb_order order, size_t M, size_t N, float alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda)
{
  clblasStatus status;
  cl_uint num_ev = 0;
  cl_event evl[3];
  cl_event ev;
  cl_ctx *ctx = X->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name, so these
   * locals keep their original identifiers. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  status = clblasSger(convO(order), M, N, alpha,
                      X->buf, offX, incX,
                      Y->buf, offY, incY,
                      A->buf, offA, lda,
                      1, &ctx->q,
                      num_ev, num_ev != 0 ? evl : NULL, &ev);
  if (status != clblasSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision GEMM routed through clBLAS.
 *
 * Enqueues clblasDgemm on one command queue (that of A's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Returns GA_BLAS_ERROR when clBLAS does not report clblasSuccess,
 * GA_NO_ERROR otherwise.
 */
static int dgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                 size_t M, size_t N, size_t K, double alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *B, size_t offB, size_t ldb,
                 double beta,
                 gpudata *C, size_t offC, size_t ldc)
{
  cl_event ev;
  cl_event evl[3];
  cl_uint num_ev = 0;
  clblasStatus status;
  cl_ctx *ctx = A->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(B);
  ARRAY_INIT(C);

  status = clblasDgemm(convO(order), convT(transA), convT(transB),
                       M, N, K, alpha,
                       A->buf, offA, lda,
                       B->buf, offB, ldb,
                       beta,
                       C->buf, offC, ldc,
                       1, &ctx->q,
                       num_ev, num_ev != 0 ? evl : NULL, &ev);
  if (status != clblasSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(A);
  ARRAY_FINI(B);
  ARRAY_FINI(C);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision GEMV routed through clBLAS.
 *
 * Enqueues clblasDgemv on one command queue (that of A's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Returns GA_BLAS_ERROR when clBLAS does not report clblasSuccess,
 * GA_NO_ERROR otherwise.
 */
static int dgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 double alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX,
                 double beta,
                 gpudata *Y, size_t offY, int incY)
{
  cl_event ev;
  cl_event evl[3];
  cl_uint num_ev = 0;
  clblasStatus status;
  cl_ctx *ctx = A->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  status = clblasDgemv(convO(order), convT(transA), M, N, alpha,
                       A->buf, offA, lda,
                       X->buf, offX, incX,
                       beta,
                       Y->buf, offY, incY,
                       1, &ctx->q,
                       num_ev, num_ev != 0 ? evl : NULL, &ev);
  if (status != clblasSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Batched single-precision GEMM on top of clBLAS.
 *
 * Issues one clblasSgemm call per batch entry, using A[i]/B[i]/C[i] with
 * offsets offA[i]/offB[i]/offC[i], all on the queue of the context that
 * owns A[0].
 *
 * An empty batch (batchCount == 0) is a no-op and does not touch A[0].
 *
 * Failures are handled by CLB_CHECK, which is given ctx->err (macro defined
 * elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when every call went through.
 */
static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, float alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      float beta,
                      gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount)
{
  cl_ctx *ctx;
  cl_event evl[3];
  cl_event ev;
  size_t i;
  cl_uint num_ev = 0;

  /* Nothing to do; also avoids reading A[0] from an empty array. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    /* Start each entry with an empty wait list.  NOTE(review): ARRAY_INIT
     * is defined elsewhere and presumably appends to evl / bumps num_ev;
     * evl only has room for the three arrays of one entry, so carrying
     * the count across iterations would run past its end.  If the macro
     * does not touch num_ev this reset is a no-op. */
    num_ev = 0;

    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);

    CLB_CHECK(ctx->err,
              clblasSgemm(convO(order), convT(transA), convT(transB),
                          M, N, K, alpha,
                          A[i]->buf, offA[i], lda,
                          B[i]->buf, offB[i], ldb,
                          beta,
                          C[i]->buf, offC[i], ldc,
                          1, &ctx->q,
                          num_ev, num_ev == 0 ? NULL : evl, &ev));

    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Batched double-precision GEMM on top of clBLAS.
 *
 * Issues one clblasDgemm call per batch entry, using A[i]/B[i]/C[i] with
 * offsets offA[i]/offB[i]/offC[i], all on the queue of the context that
 * owns A[0].
 *
 * An empty batch (batchCount == 0) is a no-op and does not touch A[0].
 *
 * Returns GA_BLAS_ERROR as soon as one call does not report clblasSuccess
 * (remaining entries are not processed), GA_NO_ERROR otherwise.
 */
static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, double alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      double beta,
                      gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount)
{
  cl_ctx *ctx;
  cl_event evl[3];
  cl_event ev;
  size_t i;
  cl_uint num_ev = 0;
  clblasStatus err;

  /* Nothing to do; also avoids reading A[0] from an empty array. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    /* Start each entry with an empty wait list.  NOTE(review): ARRAY_INIT
     * is defined elsewhere and presumably appends to evl / bumps num_ev;
     * evl only has room for the three arrays of one entry, so carrying
     * the count across iterations would run past its end.  If the macro
     * does not touch num_ev this reset is a no-op. */
    num_ev = 0;

    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);

    /* The output matrix must use its own offset (offC[i]); the previous
     * code passed offB[i] here, writing the result at the wrong place. */
    err = clblasDgemm(convO(order), convT(transA), convT(transB),
                      M, N, K, alpha,
                      A[i]->buf, offA[i], lda,
                      B[i]->buf, offB[i], ldb,
                      beta,
                      C[i]->buf, offC[i], ldc,
                      1, &ctx->q,
                      num_ev, num_ev == 0 ? NULL : evl, &ev);
    if (err != clblasSuccess)
      return GA_BLAS_ERROR;

    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Double-precision GEMM routed through CLBlast.
 *
 * Enqueues CLBlastDgemm on the command queue of A's context, forwarding
 * buffers, offsets and leading dimensions as given.
 *
 * Failures are handled by CLBT_CHECK, which is given ctx->err (macro
 * defined elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int dgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                 size_t M, size_t N, size_t K, double alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *B, size_t offB, size_t ldb,
                 double beta,
                 gpudata *C, size_t offC, size_t ldc)
{
  cl_event ev;
  cl_ctx *ctx = A->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(B);
  ARRAY_INIT(C);

  CLBT_CHECK(ctx->err,
             CLBlastDgemm(convO(order), convT(transA), convT(transB),
                          M, N, K, alpha,
                          A->buf, offA, lda,
                          B->buf, offB, ldb,
                          beta,
                          C->buf, offC, ldc,
                          &ctx->q, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(B);
  ARRAY_FINI(C);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Batched double-precision GEMM on top of CLBlast.
 *
 * Issues one CLBlastDgemm call per batch entry, using A[i]/B[i]/C[i] with
 * offsets offA[i]/offB[i]/offC[i], all on the queue of the context that
 * owns A[0].
 *
 * An empty batch (batchCount == 0) is a no-op and does not touch A[0].
 *
 * Failures are handled by CLBT_CHECK, which is given ctx->err (macro
 * defined elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when every call went through.
 */
static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, double alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      double beta,
                      gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount)
{
  cl_ctx *ctx;
  cl_event ev;
  size_t i;

  /* Nothing to do; also avoids reading A[0] from an empty array. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file. */
    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);

    CLBT_CHECK(ctx->err,
               CLBlastDgemm(convO(order), convT(transA), convT(transB),
                            M, N, K, alpha,
                            A[i]->buf, offA[i], lda,
                            B[i]->buf, offB[i], ldb,
                            beta,
                            C[i]->buf, offC[i], ldc,
                            &ctx->q, &ev));

    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Half-precision GEMV routed through CLBlast.
 *
 * The scalars arrive as float and are converted with float_to_half()
 * before being handed to CLBlastHgemv, which is enqueued on the command
 * queue of A's context.
 *
 * Failures are handled by CLBT_CHECK, which is given ctx->err (macro
 * defined elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int hgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX,
                 float beta,
                 gpudata *Y, size_t offY, int incY)
{
  cl_event ev;
  cl_ctx *ctx = A->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  CLBT_CHECK(ctx->err,
             CLBlastHgemv(convO(order), convT(transA), M, N,
                          float_to_half(alpha),
                          A->buf, offA, lda,
                          X->buf, offX, incX,
                          float_to_half(beta),
                          Y->buf, offY, incY,
                          &ctx->q, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision GER routed through clBLAS.
 *
 * Enqueues clblasDger on one command queue (that of X's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Failures are handled by CLB_CHECK, which is given ctx->err (macro defined
 * elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int dger(cb_order order, size_t M, size_t N, double alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda)
{
  cl_uint num_ev = 0;
  cl_event evl[3];
  cl_event ev;
  cl_ctx *ctx = X->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  CLB_CHECK(ctx->err,
            clblasDger(convO(order), M, N, alpha,
                       X->buf, offX, incX,
                       Y->buf, offY, incY,
                       A->buf, offA, lda,
                       1, &ctx->q,
                       num_ev, num_ev != 0 ? evl : NULL, &ev));

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Single-precision GEMM routed through clBLAS.
 *
 * Enqueues clblasSgemm on one command queue (that of A's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Failures are handled by CLB_CHECK, which is given ctx->err (macro defined
 * elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int sgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                 size_t M, size_t N, size_t K, float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *B, size_t offB, size_t ldb,
                 float beta,
                 gpudata *C, size_t offC, size_t ldc)
{
  cl_event ev;
  cl_event evl[3];
  cl_uint num_ev = 0;
  cl_ctx *ctx = A->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(B);
  ARRAY_INIT(C);

  CLB_CHECK(ctx->err,
            clblasSgemm(convO(order), convT(transA), convT(transB),
                        M, N, K, alpha,
                        A->buf, offA, lda,
                        B->buf, offB, ldb,
                        beta,
                        C->buf, offC, ldc,
                        1, &ctx->q,
                        num_ev, num_ev != 0 ? evl : NULL, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(B);
  ARRAY_FINI(C);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Single-precision GEMV routed through clBLAS.
 *
 * Enqueues clblasSgemv on one command queue (that of A's context).  The
 * wait list handed to clBLAS is `evl` with `num_ev` entries, or NULL when
 * `num_ev` is zero.
 *
 * Failures are handled by CLB_CHECK, which is given ctx->err (macro defined
 * elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int sgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX,
                 float beta,
                 gpudata *Y, size_t offY, int incY)
{
  cl_event ev;
  cl_event evl[3];
  cl_uint num_ev = 0;
  cl_ctx *ctx = A->ctx;

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  CLB_CHECK(ctx->err,
            clblasSgemv(convO(order), convT(transA), M, N, alpha,
                        A->buf, offA, lda,
                        X->buf, offX, incX,
                        beta,
                        Y->buf, offY, incY,
                        1, &ctx->q,
                        num_ev, num_ev != 0 ? evl : NULL, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision DOT routed through clBLAS.
 *
 * clblasDdot is given an extra buffer argument; one of N * sizeof(double)
 * bytes is allocated through opencl_ops.buffer_alloc for the duration of
 * the call and released right after clblasDdot returns, whatever its
 * status.
 *
 * Returns:
 *   - ctx->err->code when the temporary buffer cannot be allocated,
 *   - the value of error_clblas(ctx->err, "clblasDdot", status) when
 *     clBLAS does not report clblasSuccess,
 *   - GA_NO_ERROR otherwise.
 */
static int ddot(size_t N,
                gpudata *X, size_t offX, size_t incX,
                gpudata *Y, size_t offY, size_t incY,
                gpudata *Z, size_t offZ)
{
  gpudata *scratch;
  cl_event ev;
  cl_event evl[3];
  cl_uint num_ev = 0;
  clblasStatus status;
  cl_ctx *ctx = X->ctx;

  scratch = opencl_ops.buffer_alloc((gpucontext*)ctx, N*sizeof(double), NULL,
                                    GA_BUFFER_READ_WRITE);
  if (scratch == NULL) {
    return ctx->err->code;
  }

  /* ARRAY_INIT / ARRAY_FINI are macros defined elsewhere in the file;
   * they presumably access `evl`, `num_ev` and `ev` by name. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(Z);

  status = clblasDdot(N,
                      Z->buf, offZ,
                      X->buf, offX, incX,
                      Y->buf, offY, incY,
                      scratch->buf,
                      1, &ctx->q,
                      num_ev, num_ev == 0 ? NULL : evl, &ev);

  /* The temporary buffer is released before the status is inspected so it
   * is not leaked on the error path. */
  opencl_ops.buffer_release(scratch);

  if (status != clblasSuccess) {
    return error_clblas(ctx->err, "clblasDdot", status);
  }

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(Z);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision GER routed through CLBlast.
 *
 * Enqueues CLBlastDger on the command queue of X's context, forwarding
 * buffers, offsets and strides as given.
 *
 * Failures are handled by CLBT_CHECK, which is given ctx->err (macro
 * defined elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int dger(cb_order order, size_t M, size_t N, double alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda)
{
  cl_event ev;
  cl_ctx *ctx = X->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  CLBT_CHECK(ctx->err,
             CLBlastDger(convO(order), M, N, alpha,
                         X->buf, offX, incX,
                         Y->buf, offY, incY,
                         A->buf, offA, lda,
                         &ctx->q, &ev));

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}
/*
 * Double-precision DOT routed through CLBlast.
 *
 * Enqueues CLBlastDdot on the command queue of X's context.  Z (at offZ)
 * is passed as the first buffer argument, followed by X and Y with their
 * offsets and strides.
 *
 * Failures are handled by CLBT_CHECK, which is given ctx->err (macro
 * defined elsewhere; presumably it records the error and returns from this
 * function).  Returns GA_NO_ERROR when the call went through.
 */
static int ddot(size_t N,
                gpudata *X, size_t offX, size_t incX,
                gpudata *Y, size_t offY, size_t incY,
                gpudata *Z, size_t offZ)
{
  cl_event ev;
  cl_ctx *ctx = X->ctx;

  /* Macros defined elsewhere in the file; ARRAY_FINI presumably uses the
   * local `ev` by name. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(Z);

  CLBT_CHECK(ctx->err,
             CLBlastDdot(N,
                         Z->buf, offZ,
                         X->buf, offX, incX,
                         Y->buf, offY, incY,
                         &ctx->q, &ev));

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(Z);

  clReleaseEvent(ev);
  return GA_NO_ERROR;
}