/*
 * Half-precision general matrix multiply, forwarded to CLBlastHgemm.
 *
 * order / transA / transB are translated with convO() / convT().  alpha and
 * beta arrive as float and are converted with float_to_half() before being
 * handed to CLBlast.  The call is issued on the queue of A's context
 * (ctx->q); B and C are passed through with their own buffers and offsets.
 * NOTE(review): nothing here checks that B and C belong to the same context
 * as A -- presumably the caller guarantees it; confirm.
 *
 * ARRAY_INIT, ARRAY_FINI and CLBT_CHECK are macros defined outside this
 * block.  NOTE(review): `ev` is only ever written by the CLBlast call and is
 * used unconditionally afterwards, so CLBT_CHECK presumably returns from
 * this function when CLBlast reports a failure (recording it via ctx->err).
 * ARRAY_INIT / ARRAY_FINI look like per-buffer event bookkeeping and may
 * refer to the local `ev` by name -- check the macro definitions before
 * renaming locals or reordering these statements.
 *
 * Returns GA_NO_ERROR after the call has been issued and clReleaseEvent()
 * has been called on the local event.
 */
static int hgemm(cb_order order, cb_transpose transA, cb_transpose transB, size_t M, size_t N, size_t K, float alpha, gpudata *A, size_t offA, size_t lda, gpudata *B, size_t offB, size_t ldb, float beta, gpudata *C, size_t offC, size_t ldc) { cl_ctx *ctx = A->ctx; cl_event ev; ARRAY_INIT(A); ARRAY_INIT(B); ARRAY_INIT(C); CLBT_CHECK(ctx->err, CLBlastHgemm(convO(order), convT(transA), convT(transB), M, N, K, float_to_half(alpha), A->buf, offA, lda, B->buf, offB, ldb, float_to_half(beta), C->buf, offC, ldc, &ctx->q, &ev)); ARRAY_FINI(A); ARRAY_FINI(B); ARRAY_FINI(C); clReleaseEvent(ev); return GA_NO_ERROR; }
/*
 * Batched half-precision GEMM: issues one CLBlastHgemm call per batch entry.
 *
 * For every i in [0, batchCount) the triple (A[i], B[i], C[i]) with offsets
 * (offA[i], offB[i], offC[i]) is multiplied using the shared M/N/K,
 * leading dimensions, order and transpose flags.  alpha and beta are given
 * as float and converted with float_to_half() for each call.
 *
 * The command queue and error slot are taken from the context of A[0].
 * NOTE(review): all buffers are presumably expected to live in that same
 * context -- nothing here verifies it.
 *
 * An empty batch (batchCount == 0) returns GA_NO_ERROR immediately, without
 * reading A[0].
 *
 * ARRAY_INIT, ARRAY_FINI and CLBT_CHECK are macros defined outside this
 * block.  NOTE(review): CLBT_CHECK presumably returns from this function on
 * a CLBlast failure, in which case the remaining batch entries are not
 * processed; ARRAY_FINI may refer to the local `ev` by name -- confirm.
 *
 * Returns GA_NO_ERROR when every entry has been issued.
 */
static int hgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, float alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      float beta, gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount) {
  cl_ctx *ctx;
  cl_event ev;
  size_t i;

  /* Nothing to do for an empty batch; A[0] may not even exist. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);

    CLBT_CHECK(ctx->err, CLBlastHgemm(convO(order), convT(transA), convT(transB), M, N, K, float_to_half(alpha), A[i]->buf, offA[i], lda, B[i]->buf, offB[i], ldb, float_to_half(beta), C[i]->buf, offC[i], ldc, &ctx->q, &ev));

    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Half-precision matrix-vector multiply, forwarded to CLBlastHgemv.
 *
 * order / transA are translated with convO() / convT().  alpha and beta
 * arrive as float and are converted with float_to_half() before being handed
 * to CLBlast.  A, X and Y are passed with their buffers, offsets and
 * lda / incX / incY unchanged.  The call is issued on the queue of A's
 * context (ctx->q).
 * NOTE(review): nothing here checks that X and Y belong to the same context
 * as A -- presumably the caller guarantees it; confirm.
 *
 * ARRAY_INIT, ARRAY_FINI and CLBT_CHECK are macros defined outside this
 * block.  NOTE(review): `ev` is only ever written by the CLBlast call and is
 * used unconditionally afterwards, so CLBT_CHECK presumably returns from
 * this function when CLBlast reports a failure (recording it via ctx->err).
 * ARRAY_INIT / ARRAY_FINI look like per-buffer event bookkeeping and may
 * refer to the local `ev` by name -- check the macro definitions before
 * renaming locals or reordering these statements.
 *
 * Returns GA_NO_ERROR after the call has been issued and clReleaseEvent()
 * has been called on the local event.
 */
static int hgemv(cb_order order, cb_transpose transA, size_t M, size_t N, float alpha, gpudata *A, size_t offA, size_t lda, gpudata *X, size_t offX, int incX, float beta, gpudata *Y, size_t offY, int incY) { cl_ctx *ctx = A->ctx; cl_event ev; ARRAY_INIT(A); ARRAY_INIT(X); ARRAY_INIT(Y); CLBT_CHECK(ctx->err, CLBlastHgemv(convO(order), convT(transA), M, N, float_to_half(alpha), A->buf, offA, lda, X->buf, offX, incX, float_to_half(beta), Y->buf, offY, incY, &ctx->q, &ev)); ARRAY_FINI(A); ARRAY_FINI(X); ARRAY_FINI(Y); clReleaseEvent(ev); return GA_NO_ERROR; }
static void CompareImages(const EXRImage &a, const EXRImage &b, bool halfQuantize) { EXPECT_EQ(a.num_channels, b.num_channels); EXPECT_EQ(a.width, b.width); EXPECT_EQ(a.height, b.height); for (int i = 0; i < a.num_channels; ++i) { EXPECT_EQ(a.pixel_types[i], b.pixel_types[i]); EXPECT_EQ(std::string(a.channel_names[i]), std::string(b.channel_names[i])); } for (int i = 0; i < a.width * a.height; ++i) { for (int c = 0; c < a.num_channels; ++c) { float ap = ((float *)a.images[c])[i]; float bp = ((float *)b.images[c])[i]; if (std::isnan(ap) && std::isnan(bp)) continue; if (halfQuantize) { int ha = float_to_half(ap); int hb = float_to_half(bp); EXPECT_EQ(ha, hb) << "offset " << i << ", channel " << c << ", fa " << ap << ", fb " << bp; } else { EXPECT_EQ(ap, bp) << "offset " << i << ", channel " << c; } } } }
/*
 * Half-precision rank-1 update, forwarded to CLBlastHger.
 *
 * order is translated with convO(); alpha arrives as float and is converted
 * with float_to_half() before being handed to CLBlast.  X, Y and A are
 * passed with their buffers, offsets and incX / incY / lda unchanged.  The
 * call is issued on the queue of X's context (ctx->q).
 * NOTE(review): nothing here checks that Y and A belong to the same context
 * as X -- presumably the caller guarantees it; confirm.
 *
 * The CLBlast status goes through CLBT_CHECK(ctx->err, ...), the same way
 * the other half-precision wrappers in this file (hgemm, hgemv, hgemmBatch)
 * handle it, instead of a hand-written comparison against kSuccess.
 * NOTE(review): CLBT_CHECK is defined outside this block; it presumably
 * records the failure in ctx->err and returns a BLAS error code from this
 * function -- confirm the returned code matches what callers expect.
 *
 * ARRAY_INIT / ARRAY_FINI are macros defined outside this block and may
 * refer to the local `ev` by name.
 *
 * Returns GA_NO_ERROR after the call has been issued and clReleaseEvent()
 * has been called on the local event.
 */
static int hger(cb_order order, size_t M, size_t N, float alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda) {
  cl_ctx *ctx = X->ctx;
  cl_event ev;

  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  CLBT_CHECK(ctx->err, CLBlastHger(convO(order), M, N, float_to_half(alpha), X->buf, offX, incX, Y->buf, offY, incY, A->buf, offA, lda, &ctx->q, &ev));

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
/*
 * Fill `buffer` as an arithmetic progression of half-precision values.
 *
 * The first two elements are read (converted with half_to_float()) and
 * define the start value and the step (second minus first).  Elements
 * 2 .. length-1 are then overwritten with float_to_half(start + i*step),
 * the arithmetic being done on float values.  buffer[0] and buffer[1] are
 * left untouched.
 *
 * buffer[0] and buffer[1] are read unconditionally.
 * NOTE(review): callers presumably guarantee length >= 2 -- confirm.
 */
static void HALF_fill(npy_half *buffer, npy_intp length, void *NPY_UNUSED(ignored)) {
  float first = half_to_float(buffer[0]);
  float step = half_to_float(buffer[1]);
  npy_intp idx = 2;

  step -= first;

  while (idx < length) {
    buffer[idx] = float_to_half(first + idx*step);
    ++idx;
  }
}
/*
 * Pack a 4-component float colour into the storage pointed to by `packed`,
 * encoded according to `type`.
 *
 * `packed` must have room for four floats.  It is first set to
 * (0, 0, 0, 1) as floats; the selected encoding then overwrites the leading
 * 32-bit word(s) through an `unsigned` view of the same storage:
 *
 *   GL_HALF_FLOAT      each component via float_to_half(); two 16-bit values
 *                      per word, words 0 and 1 written.
 *   GL_UNSIGNED_SHORT  each component clamped to [0, 1], scaled by 65535 and
 *                      rounded; two 16-bit values per word, words 0 and 1.
 *   GL_UNSIGNED_BYTE   each component clamped to [0, 1], scaled by 255 and
 *                      rounded; four bytes in word 0.
 *   GL_BYTE            each component clamped to [-128/127, 1], then
 *                      round(127 * c + 128) narrowed to GLubyte; four bytes
 *                      in word 0.
 *
 * In every case component 0 lands in the least significant bits.  Words not
 * written by the chosen encoding keep their float defaults.  Any other
 * `type` leaves `packed` as (0, 0, 0, 1).
 *
 * The shift operands are converted to `unsigned` first: a GLushort/GLubyte
 * operand is otherwise promoted to signed int, and shifting a set bit into
 * the sign position (e.g. us[1] >= 0x8000 << 16, ub[3] >= 0x80 << 24) is
 * undefined behaviour.
 *
 * NOTE(review): `p` views float storage through an `unsigned` pointer; this
 * was kept as in the original but depends on the compiler tolerating that
 * aliasing -- consider memcpy if the file can include <string.h>.
 */
void pack(float *packed, const float *color, GLenum type) {
  unsigned *p = (unsigned *) packed;
  GLubyte ub[4];
  GLushort us[4];
  unsigned i;

  packed[0] = 0.0f;
  packed[1] = 0.0f;
  packed[2] = 0.0f;
  packed[3] = 1.0f;

  switch (type) {
  case GL_HALF_FLOAT:
    for (i = 0; i < 4; i++)
      us[i] = float_to_half(color[i]);

    p[0] = (unsigned) us[0] | ((unsigned) us[1] << 16);
    p[1] = (unsigned) us[2] | ((unsigned) us[3] << 16);
    break;

  case GL_UNSIGNED_SHORT:
    for (i = 0; i < 4; i++) {
      const float tmp = CLAMP(color[i], 0.0, 1.0);
      us[i] = (GLushort) round(65535.0 * tmp);
    }

    p[0] = (unsigned) us[0] | ((unsigned) us[1] << 16);
    p[1] = (unsigned) us[2] | ((unsigned) us[3] << 16);
    break;

  case GL_UNSIGNED_BYTE:
    for (i = 0; i < 4; i++) {
      const float tmp = CLAMP(color[i], 0.0, 1.0);
      ub[i] = (GLubyte) round(255.0 * tmp);
    }

    p[0] = (unsigned) ub[0] | ((unsigned) ub[1] << 8)
         | ((unsigned) ub[2] << 16) | ((unsigned) ub[3] << 24);
    break;

  case GL_BYTE:
    for (i = 0; i < 4; i++) {
      const float tmp = CLAMP(color[i], -(128.0 / 127.0), 1.0);
      ub[i] = (GLubyte) round(127.0 * tmp + 128.0);
    }

    p[0] = (unsigned) ub[0] | ((unsigned) ub[1] << 8)
         | ((unsigned) ub[2] << 16) | ((unsigned) ub[3] << 24);
    break;

  default:
    /* Unrecognised type: leave the (0, 0, 0, 1) float defaults in place. */
    break;
  }
}
/*
 * Dot product of two strided sequences of half-precision values.
 *
 * ip1 / ip2 point at the first element of each operand; is1 / is2 are the
 * strides in bytes (the pointers are advanced as char pointers).  Each pair
 * of elements is converted with half_to_float(), multiplied, and added to a
 * float accumulator that starts at 0.  After n pairs the accumulator is
 * converted with float_to_half() and stored as a single npy_half at `op`.
 *
 * When n <= 0 no element is read and the stored result is the conversion
 * of 0.0f.
 */
static void HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, void *NPY_UNUSED(ignore)) {
  npy_half *out = (npy_half *)op;
  float acc = 0.0f;
  npy_intp remaining;

  for (remaining = n; remaining > 0; --remaining) {
    acc += half_to_float(*((npy_half *)ip1)) * half_to_float(*((npy_half *)ip2));
    ip1 += is1;
    ip2 += is2;
  }

  *out = float_to_half(acc);
}