/*
 * Half-precision GEMM wrapper: forwards the operands to CLBlastHgemm on the
 * command queue (ctx->q) of the context attached to A.
 *
 * alpha and beta arrive as float and are passed through float_to_half()
 * before being handed to CLBlast; order/transA/transB are translated with
 * convO()/convT().
 *
 * The context is read from A only.
 * NOTE(review): B and C are presumably expected to live in the same context
 * -- confirm with callers.
 *
 * Returns GA_NO_ERROR when execution reaches the end of the function.
 * NOTE(review): CLBT_CHECK is defined elsewhere; it presumably returns early
 * with an error recorded in ctx->err when the CLBlast call fails -- confirm.
 */
static int hgemm(cb_order order, cb_transpose transA, cb_transpose transB,
                 size_t M, size_t N, size_t K, float alpha,
                 gpudata *A, size_t offA, size_t lda,
                 gpudata *B, size_t offB, size_t ldb, float beta,
                 gpudata *C, size_t offC, size_t ldc) {
  cl_ctx *ctx = A->ctx;
  cl_event ev;

  /* NOTE(review): ARRAY_INIT/ARRAY_FINI are macros defined elsewhere;
   * presumably they order this call against earlier work on each buffer and
   * then attach the new event `ev` to it -- confirm against the macros. */
  ARRAY_INIT(A);
  ARRAY_INIT(B);
  ARRAY_INIT(C);

  CLBT_CHECK(ctx->err, CLBlastHgemm(convO(order), convT(transA), convT(transB),
                                    M, N, K, float_to_half(alpha),
                                    A->buf, offA, lda, B->buf, offB, ldb,
                                    float_to_half(beta), C->buf, offC, ldc,
                                    &ctx->q, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(B);
  ARRAY_FINI(C);

  /* Drop this function's own reference to the event produced by CLBlast. */
  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
/*
 * Batched half-precision GEMM: issues one CLBlastHgemm call per batch entry,
 * using A[i]/B[i]/C[i] with their per-entry offsets offA[i]/offB[i]/offC[i].
 * M, N, K, the leading dimensions and alpha/beta are shared by all entries.
 *
 * alpha and beta arrive as float and are passed through float_to_half()
 * for every call. All calls are enqueued on the queue of A[0]'s context.
 * NOTE(review): every buffer in the batch is presumably expected to belong
 * to that same context -- confirm with callers.
 *
 * An empty batch (batchCount == 0) is a no-op and returns GA_NO_ERROR without
 * touching any of the arrays.
 *
 * Returns GA_NO_ERROR when all entries have been submitted.
 * NOTE(review): CLBT_CHECK is defined elsewhere; it presumably returns early
 * with an error recorded in ctx->err when a CLBlast call fails -- confirm.
 */
static int hgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                      size_t M, size_t N, size_t K, float alpha,
                      gpudata **A, size_t *offA, size_t lda,
                      gpudata **B, size_t *offB, size_t ldb,
                      float beta, gpudata **C, size_t *offC, size_t ldc,
                      size_t batchCount) {
  cl_ctx *ctx;
  cl_event ev;
  size_t i;

  /* With an empty batch there is no A[0] to take the context from, so bail
   * out before dereferencing it. */
  if (batchCount == 0)
    return GA_NO_ERROR;

  ctx = A[0]->ctx;

  for (i = 0; i < batchCount; i++) {
    ARRAY_INIT(A[i]);
    ARRAY_INIT(B[i]);
    ARRAY_INIT(C[i]);
    CLBT_CHECK(ctx->err, CLBlastHgemm(convO(order), convT(transA),
                                      convT(transB), M, N, K,
                                      float_to_half(alpha),
                                      A[i]->buf, offA[i], lda,
                                      B[i]->buf, offB[i], ldb,
                                      float_to_half(beta),
                                      C[i]->buf, offC[i], ldc, &ctx->q, &ev));
    ARRAY_FINI(A[i]);
    ARRAY_FINI(B[i]);
    ARRAY_FINI(C[i]);
    /* Drop this function's own reference to the per-entry event. */
    clReleaseEvent(ev);
  }

  return GA_NO_ERROR;
}
/*
 * Half-precision GEMV wrapper: forwards the matrix A and the vectors X and Y
 * to CLBlastHgemv on the command queue (ctx->q) of the context attached to A.
 *
 * alpha and beta arrive as float and are passed through float_to_half()
 * before being handed to CLBlast; order/transA are translated with
 * convO()/convT().
 *
 * The context is read from A only.
 * NOTE(review): X and Y are presumably expected to live in the same context
 * -- confirm with callers.
 *
 * Returns GA_NO_ERROR when execution reaches the end of the function.
 * NOTE(review): CLBT_CHECK is defined elsewhere; it presumably returns early
 * with an error recorded in ctx->err when the CLBlast call fails -- confirm.
 */
static int hgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 float alpha, gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX, float beta,
                 gpudata *Y, size_t offY, int incY) {
  cl_ctx *ctx = A->ctx;
  cl_event ev;

  /* NOTE(review): ARRAY_INIT/ARRAY_FINI are macros defined elsewhere;
   * presumably they order this call against earlier work on each buffer and
   * then attach the new event `ev` to it -- confirm against the macros. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  CLBT_CHECK(ctx->err, CLBlastHgemv(convO(order), convT(transA), M, N,
                                    float_to_half(alpha),
                                    A->buf, offA, lda, X->buf, offX, incX,
                                    float_to_half(beta),
                                    Y->buf, offY, incY, &ctx->q, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  /* Drop this function's own reference to the event produced by CLBlast. */
  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
Example #4
0
static void CompareImages(const EXRImage &a, const EXRImage &b,
                          bool halfQuantize) {
   EXPECT_EQ(a.num_channels, b.num_channels);
   EXPECT_EQ(a.width, b.width);
   EXPECT_EQ(a.height, b.height);
   for (int i = 0; i < a.num_channels; ++i) {
     EXPECT_EQ(a.pixel_types[i], b.pixel_types[i]);
     EXPECT_EQ(std::string(a.channel_names[i]),
               std::string(b.channel_names[i]));
   }
   for (int i = 0; i < a.width * a.height; ++i) {
     for (int c = 0; c < a.num_channels; ++c) {
       float ap = ((float *)a.images[c])[i];
       float bp = ((float *)b.images[c])[i];
       if (std::isnan(ap) && std::isnan(bp))
         continue;
       if (halfQuantize) {
         int ha = float_to_half(ap);
         int hb = float_to_half(bp);
         EXPECT_EQ(ha, hb) <<  "offset " << i << ", channel " << c <<
             ", fa " << ap << ", fb " << bp;
       }
       else {
         EXPECT_EQ(ap, bp) << "offset " << i << ", channel " << c;
       }
     }
   }
}
/*
 * Half-precision rank-1 update wrapper: forwards the vectors X and Y and the
 * matrix A to CLBlastHger on the command queue (ctx->q) of the context
 * attached to X.
 *
 * alpha arrives as float and is passed through float_to_half() before being
 * handed to CLBlast; order is translated with convO().
 *
 * Returns GA_BLAS_ERROR when CLBlastHger reports anything other than
 * kSuccess (ARRAY_FINI and the event release are skipped in that case),
 * GA_NO_ERROR otherwise.
 */
static int hger(cb_order order, size_t M, size_t N, float alpha,
                gpudata *X, size_t offX, int incX,
                gpudata *Y, size_t offY, int incY,
                gpudata *A, size_t offA, size_t lda) {
  cl_ctx *ctx = X->ctx;
  cl_event ev;

  /* NOTE(review): ARRAY_INIT/ARRAY_FINI are macros defined elsewhere;
   * presumably they order this call against earlier work on each buffer and
   * then attach the new event `ev` to it -- confirm against the macros. */
  ARRAY_INIT(X);
  ARRAY_INIT(Y);
  ARRAY_INIT(A);

  if (CLBlastHger(convO(order), M, N, float_to_half(alpha),
                  X->buf, offX, incX,
                  Y->buf, offY, incY,
                  A->buf, offA, lda,
                  &ctx->q, &ev) != kSuccess) {
    return GA_BLAS_ERROR;
  }

  ARRAY_FINI(X);
  ARRAY_FINI(Y);
  ARRAY_FINI(A);

  /* Drop this function's own reference to the event produced by CLBlast. */
  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
Example #6
0
/*
 * Extend an arithmetic progression of half-precision values.
 *
 * buffer[0] and buffer[1] are read unconditionally and define the first
 * value and the step (buffer[1] - buffer[0], computed in float). Elements
 * buffer[2] .. buffer[length-1] are then overwritten with
 * first + index * step, converted back with float_to_half(). Nothing is
 * written when length <= 2.
 */
static void
HALF_fill(npy_half *buffer, npy_intp length, void *NPY_UNUSED(ignored))
{
    const float first = half_to_float(buffer[0]);
    float step = half_to_float(buffer[1]);
    npy_intp idx = 2;

    step -= first;
    while (idx < length) {
        buffer[idx] = float_to_half(first + idx*step);
        ++idx;
    }
}
Example #7
0
/*
 * Encode a 4-component color into the bit layout selected by `type` and
 * store the resulting bits in the memory of `packed`.
 *
 * packed  Output, four floats. First set to (0, 0, 0, 1); the leading
 *         32-bit words are then overwritten with the encoded bits.
 * color   Input, four float components.
 * type    GL_HALF_FLOAT / GL_UNSIGNED_SHORT: two 16-bit components per
 *         word, filling the first two words.
 *         GL_UNSIGNED_BYTE / GL_BYTE: four 8-bit components in the first
 *         word.
 *         Any other value leaves the (0, 0, 0, 1) default in place.
 *
 * Components are widened to unsigned before shifting: GLushort/GLubyte
 * promote to int, and shifting a value with its top bit set into the sign
 * bit of an int is undefined behaviour.
 *
 * NOTE(review): the words are written through an `unsigned *` alias of the
 * float buffer, as in the original code; callers presumably consume the raw
 * bytes -- confirm.
 */
void
pack(float *packed, const float *color, GLenum type)
{
    unsigned *p = (unsigned *) packed;
    GLubyte ub[4];
    GLushort us[4];
    unsigned i;

    packed[0] = 0.0f;
    packed[1] = 0.0f;
    packed[2] = 0.0f;
    packed[3] = 1.0f;

    switch (type) {
    case GL_HALF_FLOAT:
        for (i = 0; i < 4; i++)
            us[i] = float_to_half(color[i]);

        p[0] = (unsigned) us[0] | ((unsigned) us[1] << 16);
        p[1] = (unsigned) us[2] | ((unsigned) us[3] << 16);
        break;

    case GL_UNSIGNED_SHORT:
        for (i = 0; i < 4; i++) {
            const float tmp = CLAMP(color[i], 0.0, 1.0);
            us[i] = (GLushort) round(65535.0 * tmp);
        }

        p[0] = (unsigned) us[0] | ((unsigned) us[1] << 16);
        p[1] = (unsigned) us[2] | ((unsigned) us[3] << 16);
        break;

    case GL_UNSIGNED_BYTE:
        for (i = 0; i < 4; i++) {
            const float tmp = CLAMP(color[i], 0.0, 1.0);
            ub[i] = (GLubyte) round(255.0 * tmp);
        }

        p[0] = (unsigned) ub[0] | ((unsigned) ub[1] << 8) |
               ((unsigned) ub[2] << 16) | ((unsigned) ub[3] << 24);
        break;

    case GL_BYTE:
        for (i = 0; i < 4; i++) {
            const float tmp =
                CLAMP(color[i], -(128.0 / 127.0), 1.0);
            /* Biased encoding: the clamped range maps onto 0..255. */
            ub[i] = (GLubyte) round(127.0 * tmp + 128.0);
        }

        p[0] = (unsigned) ub[0] | ((unsigned) ub[1] << 8) |
               ((unsigned) ub[2] << 16) | ((unsigned) ub[3] << 24);
        break;

    default:
        /* Unhandled types keep the (0, 0, 0, 1) default written above. */
        break;
    }
}
Example #8
0
/*
 * Dot product of two strided sequences of half-precision values.
 *
 * ip1/ip2 point at the first element of each operand and are advanced by
 * is1/is2 bytes per element. Each pair is converted with half_to_float(),
 * multiplied and accumulated in a float; the sum is converted back with
 * float_to_half() and stored at op. With n <= 0 the stored result is the
 * conversion of 0.0f.
 */
static void
HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n,
           void *NPY_UNUSED(ignore))
{
    float acc = 0.0f;
    npy_intp k = 0;

    while (k < n) {
        /* Keep the multiply-accumulate as one expression. */
        acc += half_to_float(*((npy_half *)ip1)) *
               half_to_float(*((npy_half *)ip2));
        ip1 += is1;
        ip2 += is2;
        ++k;
    }

    *((npy_half *)op) = float_to_half(acc);
}