Code Example #1
static int sgemv(cb_order order, cb_transpose transA, size_t M, size_t N,
                 float alpha, gpudata *A, size_t offA, size_t lda,
                 gpudata *X, size_t offX, int incX, float beta,
                 gpudata *Y, size_t offY, int incY) {
  cl_ctx *ctx = A->ctx;
  cl_uint num_ev = 0;
  cl_event evl[3];
  cl_event ev;

  /* ARRAY_INIT / ARRAY_FINI are libgpuarray helper macros (definitions not
     shown here); they appear to collect each buffer's pending events into
     evl/num_ev before the call and to attach the resulting event afterwards. */
  ARRAY_INIT(A);
  ARRAY_INIT(X);
  ARRAY_INIT(Y);

  CLB_CHECK(ctx->err, clblasSgemv(convO(order), convT(transA), M, N, alpha,
                                  A->buf, offA, lda, X->buf, offX, incX,
                                  beta, Y->buf, offY, incY, 1, &ctx->q,
                                  num_ev, num_ev == 0 ? NULL : evl, &ev));

  ARRAY_FINI(A);
  ARRAY_FINI(X);
  ARRAY_FINI(Y);

  clReleaseEvent(ev);

  return GA_NO_ERROR;
}
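The wrapper above leans on libgpuarray-specific plumbing (ARRAY_INIT, ARRAY_FINI, CLB_CHECK, cl_ctx), so the raw clBLAS call sequence is easy to miss. The following is a minimal, self-contained sketch of that sequence; the matrix sizes, fill values, and row-major layout are illustrative assumptions rather than part of the example above, and most error checks are omitted for brevity.

#include <stdio.h>
#include <clBLAS.h>

int main(void) {
  /* Illustrative data: a 4x3 row-major matrix, computing Y = A * X. */
  enum { M = 4, N = 3 };
  float A[M * N], X[N], Y[M];
  for (int i = 0; i < M * N; i++) A[i] = 1.0f;
  for (int i = 0; i < N; i++) X[i] = 1.0f;
  for (int i = 0; i < M; i++) Y[i] = 0.0f;

  cl_platform_id platform;
  cl_device_id device;
  cl_int err;
  clGetPlatformIDs(1, &platform, NULL);
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL);
  cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
  cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err);

  clblasSetup();

  cl_mem bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               sizeof(A), A, &err);
  cl_mem bufX = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               sizeof(X), X, &err);
  cl_mem bufY = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                               sizeof(Y), Y, &err);

  /* Y = alpha * A * X + beta * Y, row-major, no transpose, lda = N. */
  cl_event ev;
  err = clblasSgemv(clblasRowMajor, clblasNoTrans, M, N,
                    1.0f, bufA, 0, N,
                    bufX, 0, 1,
                    0.0f, bufY, 0, 1,
                    1, &queue, 0, NULL, &ev);
  if (err != CL_SUCCESS) {
    fprintf(stderr, "clblasSgemv failed: %d\n", err);
    return 1;
  }
  clWaitForEvents(1, &ev);
  clEnqueueReadBuffer(queue, bufY, CL_TRUE, 0, sizeof(Y), Y, 0, NULL, NULL);
  for (int i = 0; i < M; i++) printf("Y[%d] = %f\n", i, Y[i]);

  clReleaseEvent(ev);
  clReleaseMemObject(bufY);
  clReleaseMemObject(bufX);
  clReleaseMemObject(bufA);
  clblasTeardown();
  clReleaseCommandQueue(queue);
  clReleaseContext(ctx);
  return 0;
}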
Code Example #2
File: clfunc_xgemv.hpp Project: AndreasMiller/clBLAS
template<>
void
xGemv<float>::
call_func()
{
    timer.Start(timer_id);

    clblasSgemv(order_, buffer_.trans_a_, buffer_.m_, buffer_.n_,
                buffer_.alpha_, buffer_.buf_a_, buffer_.offA_,
                buffer_.lda_, buffer_.buf_x_, 0, 1, buffer_.beta_,
                buffer_.buf_y_, 0, 1, numQueues, queues_, 0, NULL, &event_);

    clWaitForEvents(1, &event_);
    timer.Stop(timer_id);
}
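Example #2 measures elapsed time with a host-side timer wrapped around clWaitForEvents. An alternative, sketched below and not part of the benchmark code, is to read device-side timestamps from the event returned by clblasSgemv; this requires the command queue to have been created with CL_QUEUE_PROFILING_ENABLE, and the helper name event_elapsed_ms is illustrative.

#include <CL/cl.h>

// Sketch: device-side timing via OpenCL event profiling. Assumes the queue
// was created with CL_QUEUE_PROFILING_ENABLE.
static double event_elapsed_ms(cl_event ev)
{
    cl_ulong t_start = 0, t_end = 0;
    clWaitForEvents(1, &ev);
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START,
                            sizeof(cl_ulong), &t_start, NULL);
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END,
                            sizeof(cl_ulong), &t_end, NULL);
    return (t_end - t_start) * 1e-6;  // profiling timestamps are in nanoseconds
}

Calling event_elapsed_ms(event_) after the clblasSgemv call above would report only the device-side execution time, excluding host overhead.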
Code Example #3
File: TensorMath.cpp Project: imclab/cn24
void TensorMath::GEMV(const bool is_row_major, const bool transpose_A,
                      const int M, const int N, const datum alpha,
                      const Conv::Tensor &A, const int smA, const int ldA,
                      const Conv::Tensor &X, const int smX, const int incX,
                      const datum beta, Conv::Tensor &Y, const int smY,
                      const int incY)
{
#ifdef BUILD_CLBLAS
  ((Tensor&)A).MoveToGPU();
  ((Tensor&)X).MoveToGPU();
  Y.MoveToGPU(Y.hint_ignore_content_ && beta == 0.0);
  
  cl_event done_event = NULL;
  
  const int offA = A.width() * A.height() * A.maps() * smA;
  const int offX = X.width() * X.height() * X.maps() * smX;
  const int offY = Y.width() * Y.height() * Y.maps() * smY;
  
  cl_int err =
    clblasSgemv(is_row_major ? clblasRowMajor : clblasColumnMajor,
      transpose_A ? clblasTrans : clblasNoTrans,
      M, N, alpha,
      (cl_mem)A.cl_data_ptr_, offA, ldA,
      (cl_mem)X.cl_data_ptr_, offX, incX, beta,
      (cl_mem)Y.cl_data_ptr_, offY, incY,
      1, &(CLHelper::queue), 0, NULL, &done_event);
  
  if(err!=CL_SUCCESS)
    FATAL("Call to clblasSgemv failed. Error: " << err);
#else
#ifdef BUILD_OPENCL
  ((Tensor&)A).MoveToCPU();
  ((Tensor&)X).MoveToCPU();
  Y.MoveToCPU(Y.hint_ignore_content_ && beta == 0.0);
#endif
  
#ifdef BUILD_BLAS
  INNERGEMV(is_row_major ? CblasRowMajor : CblasColMajor,
            transpose_A ? CblasTrans : CblasNoTrans,
            M, N, alpha, A.data_ptr_const(0,0,0,smA),
            ldA, X.data_ptr_const(0,0,0,smX), incX, beta, Y.data_ptr(0,0,0,smY), incY);
#else
  if(!is_row_major)
    FATAL("Reference GEMV does not support column-major matrices!");
  
  // ...
  const datum* a_ptr = A.data_ptr_const(0, 0, 0, smA);
  const datum* x_ptr = X.data_ptr_const(0, 0, 0, smX);
  datum* y_ptr = Y.data_ptr(0, 0, 0, smY);
  
  #pragma omp parallel for default(shared)
  for(int i = 0; i < M; i++) {
    datum sum = 0.0;
    for(int j = 0; j < N; j++) {
      const datum a_value = transpose_A ?
        a_ptr[j * ldA + i]
      :
        a_ptr[i * ldA + j];
      
      const datum x_value = x_ptr[j * incX];
      sum += x_value * a_value;
    }
    if(beta == 0.0)
      y_ptr[i * incY] = alpha * sum;
    else 
      y_ptr[i * incY] = beta * y_ptr[i * incY] + alpha * sum;
  }
  
#endif // BUILD_BLAS
#endif // BUILD_CLBLAS
  Y.hint_ignore_content_ = false;
}
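In the BUILD_BLAS branch above, INNERGEMV is a cn24 macro whose definition is not shown here; for a single-precision datum it presumably forwards to a CBLAS routine such as cblas_sgemv. A minimal sketch of that direct call, assuming datum == float (the mapping of INNERGEMV to cblas_sgemv is an assumption, not something the excerpt confirms):

#include <cblas.h>

/* Sketch of the CBLAS call the BUILD_BLAS branch presumably reduces to when
 * datum is float. */
static void gemv_cblas_sketch(int M, int N, float alpha,
                              const float *A, int ldA,
                              const float *X, int incX,
                              float beta, float *Y, int incY)
{
  /* Y = alpha * A * X + beta * Y, with A stored row-major (M x N, ldA >= N). */
  cblas_sgemv(CblasRowMajor, CblasNoTrans, M, N, alpha,
              A, ldA, X, incX, beta, Y, incY);
}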