/*
 * sgemv: enqueue a single-precision matrix-vector product through clBLAS.
 *
 * Forwards its arguments to clblasSgemv() on the single command queue
 * ctx->q, where ctx is taken from A->ctx (X and Y are presumably expected
 * to belong to the same context -- TODO confirm against callers).
 *
 *   order, transA    - layout / transpose selectors, passed through convO()
 *                      and convT() before being handed to clBLAS.
 *   M, N, lda        - matrix dimensions and leading dimension, forwarded
 *                      unchanged.
 *   alpha, beta      - scalars, forwarded unchanged.
 *   A, X, Y          - device buffers; their ->buf members are passed to
 *                      clBLAS.
 *   offA, offX, offY - offsets into the respective buffers, forwarded
 *                      unchanged.
 *   incX, incY       - vector strides, forwarded unchanged.
 *
 * Returns GA_NO_ERROR when the end of the function is reached.
 *
 * NOTE(review): ARRAY_INIT, ARRAY_FINI and CLB_CHECK are macros defined
 * outside this function. They presumably (a) gather each buffer's pending
 * event into evl[] / num_ev (evl has room for three entries, one per
 * buffer), (b) return early with an error recorded via ctx->err when the
 * clBLAS call fails, and (c) attach the completion event ev to each buffer
 * afterwards -- confirm against the macro definitions. Because the macros
 * may refer to the locals evl, num_ev and ev by name, do not rename them.
 *
 * The function drops its own reference to ev with clReleaseEvent() before
 * returning.
 */
static int sgemv(cb_order order, cb_transpose transA, size_t M, size_t N, float alpha, gpudata *A, size_t offA, size_t lda, gpudata *X, size_t offX, int incX, float beta, gpudata *Y, size_t offY, int incY) { cl_ctx *ctx = A->ctx; cl_uint num_ev = 0; cl_event evl[3]; cl_event ev; ARRAY_INIT(A); ARRAY_INIT(X); ARRAY_INIT(Y); CLB_CHECK(ctx->err, clblasSgemv(convO(order), convT(transA), M, N, alpha, A->buf, offA, lda, X->buf, offX, incX, beta, Y->buf, offY, incY, 1, &ctx->q, num_ev, num_ev == 0 ? NULL : evl, &ev)); ARRAY_FINI(A); ARRAY_FINI(X); ARRAY_FINI(Y); clReleaseEvent(ev); return GA_NO_ERROR; }
// Runs one timed single-precision GEMV through clBLAS.
//
// The timed region covers both the enqueue (clblasSgemv) and the wait for
// the resulting event, so the timer sees the call through to completion.
// The status returned by clblasSgemv is not inspected.
void xGemv<float>::call_func()
{
    // The x and y vectors are always passed with zero offset and unit stride.
    const size_t vector_offset = 0;
    const int vector_stride = 1;

    timer.Start(timer_id);

    clblasSgemv(order_,
                buffer_.trans_a_,
                buffer_.m_,
                buffer_.n_,
                buffer_.alpha_,
                buffer_.buf_a_,
                buffer_.offA_,
                buffer_.lda_,
                buffer_.buf_x_,
                vector_offset,
                vector_stride,
                buffer_.beta_,
                buffer_.buf_y_,
                vector_offset,
                vector_stride,
                numQueues,
                queues_,
                0,      // no events to wait on before the kernel starts
                NULL,
                &event_);

    // Block until the enqueued work has finished before stopping the timer.
    clWaitForEvents(1, &event_);

    timer.Stop(timer_id);
}
void TensorMath::GEMV(const bool is_row_major, const bool transpose_A, const int M, const int N, const datum alpha, const Conv::Tensor &A, const int smA, const int ldA, const Conv::Tensor &X, const int smX, const int incX, const datum beta, Conv::Tensor &Y, const int smY, const int incY) { #ifdef BUILD_CLBLAS ((Tensor&)A).MoveToGPU(); ((Tensor&)X).MoveToGPU(); Y.MoveToGPU(Y.hint_ignore_content_ && beta == 0.0); cl_event done_event = NULL; const int offA = A.width() * A.height() * A.maps() * smA; const int offX = X.width() * X.height() * X.maps() * smX; const int offY = Y.width() * Y.height() * Y.maps() * smY; cl_int err = clblasSgemv(is_row_major ? clblasRowMajor : clblasColumnMajor, transpose_A ? clblasTrans : clblasNoTrans, M, N, alpha, (cl_mem)A.cl_data_ptr_, offA, ldA, (cl_mem)X.cl_data_ptr_, offX, incX, beta, (cl_mem)Y.cl_data_ptr_, offY, incY, 1, &(CLHelper::queue), 0, NULL, &done_event); if(err!=CL_SUCCESS) FATAL("Call to clblasSgemv failed. Error: " << err); #else #ifdef BUILD_OPENCL ((Tensor&)A).MoveToCPU(); ((Tensor&)X).MoveToCPU(); Y.MoveToCPU(Y.hint_ignore_content_ && beta == 0.0); #endif #ifdef BUILD_BLAS INNERGEMV(is_row_major ? CblasRowMajor : CblasColMajor, transpose_A ? CblasTrans : CblasNoTrans, M, N, alpha, A.data_ptr_const(0,0,0,smA), ldA, X.data_ptr_const(0,0,0,smX), incX, beta, Y.data_ptr(0,0,0,smY), incY); #else if(!is_row_major) FATAL("Reference GEMV does not support column-major matrices!"); // ... const datum* a_ptr = A.data_ptr_const(0, 0, 0, smA); const datum* x_ptr = X.data_ptr_const(0, 0, 0, smX); datum* y_ptr = Y.data_ptr(0, 0, 0, smY); #pragma omp parallel for default(shared) for(int i = 0; i < M; i++) { datum sum = 0.0; for(int j = 0; j < N; j++) { const datum a_value = transpose_A ? 
a_ptr[j * ldA + i] : a_ptr[i * ldA + j]; const datum x_value = x_ptr[j * incX]; sum += x_value * a_value; } if(beta == 0.0) y_ptr[i * incY] = alpha * sum; else y_ptr[i * incY] = beta * y_ptr[i * incY] + alpha * sum; } #endif // BUILD_BLAS #endif // BUILD_CLBLAS Y.hint_ignore_content_ = false; }