void os_error(int line) { #if _WIN32 dbg_printf("System error: %ldL\n",GetLastError()); #endif local_assert(line); }
void Extratest(size_t M, size_t N, size_t lda, size_t ldb, T alpha, T delta) { T *A, *B, *blasB, *clblasB; cl_mem bufA, bufB; clMath::BlasBase *base; cl_event *events; cl_int err; base = clMath::BlasBase::getInstance(); if ((typeid(T) == typeid(cl_double) || typeid(T) == typeid(DoubleComplex)) && !base->isDevSupportDoublePrecision()) { std::cerr << ">> WARNING: The target device doesn't support native " "double precision floating point arithmetic" << std::endl << ">> Test skipped" << std::endl; SUCCEED(); return; } clblasOrder order = clblasColumnMajor; clblasSide side = clblasLeft; clblasUplo uplo = clblasUpper; clblasTranspose trans = clblasNoTrans; clblasDiag diag = clblasNonUnit; A = new T[M * lda]; B = new T[N * ldb]; blasB = new T[N * ldb]; clblasB = new T[N * ldb]; memset(A, 0, M*lda*sizeof(T)); memset(B, 0, N*ldb*sizeof(T)); for(int i=0; i<M; i++) // down each column { for(int j=0; j<M; j++) // down each row { AssignA<T>(A, i, j, lda); } } for(int i=0; i<N; i++) // down each column { for(int j=0; j<M; j++) // down each row { AssignB<T>(B, i, j, ldb, M); } } memcpy(blasB, B, N*ldb*sizeof(T)); memcpy(clblasB, B, N*ldb*sizeof(T)); ::std::cerr << "Calling reference xTRSM routine... "; ::clMath::blas::trsm(order, side, uplo, trans, diag, M, N, alpha, A, lda, blasB, ldb); bufA = base->createEnqueueBuffer(A, M*lda*sizeof(T), 0, CL_MEM_READ_ONLY); bufB = base->createEnqueueBuffer(clblasB, N*ldb*sizeof(T), 0, CL_MEM_READ_WRITE); events = new cl_event[1]; memset(events, 0, sizeof(cl_event)); if ((bufA == NULL) || (bufB == NULL)) { /* Skip the test, the most probable reason is * matrix too big for a device. */ releaseMemObjects(bufA, bufB); deleteBuffers<T>(A, B, blasB, clblasB, NULL); delete[] events; ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." << ::std::endl << ">> Can't execute the test, because data is not transfered to GPU." << ::std::endl << ">> Test skipped." << ::std::endl; SUCCEED(); return; } ::std::cerr << "Calling clblas xTRSM routine... "; err = (cl_int)::clMath::clblas::trsm(order, side, uplo, trans, diag, M, N, alpha, bufA, 0, lda, bufB, 0, ldb, 1, base->commandQueues(), 0, NULL, events); if (err != CL_SUCCESS) { releaseMemObjects(bufA, bufB); deleteBuffers<T>(A, B, blasB, clblasB, NULL); delete[] events; ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSM() failed"; } err = waitForSuccessfulFinish(1, base->commandQueues(), events); if (err != CL_SUCCESS) { releaseMemObjects(bufA, bufB); deleteBuffers<T>(A, B, blasB, clblasB, NULL); delete[] events; ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; } ::std::cerr << "Done" << ::std::endl; clEnqueueReadBuffer(base->commandQueues()[0], bufB, CL_TRUE, 0, N*ldb*sizeof(T), clblasB, 0, NULL, NULL); releaseMemObjects(bufA, bufB); // Validate the answer for(int i=0; i<N; i++) // down each column { for(int j=0; j<ldb; j++) // down each row { local_assert(blasB[i*ldb + j], clblasB[i*ldb + j], delta); } } deleteBuffers<T>(A, B, blasB, clblasB, NULL); delete[] events; }