Beispiel #1
0
/*
 * Report an operating-system level failure.
 *
 * On Windows builds the current GetLastError() code is written through
 * dbg_printf() first. On every platform `line` is then passed to
 * local_assert().
 *
 * line: value forwarded unchanged to local_assert() (presumably the source
 *       line of the failing call -- confirm against the callers).
 */
void os_error(int line)
{
#if defined(_WIN32)
    /* GetLastError() returns a DWORD (unsigned long), so use %lu. The old
     * "%ldL" used a signed conversion and printed a stray 'L' after the
     * error code. */
    dbg_printf("System error: %lu\n", (unsigned long)GetLastError());
#endif
    local_assert(line);
}
Beispiel #2
0
void Extratest(size_t M, size_t N, size_t lda, size_t ldb, T alpha, T delta)
{
	T *A, *B, *blasB, *clblasB;
	cl_mem bufA, bufB;
	clMath::BlasBase *base;
    cl_event *events;
	cl_int err;

	base = clMath::BlasBase::getInstance();
    if ((typeid(T) == typeid(cl_double) ||
         typeid(T) == typeid(DoubleComplex)) &&
        !base->isDevSupportDoublePrecision()) {

        std::cerr << ">> WARNING: The target device doesn't support native "
                     "double precision floating point arithmetic" <<
                     std::endl << ">> Test skipped" << std::endl;
        SUCCEED();
        return;
    }


	clblasOrder order = clblasColumnMajor;
	clblasSide side = clblasLeft;
	clblasUplo uplo = clblasUpper;
	clblasTranspose trans = clblasNoTrans;
	clblasDiag diag = clblasNonUnit;

	A = new T[M * lda];
    B = new T[N * ldb];
    blasB = new T[N * ldb];
    clblasB = new T[N * ldb];

	memset(A, 0, M*lda*sizeof(T));
	memset(B, 0, N*ldb*sizeof(T));

	for(int i=0; i<M; i++) // down each column
	{
		for(int j=0; j<M; j++) // down each row
		{
			AssignA<T>(A, i, j, lda);
		}
	}

	for(int i=0; i<N; i++) // down each column
	{
		for(int j=0; j<M; j++) // down each row
		{
			AssignB<T>(B, i, j, ldb, M);
		}
	}

    memcpy(blasB, B, N*ldb*sizeof(T));
    memcpy(clblasB, B, N*ldb*sizeof(T));

	::std::cerr << "Calling reference xTRSM routine... ";
	::clMath::blas::trsm(order, side, uplo, trans, diag, M, N, alpha, A, lda, blasB, ldb);


    bufA = base->createEnqueueBuffer(A, M*lda*sizeof(T), 0, CL_MEM_READ_ONLY);
    bufB = base->createEnqueueBuffer(clblasB, N*ldb*sizeof(T), 0, CL_MEM_READ_WRITE);

    events = new cl_event[1];
    memset(events, 0, sizeof(cl_event));

    if ((bufA == NULL) || (bufB == NULL)) {
        /* Skip the test, the most probable reason is
         *     matrix too big for a device.
         */
        releaseMemObjects(bufA, bufB);
        deleteBuffers<T>(A, B, blasB, clblasB, NULL);
        delete[] events;
        ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
            << ::std::endl
            << ">> Can't execute the test, because data is not transfered to GPU."
            << ::std::endl
            << ">> Test skipped." << ::std::endl;
        SUCCEED();
        return;
    }

    ::std::cerr << "Calling clblas xTRSM routine... ";
    err = (cl_int)::clMath::clblas::trsm(order, side, uplo, trans, diag, M, N, alpha, bufA, 0, lda, bufB, 0, ldb,
				1, base->commandQueues(), 0, NULL, events);
    if (err != CL_SUCCESS) {
        releaseMemObjects(bufA, bufB);
        deleteBuffers<T>(A, B, blasB, clblasB, NULL);
        delete[] events;
        ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSM() failed";
    }

    err = waitForSuccessfulFinish(1, base->commandQueues(), events);
    if (err != CL_SUCCESS) {
        releaseMemObjects(bufA, bufB);
        deleteBuffers<T>(A, B, blasB, clblasB, NULL);
        delete[] events;
        ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
    }
    ::std::cerr << "Done" << ::std::endl;

    clEnqueueReadBuffer(base->commandQueues()[0], bufB, CL_TRUE,
                        0, N*ldb*sizeof(T), clblasB, 0, NULL, NULL);

    releaseMemObjects(bufA, bufB);

	// Validate the answer
	for(int i=0; i<N; i++) // down each column
	{
		for(int j=0; j<ldb; j++) // down each row
		{
			local_assert(blasB[i*ldb + j], clblasB[i*ldb + j], delta);
		}
	}

	deleteBuffers<T>(A, B, blasB, clblasB, NULL);
    delete[] events;
}