コード例 #1
0
ファイル: clfunc_xCG.hpp プロジェクト: pavanky/clSPARSE
    void setup_buffer( double pAlpha, double pBeta, const std::string& path )
    {
        sparseFile = path;

        // Read sparse data from file and construct a COO matrix from it
        int nnz, row, col;
        clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) );
        if( fileError != clsparseSuccess )
            throw clsparse::io_exception( "Could not read matrix market header from disk" );


        // Now initialise a CSR matrix from the COO matrix
        clsparseInitCsrMatrix( &csrMtx );
        csrMtx.num_nonzeros = nnz;
        csrMtx.num_rows = row;
        csrMtx.num_cols = col;
        clsparseCsrMetaSize( &csrMtx, control );

        cl_int status;
        csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY,
            csrMtx.num_nonzeros * sizeof( T ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" );

        csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY,
            csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" );

        csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY,
            ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );

        csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY,
            csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" );

        if(typeid(T) == typeid(float))
            fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
        else if (typeid(T) == typeid(double))
            fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
        else
            fileError = clsparseInvalidType;

        if( fileError != clsparseSuccess )
            throw std::runtime_error( "Could not read matrix market data from disk" );

        // Initialize the dense X & Y vectors that we multiply against the sparse matrix
        clsparseInitVector( &x );
        x.num_values = csrMtx.num_rows;
        x.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE,
                                     x.num_values * sizeof( T ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer x.values" );

        clsparseInitVector( &y );
        y.num_values = csrMtx.num_cols;
        y.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE,
                                     y.num_values * sizeof( T ), NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer y.values" );


    }
コード例 #2
0
ファイル: test-solvers.cpp プロジェクト: pavanky/clSPARSE
    // Per-test cleanup: releases the solver control object and the device
    // buffers behind gX and gB, then re-initialises both vector descriptors.
    // Return codes of the release calls are not checked.
    void TearDown()
    {
        clsparseReleaseSolverControl(solverControl);

        // Give back the OpenCL buffers referenced by the two vectors.
        ::clReleaseMemObject(gX.values);
        ::clReleaseMemObject(gB.values);

        // Run the descriptors through clsparseInitVector again so they do not
        // keep the released handles (presumably resets all fields to their
        // defaults -- confirm against the clSPARSE documentation).
        clsparseInitVector(&gX);
        clsparseInitVector(&gB);
    }
コード例 #3
0
ファイル: test-blas2.cpp プロジェクト: kvaragan/clSPARSE
    // Per-test cleanup: releases the device buffers behind the two scalars
    // (gAlpha, gBeta) and the two vectors (gX, gY), then re-initialises all
    // four descriptors. Return codes of the release calls are not checked.
    void TearDown()
    {
        // Scalars first ...
        ::clReleaseMemObject(gAlpha.value);
        ::clReleaseMemObject(gBeta.value);

        // ... then the dense vectors.
        ::clReleaseMemObject(gX.values);
        ::clReleaseMemObject(gY.values);

        // Run every descriptor through its init function again so none of
        // them keeps a released handle (presumably resets all fields to their
        // defaults -- confirm against the clSPARSE documentation).
        clsparseInitScalar(&gAlpha);
        clsparseInitScalar(&gBeta);

        clsparseInitVector(&gX);
        clsparseInitVector(&gY);

    }
コード例 #4
0
ファイル: cldense-dot.hpp プロジェクト: nagyist/clSPARSE
/**
 * Dot product of two dense device vectors, written into the scalar pR.
 *
 * The work is split into two stages: inner_product<T> fills a temporary
 * vector of REDUCE_BLOCKS_NUMBER partial results, and atomic_reduce<T> folds
 * that vector into pR.
 *
 * T is the element type; its template header lies outside this excerpt.
 *
 * @param pR       scalar receiving the result; passed to init_scalar with
 *                 (T)0 before any other work is done
 * @param pX       first input vector
 * @param pY       second input vector; its usable length
 *                 (num_values - offset()) must equal that of pX, which is
 *                 only enforced by an assert
 * @param control  clSPARSE control object supplying the command queue
 *
 * @return clsparseSuccess when both stages succeed or when there are no
 *         elements to process; clsparseInvalidKernelExecution when either
 *         stage reports a non-success status.
 */
clsparseStatus dot(clsparseScalarPrivate* pR,
                   const cldenseVectorPrivate* pX,
                   const cldenseVectorPrivate* pY,
                   const clsparseControl control)
{
    // Done before the size check, so it also runs for empty input.
    init_scalar(pR, (T)0, control);

    // with REDUCE_BLOCKS_NUMBER = 256 final reduction can be performed
    // within one block;
    const cl_ulong REDUCE_BLOCKS_NUMBER = 256;

    /* For future optimisation
    //workgroups per compute units;
    const cl_uint  WG_PER_CU = 64;
    const cl_ulong REDUCE_BLOCKS_NUMBER = control->max_compute_units * WG_PER_CU;
    */
    const cl_ulong REDUCE_BLOCK_SIZE = 256;

    const cl_ulong xSize = pX->num_values - pX->offset();
    const cl_ulong ySize = pY->num_values - pY->offset();

    assert (xSize == ySize);

    const cl_ulong size = xSize;

    // Nothing to reduce.
    if (size == 0)
    {
        return clsparseSuccess;
    }

    // Temporary vector holding one partial sum per reduce block.
    cldenseVectorPrivate partial;
    clsparseInitVector(&partial);
    partial.num_values = REDUCE_BLOCKS_NUMBER;

    // NOTE(review): judging by its name, clMemRAII presumably allocates
    // partial.values on construction and releases it when rPartial goes out
    // of scope -- confirm against its definition.
    clMemRAII<T> rPartial (control->queue(), &partial.values, partial.num_values);

    // Stage 1: per-block partial results.
    cl_int status = inner_product<T>(&partial, pX, pY, size,  REDUCE_BLOCKS_NUMBER,
                                     REDUCE_BLOCK_SIZE, control);

    if (status != clsparseSuccess)
    {
        return clsparseInvalidKernelExecution;
    }

    // Stage 2: fold the partial results into pR.
    // NOTE(review): this stage is checked against CL_SUCCESS while stage 1 is
    // checked against clsparseSuccess; kept as found because the return type
    // of atomic_reduce is not visible here.
    status = atomic_reduce<T>(pR, &partial, REDUCE_BLOCK_SIZE,
                              control);

    if (status != CL_SUCCESS)
    {
        return clsparseInvalidKernelExecution;
    }

    return clsparseSuccess;
}
コード例 #5
0
ファイル: test-blas2.cpp プロジェクト: kvaragan/clSPARSE
    void SetUp()
    {
        clsparseInitScalar(&gAlpha);
        clsparseInitScalar(&gBeta);

        clsparseInitVector(&gX);
        clsparseInitVector(&gY);

        hAlpha = T(CSRE::alpha);
        hBeta = T(CSRE::beta);

        hX = uBLAS::vector<T>(CSRE::n_cols, 1);
        hY = uBLAS::vector<T>(CSRE::n_rows, 2);

        cl_int status;

        gX.values = clCreateBuffer(CLSE::context,
                                   CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   hX.size() * sizeof(T), hX.data().begin(),
                                   &status);
        gX.num_values = hX.size();
        ASSERT_EQ(CL_SUCCESS, status);

        gY.values = clCreateBuffer(CLSE::context,
                                   CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                                   hY.size() * sizeof(T), hY.data().begin(),
                                   &status);
        gY.num_values = hY.size();
        ASSERT_EQ(CL_SUCCESS, status);

        gAlpha.value = clCreateBuffer(CLSE::context,
                                      CL_MEM_READ_ONLY| CL_MEM_COPY_HOST_PTR,
                                      sizeof(T), &hAlpha, &status);
        ASSERT_EQ(CL_SUCCESS, status);

        gBeta.value = clCreateBuffer(CLSE::context,
                                     CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                     sizeof(T), &hBeta, &status);
        ASSERT_EQ(CL_SUCCESS, status);
    }
コード例 #6
0
ファイル: test-solvers.cpp プロジェクト: pavanky/clSPARSE
    void SetUp()
    {
        // Setup solver control
        clsparseStatus status;
        solverControl = clsparseCreateSolverControl(precond,
                                                    maxIterations,
                                                    relativeTolerance,
                                                    absoluteTolerance);
        ASSERT_NE(nullptr, solverControl);

        status = clsparseSolverPrintMode(solverControl, printMode);
        ASSERT_EQ(clsparseSuccess, status);

        // Setup rhs and vector of unknowns

        hX = uBLAS::vector<T>(CSRE::n_cols, (T) initialUnknownsValue);
        hB = uBLAS::vector<T>(CSRE::n_rows, (T) initialRhsValue);

        clsparseInitVector(&gX);
        clsparseInitVector(&gB);

        cl_int cl_status;

        gX.values = clCreateBuffer(CLSE::context,
                                   CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                   hX.size() * sizeof(T), hX.data().begin(),
                                   &cl_status);
        gX.num_values = hX.size();
        ASSERT_EQ(CL_SUCCESS, cl_status);

        gB.values = clCreateBuffer(CLSE::context,
                                   CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                   hB.size() * sizeof(T), hB.data().begin(),
                                   &cl_status);
        gB.num_values = hB.size();
        ASSERT_EQ(CL_SUCCESS, cl_status);

    }
コード例 #7
0
ファイル: test-blas2.cpp プロジェクト: kvaragan/clSPARSE
    /**
     * Runs csrmv (y = alpha * A * x + beta * y) on the device for element
     * type T and compares the result with a host reference computed from the
     * uBLAS copy of the matrix.
     *
     * Only the branch matching T (cl_float or cl_double) executes. Each
     * branch:
     *   1. calls the matching clSPARSE routine;
     *   2. updates hY in place with the host reference result;
     *   3. maps gY for reading and gathers ULP-distance statistics
     *      (printed only in debug builds, and only when extended_precision
     *      is set);
     *   4. checks every element with ASSERT_NEAR using a relative tolerance
     *      that is never smaller than 10 machine epsilons:
     *        float,  extended precision : 1e-3  * |hY[i]|
     *        double, extended precision : 1e-14 * |hY[i]|
     *        otherwise                  : 0.1   * |hY[i]|
     *   5. unmaps gY.
     *
     * Finally gY is recreated from the updated hY so that another run starts
     * with matching host and device data.
     *
     * NOTE(review): a failing ASSERT_* returns immediately, which leaves gY
     * mapped and skips the final buffer reset.
     */
    void test_csrmv()
    {
        clsparseStatus status;
        cl_int cl_status;

        clsparseEnableExtendedPrecision(CLSE::control, extended_precision);

        if (typeid(T) == typeid(cl_float) )
        {
            status = clsparseScsrmv(&gAlpha, &CSRE::csrSMatrix, &gX,
                                    &gBeta, &gY, CLSE::control);

            ASSERT_EQ(clsparseSuccess, status);

            float* vals = (float*)&CSRE::ublasSCsr.value_data()[0];
            int* rows = &CSRE::ublasSCsr.index1_data()[0];
            int* cols = &CSRE::ublasSCsr.index2_data()[0];
            for (int row = 0; row < CSRE::n_rows; row++)
            {
                // Summation done at a higher precision to decrease
                // summation errors from rounding.
                hY[row] *= hBeta;
                const int row_end = rows[row+1];
                double temp_sum;
                temp_sum = hY[row];
                for (int i = rows[row]; i < row_end; i++)
                {
                    // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                    temp_sum += hAlpha * vals[i] * hX[cols[i]];
                }
                hY[row] = temp_sum;
            }

            // Blocking map: the device result is readable once this returns.
            T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values,
                                                       CL_TRUE, CL_MAP_READ,
                                                       0, gY.num_values * sizeof(T),
                                                       0, nullptr, nullptr, &cl_status);
            ASSERT_EQ(CL_SUCCESS, cl_status);

            // ULP distance between reference and device result, per element.
            uint64_t max_ulps = 0;
            uint64_t min_ulps = UINT64_MAX;
            uint64_t total_ulps = 0;
            for (size_t i = 0; i < hY.size(); i++)
            {
                long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
                intDiff = llabs(intDiff);
                total_ulps += intDiff;
                if (max_ulps < intDiff)
                    max_ulps = intDiff;
                if (min_ulps > intDiff)
                    min_ulps = intDiff;
                // Debug printouts.
                //std::cout << "Row " << i << " Float Ulps: " << intDiff << std::endl;
                //std::cout.precision(9);
                //std::cout << "\tFloat hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint32_t *)&hY[i] << "), " << std::dec;
                //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint32_t *)&host_result[i] << ")" << std::dec << std::endl;
            }
#ifndef NDEBUG
            if (extended_precision)
            {
                std::cout << "Float Min ulps: " << min_ulps << std::endl;
                std::cout << "Float Max ulps: " << max_ulps << std::endl;
                std::cout << "Float Total ulps: " << total_ulps << std::endl;
                std::cout << "Float Average ulps: " << (double)total_ulps/(double)hY.size() <<  " (Size: " << hY.size() << ")" << std::endl;
            }
#endif

            for (size_t i = 0; i < hY.size(); i++)
            {
                double compare_val = 0.;
                if (extended_precision)
                {
                    // The limit here is somewhat weak because some GPUs don't
                    // support correctly rounded denorms in SPFP mode.
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*1e-3);
                }
                else
                {
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*0.1);
                }
                // Absolute floor for zero, subnormal or tiny reference values.
                if (compare_val < 10*FLT_EPSILON)
                    compare_val = 10*FLT_EPSILON;
                ASSERT_NEAR(hY[i], host_result[i], compare_val);
            }

            cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values,
                                                  host_result, 0, nullptr, nullptr);
            ASSERT_EQ(CL_SUCCESS, cl_status);
        }

        if (typeid(T) == typeid(cl_double) )
        {
            status = clsparseDcsrmv(&gAlpha, &CSRE::csrDMatrix, &gX,
                                    &gBeta, &gY, CLSE::control);

            ASSERT_EQ(clsparseSuccess, status);

            double* vals = (double*)&CSRE::ublasDCsr.value_data()[0];
            int* rows = &CSRE::ublasDCsr.index1_data()[0];
            int* cols = &CSRE::ublasDCsr.index2_data()[0];
            for (int row = 0; row < CSRE::n_rows; row++)
            {
                // Summation done using a compensated summation to decrease
                // summation errors from rounding. This allows us to get
                // smaller errors without requiring quad precision support.
                // This method is like performing summation at quad precision and
                // casting down to double in the end.
                hY[row] *= hBeta;
                const int row_end = rows[row+1];
                double temp_sum;
                temp_sum = hY[row];
                T sumk_err = 0.;
                for (int i = rows[row]; i < row_end; i++)
                {
                    // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                    temp_sum = two_sum(temp_sum, hAlpha*vals[i]*hX[cols[i]], &sumk_err);
                }
                // Fold the accumulated compensation term back into the sum.
                hY[row] = temp_sum + sumk_err;
            }

            // Blocking map: the device result is readable once this returns.
            T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values,
                                                       CL_TRUE, CL_MAP_READ,
                                                       0, gY.num_values * sizeof(T),
                                                       0, nullptr, nullptr, &cl_status);
            ASSERT_EQ(CL_SUCCESS, cl_status);

            // ULP distance between reference and device result, per element.
            uint64_t max_ulps = 0;
            uint64_t min_ulps = UINT64_MAX;
            uint64_t total_ulps = 0;
            for (size_t i = 0; i < hY.size(); i++)
            {
                long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
                intDiff = llabs(intDiff);
                total_ulps += intDiff;
                if (max_ulps < intDiff)
                    max_ulps = intDiff;
                if (min_ulps > intDiff)
                    min_ulps = intDiff;
                // Debug printouts.
                //std::cout << "Row " << i << " Double Ulps: " << intDiff << std::endl;
                //std::cout.precision(17);
                //std::cout << "\tDouble hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint64_t *)&hY[i] << "), " << std::dec;
                //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint64_t *)&host_result[i] << ")" << std::dec << std::endl;
            }
            if (extended_precision)
            {
#ifndef NDEBUG
                std::cout << "Double Min ulps: " << min_ulps << std::endl;
                std::cout << "Double Max ulps: " << max_ulps << std::endl;
                std::cout << "Double Total ulps: " << total_ulps << std::endl;
                std::cout << "Double Average ulps: " << (double)total_ulps/(double)hY.size() <<  " (Size: " << hY.size() << ")" << std::endl;
#endif

                for (size_t i = 0; i < hY.size(); i++)
                {
                    double compare_val = fabs(hY[i]*1e-14);
                    if (compare_val < 10*DBL_EPSILON)
                        compare_val = 10*DBL_EPSILON;
                    ASSERT_NEAR(hY[i], host_result[i], compare_val);
                }
            }
            else
            {
                for (size_t i = 0; i < hY.size(); i++)
                {
                    double compare_val = 0.;
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*0.1);
                    if (compare_val < 10*DBL_EPSILON)
                        compare_val = 10*DBL_EPSILON;
                    ASSERT_NEAR(hY[i], host_result[i], compare_val);
                }
            }

            cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values,
                                                  host_result, 0, nullptr, nullptr);
            ASSERT_EQ(CL_SUCCESS, cl_status);
        }
        // Reset output buffer for next test.
        // csrmv reads y as well as writing it (beta * y), and OpenCL leaves
        // kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, so the
        // buffer is created CL_MEM_READ_WRITE.
        ::clReleaseMemObject(gY.values);
        clsparseInitVector(&gY);
        gY.values = clCreateBuffer(CLSE::context,
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                hY.size() * sizeof(T), hY.data().begin(),
                &cl_status);
        gY.num_values = hY.size();
        ASSERT_EQ(CL_SUCCESS, cl_status);
    }