示例#1
0
    /// Constructs the xDense2Csr benchmark object.
    ///
    /// The clsparseFunc base is constructed with CL_QUEUE_PROFILING_ENABLE and
    /// both timer pointers start out null.
    ///
    /// @param sparseGetTimer  Timer factory; when it evaluates to false no
    ///                        timers are created and both pointers stay null.
    /// @param profileCount    Forwarded to Reserve() on each timer.
    /// @param devType         OpenCL device type forwarded to clsparseFunc.
    xDense2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType )
        : clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ),
          gpuTimer( nullptr ),
          cpuTimer( nullptr )
    {
        // Timers are optional: they are only created and configured when the
        // external timer shared library was loaded and supplied a factory.
        if( sparseGetTimer )
        {
            // Device-side timer.
            gpuTimer = sparseGetTimer( CLSPARSE_GPU );
            gpuTimer->Reserve( 1, profileCount );
            gpuTimer->setNormalize( true );

            // Host-side timer.
            cpuTimer = sparseGetTimer( CLSPARSE_CPU );
            cpuTimer->Reserve( 1, profileCount );
            cpuTimer->setNormalize( true );

            // Register the labels under which the samples are recorded.
            gpuTimerID = gpuTimer->getUniqueID( "GPU xDense2Csr", 0 );
            cpuTimerID = cpuTimer->getUniqueID( "CPU xDense2Csr", 0 );
        }

        // Async mode is switched off on the control object.
        clsparseEnableAsync( control, false );
    }// End of constructor
示例#2
0
    /// Constructs the xBiCGStab benchmark object.
    ///
    /// The clsparseFunc base is constructed with CL_QUEUE_PROFILING_ENABLE and
    /// the CPU timer pointer starts out null. GPU timing is disabled in this
    /// constructor: gpuTimer is neither initialised nor configured here.
    ///
    /// @param sparseGetTimer  Timer factory; when it evaluates to false no
    ///                        timer is created and cpuTimer stays null.
    /// @param profileCount    Forwarded to Reserve() on the CPU timer.
    /// @param devType         OpenCL device type forwarded to clsparseFunc.
    xBiCGStab( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType )
        : clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ),
          cpuTimer( nullptr )
    {
        // The timer is optional: it is only created and configured when the
        // external timer shared library was loaded and supplied a factory.
        if( sparseGetTimer )
        {
            cpuTimer = sparseGetTimer( CLSPARSE_CPU );
            cpuTimer->Reserve( 1, profileCount );
            cpuTimer->setNormalize( true );

            cpuTimerID = cpuTimer->getUniqueID( "CPU xBiCGStab", 0 );
        }

        // Async mode is switched off on the control object.
        clsparseEnableAsync( control, false );

        // NOTE(review): the arguments are presumably (preconditioner, max
        // iterations, relative tolerance, absolute tolerance) -- confirm
        // against the clSPARSE solver-control API.
        solverControl = clsparseCreateSolverControl(DIAGONAL, 1000, 1e-6, 0);
        clsparseSolverPrintMode(solverControl, VERBOSE);
    }
示例#3
0
// C = A * A, where the values of A are overwritten with random powers of 2.
//
// Steps:
//   1. Generate new values and store them in both the host-side uBLAS matrix
//      (SPER::ublasSCsr) and the device-side CSR value buffer.
//   2. Run SpGEMM in async mode and wait for the event reported by clSPARSE.
//   3. Read the resulting CSR arrays back to the host.
//   4. Compare against uBLAS::sparse_prod; when the row offsets differ the
//      comparison falls back to checkInDense().
TYPED_TEST(TestCSRSpGeMM, Powersof2)
{
    using SPER = CSRSparseEnvironment;
    using CLSE = ClSparseEnvironment;
    using uBlasCSRM = uBLAS::compressed_matrix<float, uBLAS::row_major, 0, uBLAS::unbounded_array<int> >;

    cl::Event event;
    clsparseEnableAsync(CLSE::control, true);

    // NOTE(review): arguments are presumably (seed, lower bound, upper bound)
    // for the generated exponents -- confirm against clsparse_matrix_fill.
    clsparse_matrix_fill<float> objFillVals(42, -14, 14);

    std::vector<float> tmpArray;
    tmpArray.resize(SPER::csrSMatrix.num_nonzeros);

    objFillVals.fillMtxTwoPowers(tmpArray.data(), tmpArray.size());
    //objFillVals.fillMtxOnes(tmpArray.data(), tmpArray.size());

    // Fill the uBLAS CSR matrix with the same values
    for (size_t i = 0; i < tmpArray.size(); ++i)
    {
        SPER::ublasSCsr.value_data()[i] = tmpArray[i];
    }

    // Copy host values to the device (blocking write, so tmpArray may be
    // released right afterwards)
    cl_int cl_status = clEnqueueWriteBuffer(CLSE::queue, SPER::csrSMatrix.values, CL_TRUE, 0,
                                            sizeof(float) * tmpArray.size(),
                                            tmpArray.data(), 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, cl_status);
    tmpArray.clear();

    clsparseStatus status = generateSpGemmResult<TypeParam>(this->csrMatrixC);

    EXPECT_EQ(clsparseSuccess, status);

    status = clsparseGetEvent(CLSE::control, &event());
    EXPECT_EQ(clsparseSuccess, status);
    // Do not ignore a failed wait: the read-backs below rely on the kernel
    // having finished.
    EXPECT_EQ(CL_SUCCESS, event.wait());

    std::vector<int> resultRowPtr((this->csrMatrixC).num_rows + 1); // Row pointers of the output CSR matrix
    std::vector<int> resultColIndices((this->csrMatrixC).num_nonzeros); // Column indices
    std::vector<TypeParam> resultVals((this->csrMatrixC).num_nonzeros); // Values

    this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros);
    (this->C).complete_index1_data();

    // Blocking reads of the three CSR arrays of the result
    cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.values, CL_TRUE, 0,
        (this->csrMatrixC).num_nonzeros * sizeof(TypeParam),
        resultVals.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.colIndices, CL_TRUE, 0,
        (this->csrMatrixC).num_nonzeros * sizeof(int), resultColIndices.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.rowOffsets, CL_TRUE, 0,
        ((this->csrMatrixC).num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    std::cout << "Done with GPU" << std::endl;

    // The reference product is only computed for the float instantiation.
    if (typeid(TypeParam) == typeid(float))
    {
        this->C = uBLAS::sparse_prod(SPER::ublasSCsr, SPER::ublasSCsr, this->C);
    }

    this->browOffsetsMisFlag = false;
    this->checkRowOffsets(resultRowPtr);
    //if (::testing::Test::HasFailure())
    if (this->browOffsetsMisFlag)
    {
        // Row offsets differ: check the values in dense format instead
        this->checkInDense(resultRowPtr, resultColIndices, resultVals);
    }
    else
    {
        /* Check Col Indices */
        for (size_t i = 0; i < resultColIndices.size(); ++i)
        {
            ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]);
        }

        /* Check Values */
        for (size_t i = 0; i < resultVals.size(); ++i)
        {
            //TODO: how to define the tolerance
            // A tolerance of 0.0 demands an exact match.
            ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.0);
        }

        ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size());

        // Rest of the col_indices should be zero
        for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.index2_data()[i]);
        }

        // Rest of the values should be zero
        for (size_t i = resultVals.size(); i < this->C.value_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.value_data()[i]);
        }
    }

}//end TestCSRSpGeMM: Powersof2
示例#4
0
// C = A * A; only square matrices are supported.
//
// Steps:
//   1. Run SpGEMM in async mode (the TEST_LONG build uses the *_long
//      generator) and wait for the event reported by clSPARSE.
//   2. Read the resulting CSR arrays back to the host.
//   3. Compare against uBLAS::sparse_prod; when the row offsets differ the
//      comparison falls back to checkInDense().
TYPED_TEST(TestCSRSpGeMM, square)
{
    using SPER = CSRSparseEnvironment;
    using CLSE = ClSparseEnvironment;
    using uBlasCSRM = uBLAS::compressed_matrix<float, uBLAS::row_major, 0, uBLAS::unbounded_array<int> >;

    cl::Event event;
    clsparseEnableAsync(CLSE::control, true);

#ifdef TEST_LONG
    clsparseStatus status = generateSpGemmResult_long<TypeParam>(this->csrMatrixC);
#else
    clsparseStatus status = generateSpGemmResult<TypeParam>(this->csrMatrixC);
#endif

    EXPECT_EQ(clsparseSuccess, status);

    status = clsparseGetEvent(CLSE::control, &event());
    EXPECT_EQ(clsparseSuccess, status);
    // Do not ignore a failed wait: the read-backs below rely on the kernel
    // having finished.
    EXPECT_EQ(CL_SUCCESS, event.wait());

    //std::cout << "nrows =" << (this->csrMatrixC).num_rows << std::endl;
    //std::cout << "nnz =" << (this->csrMatrixC).num_nonzeros << std::endl;

    std::vector<int> resultRowPtr((this->csrMatrixC).num_rows + 1); // Row pointers of the output CSR matrix
    std::vector<int> resultColIndices((this->csrMatrixC).num_nonzeros); // Column indices
    std::vector<TypeParam> resultVals((this->csrMatrixC).num_nonzeros); // Values

    this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros);
    (this->C).complete_index1_data();

    // Blocking reads of the three CSR arrays of the result
    cl_int cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.values, CL_TRUE, 0,
        (this->csrMatrixC).num_nonzeros * sizeof(TypeParam),
        resultVals.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.colIndices, CL_TRUE, 0,
        (this->csrMatrixC).num_nonzeros * sizeof(int), resultColIndices.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue,
        this->csrMatrixC.rowOffsets, CL_TRUE, 0,
        ((this->csrMatrixC).num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, cl_status);

    std::cout << "Done with GPU" << std::endl;

    // Generate the reference result with uBLAS. It is only computed for the
    // float instantiation; the double variant (SPER::ublasDCsr) is disabled.
#ifdef TEST_LONG
    if (typeid(TypeParam) == typeid(float))
    {
        this->C = uBLAS::sparse_prod(SPER::ublasSCsrA, SPER::ublasSCsrB, this->C);
    }
#else
    if (typeid(TypeParam) == typeid(float))
    {
        this->C = uBLAS::sparse_prod(SPER::ublasSCsr, SPER::ublasSCsr, this->C);
    }
#endif

    // Row offsets are compared through checkRowOffsets() rather than with a
    // direct element-wise ASSERT_EQ against this->C.index1_data().
    this->browOffsetsMisFlag = false;
    this->checkRowOffsets(resultRowPtr);
    //if (::testing::Test::HasFailure())
    if (this->browOffsetsMisFlag)
    {
        // Row offsets differ: check the values in dense format instead
        this->checkInDense(resultRowPtr, resultColIndices, resultVals);
    }
    else
    {
        /* Check Col Indices */
        for (size_t i = 0; i < resultColIndices.size(); ++i)
        {
            ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]);
        }

        /* Check Values */
        for (size_t i = 0; i < resultVals.size(); ++i)
        {
            //TODO: how to define the tolerance
            ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.1);
        }

        ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size());

        // Rest of the col_indices should be zero
        for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.index2_data()[i]);
        }

        // Rest of the values should be zero
        for (size_t i = resultVals.size(); i < this->C.value_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.value_data()[i]);
        }
    }

}//end TestCSRSpGeMM: square