// Constructs the dense-to-CSR benchmark object.
//
//   sparseGetTimer - timer factory; when it evaluates to false no timers are
//                    created and both timer pointers remain null
//   profileCount   - forwarded to each timer's Reserve() call
//   devType        - handed to the clsparseFunc base together with
//                    CL_QUEUE_PROFILING_ENABLE
xDense2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType )
    : clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE )
    , gpuTimer( nullptr )
    , cpuTimer( nullptr )
{
    // Create and initialize our timer objects, but only if the external
    // timer shared library loaded (i.e. a factory was handed to us)
    if( sparseGetTimer )
    {
        // GPU-side timer
        gpuTimer = sparseGetTimer( CLSPARSE_GPU );
        gpuTimer->Reserve( 1, profileCount );
        gpuTimer->setNormalize( true );

        // CPU-side timer
        cpuTimer = sparseGetTimer( CLSPARSE_CPU );
        cpuTimer->Reserve( 1, profileCount );
        cpuTimer->setNormalize( true );

        // Register the labels under which samples are recorded
        gpuTimerID = gpuTimer->getUniqueID( "GPU xDense2Csr", 0 );
        cpuTimerID = cpuTimer->getUniqueID( "CPU xDense2Csr", 0 );
    }

    // Async mode is switched off on the control object for this benchmark
    clsparseEnableAsync( control, false );
}// End of constructor
// Constructs the BiCGStab solver benchmark object.
//
//   sparseGetTimer - timer factory; when it evaluates to false no timer is
//                    created and cpuTimer remains null
//   profileCount   - forwarded to the timer's Reserve() call
//   devType        - handed to the clsparseFunc base together with
//                    CL_QUEUE_PROFILING_ENABLE
//
// Only a CPU timer is set up here; no GPU timer is created by this constructor.
xBiCGStab( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType )
    : clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE )
    , cpuTimer( nullptr )
{
    // Create and initialize our timer object, but only if the external
    // timer shared library loaded (i.e. a factory was handed to us)
    if( sparseGetTimer )
    {
        cpuTimer = sparseGetTimer( CLSPARSE_CPU );
        cpuTimer->Reserve( 1, profileCount );
        cpuTimer->setNormalize( true );

        cpuTimerID = cpuTimer->getUniqueID( "CPU xBiCGStab", 0 );
    }

    // Async mode is switched off on the control object for this benchmark
    clsparseEnableAsync( control, false );

    // Solver configuration: DIAGONAL selector followed by 1000, 1e-6 and 0.
    // NOTE(review): presumably preconditioner, max iterations, relative and
    // absolute tolerance - confirm against clsparseCreateSolverControl.
    solverControl = clsparseCreateSolverControl( DIAGONAL, 1000, 1e-6, 0 );
    clsparseSolverPrintMode( solverControl, VERBOSE );
}
// C = A * A
// A is filled with random powers of 2
//
// Flow of the test:
//   1. generate new values for the stored non-zeros of the shared input matrix
//      and write them to both the uBLAS host copy and the device buffer,
//   2. run the SpGEMM and wait for the event taken from the control object,
//   3. read the CSR result (values, column indices, row offsets) back,
//   4. compare it with the product computed by uBLAS::sparse_prod.
TYPED_TEST(TestCSRSpGeMM, Powersof2)
{
    using SPER = CSRSparseEnvironment;
    using CLSE = ClSparseEnvironment;
    using uBlasCSRM = uBLAS::compressed_matrix<float, uBLAS::row_major, 0, uBLAS::unbounded_array<int> >;

    cl::Event event;
    clsparseEnableAsync(CLSE::control, true);

    // One generated value per stored non-zero of the input matrix
    clsparse_matrix_fill<float> objFillVals(42, -14, 14);
    std::vector<float> tmpArray(SPER::csrSMatrix.num_nonzeros);

    objFillVals.fillMtxTwoPowers(tmpArray.data(), tmpArray.size());
    //objFillVals.fillMtxOnes(tmpArray.data(), tmpArray.size());

    // Fill the uBLAS CSR reference matrix with the same values
    for (size_t i = 0; i < tmpArray.size(); ++i)
    {
        SPER::ublasSCsr.value_data()[i] = tmpArray[i];
    }

    // Copy host to the device (blocking write)
    cl_int cl_status = clEnqueueWriteBuffer(CLSE::queue, SPER::csrSMatrix.values, CL_TRUE, 0,
        sizeof(float) * tmpArray.size(), tmpArray.data(), 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, cl_status);

    tmpArray.clear();

    // Fatal assertions: everything below sizes host buffers from csrMatrixC and
    // waits on the event, so continuing after a failure here is not meaningful.
    clsparseStatus status = generateSpGemmResult<TypeParam>(this->csrMatrixC);
    ASSERT_EQ(clsparseSuccess, status);

    status = clsparseGetEvent(CLSE::control, &event());
    ASSERT_EQ(clsparseSuccess, status);
    event.wait();

    std::vector<int> resultRowPtr(this->csrMatrixC.num_rows + 1);       // Row offsets of the output CSR matrix
    std::vector<int> resultColIndices(this->csrMatrixC.num_nonzeros);   // Col Indices
    std::vector<TypeParam> resultVals(this->csrMatrixC.num_nonzeros);   // Values

    // Reference matrix with the same dimensions / non-zero capacity as the result
    this->C = uBlasCSRM(this->csrMatrixC.num_rows, this->csrMatrixC.num_cols, this->csrMatrixC.num_nonzeros);
    this->C.complete_index1_data();

    // Blocking reads of the three CSR arrays
    cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.values, CL_TRUE, 0,
        this->csrMatrixC.num_nonzeros * sizeof(TypeParam), resultVals.data(), 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0,
        this->csrMatrixC.num_nonzeros * sizeof(int), resultColIndices.data(), 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, cl_status);

    cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0,
        (this->csrMatrixC.num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, cl_status);

    std::cout << "Done with GPU" << std::endl;

    // The uBLAS reference product is only computed when TypeParam is float;
    // for any other TypeParam the comparison runs against C as constructed above.
    if (typeid(TypeParam) == typeid(float))
    {
        this->C = uBLAS::sparse_prod(SPER::ublasSCsr, SPER::ublasSCsr, this->C);
    }

    // NOTE(review): checkRowOffsets presumably raises browOffsetsMisFlag when the
    // row offsets differ from the reference - confirm in the fixture.
    this->browOffsetsMisFlag = false;
    this->checkRowOffsets(resultRowPtr);
    //if (::testing::Test::HasFailure())
    if (this->browOffsetsMisFlag)
    {
        // Check the values in Dense format
        this->checkInDense(resultRowPtr, resultColIndices, resultVals);
    }
    else
    {
        /* Check Col Indices */
        for (size_t i = 0; i < resultColIndices.size(); ++i)
        {
            ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]);
        }

        /* Check Values */
        for (size_t i = 0; i < resultVals.size(); ++i)
        {
            //TODO: how to define the tolerance
            // Tolerance is 0.0, i.e. an exact match with the reference is required
            ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.0);
        }

        ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size());

        // Rest of the col_indices should be zero
        for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.index2_data()[i]);
        }

        // Rest of the values should be zero
        for (size_t i = resultVals.size(); i < this->C.value_data().size(); ++i)
        {
            ASSERT_EQ(0, this->C.value_data()[i]);
        }
    }
}//end TestCSRSpGeMM: Powersof2
// C = A * A; // Square matrices are only supported TYPED_TEST(TestCSRSpGeMM, square) { using SPER = CSRSparseEnvironment; using CLSE = ClSparseEnvironment; typedef typename uBLAS::compressed_matrix<float, uBLAS::row_major, 0, uBLAS::unbounded_array<int> > uBlasCSRM; cl::Event event; clsparseEnableAsync(CLSE::control, true); #ifdef TEST_LONG clsparseStatus status = generateSpGemmResult_long<TypeParam>(this->csrMatrixC); #else clsparseStatus status = generateSpGemmResult<TypeParam>(this->csrMatrixC); #endif EXPECT_EQ(clsparseSuccess, status); status = clsparseGetEvent(CLSE::control, &event()); EXPECT_EQ(clsparseSuccess, status); event.wait(); //std::cout << "nrows =" << (this->csrMatrixC).num_rows << std::endl; //std::cout << "nnz =" << (this->csrMatrixC).num_nonzeros << std::endl; std::vector<int> resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix std::vector<int> resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector<TypeParam> resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); (this->C).complete_index1_data(); cl_int cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.values, CL_TRUE, 0, (this->csrMatrixC).num_nonzeros *sizeof(TypeParam), resultVals.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0, (this->csrMatrixC).num_nonzeros * sizeof(int), resultColIndices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0, ((this->csrMatrixC).num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); std::cout << "Done with GPU" << std::endl; #ifdef TEST_LONG // Generate referencee result from ublas if (typeid(TypeParam) == typeid(float)) { this->C = 
uBLAS::sparse_prod(SPER::ublasSCsrA, SPER::ublasSCsrB, this->C); } #else if (typeid(TypeParam) == typeid(float)) { this->C = uBLAS::sparse_prod(SPER::ublasSCsr, SPER::ublasSCsr, this->C); } #endif /* if (typeid(TypeParam) == typeid(double)) { this->C = uBLAS::sparse_prod(SPER::ublasDCsr, SPER::ublasDCsr, this->C);; }*/ /* for (int i = 0; i < resultRowPtr.size(); i++) { ASSERT_EQ(resultRowPtr[i], this->C.index1_data()[i]); }*/ this->browOffsetsMisFlag = false; this->checkRowOffsets(resultRowPtr); //if (::testing::Test::HasFailure()) if (this->browOffsetsMisFlag == true) { // Check the values in Dense format this->checkInDense(resultRowPtr, resultColIndices, resultVals); } else { /* Check Col Indices */ for (int i = 0; i < resultColIndices.size(); i++) { ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); } /* Check Values */ for (int i = 0; i < resultVals.size(); i++) { //TODO: how to define the tolerance ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.1); } ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size()); //Rest of the col_indices should be zero for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); i++) { ASSERT_EQ(0, this->C.index2_data()[i]); } // Rest of the values should be zero for (size_t i = resultVals.size(); i < this->C.value_data().size(); i++) { ASSERT_EQ(0, this->C.value_data()[i]); } } }//end TestCSRSpGeMM: square