/// Copy constructor.
///
/// Creates a new device buffer for other.size() elements using the same
/// CL_MEM_FLAGS as the source buffer, and copies other's queue member.
///
/// @param other  vector to copy from; must be non-empty (checked with assert).
/// @param copy   when true (default) the contents of other are copied
///               buffer-to-buffer and the call waits for the copy to finish;
///               when false only the buffer is created.
vector(const vector& other, bool copy = true) :
    BASE::_size(other.size()), queue(other.queue)
{
    cl_int status;
    cl::Event controlEvent;

    const BUFF_TYPE& src = other.data();
    BUFF_TYPE& dst = BASE::data();

    // Reuse the memory flags of the source buffer for the new buffer.
    cl_mem_flags flags = src.getInfo<CL_MEM_FLAGS>(&status);
    CLSPARSE_V(status, "Vector cpy constr, getInfo<CL_MEM_FLAGS>");

    assert (BASE::_size > 0);

    dst = create_buffer(BASE::_size, flags);

    if (copy)
    {
        // Diagnostics are labelled with the copy constructor (not operator=)
        // so a failure here is attributed to the right call site.
        status = queue.enqueueCopyBuffer(src, dst, 0, 0,
                                         sizeof(value_type) * other.size(),
                                         NULL, &controlEvent);
        CLSPARSE_V(status, "Vector cpy constr, queue.enqueueCopyBuffer");

        status = controlEvent.wait();
        CLSPARSE_V(status, "Vector cpy constr, controlEvent.wait");
    }
}
//assignment operator performs deep copy vector& operator= (const vector& other) { if (this != &other) { assert(other.size() > 0); if (size() != other.size()) resize(other.size()); cl::Event controlEvent; cl_int status; const cl::Buffer& src = other.data(); cl::Buffer& dst = BASE::data(); status = queue.enqueueCopyBuffer(src, dst, 0, 0, sizeof(value_type) * other.size(), NULL, &controlEvent); CLSPARSE_V(status, "operator= queue.enqueueCopyBuffer"); status = controlEvent.wait(); CLSPARSE_V(status, "operator= controlEvent.wait"); } return *this; }
void clsparseDeviceTimer::queryOpenCL( size_t id ) { for( size_t s = 0; s < timerData.at( id ).size( ); ++s ) { for( size_t n = 0; n < timerData.at( id ).at( s ).size( ); ++n ) { StatData& sd = timerData[ id ][ s ][ n ]; cl_ulong profStart, profEnd = 0; cl_int err = 0; sd.deltaNanoSec = 0; for( size_t i = 0; i < sd.outEvents.size( ); ++i ) { profStart = sd.outEvents[ i ].getProfilingInfo<CL_PROFILING_COMMAND_START>( &err ); CLSPARSE_V( err, "clsparseDeviceTimer::queryOpenCL" ); profEnd = sd.outEvents[ i ].getProfilingInfo<CL_PROFILING_COMMAND_END>( &err ); CLSPARSE_V( err, "clsparseDeviceTimer::queryOpenCL" ); sd.deltaNanoSec += ( profEnd - profStart ); } sd.doubleNanoSec = static_cast<cl_double>( sd.deltaNanoSec ); } } }
// Writes the host scalars to their device buffers: alpha into a.value and
// beta into b.value, each as a single element of type T.
void initialize_gpu_buffer()
{
    // Pattern size and fill size are both exactly one T.
    const auto oneElementBytes = sizeof(T);

    CLSPARSE_V(::clEnqueueFillBuffer(queue, a.value, &alpha, oneElementBytes, 0,
                                     oneElementBytes, 0, nullptr, nullptr),
               "::clEnqueueFillBuffer alpha.value");

    CLSPARSE_V(::clEnqueueFillBuffer(queue, b.value, &beta, oneElementBytes, 0,
                                     oneElementBytes, 0, nullptr, nullptr),
               "::clEnqueueFillBuffer beta.value");
}// end of function
// update the device memory when reference is out of scope ~reference_base() { if (host_buffer) { ::cl::Event unmapEvent; CLSPARSE_V( queue.enqueueUnmapMemObject( container.data(), host_buffer, NULL, &unmapEvent ), "Array failed to unmap host buffer back to device memory" ); CLSPARSE_V( unmapEvent.wait( ), "Failed to wait for unmap event" ); } }
void reset_gpu_write_buffer() { // Every call to clsparseScsrSpGemm() allocates memory to csrMtxC, therefore freeing the memory CLSPARSE_V(::clReleaseMemObject(csrMtxC.values), "clReleaseMemObject csrMtxC.values"); CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices"); CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer"); // Initilize the output CSR Matrix clsparseInitCsrMatrix(&csrMtxC); }// end of function
// Releases the clSPARSE control object (printing a message to stdout if that
// fails), tears the library down, then releases the command queue and the
// context.
virtual ~clsparseFunc( )
{
    const auto releaseResult = clsparseReleaseControl( control );
    if( releaseResult != clsparseSuccess )
    {
        std::cout << "Problem with releasing control object" << std::endl;
    }

    clsparseTeardown( );

    CLSPARSE_V( ::clReleaseCommandQueue( queue ), "releasing command queue" );
    CLSPARSE_V( ::clReleaseContext( ctx ), "releasing context" );
}
void setup_buffer(double pAlpha, double pBeta, const std::string& path) { sparseFile = path; // Read sparse data from file and construct a CSR matrix from it int nnz; int row; int col; clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str()); if (clsparseSuccess != fileError) throw std::runtime_error("Could not read matrix market header from disk"); // Now initialize a CSR matrix from the CSR matrix // VK we have to handle other cases if input mtx file is not in CSR format clsparseInitCsrMatrix(&csrMtx); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; csrMtx.num_cols = col; clsparseCsrMetaSize( &csrMtx, control ); cl_int status; csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(cl_int), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(cl_int), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowBlocks"); if (typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control); else if (typeid(T) == typeid(double)) fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control); else fileError = clsparseInvalidType; if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk"); // Initialize the output dense matrix cldenseInitMatrix(&denseMtx); denseMtx.major = rowMajor; denseMtx.num_rows = row; denseMtx.num_cols = col; denseMtx.lead_dim = col; // To Check!! 
VK; denseMtx.values = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, denseMtx.num_rows * denseMtx.num_cols * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer denseMtx.values"); }// end
void reset_gpu_write_buffer( ) { int scalar_i = 0; T scalar_f = 0; CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.rowOffsets, &scalar_i, sizeof( int ), 0, sizeof( int ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( int ), 0, sizeof( int ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.values, &scalar_f, sizeof( T ), 0, sizeof( T ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer values" ); }
void cleanup() { if (gpuTimer && cpuTimer) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; cpuTimer->pruneOutliers(3.0); cpuTimer->Print(flopCnt, "GFlop/s"); cpuTimer->Reset(); gpuTimer->pruneOutliers(3.0); gpuTimer->Print(flopCnt, "GFlop/s"); gpuTimer->Reset(); } //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.col_indices), "clReleaseMemObject csrMtx.col_indices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.row_pointer), "clReleaseMemObject csrMtx.row_pointer"); //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); if (csrMtxC.values != nullptr) CLSPARSE_V(::clReleaseMemObject(csrMtxC.values), "clReleaseMemObject csrMtxC.values"); if (csrMtxC.col_indices != nullptr) CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices"); if (csrMtxC.row_pointer != nullptr) CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer"); //CLSPARSE_V(::clReleaseMemObject(csrMtxC.rowBlocks), "clReleaseMemObject csrMtxC.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(a.value), "clReleaseMemObject alpha.value"); CLSPARSE_V(::clReleaseMemObject(b.value), "clReleaseMemObject beta.value"); }
void setup_buffer(double pAlpha, double pBeta, const std::string& path) { sparseFile = path; // Read sparse data from file and construct a CSR matrix from it clsparseIdx_t nnz; clsparseIdx_t row; clsparseIdx_t col; clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str()); if (clsparseSuccess != fileError) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); // Now initialize a CSR matrix from the CSR matrix clsparseInitCsrMatrix(&csrMtx); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; csrMtx.num_cols = col; cl_int status; csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.col_indices"); csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.row_pointer"); if (typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes ); else if (typeid(T) == typeid(double)) fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes ); else fileError = clsparseInvalidType; if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); clsparseCsrMetaCreate(&csrMtx, control); // Initialize the output dense matrix cldenseInitMatrix(&denseMtx); denseMtx.major = rowMajor; denseMtx.num_rows = row; denseMtx.num_cols = col; denseMtx.lead_dim = col; // To Check!! VK; denseMtx.values = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, denseMtx.num_rows * denseMtx.num_cols * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer denseMtx.values"); }// end
void cleanup( ) { if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); gpuTimer->pruneOutliers( 3.0 ); gpuTimer->Print( sparseBytes, "GiB/s" ); gpuTimer->Reset( ); } //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); CLSPARSE_V( ::clReleaseMemObject( a.value ), "clReleaseMemObject alpha.value" ); CLSPARSE_V( ::clReleaseMemObject( b.value ), "clReleaseMemObject beta.value" ); }
void reset_gpu_write_buffer() { T scalar = 0; CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalar, sizeof(T), 0, cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); cl_int scalarIntZero = 0; CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(cl_int), 0, cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(cl_int), 0, cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); }// end
void initialize_gpu_buffer( ) { T scalarOne = 1.0; CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &scalarOne, sizeof( T ), 0, sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" ); T scalarZero = 0.0; CLSPARSE_V( ::clEnqueueFillBuffer( queue, y.values, &scalarZero, sizeof( T ), 0, sizeof( T ) * y.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer y.values" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, a.value, &alpha, sizeof( T ), 0, sizeof( T ) * 1, 0, NULL, NULL ), "::clEnqueueFillBuffer alpha.value" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, b.value, &beta, sizeof( T ), 0, sizeof( T ) * 1, 0, NULL, NULL ), "::clEnqueueFillBuffer beta.value" ); }
void initialize_gpu_buffer() { T scalarZero = 0.0; CLSPARSE_V(::clEnqueueFillBuffer(queue, denseMtx.values, &scalarZero, sizeof(T), 0, denseMtx.num_rows * denseMtx.num_cols * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer denseMtx.values"); }// end
void initialize_gpu_buffer( ) { // We will solve A*x = y, // where initial guess of x will be vector of zeros 0, // and y will be vector of ones; T xValue = 0.0; T yValue = 1.0; CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &xValue, sizeof( T ), 0, sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, y.values, &yValue, sizeof( T ), 0, sizeof( T ) * y.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer y.values" ); }
// apply preconditioner void operator ()(const clsparse::vector<T>& x, clsparse::vector<T>& y, clsparseControl control) { //element wise multiply y = x*invDiag_A; clsparseStatus status = elementwise_transform<T, EW_MULTIPLY>(y, x, invDiag_A, control); CLSPARSE_V(status, "Diagonal operator()"); }
// Writes rhs into the referenced element. The single element at `index` is
// mapped (blocking, CL_MAP_WRITE_INVALIDATE_REGION), assigned through the
// mapped pointer, then unmapped; the call waits for the unmap to finish.
reference_base< Container >& operator=(const value_type& rhs )
{
    const auto byteOffset = index * sizeof( value_type );
    cl_int mapStatus = CL_SUCCESS;

    naked_pointer mapped = reinterpret_cast< naked_pointer >(
        queue.enqueueMapBuffer( container.data(), true, CL_MAP_WRITE_INVALIDATE_REGION,
                                byteOffset, sizeof( value_type ),
                                NULL, NULL, &mapStatus ) );
    CLSPARSE_V( mapStatus, "Array failed map device memory to host memory for operator[]" );

    *mapped = rhs;

    ::cl::Event unmapDone;
    CLSPARSE_V( queue.enqueueUnmapMemObject( container.data(), mapped, NULL, &unmapDone ),
                "Array failed to unmap host memory back to device memory" );
    CLSPARSE_V( unmapDone.wait( ), "Failed to wait for unmap event" );

    return *this;
}
/// Fills every element of container c with `value` and waits for the fill to
/// complete.
///
/// @param c      container providing size(), getQueue() and data();
///               expected to be non-empty (checked with assert).
/// @param value  value written to each element.
/// @return the status of the last OpenCL call made, or CL_SUCCESS when the
///         container is empty and nothing was enqueued.
cl_int fill(Container& c, const T & value)
{
    cl::Event controlEvent;

    // Start from a defined value: with asserts compiled out an empty container
    // skips the branch below and would otherwise return an indeterminate status.
    cl_int status = CL_SUCCESS;

    assert (c.size() > 0);

    if (c.size() > 0)
    {
        status = c.getQueue().enqueueFillBuffer(c.data(), value, 0,
                                                c.size() * sizeof(T),
                                                NULL, &controlEvent);
        CLSPARSE_V(status, "queue.enqueueFillBuffer");

        status = controlEvent.wait();
        CLSPARSE_V(status, "controlEvent.wait");
    }

    return status;
}
void reset_gpu_write_buffer( ) { // we will solve A*x = y, where initial guess of x will be 0 T scalar = 0; CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &scalar, sizeof( T ), 0, sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" ); // reset solverControl for next call clsparseSetSolverParams(solverControl, NOPRECOND, 100, 1e-2, 1e-8); }
void resize(size_t size) { if(this->size() != size) { cl_mem_flags flags; cl_int status = BASE::data().getInfo(CL_MEM_FLAGS, &flags); CLSPARSE_V(status, "buffer get info flags"); BASE::data() = create_buffer(size, flags); } }
// Automatic type conversion operator to turn the reference object into a value_type operator value_type() const { cl_int status = CL_SUCCESS; naked_pointer result = reinterpret_cast< naked_pointer >( queue.enqueueMapBuffer( container.data(), true, CL_MAP_READ, index * sizeof( value_type ), sizeof( value_type ), NULL, NULL, &status) ); CLSPARSE_V( status, "Array failed map device memory to host memory for operator[]" ); value_type valTmp = *result; ::cl::Event unmapEvent; CLSPARSE_V( queue.enqueueUnmapMemObject( container.data(), result, NULL, &unmapEvent ), "Array failed to unmap host memory back to device memory" ); CLSPARSE_V( unmapEvent.wait( ), "Failed to wait for unmap event" ); return valTmp; }
// Constructs a vector of `size` elements on the queue taken from `control`.
// The buffer is created with `flags`; when `init` is true (default) every
// element is set to `value` via fill().
vector(clsparseControl control, size_t size, const value_type& value = value_type(),
       cl_mem_flags flags = CL_MEM_READ_WRITE, cl_bool init = true) :
    queue(control->queue)
{
    BASE::data() = create_buffer(size, flags);

    if (!init)
        return;   // caller asked for an uninitialized buffer

    const cl_int fillStatus = fill(control, value);
    CLSPARSE_V(fillStatus, "vector.fill");
}
void cleanup( ) { if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; size_t sparseBytes = 0; cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); gpuTimer->pruneOutliers( 3.0 ); gpuTimer->Print( sparseBytes, "GiB/s" ); gpuTimer->Reset( ); } //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); CLSPARSE_V( ::clReleaseMemObject( cooMatx.values ), "clReleaseMemObject cooMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( cooMatx.colIndices ), "clReleaseMemObject cooMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( cooMatx.rowIndices ), "clReleaseMemObject cooMtx.rowOffsets" ); }
// Computes the operation count for C = A * B where, in this benchmark,
// B is the same matrix as A (csrMtx).
//
// The column indices and row pointers of csrMtx are read back from the device
// (blocking reads). For each non-zero of A, the number of non-zeros in the
// row of B selected by that entry's column index is added up; the sum is
// doubled to count one multiply and one add per term.
clsparseIdx_t xSpMSpM_Getflopcount(void)
{
    const clsparseIdx_t nnzA = csrMtx.num_nonzeros;
    const clsparseIdx_t rowPtrCount = csrMtx.num_rows + 1; // Number of row offsets

    std::vector<clsparseIdx_t> hostColIdx(nnzA, 0);
    std::vector<clsparseIdx_t> hostRowPtr(rowPtrCount, 0);

    // copy column indices
    cl_int readStatus = clEnqueueReadBuffer(queue, csrMtx.col_indices, CL_TRUE, 0,
                                            nnzA*sizeof(clsparseIdx_t),
                                            hostColIdx.data(), 0, nullptr, nullptr);
    CLSPARSE_V(readStatus, "Reading col_indices from GPU failed");

    // copy row pointers
    readStatus = clEnqueueReadBuffer(queue, csrMtx.row_pointer, CL_TRUE, 0,
                                     rowPtrCount*sizeof(clsparseIdx_t),
                                     hostRowPtr.data(), 0, nullptr, nullptr);
    CLSPARSE_V(readStatus, "Reading row offsets from GPU failed");

    clsparseIdx_t products = 0;
    for (const clsparseIdx_t colIdx : hostColIdx)
    {
        // nnz in the 'colIdx'th row of B
        products += hostRowPtr[colIdx + 1] - hostRowPtr[colIdx];
    }

    // Two operations per product - Multiply & Add
    products = 2 * products;
    return products;
}// end of function
void setup_buffer( double pAlpha, double pBeta, const std::string& path ) { sparseFile = path; // Read sparse data from file and construct a COO matrix from it int nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk" ); // Now initialise a CSR matrix from the COO matrix clsparseInitCsrMatrix( &csrMtx ); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; csrMtx.num_cols = col; clsparseCsrMetaSize( &csrMtx, control ); cl_int status; csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); if(typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control ); else if (typeid(T) == typeid(double)) fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control ); else fileError = clsparseInvalidType; if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk" ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix clsparseInitVector( &x ); x.num_values = csrMtx.num_rows; x.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, x.num_values * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer x.values" ); clsparseInitVector( &y ); 
y.num_values = csrMtx.num_cols; y.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, y.num_values * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer y.values" ); }
DiagonalPreconditioner(const clsparseCsrMatrixPrivate* A, clsparseControl control) : invDiag_A( control, std::min( A->num_rows, A->num_cols ), 0, CL_MEM_READ_WRITE, false ) { cl_int status; // extract inverse diagonal from matrix A and store it in invDiag_A // easy to check with poisson matrix; status = extract_diagonal<T, true>(invDiag_A, A, control); CLSPARSE_V(status, "Invalid extract_diagonal kernel execution"); }
void cleanup(void) { if (gpuTimer && cpuTimer) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; #if 0 // Need to verify this calculation VK //size_t sparseBytes = sizeof(cl_int) * (csrMtx.nnz + csrMtx.m) + sizeof(T) * (csrMtx.nnz + csrMtx.n + csrMtx.m); //Host to GPU: CSR-> [rowOffsets(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) //GPU to Host: Dense - > [sizeof(T) * denseMtx.num_rows * denseMTx.num_cols] size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (denseMtx.num_rows * denseMtx.num_cols); cpuTimer->pruneOutliers(3.0); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset(); gpuTimer->pruneOutliers( 3.0 ); gpuTimer->Print( sparseBytes, "GiB/s" ); gpuTimer->Reset(); #endif // Calculate Number of Elements transformed per unit time size_t sparseElements = csrMtx.num_nonzeros; cpuTimer->pruneOutliers(3.0); cpuTimer->Print(sparseElements, "GiElements/s"); cpuTimer->Reset(); gpuTimer->pruneOutliers(3.0); gpuTimer->Print(sparseElements, "GiElements/s"); gpuTimer->Reset(); } //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(denseMtx.values), "clReleaseMemObject denseMtx.values"); }
/// Creates a reference to `range` consecutive elements of `rhs` starting at
/// element `index`, and maps that region to host memory (blocking map, read
/// and write access). The mapped pointer is stored in host_buffer.
///
/// @param rhs    container whose buffer is mapped.
/// @param index  first element of the mapped region.
/// @param range  number of elements mapped.
/// @param queue  command queue used for the map operation.
reference_base(Container &rhs, difference_type index,
               difference_type range, cl::CommandQueue queue):
    container( rhs ), index( index ), range ( range ), queue(queue)
{
    cl_int status = CL_SUCCESS;

    // should we throw or map until container.size()?
    // The mapped elements are [index, index + range), so the region is still
    // inside the container when index + range equals size(); only a region
    // that extends past the end is rejected.
    assert( (index + range) <= container.size() );

    host_buffer = reinterpret_cast< naked_pointer >(
                queue.enqueueMapBuffer( container.data(), true, CL_MAP_READ | CL_MAP_WRITE,
                                        index * sizeof( value_type ),
                                        range * sizeof( value_type ),
                                        NULL, NULL, &status) );

    CLSPARSE_V( status, "Mapping device buffer on host failed" );
}
// Converts a double-precision COO matrix into CSR form.
//
// The dimensions and non-zero count are copied from coo to csr, the column
// indices and values are copied buffer-to-buffer, and the CSR row offsets
// are produced from the COO row indices by indices_to_offsets.
//
// Returns clsparseNotInitialized when the library is not initialized,
// clsparseInvalidControlObject when control is null, otherwise the status
// returned by indices_to_offsets.
clsparseStatus
clsparseDcoo2csr ( const clsparseCooMatrix* coo,
                   clsparseCsrMatrix* csr,
                   const clsparseControl control)
{
    if (!clsparseInitialized)
        return clsparseNotInitialized;

    //check opencl elements
    if (control == nullptr)
        return clsparseInvalidControlObject;

    csr->num_rows = coo->num_rows;
    csr->num_cols = coo->num_cols;
    csr->num_nonzeros = coo->num_nonzeros;

    // how to obtain proper type of the matrix indices? int assumed
    clsparse::vector<int> csrRowOffsets (control, csr->rowOffsets, csr->num_rows + 1);
    clsparse::vector<int> csrColIndices (control, csr->colIndices, csr->num_nonzeros);
    clsparse::vector<cl_double> csrValues (control, csr->values, csr->num_nonzeros);

    clsparse::vector<int> cooRowIndices (control, coo->rowIndices, coo->num_nonzeros);
    clsparse::vector<int> cooColIndices (control, coo->colIndices, coo->num_nonzeros);
    clsparse::vector<cl_double> cooValues (control, coo->values, coo->num_nonzeros);

    // Column indices and values carry over unchanged
    csrColIndices = cooColIndices;
    csrValues = cooValues;

    // Row indices are turned into row offsets
    const clsparseStatus status = indices_to_offsets(csrRowOffsets, cooRowIndices, control);
    CLSPARSE_V(status, "Error: coo2csr indices to offsets");

    return status;
}