// Checks the storage behind a dense vector against a required size.
//
// vector        - dense vector whose `values` member is checked
// required_size - size forwarded to the underlying validateMemObject overload
//
// In a build where NDEBUG is not defined and BUILD_CLVERSION is at least 200,
// no check is performed: a notice is printed to stdout and clsparseSuccess is
// returned. In every other build the call is forwarded to the validateMemObject
// overload that accepts `vector.values`, and its result is returned unchanged.
clsparseStatus validateMemObject(cldenseVector &vector, size_t required_size)
{
#if (BUILD_CLVERSION >= 200) && !defined(NDEBUG)
    std::cout << "Don't know how to validate SVM void* buffer" << std::endl;
    return clsparseSuccess;
#else
    const clsparseStatus status = validateMemObject(vector.values, required_size);
    return status;
#endif
}
// Converts a single-precision (cl_float) CSR matrix into COO form.
//
// The COO dimensions (num_rows, num_cols, num_nonzeros) are copied from the CSR
// matrix. The column indices and values are copied buffer-to-buffer, and the
// row indices are produced from the CSR row offsets by csr2coo_transform.
//
// csr     - source matrix; viewed as clsparseCsrMatrixPrivate
// coo     - destination matrix; viewed as clsparseCooMatrixPrivate. Its
//           rowIndices, colIndices and values buffers are validated against
//           the sizes implied by num_nonzeros before anything is enqueued.
// control - library control object supplying the command queue and the
//           wavefront size
//
// Returns:
//   clsparseNotInitialized        if the library has not been initialised
//   clsparseInvalidControlObject  if control is null
//   the validateMemObject status  if one of the COO buffers is rejected
//   clsparseInvalidMemObj         if enqueueing one of the buffer copies fails
//   otherwise the status returned by csr2coo_transform
//
// The library and control checks run before `coo` is touched, so a call
// rejected by them leaves `coo` unmodified.
clsparseStatus clsparseScsr2coo(const clsparseCsrMatrix* csr,
                                clsparseCooMatrix* coo,
                                const clsparseControl control)
{
    if (!clsparseInitialized)
    {
        return clsparseNotInitialized;
    }

    // check opencl elements
    if (control == nullptr)
    {
        return clsparseInvalidControlObject;
    }

    const clsparseCsrMatrixPrivate* pCsr = static_cast<const clsparseCsrMatrixPrivate*>(csr);
    clsparseCooMatrixPrivate* pCoo = static_cast<clsparseCooMatrixPrivate*>(coo);

    pCoo->num_rows = pCsr->num_rows;
    pCoo->num_cols = pCsr->num_cols;
    pCoo->num_nonzeros = pCsr->num_nonzeros;

    // validate cl_mem objects
    clsparseStatus status = validateMemObject(pCoo->rowIndices, sizeof(cl_int) * pCoo->num_nonzeros);
    if (status != clsparseSuccess)
        return status;

    status = validateMemObject(pCoo->colIndices, sizeof(cl_int) * pCoo->num_nonzeros);
    if (status != clsparseSuccess)
        return status;

    status = validateMemObject(pCoo->values, sizeof(cl_float) * pCoo->num_nonzeros);
    if (status != clsparseSuccess)
        return status;

    // validate cl_mem sizes
    // TODO: ask about validateMemObjectSize

    // Average number of nonzeros per row. A matrix without rows yields 0 here
    // instead of dividing by zero.
    cl_uint nnz_per_row = 0;
    if (pCoo->num_rows != 0)
    {
        nnz_per_row = pCoo->num_nonzeros / pCoo->num_rows;
    }

    cl_uint wave_size = control->wavefront_size;
    cl_uint group_size = 256; // wave_size * 8; original author's note: 256 gives best performance
    cl_uint subwave_size = wave_size;

    // Shrink the subwave size as rows get sparser.
    // The first step only applies to devices whose wavefront is wider than 32
    // (the original comment names AMD with 64 as the example).
    if (wave_size > 32)
    {
        if (nnz_per_row < 64) { subwave_size = 32; }
    }
    if (nnz_per_row < 32) { subwave_size = 16; }
    if (nnz_per_row < 16) { subwave_size = 8; }
    if (nnz_per_row < 8)  { subwave_size = 4; }
    if (nnz_per_row < 4)  { subwave_size = 2; }

    // Compile-time options handed to the transform kernel.
    const std::string params = std::string()
            + "-DINDEX_TYPE=" + OclTypeTraits<cl_int>::type
            + " -DVALUE_TYPE=" + OclTypeTraits<cl_float>::type
            + " -DSIZE_TYPE=" + OclTypeTraits<cl_ulong>::type
            + " -DWG_SIZE=" + std::to_string(group_size)
            + " -DWAVE_SIZE=" + std::to_string(wave_size)
            + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size);

    // Column indices and values are identical in both formats, so they are
    // copied verbatim. With no nonzeros there is nothing to copy; the copies
    // are skipped because older OpenCL versions reject a zero-byte copy.
    if (pCoo->num_nonzeros != 0)
    {
        // copy indices
        cl_int copy_status = clEnqueueCopyBuffer(control->queue(),
                                                 pCsr->colIndices,
                                                 pCoo->colIndices,
                                                 0, 0,
                                                 sizeof(cl_int) * pCoo->num_nonzeros,
                                                 0, nullptr, nullptr);
        if (copy_status != CL_SUCCESS)
        {
            // Same status this file reports for a buffer that cannot be mapped.
            return clsparseInvalidMemObj;
        }

        // copy values
        copy_status = clEnqueueCopyBuffer(control->queue(),
                                          pCsr->values,
                                          pCoo->values,
                                          0, 0,
                                          sizeof(cl_float) * pCoo->num_nonzeros,
                                          0, nullptr, nullptr);
        if (copy_status != CL_SUCCESS)
        {
            return clsparseInvalidMemObj;
        }
    }

    // Expand the CSR row offsets into one row index per nonzero.
    return csr2coo_transform(pCoo->num_rows, pCoo->num_cols,
                             pCsr->rowOffsets,
                             pCoo->rowIndices,
                             params,
                             group_size,
                             subwave_size,
                             control);
}
// Reads a double-precision matrix from a Matrix Market (.mtx) file and stores
// it, converted to CSR, in the caller-provided buffers of `csrMatx`.
//
// csrMatx               - destination matrix; viewed as clsparseCsrMatrixPrivate.
//                         Its values, col_indices and row_pointer buffers must
//                         already exist. They are validated, never reallocated,
//                         so the caller's buffer memory flags are preserved.
// filePath              - path of the file to read; must end in ".mtx"
// control               - library control object supplying the command queue
// read_explicit_zeroes  - forwarded to MatrixMarketReader::MMReadFormat
//
// Returns:
//   clsparseInvalidControlObject  if control is null
//   clsparseInvalidFile           if filePath is null or the reader reports failure
//   clsparseInvalidFileFormat     if the path has no ".mtx" extension
//   the validateMemObject status  if one of the destination buffers is rejected
//   clsparseInvalidMemObj         if mapping one of the buffers fails
//   clsparseSuccess               otherwise
clsparseStatus clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath,
                                           clsparseControl control, cl_bool read_explicit_zeroes )
{
    // control->queue( ) is needed for every mapping below.
    if( control == nullptr )
        return clsparseInvalidControlObject;

    // Constructing a std::string from a null pointer is undefined behaviour.
    if( filePath == nullptr )
        return clsparseInvalidFile;

    clsparseCsrMatrixPrivate* pCsrMatx = static_cast<clsparseCsrMatrixPrivate*>( csrMatx );

    // Check that the file format is matrix market; the only format we can read right now
    // This is not a complete solution, and fails for directories with file names etc...
    // TODO: Should we use boost filesystem?
    const std::string strPath( filePath );
    const std::string::size_type dotPos = strPath.find_last_of( '.' );
    if( dotPos == std::string::npos || strPath.substr( dotPos + 1 ) != "mtx" )
        return clsparseInvalidFileFormat;

    // Read data from a file on disk into CPU buffers
    // Data is read natively as COO format with the reader
    MatrixMarketReader< cl_double > mm_reader;
    if( mm_reader.MMReadFormat( filePath, read_explicit_zeroes ) )
        return clsparseInvalidFile;

    // The destination buffers are supplied by the caller. Check each one
    // against the size the freshly read matrix needs and bail out on the
    // first mismatch.
    {
        clsparseStatus validationStatus =
            validateMemObject( pCsrMatx->values,
                               mm_reader.GetNumNonZeroes( ) * sizeof( cl_double ) );
        if( validationStatus != clsparseSuccess )
            return validationStatus;

        validationStatus =
            validateMemObject( pCsrMatx->col_indices,
                               mm_reader.GetNumNonZeroes( ) * sizeof( clsparseIdx_t ) );
        if( validationStatus != clsparseSuccess )
            return validationStatus;

        validationStatus =
            validateMemObject( pCsrMatx->row_pointer,
                               ( mm_reader.GetNumRows( ) + 1 ) * sizeof( clsparseIdx_t ) );
        if( validationStatus != clsparseSuccess )
            return validationStatus;
    }

    pCsrMatx->num_rows = mm_reader.GetNumRows( );
    pCsrMatx->num_cols = mm_reader.GetNumCols( );
    pCsrMatx->num_nonzeros = mm_reader.GetNumNonZeroes( );

    // Transfers data from CPU buffer to GPU buffers
    // Each buffer is mapped for writing with a blocking map; previous contents
    // are discarded (CL_MAP_WRITE_INVALIDATE_REGION).
    // NOTE(review): presumably clMemRAII unmaps in its destructor - confirm.
    cl_int mapStatus = 0;

    clMemRAII< cl_double > rCsrValues( control->queue( ), pCsrMatx->values );
    clMemRAII< clsparseIdx_t > rCsrcol_indices( control->queue( ), pCsrMatx->col_indices );
    clMemRAII< clsparseIdx_t > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer );

    cl_double* fCsrValues =
        rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
                             pCsrMatx->valOffset( ), pCsrMatx->num_nonzeros, &mapStatus );
    if( mapStatus != CL_SUCCESS )
    {
        CLSPARSE_V( mapStatus, "Error: Mapping rCsrValues failed" );
        return clsparseInvalidMemObj;
    }

    clsparseIdx_t* iCsrcol_indices =
        rCsrcol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
                                  pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros, &mapStatus );
    if( mapStatus != CL_SUCCESS )
    {
        CLSPARSE_V( mapStatus, "Error: Mapping rCsrcol_indices failed" );
        return clsparseInvalidMemObj;
    }

    clsparseIdx_t* iCsrrow_pointer =
        rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
                                  pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1, &mapStatus );
    if( mapStatus != CL_SUCCESS )
    {
        CLSPARSE_V( mapStatus, "Error: Mapping rCsrrow_pointer failed" );
        return clsparseInvalidMemObj;
    }

    // The following section of code converts the sparse format from COO to CSR
    Coordinate< cl_double >* coords = mm_reader.GetUnsymCoordinates( );
    std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_double > );

    // row_pointer[ r ] receives the index of the first sorted entry whose row
    // is >= r.
    // NOTE(review): assumes the reader yields zero-based row indices below
    // num_rows; a larger index would write past the mapped region - confirm.
    clsparseIdx_t current_row = 1;
    iCsrrow_pointer[ 0 ] = 0;
    for( clsparseIdx_t i = 0; i < pCsrMatx->num_nonzeros; i++ )
    {
        iCsrcol_indices[ i ] = coords[ i ].y;
        fCsrValues[ i ] = coords[ i ].val;

        while( coords[ i ].x >= current_row )
            iCsrrow_pointer[ current_row++ ] = i;
    }

    // Close every remaining row (trailing empty rows and the final sentinel)
    // with the nonzero count. The loop condition keeps each write inside the
    // num_rows + 1 mapped entries, including for a matrix with zero rows.
    while( current_row <= pCsrMatx->num_rows )
        iCsrrow_pointer[ current_row++ ] = pCsrMatx->num_nonzeros;

    return clsparseSuccess;
}