int MatRead(char *fileA, PetscInt& n, Mat& A) { PetscInt *arrayIA, *arrayJA, // arrays of rows and columns of A *nnzA, // number of nonzeros in each row nzA; // number of nonzeros in matrix A PetscScalar *arrayVA; // array of values of A int read_error = 0; if (readSparseMatrix(fileA, n, nzA, nnzA, arrayIA, arrayJA, arrayVA)) { // read_error in reading in the vector, set read_error to 1 read_error = 1; } /*--------------------------------------------------------------------------- * set up the matrix A *---------------------------------------------------------------------------*/ MatCreateSeqAIJ(MPI_COMM_SELF, n, n, nzA, nnzA, &A); /*--------------------------------------------------------------------------- * set the values of matrix A *---------------------------------------------------------------------------*/ /* PetscMalloc1(n * sizeof(PetscInt), &idx); for (PetscInt i = 0; i < n; ++i) { idx[i] = i; } MatSetValues(A, n, idx, n, idx, arrayA, INSERT_VALUES); */ for (PetscInt i = 0; i < nzA; ++i) { MatSetValue(A, arrayIA[i], arrayJA[i], arrayVA[i], INSERT_VALUES); if (arrayIA[i] != arrayJA[i]) { MatSetValue(A, arrayJA[i], arrayIA[i], arrayVA[i], INSERT_VALUES); } } /*--------------------------------------------------------------------------- * assemble the matrix A *---------------------------------------------------------------------------*/ MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); free(nnzA); free(arrayIA); free(arrayJA); free(arrayVA); }
static bool import_sparse(int* pvCtx, int _iDatasetId, int _iItemPos, int *_piAddress, char *_pstVarname) { int iRet = 0; int iRows = 0; int iCols = 0; int iComplex = 0; double *pdblReal = NULL; double *pdblImg = NULL; int iNbItem = 0; int *piNbItemRow = NULL; int *piColPos = NULL; SciErr sciErr; iRet = getSparseDimension(_iDatasetId, &iRows, &iCols, &iNbItem); if (iRet) { return false; } iComplex = isComplexData(_iDatasetId); if (iComplex) { piNbItemRow = (int *)MALLOC(iRows * sizeof(int)); piColPos = (int *)MALLOC(iNbItem * sizeof(int)); pdblReal = (double *)MALLOC(iNbItem * sizeof(double)); pdblImg = (double *)MALLOC(iNbItem * sizeof(double)); iRet = readSparseComplexMatrix(_iDatasetId, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal, pdblImg); } else { piNbItemRow = (int *)MALLOC(iRows * sizeof(int)); piColPos = (int *)MALLOC(iNbItem * sizeof(int)); pdblReal = (double *)MALLOC(iNbItem * sizeof(double)); iRet = readSparseMatrix(_iDatasetId, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal); } if (iRet) { FREE(piNbItemRow); FREE(piColPos); FREE(pdblReal); if (iComplex) { FREE(pdblImg); } return false; } if (_piAddress == NULL) { if (iComplex) { sciErr = createNamedComplexSparseMatrix(pvCtx, _pstVarname, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal, pdblImg); } else { sciErr = createNamedSparseMatrix(pvCtx, _pstVarname, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal); } } else //if not null this variable is in a list { if (iComplex) { sciErr = createComplexSparseMatrixInNamedList(pvCtx, _pstVarname, _piAddress, _iItemPos, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal, pdblImg); } else { sciErr = createSparseMatrixInNamedList(pvCtx, _pstVarname, _piAddress, _iItemPos, iRows, iCols, iNbItem, piNbItemRow, piColPos, pdblReal); } } FREE(piNbItemRow); FREE(piColPos); FREE(pdblReal); if (iComplex) { FREE(pdblImg); } if (sciErr.iErr) { printError(&sciErr, 0); return false; } return true; }
int main(int argc, char** argv) { // timer struct timeval st, et; float gputime = 0.0, cputime = 0.0; // read Sparse Matrix from file or generate if (argc < 2 || argc > 4) { printf("Correct Usage: <executable> <input matrix file>\n"); exit(-1); } // init the network agile::NetworkEnvironment environment(argc, argv); // allocate a GPU typedef agile::GPUCommunicator<unsigned, float, float> communicator_type; communicator_type com; com.allocateGPU(); char spmfileName[256]; strcpy(spmfileName, argv[1]); if (!fileIsReadable(spmfileName)) { printf("Non-existent input matrix file\n"); exit(-1); } unsigned m_num_rows, m_num_cols; std::vector<unsigned> m_row_nnz; std::vector<unsigned> m_column_index; std::vector<float> m_data; // read in matrix from matrix-market file readSparseMatrix(spmfileName, 0, m_num_rows, m_num_cols, m_row_nnz, m_column_index, m_data); std::cout << m_num_rows << "\t" << m_num_cols << "\t"; /* PRINT_VEC("m_row_nnz", m_row_nnz); PRINT_VEC("m_column_index", m_column_index); PRINT_VEC("m_data", m_data); */ // init gpu matrix agile::GPUCSMatrix<float> A(m_row_nnz, m_column_index, m_data); // init random vector std::vector<float> x_host(m_num_cols, 0); srand(time(NULL)); for (unsigned i=0; i<m_num_cols; ++i) x_host[i] = rand() / (float)RAND_MAX; //PRINT_VEC("RANDOM X VECTOR", x_host); // init gpu vector agile::GPUVector<float> x(m_num_cols); x.assignFromHost(x_host.begin(), x_host.end()); // init result gpu vector: y agile::GPUVector<float> y(m_num_rows); // start time gettimeofday(&st, NULL); for (unsigned t=0; t<NUM_ITER; ++t) { // gpu multiplication agile::multiply(A, x, y); cudaThreadSynchronize(); } // stop time gettimeofday(&et, NULL); gputime = ((et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0)/NUM_ITER; // transfer GPU multiplication result back to cpu std::vector<float> y_host; y.copyToHost(y_host); //----------------- CPU computation from ibm demo --------------------------- SpMatrix m; readSparseMatrix(&m, spmfileName, 0); unsigned int numNonZeroElements = m.numNZEntries; unsigned int memSize_row = sizeof(float) * m_num_rows; // allocate host memory float* h_x = (float*) malloc(memSize_row); #if PADDED_CSR float *h_val; unsigned int *h_indices, *h_rowIndices; genPaddedCSRFormat(&m, &h_val, &h_rowIndices, &h_indices); #else float* h_val = (float*) malloc(sizeof(float)*numNonZeroElements); unsigned int* h_indices = (unsigned int*) malloc(sizeof(int)*numNonZeroElements); unsigned int* h_rowIndices = (unsigned int*) malloc(sizeof(int)*(m_num_rows+1)); genCSRFormat(&m, h_val, h_rowIndices, h_indices); #endif // CPU REFERENCE float* reference = (float*) malloc(memSize_row); #if EXEC_CPU #if TIMER gettimeofday(&st, NULL); #endif // compute reference solution #if BCSR float *val; unsigned int *rowIndices, *indices; unsigned int numblocks; genBCSRFormat(&m, &val, &rowIndices, &indices, &numblocks, BCSR_r, BCSR_c); computeSpMV_BCSR(reference, val, rowIndices, indices, &(x_host[0]), m_num_rows, m_num_cols, BCSR_r, BCSR_c); #else computeSpMV(reference, h_val, h_rowIndices, h_indices, &(x_host[0]), m_num_rows); #endif #if TIMER gettimeofday(&et, NULL); cputime = (et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0; #endif #endif float flops= ((numNonZeroElements * 2) / (gputime*1000000)); //printf("GPU (ms) \tCPU (ms) \tGFLOPS\n"); printf("%f\t%f\t%f\t", gputime, cputime, flops); #if VERIFY // check result float error_norm, ref_norm, diff; error_norm = 0; ref_norm = 0; for (unsigned i = 0; i < m_num_rows; ++i) { diff = reference[i] - y_host[i]; error_norm += diff * diff; ref_norm += reference[i] * reference[i]; } error_norm = (float)sqrt((double)error_norm); ref_norm = (float)sqrt((double)ref_norm); if (fabs(ref_norm) < 1e-7) printf ("Test FAILED"); else printf( "Test %s", ((error_norm / ref_norm) < 1e-6f) ? "PASSED" : "FAILED"); #endif free(reference); free(h_x); #if !PADDED_CSR free(h_val); free(h_indices); free(h_rowIndices); #endif return 0; }