TxMatrixOptimizationDataCU::~TxMatrixOptimizationDataCU() { if (handle) { cusparseDestroy(handle); handle = 0; } if (matDescr) { cusparseDestroyMatDescr(matDescr); matDescr = 0; } if (localMatrix) { cusparseDestroyHybMat(localMatrix); localMatrix = 0; } if (gsContext) { cugelusDestroySorIterationData(gsContext); gsContext = 0; } if (f2c) { CHKCUDAERR(cudaFree(f2c)); } if (workvector) { CHKCUDAERR(cudaFree(workvector)); } #ifndef HPCG_NOMPI CHKCUDAERR(cudaFree(elementsToSend)); CHKCUDAERR(cudaFree(sendBuffer_d)); #endif }
void Kokkos::CUSPARSEdetails::CUSPARSEMatDescDestroyer::free(cusparseMatDescr_t *ptr) { cusparseStatus_t status = cusparseDestroyMatDescr( *ptr ); delete ptr; TEUCHOS_TEST_FOR_EXCEPTION(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "Kokkos::CUSPARSEdetails::CUSPARSEMatDescDestroyer::free(): unknown error.") }
void Caffe::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. CUDA_CHECK(cudaSetDevice(device_id)); if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().cusparse_descr_)CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_)); if (Get().cusparse_handle_)CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_)); CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_)); // cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL); // cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO); LOG(INFO)<<"set descr"; CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); }
Caffe::~Caffe() { if (cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(cusparse_descr_)); if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_)); if (cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(cusparse_handle_)); if (curand_generator_) { CURAND_CHECK(curandDestroyGenerator(curand_generator_)); } }
magma_int_t magma_dapplycuicc_l( magma_d_vector b, magma_d_vector *x, magma_d_preconditioner *precond ){ double one = MAGMA_D_MAKE( 1.0, 0.0); // CUSPARSE context // cusparseHandle_t cusparseHandle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); if(cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrL; cusparseStatus = cusparseCreateMatDescr(&descrL); if(cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if(cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_NON_UNIT); if(cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER); if(cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO); if(cusparseStatus != 0) printf("error in IndexBase.\n"); // end CUSPARSE context // cusparseStatus = cusparseDcsrsv_solve( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, &one, descrL, precond->M.val, precond->M.row, precond->M.col, precond->cuinfoL, b.val, x->val ); if(cusparseStatus != 0) printf("error in L triangular solve:%p.\n", precond->cuinfoL ); cusparseDestroyMatDescr( descrL ); cusparseDestroy( cusparseHandle ); magma_device_sync(); return MAGMA_SUCCESS; }
// finalize CUDA void ssp_finalize_cuda(ssp_cuda *cudaHandle) { if (!cudaHandle) return; if (cudaHandle->cusparse_handle) cusparseDestroy(cudaHandle->cusparse_handle); if (cudaHandle->cusparse_matDescr) cusparseDestroyMatDescr(cudaHandle->cusparse_matDescr); free(cudaHandle); cudaDeviceReset(); }
extern "C" magma_int_t magma_capplycumicc_l( magma_c_matrix b, magma_c_matrix *x, magma_c_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; magmaFloatComplex one = MAGMA_C_MAKE( 1.0, 0.0); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseCcsrsm_solve( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, b.num_rows*b.num_cols/precond->M.num_rows, &one, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL, b.dval, precond->M.num_rows, x->dval, precond->M.num_rows )); magma_device_sync(); cleanup: cusparseDestroyMatDescr( descrL ); cusparseDestroy( cusparseHandle ); return info; }
extern "C" magma_int_t magma_dapplycumilu_r_transpose( magma_d_matrix b, magma_d_matrix *x, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrU=NULL; double one = MAGMA_D_MAKE( 1.0, 0.0); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseDcsrsm_solve( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows, b.num_rows*b.num_cols/precond->UT.num_rows, &one, descrU, precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT, b.dval, precond->UT.num_rows, x->dval, precond->UT.num_rows )); cleanup: cusparseDestroyMatDescr( descrU ); cusparseDestroy( cusparseHandle ); return info; }
extern "C" magma_int_t magma_dcumilusetup_transpose( magma_d_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR}; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrLT=NULL; cusparseMatDescr_t descrUT=NULL; // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); // transpose the matrix magma_dmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); magma_dmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT )); CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows, precond->LT.nnz, descrLT, precond->LT.dval, precond->LT.drow, precond->LT.dcol, precond->cuinfoLT )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT )); CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows, precond->UT.nnz, descrUT, precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT )); cleanup: cusparseDestroyMatDescr( descrLT ); cusparseDestroyMatDescr( descrUT ); cusparseDestroy( cusparseHandle ); magma_dmfree(&Ah1, queue ); magma_dmfree(&Ah2, queue ); return info; }
magma_int_t magma_dcustomilusetup( magma_d_matrix A, magma_d_matrix b, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; magma_d_matrix hA={Magma_CSR}; char preconditionermatrix[255]; // first L snprintf( preconditionermatrix, sizeof(preconditionermatrix), "precondL.mtx" ); CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) ); CHECK( magma_dmtransfer( hA, &precond->L, Magma_CPU, Magma_DEV , queue )); // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); magma_dmfree( &hA, queue ); // now U snprintf( preconditionermatrix, sizeof(preconditionermatrix), "precondU.mtx" ); CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) ); CHECK( magma_dmtransfer( hA, &precond->U, Magma_CPU, Magma_DEV , queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, precond->L.nnz, descrL, precond->L.val, precond->L.row, precond->L.col, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, precond->U.nnz, descrU, precond->U.val, precond->U.row, precond->U.col, precond->cuinfoU )); cleanup: cusparseDestroy( cusparseHandle ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseHandle=NULL; descrL=NULL; descrU=NULL; magma_dmfree( &hA, queue ); return info; }
~xDense2Csr() { cusparseDestroyMatDescr(descrA); }// end
sparse_matrix::~sparse_matrix() { assert(cudaFree(csrValA) == cudaSuccess); assert(cudaFree(csrRowPtrA) == cudaSuccess); assert(cudaFree(csrColIndA) == cudaSuccess); assert(cusparseDestroyMatDescr(descrA) == cudaSuccess); }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { TESTING_INIT(); magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magma_d_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; magma_int_t *pntre; double c_one = MAGMA_D_MAKE(1.0, 0.0); double c_zero = MAGMA_D_MAKE(0.0, 0.0); magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf( "\n# usage: ./run_dspmv" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &hA, queue ); } else { // file-matrix test magma_d_csr_mtx( &hA, argv[i], queue ); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; magma_d_vector hx, hy, dx, dy, hrefvec, hcheck; // init CPU vectors magma_d_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue ); magma_d_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue ); // init DEV vectors magma_d_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); #ifdef MAGMA_WITH_MKL // calling MKL with CSR pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) ); pntre[0] = 0; for (j=0; j<hA.num_rows; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } start = magma_wtime(); for (j=0; j<10; j++ ) { mkl_dcsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); TESTING_FREE_CPU( row ); TESTING_FREE_CPU( col ); free(pntre); #endif // MAGMA_WITH_MKL // copy matrix to GPU magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); // SpMV on GPU (CSR) -- this is the reference! start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_mfree(&dA, queue ); magma_d_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue ); // convert to ELL and copy to GPU magma_d_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue ); magma_d_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hA_ELL, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (ELL) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA_ELL, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_mfree(&dA_ELL, queue ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); if ( res < .000001 ) printf("# tester spmv ELL: ok\n"); else printf("# tester spmv ELL: failed\n"); magma_d_vfree( &hcheck, queue ); // convert to SELLP and copy to GPU magma_d_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ); magma_d_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hA_SELLP, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) magma_d_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv SELL-P: ok\n"); else printf("# tester spmv SELL-P: failed\n"); magma_d_vfree( &hcheck, queue ); magma_d_mfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); double alpha = c_one; double beta = c_zero; magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); // copy matrix to GPU magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue ); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseDcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE CSR\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error\n"); cusparseHybMat_t hybA; cusparseStatus = cusparseCreateHybMat( &hybA ); if (cusparseStatus != 0) printf("error\n"); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE CSR: ok\n"); else printf("# tester spmv cuSPARSE CSR: failed\n"); magma_d_vfree( &hcheck, queue ); magma_d_vfree( &dy, queue ); magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue ); cusparseDcsr2hyb(cusparseHandle, hA.num_rows, hA.num_cols, descrA, dA.dval, dA.drow, dA.dcol, hybA, 0, CUSPARSE_HYB_PARTITION_AUTO); start = magma_sync_wtime( queue ); for (j=0; j<10; j++) cusparseStatus = cusparseDhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, descrA, hybA, dx.dval, &beta, dy.dval); end = magma_sync_wtime( queue ); if (cusparseStatus != 0) printf("error in cuSPARSE HYB\n"); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (HYB).\n", (end-start)/10, FLOPS*10/(end-start) ); magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]); printf("# |x-y|_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester spmv cuSPARSE HYB: ok\n"); else printf("# tester spmv cuSPARSE HYB: failed\n"); magma_d_vfree( &hcheck, queue ); cusparseDestroyMatDescr( descrA ); cusparseDestroyHybMat( hybA ); cusparseDestroy( cusparseHandle ); magma_d_mfree(&dA, queue ); printf("\n\n"); // free CPU memory magma_d_mfree(&hA, queue ); magma_d_vfree(&hx, queue ); magma_d_vfree(&hy, queue ); magma_d_vfree(&hrefvec, queue ); // free GPU memory magma_d_vfree(&dx, queue ); magma_d_vfree(&dy, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
~cuSparseHandleType(){ cusparseDestroyMatDescr(a_descr); cusparseDestroyMatDescr(b_descr); cusparseDestroyMatDescr(c_descr); cusparseDestroy(handle); }
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magma_s_matrix hA={Magma_CSR}, hA_SELLP={Magma_CSR}, dA={Magma_CSR}, dA_SELLP={Magma_CSR}; magma_s_matrix hx={Magma_CSR}, hy={Magma_CSR}, dx={Magma_CSR}, dy={Magma_CSR}, hrefvec={Magma_CSR}, hcheck={Magma_CSR}; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; #ifdef MAGMA_WITH_MKL magma_int_t *pntre=NULL; #endif cusparseHandle_t cusparseHandle = NULL; cusparseMatDescr_t descr = NULL; float c_one = MAGMA_S_MAKE(1.0, 0.0); float c_zero = MAGMA_S_MAKE(0.0, 0.0); float accuracy = 1e-10; #define PRECISION_s #if defined(PRECISION_c) accuracy = 1e-4; #endif #if defined(PRECISION_s) accuracy = 1e-4; #endif magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf("\n# usage: ./run_sspmm" " [ --blocksize %lld --alignment %lld (for SELLP) ] matrices\n\n", (long long) hA_SELLP.blocksize, (long long) hA_SELLP.alignment ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_sm_5stencil( laplace_size, &hA, queue )); } else { // file-matrix test TESTING_CHECK( magma_s_csr_mtx( &hA, argv[i], queue )); } printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n", (long long) hA.num_rows, (long long) hA.num_cols, (long long) hA.nnz ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; // m - number of rows for the sparse matrix // n - number of vectors to be multiplied in the SpMM product magma_int_t m, n; m = hA.num_rows; n = 48; // init CPU vectors TESTING_CHECK( magma_svinit( &hx, Magma_CPU, m, n, c_one, queue )); TESTING_CHECK( magma_svinit( &hy, Magma_CPU, m, n, c_zero, queue )); // init DEV vectors TESTING_CHECK( magma_svinit( &dx, Magma_DEV, m, n, c_one, queue )); TESTING_CHECK( magma_svinit( &dy, Magma_DEV, m, n, c_zero, queue )); // calling MKL with CSR #ifdef MAGMA_WITH_MKL TESTING_CHECK( magma_imalloc_cpu( &pntre, m + 1 ) ); pntre[0] = 0; for (j=0; j < m; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT num_vecs = n; MKL_INT *col; TESTING_CHECK( magma_malloc_cpu( (void**) &col, nnz * sizeof(MKL_INT) )); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_CHECK( magma_malloc_cpu( (void**) &row, num_rows * sizeof(MKL_INT) )); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } // === Call MKL with consecutive SpMVs, using mkl_scsrmv === // warmp up mkl_scsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); start = magma_wtime(); for (j=0; j < 10; j++ ) { mkl_scsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); } end = magma_wtime(); printf( "\n > MKL SpMVs : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); // === Call MKL with blocked SpMVs, using mkl_scsrmm === char transa = 'n'; MKL_INT ldb = n, ldc=n; char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'}; // warm up mkl_scsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); start = magma_wtime(); for (j=0; j < 10; j++ ) { mkl_scsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); } end = magma_wtime(); printf( "\n > MKL SpMM : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); magma_free_cpu( row ); magma_free_cpu( col ); row = NULL; col = NULL; #endif // MAGMA_WITH_MKL // copy matrix to GPU TESTING_CHECK( magma_smtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue )); // SpMV on GPU (CSR) start = magma_sync_wtime( queue ); for (j=0; j < 10; j++) { TESTING_CHECK( magma_s_spmv( c_one, dA, dx, c_zero, dy, queue )); } end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); TESTING_CHECK( magma_smtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue )); magma_smfree(&dA, queue ); // convert to SELLP and copy to GPU TESTING_CHECK( magma_smconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue )); TESTING_CHECK( magma_smtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue )); magma_smfree(&hA_SELLP, queue ); magma_smfree( &dy, queue ); TESTING_CHECK( magma_svinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j < 10; j++) { TESTING_CHECK( magma_s_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue )); } end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); TESTING_CHECK( magma_smtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k < hA.num_rows; k++ ) { res=res + MAGMA_S_REAL(hcheck.val[k]) - MAGMA_S_REAL(hrefvec.val[k]); } printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm SELL-P: ok\n"); else printf("%% tester spmm SELL-P: failed\n"); magma_smfree( &hcheck, queue ); magma_smfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // magma_smfree( &dy, queue ); TESTING_CHECK( magma_svinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); //#ifdef PRECISION_d start = magma_sync_wtime( queue ); TESTING_CHECK( cusparseCreate( &cusparseHandle )); TESTING_CHECK( cusparseSetStream( cusparseHandle, magma_queue_get_cuda_stream(queue) )); TESTING_CHECK( cusparseCreateMatDescr( &descr )); TESTING_CHECK( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL )); TESTING_CHECK( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); float alpha = c_one; float beta = c_zero; // copy matrix to GPU TESTING_CHECK( magma_smtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue) ); for (j=0; j < 10; j++) { cusparseScsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, dA.num_rows, n, dA.num_cols, dA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, dA.num_cols, &beta, dy.dval, dA.num_cols); } end = magma_sync_wtime( queue ); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10*n/(end-start) ); TESTING_CHECK( magma_smtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k < hA.num_rows; k++ ) { res = res + MAGMA_S_REAL(hcheck.val[k]) - MAGMA_S_REAL(hrefvec.val[k]); } printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm cuSPARSE: ok\n"); else printf("%% tester spmm cuSPARSE: failed\n"); magma_smfree( &hcheck, queue ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); descr = NULL; cusparseHandle = NULL; //#endif printf("\n\n"); // free CPU memory magma_smfree( &hA, queue ); magma_smfree( &hx, queue ); magma_smfree( &hy, queue ); magma_smfree( &hrefvec, queue ); // free GPU memory magma_smfree( &dx, queue ); magma_smfree( &dy, queue ); magma_smfree( &dA, queue); #ifdef MAGMA_WITH_MKL magma_free_cpu( pntre ); #endif i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
extern "C" magma_int_t magma_dcuspmm( magma_d_sparse_matrix A, magma_d_sparse_matrix B, magma_d_sparse_matrix *AB ){ if( A.memory_location == Magma_DEV && B.memory_location == Magma_DEV && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) ){ magma_d_sparse_matrix C; C.num_rows = A.num_rows; C.num_cols = A.num_cols; C.storage_type = A.storage_type; C.memory_location = A.memory_location; // CUSPARSE context // cusparseHandle_t handle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&handle); if(cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseMatDescr_t descrB; cusparseMatDescr_t descrC; cusparseStatus = cusparseCreateMatDescr(&descrA); cusparseStatus = cusparseCreateMatDescr(&descrB); cusparseStatus = cusparseCreateMatDescr(&descrC); if(cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL); if(cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO); if(cusparseStatus != 0) printf("error in IndexBase.\n"); // multiply A and B on the device magma_int_t baseC; // nnzTotalDevHostPtr points to host memory magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz; cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); magma_index_malloc( &C.row, (A.num_rows + 1) ); cusparseXcsrgemmNnz(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_rows, A.num_rows, descrA, A.nnz, A.row, A.col, descrB, B.nnz, B.row, B.col, descrC, C.row, nnzTotalDevHostPtr ); if (NULL != nnzTotalDevHostPtr){ C.nnz = *nnzTotalDevHostPtr; }else{ // workaround as nnz and base C are magma_int_t magma_index_t base_t, nnz_t; magma_index_getvector( 1, C.row+C.num_rows, 1, &nnz_t, 1 ); magma_index_getvector( 1, C.row, 1, &base_t, 1 ); C.nnz = (magma_int_t) nnz_t; baseC = (magma_int_t) base_t; C.nnz -= baseC; } magma_index_malloc( &C.col, C.nnz ); magma_dmalloc( &C.val, C.nnz ); cusparseDcsrgemm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_rows, A.num_rows, descrA, A.nnz, A.val, A.row, A.col, descrB, B.nnz, B.val, B.row, B.col, descrC, C.val, C.row, C.col); cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroyMatDescr( descrC ); cusparseDestroy( handle ); // end CUSPARSE context // magma_d_mtransfer( C, AB, Magma_DEV, Magma_DEV ); magma_d_mfree( &C ); return MAGMA_SUCCESS; } else{ printf("error: CSRMM only supported on device and CSR format.\n"); return MAGMA_SUCCESS; } }
magma_int_t magma_ccustomicsetup( magma_c_matrix A, magma_c_matrix b, magma_c_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; magma_c_matrix hA={Magma_CSR}; char preconditionermatrix[255]; snprintf( preconditionermatrix, sizeof(preconditionermatrix), "/Users/hanzt0114cl306/work/matrices/ani/ani7_crop_ichol.mtx" ); CHECK( magma_c_csr_mtx( &hA, preconditionermatrix , queue) ); // for CUSPARSE CHECK( magma_cmtransfer( hA, &precond->M, Magma_CPU, Magma_DEV , queue )); // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_cmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_cmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.val, precond->M.row, precond->M.col, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.val, precond->M.row, precond->M.col, precond->cuinfoU )); cleanup: cusparseDestroy( cusparseHandle ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseHandle=NULL; descrL=NULL; descrU=NULL; magma_cmfree( &hA, queue ); return info; }
extern "C" magma_int_t magma_dcumiccsetup( magma_d_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; #if CUDA_VERSION >= 7000 csric02Info_t info_M=NULL; void *pBuffer = NULL; #endif magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR}; CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); U.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue )); // in case using fill-in if( precond->levels > 0 ){ magma_d_matrix hAL={Magma_CSR}, hAUt={Magma_CSR}; CHECK( magma_dsymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue )); magma_dmfree(&hAL, queue); magma_dmfree(&hAUt, queue); } CHECK( magma_dmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue )); magma_dmfree( &hACSR, queue ); CHECK( magma_dmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) )); // use kernel to manually check for zeros n the diagonal CHECK( magma_ddiagcheck( precond->M, queue ) ); #if CUDA_VERSION >= 7000 // this version has the bug fixed where a zero on the diagonal causes a crash CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) ); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); int buffersize; int structural_zero; int numerical_zero; CHECK_CUSPARSE( cusparseDcsric02_bufferSize( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, &buffersize ) ); CHECK( magma_malloc((void**)&pBuffer, buffersize) ); CHECK_CUSPARSE( cusparseDcsric02_analysis( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer )); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) ); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) ); CHECK_CUSPARSE( cusparseDcsric02( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) ); #else // this version contains the bug but is needed for backward compability CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); CHECK_CUSPARSE( cusparseDcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); #endif CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_dmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_dmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); } /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_dmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; magma_d_matrix hD, hR, hAt CHECK( magma_dcsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); CHECK( magma_d_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_dcsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); magma_dmfree(&hA, queue ); magma_dmfree(&hAt, queue ); */ cleanup: #if CUDA_VERSION >= 7000 magma_free( pBuffer ); cusparseDestroyCsric02Info( info_M ); #endif cusparseDestroySolveAnalysisInfo( precond->cuinfo ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_dmfree(&U, queue ); magma_dmfree(&hA, queue ); return info; }
extern "C" magma_int_t magma_d_spmv( double alpha, magma_d_matrix A, magma_d_matrix x, double beta, magma_d_matrix y, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix x2={Magma_CSR}; cusparseHandle_t cusparseHandle = 0; cusparseMatDescr_t descr = 0; // make sure RHS is a dense matrix if ( x.storage_type != Magma_DENSE ) { printf("error: only dense vectors are supported for SpMV.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if ( A.memory_location != x.memory_location || x.memory_location != y.memory_location ) { printf("error: linear algebra objects are not located in same memory!\n"); printf("memory locations are: %d %d %d\n", A.memory_location, x.memory_location, y.memory_location ); info = MAGMA_ERR_INVALID_PTR; goto cleanup; } // DEV case if ( A.memory_location == Magma_DEV ) { if ( A.num_cols == x.num_rows && x.num_cols == 1 ) { if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); cusparseDcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, &beta, y.dval ); } else if ( A.storage_type == Magma_ELL ) { //printf("using ELLPACKT kernel for SpMV: "); CHECK( magma_dgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLPACKT ) { //printf("using ELL kernel for SpMV: "); CHECK( magma_dgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLRT ) { //printf("using ELLRT kernel for SpMV: "); CHECK( magma_dgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, A.alignment, A.blocksize, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_SELLP ) { //printf("using SELLP kernel for SpMV: "); CHECK( magma_dgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_dgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_SPMVFUNCTION ) { //printf("using DENSE kernel for SpMV: "); CHECK( magma_dcustomspmv( alpha, x, beta, y, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_BCSR ) { //printf("using CUSPARSE BCSR kernel for SpMV: "); // CUSPARSE context // cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; int mb = magma_ceildiv( A.num_rows, A.blocksize ); int nb = magma_ceildiv( A.num_cols, A.blocksize ); CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); cusparseDbsrmv( cusparseHandle, dirA, CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks, &alpha, descr, A.dval, A.drow, A.dcol, A.blocksize, x.dval, &beta, y.dval ); } else { printf("error: format not supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } } else if ( A.num_cols < x.num_rows || x.num_cols > 1 ) { magma_int_t num_vecs = x.num_rows / A.num_cols * x.num_cols; if ( A.storage_type == Magma_CSR ) { CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); if ( x.major == MagmaColMajor) { cusparseDcsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } else if ( x.major == MagmaRowMajor) { /*cusparseDcsrmm2(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); */ } } else if ( A.storage_type == Magma_SELLP ) { if ( x.major == MagmaRowMajor) { CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue )); } else if ( x.major == MagmaColMajor) { // transpose first to row major CHECK( magma_dvtranspose( x, &x2, queue )); CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x2.dval, beta, y.dval, queue )); } } /*if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_dmgemv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); }*/ else { printf("error: format not supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } } } // CPU case missing! else { printf("error: CPU not yet supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } cleanup: cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); cusparseHandle = 0; descr = 0; magma_dmfree(&x2, queue ); return info; }
extern "C" magma_int_t magma_c_cucsrtranspose( magma_c_sparse_matrix A, magma_c_sparse_matrix *B, magma_queue_t queue ) { // for symmetric matrices: convert to csc using cusparse if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) { magma_c_sparse_matrix C; magma_c_mtransfer( A, &C, Magma_DEV, Magma_DEV, queue ); // CUSPARSE context // cusparseHandle_t handle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&handle); cusparseSetStream( handle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseMatDescr_t descrB; cusparseStatus = cusparseCreateMatDescr(&descrA); cusparseStatus = cusparseCreateMatDescr(&descrB); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseCcsr2csc( handle, A.num_rows, A.num_rows, A.nnz, A.dval, A.drow, A.dcol, C.dval, C.dcol, C.drow, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in transpose: %d.\n", cusparseStatus); cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroy( handle ); magma_c_mtransfer( C, B, Magma_DEV, Magma_DEV, queue ); if( A.fill_mode == Magma_FULL ){ B->fill_mode = Magma_FULL; } else if( A.fill_mode == Magma_LOWER ){ B->fill_mode = Magma_UPPER; } else if ( A.fill_mode == Magma_UPPER ){ B->fill_mode = Magma_LOWER; } // end CUSPARSE context // return MAGMA_SUCCESS; }else if( A.storage_type == Magma_CSR && A.memory_location == Magma_CPU ){ magma_c_sparse_matrix A_d, B_d; magma_c_mtransfer( A, &A_d, A.memory_location, Magma_DEV, queue ); magma_c_cucsrtranspose( A_d, &B_d, queue ); magma_c_mtransfer( B_d, B, Magma_DEV, A.memory_location, queue ); magma_c_mfree( &A_d, queue ); magma_c_mfree( &B_d, queue ); return MAGMA_SUCCESS; }else { magma_c_sparse_matrix ACSR, BCSR; magma_c_mconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ); magma_c_cucsrtranspose( ACSR, &BCSR, queue ); magma_c_mconvert( BCSR, B, Magma_CSR, A.storage_type, queue ); magma_c_mfree( &ACSR, queue ); magma_c_mfree( &BCSR, queue ); return MAGMA_SUCCESS; } }
extern "C" magma_int_t magma_zcuspmm( magma_z_matrix A, magma_z_matrix B, magma_z_matrix *AB, magma_queue_t queue ) { magma_int_t info = 0; magma_z_matrix C={Magma_CSR}; C.num_rows = A.num_rows; C.num_cols = B.num_cols; C.storage_type = A.storage_type; C.memory_location = A.memory_location; C.fill_mode = MagmaFull; C.val = NULL; C.col = NULL; C.row = NULL; C.rowidx = NULL; C.blockinfo = NULL; C.diag = NULL; C.dval = NULL; C.dcol = NULL; C.drow = NULL; C.drowidx = NULL; C.ddiag = NULL; magma_index_t base_t, nnz_t, baseC; cusparseHandle_t handle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrB=NULL; cusparseMatDescr_t descrC=NULL; if ( A.memory_location == Magma_DEV && B.memory_location == Magma_DEV && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) ) { // CUSPARSE context / CHECK_CUSPARSE( cusparseCreate( &handle )); CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrC )); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO )); // nnzTotalDevHostPtr points to host memory magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz; CHECK_CUSPARSE( cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST )); CHECK( magma_index_malloc( &C.drow, (A.num_rows + 1) )); CHECK_CUSPARSE( cusparseXcsrgemmNnz( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, B.num_cols, A.num_cols, descrA, A.nnz, A.drow, A.dcol, descrB, B.nnz, B.drow, B.dcol, descrC, C.drow, nnzTotalDevHostPtr )); if (NULL != nnzTotalDevHostPtr) { C.nnz = *nnzTotalDevHostPtr; } else { // workaround as nnz and base C are magma_int_t magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1, queue ); magma_index_getvector( 1, C.drow, 1, &base_t, 1, queue ); C.nnz = (magma_int_t) nnz_t; baseC = (magma_int_t) base_t; C.nnz -= baseC; } CHECK( magma_index_malloc( &C.dcol, C.nnz )); CHECK( magma_zmalloc( &C.dval, C.nnz )); CHECK_CUSPARSE( cusparseZcsrgemm( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, B.num_cols, A.num_cols, descrA, A.nnz, A.dval, A.drow, A.dcol, descrB, B.nnz, B.dval, B.drow, B.dcol, descrC, C.dval, C.drow, C.dcol )); // end CUSPARSE context // //magma_device_sync(); magma_queue_sync( queue ); CHECK( magma_zmtransfer( C, AB, Magma_DEV, Magma_DEV, queue )); } else { info = MAGMA_ERR_NOT_SUPPORTED; } cleanup: cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroyMatDescr( descrC ); cusparseDestroy( handle ); magma_zmfree( &C, queue ); return info; }
static void dispose(cusparseMatDescr_t handle) { cuda_check( cusparseDestroyMatDescr(handle) ); }
extern "C" magma_int_t magma_dcumilugeneratesolverinfo( magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; magma_d_matrix hA={Magma_CSR}, hL={Magma_CSR}, hU={Magma_CSR}; if (precond->L.memory_location != Magma_DEV ){ CHECK( magma_dmtransfer( precond->M, &hA, precond->M.memory_location, Magma_CPU, queue )); hL.diagorder_type = Magma_UNITY; CHECK( magma_dmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue )); hU.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue )); CHECK( magma_dmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hA, queue ); magma_dmfree(&hL, queue ); magma_dmfree(&hU, queue ); } // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, precond->L.nnz, descrL, precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, precond->U.nnz, descrU, precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue )); } cleanup: cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroy( cusparseHandle ); return info; }
extern "C" magma_int_t magma_cmtransposeconjugate( magma_c_matrix A, magma_c_matrix *B, magma_queue_t queue ) { // for symmetric matrices: convert to csc using cusparse magma_int_t info = 0; cusparseHandle_t handle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrB=NULL; magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR}; magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR}; if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) { // fill in information for B B->storage_type = A.storage_type; B->diagorder_type = A.diagorder_type; B->memory_location = Magma_DEV; B->num_rows = A.num_cols; // transposed B->num_cols = A.num_rows; // transposed B->nnz = A.nnz; B->true_nnz = A.true_nnz; if ( A.fill_mode == MagmaFull ) { B->fill_mode = MagmaFull; } else if ( A.fill_mode == MagmaLower ) { B->fill_mode = MagmaUpper; } else if ( A.fill_mode == MagmaUpper ) { B->fill_mode = MagmaLower; } B->dval = NULL; B->drow = NULL; B->dcol = NULL; // memory allocation CHECK( magma_cmalloc( &B->dval, B->nnz )); CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, B->nnz )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &handle )); CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB )); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz, A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO) ); CHECK( magma_cmconjugate( B, queue )); } else if ( A.memory_location == Magma_CPU ){ CHECK( magma_cmtransfer( A, &A_d, A.memory_location, Magma_DEV, queue )); CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue )); CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue )); } else { CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue )); CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue )); CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue )); } cleanup: cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroy( handle ); magma_cmfree( &A_d, queue ); magma_cmfree( &B_d, queue ); magma_cmfree( &ACSR, queue ); magma_cmfree( &BCSR, queue ); if( info != 0 ){ magma_cmfree( B, queue ); } return info; }
extern "C" magma_int_t magma_dcumiccsetup( magma_d_sparse_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_d_sparse_matrix hA, hACSR, U, hD, hR, hAt; magma_d_mtransfer( A, &hA, A.memory_location, Magma_CPU, queue ); U.diagorder_type = Magma_VALUE; magma_d_mconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue ); magma_d_mconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue ); magma_d_mfree( &hACSR, queue ); magma_d_mtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue ); // CUSPARSE context // cusparseHandle_t cusparseHandle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_SYMMETRIC); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrA, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrA,CUSPARSE_FILL_MODE_LOWER); if (cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) ); if (cusparseStatus != 0) printf("error in info.\n"); // end CUSPARSE context // cusparseStatus = cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo); if (cusparseStatus != 0) printf("error in analysis IC.\n"); cusparseStatus = cusparseDcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo); cusparseStatus = cusparseDestroySolveAnalysisInfo( precond->cuinfo ); if (cusparseStatus != 0) printf("error in info-free.\n"); if (cusparseStatus != 0) printf("error in ICC.\n"); cusparseMatDescr_t descrL; cusparseStatus = cusparseCreateMatDescr(&descrL); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER); if (cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL); if (cusparseStatus != 0) printf("error in info.\n"); cusparseStatus = cusparseDcsrsm_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL ); if (cusparseStatus != 0) printf("error in analysis L.\n"); cusparseDestroyMatDescr( descrL ); cusparseMatDescr_t descrU; cusparseStatus = cusparseCreateMatDescr(&descrU); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_LOWER); if (cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU); if (cusparseStatus != 0) printf("error in info.\n"); cusparseStatus = cusparseDcsrsm_analysis(cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU ); if (cusparseStatus != 0) printf("error in analysis U.\n"); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_d_mfree(&U, queue ); magma_d_mfree(&hA, queue ); /* // to enable also the block-asynchronous iteration for the triangular solves magma_d_mtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue ); hA.storage_type = Magma_CSR; magma_dcsrsplit( 256, hA, &hD, &hR, queue ); magma_d_mtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue ); magma_d_mtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hD, queue ); magma_d_mfree(&hR, queue ); magma_d_cucsrtranspose( hA, &hAt, queue ); magma_dcsrsplit( 256, hAt, &hD, &hR, queue ); magma_d_mtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue ); magma_d_mtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&hD, queue ); magma_d_mfree(&hR, queue ); magma_d_mfree(&hA, queue ); magma_d_mfree(&hAt, queue ); */ return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_s_spmv( float alpha, magma_s_sparse_matrix A, magma_s_vector x, float beta, magma_s_vector y, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); if ( A.memory_location != x.memory_location || x.memory_location != y.memory_location ) { printf("error: linear algebra objects are not located in same memory!\n"); printf("memory locations are: %d %d %d\n", A.memory_location, x.memory_location, y.memory_location ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_INVALID_PTR; } // DEV case if ( A.memory_location == Magma_DEV ) { if ( A.num_cols == x.num_rows && x.num_cols == 1 ) { if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { //printf("using CSR kernel for SpMV: "); //magma_sgecsrmv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, // A.dval, A.drow, A.dcol, x.dval, beta, y.dval ); //printf("done.\n"); cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); cusparseScsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, &beta, y.dval ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); } else if ( A.storage_type == Magma_ELL ) { //printf("using ELLPACKT kernel for SpMV: "); magma_sgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLPACKT ) { //printf("using ELL kernel for SpMV: "); magma_sgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLRT ) { //printf("using ELLRT kernel for SpMV: "); magma_sgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, A.alignment, A.blocksize, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_SELLP ) { //printf("using SELLP kernel for SpMV: "); magma_sgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_sgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); } /* else if ( A.storage_type == Magma_BCSR ) { //printf("using CUSPARSE BCSR kernel for SpMV: "); // CUSPARSE context // cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); // end CUSPARSE context // cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; int mb = (A.num_rows + A.blocksize-1)/A.blocksize; int nb = (A.num_cols + A.blocksize-1)/A.blocksize; cusparseSbsrmv( cusparseHandle, dirA, CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks, &alpha, descr, A.dval, A.drow, A.dcol, A.blocksize, x.dval, &beta, y.dval ); //printf("done.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } } else if ( A.num_cols < x.num_rows || x.num_cols > 1 ) { magma_int_t num_vecs = x.num_rows / A.num_cols * x.num_cols; if ( A.storage_type == Magma_CSR ) { cusparseHandle_t cusparseHandle = 0; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); cusparseMatDescr_t descr = 0; cusparseStatus = cusparseCreateMatDescr(&descr); cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); if ( x.major == MagmaColMajor) { cusparseScsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } else if ( x.major == MagmaRowMajor) { cusparseScsrmm2(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); } else if ( A.storage_type == Magma_ELL ) { if ( x.major == MagmaColMajor) { magma_smgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaRowMajor) { // transpose first to col major magma_s_vector x2; magma_svtranspose( x, &x2, queue ); magma_smgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); magma_s_vfree(&x2, queue ); } } else if ( A.storage_type == Magma_ELLPACKT ) { if ( x.major == MagmaColMajor) { magma_smgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaRowMajor) { // transpose first to col major magma_s_vector x2; magma_svtranspose( x, &x2, queue ); magma_smgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue ); magma_s_vfree(&x2, queue ); } } else if ( A.storage_type == Magma_SELLP ) { if ( x.major == MagmaRowMajor) { magma_smgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ); } else if ( x.major == MagmaColMajor) { // transpose first to row major magma_s_vector x2; magma_svtranspose( x, &x2, queue ); magma_smgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x2.dval, beta, y.dval, queue ); magma_s_vfree(&x2, queue ); } }/* if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_smgemv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }*/ else { printf("error: format not supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } } } // CPU case missing! else { printf("error: CPU not yet supported.\n"); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_dcumicgeneratesolverinfo( magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_dmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; CHECK( magma_dcsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); CHECK( magma_d_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_dcsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); magma_dmfree(&hA, queue ); magma_dmfree(&hAt, queue ); */ cleanup: cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroy( cusparseHandle ); return info; }
extern "C" magma_int_t magma_dcumilusetup( magma_d_sparse_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { //magma_d_mvisu(A, queue ); // copy matrix into preconditioner parameter magma_d_sparse_matrix hA, hACSR; magma_d_mtransfer( A, &hA, A.memory_location, Magma_CPU, queue ); magma_d_mconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue ); magma_d_mtransfer(hACSR, &(precond->M), Magma_CPU, Magma_DEV, queue ); magma_d_mfree( &hA, queue ); magma_d_mfree( &hACSR, queue ); // CUSPARSE context // cusparseHandle_t cusparseHandle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseStatus = cusparseCreateMatDescr(&descrA); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrA, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) ); if (cusparseStatus != 0) printf("error in info.\n"); // end CUSPARSE context // cusparseStatus = cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo); if (cusparseStatus != 0) printf("error in analysis:%d\n", cusparseStatus); cusparseStatus = cusparseDcsrilu0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo); if (cusparseStatus != 0) printf("error in ILU:%d\n", cusparseStatus); cusparseStatus = cusparseDestroySolveAnalysisInfo( precond->cuinfo ); if (cusparseStatus != 0) printf("error in info-free.\n"); cusparseDestroyMatDescr( descrA ); magma_d_sparse_matrix hL, hU; magma_d_mtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue ); hL.diagorder_type = Magma_UNITY; magma_d_mconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue ); hU.diagorder_type = Magma_VALUE; magma_d_mconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue ); magma_d_mtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue ); magma_d_mtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue ); cusparseMatDescr_t descrL; cusparseStatus = cusparseCreateMatDescr(&descrL); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER); if (cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL); if (cusparseStatus != 0) printf("error in info.\n"); cusparseStatus = cusparseDcsrsm_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, precond->L.nnz, descrL, precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL ); if (cusparseStatus != 0) printf("error in analysis.\n"); cusparseDestroyMatDescr( descrL ); cusparseMatDescr_t descrU; cusparseStatus = cusparseCreateMatDescr(&descrU); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_UPPER); if (cusparseStatus != 0) printf("error in fillmode.\n"); cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU); if (cusparseStatus != 0) printf("error in info.\n"); cusparseStatus = cusparseDcsrsm_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, precond->U.nnz, descrU, precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU ); if (cusparseStatus != 0) printf("error in analysis.\n"); cusparseDestroyMatDescr( descrU ); magma_d_mfree(&hA, queue ); magma_d_mfree(&hL, queue ); magma_d_mfree(&hU, queue ); cusparseDestroy( cusparseHandle ); return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_zcuspaxpy( magmaDoubleComplex *alpha, magma_z_sparse_matrix A, magmaDoubleComplex *beta, magma_z_sparse_matrix B, magma_z_sparse_matrix *AB, magma_queue_t queue ) { if ( A.memory_location == Magma_DEV && B.memory_location == Magma_DEV && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) ) { magma_z_sparse_matrix C; C.num_rows = A.num_rows; C.num_cols = A.num_cols; C.storage_type = A.storage_type; C.memory_location = A.memory_location; magma_int_t stat_dev = 0; C.val = NULL; C.col = NULL; C.row = NULL; C.rowidx = NULL; C.blockinfo = NULL; C.diag = NULL; C.dval = NULL; C.dcol = NULL; C.drow = NULL; C.drowidx = NULL; C.ddiag = NULL; // CUSPARSE context // cusparseHandle_t handle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&handle); cusparseSetStream( handle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrA; cusparseMatDescr_t descrB; cusparseMatDescr_t descrC; cusparseStatus = cusparseCreateMatDescr(&descrA); cusparseStatus = cusparseCreateMatDescr(&descrB); cusparseStatus = cusparseCreateMatDescr(&descrC); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO); cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); // multiply A and B on the device magma_int_t baseC; // nnzTotalDevHostPtr points to host memory magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz; cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) ); cusparseXcsrgeamNnz(handle,A.num_rows, A.num_cols, descrA, A.nnz, A.drow, A.dcol, descrB, B.nnz, B.drow, B.dcol, descrC, C.row, nnzTotalDevHostPtr); if (NULL != nnzTotalDevHostPtr) { C.nnz = *nnzTotalDevHostPtr; } else { // workaround as nnz and base C are magma_int_t magma_index_t base_t, nnz_t; magma_index_getvector( 1, C.drow+C.num_rows, 1, &nnz_t, 1 ); magma_index_getvector( 1, C.drow, 1, &base_t, 1 ); C.nnz = (magma_int_t) nnz_t; baseC = (magma_int_t) base_t; C.nnz -= baseC; } stat_dev += magma_index_malloc( &C.dcol, C.nnz ); stat_dev += magma_zmalloc( &C.dval, C.nnz ); if( stat_dev != 0 ) { magma_z_mfree( &C, queue ); return MAGMA_ERR_DEVICE_ALLOC; } cusparseZcsrgeam(handle, A.num_rows, A.num_cols, alpha, descrA, A.nnz, A.dval, A.drow, A.dcol, beta, descrB, B.nnz, B.dval, B.drow, B.dcol, descrC, C.dval, C.drow, C.dcol); cusparseDestroyMatDescr( descrA ); cusparseDestroyMatDescr( descrB ); cusparseDestroyMatDescr( descrC ); cusparseDestroy( handle ); // end CUSPARSE context // magma_z_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue ); magma_z_mfree( &C, queue ); return MAGMA_SUCCESS; } else { printf("error: CSRSPAXPY only supported on device and CSR format.\n"); return MAGMA_SUCCESS; } }
extern "C" magma_int_t magma_dapplycumicc_r( magma_d_vector b, magma_d_vector *x, magma_d_preconditioner *precond, magma_queue_t queue ) { double one = MAGMA_D_MAKE( 1.0, 0.0); // CUSPARSE context // cusparseHandle_t cusparseHandle; cusparseStatus_t cusparseStatus; cusparseStatus = cusparseCreate(&cusparseHandle); cusparseSetStream( cusparseHandle, queue ); if (cusparseStatus != 0) printf("error in Handle.\n"); cusparseMatDescr_t descrU; cusparseStatus = cusparseCreateMatDescr(&descrU); if (cusparseStatus != 0) printf("error in MatrDescr.\n"); cusparseStatus = cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR); if (cusparseStatus != 0) printf("error in MatrType.\n"); cusparseStatus = cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT); if (cusparseStatus != 0) printf("error in DiagType.\n"); cusparseStatus = cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO); if (cusparseStatus != 0) printf("error in IndexBase.\n"); cusparseStatus = cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_LOWER); if (cusparseStatus != 0) printf("error in fillmode.\n"); // end CUSPARSE context // magma_int_t dofs = precond->M.num_rows; cusparseStatus = cusparseDcsrsm_solve( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, b.num_rows*b.num_cols/precond->M.num_rows, &one, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU, b.dval, precond->M.num_rows, x->dval, precond->M.num_rows); if (cusparseStatus != 0) printf("error in U triangular solve:%d.\n", precond->cuinfoU ); cusparseDestroyMatDescr( descrU ); cusparseDestroy( cusparseHandle ); magma_device_sync(); return MAGMA_SUCCESS; }