/**
 * Runs a fixed number of block-asynchronous iteration sweeps on A x = b.
 *
 * The routine copies A to the device, splits A with magma_zcsrsplit()
 * (block-size argument 256) into the two parts D and R, and then calls
 * magma_zbajac_csr() exactly solver_par->maxiter times with one local
 * iteration per call.  No convergence test is made inside the loop.
 *
 * @param A           system matrix; it is transferred from Magma_CPU, so it
 *                    is treated as a host matrix here
 * @param b           right-hand side
 * @param x           in: initial guess, out: iterate after the last sweep
 * @param solver_par  receives solver id, init_res, final_res, runtime,
 *                    numiter (= maxiter) and info (0 if the residual
 *                    decreased, -1 otherwise); res_vec and timing are set
 *                    to NULL
 *
 * @return always MAGMA_SUCCESS; the outcome is reported via solver_par->info
 */
magma_int_t
magma_zbaiter( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x,
               magma_z_solver_par *solver_par )
{
    magma_z_sparse_matrix dA, hD, hR, dD, dR;
    real_Double_t t_begin, t_end;
    double resnorm;
    magma_int_t inner_sweeps = 1;

    // solver feedback defaults
    solver_par->solver = Magma_BAITER;
    solver_par->info   = 0;

    // device copy of the full matrix, needed for the residual evaluations
    magma_z_mtransfer( A, &dA, Magma_CPU, Magma_DEV );

    // residual of the initial guess
    magma_zresidual( dA, b, *x, &resnorm );
    solver_par->init_res = resnorm;
    solver_par->res_vec  = NULL;
    solver_par->timing   = NULL;

    // split on the host, then ship both parts to the device
    magma_zcsrsplit( 256, A, &hD, &hR );
    magma_z_mtransfer( hD, &dD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hR, &dR, Magma_CPU, Magma_DEV );

    // timed iteration phase
    magma_device_sync();
    t_begin = magma_wtime();
    {
        int sweep = 0;
        while ( sweep < solver_par->maxiter ) {
            magma_zbajac_csr( inner_sweeps, dD, dR, b, x );
            ++sweep;
        }
    }
    magma_device_sync();
    t_end = magma_wtime();
    solver_par->runtime = (real_Double_t) t_end - t_begin;

    // residual after the last sweep
    magma_zresidual( dA, b, *x, &resnorm );
    solver_par->final_res = resnorm;
    solver_par->numiter   = solver_par->maxiter;

    // "success" only means that the residual went down
    solver_par->info = ( solver_par->init_res > solver_par->final_res ) ? 0 : -1;

    magma_z_mfree( &hD );
    magma_z_mfree( &hR );
    magma_z_mfree( &dD );
    magma_z_mfree( &dR );
    magma_z_mfree( &dA );

    return MAGMA_SUCCESS;
}   /* magma_zbaiter */
/**
 * Block-asynchronous iteration with overlapping blocks for A x = b.
 *
 * The number of overlapping decompositions is taken from
 * precond_par->levels and must be 1, 2, 4, 8, 16, 32, 64 or 128; for each
 * decomposition i the matrix is split by magma_zcsrsplit() with offset
 * i*overlap, where overlap = 256/levels (0 for a single decomposition).
 * The iteration itself is done by magma_zbajac_csr_overlap() with
 * precond_par->maxiter local iterations per call.
 *
 * @param A            system matrix (any location / storage type; it is
 *                     copied to the host and converted to CSR internally)
 * @param b            right-hand side
 * @param x            in: initial guess, out: final iterate
 * @param solver_par   in: maxiter, verbose; out: solver id, init_res,
 *                     final_res, runtime, numiter, spmv_count, info and, if
 *                     verbose > 0, res_vec / timing entries
 * @param precond_par  supplies levels (number of decompositions) and
 *                     maxiter (local iterations)
 * @param queue        queue passed to all MAGMA calls
 *
 * @return MAGMA_SUCCESS if the final residual is smaller than the initial
 *         one, MAGMA_DIVERGENCE otherwise, MAGMA_NOTCONVERGED if the number
 *         of decompositions is unsupported, or the error code of the first
 *         failing MAGMA call.  The same value is stored in solver_par->info.
 */
extern "C" magma_int_t
magma_zbaiter_overlap(
    magma_z_matrix A,
    magma_z_matrix b,
    magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    // NOTE: every variable with an initializer is declared before the first
    // possible jump to `cleanup`, so no goto bypasses an initialization.
    enum { max_blocks = 256 };

    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_BAITERO;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO;

    real_Double_t tempo1, tempo2, runtime=0;
    double residual;
    magma_int_t localiter = precond_par->maxiter;

    magma_z_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, A_d={Magma_CSR},
                   r={Magma_CSR}, D={Magma_CSR}, R={Magma_CSR},
                   empty={Magma_CSR};

    // setup
    magma_int_t matrices = precond_par->levels;
    // number of D_d/R_d slots that cleanup is allowed to touch; stays 0
    // until `matrices` has been validated against the array size
    magma_int_t num_blocks = 0;
    magma_int_t overlap;
    magma_int_t blocksize = 256;
    magma_int_t iterinc;
    struct magma_z_matrix D_d[ max_blocks ];
    struct magma_z_matrix R_d[ max_blocks ];

    // Give every slot the same initial state as the other local matrices.
    // Previously the arrays were left uninitialized although the cleanup
    // code hands them to magma_zmfree() on every error path.
    for( int i=0; i<max_blocks; i++ ){
        D_d[i] = empty;
        R_d[i] = empty;
    }

    if( matrices==2 || matrices==4 || matrices==8 || matrices==16
        || matrices==32 || matrices==64 || matrices==128 ){
        overlap = blocksize/matrices;
    } else if( matrices == 1 ){
        overlap = 0;
    } else {
        printf("error: overlap ratio not supported.\n");
        goto cleanup;
    }
    // validated: 1 <= matrices <= 128 < max_blocks
    num_blocks = matrices;

    CHECK( magma_zmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah, &ACSR, Ah.storage_type, Magma_CSR, queue ));
    CHECK( magma_zmtransfer( ACSR, &A_d, Magma_CPU, Magma_DEV, queue ));

    // initial residual
    CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
    solver_par->init_res = residual;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) residual;
    }

    // one shifted split per decomposition; the host copies are released
    // as soon as the device copies exist
    for( int i=0; i<matrices; i++ ){
        CHECK( magma_zcsrsplit( i*overlap, blocksize, ACSR, &D, &R, queue ));
        CHECK( magma_zmtransfer( D, &D_d[i], Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_zmtransfer( R, &R_d[i], Magma_CPU, Magma_DEV, queue ));
        magma_zmfree(&D, queue );
        magma_zmfree(&R, queue );
    }

    // without verbose output all iterations are done in a single batch,
    // otherwise the residual is sampled every `verbose` iterations
    if( solver_par->verbose == 0 ){
        iterinc = solver_par->maxiter;
    } else {
        iterinc = solver_par->verbose;
    }
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // block-asynchronous iteration iterator
    do
    {
        tempo1 = magma_sync_wtime( queue );
        solver_par->numiter += iterinc;
        for( int z=0; z<iterinc; z++ ){
            CHECK( magma_zbajac_csr_overlap( localiter, matrices, overlap,
                                             D_d, R_d, b, x, queue ));
        }
        tempo2 = magma_sync_wtime( queue );
        runtime += tempo2-tempo1;
        if ( solver_par->verbose > 0 ) {
            CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) residual;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) runtime;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    solver_par->runtime = runtime;
    CHECK( magma_zresidual( A_d, b, *x, &residual, queue));
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    if ( solver_par->init_res > solver_par->final_res ){
        info = MAGMA_SUCCESS;
    }
    else {
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_zmfree(&r, queue );
    magma_zmfree(&D, queue );
    magma_zmfree(&R, queue );
    // only validated slots are visited; each of them is either still in its
    // initial state or holds a device matrix created in the setup loop
    for( int i=0; i<num_blocks; i++ ){
        magma_zmfree(&D_d[i], queue );
        magma_zmfree(&R_d[i], queue );
    }
    magma_zmfree(&A_d, queue );
    magma_zmfree(&ACSR, queue );
    magma_zmfree(&Ah, queue );

    solver_par->info = info;
    return info;
}   /* magma_zbaiter_overlap */
/**
 * Sets up an iteratively computed incomplete LU preconditioner.
 *
 * Steps performed:
 *   1. copy A to the host and convert it to CSR; magma_zilustruct() is
 *      applied with precond->levels (fill-in structure);
 *   2. build CSRCOO versions of the lower part and of the transposed upper
 *      part and copy them to the device;
 *   3. run precond->sweeps calls of magma_zailu_csr_s();
 *   4. copy the factors back, merge them into hA (magma_z_LUmergein) and
 *      store the merged matrix on the device in precond->M;
 *   5. extract L and U again from the merged matrix, store them in
 *      precond->L / precond->U and the D parts of their magma_zcsrsplit()
 *      (block-size argument 256) in precond->LD / precond->UD;
 *   6. run the cuSPARSE triangular-solve analysis for L (unit diagonal,
 *      lower) and U (non-unit diagonal, upper), filling precond->cuinfoL
 *      and precond->cuinfoU.
 *
 * @param A        input matrix (any memory location / storage type)
 * @param precond  in: levels, sweeps; out: M, L, U, LD, UD, cuinfoL, cuinfoU
 *
 * @return always MAGMA_SUCCESS; cuSPARSE failures are only reported on
 *         stdout
 */
magma_int_t
magma_zailusetup( magma_z_sparse_matrix A, magma_z_preconditioner *precond )
{
    magma_z_sparse_matrix hAh, hA, hAL, hALCOO, hAU, hAUT, hAUCOO,
                          dAL, dAU, hL, hU, dL, dU, DL, RL, DU, RU;

    // copy original matrix to the host and convert it to CSR
    magma_z_mtransfer( A, &hAh, A.memory_location, Magma_CPU );
    magma_z_mconvert( hAh, &hA, hAh.storage_type, Magma_CSR );
    magma_z_mfree( &hAh );

    // in case using fill-in
    magma_zilustruct( &hA, precond->levels );

    // need only lower triangular
    // FIX: this was `hAL.diagorder_type == Magma_UNITY;`, a comparison
    // without effect, so the conversion below ran with an indeterminate
    // diagorder_type.  The second extraction of L further down already used
    // an assignment.
    hAL.diagorder_type = Magma_UNITY;
    magma_z_mconvert( hA, &hAL, Magma_CSR, Magma_CSRL );
    magma_z_mconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );
    // NOTE(review): dAL and dAU are both filled from hALCOO, dL and dU both
    // from hAUCOO - kept exactly as in the original; confirm against the
    // argument convention of magma_zailu_csr_s().
    magma_z_mtransfer( hALCOO, &dAL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hALCOO, &dAU, Magma_CPU, Magma_DEV );

    // need only upper triangular (handled in transposed form)
    magma_z_mconvert( hA, &hAU, Magma_CSR, Magma_CSRU );
    magma_z_cucsrtranspose( hAU, &hAUT );
    magma_z_mconvert( hAUT, &hAUCOO, Magma_CSR, Magma_CSRCOO );
    magma_z_mtransfer( hAUCOO, &dL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hAUCOO, &dU, Magma_CPU, Magma_DEV );

    magma_z_mfree( &hALCOO );
    magma_z_mfree( &hAL );
    magma_z_mfree( &hAUCOO );
    magma_z_mfree( &hAUT );
    magma_z_mfree( &hAU );

    // iterative factorization sweeps on the device
    for( int i=0; i<precond->sweeps; i++ ){
        magma_zailu_csr_s( dAL, dAU, dL, dU );
    }

    // bring the factors back and merge them into hA
    magma_z_mtransfer( dL, &hL, Magma_DEV, Magma_CPU );
    magma_z_mtransfer( dU, &hU, Magma_DEV, Magma_CPU );

    magma_z_LUmergein( hL, hU, &hA );

    magma_z_mtransfer( hA, &precond->M, Magma_CPU, Magma_DEV );

    magma_z_mfree( &dL );
    magma_z_mfree( &dU );
    magma_z_mfree( &dAL );
    magma_z_mfree( &dAU );

    // extract the final L and U from the merged matrix; the storage type is
    // reset to plain CSR after each conversion
    hAL.diagorder_type = Magma_UNITY;
    magma_z_mconvert( hA, &hAL, Magma_CSR, Magma_CSRL );
    hAL.storage_type = Magma_CSR;
    magma_z_mconvert( hA, &hAU, Magma_CSR, Magma_CSRU );
    hAU.storage_type = Magma_CSR;

    magma_z_mfree( &hA );
    magma_z_mfree( &hL );
    magma_z_mfree( &hU );

    magma_zcsrsplit( 256, hAL, &DL, &RL );
    magma_zcsrsplit( 256, hAU, &DU, &RU );

    magma_z_mtransfer( DL, &precond->LD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( DU, &precond->UD, Magma_CPU, Magma_DEV );

    // cuSPARSE variant: the complete triangular factors go to the device.
    // (A block-asynchronous solve would transfer RL / RU into
    // precond->L / precond->U instead.)
    magma_z_mtransfer( hAL, &precond->L, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hAU, &precond->U, Magma_CPU, Magma_DEV );

    magma_z_mfree( &hAL );
    magma_z_mfree( &hAU );
    magma_z_mfree( &DL );
    magma_z_mfree( &RL );
    magma_z_mfree( &DU );
    magma_z_mfree( &RU );

    // CUSPARSE context //
    cusparseHandle_t cusparseHandle;
    cusparseStatus_t cusparseStatus;
    cusparseStatus = cusparseCreate(&cusparseHandle);
    if(cusparseStatus != 0)    printf("error in Handle.\n");

    // analysis for L: triangular, unit diagonal, lower fill
    cusparseMatDescr_t descrL;
    cusparseStatus = cusparseCreateMatDescr(&descrL);
    if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus = cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
    if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus = cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_UNIT);
    if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus = cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO);
    if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus = cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER);
    if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL);
    if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.val, precond->L.row, precond->L.col, precond->cuinfoL );
    if(cusparseStatus != 0)    printf("error in analysis.\n");

    cusparseDestroyMatDescr( descrL );

    // analysis for U: triangular, non-unit diagonal, upper fill
    cusparseMatDescr_t descrU;
    cusparseStatus = cusparseCreateMatDescr(&descrU);
    if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus = cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
    if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus = cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT);
    if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus = cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO);
    if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus = cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_UPPER);
    if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU);
    if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.val, precond->U.row, precond->U.col, precond->cuinfoU );
    if(cusparseStatus != 0)    printf("error in analysis.\n");

    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );

    return MAGMA_SUCCESS;
}
/**
 * Sets up an iteratively computed incomplete Cholesky preconditioner.
 *
 * Steps performed:
 *   1. copy A to the host and convert it to CSR; magma_zilustruct() is
 *      applied with precond->levels (fill-in structure);
 *   2. take the lower part (Magma_CSRL), convert it to CSRCOO and copy it
 *      to the device twice (reference copy dAL and working copy dL);
 *   3. run precond->sweeps calls of magma_zaic_csr_s();
 *   4. copy the factor back, convert it to CSR and store it on the device
 *      in precond->M;
 *   5. split the factor and its transpose with magma_zcsrsplit()
 *      (block-size argument 256) and store the parts in precond->LD /
 *      precond->L and precond->UD / precond->U;
 *   6. run the cuSPARSE triangular-solve analysis on precond->M, once
 *      non-transposed (cuinfoL) and once transposed (cuinfoU).
 *
 * @param A        input matrix (any memory location / storage type)
 * @param precond  in: levels, sweeps; out: M, L, U, LD, UD, cuinfoL, cuinfoU
 *
 * @return always MAGMA_SUCCESS; cuSPARSE failures are only reported on
 *         stdout
 */
magma_int_t
magma_zaiccsetup( magma_z_sparse_matrix A, magma_z_preconditioner *precond )
{
    magma_z_sparse_matrix hAh, hA, hAL, hALCOO, dAL, hL, dL, DL, RL;

    // copy original matrix to the host and convert it to CSR
    magma_z_mtransfer( A, &hAh, A.memory_location, Magma_CPU );
    magma_z_mconvert( hAh, &hA, hAh.storage_type, Magma_CSR );
    magma_z_mfree( &hAh );

    // in case using fill-in
    magma_zilustruct( &hA, precond->levels );

    // lower triangular part in CSRCOO, twice on the device
    magma_z_mconvert( hA, &hAL, Magma_CSR, Magma_CSRL );
    magma_z_mconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );

    magma_z_mtransfer( hALCOO, &dAL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hALCOO, &dL, Magma_CPU, Magma_DEV );
    magma_z_mfree( &hALCOO );
    magma_z_mfree( &hAL );
    magma_z_mfree( &hA );

    // iterative factorization sweeps on the device
    for( int i=0; i<precond->sweeps; i++ ){
        magma_zaic_csr_s( dAL, dL );
    }
    magma_z_mtransfer( dL, &hL, Magma_DEV, Magma_CPU );

    magma_z_mfree( &dL );
    magma_z_mfree( &dAL );

    magma_z_mconvert( hL, &hAL, hL.storage_type, Magma_CSR );

    // for CUSPARSE
    magma_z_mtransfer( hAL, &precond->M, Magma_CPU, Magma_DEV );

    // split of the factor itself
    magma_zcsrsplit( 256, hAL, &DL, &RL );
    magma_z_mtransfer( DL, &precond->LD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( RL, &precond->L, Magma_CPU, Magma_DEV );

    magma_z_mfree( &hL );

    magma_z_cucsrtranspose( hAL, &hL );

    // FIX: DL and RL still own the result of the first split; release them
    // before they are reused as outputs, otherwise that host memory leaks.
    magma_z_mfree( &DL );
    magma_z_mfree( &RL );

    // split of the transposed factor
    magma_zcsrsplit( 256, hL, &DL, &RL );
    magma_z_mtransfer( DL, &precond->UD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( RL, &precond->U, Magma_CPU, Magma_DEV );

    magma_z_mfree( &hAL );
    magma_z_mfree( &hL );
    magma_z_mfree( &DL );
    magma_z_mfree( &RL );

    // CUSPARSE context //
    cusparseHandle_t cusparseHandle;
    cusparseStatus_t cusparseStatus;
    cusparseStatus = cusparseCreate(&cusparseHandle);
    if(cusparseStatus != 0)    printf("error in Handle.\n");

    // analysis for the solve with M (non-transposed)
    cusparseMatDescr_t descrL;
    cusparseStatus = cusparseCreateMatDescr(&descrL);
    if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus = cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
    if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus = cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_NON_UNIT);
    if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus = cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO);
    if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus = cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER);
    if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL);
    if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrL,
        precond->M.val, precond->M.row, precond->M.col, precond->cuinfoL );
    if(cusparseStatus != 0)    printf("error in analysis L.\n");

    cusparseDestroyMatDescr( descrL );

    // analysis for the solve with the transpose of M: the descriptor keeps
    // the lower fill mode and the transposition is requested via the
    // operation argument
    cusparseMatDescr_t descrU;
    cusparseStatus = cusparseCreateMatDescr(&descrU);
    if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus = cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
    if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus = cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT);
    if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus = cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO);
    if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus = cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_LOWER);
    if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU);
    if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle,
        CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrU,
        precond->M.val, precond->M.row, precond->M.col, precond->cuinfoU );
    if(cusparseStatus != 0)    printf("error in analysis U.\n");

    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );

    return MAGMA_SUCCESS;
}