/**
    Passes back a matrix as CSR arrays located on the GPU:
    dimensions plus device pointers to row pointer, column index and
    value arrays.

    If A is already CSR on the device, the output pointers simply alias
    A's arrays (no copy is made). Otherwise A is converted to CSR and
    transferred to the device; in that case the returned arrays are a
    fresh device allocation that the caller is responsible for.
*/
magma_int_t
magma_ccsrget_gpu(
    magma_c_matrix A,
    magma_int_t *m,
    magma_int_t *n,
    magmaIndex_ptr *row,
    magmaIndex_ptr *col,
    magmaFloatComplex_ptr *val,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_c_matrix A_DEV={Magma_CSR}, A_CSR={Magma_CSR};

    if ( A.memory_location == Magma_DEV && A.storage_type == Magma_CSR ) {
        // fast path: hand out aliases into A itself
        *m = A.num_rows;
        *n = A.num_cols;
        *val = A.dval;
        *col = A.dcol;
        *row = A.drow;
    } else {
        CHECK( magma_cmconvert( A, &A_CSR, A.storage_type, Magma_CSR, queue ));
        CHECK( magma_cmtransfer( A_CSR, &A_DEV, A.memory_location, Magma_DEV, queue ));
        // BUGFIX: the recursive call's status was silently discarded
        CHECK( magma_ccsrget_gpu( A_DEV, m, n, row, col, val, queue ));
    }

cleanup:
    // the host-side CSR scratch copy is never needed after the transfer
    magma_cmfree( &A_CSR, queue );
    // BUGFIX: the returned pointers alias A_DEV's device arrays, so A_DEV
    // must only be released on failure; freeing it unconditionally (as
    // before) handed the caller dangling device pointers.
    if ( info != 0 ) {
        magma_cmfree( &A_DEV, queue );
    }
    return info;
}
/**
    Generates an n x n band matrix from the given diagonal values and
    offsets and returns it as a CSR matrix in *A.

    diag_offset[0..offdiags] holds the offsets of the main diagonal and the
    'offdiags' super-diagonals; the matching sub-diagonals are mirrored
    automatically with mirrored values. diag_vals[0] is the value on the
    main diagonal, diag_vals[1..offdiags] the off-diagonal values.
    Entries whose column index falls outside the matrix are zeroed and
    removed by the final ELLPACKT -> CSR conversion.
*/
magma_int_t
magma_cmgenerator(
    magma_int_t n,
    magma_int_t offdiags,
    magma_index_t *diag_offset,
    magmaFloatComplex *diag_vals,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_c_matrix B={Magma_CSR};

    B.val = NULL;
    B.col = NULL;
    B.row = NULL;
    B.rowidx = NULL;
    B.blockinfo = NULL;
    B.diag = NULL;
    B.dval = NULL;
    B.dcol = NULL;
    B.drow = NULL;
    B.drowidx = NULL;
    B.ddiag = NULL;
    B.list = NULL;
    B.dlist = NULL;

    B.num_rows = n;
    B.num_cols = n;
    B.fill_mode = MagmaFull;
    B.memory_location = Magma_CPU;
    B.storage_type = Magma_ELLPACKT;
    B.max_nnz_row = (2*offdiags+1);

    CHECK( magma_cmalloc_cpu( &B.val, B.max_nnz_row*n ));
    CHECK( magma_index_malloc_cpu( &B.col, B.max_nnz_row*n ));

    // BUGFIX: use magma_int_t loop counters; a plain int could overflow
    // in the flattened index i*B.max_nnz_row for very large matrices.
    for( magma_int_t i=0; i<n; i++ ) { // stride over rows
        // left of diagonal (mirrored offsets, mirrored values)
        for( magma_int_t j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j ] = diag_vals[ offdiags - j ];
            B.col[ i*B.max_nnz_row + j ] = -1 * diag_offset[ offdiags-j ] + i;
        }
        // elements on the diagonal
        B.val[ i*B.max_nnz_row + offdiags ] = diag_vals[ 0 ];
        B.col[ i*B.max_nnz_row + offdiags ] = i;
        // right of diagonal
        for( magma_int_t j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j + offdiags +1 ] = diag_vals[ j+1 ];
            B.col[ i*B.max_nnz_row + j + offdiags +1 ] = diag_offset[ j+1 ] + i;
        }
    }

    // set invalid entries (column index outside [0,n)) to zero
    for( magma_int_t i=0; i<n; i++ ) {
        for( magma_int_t j=0; j<B.max_nnz_row; j++ ) {
            if ( (B.col[i*B.max_nnz_row + j] < 0) ||
                    (B.col[i*B.max_nnz_row + j] >= n) ) {
                B.val[ i*B.max_nnz_row + j ] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    // count the surviving nonzeros
    // NOTE(review): only the real part is inspected here, so a purely
    // imaginary diagonal value would not be counted - confirm intended.
    B.nnz = 0;
    for( magma_int_t i=0; i<n; i++ ) {
        for( magma_int_t j=0; j<B.max_nnz_row; j++ ) {
            if ( MAGMA_C_REAL( B.val[i*B.max_nnz_row + j]) != 0.0 )
                B.nnz++;
        }
    }
    B.true_nnz = B.nnz;
    // converting it to CSR will remove the invalid entries completely
    CHECK( magma_cmconvert( B, A, Magma_ELLPACKT, Magma_CSR, queue ));

cleanup:
    // BUGFIX: the conversion copies the data, so the ELLPACKT scratch
    // matrix must be released on the success path as well; previously it
    // was freed only when info != 0 and leaked otherwise.
    magma_cmfree( &B, queue );
    return info;
}
/**
    Builds the matrix of a 2D 5-point stencil on an n x n grid and
    returns it as a compressed CSR matrix of size n^2 x n^2 in *A.

    The band matrix is produced by magma_cmgenerator from the main
    diagonal and the off-diagonals at distances 1 and n; couplings that
    would wrap around the left/right boundary of a grid line are then
    zeroed and compressed away.
*/
magma_int_t
magma_cm_5stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_c_matrix hA={Magma_CSR};
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;

    // generate matrix of desired structure and size (2d 5-point stencil)
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;

    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;

    #define COMPLEX

    #ifdef COMPLEX
    // complex case
    diag_vals[0] = MAGMA_C_MAKE( 4.0, 4.0 );
    diag_vals[1] = MAGMA_C_MAKE( -1.0, -1.0 );
    diag_vals[2] = MAGMA_C_MAKE( -1.0, -1.0 );
    #else
    // real case
    diag_vals[0] = MAGMA_C_MAKE( 4.0, 0.0 );
    diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
    #endif

    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // zero the stencil couplings that would wrap across a grid-line
    // boundary (first and last column of each line of the grid)
    for( magma_int_t row=0; row < nn; row++ ) {
        for( magma_int_t idx=hA.row[row]; idx < hA.row[row+1]; idx++ ) {
            if ( (hA.col[idx] == row-1 ) && (row+1)%n == 1 )
                hA.val[idx] = MAGMA_C_MAKE( 0.0, 0.0 );
            if ( (hA.col[idx] == row+1 ) && (row)%n == n-1 )
                hA.val[idx] = MAGMA_C_MAKE( 0.0, 0.0 );
        }
    }

    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));
    magma_cmcsrcompressor( A, queue );
    A->true_nnz = A->nnz;

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
/**
    Builds the matrix of a 3D 27-point stencil on an n x n x n grid and
    returns it as a CSR matrix of size n^3 x n^3 in *A.

    The band structure is produced by magma_cmgenerator from the main
    diagonal plus 13 off-diagonals (mirrored below the diagonal by the
    generator); couplings that would wrap across the first/last grid
    column are then zeroed.
*/
magma_int_t
magma_cm_27stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t i,j,k;
    magma_c_matrix hA={Magma_CSR};

    // generate matrix of desired structure and size (3d 27-point stencil)
    magma_int_t nn = n*n*n;
    magma_int_t offdiags = 13;
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;
    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    // offsets of the main diagonal and the 13 upper off-diagonals:
    // neighbors within the same line, the same plane (+-n), and the
    // adjacent planes (+-n*n), each with their left/right companions
    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n-1;
    diag_offset[3] = n;
    diag_offset[4] = n+1;
    diag_offset[5] = n*n-n-1;
    diag_offset[6] = n*n-n;
    diag_offset[7] = n*n-n+1;
    diag_offset[8] = n*n-1;
    diag_offset[9] = n*n;
    diag_offset[10] = n*n+1;
    diag_offset[11] = n*n+n-1;
    diag_offset[12] = n*n+n;
    diag_offset[13] = n*n+n+1;

    // stencil weights: 26 on the diagonal, -1 for every neighbor
    diag_vals[0] = MAGMA_C_MAKE( 26.0, 0.0 );
    diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[3] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[4] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[5] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[6] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[7] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[8] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[9] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[10] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[11] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[12] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[13] = MAGMA_C_MAKE( -1.0, 0.0 );
    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    // i strides over the n*n grid lines, j over the columns in a line
    for( i=0; i < n*n; i++ ) {
        for( j=0; j < n; j++ ) {
            magma_index_t row = i*n+j;
            for( k=hA.row[row]; k<hA.row[row+1]; k++) {
                // left-neighbor couplings are invalid in the first column
                // of a grid line ((row+1)%n == 1)
                if ((hA.col[k] == row-1 ||
                    hA.col[k] == row-n-1 ||
                    hA.col[k] == row+n-1 ||
                    hA.col[k] == row-n*n+n-1 ||
                    hA.col[k] == row+n*n-n-1 ||
                    hA.col[k] == row-n*n-1 ||
                    hA.col[k] == row+n*n-1 ||
                    hA.col[k] == row-n*n-n-1 ||
                    hA.col[k] == row+n*n+n-1 ) && (row+1)%n == 1 )
                        hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
                // right-neighbor couplings are invalid in the last column
                // of a grid line ((row)%n == n-1)
                if ((hA.col[k] == row+1 ||
                    hA.col[k] == row-n+1 ||
                    hA.col[k] == row+n+1 ||
                    hA.col[k] == row-n*n+n+1 ||
                    hA.col[k] == row+n*n-n+1 ||
                    hA.col[k] == row-n*n+1 ||
                    hA.col[k] == row+n*n+1 ||
                    hA.col[k] == row-n*n-n+1 ||
                    hA.col[k] == row+n*n+n+1 ) && (row)%n == n-1 )
                        hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }
    // NOTE(review): unlike magma_cm_5stencil, the zeroed entries are not
    // compressed away here; true_nnz is taken before the conversion -
    // confirm this asymmetry is intended.
    hA.true_nnz = hA.nnz;
    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
/**
    Computes the conjugate transpose of A and returns it in *B.

    For CSR matrices on the device, the transpose is obtained by the
    cuSPARSE CSR -> CSC conversion followed by an element-wise conjugation.
    Other memory locations / storage formats are handled by moving or
    converting to device CSR and recursing. On failure, any partially
    built B is released.
*/
extern "C" magma_int_t
magma_cmtransposeconjugate(
    magma_c_matrix A,
    magma_c_matrix *B,
    magma_queue_t queue )
{
    // for symmetric matrices: convert to csc using cusparse
    magma_int_t info = 0;
    cusparseHandle_t handle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrB=NULL;

    magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR};
    magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR};

    if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
        // fill in information for B
        B->storage_type = A.storage_type;
        B->diagorder_type = A.diagorder_type;
        B->memory_location = Magma_DEV;
        B->num_rows = A.num_cols;  // transposed
        B->num_cols = A.num_rows;  // transposed
        B->nnz = A.nnz;
        B->true_nnz = A.true_nnz;
        // triangular fill mode flips under transposition
        if ( A.fill_mode == MagmaFull ) {
            B->fill_mode = MagmaFull;
        }
        else if ( A.fill_mode == MagmaLower ) {
            B->fill_mode = MagmaUpper;
        }
        else if ( A.fill_mode == MagmaUpper ) {
            B->fill_mode = MagmaLower;
        }
        B->dval = NULL;
        B->drow = NULL;
        B->dcol = NULL;

        // memory allocation
        CHECK( magma_cmalloc( &B->dval, B->nnz ));
        CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 ));
        CHECK( magma_index_malloc( &B->dcol, B->nnz ));

        // CUSPARSE context //
        CHECK_CUSPARSE( cusparseCreate( &handle ));
        CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
        CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));

        // the CSC representation of A is exactly A^T in CSR
        CHECK_CUSPARSE(
        cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz,
                          A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow,
                          CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO) );
        // conjugate the transposed values to obtain A^H
        CHECK( magma_cmconjugate( B, queue ));
    }
    else if ( A.memory_location == Magma_CPU ){
        // host matrix: move to the device, recurse, move the result back
        CHECK( magma_cmtransfer( A, &A_d, A.memory_location,
                Magma_DEV, queue ));
        CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue ));
        CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue ));
    }
    else {
        // other device storage formats: convert to CSR, recurse, convert back
        CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ));
        CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue ));
        CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue ));
    }

cleanup:
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroy( handle );
    magma_cmfree( &A_d, queue );
    magma_cmfree( &B_d, queue );
    magma_cmfree( &ACSR, queue );
    magma_cmfree( &BCSR, queue );
    // do not hand back a half-built result on failure
    if( info != 0 ){
        magma_cmfree( B, queue );
    }
    return info;
}
/**
    Solves A x = b via block-asynchronous Jacobi iteration with
    overlapping diagonal blocks of size 256: the matrix is split into
    'levels' shifted diagonal-block/remainder pairs (D_d[i], R_d[i]),
    and each outer sweep performs 'localiter' local updates on them.

    Supported overlap ratios are levels in {1, 2, 4, ..., 128};
    anything else is rejected. Convergence feedback is written to
    solver_par; returns MAGMA_SUCCESS when the residual decreased,
    MAGMA_DIVERGENCE otherwise.
*/
extern "C" magma_int_t
magma_cbaiter_overlap(
    magma_c_matrix A,
    magma_c_matrix b,
    magma_c_matrix *x,
    magma_c_solver_par *solver_par,
    magma_c_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_BAITERO;

    // some useful variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO;

    // initial residual
    real_Double_t tempo1, tempo2, runtime=0;
    float residual;
    magma_int_t localiter = precond_par->maxiter;

    magma_c_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, A_d={Magma_CSR},
                   r={Magma_CSR}, D={Magma_CSR}, R={Magma_CSR};

    // setup
    magma_int_t matrices;
    matrices = precond_par->levels;
    magma_c_matrix D_d[ 256 ];
    magma_c_matrix R_d[ 256 ];
    // BUGFIX: initialize the work arrays before any CHECK can jump to
    // cleanup; previously an early failure ran magma_cmfree over
    // uninitialized structs (undefined behavior).
    {
        magma_c_matrix mtx_init = {Magma_CSR};
        for( int i=0; i<256; i++ ){
            D_d[i] = mtx_init;
            R_d[i] = mtx_init;
        }
    }

    magma_int_t overlap;
    magma_int_t blocksize = 256;
    if( matrices==2 || matrices==4 || matrices==8 || matrices==16 ||
        matrices==32 || matrices==64 || matrices==128 ){
        overlap = blocksize/matrices;
    }else if( matrices == 1){
        overlap = 0;
    }else{
        printf("error: overlap ratio not supported.\n");
        // BUGFIX: the cleanup loop iterates up to 'matrices'; reset it so
        // an unsupported value (e.g. levels > 256) cannot index the
        // 256-element arrays out of bounds.
        matrices = 0;
        goto cleanup;
    }

    CHECK( magma_cmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
    CHECK( magma_cmconvert( Ah, &ACSR, Ah.storage_type, Magma_CSR, queue ));
    CHECK( magma_cmtransfer( ACSR, &A_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_cvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cresidualvec( A_d, b, *x, &r, &residual, queue));
    solver_par->init_res = residual;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) residual;
    }

    // setup: split the matrix into shifted diagonal blocks D and
    // remainders R, and move each pair to the device
    for( int i=0; i<matrices; i++ ){
        CHECK( magma_ccsrsplit( i*overlap, 256, ACSR, &D, &R, queue ));
        CHECK( magma_cmtransfer( D, &D_d[i], Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_cmtransfer( R, &R_d[i], Magma_CPU, Magma_DEV, queue ));
        magma_cmfree(&D, queue );
        magma_cmfree(&R, queue );
    }

    // with verbose logging, residuals are sampled every 'verbose' sweeps
    magma_int_t iterinc;
    if( solver_par->verbose == 0 ){
        iterinc = solver_par->maxiter;
    }
    else{
        iterinc = solver_par->verbose;
    }
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // block-asynchronous iteration iterator
    do
    {
        tempo1 = magma_sync_wtime( queue );
        solver_par->numiter+= iterinc;
        for( int z=0; z<iterinc; z++){
            CHECK( magma_cbajac_csr_overlap( localiter, matrices, overlap,
                    D_d, R_d, b, x, queue ));
        }
        tempo2 = magma_sync_wtime( queue );
        runtime += tempo2-tempo1;
        if ( solver_par->verbose > 0 ) {
            CHECK( magma_cresidualvec( A_d, b, *x, &r, &residual, queue));
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) residual;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) runtime;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    solver_par->runtime = runtime;
    CHECK( magma_cresidual( A_d, b, *x, &residual, queue));
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    // success here only means "the residual decreased"
    if ( solver_par->init_res > solver_par->final_res ){
        info = MAGMA_SUCCESS;
    }
    else {
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_cmfree(&r, queue );
    magma_cmfree(&D, queue );
    magma_cmfree(&R, queue );
    for( int i=0; i<matrices; i++ ){
        magma_cmfree(&D_d[i], queue );
        magma_cmfree(&R_d[i], queue );
    }
    magma_cmfree(&A_d, queue );
    magma_cmfree(&ACSR, queue );
    magma_cmfree(&Ah, queue );

    solver_par->info = info;
    return info;
}   /* magma_cbaiter_overlap */
/**
    Generates the cuSPARSE triangular-solve analysis information for an
    existing ILU factorization stored in precond->L / precond->U.

    If the factors are not yet on the device, they are re-extracted from
    precond->M (unit-diagonal lower part into L, value-diagonal upper part
    into U) and transferred. For small maxiter (< 50) additional Jacobi
    workspaces for iterative triangular solves are prepared.

    NOTE(review): cusparseCreateSolveAnalysisInfo / cusparseCcsrsm_analysis
    are legacy cuSPARSE APIs (removed in CUDA 11+) - confirm the targeted
    CUDA toolkit still provides them.
*/
extern "C" magma_int_t
magma_ccumilugeneratesolverinfo(
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;

    magma_c_matrix hA={Magma_CSR}, hL={Magma_CSR}, hU={Magma_CSR};

    // rebuild the triangular factors on the device if necessary
    if (precond->L.memory_location != Magma_DEV ){
        CHECK( magma_cmtransfer( precond->M, &hA,
            precond->M.memory_location, Magma_CPU, queue ));

        // L carries an implicit unit diagonal, U keeps the stored values
        hL.diagorder_type = Magma_UNITY;
        CHECK( magma_cmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue ));
        hU.diagorder_type = Magma_VALUE;
        CHECK( magma_cmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue ));
        CHECK( magma_cmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_cmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue ));

        magma_cmfree(&hA, queue );
        magma_cmfree(&hL, queue );
        magma_cmfree(&hU, queue );
    }

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // analysis for the unit lower-triangular solve with L
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL ));

    // analysis for the non-unit upper-triangular solve with U
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU ));

    if( precond->maxiter < 50 ){
        //prepare for iterative solves

        // extract the diagonal of L into precond->d
        CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_cvinit( &precond->work1, Magma_DEV, precond->U.num_rows, 1,
            MAGMA_C_ZERO, queue ));

        // extract the diagonal of U into precond->d2
        CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_cvinit( &precond->work2, Magma_DEV, precond->U.num_rows, 1,
            MAGMA_C_ZERO, queue ));
    }

cleanup:
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );
    return info;
}
extern "C" magma_int_t magma_cbombard( magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x, magma_c_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // 1=QMR, 2=CGS, 3+BiCGSTAB magma_int_t flag = 0; // prepare solver feedback solver_par->solver = Magma_BOMBARD; solver_par->numiter = 0; solver_par->spmv_count = 0; // local variables magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE; // solver variables float nom0, r0, res, Q_res, T_res, C_res, B_res, nomb; //QMR magmaFloatComplex Q_rho = c_one, Q_rho1 = c_one, Q_eta = -c_one , Q_pds = c_one, Q_thet = c_one, Q_thet1 = c_one, Q_epsilon = c_one, Q_beta = c_one, Q_delta = c_one, Q_pde = c_one, Q_rde = c_one, Q_gamm = c_one, Q_gamm1 = c_one, Q_psi = c_one; //TFQMR magmaFloatComplex T_rho = c_one, T_rho_l = c_one, T_eta = c_zero , T_c = c_zero , T_theta = c_zero , T_tau = c_zero, T_alpha = c_one, T_beta = c_zero, T_sigma = c_zero; //CGS magmaFloatComplex C_rho, C_rho_l = c_one, C_alpha, C_beta = c_zero; //BiCGSTAB magmaFloatComplex B_alpha, B_beta, B_omega, B_rho_old, B_rho_new; magma_int_t dofs = A.num_rows* b.num_cols; // need to transpose the matrix // GPU workspace // QMR magma_c_matrix AT = {Magma_CSR}, Ah1 = {Magma_CSR}, Ah2 = {Magma_CSR}, Q_r={Magma_CSR}, r_tld={Magma_CSR}, Q_x={Magma_CSR}, Q_v={Magma_CSR}, Q_w={Magma_CSR}, Q_wt={Magma_CSR}, Q_d={Magma_CSR}, Q_s={Magma_CSR}, Q_z={Magma_CSR}, Q_q={Magma_CSR}, Q_p={Magma_CSR}, Q_pt={Magma_CSR}, Q_y={Magma_CSR}, d1={Magma_CSR}, d2={Magma_CSR}; //TFQMR // GPU workspace magma_c_matrix T_r={Magma_CSR}, T_pu_m={Magma_CSR}, T_x={Magma_CSR}, T_d={Magma_CSR}, T_w={Magma_CSR}, T_v={Magma_CSR}, T_u_mp1={Magma_CSR}, T_u_m={Magma_CSR}, T_Au={Magma_CSR}, T_Ad={Magma_CSR}, T_Au_new={Magma_CSR}; // CGS magma_c_matrix C_r={Magma_CSR}, C_rt={Magma_CSR}, C_x={Magma_CSR}, C_p={Magma_CSR}, C_q={Magma_CSR}, C_u={Magma_CSR}, C_v={Magma_CSR}, C_t={Magma_CSR}, C_p_hat={Magma_CSR}, C_q_hat={Magma_CSR}, C_u_hat={Magma_CSR}, 
C_v_hat={Magma_CSR}; //BiCGSTAB magma_c_matrix B_r={Magma_CSR}, B_x={Magma_CSR}, B_p={Magma_CSR}, B_v={Magma_CSR}, B_s={Magma_CSR}, B_t={Magma_CSR}; CHECK( magma_cvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &d1, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &d2, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // QMR CHECK( magma_cvinit( &Q_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &Q_x, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // TFQMR CHECK( magma_cvinit( &T_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &T_u_mp1,Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_cvinit( &T_u_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_cvinit( &T_pu_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_cvinit( &T_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &T_d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &T_w, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_cvinit( &T_Ad, Magma_DEV, A.num_rows, b.num_cols, c_zero, 
queue )); CHECK( magma_cvinit( &T_Au_new, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &T_Au, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_cvinit( &T_x, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // CGS CHECK( magma_cvinit( &C_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_p_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_q_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_u_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_v_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &C_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // BiCGSTAB CHECK( magma_cvinit( &B_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &B_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &B_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &B_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &B_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_cvinit( &B_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_cresidualvec( A, b, *x, &r_tld, &nom0, queue)); solver_par->init_res = nom0; res = nom0; // QMR magma_ccopy( dofs, r_tld.dval, 1, Q_r.dval, 1, queue ); magma_ccopy( dofs, r_tld.dval, 1, Q_y.dval, 1, queue ); magma_ccopy( dofs, r_tld.dval, 1, 
Q_v.dval, 1, queue ); magma_ccopy( dofs, r_tld.dval, 1, Q_wt.dval, 1, queue ); magma_ccopy( dofs, r_tld.dval, 1, Q_z.dval, 1, queue ); magma_ccopy( dofs, x->dval, 1, Q_x.dval, 1, queue ); // transpose the matrix // transpose the matrix magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_cmfree(&Ah1, queue ); magma_cmtransposeconjugate( Ah2, &Ah1, queue ); magma_cmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_cmfree(&Ah1, queue ); magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_cmfree(&Ah2, queue ); // TFQMR solver_par->init_res = nom0; magma_ccopy( dofs, r_tld.dval, 1, T_r.dval, 1, queue ); magma_ccopy( dofs, T_r.dval, 1, T_w.dval, 1, queue ); magma_ccopy( dofs, T_r.dval, 1, T_u_m.dval, 1, queue ); magma_ccopy( dofs, T_r.dval, 1, T_u_mp1.dval, 1, queue ); magma_ccopy( dofs, T_u_m.dval, 1, T_pu_m.dval, 1, queue ); CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_v, queue )); magma_ccopy( dofs, T_v.dval, 1, T_Au.dval, 1, queue ); // CGS magma_ccopy( dofs, r_tld.dval, 1, C_r.dval, 1, queue ); magma_ccopy( dofs, x->dval, 1, C_x.dval, 1, queue ); // BiCGSTAB magma_ccopy( dofs, r_tld.dval, 1, B_r.dval, 1, queue ); magma_ccopy( dofs, x->dval, 1, B_x.dval, 1, queue ); CHECK( magma_c_spmv( c_one, A, B_r, c_zero, B_v, queue )); nomb = magma_scnrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } T_tau = magma_csqrt( magma_cdotc( dofs, T_r.dval, 1, r_tld.dval, 1, queue) ); T_rho = magma_cdotc( dofs, T_r.dval, 1, r_tld.dval, 1, queue ); T_rho_l = T_rho; 
Q_psi = magma_csqrt( magma_cdotc( dofs, Q_z.dval, 1, Q_z.dval, 1, queue )); Q_rho = magma_csqrt( magma_cdotc( dofs, Q_y.dval, 1, Q_y.dval, 1, queue )); // BiCGSTAB B_rho_new = magma_cdotc( dofs, B_r.dval, 1, B_r.dval, 1, queue ); B_rho_old = B_omega = B_alpha = MAGMA_C_MAKE( 1.0, 0. ); // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_cqmr_1( b.num_rows, b.num_cols, Q_rho, Q_psi, Q_y.dval, Q_z.dval, Q_v.dval, Q_w.dval, queue ); //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; //QMR: delta = z' * y; Q_delta = magma_cdotc( dofs, Q_z.dval, 1, Q_y.dval, 1, queue ); // TFQMR T_alpha = T_rho / magma_cdotc( dofs, T_v.dval, 1, r_tld.dval, 1, queue ); T_sigma = T_theta * T_theta / T_alpha * T_eta; //CGS: rho = r' * r_tld C_rho = magma_cdotc( dofs, C_r.dval, 1, r_tld.dval, 1, queue ); // BiCGSTAB B_rho_old = B_rho_new; B_rho_new = magma_cdotc( dofs, r_tld.dval, 1, B_r.dval, 1, queue ); // rho=<rr,r> B_beta = B_rho_new/B_rho_old * B_alpha/B_omega; // beta=rho/rho_old *alpha/omega if( solver_par->numiter == 1 ){ //QMR: p = y; //QMR: q = z; magma_ccopy( dofs, Q_y.dval, 1, Q_p.dval, 1, queue ); magma_ccopy( dofs, Q_z.dval, 1, Q_q.dval, 1, queue ); //QMR: u = r; //QMR: p = r; magma_ccgs_2( b.num_rows, b.num_cols, C_r.dval, C_u.dval, C_p.dval, queue ); } else{ Q_pde = Q_psi * Q_delta / Q_epsilon; Q_rde = Q_rho * MAGMA_C_CONJ(Q_delta/Q_epsilon); C_beta = C_rho / C_rho_l; //QMR p = y - pde * p //QMR q = z - rde * q magma_cqmr_2( b.num_rows, b.num_cols, Q_pde, Q_rde, Q_y.dval, Q_z.dval, Q_p.dval, Q_q.dval, queue ); //CGS: u = r + beta*q; //CGS: p = u + beta*( q + beta*p ); magma_ccgs_1( b.num_rows, b.num_cols, C_beta, C_r.dval, C_q.dval, C_u.dval, C_p.dval, queue ); } // TFQMR magma_ctfqmr_1( b.num_rows, b.num_cols, T_alpha, T_sigma, T_v.dval, T_Au.dval, T_u_m.dval, T_pu_m.dval, T_u_mp1.dval, T_w.dval, T_d.dval, T_Ad.dval, queue ); 
T_theta = magma_csqrt( magma_cdotc(dofs, T_w.dval, 1, T_w.dval, 1, queue) ) / T_tau; T_c = c_one / magma_csqrt( c_one + T_theta*T_theta ); T_tau = T_tau * T_theta *T_c; T_eta = T_c * T_c * T_alpha; T_sigma = T_theta * T_theta / T_alpha * T_eta; magma_ctfqmr_2( b.num_rows, b.num_cols, T_eta, T_d.dval, T_Ad.dval, T_x.dval, T_r.dval, queue ); magma_ccopy( dofs, T_u_mp1.dval, 1, T_pu_m.dval, 1, queue ); // BiCGSTAB: p = r + beta * ( p - omega * v ) magma_cbicgstab_1( b.num_rows, b.num_cols, B_beta, B_omega, B_r.dval, B_v.dval, B_p.dval, queue ); //QMR CHECK( magma_c_spmv( c_one, A, Q_p, c_zero, Q_pt, queue )); //TFQMR CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_Au_new, queue )); //CGS CHECK( magma_c_spmv( c_one, A, C_p, c_zero, C_v_hat, queue )); // BiCGSTAB CHECK( magma_c_spmv( c_one, A, B_p, c_zero, B_v, queue )); // v = Ap solver_par->spmv_count++; //QMR: epsilon = q' * pt; Q_epsilon = magma_cdotc( dofs, Q_q.dval, 1, Q_pt.dval, 1, queue ); Q_beta = Q_epsilon / Q_delta; //TFQMR magma_ccopy( dofs, T_Au_new.dval, 1, T_Au.dval, 1, queue ); magma_ccopy( dofs, T_u_mp1.dval, 1, T_u_m.dval, 1, queue ); //CGS: alpha = r_tld' * v_hat C_alpha = C_rho / magma_cdotc( dofs, r_tld.dval, 1, C_v_hat.dval, 1, queue ); //BiCGSTAB B_alpha = B_rho_new / magma_cdotc( dofs, r_tld.dval, 1, B_v.dval, 1, queue ); //QMR: v = pt - beta * v //QMR: y = v magma_cqmr_3( b.num_rows, b.num_cols, Q_beta, Q_pt.dval, Q_v.dval, Q_y.dval, queue ); // TFQMR magma_ctfqmr_5( b.num_rows, b.num_cols, T_alpha, T_sigma, T_v.dval, T_Au.dval, T_pu_m.dval, T_w.dval, T_d.dval, T_Ad.dval, queue ); // TFQMR T_sigma = T_theta * T_theta / T_alpha * T_eta; T_theta = magma_csqrt( magma_cdotc(dofs, T_w.dval, 1, T_w.dval, 1, queue) ) / T_tau; T_c = c_one / magma_csqrt( c_one + T_theta*T_theta ); T_tau = T_tau * T_theta *T_c; T_eta = T_c * T_c * T_alpha; // TFQMR magma_ctfqmr_2( b.num_rows, b.num_cols, T_eta, T_d.dval, T_Ad.dval, T_x.dval, T_r.dval, queue ); T_rho = magma_cdotc( dofs, T_w.dval, 1, r_tld.dval, 1, queue 
); T_beta = T_rho / T_rho_l; T_rho_l = T_rho; magma_ctfqmr_3( b.num_rows, b.num_cols, T_beta, T_w.dval, T_u_m.dval, T_u_mp1.dval, queue ); magma_ccopy( dofs, T_u_mp1.dval, 1, T_pu_m.dval, 1, queue ); //CGS: q = u - alpha v_hat //CGS: t = u + q magma_ccgs_3( b.num_rows, b.num_cols, C_alpha, C_v_hat.dval, C_u.dval, C_q.dval, C_t.dval, queue ); // BiCGSTAB: s = r - alpha v magma_cbicgstab_2( b.num_rows, b.num_cols, B_alpha, B_r.dval, B_v.dval, B_s.dval, queue ); Q_rho1 = Q_rho; //QMR rho = norm(y); Q_rho = magma_csqrt( magma_cdotc( dofs, Q_y.dval, 1, Q_y.dval, 1, queue ) ); //QMR wt = A' * q - beta' * w; CHECK( magma_c_spmv( c_one, AT, Q_q, c_zero, Q_wt, queue )); //TFQMR CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_Au_new, queue )); //CGS t = A u_hat CHECK( magma_c_spmv( c_one, A, C_t, c_zero, C_rt, queue )); //BiCGSTAB CHECK( magma_c_spmv( c_one, A, B_s, c_zero, B_t, queue )); // t=As solver_par->spmv_count++; //BiCGSTAB B_omega = magma_cdotc( dofs, B_t.dval, 1, B_s.dval, 1, queue ) // omega = <s,t>/<t,t> / magma_cdotc( dofs, B_t.dval, 1, B_t.dval, 1, queue ); // QMR magma_caxpy( dofs, - MAGMA_C_CONJ( Q_beta ), Q_w.dval, 1, Q_wt.dval, 1, queue ); // no precond: z = wt magma_ccopy( dofs, Q_wt.dval, 1, Q_z.dval, 1, queue ); //TFQMR magma_ctfqmr_4( b.num_rows, b.num_cols, T_beta, T_Au_new.dval, T_v.dval, T_Au.dval, queue ); magma_ccopy( dofs, T_u_mp1.dval, 1, T_u_m.dval, 1, queue ); // QMR Q_thet1 = Q_thet; Q_thet = Q_rho / (Q_gamm * MAGMA_C_MAKE( MAGMA_C_ABS(Q_beta), 0.0 )); Q_gamm1 = Q_gamm; Q_gamm = c_one / magma_csqrt(c_one + Q_thet*Q_thet); Q_eta = - Q_eta * Q_rho1 * Q_gamm * Q_gamm / (Q_beta * Q_gamm1 * Q_gamm1); if ( solver_par->numiter == 1 ) { //QMR: d = eta * p + pds * d; //QMR: s = eta * pt + pds * d; //QMR: x = x + d; //QMR: r = r - s; magma_cqmr_4( b.num_rows, b.num_cols, Q_eta, Q_p.dval, Q_pt.dval, Q_d.dval, Q_s.dval, Q_x.dval, Q_r.dval, queue ); } else { Q_pds = (Q_thet1 * Q_gamm) * (Q_thet1 * Q_gamm); // d = eta * p + pds * d; // s = eta * pt + pds 
* d; // x = x + d; // r = r - s; magma_cqmr_5( b.num_rows, b.num_cols, Q_eta, Q_pds, Q_p.dval, Q_pt.dval, Q_d.dval, Q_s.dval, Q_x.dval, Q_r.dval, queue ); } // CGS: r = r -alpha*A u_hat // CGS: x = x + alpha u_hat magma_ccgs_4( b.num_rows, b.num_cols, C_alpha, C_t.dval, C_rt.dval, C_x.dval, C_r.dval, queue ); C_rho_l = C_rho; // BiCGSTAB: x = x + alpha * p + omega * s // BiCGSTAB: r = s - omega * t magma_cbicgstab_3( b.num_rows, b.num_cols, B_alpha, B_omega, B_p.dval, B_s.dval, B_t.dval, B_x.dval, B_r.dval, queue ); //QMR: psi = norm(z); Q_psi = magma_csqrt( magma_cdotc( dofs, Q_z.dval, 1, Q_z.dval, 1, queue ) ); //QMR: v = y / rho //QMR: y = y / rho //QMR: w = wt / psi //QMR: z = z / psi magma_cqmr_1( b.num_rows, b.num_cols, Q_rho, Q_psi, Q_y.dval, Q_z.dval, Q_v.dval, Q_w.dval, queue ); Q_res = magma_scnrm2( dofs, Q_r.dval, 1, queue ); T_res = magma_scnrm2( dofs, T_r.dval, 1, queue ); C_res = magma_scnrm2( dofs, C_r.dval, 1, queue ); B_res = magma_scnrm2( dofs, B_r.dval, 1, queue ); // printf(" %e %e %e\n", Q_res, C_res, B_res); if( Q_res < res ){ res = Q_res; flag = 1; } if( T_res < res ){ res = Q_res; flag = 2; } if( C_res < res ){ res = C_res; flag = 3; } if( B_res < res ){ res = B_res; flag = 4; } if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ info = MAGMA_SUCCESS; break; } if( magma_c_isnan_inf( Q_beta ) && magma_c_isnan_inf( C_beta ) && magma_c_isnan_inf( B_beta ) ){ info = MAGMA_DIVERGENCE; break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); // copy back the best solver switch ( flag ) { case 1: printf("%% QMR fastest solver.\n"); magma_ccopy( dofs, Q_x.dval, 1, x->dval, 1, queue ); break; case 2: printf("%% TFQMR 
fastest solver.\n"); magma_ccopy( dofs, T_x.dval, 1, x->dval, 1, queue ); break; case 3: printf("%% CGS fastest solver.\n"); magma_ccopy( dofs, C_x.dval, 1, x->dval, 1, queue ); break; case 4: printf("%% BiCGSTAB fastest solver.\n"); magma_ccopy( dofs, B_x.dval, 1, x->dval, 1, queue ); break; } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; float residual; CHECK( magma_cresidualvec( A, b, *x, &r_tld, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_cmfree(&r_tld, queue ); magma_cmfree(&d1, queue ); magma_cmfree(&d2, queue ); magma_cmfree(&AT, queue ); // QMR magma_cmfree(&Q_r, queue ); magma_cmfree(&Q_v, queue ); magma_cmfree(&Q_w, queue ); magma_cmfree(&Q_wt, queue ); magma_cmfree(&Q_d, queue ); magma_cmfree(&Q_s, queue ); magma_cmfree(&Q_z, queue ); magma_cmfree(&Q_q, queue ); magma_cmfree(&Q_p, queue ); magma_cmfree(&Q_pt, queue ); magma_cmfree(&Q_y, queue ); magma_cmfree(&Q_x, queue ); magma_cmfree(&Ah1, queue ); magma_cmfree(&Ah2, queue ); // TFQMR magma_cmfree(&T_r, 
queue ); magma_cmfree(&T_x, queue ); magma_cmfree(&T_d, queue ); magma_cmfree(&T_w, queue ); magma_cmfree(&T_v, queue ); magma_cmfree(&T_u_m, queue ); magma_cmfree(&T_u_mp1, queue ); magma_cmfree(&T_pu_m, queue ); magma_cmfree(&T_d, queue ); magma_cmfree(&T_Au, queue ); magma_cmfree(&T_Au_new, queue ); magma_cmfree(&T_Ad, queue ); // CGS magma_cmfree(&C_r, queue ); magma_cmfree(&C_rt, queue ); magma_cmfree(&C_x, queue ); magma_cmfree(&C_p, queue ); magma_cmfree(&C_q, queue ); magma_cmfree(&C_u, queue ); magma_cmfree(&C_v, queue ); magma_cmfree(&C_t, queue ); magma_cmfree(&C_p_hat, queue ); magma_cmfree(&C_q_hat, queue ); magma_cmfree(&C_u_hat, queue ); magma_cmfree(&C_v_hat, queue ); // BiCGSTAB magma_cmfree(&B_r, queue ); magma_cmfree(&B_x, queue ); magma_cmfree(&B_p, queue ); magma_cmfree(&B_v, queue ); magma_cmfree(&B_s, queue ); magma_cmfree(&B_t, queue ); solver_par->info = info; return info; } /* magma_cbombard */
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_CHECK( magma_init() );
    magma_print_environment();

    magma_copts zopts;
    magma_queue_t queue;
    magma_queue_create( 0, &queue );

    // scalars used for the right-hand side (ones) and the initial guess (zero)
    magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0);
    magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0);
    magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR};
    magma_c_matrix x={Magma_CSR}, b={Magma_CSR};

    // i indexes the remaining command-line arguments; each one is either the
    // LAPLACE2D keyword (followed by a size) or a matrix file path
    int i=1;
    TESTING_CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue ));
    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;
    TESTING_CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ));

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue ));
        }

        // for the eigensolver case
        zopts.solver_par.ev_length = A.num_cols;
        TESTING_CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue ));

        // scale matrix
        TESTING_CHECK( magma_cmscale( &A, zopts.scaling, queue ));

        // preconditioner
        // NOTE(review): the setup receives the host-side matrix A and the
        // vector b *before* b is initialized further down -- confirm this is
        // intended; the preconditioner tester in this file calls
        // magma_c_precondsetup with the device matrix and an initialized b.
        if ( zopts.solver_par.solver != Magma_ITERREF ) {
            TESTING_CHECK( magma_c_precondsetup( A, b, &zopts.solver_par, &zopts.precond_par, queue ) );
        }

        TESTING_CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue ));

        printf( "\n%% matrix info: %lld-by-%lld with %lld nonzeros\n\n",
                (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz );
        // row printed below: m, n, true nnz, true nnz per row, stored nnz
        printf("matrixinfo = [\n");
        printf("%% size (m x n) || nonzeros (nnz) || nnz/m || stored nnz\n");
        printf("%%============================================================================%%\n");
        printf(" %8lld %8lld %10lld %4lld %10lld\n",
               (long long) B.num_rows, (long long) B.num_cols,
               (long long) B.true_nnz, (long long) (B.true_nnz/B.num_rows),
               (long long) B.nnz );
        printf("%%============================================================================%%\n");
        printf("];\n");

        TESTING_CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));

        // vectors and initial guess: b = ones (length m), x = 0 (length n)
        TESTING_CHECK( magma_cvinit( &b, Magma_DEV, A.num_rows, 1, one, queue ));
        //magma_cvinit( &x, Magma_DEV, A.num_cols, 1, one, queue );
        //magma_c_spmv( one, B_d, x, zero, b, queue );  // b = A x
        //magma_cmfree(&x, queue );
        TESTING_CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue ));

        info = magma_c_solver( B_d, b, &x, &zopts, queue );
        if( info != 0 ) {
            printf("%%error: solver returned: %s (%lld).\n",
                magma_strerror( info ), (long long) info );
        }

        // first print: per-iteration convergence history
        printf("convergence = [\n");
        magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue );
        printf("];\n\n");

        // second print with verbose off: summary only
        zopts.solver_par.verbose = 0;
        printf("solverinfo = [\n");
        magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue );
        printf("];\n\n");

        printf("precondinfo = [\n");
        printf("%% setup runtime\n");
        printf(" %.6f %.6f\n",
            zopts.precond_par.setuptime, zopts.precond_par.runtime );
        printf("];\n\n");

        // per-matrix cleanup before moving to the next argument
        magma_cmfree(&B_d, queue );
        magma_cmfree(&B, queue );
        magma_cmfree(&A, queue );
        magma_cmfree(&x, queue );
        magma_cmfree(&b, queue );
        i++;
    }

    // NOTE(review): zopts.solver_par / precond_par allocated by
    // magma_csolverinfo_init are not released here -- confirm whether a
    // magma_csolverinfo_free call is missing before finalize.
    magma_queue_destroy( queue );
    TESTING_CHECK( magma_finalize() );
    return info;
}
extern "C" magma_int_t
magma_ccumilusetup_transpose(
    magma_c_matrix A,
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    // host-side work matrices for the transpose round trips
    magma_c_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR};
    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrLT=NULL;
    cusparseMatDescr_t descrUT=NULL;

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // build precond->LT: device -> host, convert to CSR, conjugate-transpose,
    // convert back to A's storage type, transfer back to the device.
    // fix: these magma_* calls were previously unchecked, so a failed
    // transfer/conversion was silently ignored and later steps operated on
    // invalid matrices.
    CHECK( magma_cmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_cmfree(&Ah1, queue );
    CHECK( magma_cmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_cmfree(&Ah1, queue );
    CHECK( magma_cmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue ));
    magma_cmfree(&Ah2, queue );

    // build precond->UT with the same procedure applied to U
    CHECK( magma_cmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_cmfree(&Ah1, queue );
    CHECK( magma_cmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_cmfree(&Ah1, queue );
    CHECK( magma_cmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue ));
    magma_cmfree(&Ah2, queue );

    // triangular solve analysis for LT (upper fill, unit diagonal)
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows,
        precond->LT.nnz, descrLT,
        precond->LT.dval, precond->LT.drow, precond->LT.dcol,
        precond->cuinfoLT ));

    // triangular solve analysis for UT (lower fill, stored diagonal)
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows,
        precond->UT.nnz, descrUT,
        precond->UT.dval, precond->UT.drow, precond->UT.dcol,
        precond->cuinfoUT ));

cleanup:
    cusparseDestroyMatDescr( descrLT );
    cusparseDestroyMatDescr( descrUT );
    cusparseDestroy( cusparseHandle );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    return info;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_copts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );

    magmaFloatComplex one = MAGMA_C_MAKE(1.0, 0.0);
    magmaFloatComplex zero = MAGMA_C_MAKE(0.0, 0.0);
    magma_c_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR};
    // x1/x2 hold the left/right preconditioner results, t is scratch for the
    // combined left-then-right application into x
    magma_c_matrix x={Magma_CSR}, b={Magma_CSR}, t={Magma_CSR};
    magma_c_matrix x1={Magma_CSR}, x2={Magma_CSR};

    //Chronometry
    real_Double_t tempo1, tempo2;

    int i=1;
    CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue ));
    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    CHECK( magma_csolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ));

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_cm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            CHECK( magma_c_csr_mtx( &A, argv[i], queue ));
        }

        printf( "\n%% matrix info: %d-by-%d with %d nonzeros\n\n",
                int(A.num_rows), int(A.num_cols), int(A.nnz) );

        // for the eigensolver case
        zopts.solver_par.ev_length = A.num_rows;
        CHECK( magma_ceigensolverinfo_init( &zopts.solver_par, queue ));

        // scale matrix
        CHECK( magma_cmscale( &A, zopts.scaling, queue ));

        CHECK( magma_cmconvert( A, &B, Magma_CSR, zopts.output_format, queue ));
        CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));

        // vectors and initial guess: b = ones, everything else zero
        CHECK( magma_cvinit( &b, Magma_DEV, A.num_cols, 1, one, queue ));
        CHECK( magma_cvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue ));
        CHECK( magma_cvinit( &t, Magma_DEV, A.num_cols, 1, zero, queue ));
        CHECK( magma_cvinit( &x1, Magma_DEV, A.num_cols, 1, zero, queue ));
        CHECK( magma_cvinit( &x2, Magma_DEV, A.num_cols, 1, zero, queue ));

        //preconditioner
        CHECK( magma_c_precondsetup( B_d, b, &zopts.solver_par, &zopts.precond_par, queue ) );

        float residual;
        CHECK( magma_cresidual( B_d, b, x, &residual, queue ));
        zopts.solver_par.init_res = residual;
        printf("data = [\n");

        // time one application of the left preconditioner to b
        printf("%%runtime left preconditioner:\n");
        tempo1 = magma_sync_wtime( queue );
        info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &x1, &zopts.precond_par, queue );
        tempo2 = magma_sync_wtime( queue );
        if( info != 0 ){
            printf("error: preconditioner returned: %s (%d).\n",
                magma_strerror( info ), int(info) );
        }
        CHECK( magma_cresidual( B_d, b, x1, &residual, queue ));
        printf("%.8e %.8e\n", tempo2-tempo1, residual );

        // time one application of the right preconditioner to b
        printf("%%runtime right preconditioner:\n");
        tempo1 = magma_sync_wtime( queue );
        info = magma_c_applyprecond_right( MagmaNoTrans, B_d, b, &x2, &zopts.precond_par, queue );
        tempo2 = magma_sync_wtime( queue );
        if( info != 0 ){
            printf("error: preconditioner returned: %s (%d).\n",
                magma_strerror( info ), int(info) );
        }
        CHECK( magma_cresidual( B_d, b, x2, &residual, queue ));
        printf("%.8e %.8e\n", tempo2-tempo1, residual );

        printf("];\n");

        // combined application: x = right( left( b ) )
        // NOTE(review): the return codes below are assigned to info but not
        // checked -- confirm whether these should go through CHECK as well.
        info = magma_c_applyprecond_left( MagmaNoTrans, B_d, b, &t, &zopts.precond_par, queue );
        info = magma_c_applyprecond_right( MagmaNoTrans, B_d, t, &x, &zopts.precond_par, queue );

        CHECK( magma_cresidual( B_d, b, x, &residual, queue ));
        zopts.solver_par.final_res = residual;

        magma_csolverinfo( &zopts.solver_par, &zopts.precond_par, queue );

        // per-matrix cleanup; the same frees run again at cleanup: below,
        // which appears to rely on magma_cmfree resetting the freed structs
        // -- confirm double frees are benign here.
        magma_cmfree(&B_d, queue );
        magma_cmfree(&B, queue );
        magma_cmfree(&A, queue );
        magma_cmfree(&x, queue );
        magma_cmfree(&x1, queue );
        magma_cmfree(&x2, queue );
        magma_cmfree(&b, queue );
        magma_cmfree(&t, queue );

        i++;
    }

cleanup:
    magma_cmfree(&B_d, queue );
    magma_cmfree(&B, queue );
    magma_cmfree(&A, queue );
    magma_cmfree(&x, queue );
    magma_cmfree(&x1, queue );
    magma_cmfree(&x2, queue );
    magma_cmfree(&b, queue );
    magma_cmfree(&t, queue );
    magma_csolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
extern "C" magma_int_t
magma_ccumiccsetup(
    magma_c_matrix A,
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;
    magma_c_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR};

    // bring A to the host in CSR format
    CHECK( magma_cmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
    U.diagorder_type = Magma_VALUE;
    CHECK( magma_cmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue ));

    // in case using fill-in
    if( precond->levels > 0 ){
        magma_c_matrix hAL={Magma_CSR}, hAUt={Magma_CSR};
        CHECK( magma_csymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue ));
        magma_cmfree(&hAL, queue);
        magma_cmfree(&hAUt, queue);
    }

    // keep the lower triangle only and move it to the device as M
    CHECK( magma_cmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue ));
    magma_cmfree( &hACSR, queue );
    CHECK( magma_cmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue ));

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    // fix: bind the handle to the queue's CUDA stream; previously the
    // magma_queue_t itself was passed, inconsistent with the other setup
    // routines in this file that use queue->cuda_stream()
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // symmetric analysis followed by the in-place IC(0) factorization of M
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
    CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrA,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfo ));
    CHECK_CUSPARSE( cusparseCcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        precond->M.num_rows, descrA,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfo ));

    // triangular solve analysis for the forward sweep with M
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrL,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfoL ));

    // triangular solve analysis for the backward sweep (transposed M)
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrU,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfoU ));

    if( precond->maxiter < 50 ){
        //prepare for iterative solves

        // copy the matrix to precond->L and (transposed) to precond->U
        CHECK( magma_cmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue ));
        CHECK( magma_cmtranspose( precond->L, &(precond->U), queue ));

        // extract the diagonal of L into precond->d
        CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));

        // extract the diagonal of U into precond->d2
        CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));
    }

cleanup:
    cusparseDestroySolveAnalysisInfo( precond->cuinfo );
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroyMatDescr( descrA );
    cusparseDestroy( cusparseHandle );
    magma_cmfree(&U, queue );
    magma_cmfree(&hA, queue );

    return info;
}
extern "C" magma_int_t
magma_ccumilusetup(
    magma_c_matrix A,
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;

    // copy matrix into preconditioner parameter
    magma_c_matrix hA={Magma_CSR}, hACSR={Magma_CSR};
    magma_c_matrix hL={Magma_CSR}, hU={Magma_CSR};

    CHECK( magma_cmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
    CHECK( magma_cmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue ));

    // in case using fill-in
    if( precond->levels > 0 ){
        magma_c_matrix hAL={Magma_CSR}, hAUt={Magma_CSR};
        CHECK( magma_csymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue ));
        magma_cmfree(&hAL, queue);
        magma_cmfree(&hAUt, queue);
    }

    CHECK( magma_cmtransfer(hACSR, &(precond->M), Magma_CPU, Magma_DEV, queue ));
    magma_cmfree( &hA, queue );
    magma_cmfree( &hACSR, queue );

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    // fix: bind the handle to the queue's CUDA stream; previously the
    // magma_queue_t itself was passed, inconsistent with the other setup
    // routines in this file that use queue->cuda_stream()
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // analysis followed by the in-place ILU(0) factorization of M
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
    CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrA,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfo ));
    CHECK_CUSPARSE( cusparseCcsrilu0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
        precond->M.num_rows, descrA,
        precond->M.dval, precond->M.drow, precond->M.dcol,
        precond->cuinfo ));

    // split the factored M into L (unit diagonal) and U (stored diagonal)
    CHECK( magma_cmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue ));
    hL.diagorder_type = Magma_UNITY;
    CHECK( magma_cmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue ));
    hU.diagorder_type = Magma_VALUE;
    CHECK( magma_cmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue ));
    CHECK( magma_cmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_cmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue ));

    // triangular solve analysis for L
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.dval, precond->L.drow, precond->L.dcol,
        precond->cuinfoL ));

    // triangular solve analysis for U
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.dval, precond->U.drow, precond->U.dcol,
        precond->cuinfoU ));

    if( precond->maxiter < 50 ){
        //prepare for iterative solves

        // extract the diagonal of L into precond->d
        CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));

        // extract the diagonal of U into precond->d2
        CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));
    }

cleanup:
    cusparseDestroySolveAnalysisInfo( precond->cuinfo );
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );
    // fix: hA was freed twice here in the original cleanup; once suffices
    magma_cmfree( &hA, queue );
    magma_cmfree( &hACSR, queue );
    magma_cmfree(&hL, queue );
    magma_cmfree(&hU, queue );

    return info;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_CHECK( magma_init() );
    magma_print_environment();

    magma_copts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );

    real_Double_t res;
    magma_c_matrix A={Magma_CSR}, A2={Magma_CSR},
    A3={Magma_CSR}, A4={Magma_CSR}, A5={Magma_CSR};

    int i=1;
    TESTING_CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue ));

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            TESTING_CHECK( magma_cm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            TESTING_CHECK( magma_c_csr_mtx( &A, argv[i], queue ));
        }

        printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n",
                (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz );

        // filename for temporary matrix storage
        const char *filename = "testmatrix.mtx";

        // write to file
        TESTING_CHECK( magma_cwrite_csrtomtx( A, filename, queue ));
        // read from file
        TESTING_CHECK( magma_c_csr_mtx( &A2, filename, queue ));
        // delete temporary matrix
        unlink( filename );

        //visualize
        printf("A2:\n");
        TESTING_CHECK( magma_cprint_matrix( A2, queue ));

        //visualize
        TESTING_CHECK( magma_cmconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue ));
        printf("A4:\n");
        TESTING_CHECK( magma_cprint_matrix( A4, queue ));
        TESTING_CHECK( magma_cmconvert(A4, &A5, Magma_CSR, Magma_ELL, queue ));
        printf("A5:\n");
        TESTING_CHECK( magma_cprint_matrix( A5, queue ));

        // pass it to another application and back
        // NOTE(review): csrget presumably hands out pointers into A2's CSR
        // arrays and csrset installs them into A3, so A3 would share storage
        // with A2 -- which would explain why A3 is never freed below (a deep
        // free would double-free A2). Confirm against the csrget/csrset
        // implementations.
        magma_int_t m, n;
        magma_index_t *row, *col;
        magmaFloatComplex *val=NULL;
        TESTING_CHECK( magma_ccsrget( A2, &m, &n, &row, &col, &val, queue ));
        TESTING_CHECK( magma_ccsrset( m, n, row, col, val, &A3, queue ));

        // the file round trip must reproduce A exactly (up to tolerance)
        TESTING_CHECK( magma_cmdiff( A, A2, &res, queue ));
        printf("%% ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("%% tester IO: ok\n");
        else
            printf("%% tester IO: failed\n");

        // the get/set round trip must also reproduce A
        TESTING_CHECK( magma_cmdiff( A, A3, &res, queue ));
        printf("%% ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("%% tester matrix interface: ok\n");
        else
            printf("%% tester matrix interface: failed\n");

        magma_cmfree(&A, queue );
        magma_cmfree(&A2, queue );
        magma_cmfree(&A4, queue );
        magma_cmfree(&A5, queue );

        i++;
    }
    magma_queue_destroy( queue );
    TESTING_CHECK( magma_finalize() );
    return info;
}
extern "C" magma_int_t magma_ccumiccsetup( magma_c_matrix A, magma_c_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; #if CUDA_VERSION >= 7000 csric02Info_t info_M=NULL; void *pBuffer = NULL; #endif magma_c_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR}; CHECK( magma_cmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); U.diagorder_type = Magma_VALUE; CHECK( magma_cmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue )); // in case using fill-in if( precond->levels > 0 ){ magma_c_matrix hAL={Magma_CSR}, hAUt={Magma_CSR}; CHECK( magma_csymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue )); magma_cmfree(&hAL, queue); magma_cmfree(&hAUt, queue); } CHECK( magma_cmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue )); magma_cmfree( &hACSR, queue ); CHECK( magma_cmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) )); // use kernel to manually check for zeros n the diagonal CHECK( magma_cdiagcheck( precond->M, queue ) ); #if CUDA_VERSION >= 7000 // this version has the bug fixed where a zero on the diagonal causes a crash CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) ); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); int buffersize; int structural_zero; int numerical_zero; CHECK_CUSPARSE( cusparseCcsric02_bufferSize( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, &buffersize ) ); CHECK( magma_malloc((void**)&pBuffer, buffersize) ); 
CHECK_CUSPARSE( cusparseCcsric02_analysis( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer )); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) ); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) ); CHECK_CUSPARSE( cusparseCcsric02( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) ); #else // this version contains the bug but is needed for backward compability CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); CHECK_CUSPARSE( cusparseCcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); #endif CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, 
precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_cmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_cmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue )); } /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_cmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; magma_c_matrix hD, hR, hAt CHECK( magma_ccsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_cmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_cmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&hD, queue ); magma_cmfree(&hR, queue ); CHECK( magma_c_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_ccsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_cmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( 
magma_cmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&hD, queue ); magma_cmfree(&hR, queue ); magma_cmfree(&hA, queue ); magma_cmfree(&hAt, queue ); */ cleanup: #if CUDA_VERSION >= 7000 magma_free( pBuffer ); cusparseDestroyCsric02Info( info_M ); #endif cusparseDestroySolveAnalysisInfo( precond->cuinfo ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_cmfree(&U, queue ); magma_cmfree(&hA, queue ); return info; }
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_copts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    real_Double_t res;
    magma_c_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR},
    B={Magma_CSR}, B_d={Magma_CSR};

    int i=1;
    real_Double_t start, end;
    CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_cm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            CHECK( magma_c_csr_mtx( &A, argv[i], queue ));
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale matrix
        CHECK( magma_cmscale( &A, zopts.scaling, queue ));

        // remove nonzeros in matrix (CPU compressor, averaged over 10 runs)
        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_cmcsrcompressor( &A, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 );

        // transpose
        CHECK( magma_cmtranspose( A, &AT, queue ));

        // convert, copy back and forth to check everything works
        CHECK( magma_cmconvert( AT, &B, Magma_CSR, Magma_CSR, queue ));
        magma_cmfree(&AT, queue );
        CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));
        magma_cmfree(&B, queue );

        // GPU compressor, also averaged over 10 runs
        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_cmcsrcompressor_gpu( &B_d, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 );

        CHECK( magma_cmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ));
        magma_cmfree(&B_d, queue );
        CHECK( magma_cmconvert( B, &AT, Magma_CSR, Magma_CSR, queue ));
        magma_cmfree(&B, queue );

        // transpose back
        CHECK( magma_cmtranspose( AT, &A2, queue ));
        magma_cmfree(&AT, queue );

        // the compressed round trip must reproduce A
        CHECK( magma_cmdiff( A, A2, &res, queue ));
        printf("# ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester matrix compressor: ok\n");
        else
            printf("# tester matrix compressor: failed\n");

        magma_cmfree(&A, queue );
        magma_cmfree(&A2, queue );

        i++;
    }

cleanup:
    // matrices freed inside the loop are freed again here; this appears to
    // rely on magma_cmfree resetting the struct after freeing -- confirm the
    // double frees are benign.
    magma_cmfree(&AT, queue );
    magma_cmfree(&B, queue );
    magma_cmfree(&A, queue );
    magma_cmfree(&A2, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing sparse matrix vector product */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_queue_t queue=NULL; magma_queue_create( &queue ); magma_c_matrix hA={Magma_CSR}, hA_SELLP={Magma_CSR}, hA_ELL={Magma_CSR}, dA={Magma_CSR}, dA_SELLP={Magma_CSR}, dA_ELL={Magma_CSR}; magma_c_matrix hx={Magma_CSR}, hy={Magma_CSR}, dx={Magma_CSR}, dy={Magma_CSR}, hrefvec={Magma_CSR}, hcheck={Magma_CSR}; hA_SELLP.blocksize = 8; hA_SELLP.alignment = 8; real_Double_t start, end, res; #ifdef MAGMA_WITH_MKL magma_int_t *pntre=NULL; #endif cusparseHandle_t cusparseHandle = NULL; cusparseMatDescr_t descr = NULL; magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0); magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0); float accuracy = 1e-10; #define PRECISION_c #if defined(PRECISION_c) accuracy = 1e-4; #endif #if defined(PRECISION_s) accuracy = 1e-4; #endif magma_int_t i, j; for( i = 1; i < argc; ++i ) { if ( strcmp("--blocksize", argv[i]) == 0 ) { hA_SELLP.blocksize = atoi( argv[++i] ); } else if ( strcmp("--alignment", argv[i]) == 0 ) { hA_SELLP.alignment = atoi( argv[++i] ); } else break; } printf("\n# usage: ./run_cspmm" " [ --blocksize %d --alignment %d (for SELLP) ]" " matrices \n\n", int(hA_SELLP.blocksize), int(hA_SELLP.alignment) ); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_cm_5stencil( laplace_size, &hA, queue )); } else { // file-matrix test CHECK( magma_c_csr_mtx( &hA, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(hA.num_rows), int(hA.num_cols), int(hA.nnz) ); real_Double_t FLOPS = 2.0*hA.nnz/1e9; // m - number of rows for the sparse matrix // n - number of vectors to be multiplied in the SpMM product magma_int_t m, n; m = hA.num_rows; n = 48; // init CPU vectors CHECK( magma_cvinit( &hx, Magma_CPU, m, n, c_one, queue )); CHECK( 
magma_cvinit( &hy, Magma_CPU, m, n, c_zero, queue )); // init DEV vectors CHECK( magma_cvinit( &dx, Magma_DEV, m, n, c_one, queue )); CHECK( magma_cvinit( &dy, Magma_DEV, m, n, c_zero, queue )); // calling MKL with CSR #ifdef MAGMA_WITH_MKL CHECK( magma_imalloc_cpu( &pntre, m + 1 ) ); pntre[0] = 0; for (j=0; j < m; j++ ) { pntre[j] = hA.row[j+1]; } MKL_INT num_rows = hA.num_rows; MKL_INT num_cols = hA.num_cols; MKL_INT nnz = hA.nnz; MKL_INT num_vecs = n; MKL_INT *col; TESTING_MALLOC_CPU( col, MKL_INT, nnz ); for( magma_int_t t=0; t < hA.nnz; ++t ) { col[ t ] = hA.col[ t ]; } MKL_INT *row; TESTING_MALLOC_CPU( row, MKL_INT, num_rows ); for( magma_int_t t=0; t < hA.num_rows; ++t ) { row[ t ] = hA.col[ t ]; } // === Call MKL with consecutive SpMVs, using mkl_ccsrmv === // warmp up mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); start = magma_wtime(); for (j=0; j<10; j++ ) mkl_ccsrmv( "N", &num_rows, &num_cols, MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), MKL_ADDR(&c_zero), MKL_ADDR(hy.val) ); end = magma_wtime(); printf( "\n > MKL SpMVs : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10/(end-start) ); // === Call MKL with blocked SpMVs, using mkl_ccsrmm === char transa = 'n'; MKL_INT ldb = n, ldc=n; char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'}; // warm up mkl_ccsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); start = magma_wtime(); for (j=0; j<10; j++ ) mkl_ccsrmm( &transa, &num_rows, &num_vecs, &num_cols, MKL_ADDR(&c_one), matdescra, MKL_ADDR(hA.val), col, row, pntre, MKL_ADDR(hx.val), &ldb, MKL_ADDR(&c_zero), MKL_ADDR(hy.val), &ldc ); end = magma_wtime(); printf( "\n > MKL SpMM : %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); TESTING_FREE_CPU( 
row ); TESTING_FREE_CPU( col ); row = NULL; col = NULL; #endif // MAGMA_WITH_MKL // copy matrix to GPU CHECK( magma_cmtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue )); // SpMV on GPU (CSR) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) CHECK( magma_c_spmv( c_one, dA, dx, c_zero, dy, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue )); magma_cmfree(&dA, queue ); // convert to SELLP and copy to GPU CHECK( magma_cmconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue )); CHECK( magma_cmtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue )); magma_cmfree(&hA_SELLP, queue ); magma_cmfree( &dy, queue ); CHECK( magma_cvinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); // SpMV on GPU (SELLP) start = magma_sync_wtime( queue ); for (j=0; j<10; j++) CHECK( magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n", (end-start)/10, FLOPS*10.*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm SELL-P: ok\n"); else printf("%% tester spmm SELL-P: failed\n"); magma_cmfree( &hcheck, queue ); magma_cmfree(&dA_SELLP, queue ); // SpMV on GPU (CUSPARSE - CSR) // CUSPARSE context // magma_cmfree( &dy, queue ); CHECK( magma_cvinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue )); //#ifdef PRECISION_d start = magma_sync_wtime( queue ); CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, 
CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); magmaFloatComplex alpha = c_one; magmaFloatComplex beta = c_zero; // copy matrix to GPU CHECK( magma_cmtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue) ); for (j=0; j<10; j++) cusparseCcsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, dA.num_rows, n, dA.num_cols, dA.nnz, &alpha, descr, dA.dval, dA.drow, dA.dcol, dx.dval, dA.num_cols, &beta, dy.dval, dA.num_cols); end = magma_sync_wtime( queue ); printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n", (end-start)/10, FLOPS*10*n/(end-start) ); CHECK( magma_cmtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue )); res = 0.0; for(magma_int_t k=0; k<hA.num_rows; k++ ) res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]); printf("%% |x-y|_F = %8.2e\n", res); if ( res < accuracy ) printf("%% tester spmm cuSPARSE: ok\n"); else printf("%% tester spmm cuSPARSE: failed\n"); magma_cmfree( &hcheck, queue ); cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); descr = NULL; cusparseHandle = NULL; //#endif printf("\n\n"); // free CPU memory magma_cmfree(&hA, queue ); magma_cmfree(&hx, queue ); magma_cmfree(&hy, queue ); magma_cmfree(&hrefvec, queue ); // free GPU memory magma_cmfree(&dx, queue ); magma_cmfree(&dy, queue ); magma_cmfree(&dA, queue); i++; } cleanup: #ifdef MAGMA_WITH_MKL magma_free_cpu(pntre); #endif cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); magma_cmfree(&hA, queue ); magma_cmfree(&dA, queue ); magma_cmfree(&hA_ELL, queue ); magma_cmfree(&dA_ELL, queue ); magma_cmfree(&hA_SELLP, queue ); magma_cmfree(&dA_SELLP, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/*
    Purpose
    -------
    Preconditioned quasi-minimal-residual (QMR) solver for A x = b,
    using merged update kernels (magma_cqmr_2/4/5/7/8) for the vector
    recurrences.  The conjugate-transposed operator A^H is built once on
    the CPU and transferred back to the device (AT).

    Returns MAGMA_SUCCESS on convergence, MAGMA_SLOW_CONVERGENCE /
    MAGMA_DIVERGENCE otherwise; the result is also stored in
    solver_par->info.
*/
extern "C" magma_int_t
magma_cpqmr_merge(
    magma_c_matrix A, magma_c_matrix b,
    magma_c_matrix *x, magma_c_solver_par *solver_par,
    magma_c_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_QMR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
    // solver variables
    float nom0, r0, res=0.0, nomb;
    magmaFloatComplex rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one,
                        thet = c_one, thet1 = c_one, epsilon = c_one,
                        beta = c_one, delta = c_one, pde = c_one, rde = c_one,
                        gamm = c_one, gamm1 = c_one, psi = c_one;

    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    magma_c_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    // GPU workspace
    magma_c_matrix r={Magma_CSR}, r_tld={Magma_CSR},
                    v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                    d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR},
                    p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR},
                    vt={Magma_CSR}, yt={Magma_CSR}, zt={Magma_CSR};
    CHECK( magma_cvinit( &r,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &v,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &w,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &wt,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &s,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &z,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &q,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &p,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &pt,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &y,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &yt,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &vt,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &zt,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // solver setup: r = b - A*x, and seed the two Lanczos sequences with r
    CHECK( magma_cresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_ccopy( dofs, r.dval, 1, r_tld.dval, 1, queue );
    magma_ccopy( dofs, r.dval, 1, vt.dval, 1, queue );
    magma_ccopy( dofs, r.dval, 1, wt.dval, 1, queue );

    // transpose the matrix: AT = conj(A)^T, rebuilt in A's storage format
    magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );

    nomb = magma_scnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    // no precond: y = vt, z = wt
    CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, vt, &y, precond_par, queue ));
    CHECK( magma_c_applyprecond_right( MagmaTrans, A, wt, &z, precond_par, queue ));

    psi = magma_csqrt( magma_cdotc( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_csqrt( magma_cdotc( dofs, y.dval, 1, y.dval, 1, queue ));

    // v = vt / rho;   y = y / rho;   w = wt / psi;   z = z / psi
    magma_cqmr_8( r.num_rows, r.num_cols, rho, psi, vt.dval, wt.dval,
                  y.dval, z.dval, v.dval, w.dval, queue );

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( magma_c_isnan_inf( rho ) || magma_c_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        // delta = z' * y;
        delta = magma_cdotc( dofs, z.dval, 1, y.dval, 1, queue );
        if( magma_c_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // no precond: yt = y, zt = z
        CHECK( magma_c_applyprecond_right( MagmaNoTrans, A, y, &yt, precond_par, queue ));
        CHECK( magma_c_applyprecond_left( MagmaTrans, A, z, &zt, precond_par, queue ));

        if( solver_par->numiter == 1 ){
            // p = y;   q = z;
            magma_ccopy( dofs, yt.dval, 1, p.dval, 1, queue );
            magma_ccopy( dofs, zt.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_C_CONJ(delta/epsilon);
            // p = yt - pde * p;   q = zt - rde * q
            magma_cqmr_2( r.num_rows, r.num_cols, pde, rde,
                          yt.dval, zt.dval, p.dval, q.dval, queue );
        }
        if( magma_c_isnan_inf( rho ) || magma_c_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        CHECK( magma_c_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;
        // epsilon = q' * pt;
        epsilon = magma_cdotc( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;
        if( magma_c_isnan_inf( epsilon ) || magma_c_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        // vt = pt - beta * v;
        // BUGFIX: the merged kernel already computes vt; the former
        // copy/scal/axpy sequence recomputed the identical result and is removed.
        magma_cqmr_7( r.num_rows, r.num_cols, beta,
                      pt.dval, v.dval, vt.dval, queue );

        // no precond: y = vt
        CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, vt, &y, precond_par, queue ));

        rho1 = rho;
        // rho = norm(y);
        rho = magma_csqrt( magma_cdotc( dofs, y.dval, 1, y.dval, 1, queue ));

        // wt = A' * q - beta' * w;
        CHECK( magma_c_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_caxpy( dofs, - MAGMA_C_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );

        // no precond: z = wt
        CHECK( magma_c_applyprecond_right( MagmaTrans, A, wt, &z, precond_par, queue ));

        thet1 = thet;
        thet = rho / (gamm * MAGMA_C_MAKE( MAGMA_C_ABS(beta), 0.0 ));
        gamm1 = gamm;
        gamm = c_one / magma_csqrt(c_one + thet*thet);
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);
        if( magma_c_isnan_inf( thet ) || magma_c_isnan_inf( gamm ) || magma_c_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        if( solver_par->numiter == 1 ){
            // d = eta * p;   s = eta * pt;   x = x + d;   r = r - s;
            magma_cqmr_4( r.num_rows, r.num_cols, eta,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }
        else{
            // pds = (thet1 * gamm)^2;
            pds = (thet1 * gamm) * (thet1 * gamm);
            // d = eta * p + pds * d;   s = eta * pt + pds * s;
            // x = x + d;   r = r - s;
            magma_cqmr_5( r.num_rows, r.num_cols, eta, pds,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }
        // psi = norm(z);
        psi = magma_csqrt( magma_cdotc( dofs, z.dval, 1, z.dval, 1, queue ) );

        res = magma_scnrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            // BUGFIX: compare the integer modulus against 0, not the
            // complex constant c_zero
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        // v = vt / rho;   y = y / rho;   w = wt / psi;   z = z / psi
        magma_cqmr_8( r.num_rows, r.num_cols, rho, psi, vt.dval, wt.dval,
                      y.dval, z.dval, v.dval, w.dval, queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK( magma_cresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_cmfree(&r, queue );
    magma_cmfree(&r_tld, queue );
    magma_cmfree(&v, queue );
    magma_cmfree(&w, queue );
    magma_cmfree(&wt, queue );
    magma_cmfree(&d, queue );
    magma_cmfree(&s, queue );
    magma_cmfree(&z, queue );
    magma_cmfree(&q, queue );
    magma_cmfree(&p, queue );
    magma_cmfree(&zt, queue );
    magma_cmfree(&vt, queue );
    magma_cmfree(&yt, queue );
    magma_cmfree(&pt, queue );
    magma_cmfree(&y, queue );
    magma_cmfree(&AT, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_cpqmr_merge */
/*
    Purpose
    -------
    Preconditioned LSQR solver for A x = b (supports rectangular A:
    the minimization convergence check below is taken when
    A.num_rows != A.num_cols).  The conjugate-transposed operator A^H is
    built once on the CPU and transferred back to the device (AT).

    Returns MAGMA_SUCCESS on convergence, MAGMA_SLOW_CONVERGENCE /
    MAGMA_DIVERGENCE otherwise; the result is also stored in
    solver_par->info.
*/
extern "C" magma_int_t
magma_clsqr(
    magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x,
    magma_c_solver_par *solver_par,
    magma_c_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_LSQR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // m: length of range-space vectors, n: length of domain-space vectors
    magma_int_t m = A.num_rows * b.num_cols;
    magma_int_t n = A.num_cols * b.num_cols;

    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
    // solver variables
    float s, nom0, r0, res=0, nomb, phibar, beta, alpha, c, rho, rhot, phi,
          thet, normr, normar, norma, sumnormd2, normd;

    // need to transpose the matrix
    magma_c_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    // GPU workspace
    magma_c_matrix r={Magma_CSR}, v={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR},
                   d={Magma_CSR}, vt={Magma_CSR}, q={Magma_CSR},
                   w={Magma_CSR}, u={Magma_CSR};
    // BUGFIX: r holds the residual b - A*x, which has A.num_rows rows
    // (was allocated with A.num_cols — wrong for rectangular A)
    CHECK( magma_cvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &v, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &z, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &vt,Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &q, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &w, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // transpose the matrix: AT = conj(A)^T, rebuilt in A's storage format
    magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );

    // solver setup
    CHECK( magma_cresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    nomb = magma_scnrm2( m, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    // Golub-Kahan bidiagonalization start: u = b / ||b||
    magma_ccopy( m, b.dval, 1, u.dval, 1, queue );
    beta = magma_scnrm2( m, u.dval, 1, queue );
    magma_cscal( m, MAGMA_C_MAKE(1./beta, 0.0 ), u.dval, 1, queue );
    normr = beta;
    c = 1.0;
    s = 0.0;
    phibar = beta;
    CHECK( magma_c_spmv( c_one, AT, u, c_zero, v, queue ));

    if( precond_par->solver == Magma_NONE ){
        ;
    } else {
        CHECK( magma_c_applyprecond_right( MagmaTrans, A, v, &zt, precond_par, queue ));
        CHECK( magma_c_applyprecond_left( MagmaTrans, A, zt, &v, precond_par, queue ));
    }
    alpha = magma_scnrm2( n, v.dval, 1, queue );
    magma_cscal( n, MAGMA_C_MAKE(1./alpha, 0.0 ), v.dval, 1, queue );
    normar = alpha * beta;
    norma = 0;
    sumnormd2 = 0;

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        // z = preconditioned v (identity when no preconditioner or A rectangular)
        if( precond_par->solver == Magma_NONE || A.num_rows != A.num_cols ) {
            magma_ccopy( n, v.dval, 1 , z.dval, 1, queue );
        } else {
            CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, v, &zt, precond_par, queue ));
            CHECK( magma_c_applyprecond_right( MagmaNoTrans, A, zt, &z, precond_par, queue ));
        }
        // u = A*z - alpha*u
        CHECK( magma_c_spmv( c_one, A, z, c_zero, zt, queue ));
        magma_cscal( m, MAGMA_C_MAKE(-alpha, 0.0 ), u.dval, 1, queue );
        magma_caxpy( m, c_one, zt.dval, 1, u.dval, 1, queue );
        solver_par->spmv_count++;
        beta = magma_scnrm2( m, u.dval, 1, queue );
        magma_cscal( m, MAGMA_C_MAKE(1./beta, 0.0 ), u.dval, 1, queue );

        // norma = norm([norma alpha beta]);
        norma = sqrt(norma*norma + alpha*alpha + beta*beta );

        // Givens rotation applied to the bidiagonal system
        thet = -s * alpha;
        rhot = c * alpha;
        rho = sqrt( rhot * rhot + beta * beta );
        c = rhot / rho;
        s = - beta / rho;
        phi = c * phibar;
        phibar = s * phibar;

        // d = (z - thet * d) / rho;
        magma_cscal( n, MAGMA_C_MAKE(-thet, 0.0 ), d.dval, 1, queue );
        magma_caxpy( n, c_one, z.dval, 1, d.dval, 1, queue );
        magma_cscal( n, MAGMA_C_MAKE(1./rho, 0.0 ), d.dval, 1, queue );
        normd = magma_scnrm2( n, d.dval, 1, queue );
        sumnormd2 = sumnormd2 + normd*normd;

        // convergence check
        res = normr;

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            // BUGFIX: compare the integer modulus against 0, not the
            // complex constant c_zero
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        // check for convergence in A*x=b
        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            info = MAGMA_SUCCESS;
            break;
        }
        // check for convergence in min{|b-A*x|}
        if ( A.num_rows != A.num_cols &&
               ( normar/(norma*normr) <= solver_par->rtol || normar <= solver_par->atol ) ){
            printf("%% warning: quit from minimization convergence check.\n");
            info = MAGMA_SUCCESS;
            break;
        }

        // x = x + phi * d
        magma_caxpy( n, MAGMA_C_MAKE( phi, 0.0 ), d.dval, 1, x->dval, 1, queue );
        normr = fabs(s) * normr;
        // v = A'*u - beta*v (with optional preconditioning of A'*u)
        CHECK( magma_c_spmv( c_one, AT, u, c_zero, vt, queue ));
        solver_par->spmv_count++;
        if( precond_par->solver == Magma_NONE ){
            ;
        } else {
            CHECK( magma_c_applyprecond_right( MagmaTrans, A, vt, &zt, precond_par, queue ));
            CHECK( magma_c_applyprecond_left( MagmaTrans, A, zt, &vt, precond_par, queue ));
        }
        magma_cscal( n, MAGMA_C_MAKE(-beta, 0.0 ), v.dval, 1, queue );
        magma_caxpy( n, c_one, vt.dval, 1, v.dval, 1, queue );
        alpha = magma_scnrm2( n, v.dval, 1, queue );
        magma_cscal( n, MAGMA_C_MAKE(1./alpha, 0.0 ), v.dval, 1, queue );
        normar = alpha * fabs(s*phi);
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK( magma_cresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_cmfree(&r, queue );
    magma_cmfree(&v, queue );
    magma_cmfree(&z, queue );
    magma_cmfree(&zt, queue );
    magma_cmfree(&d, queue );
    magma_cmfree(&vt, queue );
    magma_cmfree(&q, queue );
    magma_cmfree(&u, queue );
    magma_cmfree(&w, queue );
    magma_cmfree(&AT, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_clsqr */
/*
    Purpose
    -------
    Splits a CSR/CSRCOO matrix A (on the CPU) into a block-diagonal part D
    (blocks of size bsize, stored as CSRD with the diagonal element written
    first in each row) and the remaining off-block part R.  Fails with
    info = -1 if any row lacks its diagonal entry.  For other
    locations/formats, A is converted/transferred, split recursively, and
    the results converted back.
*/
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_matrix A,
    magma_c_matrix *D,
    magma_c_matrix *R,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_int_t i, k, j, nnz_diag, nnz_offd;

    D->val = NULL;
    D->col = NULL;
    D->row = NULL;
    D->rowidx = NULL;
    D->blockinfo = NULL;
    D->diag = NULL;
    D->dval = NULL;
    D->dcol = NULL;
    D->drow = NULL;
    D->drowidx = NULL;
    D->ddiag = NULL;
    R->val = NULL;
    R->col = NULL;
    R->row = NULL;
    R->rowidx = NULL;
    R->blockinfo = NULL;
    R->diag = NULL;
    R->dval = NULL;
    R->dcol = NULL;
    R->drow = NULL;
    R->drowidx = NULL;
    R->ddiag = NULL;

    if ( A.memory_location == Magma_CPU &&
            ( A.storage_type == Magma_CSR ||
              A.storage_type == Magma_CSRCOO ) )
    {
        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices
        for( i=0; i<A.num_rows; i+=bsize ){
            for( k=i; k<min(A.num_rows,i+bsize); k++ ){
                int check = 0;  // set when row k contains its diagonal entry
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+bsize ){
                        if( A.col[j] == k ){
                            check = 1;
                        }
                        nnz_diag++;
                    }
                    else
                        nnz_offd++;
                }
                if( check == 0 ){
                    // BUGFIX: the missing diagonal is in row k, not at the
                    // block start i; cast magma_int_t for %d as elsewhere
                    printf("error: matrix contains zero on diagonal at (%d,%d).\n",
                           int(k), int(k));
                    info = -1;
                    goto cleanup;
                }
            }
        }

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;

        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;

        CHECK( magma_cmalloc_cpu( &D->val, nnz_diag ));
        CHECK( magma_index_malloc_cpu( &D->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &D->col, nnz_diag ));

        CHECK( magma_cmalloc_cpu( &R->val, nnz_offd ));
        CHECK( magma_index_malloc_cpu( &R->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &R->col, nnz_offd ));

        // Fill up the new sparse matrices
        D->row[0] = 0;
        R->row[0] = 0;

        nnz_offd = nnz_diag = 0;
        for( i=0; i<A.num_rows; i+=bsize) {
            for( k=i; k<min(A.num_rows,i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // larger than diagonal remain as before
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right
                        // to have room for the diagonal
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
    }
    else {
        // fall back: bring A to CPU/CSR, split there, convert/transfer back
        magma_c_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, DCSR={Magma_CSR},
                       RCSR={Magma_CSR}, Dh={Magma_CSR}, Rh={Magma_CSR};
        CHECK( magma_cmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_cmconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue ));

        CHECK( magma_ccsrsplit( bsize, ACSR, &DCSR, &RCSR, queue ));

        CHECK( magma_cmconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_cmconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue ));

        CHECK( magma_cmtransfer( Dh, D, Magma_CPU, A.memory_location, queue ));
        CHECK( magma_cmtransfer( Rh, R, Magma_CPU, A.memory_location, queue ));

        magma_cmfree( &Ah, queue );
        magma_cmfree( &ACSR, queue );
        magma_cmfree( &Dh, queue );
        magma_cmfree( &DCSR, queue );
        magma_cmfree( &Rh, queue );
        magma_cmfree( &RCSR, queue );
    }
cleanup:
    if( info != 0 ){
        magma_cmfree( D, queue );
        magma_cmfree( R, queue );
    }
    return info;
}
/*
    Purpose
    -------
    Extracts one horizontal slice of a square CPU-CSR matrix A:
      B      - A's rows inside the slice, identity rows elsewhere
      ALOC   - the local (in-slice column) part of the slice
      ANLOC  - the non-local part of the slice
      comm_i/comm_v - communication plan (flag and accumulated |a_ij|
                      per non-local column)
      start/end     - first/one-past-last row index of the slice
    Only CPU/CSR input is supported; anything else returns
    MAGMA_ERR_NOT_SUPPORTED.
*/
extern "C" magma_int_t
magma_cmslice(
    magma_int_t num_slices,
    magma_int_t slice,
    magma_c_matrix A,
    magma_c_matrix *B,
    magma_c_matrix *ALOC,
    magma_c_matrix *ANLOC,
    magma_index_t *comm_i,
    magmaFloatComplex *comm_v,
    magma_int_t *start,
    magma_int_t *end,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    if( A.num_rows != A.num_cols ){
        printf("%% error: only supported for square matrices.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    if ( A.memory_location == Magma_CPU && A.storage_type == Magma_CSR ){
        // clone A's metadata, then rebuild the freed arrays per output
        CHECK( magma_cmconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( B->col );
        magma_free_cpu( B->val );
        CHECK( magma_cmconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ALOC->col );
        magma_free_cpu( ALOC->row );
        magma_free_cpu( ALOC->val );
        CHECK( magma_cmconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ANLOC->col );
        magma_free_cpu( ANLOC->row );
        magma_free_cpu( ANLOC->val );

        magma_int_t i,j,k, nnz, nnz_loc=0, loc_row = 0, nnz_nloc = 0;
        magma_index_t col;
        magma_int_t size = magma_ceildiv( A.num_rows, num_slices );
        magma_int_t lstart = slice*size;
        magma_int_t lend = min( (slice+1)*size, A.num_rows );
        // correct size for last slice
        size = lend-lstart;
        CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
        CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );

        // count elements - identity for rest
        nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
        CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
        CHECK( magma_cmalloc_cpu( &B->val, nnz ) );

        // for the communication plan
        for( i=0; i<A.num_rows; i++ ) {
            comm_i[i] = 0;
            comm_v[i] = MAGMA_C_ZERO;
        }

        k=0;
        // BUGFIX: initialize the FIRST row pointer; the previous code wrote
        // B->row[i] with i left at A.num_rows by the loop above, leaving
        // B->row[0] unset before the cumulative construction below
        B->row[0] = 0;
        ALOC->row[0] = 0;
        ANLOC->row[0] = 0;

        // identity above slice
        for( i=0; i<lstart; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_C_ONE;
            B->col[k] = i;
            k++;
        }

        // slice
        for( i=lstart; i<lend; i++ ) {
            B->row[i+1] = B->row[i] + (A.row[i+1]-A.row[i]);
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                B->val[k] = A.val[j];
                col = A.col[j];
                B->col[k] = col;
                // communication plan
                if( col<lstart || col>=lend ){
                    comm_i[ col ] = 1;
                    comm_v[ col ] = comm_v[ col ]
                            + MAGMA_C_MAKE( MAGMA_C_ABS( A.val[j] ), 0.0 );
                    nnz_nloc++;
                } else {
                    nnz_loc++;
                }
                k++;
            }
            loc_row++;
            ALOC->row[ loc_row ] = nnz_loc;
            ANLOC->row[ loc_row ] = nnz_nloc;
        }
        CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
        CHECK( magma_cmalloc_cpu( &ALOC->val, nnz_loc ) );
        ALOC->num_rows = size;
        ALOC->num_cols = size;
        ALOC->nnz = nnz_loc;

        CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
        CHECK( magma_cmalloc_cpu( &ANLOC->val, nnz_nloc ) );
        ANLOC->num_rows = size;
        ANLOC->num_cols = A.num_cols;
        ANLOC->nnz = nnz_nloc;

        nnz_loc = 0;
        nnz_nloc = 0;
        // local/nonlocal matrix
        for( i=lstart; i<lend; i++ ) {
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                col = A.col[j];
                // insert only in local part in ALOC, nonlocal in ANLOC
                if( col<lstart || col>=lend ){
                    ANLOC->val[ nnz_nloc ] = A.val[j];
                    ANLOC->col[ nnz_nloc ] = col;
                    nnz_nloc++;
                } else {
                    ALOC->val[ nnz_loc ] = A.val[j];
                    ALOC->col[ nnz_loc ] = col-lstart;  // columns local to slice
                    nnz_loc++;
                }
            }
        }

        // identity below slice
        for( i=lend; i<A.num_rows; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_C_ONE;
            B->col[k] = i;
            k++;
        }

        B->nnz = k;
        *start = lstart;
        *end = lend;
    }
    else {
        printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n",
               int(A.memory_location), int(A.storage_type) );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    return info;
}