extern "C" magma_int_t magma_z_applyprecond_right( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_preconditioner *precond, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); if ( precond->solver == Magma_JACOBI ) { magma_zcopy( b.num_rows*b.num_cols, b.dval, 1, x->dval, 1 ); // x = b } else if ( precond->solver == Magma_ILU || ( precond->solver == Magma_AILU && precond->maxiter == -1)) { magma_zapplycumilu_r( b, x, precond, queue ); } else if ( precond->solver == Magma_ICC || ( precond->solver == Magma_AICC && precond->maxiter == -1) ) { magma_zapplycumicc_r( b, x, precond, queue ); } else if ( precond->solver == Magma_NONE ) { magma_zcopy( b.num_rows*b.num_cols, b.dval, 1, x->dval, 1 ); // x = b } else if ( precond->solver == Magma_FUNCTION ) { magma_zapplycustomprecond_r( b, x, precond, queue ); } else { printf( "error: preconditioner type not yet supported.\n" ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_z_applyprecond( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_preconditioner *precond, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); if ( precond->solver == Magma_JACOBI ) { magma_zjacobi_diagscal( A.num_rows, precond->d, b, x, queue ); } else if ( precond->solver == Magma_PASTIX ) { magma_zapplypastix( b, x, precond, queue ); } else if ( precond->solver == Magma_ILU ) { magma_z_vector tmp; magma_z_vinit( &tmp, Magma_DEV, A.num_rows, MAGMA_Z_ZERO, queue ); magma_z_vfree( &tmp, queue ); } else if ( precond->solver == Magma_ICC ) { magma_z_vector tmp; magma_z_vinit( &tmp, Magma_DEV, A.num_rows, MAGMA_Z_ZERO, queue ); magma_z_vfree( &tmp, queue ); } else if ( precond->solver == Magma_NONE ) { magma_zcopy( b.num_rows, b.dval, 1, x->dval, 1 ); // x = b } else { printf( "error: preconditioner type not yet supported.\n" ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_NOT_SUPPORTED; } magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; }
extern "C" magma_int_t magma_zpcg_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_PCGMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // solver variables magmaDoubleComplex alpha, beta, gamma, rho, tmp1, *skp_h={0}; double nom, nom0, r0, res, nomb; magmaDoubleComplex den; // some useful variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows*b.num_cols; magma_z_matrix r={Magma_CSR}, d={Magma_CSR}, z={Magma_CSR}, h={Magma_CSR}, rt={Magma_CSR}; magmaDoubleComplex *d1=NULL, *d2=NULL, *skp=NULL; // GPU workspace CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &rt, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &h, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zmalloc( &d1, dofs*(2) )); CHECK( magma_zmalloc( &d2, dofs*(2) )); // array for the parameters CHECK( magma_zmalloc( &skp, 7 )); // skp = [alpha|beta|gamma|rho|tmp1|tmp2|res] // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); // preconditioner CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, r, &rt, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, rt, &h, precond_par, queue )); magma_zcopy( dofs, h.dval, 1, d.dval, 1, queue ); nom = MAGMA_Z_ABS( magma_zdotc( dofs, r.dval, 1, h.dval, 1, queue )); CHECK( magma_z_spmv( c_one, A, d, c_zero, z, queue )); // z = A d den = magma_zdotc( dofs, d.dval, 1, z.dval, 1, queue ); // den = d'* z solver_par->init_res = nom0; nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } // check positive definite if ( MAGMA_Z_ABS(den) <= 0.0 ) { info = MAGMA_NONSPD; goto cleanup; } // array on host for the parameters CHECK( magma_zmalloc_cpu( &skp_h, 7 )); alpha = rho = gamma = tmp1 = c_one; beta = magma_zdotc( dofs, h.dval, 1, r.dval, 1, queue ); skp_h[0]=alpha; skp_h[1]=beta; skp_h[2]=gamma; skp_h[3]=rho; skp_h[4]=tmp1; skp_h[5]=MAGMA_Z_MAKE(nom, 0.0); skp_h[6]=MAGMA_Z_MAKE(nom, 0.0); magma_zsetvector( 7, skp_h, 1, skp, 1, queue ); //Chronometry real_Double_t tempo1, tempo2, tempop1, tempop2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // computes SpMV and dot product CHECK( magma_zcgmerge_spmv1( A, d1, d2, d.dval, z.dval, skp, queue )); solver_par->spmv_count++; if( precond_par->solver == Magma_JACOBI ){ CHECK( magma_zjcgmerge_xrbeta( dofs, d1, d2, precond_par->d.dval, x->dval, r.dval, d.dval, z.dval, h.dval, skp, queue )); } else if( precond_par->solver == Magma_NONE ){ // updates x, r CHECK( magma_zpcgmerge_xrbeta1( dofs, x->dval, r.dval, d.dval, z.dval, skp, queue )); // computes scalars and updates d CHECK( magma_zpcgmerge_xrbeta2( dofs, d1, d2, r.dval, r.dval, d.dval, skp, queue )); } else { // updates x, r CHECK( magma_zpcgmerge_xrbeta1( dofs, x->dval, r.dval, d.dval, z.dval, skp, queue )); // preconditioner in between tempop1 = magma_sync_wtime( queue ); CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, r, &rt, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, rt, &h, precond_par, queue )); // magma_zcopy( dofs, r.dval, 1, h.dval, 1 ); tempop2 = magma_sync_wtime( queue ); precond_par->runtime += tempop2-tempop1; // computes scalars and updates d CHECK( magma_zpcgmerge_xrbeta2( dofs, d1, d2, h.dval, r.dval, d.dval, skp, queue )); } //if( solver_par->numiter==1){ // magma_zcopy( dofs, h.dval, 1, d.dval, 1 ); //} // updates x, r, computes scalars and updates d //CHECK( magma_zcgmerge_xrbeta( dofs, d1, d2, x->dval, r.dval, d.dval, z.dval, skp, queue )); // check stopping criterion (asynchronous copy) magma_zgetvector( 1 , skp+6, 1, skp_h+6, 1, queue ); res = sqrt(MAGMA_Z_ABS(skp_h[6])); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->atol || solver_par->iter_res/solver_par->init_res < solver_par->rtol ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&z, queue ); magma_zmfree(&d, queue ); magma_zmfree(&rt, queue ); magma_zmfree(&h, queue ); magma_free( d1 ); magma_free( d2 ); magma_free( skp ); magma_free_cpu( skp_h ); solver_par->info = info; return info; } /* magma_zpcg_merge */
extern "C" magma_int_t magma_ziterref( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_ITERREF; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; double residual; magma_zresidual( A, b, *x, &residual, queue ); solver_par->init_res = residual; // some useful variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE, c_mone = MAGMA_Z_NEG_ONE; magma_int_t dofs = A.num_rows; // workspace magma_z_vector r,z; magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &z, Magma_DEV, dofs, c_zero, queue ); // solver variables double nom, nom0, r0; // solver setup magma_zscal( dofs, c_zero, x->dval, 1) ; // x = 0 magma_zcopy( dofs, b.dval, 1, r.dval, 1 ); // r = b nom0 = magma_dznrm2(dofs, r.dval, 1); // nom0 = || r || nom = nom0 * nom0; solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { magma_zscal( dofs, MAGMA_Z_MAKE(1./nom, 0.), r.dval, 1) ; // scale it magma_z_precond( A, r, &z, precond_par, queue ); // inner solver: A * z = r magma_zscal( dofs, MAGMA_Z_MAKE(nom, 0.), z.dval, 1) ; // scale it magma_zaxpy(dofs, c_one, z.dval, 1, x->dval, 1); // x = x + z magma_z_spmv( c_mone, A, *x, c_zero, r, queue ); // r = - A x magma_zaxpy(dofs, c_one, b.dval, 1, r.dval, 1); // r = r + b nom = magma_dznrm2(dofs, r.dval, 1); // nom = || r || if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( nom < r0 ) { break; } } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; magma_zresidual( A, b, *x, &residual, queue ); solver_par->final_res = residual; solver_par->iter_res = nom; if ( solver_par->numiter < solver_par->maxiter) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_SLOW_CONVERGENCE; } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } magma_z_vfree(&r, queue ); magma_z_vfree(&z, queue ); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } /* magma_ziterref */
extern "C" magma_int_t magma_zpbicg( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_PBICG; solver_par->numiter = 0; solver_par->spmv_count = 0; // some useful variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // workspace magma_z_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR}, yt={Magma_CSR}, qt={Magma_CSR}; // need to transpose the matrix magma_z_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR}; CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables magmaDoubleComplex alpha, rho, beta, rho_new, ptq; double res, nomb, nom0, r0; // transpose the matrix magma_zmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_zmfree(&Ah1, queue ); magma_zmtransposeconjugate( Ah2, &Ah1, queue ); magma_zmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_zmfree(&Ah1, queue ); magma_zmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_zmfree(&Ah2, queue ); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); res = nom0; solver_par->init_res = nom0; magma_zcopy( dofs, r.dval, 1, rt.dval, 1, queue ); // rr = r rho_new = magma_zdotc( dofs, rt.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho = alpha = MAGMA_Z_MAKE( 1.0, 0. ); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, r, &y, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, y, &z, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaTrans, A, rt, &yt, precond_par, queue )); CHECK( magma_z_applyprecond_left( MagmaTrans, A, yt, &zt, precond_par, queue )); //magma_zcopy( dofs, r.dval, 1 , y.dval, 1, queue ); // y=r //magma_zcopy( dofs, y.dval, 1 , z.dval, 1, queue ); // z=y //magma_zcopy( dofs, rt.dval, 1 , yt.dval, 1, queue ); // yt=rt //magma_zcopy( dofs, yt.dval, 1 , zt.dval, 1, queue ); // yt=rt rho= rho_new; rho_new = magma_zdotc( dofs, rt.dval, 1, z.dval, 1, queue ); // rho=<rt,z> if( magma_z_isnan_inf( rho_new ) ){ info = MAGMA_DIVERGENCE; break; } if( solver_par->numiter==1 ){ magma_zcopy( dofs, z.dval, 1 , p.dval, 1, queue ); // yt=rt magma_zcopy( dofs, zt.dval, 1 , pt.dval, 1, queue ); // zt=yt } else { beta = rho_new/rho; magma_zscal( dofs, beta, p.dval, 1, queue ); // p = beta*p magma_zaxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue ); // p = z+beta*p magma_zscal( dofs, MAGMA_Z_CONJ(beta), pt.dval, 1, queue ); // pt = beta*pt magma_zaxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue ); // pt = zt+beta*pt } CHECK( magma_z_spmv( c_one, A, p, c_zero, q, queue )); // v = Ap CHECK( magma_z_spmv( c_one, AT, pt, c_zero, qt, queue )); // v = Ap solver_par->spmv_count++; solver_par->spmv_count++; ptq = magma_zdotc( dofs, pt.dval, 1, q.dval, 1, queue ); alpha = rho_new /ptq; magma_zaxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue ); // x=x+alpha*p magma_zaxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue ); // r=r+alpha*q magma_zaxpy( dofs, c_neg_one * MAGMA_Z_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue ); // r=r+alpha*q res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&rt, queue ); magma_zmfree(&p, queue ); magma_zmfree(&pt, queue ); magma_zmfree(&q, queue ); magma_zmfree(&qt, queue ); magma_zmfree(&y, queue ); magma_zmfree(&yt, queue ); magma_zmfree(&z, queue ); magma_zmfree(&zt, queue ); magma_zmfree(&AT, queue ); magma_zmfree(&Ah1, queue ); magma_zmfree(&Ah2, queue ); solver_par->info = info; return info; } /* magma_zpbicg */
extern "C" magma_int_t magma_zqmr_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_QMRMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; // solver variables double nom0, r0, res=0, nomb; magmaDoubleComplex rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one, thet = c_one, thet1 = c_one, epsilon = c_one, beta = c_one, delta = c_one, pde = c_one, rde = c_one, gamm = c_one, gamm1 = c_one, psi = c_one; magma_int_t dofs = A.num_rows* b.num_cols; // need to transpose the matrix magma_z_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR}; // GPU workspace magma_z_matrix r={Magma_CSR}, r_tld={Magma_CSR}, v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR}, d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR}; CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; magma_zcopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, y.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, v.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, wt.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, z.dval, 1, queue ); // transpose the matrix magma_zmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_zmfree(&Ah1, queue ); magma_zmtransposeconjugate( Ah2, &Ah1, queue ); magma_zmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_zmfree(&Ah1, queue ); magma_zmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_zmfree(&Ah2, queue ); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } psi = magma_zsqrt( magma_zdotc( dofs, z.dval, 1, z.dval, 1, queue )); rho = magma_zsqrt( magma_zdotc( dofs, y.dval, 1, y.dval, 1, queue )); // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_zqmr_1( r.num_rows, r.num_cols, rho, psi, y.dval, z.dval, v.dval, w.dval, queue ); //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; if( magma_z_isnan_inf( rho ) || magma_z_isnan_inf( psi ) ){ info = MAGMA_DIVERGENCE; break; } // delta = z' * y; delta = magma_zdotc( dofs, z.dval, 1, y.dval, 1, queue ); if( magma_z_isnan_inf( delta ) ){ info = MAGMA_DIVERGENCE; break; } // no precond: yt = y, zt = z //magma_zcopy( dofs, y.dval, 1, yt.dval, 1 ); //magma_zcopy( dofs, z.dval, 1, zt.dval, 1 ); if( solver_par->numiter == 1 ){ // p = y; // q = z; magma_zcopy( dofs, y.dval, 1, p.dval, 1, queue ); magma_zcopy( dofs, z.dval, 1, q.dval, 1, queue ); } else{ pde = psi * delta / epsilon; rde = rho * MAGMA_Z_CONJ(delta/epsilon); // p = y - pde * p // q = z - rde * q magma_zqmr_2( r.num_rows, r.num_cols, pde, rde, y.dval, z.dval, p.dval, q.dval, queue ); } if( magma_z_isnan_inf( rho ) || magma_z_isnan_inf( psi ) ){ info = MAGMA_DIVERGENCE; break; } CHECK( magma_z_spmv( c_one, A, p, c_zero, pt, queue )); solver_par->spmv_count++; // epsilon = q' * pt; epsilon = magma_zdotc( dofs, q.dval, 1, pt.dval, 1, queue ); beta = epsilon / delta; if( magma_z_isnan_inf( epsilon ) || magma_z_isnan_inf( beta ) ){ info = MAGMA_DIVERGENCE; break; } // v = pt - beta * v // y = v magma_zqmr_3( r.num_rows, r.num_cols, beta, pt.dval, v.dval, y.dval, queue ); rho1 = rho; // rho = norm(y); rho = magma_zsqrt( magma_zdotc( dofs, y.dval, 1, y.dval, 1, queue )); // wt = A' * q - beta' * w; CHECK( magma_z_spmv( c_one, AT, q, c_zero, wt, queue )); solver_par->spmv_count++; magma_zaxpy( dofs, - MAGMA_Z_CONJ( beta ), w.dval, 1, wt.dval, 1, queue ); // no precond: z = wt magma_zcopy( dofs, wt.dval, 1, z.dval, 1, queue ); thet1 = thet; thet = rho / (gamm * MAGMA_Z_MAKE( MAGMA_Z_ABS(beta), 0.0 )); gamm1 = gamm; gamm = c_one / magma_zsqrt(c_one + thet*thet); eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1); if( magma_z_isnan_inf( thet ) || magma_z_isnan_inf( gamm ) || magma_z_isnan_inf( eta ) ){ info = MAGMA_DIVERGENCE; break; } if( solver_par->numiter == 1 ){ // d = eta * p + pds * d; // s = eta * pt + pds * d; // x = x + d; // r = r - s; magma_zqmr_4( r.num_rows, r.num_cols, eta, p.dval, pt.dval, d.dval, s.dval, x->dval, r.dval, queue ); } else{ pds = (thet1 * gamm) * (thet1 * gamm); // d = eta * p + pds * d; // s = eta * pt + pds * d; // x = x + d; // r = r - s; magma_zqmr_5( r.num_rows, r.num_cols, eta, pds, p.dval, pt.dval, d.dval, s.dval, x->dval, r.dval, queue ); } // psi = norm(z); psi = magma_zsqrt( magma_zdotc( dofs, z.dval, 1, z.dval, 1, queue ) ); res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_zqmr_1( r.num_rows, r.num_cols, rho, psi, y.dval, z.dval, v.dval, w.dval, queue ); if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&r_tld, queue ); magma_zmfree(&v, queue ); magma_zmfree(&w, queue ); magma_zmfree(&wt, queue ); magma_zmfree(&d, queue ); magma_zmfree(&s, queue ); magma_zmfree(&z, queue ); magma_zmfree(&q, queue ); magma_zmfree(&p, queue ); magma_zmfree(&pt, queue ); magma_zmfree(&y, queue ); magma_zmfree(&AT, queue ); magma_zmfree(&Ah1, queue ); magma_zmfree(&Ah2, queue ); solver_par->info = info; return info; } /* magma_zqmr_merge */
extern "C" magma_int_t magma_zpbicgstab_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_BICGSTAB; solver_par->numiter = 0; solver_par->spmv_count = 0; // some useful variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // workspace magma_z_matrix r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, z={Magma_CSR}, y={Magma_CSR}, ms={Magma_CSR}, mt={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}, d1={Magma_CSR}, d2={Magma_CSR}; CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &rr,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &ms,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &mt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d1, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d2, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables magmaDoubleComplex alpha, beta, omega, rho_old, rho_new; double nom, betanom, nom0, r0, res, nomb; res=0; //double den; // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); magma_zcopy( dofs, r.dval, 1, rr.dval, 1, queue ); // rr = r betanom = nom0; nom = nom0*nom0; rho_new = magma_zdotc( dofs, r.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho_old = omega = alpha = MAGMA_Z_MAKE( 1.0, 0. ); solver_par->init_res = nom0; CHECK( magma_z_spmv( c_one, A, r, c_zero, v, queue )); // z = A r nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } if ( nom < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2, tempop1, tempop2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; rho_old = rho_new; // rho_old=rho rho_new = magma_zdotc( dofs, rr.dval, 1, r.dval, 1, queue ); // rho=<rr,r> beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega if( magma_z_isnan_inf( beta ) ){ info = MAGMA_DIVERGENCE; break; } // p = r + beta * ( p - omega * v ) magma_zbicgstab_1( r.num_rows, r.num_cols, beta, omega, r.dval, v.dval, p.dval, queue ); // preconditioner tempop1 = magma_sync_wtime( queue ); CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, p, &mt, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, mt, &y, precond_par, queue )); tempop2 = magma_sync_wtime( queue ); precond_par->runtime += tempop2-tempop1; CHECK( magma_z_spmv( c_one, A, y, c_zero, v, queue )); // v = Ap solver_par->spmv_count++; //alpha = rho_new / tmpval; alpha = rho_new /magma_zdotc( dofs, rr.dval, 1, v.dval, 1, queue ); if( magma_z_isnan_inf( alpha ) ){ info = MAGMA_DIVERGENCE; break; } // s = r - alpha v magma_zbicgstab_2( r.num_rows, r.num_cols, alpha, r.dval, v.dval, s.dval, queue ); // preconditioner tempop1 = magma_sync_wtime( queue ); CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, s, &ms, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, ms, &z, precond_par, queue )); tempop2 = magma_sync_wtime( queue ); precond_par->runtime += tempop2-tempop1; CHECK( magma_z_spmv( c_one, A, z, c_zero, t, queue )); // t=As solver_par->spmv_count++; omega = magma_zdotc( dofs, t.dval, 1, s.dval, 1, queue ) // omega = <s,t>/<t,t> / magma_zdotc( dofs, t.dval, 1, t.dval, 1, queue ); // x = x + alpha * y + omega * z // r = s - omega * t magma_zbicgstab_4( r.num_rows, r.num_cols, alpha, omega, y.dval, z.dval, s.dval, t.dval, x->dval, r.dval, queue ); res = betanom = magma_dznrm2( dofs, r.dval, 1, queue ); nom = betanom*betanom; if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->final_res = residual; solver_par->iter_res = res; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&rr, queue ); magma_zmfree(&p, queue ); magma_zmfree(&v, queue ); magma_zmfree(&s, queue ); magma_zmfree(&y, queue ); magma_zmfree(&z, queue ); magma_zmfree(&t, queue ); magma_zmfree(&ms, queue ); magma_zmfree(&mt, queue ); magma_zmfree(&d1, queue ); magma_zmfree(&d2, queue ); solver_par->info = info; return info; } /* magma_zbicgstab_merge */
extern "C" magma_int_t magma_zptfqmr_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_TFQMRMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; // solver variables double nom0, r0, res=0, nomb; magmaDoubleComplex rho = c_one, rho_l = c_one, eta = c_zero , c = c_zero , theta = c_zero , tau = c_zero, alpha = c_one, beta = c_zero, sigma = c_zero; magma_int_t dofs = A.num_rows* b.num_cols; // GPU workspace magma_z_matrix r={Magma_CSR}, r_tld={Magma_CSR}, pu_m={Magma_CSR}, d={Magma_CSR}, w={Magma_CSR}, v={Magma_CSR}, t={Magma_CSR}, u_mp1={Magma_CSR}, u_m={Magma_CSR}, Au={Magma_CSR}, Ad={Magma_CSR}, Au_new={Magma_CSR}; CHECK( magma_zvinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &u_mp1,Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &r_tld,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &u_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &pu_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &Ad, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Au_new, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Au, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; magma_zcopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, w.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, u_m.dval, 1, queue ); // preconditioner CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, u_m, &t, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, t, &pu_m, precond_par, queue )); CHECK( magma_z_spmv( c_one, A, pu_m, c_zero, v, queue )); // v = A u magma_zcopy( dofs, v.dval, 1, Au.dval, 1, queue ); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } tau = magma_zsqrt( magma_zdotc( dofs, r.dval, 1, r_tld.dval, 1, queue) ); rho = magma_zdotc( dofs, r.dval, 1, r_tld.dval, 1, queue ); rho_l = rho; //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // do this every iteration as unrolled alpha = rho / magma_zdotc( dofs, v.dval, 1, r_tld.dval, 1, queue ); sigma = theta * theta / alpha * eta; magma_ztfqmr_1( r.num_rows, r.num_cols, alpha, sigma, v.dval, Au.dval, u_m.dval, pu_m.dval, u_mp1.dval, w.dval, d.dval, Ad.dval, queue ); theta = magma_zsqrt( magma_zdotc(dofs, w.dval, 1, w.dval, 1, queue) ) / tau; c = c_one / magma_zsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; sigma = theta * theta / alpha * eta; if ( magma_z_isnan_inf( theta ) || magma_z_isnan_inf( c ) || magma_z_isnan_inf( tau ) || magma_z_isnan_inf( eta ) || magma_z_isnan_inf( sigma ) ) { info = MAGMA_DIVERGENCE; break; } magma_ztfqmr_2( r.num_rows, r.num_cols, eta, d.dval, Ad.dval, x->dval, r.dval, queue ); res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ info = MAGMA_SUCCESS; break; } // preconditioner CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, u_mp1, &t, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, t, &pu_m, precond_par, queue )); CHECK( magma_z_spmv( c_one, A, pu_m, c_zero, Au_new, queue )); // Au_new = A u_mp1 solver_par->spmv_count++; magma_zcopy( dofs, Au_new.dval, 1, Au.dval, 1, queue ); magma_zcopy( dofs, u_mp1.dval, 1, u_m.dval, 1, queue ); // here starts the second part of the loop ################################# magma_ztfqmr_5( r.num_rows, r.num_cols, alpha, sigma, v.dval, Au.dval, pu_m.dval, w.dval, d.dval, Ad.dval, queue ); sigma = theta * theta / alpha * eta; theta = magma_zsqrt( magma_zdotc(dofs, w.dval, 1, w.dval, 1, queue) ) / tau; c = c_one / magma_zsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; magma_ztfqmr_2( r.num_rows, r.num_cols, eta, d.dval, Ad.dval, x->dval, r.dval, queue ); res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ info = MAGMA_SUCCESS; break; } rho = magma_zdotc( dofs, w.dval, 1, r_tld.dval, 1, queue ); beta = rho / rho_l; rho_l = rho; magma_ztfqmr_3( r.num_rows, r.num_cols, beta, w.dval, u_m.dval, u_mp1.dval, queue ); // preconditioner CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, u_mp1, &t, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, t, &pu_m, precond_par, queue )); CHECK( magma_z_spmv( c_one, A, pu_m, c_zero, Au_new, queue )); // Au_new = A pu_m solver_par->spmv_count++; magma_ztfqmr_4( r.num_rows, r.num_cols, beta, Au_new.dval, v.dval, Au.dval, queue ); magma_zcopy( dofs, u_mp1.dval, 1, u_m.dval, 1, queue ); } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&r_tld, queue ); magma_zmfree(&d, queue ); magma_zmfree(&w, queue ); magma_zmfree(&v, queue ); magma_zmfree(&u_m, queue ); magma_zmfree(&u_mp1, queue ); magma_zmfree(&pu_m, queue ); magma_zmfree(&d, queue ); magma_zmfree(&t, queue ); magma_zmfree(&Au, queue ); magma_zmfree(&Au_new, queue ); magma_zmfree(&Ad, queue ); solver_par->info = info; return info; } /* magma_zptfqmr_merge */
extern "C" magma_int_t magma_zpidr_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_PIDRMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; solver_par->init_res = 0.0; solver_par->final_res = 0.0; solver_par->iter_res = 0.0; solver_par->runtime = 0.0; // constants const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; const magmaDoubleComplex c_one = MAGMA_Z_ONE; const magmaDoubleComplex c_n_one = MAGMA_Z_NEG_ONE; // internal user parameters const magma_int_t smoothing = 1; // 0 = disable, 1 = enable const double angle = 0.7; // [0-1] // local variables magma_int_t iseed[4] = {0, 0, 0, 1}; magma_int_t dof; magma_int_t s; magma_int_t distr; magma_int_t k, i, sk; magma_int_t innerflag; magma_int_t ldd; double residual; double nrm; double nrmb; double nrmr; double nrmt; double rho; magmaDoubleComplex om; magmaDoubleComplex gamma; magmaDoubleComplex fk; // matrices and vectors magma_z_matrix dxs = {Magma_CSR}; magma_z_matrix dr = {Magma_CSR}, drs = {Magma_CSR}; magma_z_matrix dP = {Magma_CSR}, dP1 = {Magma_CSR}; magma_z_matrix dG = {Magma_CSR}, dGcol = {Magma_CSR}; magma_z_matrix dU = {Magma_CSR}; magma_z_matrix dM = {Magma_CSR}, hMdiag = {Magma_CSR}; magma_z_matrix df = {Magma_CSR}; magma_z_matrix dt = {Magma_CSR}, dtt = {Magma_CSR}; magma_z_matrix dc = {Magma_CSR}; magma_z_matrix dv = {Magma_CSR}; magma_z_matrix dlu = {Magma_CSR}; magma_z_matrix dskp = {Magma_CSR}, hskp = {Magma_CSR}; magma_z_matrix dalpha = {Magma_CSR}, halpha = {Magma_CSR}; magma_z_matrix dbeta = {Magma_CSR}, hbeta = {Magma_CSR}; magmaDoubleComplex *d1 = NULL, *d2 = NULL; // chronometry real_Double_t tempo1, tempo2; // initial s space // TODO: add option for 's' (shadow space number) // Hack: uses '--restart' option as the shadow space number. // This is not a good idea because the default value of restart option is used to detect // if the user provided a custom restart. This means that if the default restart value // is changed then the code will think it was the user (unless the default value is // also updated in the 'if' statement below. s = 1; if ( solver_par->restart != 50 ) { if ( solver_par->restart > A.num_cols ) { s = A.num_cols; } else { s = solver_par->restart; } } solver_par->restart = s; // set max iterations solver_par->maxiter = min( 2 * A.num_cols, solver_par->maxiter ); // check if matrix A is square if ( A.num_rows != A.num_cols ) { //printf("Matrix A is not square.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } // |b| nrmb = magma_dznrm2( b.num_rows, b.dval, 1, queue ); if ( nrmb == 0.0 ) { magma_zscal( x->num_rows, MAGMA_Z_ZERO, x->dval, 1, queue ); info = MAGMA_SUCCESS; goto cleanup; } // t = 0 // make t twice as large to contain both, dt and dr ldd = magma_roundup( b.num_rows, 32 ); CHECK( magma_zvinit( &dt, Magma_DEV, ldd, 2, c_zero, queue )); dt.num_rows = b.num_rows; dt.num_cols = 1; dt.nnz = dt.num_rows; // redirect the dr.dval to the second part of dt CHECK( magma_zvinit( &dr, Magma_DEV, b.num_rows, 1, c_zero, queue )); magma_free( dr.dval ); dr.dval = dt.dval + ldd; // r = b - A x CHECK( magma_zresidualvec( A, b, *x, &dr, &nrmr, queue )); // |r| solver_par->init_res = nrmr; solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nrmr; } // check if initial is guess good enough if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { info = MAGMA_SUCCESS; goto cleanup; } // P = randn(n, s) // P = ortho(P) //--------------------------------------- // P = 0.0 CHECK( magma_zvinit( &dP, Magma_CPU, A.num_cols, s, c_zero, queue )); // P = randn(n, s) distr = 3; // 1 = unif (0,1), 2 = unif (-1,1), 3 = normal (0,1) dof = dP.num_rows * dP.num_cols; lapackf77_zlarnv( &distr, iseed, &dof, dP.val ); // transfer P to device CHECK( magma_zmtransfer( dP, &dP1, Magma_CPU, Magma_DEV, queue )); magma_zmfree( &dP, queue ); // P = ortho(P1) if ( dP1.num_cols > 1 ) { // P = magma_zqr(P1), QR factorization CHECK( magma_zqr( dP1.num_rows, dP1.num_cols, dP1, dP1.ld, &dP, NULL, queue )); } else { // P = P1 / |P1| nrm = magma_dznrm2( dof, dP1.dval, 1, queue ); nrm = 1.0 / nrm; magma_zdscal( dof, nrm, dP1.dval, 1, queue ); CHECK( magma_zmtransfer( dP1, &dP, Magma_DEV, Magma_DEV, queue )); } magma_zmfree( &dP1, queue ); //--------------------------------------- // allocate memory for the scalar products CHECK( magma_zvinit( &hskp, Magma_CPU, 4, 1, c_zero, queue )); CHECK( magma_zvinit( &dskp, Magma_DEV, 4, 1, c_zero, queue )); CHECK( magma_zvinit( &halpha, Magma_CPU, s, 1, c_zero, queue )); CHECK( magma_zvinit( &dalpha, Magma_DEV, s, 1, c_zero, queue )); CHECK( magma_zvinit( &hbeta, Magma_CPU, s, 1, c_zero, queue )); CHECK( magma_zvinit( &dbeta, Magma_DEV, s, 1, c_zero, queue )); // workspace for merged dot product CHECK( magma_zmalloc( &d1, max(2, s) * b.num_rows )); CHECK( magma_zmalloc( &d2, max(2, s) * b.num_rows )); // smoothing enabled if ( smoothing > 0 ) { // set smoothing solution vector CHECK( magma_zmtransfer( *x, &dxs, Magma_DEV, Magma_DEV, queue )); // tt = 0 // make tt twice as large to contain both, dtt and drs ldd = magma_roundup( b.num_rows, 32 ); CHECK( magma_zvinit( &dtt, Magma_DEV, ldd, 2, c_zero, queue )); dtt.num_rows = dr.num_rows; dtt.num_cols = 1; dtt.nnz = dtt.num_rows; // redirect the drs.dval to the second part of dtt CHECK( magma_zvinit( &drs, Magma_DEV, dr.num_rows, 1, c_zero, queue )); magma_free( drs.dval ); drs.dval = dtt.dval + ldd; // set smoothing residual vector magma_zcopyvector( dr.num_rows, dr.dval, 1, drs.dval, 1, queue ); } // G(n,s) = 0 if ( s > 1 ) { ldd = magma_roundup( A.num_rows, 32 ); CHECK( magma_zvinit( &dG, Magma_DEV, ldd, s, c_zero, queue )); dG.num_rows = A.num_rows; } else { CHECK( magma_zvinit( &dG, Magma_DEV, A.num_rows, s, c_zero, queue )); } // dGcol represents a single column of dG, array pointer is set inside loop CHECK( magma_zvinit( &dGcol, Magma_DEV, dG.num_rows, 1, c_zero, queue )); magma_free( dGcol.dval ); // U(n,s) = 0 if ( s > 1 ) { ldd = magma_roundup( A.num_cols, 32 ); CHECK( magma_zvinit( &dU, Magma_DEV, ldd, s, c_zero, queue )); dU.num_rows = A.num_cols; } else { CHECK( magma_zvinit( &dU, Magma_DEV, A.num_cols, s, c_zero, queue )); } // M(s,s) = I CHECK( magma_zvinit( &dM, Magma_DEV, s, s, c_zero, queue )); CHECK( magma_zvinit( &hMdiag, Magma_CPU, s, 1, c_zero, queue )); magmablas_zlaset( MagmaFull, dM.num_rows, dM.num_cols, c_zero, c_one, dM.dval, dM.ld, queue ); // f = 0 CHECK( magma_zvinit( &df, Magma_DEV, dP.num_cols, 1, c_zero, queue )); // c = 0 CHECK( magma_zvinit( &dc, Magma_DEV, dM.num_cols, 1, c_zero, queue )); // v = 0 CHECK( magma_zvinit( &dv, Magma_DEV, dr.num_rows, 1, c_zero, queue )); // lu = 0 CHECK( magma_zvinit( &dlu, Magma_DEV, dr.num_rows, 1, c_zero, queue )); //--------------START TIME--------------- // chronometry tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->timing[0] = 0.0; } om = MAGMA_Z_ONE; innerflag = 0; // start iteration do { solver_par->numiter++; // new RHS for small systems // f = P' r magma_zgemvmdot_shfl( dP.num_rows, dP.num_cols, dP.dval, dr.dval, d1, d2, df.dval, queue ); // shadow space loop for ( k = 0; k < s; ++k ) { sk = s - k; // c(k:s) = M(k:s,k:s) \ f(k:s) magma_zcopyvector( sk, &df.dval[k], 1, &dc.dval[k], 1, queue ); magma_ztrsv( MagmaLower, MagmaNoTrans, MagmaNonUnit, sk, &dM.dval[k*dM.ld+k], dM.ld, &dc.dval[k], 1, queue ); // v = r - G(:,k:s) c(k:s) magma_zcopyvector( dr.num_rows, dr.dval, 1, dv.dval, 1, queue ); magmablas_zgemv( MagmaNoTrans, dG.num_rows, sk, c_n_one, &dG.dval[k*dG.ld], dG.ld, &dc.dval[k], 1, c_one, dv.dval, 1, queue ); // preconditioning operation // v = L \ v; // v = U \ v; CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, dv, &dlu, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, dlu, &dv, precond_par, queue )); // U(:,k) = om * v + U(:,k:s) c(k:s) magmablas_zgemv( MagmaNoTrans, dU.num_rows, sk, c_one, &dU.dval[k*dU.ld], dU.ld, &dc.dval[k], 1, om, dv.dval, 1, queue ); magma_zcopyvector( dU.num_rows, dv.dval, 1, &dU.dval[k*dU.ld], 1, queue ); // G(:,k) = A U(:,k) dGcol.dval = dG.dval + k * dG.ld; CHECK( magma_z_spmv( c_one, A, dv, c_zero, dGcol, queue )); solver_par->spmv_count++; // bi-orthogonalize the new basis vectors for ( i = 0; i < k; ++i ) { // alpha = P(:,i)' G(:,k) halpha.val[i] = magma_zdotc( dP.num_rows, &dP.dval[i*dP.ld], 1, &dG.dval[k*dG.ld], 1, queue ); // alpha = alpha / M(i,i) halpha.val[i] = halpha.val[i] / hMdiag.val[i]; // G(:,k) = G(:,k) - alpha * G(:,i) magma_zaxpy( dG.num_rows, -halpha.val[i], &dG.dval[i*dG.ld], 1, &dG.dval[k*dG.ld], 1, queue ); } // non-first s iteration if ( k > 0 ) { // U update outside of loop using GEMV // U(:,k) = U(:,k) - U(:,1:k) * alpha(1:k) magma_zsetvector( k, halpha.val, 1, dalpha.dval, 1, queue ); magmablas_zgemv( MagmaNoTrans, dU.num_rows, k, c_n_one, dU.dval, dU.ld, dalpha.dval, 1, c_one, &dU.dval[k*dU.ld], 1, queue ); } // new column of M = P'G, first k-1 entries are zero // M(k:s,k) = P(:,k:s)' G(:,k) magma_zgemvmdot_shfl( dP.num_rows, sk, &dP.dval[k*dP.ld], &dG.dval[k*dG.ld], d1, d2, &dM.dval[k*dM.ld+k], queue ); magma_zgetvector( 1, &dM.dval[k*dM.ld+k], 1, &hMdiag.val[k], 1, queue ); // check M(k,k) == 0 if ( MAGMA_Z_EQUAL(hMdiag.val[k], MAGMA_Z_ZERO) ) { innerflag = 1; info = MAGMA_DIVERGENCE; break; } // beta = f(k) / M(k,k) magma_zgetvector( 1, &df.dval[k], 1, &fk, 1, queue ); hbeta.val[k] = fk / hMdiag.val[k]; // check for nan if ( magma_z_isnan( hbeta.val[k] ) || magma_z_isinf( hbeta.val[k] )) { innerflag = 1; info = MAGMA_DIVERGENCE; break; } // r = r - beta * G(:,k) magma_zaxpy( dr.num_rows, -hbeta.val[k], &dG.dval[k*dG.ld], 1, dr.dval, 1, queue ); // smoothing disabled if ( smoothing <= 0 ) { // |r| nrmr = magma_dznrm2( dr.num_rows, dr.dval, 1, queue ); // smoothing enabled } else { // x = x + beta * U(:,k) magma_zaxpy( x->num_rows, hbeta.val[k], &dU.dval[k*dU.ld], 1, x->dval, 1, queue ); // smoothing operation //--------------------------------------- // t = rs - r magma_zidr_smoothing_1( drs.num_rows, drs.num_cols, drs.dval, dr.dval, dtt.dval, queue ); // t't // t'rs CHECK( magma_zgemvmdot_shfl( dt.ld, 2, dtt.dval, dtt.dval, d1, d2, &dskp.dval[2], queue )); magma_zgetvector( 2, &dskp.dval[2], 1, &hskp.val[2], 1, queue ); // gamma = (t' * rs) / (t' * t) gamma = hskp.val[3] / hskp.val[2]; // rs = rs - gamma * (rs - r) magma_zaxpy( drs.num_rows, -gamma, dtt.dval, 1, drs.dval, 1, queue ); // xs = xs - gamma * (xs - x) magma_zidr_smoothing_2( dxs.num_rows, dxs.num_cols, -gamma, x->dval, dxs.dval, queue ); // |rs| nrmr = magma_dznrm2( drs.num_rows, drs.dval, 1, queue ); //--------------------------------------- } // store current timing and residual if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter) % solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)nrmr; solver_par->timing[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)tempo2 - tempo1; } } // check convergence or iteration limit if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { s = k + 1; // for the x-update outside the loop innerflag = 2; info = MAGMA_SUCCESS; break; } // non-last s iteration if ( (k + 1) < s ) { // f(k+1:s) = f(k+1:s) - beta * M(k+1:s,k) magma_zaxpy( sk-1, -hbeta.val[k], &dM.dval[k*dM.ld+(k+1)], 1, &df.dval[k+1], 1, queue ); } } // smoothing disabled if ( smoothing <= 0 && innerflag != 1 ) { // update solution approximation x // x = x + U(:,1:s) * beta(1:s) magma_zsetvector( s, hbeta.val, 1, dbeta.dval, 1, queue ); magmablas_zgemv( MagmaNoTrans, dU.num_rows, s, c_one, dU.dval, dU.ld, dbeta.dval, 1, c_one, x->dval, 1, queue ); } // check convergence or iteration limit or invalid result of inner loop if ( innerflag > 0 ) { break; } // v = r magma_zcopy( dr.num_rows, dr.dval, 1, dv.dval, 1, queue ); // preconditioning operation // v = L \ v; // v = U \ v; CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, dv, &dlu, precond_par, queue )); CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, dlu, &dv, precond_par, queue )); // t = A v CHECK( magma_z_spmv( c_one, A, dv, c_zero, dt, queue )); solver_par->spmv_count++; // computation of a new omega //--------------------------------------- // t't // t'r CHECK( magma_zgemvmdot_shfl( dt.ld, 2, dt.dval, dt.dval, d1, d2, dskp.dval, queue )); magma_zgetvector( 2, dskp.dval, 1, hskp.val, 1, queue ); // |t| nrmt = magma_dsqrt( MAGMA_Z_REAL(hskp.val[0]) ); // rho = abs((t' * r) / (|t| * |r|)) rho = MAGMA_D_ABS( MAGMA_Z_REAL(hskp.val[1]) / (nrmt * nrmr) ); // om = (t' * r) / (|t| * |t|) om = hskp.val[1] / hskp.val[0]; if ( rho < angle ) { om = (om * angle) / rho; } //--------------------------------------- if ( MAGMA_Z_EQUAL(om, MAGMA_Z_ZERO) ) { info = MAGMA_DIVERGENCE; break; } // update approximation vector // x = x + om * v magma_zaxpy( x->num_rows, om, dv.dval, 1, x->dval, 1, queue ); // update residual vector // r = r - om * t magma_zaxpy( dr.num_rows, -om, dt.dval, 1, dr.dval, 1, queue ); // smoothing disabled if ( smoothing <= 0 ) { // residual norm nrmr = magma_dznrm2( dr.num_rows, dr.dval, 1, queue ); // smoothing enabled } else { // smoothing operation //--------------------------------------- // t = rs - r magma_zidr_smoothing_1( drs.num_rows, drs.num_cols, drs.dval, dr.dval, dtt.dval, queue ); // t't // t'rs CHECK( magma_zgemvmdot_shfl( dt.ld, 2, dtt.dval, dtt.dval, d1, d2, &dskp.dval[2], queue )); magma_zgetvector( 2, &dskp.dval[2], 1, &hskp.val[2], 1, queue ); // gamma = (t' * rs) / (t' * t) gamma = hskp.val[3] / hskp.val[2]; // rs = rs - gamma * (rs - r) magma_zaxpy( drs.num_rows, -gamma, dtt.dval, 1, drs.dval, 1, queue ); // xs = xs - gamma * (xs - x) magma_zidr_smoothing_2( dxs.num_rows, dxs.num_cols, -gamma, x->dval, dxs.dval, queue ); // |rs| nrmr = magma_dznrm2( drs.num_rows, drs.dval, 1, queue ); //--------------------------------------- } // store current timing and residual if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter) % solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)nrmr; solver_par->timing[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)tempo2 - tempo1; } } // check convergence if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { info = MAGMA_SUCCESS; break; } } while ( solver_par->numiter + 1 <= solver_par->maxiter ); // smoothing enabled if ( smoothing > 0 ) { // x = xs magma_zcopyvector( x->num_rows, dxs.dval, 1, x->dval, 1, queue ); // r = rs magma_zcopyvector( dr.num_rows, drs.dval, 1, dr.dval, 1, queue ); } // get last iteration timing tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t)tempo2 - tempo1; //--------------STOP TIME---------------- // get final stats solver_par->iter_res = nrmr; CHECK( magma_zresidualvec( A, b, *x, &dr, &residual, queue )); solver_par->final_res = residual; // set solver conclusion if ( info != MAGMA_SUCCESS && info != MAGMA_DIVERGENCE ) { if ( solver_par->init_res > solver_par->final_res ) { info = MAGMA_SLOW_CONVERGENCE; } } cleanup: // free resources // smoothing enabled if ( smoothing > 0 ) { drs.dval = NULL; // needed because its pointer is redirected to dtt magma_zmfree( &dxs, queue ); magma_zmfree( &drs, queue ); magma_zmfree( &dtt, queue ); } dr.dval = NULL; // needed because its pointer is redirected to dt dGcol.dval = NULL; // needed because its pointer is redirected to dG magma_zmfree( &dr, queue ); magma_zmfree( &dP, queue ); magma_zmfree( &dP1, queue ); magma_zmfree( &dG, queue ); magma_zmfree( &dGcol, queue ); magma_zmfree( &dU, queue ); magma_zmfree( &dM, queue ); magma_zmfree( &hMdiag, queue ); magma_zmfree( &df, queue ); magma_zmfree( &dt, queue ); magma_zmfree( &dc, queue ); magma_zmfree( &dv, queue ); magma_zmfree( &dlu, queue ); magma_zmfree( &dskp, queue ); magma_zmfree( &dalpha, queue ); magma_zmfree( &dbeta, queue ); magma_zmfree( &hskp, queue ); magma_zmfree( &halpha, queue ); magma_zmfree( &hbeta, queue ); magma_free( d1 ); magma_free( d2 ); solver_par->info = info; return info; /* magma_zpidr_merge */ }
extern "C" magma_int_t magma_zcg_res( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_CG; solver_par->numiter = 0; solver_par->spmv_count = 0; // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, res, nomb; magmaDoubleComplex den, gammanew, gammaold = MAGMA_Z_MAKE(1.0,0.0); // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows* b.num_cols; // GPU workspace magma_z_matrix r={Magma_CSR}, p={Magma_CSR}, q={Magma_CSR}; CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); magma_zcopy( dofs, r.dval, 1, p.dval, 1, queue ); // p = h nom = MAGMA_Z_ABS( magma_zdotc( dofs, r.dval, 1, r.dval, 1, queue) ); CHECK( magma_z_spmv( c_one, A, p, c_zero, q, queue )); // q = A p solver_par->spmv_count++; den = magma_zdotc( dofs, p.dval, 1, q.dval, 1, queue ); // den = p dot q solver_par->init_res = nom0; nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } // check positive definite if ( MAGMA_Z_ABS(den) <= 0.0 ) { info = MAGMA_NONSPD; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; gammanew = magma_zdotc( dofs, r.dval, 1, r.dval, 1, queue ); // gn = < r,r> if ( solver_par->numiter == 1 ) { magma_zcopy( dofs, r.dval, 1, p.dval, 1, queue ); // p = r } else { beta = (gammanew/gammaold); // beta = gn/go magma_zscal( dofs, beta, p.dval, 1, queue ); // p = beta*p magma_zaxpy( dofs, c_one, r.dval, 1, p.dval, 1, queue ); // p = p + r } CHECK( magma_z_spmv( c_one, A, p, c_zero, q, queue )); // q = A p solver_par->spmv_count++; den = magma_zdotc( dofs, p.dval, 1, q.dval, 1, queue ); // den = p dot q alpha = gammanew / den; magma_zaxpy( dofs, alpha, p.dval, 1, x->dval, 1, queue ); // x = x + alpha p magma_zaxpy( dofs, -alpha, q.dval, 1, r.dval, 1, queue ); // r = r - alpha q gammaold = gammanew; res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&p, queue ); magma_zmfree(&q, queue ); solver_par->info = info; return info; } /* magma_zcg */
magma_int_t magma_zpcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par ){ // prepare solver feedback solver_par->solver = Magma_PCG; solver_par->numiter = 0; solver_par->info = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, rt, p, q, h; magma_z_vinit( &r, Magma_DEV, dofs, c_zero ); magma_z_vinit( &rt, Magma_DEV, dofs, c_zero ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero ); magma_z_vinit( &h, Magma_DEV, dofs, c_zero ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, gammaold, gammanew, den, res; // solver setup magma_zscal( dofs, c_zero, x->val, 1) ; // x = 0 magma_zcopy( dofs, b.val, 1, r.val, 1 ); // r = b // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h nom = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); nom0 = magma_dznrm2( dofs, r.val, 1 ); magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.val, 1, q.val, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) return MAGMA_SUCCESS; // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. (Ar,r) = %f\n", den); return -100; } //Chronometry real_Double_t tempo1, tempo2; magma_device_sync(); tempo1=magma_wtime(); if( solver_par->verbose > 0 ){ solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ){ // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); gammanew = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); // gn = < r,h> if( solver_par->numiter==1 ){ magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h }else{ beta = MAGMA_Z_MAKE(gammanew/gammaold, 0.); // beta = gn/go magma_zscal(dofs, beta, p.val, 1); // p = beta*p magma_zaxpy(dofs, c_one, h.val, 1, p.val, 1); // p = p + h } magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.val, 1, q.val, 1)); // den = p dot q alpha = MAGMA_Z_MAKE(gammanew/den, 0.); magma_zaxpy(dofs, alpha, p.val, 1, x->val, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.val, 1, r.val, 1); // r = r - alpha q gammaold = gammanew; res = magma_dznrm2( dofs, r.val, 1 ); if( solver_par->verbose > 0 ){ magma_device_sync(); tempo2=magma_wtime(); if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nom0 < solver_par->epsilon ) { break; } } magma_device_sync(); tempo2=magma_wtime(); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual ); solver_par->iter_res = res; solver_par->final_res = residual; if( solver_par->numiter < solver_par->maxiter){ solver_par->info = 0; }else if( solver_par->init_res > solver_par->final_res ){ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -2; } else{ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -1; } magma_z_vfree(&r); magma_z_vfree(&rt); magma_z_vfree(&p); magma_z_vfree(&q); magma_z_vfree(&h); return MAGMA_SUCCESS; } /* magma_zcg */
extern "C" magma_int_t magma_zbicgstab_merge3( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_BICGSTABMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // solver variables magmaDoubleComplex alpha, beta, omega, rho_old, rho_new, *skp_h={0}; double nom, nom0, betanom, nomb; // some useful variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // workspace magma_z_matrix q={Magma_CSR}, r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}; magmaDoubleComplex *d1=NULL, *d2=NULL, *skp=NULL; d1 = NULL; d2 = NULL; skp = NULL; CHECK( magma_zmalloc( &d1, dofs*(2) )); CHECK( magma_zmalloc( &d2, dofs*(2) )); // array for the parameters CHECK( magma_zmalloc( &skp, 8 )); // skp = [alpha|beta|omega|rho_old|rho|nom|tmp1|tmp2] CHECK( magma_zvinit( &q, Magma_DEV, dofs*6, 1, c_zero, queue )); // q = rr|r|p|v|s|t rr.memory_location = Magma_DEV; rr.dval = NULL; rr.num_rows = rr.nnz = dofs; rr.num_cols = 1; rr.storage_type = Magma_DENSE; r.memory_location = Magma_DEV; r.dval = NULL; r.num_rows = r.nnz = dofs; r.num_cols = 1; r.storage_type = Magma_DENSE; p.memory_location = Magma_DEV; p.dval = NULL; p.num_rows = p.nnz = dofs; p.num_cols = 1; p.storage_type = Magma_DENSE; v.memory_location = Magma_DEV; v.dval = NULL; v.num_rows = v.nnz = dofs; v.num_cols = 1; v.storage_type = Magma_DENSE; s.memory_location = Magma_DEV; s.dval = NULL; s.num_rows = s.nnz = dofs; s.num_cols = 1; s.storage_type = Magma_DENSE; t.memory_location = Magma_DEV; t.dval = NULL; t.num_rows = t.nnz = dofs; t.num_cols = 1; t.storage_type = Magma_DENSE; rr.dval = q(0); r.dval = q(1); p.dval = q(2); v.dval = q(3); s.dval = q(4); t.dval = q(5); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); magma_zcopy( dofs, r.dval, 1, q(0), 1, queue ); // rr = r magma_zcopy( dofs, r.dval, 1, q(1), 1, queue ); // q = r betanom = nom0; nom = nom0*nom0; rho_new = magma_zdotc( dofs, r.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho_old = omega = alpha = MAGMA_Z_MAKE( 1.0, 0. ); beta = rho_new; solver_par->init_res = nom0; // array on host for the parameters CHECK( magma_zmalloc_cpu( &skp_h, 8 )); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } skp_h[0]=alpha; skp_h[1]=beta; skp_h[2]=omega; skp_h[3]=rho_old; skp_h[4]=rho_new; skp_h[5]=MAGMA_Z_MAKE(nom, 0.0); magma_zsetvector( 8, skp_h, 1, skp, 1, queue ); CHECK( magma_z_spmv( c_one, A, r, c_zero, v, queue )); // z = A r nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if( nom0 < solver_par->atol || nom0/nomb < solver_par->rtol ){ info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // computes p=r+beta*(p-omega*v) CHECK( magma_zbicgmerge1( dofs, skp, v.dval, r.dval, p.dval, queue )); CHECK( magma_z_spmv( c_one, A, p, c_zero, v, queue )); // v = Ap solver_par->spmv_count++; CHECK( magma_zmdotc( dofs, 1, q.dval, v.dval, d1, d2, skp, queue )); CHECK( magma_zbicgmerge4( 1, skp, queue )); CHECK( magma_zbicgmerge2( dofs, skp, r.dval, v.dval, s.dval, queue )); // s=r-alpha*v CHECK( magma_z_spmv( c_one, A, s, c_zero, t, queue )); // t=As solver_par->spmv_count++; CHECK( magma_zmdotc( dofs, 2, q.dval+4*dofs, t.dval, d1, d2, skp+6, queue )); CHECK( magma_zbicgmerge4( 2, skp, queue )); CHECK( magma_zbicgmerge_xrbeta( dofs, d1, d2, q.dval, r.dval, p.dval, s.dval, t.dval, x->dval, skp, queue )); // check stopping criterion magma_zgetvector_async( 1 , skp+5, 1, skp_h+5, 1, queue ); betanom = sqrt(MAGMA_Z_REAL(skp_h[5])); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < solver_par->atol || betanom/nomb < solver_par->rtol ) { break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = betanom; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->atol || solver_par->iter_res/solver_par->init_res < solver_par->rtol ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&q, queue ); // frees all vectors magma_free(d1); magma_free(d2); magma_free( skp ); magma_free_cpu( skp_h ); solver_par->info = info; return info; } /* zbicgstab_merge */
extern "C" magma_int_t magma_ztfqmr_unrolled( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_TFQMR; solver_par->numiter = 0; solver_par->spmv_count = 0; solver_par->spmv_count = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; // solver variables double nom0, r0, res, nomb; magmaDoubleComplex rho = c_one, rho_l = c_one, eta = c_zero , c = c_zero , theta = c_zero , tau = c_zero, alpha = c_one, beta = c_zero, sigma = c_zero; magma_int_t dofs = A.num_rows* b.num_cols; // GPU workspace magma_z_matrix r={Magma_CSR}, r_tld={Magma_CSR}, d={Magma_CSR}, w={Magma_CSR}, v={Magma_CSR}, u_mp1={Magma_CSR}, u_m={Magma_CSR}, Au={Magma_CSR}, Ad={Magma_CSR}, Au_new={Magma_CSR}; CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &u_mp1,Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &r_tld,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &u_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_zvinit( &Ad, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Au_new, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Au, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; magma_zcopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, w.dval, 1, queue ); magma_zcopy( dofs, r.dval, 1, u_mp1.dval, 1, queue ); CHECK( magma_z_spmv( c_one, A, u_mp1, c_zero, v, queue )); // v = A u magma_zcopy( dofs, v.dval, 1, Au.dval, 1, queue ); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } tau = magma_zsqrt( magma_zdotc( dofs, r.dval, 1, r_tld.dval, 1, queue ) ); rho = magma_zdotc( dofs, r.dval, 1, r_tld.dval, 1, queue ); rho_l = rho; //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // do this every iteration as unrolled alpha = rho / magma_zdotc( dofs, v.dval, 1, r_tld.dval, 1, queue ); sigma = theta * theta / alpha * eta; magma_zaxpy( dofs, -alpha, v.dval, 1, u_mp1.dval, 1, queue ); // u_mp1 = u_mp_1 - alpha*v; magma_zaxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue ); // w = w - alpha*Au; magma_zscal( dofs, sigma, d.dval, 1, queue ); magma_zaxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue ); // d = u_mp1 + sigma*d; //magma_zscal( dofs, sigma, Ad.dval, 1, queue ); //magma_zaxpy( dofs, c_one, Au.dval, 1, Ad.dval, 1, queue ); // Ad = Au + sigma*Ad; theta = magma_zsqrt( magma_zdotc(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau; c = c_one / magma_zsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; sigma = theta * theta / alpha * eta; printf("sigma: %f+%fi\n", MAGMA_Z_REAL(sigma), MAGMA_Z_IMAG(sigma) ); CHECK( magma_z_spmv( c_one, A, d, c_zero, Ad, queue )); // Au_new = A u_mp1 solver_par->spmv_count++; magma_zaxpy( dofs, eta, d.dval, 1, x->dval, 1, queue ); // x = x + eta * d magma_zaxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue ); // r = r - eta * Ad // here starts the second part of the loop ################################# magma_zaxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue ); // w = w - alpha*Au; magma_zscal( dofs, sigma, d.dval, 1, queue ); magma_zaxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue ); // d = u_mp1 + sigma*d; magma_zscal( dofs, sigma, Ad.dval, 1, queue ); magma_zaxpy( dofs, c_one, Au.dval, 1, Ad.dval, 1, queue ); // Ad = Au + sigma*Ad; theta = magma_zsqrt( magma_zdotc(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau; c = c_one / magma_zsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; magma_zaxpy( dofs, eta, d.dval, 1, x->dval, 1, queue ); // x = x + eta * d magma_zaxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue ); // r = r - eta * Ad res = magma_dznrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } // do this every loop as unrolled rho_l = rho; rho = magma_zdotc( dofs, w.dval, 1, r_tld.dval, 1, queue ); beta = rho / rho_l; magma_zscal( dofs, beta, u_mp1.dval, 1, queue ); magma_zaxpy( dofs, c_one, w.dval, 1, u_mp1.dval, 1, queue ); // u_mp1 = w + beta*u_mp1; CHECK( magma_z_spmv( c_one, A, u_mp1, c_zero, Au_new, queue )); // Au_new = A u_mp1 solver_par->spmv_count++; // do this every loop as unrolled magma_zscal( dofs, beta*beta, v.dval, 1, queue ); magma_zaxpy( dofs, beta, Au.dval, 1, v.dval, 1, queue ); magma_zaxpy( dofs, c_one, Au_new.dval, 1, v.dval, 1, queue ); // v = Au_new + beta*(Au+beta*v); magma_zcopy( dofs, Au_new.dval, 1, Au.dval, 1, queue ); } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r, queue ); magma_zmfree(&r_tld, queue ); magma_zmfree(&d, queue ); magma_zmfree(&w, queue ); magma_zmfree(&v, queue ); magma_zmfree(&u_m, queue ); magma_zmfree(&u_mp1, queue ); magma_zmfree(&d, queue ); magma_zmfree(&Au, queue ); magma_zmfree(&Au_new, queue ); magma_zmfree(&Ad, queue ); solver_par->info = info; return info; } /* magma_zfqmr_unrolled */
extern "C" magma_int_t magma_zcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_CG; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, p, q; magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero, queue ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, betanom, betanomsq, den; // solver setup magma_zscal( dofs, c_zero, x->dval, 1) ; // x = 0 magma_zcopy( dofs, b.dval, 1, r.dval, 1 ); // r = b magma_zcopy( dofs, b.dval, 1, p.dval, 1 ); // p = b nom0 = betanom = magma_dznrm2( dofs, r.dval, 1 ); nom = nom0 * nom0; // nom = r' * r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.dval, 1, q.dval, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. (Ar,r) = %f\n", den); magmablasSetKernelStream( orig_queue ); return MAGMA_NONSPD; solver_par->info = MAGMA_NONSPD; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { alpha = MAGMA_Z_MAKE(nom/den, 0.); magma_zaxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q betanom = magma_dznrm2(dofs, r.dval, 1); // betanom = || r || betanomsq = betanom * betanom; // betanoms = r' * r if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < r0 ) { break; } beta = MAGMA_Z_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom magma_zscal(dofs, beta, p.dval, 1); // p = beta*p magma_zaxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.dval, 1, q.dval, 1)); // den = p dot q nom = betanomsq; } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual, queue ); solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_SLOW_CONVERGENCE; } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } magma_z_vfree(&r, queue ); magma_z_vfree(&p, queue ); magma_z_vfree(&q, queue ); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } /* magma_zcg */
extern "C" magma_int_t magma_zbombard_merge( magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // queue variables const magma_queue_t squeue = 0; // synchronous kernel queues const magma_int_t nqueues = 3; // number of queues magma_queue_t queues[nqueues]; magma_int_t q1flag = 0; // set asynchronous kernel queues //printf("%% Kernel queues: (orig, queue) = (%p, %p)\n", (void *)orig_queue, (void *)queue); magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &queues[0] ); if ( queue != squeue ) { queues[1] = queue; q1flag = 0; } else { magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &(queues[1]) ); queue = queues[1]; q1flag = 1; } magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &(queues[2]) ); for (int i = 0; i < nqueues; ++i ) { ; //printf("Kernel queue #%d = %p\n", i, (void *)queues[i]); } // 1=QMR, 2=CGS, 3+BiCGSTAB magma_int_t flag = 0; int mdot = 1; // prepare solver feedback solver_par->solver = Magma_BOMBARD; solver_par->numiter = 0; // constants const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; const magmaDoubleComplex c_one = MAGMA_Z_ONE; // solver variables double nom0, r0, res, Q_res, C_res, B_res, nomb; //QMR magmaDoubleComplex Q_rho = c_one, Q_rho1 = c_one, Q_eta = -c_one , Q_pds = c_one, Q_thet = c_one, Q_thet1 = c_one, Q_epsilon = c_one, Q_beta = c_one, Q_delta = c_one, Q_pde = c_one, Q_rde = c_one, Q_gamm = c_one, Q_gamm1 = c_one, Q_psi = c_one; //CGS magmaDoubleComplex C_rho, C_rho_l = c_one, C_alpha, C_beta; //BiCGSTAB magmaDoubleComplex B_alpha, B_beta, B_omega, B_rho_old, B_rho_new; magma_int_t dofs = A.num_rows* b.num_cols; // need to transpose the matrix // GPU workspace // overall initial residual magma_z_matrix r_tld={Magma_CSR}; // QMR magma_z_matrix AT = {Magma_CSR}, Q_r={Magma_CSR}, Q_x={Magma_CSR}, Q_v={Magma_CSR}, Q_w={Magma_CSR}, Q_wt={Magma_CSR}, Q_d={Magma_CSR}, Q_s={Magma_CSR}, Q_z={Magma_CSR}, Q_q={Magma_CSR}, Q_p={Magma_CSR}, Q_pt={Magma_CSR}, Q_y={Magma_CSR}; // CGS magma_z_matrix C_r={Magma_CSR}, C_rt={Magma_CSR}, C_x={Magma_CSR}, C_p={Magma_CSR}, C_q={Magma_CSR}, C_u={Magma_CSR}, C_v={Magma_CSR}, C_t={Magma_CSR}, C_p_hat={Magma_CSR}, C_q_hat={Magma_CSR}, C_u_hat={Magma_CSR}, C_v_hat={Magma_CSR}; //BiCGSTAB magma_z_matrix B_r={Magma_CSR}, B_x={Magma_CSR}, B_p={Magma_CSR}, B_v={Magma_CSR}, B_s={Magma_CSR}, B_t={Magma_CSR}; // multi-vector for block-SpMV magma_z_matrix SpMV_in_1={Magma_CSR}, SpMV_out_1={Magma_CSR}, SpMV_in_2={Magma_CSR}, SpMV_out_2={Magma_CSR}; // workspace for zmzdotc magma_z_matrix d1={Magma_CSR}, d2={Magma_CSR}, skp={Magma_CSR}; CHECK( magma_zvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &SpMV_in_1, Magma_DEV, A.num_rows, b.num_cols * 3, c_zero, queue )); CHECK( magma_zvinit( &SpMV_out_1, Magma_DEV, A.num_rows, b.num_cols * 3, c_zero, queue )); CHECK( magma_zvinit( &SpMV_in_2, Magma_DEV, A.num_rows, b.num_cols * 3, c_zero, queue )); CHECK( magma_zvinit( &SpMV_out_2, Magma_DEV, A.num_rows, b.num_cols * 3, c_zero, queue )); CHECK( magma_zvinit( &d1, Magma_DEV, A.num_rows, b.num_cols * 4, c_zero, queue )); CHECK( magma_zvinit( &d2, Magma_DEV, A.num_rows, b.num_cols * 4, c_zero, queue )); CHECK( magma_zvinit( &skp, Magma_CPU, 4, 1, c_zero, queue )); // QMR CHECK( magma_zvinit( &Q_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &Q_x, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // QMR CHECK( magma_zvinit( &C_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_p_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_q_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_u_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_v_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &C_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // BiCGSTAB CHECK( magma_zvinit( &B_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &B_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &B_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &B_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &B_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_zvinit( &B_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_zresidualvec( A, b, *x, &r_tld, &nom0, queue)); solver_par->init_res = nom0; res = nom0; // QMR magma_zcopy( dofs, r_tld.dval, 1, Q_r.dval, 1, queue ); magma_zcopy( dofs, r_tld.dval, 1, Q_y.dval, 1, queue ); magma_zcopy( dofs, r_tld.dval, 1, Q_v.dval, 1, queue ); magma_zcopy( dofs, r_tld.dval, 1, Q_wt.dval, 1, queue ); magma_zcopy( dofs, r_tld.dval, 1, Q_z.dval, 1, queue ); magma_zcopy( dofs, x->dval, 1, Q_x.dval, 1, queue ); // transpose the matrix magma_zmtransposeconjugate( A, &AT, queue ); // CGS magma_zcopy( dofs, r_tld.dval, 1, C_r.dval, 1, queue ); magma_zcopy( dofs, x->dval, 1, C_x.dval, 1, queue ); // BiCGSTAB magma_zcopy( dofs, r_tld.dval, 1, B_r.dval, 1, queue ); magma_zcopy( dofs, x->dval, 1, B_x.dval, 1, queue ); CHECK( magma_z_spmv( c_one, A, B_r, c_zero, B_v, queue )); magma_zcopy( dofs, B_v.dval, 1, SpMV_out_1.dval+2*dofs, 1, queue ); nomb = magma_dznrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } Q_psi = magma_zsqrt( magma_zdotc( dofs, Q_z.dval, 1, Q_z.dval, 1), queue ); Q_rho = magma_zsqrt( magma_zdotc( dofs, Q_y.dval, 1, Q_y.dval, 1), queue ); // BiCGSTAB B_rho_new = magma_zdotc( dofs, B_r.dval, 1, B_r.dval, 1, queue ); B_rho_old = B_omega = B_alpha = MAGMA_Z_MAKE( 1.0, 0. ); // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_zqmr_1( b.num_rows, b.num_cols, Q_rho, Q_psi, Q_y.dval, Q_z.dval, Q_v.dval, Q_w.dval, queue ); //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; // start iteration do { solver_par->numiter++; if(mdot == 0){ //QMR: delta = z' * y; Q_delta = magma_zdotc( dofs, Q_z.dval, 1, Q_y.dval, 1, queue ); //CGS: rho = r' * r_tld C_rho = magma_zdotc( dofs, C_r.dval, 1, r_tld.dval, 1, queue ); // BiCGSTAB B_rho_old = B_rho_new; B_rho_new = magma_zdotc( dofs, r_tld.dval, 1, B_r.dval, 1, queue ); // rho=<rr,r> B_beta = B_rho_new/B_rho_old * B_alpha/B_omega; // beta=rho/rho_old *alpha/omega }else{ B_rho_old = B_rho_new; magma_zmdotc4( b.num_rows, Q_z.dval, Q_y.dval, r_tld.dval, C_r.dval, r_tld.dval, B_r.dval, r_tld.dval, B_r.dval, d1.dval, d2.dval, skp.val, queue ); Q_delta = skp.val[0]; C_rho = skp.val[1]; B_rho_new = skp.val[2]; B_beta = B_rho_new/B_rho_old * B_alpha/B_omega; } if( solver_par->numiter == 1 ){ //QMR: p = y; //QMR: q = z; magma_zcopy( dofs, Q_y.dval, 1, SpMV_in_1.dval, 1, queue ); magma_zcopy( dofs, Q_z.dval, 1, SpMV_in_2.dval, 1, queue ); //QMR: u = r; //QMR: p = r; magma_zcgs_2( b.num_rows, b.num_cols, C_r.dval, C_u.dval, SpMV_in_1.dval+dofs, queue ); } else{ Q_pde = Q_psi * Q_delta / Q_epsilon; Q_rde = Q_rho * MAGMA_Z_CNJG(Q_delta/Q_epsilon); C_beta = C_rho / C_rho_l; //QMR p = y - pde * p //QMR q = z - rde * q magma_zqmr_2( b.num_rows, b.num_cols, Q_pde, Q_rde, Q_y.dval, Q_z.dval, SpMV_in_1.dval, SpMV_in_2.dval, queue ); //CGS: u = r + beta*q; //CGS: p = u + beta*( q + beta*p ); magma_zcgs_1( b.num_rows, b.num_cols, C_beta, C_r.dval, C_q.dval, C_u.dval, SpMV_in_1.dval+dofs, queue ); } // BiCGSTAB: p = r + beta * ( p - omega * v ) magma_zbicgstab_1( b.num_rows, b.num_cols, B_beta, B_omega, B_r.dval, SpMV_out_1.dval+2*dofs, SpMV_in_1.dval+2*dofs, queue ); /* //QMR CHECK( magma_z_spmv( c_one, A, Q_p, c_zero, Q_pt, queue )); //CGS CHECK( magma_z_spmv( c_one, A, C_p, c_zero, C_v_hat, queue )); // BiCGSTAB CHECK( magma_z_spmv( c_one, A, B_p, c_zero, B_v, queue )); */ // gather everything for a block-SpMV //magma_zcopy( dofs, Q_p.dval, 1, SpMV_in_1.dval , 1, queue ); //magma_zcopy( dofs, C_p.dval, 1, SpMV_in_1.dval+dofs , 1, queue ); //magma_zcopy( dofs, B_p.dval, 1, SpMV_in_1.dval+2*dofs , 1, queue ); // block SpMV CHECK( magma_z_spmv( c_one, A, SpMV_in_1, c_zero, SpMV_out_1, queue )); // scatter results //magma_zcopy( dofs, SpMV_out_1.dval , 1, Q_pt.dval, 1, queue ); //magma_zcopy( dofs, SpMV_out_1.dval+dofs , 1, C_v_hat.dval, 1, queue ); //magma_zcopy( dofs, SpMV_out_1.dval+2*dofs , 1, B_v.dval, 1, queue ); if( mdot == 0 ) { //QMR: epsilon = q' * pt; Q_epsilon = magma_zdotc( dofs, SpMV_in_2.dval, 1, SpMV_out_1.dval, 1, queue ); Q_beta = Q_epsilon / Q_delta; //CGS: alpha = r_tld' * v_hat C_alpha = C_rho / magma_zdotc( dofs, r_tld.dval, 1, SpMV_out_1.dval+dofs, 1, queue ); C_rho_l = C_rho; //BiCGSTAB B_alpha = B_rho_new / magma_zdotc( dofs, r_tld.dval, 1, SpMV_out_1.dval+2*dofs, 1, queue ); }else{ magma_zmdotc4( b.num_rows, SpMV_in_2.dval, SpMV_out_1.dval, r_tld.dval, SpMV_out_1.dval+dofs, r_tld.dval, SpMV_out_1.dval+2*dofs, r_tld.dval, SpMV_out_1.dval+2*dofs, d1.dval, d2.dval, skp.val, queue ); Q_epsilon = skp.val[0]; Q_beta = Q_epsilon / Q_delta; C_alpha = C_rho / skp.val[1]; C_rho_l = C_rho; B_alpha = B_rho_new / skp.val[2]; } //QMR: v = pt - beta * v //QMR: y = v magma_zqmr_3( b.num_rows, b.num_cols, Q_beta, SpMV_out_1.dval, Q_v.dval, Q_y.dval, queue ); //CGS: q = u - alpha v_hat //CGS: t = u + q magma_zcgs_3( b.num_rows, b.num_cols, C_alpha, SpMV_out_1.dval+dofs, C_u.dval, C_q.dval, SpMV_in_2.dval+dofs, queue ); // BiCGSTAB: s = r - alpha v magma_zbicgstab_2( b.num_rows, b.num_cols, B_alpha, B_r.dval, SpMV_out_1.dval+2*dofs, SpMV_in_2.dval+2*dofs, queue ); // gather everything for a block-SpMV //magma_zcopy( dofs, Q_q.dval, 1, SpMV_in_2.dval , 1, queue ); //magma_zcopy( dofs, C_t.dval, 1, SpMV_in_2.dval+dofs , 1, queue ); //magma_zcopy( dofs, B_s.dval, 1, SpMV_in_2.dval+2*dofs , 1, queue ); // block SpMV CHECK( magma_z_spmv( c_one, A, SpMV_in_2, c_zero, SpMV_out_2, queue )); // scatter results //magma_zcopy( dofs, SpMV_out_2.dval , 1, Q_wt.dval, 1, queue ); //magma_zcopy( dofs, SpMV_out_2.dval+dofs , 1, C_rt.dval, 1, queue ); //magma_zcopy( dofs, SpMV_out_2.dval+2*dofs , 1, B_t.dval, 1, queue ); Q_rho1 = Q_rho; if( mdot == 0 ) { //QMR rho = norm(y); Q_rho = magma_zsqrt( magma_zdotc( dofs, Q_y.dval, 1, Q_y.dval, 1), queue ); // BiCGSTAB B_omega = magma_zdotc( dofs, SpMV_out_2.dval+2*dofs, 1, SpMV_in_2.dval+2*dofs, 1 ) // omega = <s,t>/<t,t> / magma_zdotc( dofs, SpMV_out_2.dval+2*dofs, 1, SpMV_out_2.dval+2*dofs, 1, queue ); }else{ magma_zmdotc4( b.num_rows, Q_y.dval, Q_y.dval, Q_y.dval, Q_y.dval, SpMV_out_2.dval+2*dofs, SpMV_in_2.dval+2*dofs, SpMV_out_2.dval+2*dofs, SpMV_out_2.dval+2*dofs, d1.dval, d2.dval, skp.val, queue ); Q_rho = magma_zsqrt(skp.val[0]); B_omega = skp.val[2]/skp.val[3]; } // QMR Q_thet1 = Q_thet; Q_thet = Q_rho / (Q_gamm * MAGMA_Z_MAKE( MAGMA_Z_ABS(Q_beta), 0.0 )); Q_gamm1 = Q_gamm; Q_gamm = c_one / magma_zsqrt(c_one + Q_thet*Q_thet); Q_eta = - Q_eta * Q_rho1 * Q_gamm * Q_gamm / (Q_beta * Q_gamm1 * Q_gamm1); if( solver_par->numiter == 1 ){ //QMR: d = eta * p + pds * d; //QMR: s = eta * pt + pds * d; //QMR: x = x + d; //QMR: r = r - s; magma_zqmr_4( b.num_rows, b.num_cols, Q_eta, SpMV_in_1.dval, SpMV_out_1.dval, Q_d.dval, Q_s.dval, Q_x.dval, Q_r.dval, queue ); } else{ Q_pds = (Q_thet1 * Q_gamm) * (Q_thet1 * Q_gamm); //QMR: d = eta * p + pds * d; //QMR: s = eta * pt + pds * d; //QMR: x = x + d; //QMR: r = r - s; magma_zqmr_5( b.num_rows, b.num_cols, Q_eta, Q_pds, SpMV_in_1.dval, SpMV_out_1.dval, Q_d.dval, Q_s.dval, Q_x.dval, Q_r.dval, queue ); } // CGS: r = r -alpha*A u_hat // CGS: x = x + alpha u_hat magma_zcgs_4( b.num_rows, b.num_cols, C_alpha, SpMV_in_2.dval+dofs, SpMV_out_2.dval+dofs, C_x.dval, C_r.dval, queue ); // BiCGSTAB: x = x + alpha * p + omega * s // BiCGSTAB: r = s - omega * t magma_zbicgstab_3( b.num_rows, b.num_cols, B_alpha, B_omega, SpMV_in_1.dval+2*dofs, SpMV_in_2.dval+2*dofs, SpMV_out_2.dval+2*dofs, B_x.dval, B_r.dval, queue ); if( mdot == 0 ){ Q_res = magma_dznrm2( dofs, Q_r.dval, 1, queue ); C_res = magma_dznrm2( dofs, C_r.dval, 1, queue ); B_res = magma_dznrm2( dofs, B_r.dval, 1, queue ); //QMR: psi = norm(z); Q_psi = magma_zsqrt( magma_zdotc( dofs, Q_z.dval, 1, Q_z.dval, 1), queue ); }else{ magma_zmdotc4( b.num_rows, Q_r.dval, Q_r.dval, C_r.dval, C_r.dval, B_r.dval, B_r.dval, Q_z.dval, Q_z.dval, d1.dval, d2.dval, skp.val, queue ); Q_res = MAGMA_Z_ABS(magma_zsqrt(skp.val[0])); C_res = MAGMA_Z_ABS(magma_zsqrt(skp.val[1])); B_res = MAGMA_Z_ABS(magma_zsqrt(skp.val[2])); //QMR: psi = norm(z); Q_psi = magma_zsqrt(skp.val[3]); } //QMR: v = y / rho //QMR: y = y / rho //QMR: w = wt / psi //QMR: z = z / psi //QMR: wt = A' * q - beta' * w //QMR: no precond: z = wt magma_zqmr_6( b.num_rows, b.num_cols, Q_beta, Q_rho, Q_psi, Q_y.dval, Q_z.dval, Q_v.dval, Q_w.dval, SpMV_out_2.dval, queue ); // printf(" %e %e %e\n", Q_res, C_res, B_res); if( Q_res < res ){ res = Q_res; flag = 1; } if( C_res < res ){ res = C_res; flag = 2; } if( B_res < res ){ res = B_res; flag = 3; } if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); // copy back the best solver switch ( flag ) { case 1: printf("%% QMR fastest solver.\n"); magma_zcopy( dofs, Q_x.dval, 1, x->dval, 1, queue ); break; case 2: printf("%% CGS fastest solver.\n"); magma_zcopy( dofs, C_x.dval, 1, x->dval, 1, queue ); break; case 3: printf("%% BiCGSTAB fastest solver.\n"); magma_zcopy( dofs, B_x.dval, 1, x->dval, 1, queue ); break; } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_zresidualvec( A, b, *x, &r_tld, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_zmfree(&r_tld, queue ); magma_zmfree(&SpMV_in_1, queue ); magma_zmfree(&SpMV_out_1, queue ); magma_zmfree(&SpMV_in_2, queue ); magma_zmfree(&SpMV_out_2, queue ); magma_zmfree(&d1, queue ); magma_zmfree(&d2, queue ); magma_zmfree(&skp, queue ); magma_zmfree(&AT, queue ); // QMR magma_zmfree(&Q_r, queue ); magma_zmfree(&Q_v, queue ); magma_zmfree(&Q_w, queue ); magma_zmfree(&Q_wt, queue ); magma_zmfree(&Q_d, queue ); magma_zmfree(&Q_s, queue ); magma_zmfree(&Q_z, queue ); magma_zmfree(&Q_q, queue ); magma_zmfree(&Q_p, queue ); magma_zmfree(&Q_pt, queue ); magma_zmfree(&Q_y, queue ); magma_zmfree(&Q_x, queue ); // CGS magma_zmfree(&C_r, queue ); magma_zmfree(&C_rt, queue ); magma_zmfree(&C_x, queue ); magma_zmfree(&C_p, queue ); magma_zmfree(&C_q, queue ); magma_zmfree(&C_u, queue ); magma_zmfree(&C_v, queue ); magma_zmfree(&C_t, queue ); magma_zmfree(&C_p_hat, queue ); magma_zmfree(&C_q_hat, queue ); magma_zmfree(&C_u_hat, queue ); magma_zmfree(&C_v_hat, queue ); // BiCGSTAB magma_zmfree(&B_r, queue ); magma_zmfree(&B_x, queue ); magma_zmfree(&B_p, queue ); magma_zmfree(&B_v, queue ); magma_zmfree(&B_s, queue ); magma_zmfree(&B_t, queue ); // destroy asynchronous queues magma_queue_destroy( queues[0] ); if ( q1flag == 1 ) { magma_queue_destroy( queues[1] ); } magma_queue_destroy( queues[2] ); solver_par->info = info; return info; } /* magma_zbombard_merge */