extern "C" magma_int_t magma_scgs( magma_s_matrix A, magma_s_matrix b, magma_s_matrix *x, magma_s_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_CGS; solver_par->numiter = 0; solver_par->spmv_count = 0; // constants const float c_zero = MAGMA_S_ZERO; const float c_one = MAGMA_S_ONE; const float c_neg_one = MAGMA_S_NEG_ONE; // solver variables float nom0, r0, res=0, nomb; float rho, rho_l = c_one, alpha, beta; magma_int_t dofs = A.num_rows* b.num_cols; // GPU workspace magma_s_matrix r={Magma_CSR}, rt={Magma_CSR}, r_tld={Magma_CSR}, p={Magma_CSR}, q={Magma_CSR}, u={Magma_CSR}, v={Magma_CSR}, t={Magma_CSR}, p_hat={Magma_CSR}, q_hat={Magma_CSR}, u_hat={Magma_CSR}, v_hat={Magma_CSR}; CHECK( magma_svinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &r_tld,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &p_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &q_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &u_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &v_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_sresidualvec( A, b, *x, &r, &nom0, queue)); magma_scopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); solver_par->init_res = nom0; nomb = magma_snrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; rho = magma_sdot( dofs, r.dval, 1, r_tld.dval, 1, queue ); // rho = < r,r_tld> if( magma_s_isnan_inf( rho ) ){ info = MAGMA_DIVERGENCE; break; } if ( solver_par->numiter > 1 ) { // direction vectors beta = rho / rho_l; magma_scopy( dofs, r.dval, 1, u.dval, 1, queue ); // u = r magma_saxpy( dofs, beta, q.dval, 1, u.dval, 1, queue ); // u = u + beta q magma_sscal( dofs, beta, p.dval, 1, queue ); // p = beta*p magma_saxpy( dofs, c_one, q.dval, 1, p.dval, 1, queue ); // p = q + beta*p magma_sscal( dofs, beta, p.dval, 1, queue ); // p = beta*(q + beta*p) magma_saxpy( dofs, c_one, u.dval, 1, p.dval, 1, queue ); // p = u + beta*(q + beta*p) //u = r + beta*q; //p = u + beta*( q + beta*p ); } else{ magma_scopy( dofs, r.dval, 1, u.dval, 1, queue ); // u = r magma_scopy( dofs, r.dval, 1, p.dval, 1, queue ); // p = r } CHECK( magma_s_spmv( c_one, A, p, c_zero, v_hat, queue )); // v = A p solver_par->spmv_count++; alpha = rho / magma_sdot( dofs, r_tld.dval, 1, v_hat.dval, 1, queue ); magma_scopy( dofs, u.dval, 1, q.dval, 1, queue ); // q = u magma_saxpy( dofs, -alpha, v_hat.dval, 1, q.dval, 1, queue ); // q = u - alpha v_hat magma_scopy( dofs, u.dval, 1, t.dval, 1, queue ); // t = q magma_saxpy( dofs, c_one, q.dval, 1, t.dval, 1, queue ); // t = u + q CHECK( magma_s_spmv( c_one, A, t, c_zero, rt, queue )); // t = A u_hat solver_par->spmv_count++; magma_saxpy( dofs, c_neg_one*alpha, rt.dval, 1, r.dval, 1, queue ); // r = r -alpha*A u_hat magma_saxpy( dofs, alpha, t.dval, 1, x->dval, 1, queue ); // x = x + alpha u_hat rho_l = rho; res = magma_snrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; float residual; CHECK( magma_sresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_smfree(&r, queue ); magma_smfree(&rt, queue ); magma_smfree(&r_tld, queue ); magma_smfree(&p, queue ); magma_smfree(&q, queue ); magma_smfree(&u, queue ); magma_smfree(&v, queue ); magma_smfree(&t, queue ); magma_smfree(&p_hat, queue ); magma_smfree(&q_hat, queue ); magma_smfree(&u_hat, queue ); magma_smfree(&v_hat, queue ); solver_par->info = info; return info; } /* magma_scgs */
extern "C" magma_int_t magma_sbicg( magma_s_matrix A, magma_s_matrix b, magma_s_matrix *x, magma_s_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_BICG; solver_par->numiter = 0; solver_par->spmv_count = 0; // some useful variables float c_zero = MAGMA_S_ZERO; float c_one = MAGMA_S_ONE; float c_neg_one = MAGMA_S_NEG_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // workspace magma_s_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR}, yt={Magma_CSR}, qt={Magma_CSR}; // need to transpose the matrix magma_s_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR}; CHECK( magma_svinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables float alpha, rho, beta, rho_new, ptq; float res, nomb, nom0, r0; // transpose the matrix magma_smtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_smconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_smfree(&Ah1, queue ); magma_smtransposeconjugate( Ah2, &Ah1, queue ); magma_smfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_smconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_smfree(&Ah1, queue ); magma_smtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_smfree(&Ah2, queue ); // solver setup CHECK( magma_sresidualvec( A, b, *x, &r, &nom0, queue)); res = nom0; solver_par->init_res = nom0; magma_scopy( dofs, r.dval, 1, rt.dval, 1, queue ); // rr = r rho_new = magma_sdot( dofs, rt.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho = alpha = MAGMA_S_MAKE( 1.0, 0. ); nomb = magma_snrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; magma_scopy( dofs, r.dval, 1 , y.dval, 1, queue ); // y=r magma_scopy( dofs, y.dval, 1 , z.dval, 1, queue ); // z=y magma_scopy( dofs, rt.dval, 1 , yt.dval, 1, queue ); // yt=rt magma_scopy( dofs, yt.dval, 1 , zt.dval, 1, queue ); // zt=yt rho= rho_new; rho_new = magma_sdot( dofs, rt.dval, 1, z.dval, 1, queue ); // rho=<rt,z> if( magma_s_isnan_inf( rho_new ) ){ info = MAGMA_DIVERGENCE; break; } if( solver_par->numiter==1 ){ magma_scopy( dofs, z.dval, 1 , p.dval, 1, queue ); // yt=rt magma_scopy( dofs, zt.dval, 1 , pt.dval, 1, queue ); // zt=yt } else { beta = rho_new/rho; magma_sscal( dofs, beta, p.dval, 1, queue ); // p = beta*p magma_saxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue ); // p = z+beta*p magma_sscal( dofs, MAGMA_S_CONJ(beta), pt.dval, 1, queue ); // pt = beta*pt magma_saxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue ); // pt = zt+beta*pt } CHECK( magma_s_spmv( c_one, A, p, c_zero, q, queue )); // v = Ap CHECK( magma_s_spmv( c_one, AT, pt, c_zero, qt, queue )); // v = Ap solver_par->spmv_count++; solver_par->spmv_count++; ptq = magma_sdot( dofs, pt.dval, 1, q.dval, 1, queue ); alpha = rho_new /ptq; magma_saxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue ); // x=x+alpha*p magma_saxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue ); // r=r+alpha*q magma_saxpy( dofs, c_neg_one * MAGMA_S_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue ); // r=r+alpha*q res = magma_snrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; float residual; CHECK( magma_sresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_smfree(&r, queue ); magma_smfree(&rt, queue ); magma_smfree(&p, queue ); magma_smfree(&pt, queue ); magma_smfree(&q, queue ); magma_smfree(&qt, queue ); magma_smfree(&y, queue ); magma_smfree(&yt, queue ); magma_smfree(&z, queue ); magma_smfree(&zt, queue ); magma_smfree(&AT, queue ); magma_smfree(&Ah1, queue ); magma_smfree(&Ah2, queue ); solver_par->info = info; return info; } /* magma_sbicg */
extern "C" magma_int_t magma_sbicgstab_merge( magma_s_matrix A, magma_s_matrix b, magma_s_matrix *x, magma_s_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_BICGSTAB; solver_par->numiter = 0; solver_par->spmv_count = 0; // some useful variables float c_zero = MAGMA_S_ZERO; float c_one = MAGMA_S_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // workspace magma_s_matrix r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}, d1={Magma_CSR}, d2={Magma_CSR}; CHECK( magma_svinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &rr,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &d1, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_svinit( &d2, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables float alpha, beta, omega, rho_old, rho_new; float betanom, nom0, r0, res, nomb; res=0; //float nom; //float den; // solver setup CHECK( magma_sresidualvec( A, b, *x, &r, &nom0, queue)); magma_scopy( dofs, r.dval, 1, rr.dval, 1, queue ); // rr = r betanom = nom0; //nom = nom0*nom0; rho_new = magma_sdot( dofs, r.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho_old = omega = alpha = MAGMA_S_MAKE( 1.0, 0. ); solver_par->init_res = nom0; CHECK( magma_s_spmv( c_one, A, r, c_zero, v, queue )); // z = A r //den = MAGMA_S_REAL( magma_sdot( dofs, v.dval, 1, r.dval, 1), queue ); // den = z' * r nomb = magma_snrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } if ( nomb < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; rho_old = rho_new; // rho_old=rho rho_new = magma_sdot( dofs, rr.dval, 1, r.dval, 1, queue ); // rho=<rr,r> beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega if( magma_s_isnan_inf( beta ) ){ info = MAGMA_DIVERGENCE; break; } // p = r + beta * ( p - omega * v ) magma_sbicgstab_1( r.num_rows, r.num_cols, beta, omega, r.dval, v.dval, p.dval, queue ); CHECK( magma_s_spmv( c_one, A, p, c_zero, v, queue )); // v = Ap solver_par->spmv_count++; //alpha = rho_new / tmpval; alpha = rho_new /magma_sdot( dofs, rr.dval, 1, v.dval, 1, queue ); if( magma_s_isnan_inf( alpha ) ){ info = MAGMA_DIVERGENCE; break; } // s = r - alpha v magma_sbicgstab_2( r.num_rows, r.num_cols, alpha, r.dval, v.dval, s.dval, queue ); CHECK( magma_s_spmv( c_one, A, s, c_zero, t, queue )); // t=As solver_par->spmv_count++; omega = magma_sdot( dofs, t.dval, 1, s.dval, 1, queue ) // omega = <s,t>/<t,t> / magma_sdot( dofs, t.dval, 1, t.dval, 1, queue ); // x = x + alpha * p + omega * s // r = s - omega * t magma_sbicgstab_3( r.num_rows, r.num_cols, alpha, omega, p.dval, s.dval, t.dval, x->dval, r.dval, queue ); res = betanom = magma_snrm2( dofs, r.dval, 1, queue ); //nom = betanom*betanom; if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; float residual; CHECK( magma_sresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_smfree(&r, queue ); magma_smfree(&rr, queue ); magma_smfree(&p, queue ); magma_smfree(&v, queue ); magma_smfree(&s, queue ); magma_smfree(&t, queue ); magma_smfree(&d1, queue ); magma_smfree(&d2, queue ); solver_par->info = info; return info; } /* magma_sbicgstab_merge */