예제 #1
0
파일: dfgmres.cpp 프로젝트: maxhutch/magma
static void ApplyPlaneRotation(double *dx, double *dy, double cs, double sn)
{
#if defined(PRECISION_s) | defined(PRECISION_d)
      double temp = (*dx);
      *dx =  cs * (*dx) + sn * (*dy);
      *dy = -sn * temp + cs * (*dy);
#else  
    // below the code Joss Knight from MathWorks provided me with - this works. 
    // No idea why the above code fails for real - maybe rounding.
    double temp  =  MAGMA_D_CONJ(cs) * (*dx) +  MAGMA_D_CONJ(sn) * (*dy);
    *dy = -(sn) * (*dx) + cs * (*dy);
    *dx = temp;
#endif
}
예제 #2
0
파일: dfgmres.cpp 프로젝트: maxhutch/magma
static void
GeneratePlaneRotation(double dx, double dy, double *cs, double *sn)
{
#if defined(PRECISION_s) | defined(PRECISION_d)
    if (dy == MAGMA_D_ZERO) {
        *cs = MAGMA_D_ONE;
        *sn = MAGMA_D_ZERO;
    } else if (MAGMA_D_ABS((dy)) > MAGMA_D_ABS((dx))) {
        double temp = dx / dy;
        *sn = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp));
        *cs = temp * (*sn);
    } else {
        double temp = dy / dx;
        *cs = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp ));
        *sn = temp * (*cs);
    }
#else   
    // below the code Joss Knight from MathWorks provided me with - this works. 
    // No idea why the above code fails for real - maybe rounding.
    real_Double_t rho = sqrt(MAGMA_D_REAL(MAGMA_D_CONJ(dx)*dx + MAGMA_D_CONJ(dy)*dy));
    *cs = dx / rho;
    *sn = dy / rho;
#endif
}
예제 #3
0
extern "C" magma_int_t
magma_dqmr_merge(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_QMRMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
    // solver variables
    double nom0, r0, res=0, nomb;
    double rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one, 
                        thet = c_one, thet1 = c_one, epsilon = c_one, 
                        beta = c_one, delta = c_one, pde = c_one, rde = c_one,
                        gamm = c_one, gamm1 = c_one, psi = c_one;
    
    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    // GPU workspace
    magma_d_matrix r={Magma_CSR}, r_tld={Magma_CSR},
                    v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                    d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR}, 
                    p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR};
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver setup
    CHECK(  magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, r_tld.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, y.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, v.dval, 1, queue );  
    magma_dcopy( dofs, r.dval, 1, wt.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, z.dval, 1, queue );  
    
    // transpose the matrix
    magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    
    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));
    
        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
    magma_dqmr_1(  
    r.num_rows, 
    r.num_cols, 
    rho,
    psi,
    y.dval, 
    z.dval,
    v.dval,
    w.dval,
    queue );
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
 
            // delta = z' * y;
        delta = magma_ddot( dofs, z.dval, 1, y.dval, 1, queue );
        
        if( magma_d_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        // no precond: yt = y, zt = z
        //magma_dcopy( dofs, y.dval, 1, yt.dval, 1 );
        //magma_dcopy( dofs, z.dval, 1, zt.dval, 1 );
        
        if( solver_par->numiter == 1 ){
                // p = y;
                // q = z;
            magma_dcopy( dofs, y.dval, 1, p.dval, 1, queue );
            magma_dcopy( dofs, z.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_D_CONJ(delta/epsilon);
            
                // p = y - pde * p
                // q = z - rde * q
            magma_dqmr_2(  
            r.num_rows, 
            r.num_cols, 
            pde,
            rde,
            y.dval,
            z.dval,
            p.dval, 
            q.dval, 
            queue );
        }
        if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        CHECK( magma_d_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;
            // epsilon = q' * pt;
        epsilon = magma_ddot( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;

        if( magma_d_isnan_inf( epsilon ) || magma_d_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
            // v = pt - beta * v
            // y = v
        magma_dqmr_3(  
        r.num_rows, 
        r.num_cols, 
        beta,
        pt.dval,
        v.dval,
        y.dval,
        queue );
        
        
        rho1 = rho;      
            // rho = norm(y);
        rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));
        
            // wt = A' * q - beta' * w;
        CHECK( magma_d_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_daxpy( dofs, - MAGMA_D_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );  
        
                    // no precond: z = wt
        magma_dcopy( dofs, wt.dval, 1, z.dval, 1, queue );
        


        thet1 = thet;        
        thet = rho / (gamm * MAGMA_D_MAKE( MAGMA_D_ABS(beta), 0.0 ));
        gamm1 = gamm;        
        
        gamm = c_one / magma_dsqrt(c_one + thet*thet);        
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);        

        if( magma_d_isnan_inf( thet ) || magma_d_isnan_inf( gamm ) || magma_d_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        if( solver_par->numiter == 1 ){
            
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_dqmr_4(  
            r.num_rows, 
            r.num_cols, 
            eta,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
        else{

            pds = (thet1 * gamm) * (thet1 * gamm);
            
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_dqmr_5(  
            r.num_rows, 
            r.num_cols, 
            eta,
            pds,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
            // psi = norm(z);
        psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ) );
        
        res = magma_dnrm2( dofs, r.dval, 1, queue );
        
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        
        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
        magma_dqmr_1(  
        r.num_rows, 
        r.num_cols, 
        rho,
        psi,
        y.dval, 
        z.dval,
        v.dval,
        w.dval,
        queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
 
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK(  magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&r_tld, queue );
    magma_dmfree(&v,  queue );
    magma_dmfree(&w,  queue );
    magma_dmfree(&wt, queue );
    magma_dmfree(&d,  queue );
    magma_dmfree(&s,  queue );
    magma_dmfree(&z,  queue );
    magma_dmfree(&q,  queue );
    magma_dmfree(&p,  queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&y,  queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );


    
    solver_par->info = info;
    return info;
}   /* magma_dqmr_merge */
예제 #4
0
extern "C" void
magma_dsbtype2cb(magma_int_t n, magma_int_t nb,
                double *A, magma_int_t lda,
                double *V, magma_int_t ldv,
                double *TAU,
                magma_int_t st, magma_int_t ed, magma_int_t sweep,
                magma_int_t Vblksiz, magma_int_t wantz,
                double *work)
{
    double ctmp;
    magma_int_t J1, J2, len, lem, ldx;
    magma_int_t vpos, taupos;
    //magma_int_t blkid, tpos;
    magma_int_t ione = 1;
    const double c_one    =  MAGMA_D_ONE;

    if ( wantz == 0 ) {
        vpos   = (sweep%2)*n + st;
        taupos = (sweep%2)*n + st;
    } else {
        //findVTpos(n, nb, Vblksiz, sweep, st, &vpos, &taupos, &tpos, &blkid);
        magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, st, ldv, &vpos, &taupos);
    }

    ldx = lda-1;
    J1  = ed+1;
    J2  = min(ed+nb,n-1);
    len = ed-st+1;
    lem = J2-J1+1;

    if ( lem > 0 ) {
        /* Apply remaining right commming from the top block */
        lapackf77_dlarfx("R", &lem, &len, V(vpos), TAU(taupos), A(J1, st), &ldx, work);
    }

    if ( lem > 1 ) {
        if ( wantz == 0 ) {
            vpos   = (sweep%2)*n + J1;
            taupos = (sweep%2)*n + J1;
        } else {
            magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, J1, ldv, &vpos, &taupos);
            //findVTpos(n,nb,Vblksiz,sweep,J1, &vpos, &taupos, &tpos, &blkid);
        }

        /* Remove the first column of the created bulge */
        *V(vpos)  = c_one;
        
        //magma_int_t lem2=lem-1;
        //blasf77_dcopy( &lem2, A(ed+2, st), &ione, V(vpos+1), &ione );
        memcpy(V(vpos+1), A(J1+1, st), (lem-1)*sizeof(double));
        memset(A(J1+1, st), 0, (lem-1)*sizeof(double));

        /* Eliminate the col at st */
        lapackf77_dlarfg( &lem, A(J1, st), V(vpos+1), &ione, TAU(taupos) );

        /*
         * Apply left on A(J1:J2,st+1:ed)
         * We decrease len because we start at col st+1 instead of st.
         * col st is the col that has been revomved;
         */
        len = len-1;
        ctmp = MAGMA_D_CONJ(*TAU(taupos));
        lapackf77_dlarfx("L", &lem, &len, V(vpos),  &ctmp, A(J1, st+1), &ldx, work);
    }
}
예제 #5
0
파일: dpbicg.cpp 프로젝트: xulunfan/magma
extern "C" magma_int_t
magma_dpbicg(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_d_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_PBICG;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // some useful variables
    double c_zero = MAGMA_D_ZERO;
    double c_one  = MAGMA_D_ONE;
    double c_neg_one = MAGMA_D_NEG_ONE;
    
    magma_int_t dofs = A.num_rows * b.num_cols;

    // workspace
    magma_d_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, 
                z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR}, 
                yt={Magma_CSR},  qt={Magma_CSR};
                
    // need to transpose the matrix
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver variables
    double alpha, rho, beta, rho_new, ptq;
    double res, nomb, nom0, r0;

        // transpose the matrix
    magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    
    // solver setup
    CHECK(  magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    res = nom0;
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, rt.dval, 1, queue );                  // rr = r
    rho_new = magma_ddot( dofs, rt.dval, 1, r.dval, 1, queue );             // rho=<rr,r>
    rho = alpha = MAGMA_D_MAKE( 1.0, 0. );

    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );


    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, r, &y, precond_par, queue ));
        CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, y, &z, precond_par, queue ));
        CHECK( magma_d_applyprecond_right( MagmaTrans, A, rt, &yt, precond_par, queue ));
        CHECK( magma_d_applyprecond_left( MagmaTrans, A, yt, &zt, precond_par, queue ));
        //magma_dcopy( dofs, r.dval, 1 , y.dval, 1, queue );             // y=r
        //magma_dcopy( dofs, y.dval, 1 , z.dval, 1, queue );             // z=y
        //magma_dcopy( dofs, rt.dval, 1 , yt.dval, 1, queue );           // yt=rt
        //magma_dcopy( dofs, yt.dval, 1 , zt.dval, 1, queue );           // yt=rt
        
        rho= rho_new;
        rho_new = magma_ddot( dofs, rt.dval, 1, z.dval, 1, queue );  // rho=<rt,z>
        if( magma_d_isnan_inf( rho_new ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        if( solver_par->numiter==1 ){
            magma_dcopy( dofs, z.dval, 1 , p.dval, 1, queue );           // yt=rt
            magma_dcopy( dofs, zt.dval, 1 , pt.dval, 1, queue );           // zt=yt
        } else {
            beta = rho_new/rho;
            magma_dscal( dofs, beta, p.dval, 1, queue );                 // p = beta*p
            magma_daxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue );   // p = z+beta*p
            magma_dscal( dofs, MAGMA_D_CONJ(beta), pt.dval, 1, queue );   // pt = beta*pt
            magma_daxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue );  // pt = zt+beta*pt
        }
        CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue ));      // v = Ap
        CHECK( magma_d_spmv( c_one, AT, pt, c_zero, qt, queue ));   // v = Ap
        solver_par->spmv_count++;
        solver_par->spmv_count++;
        ptq = magma_ddot( dofs, pt.dval, 1, q.dval, 1, queue );
        alpha = rho_new /ptq;
        
        
        magma_daxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue );                // x=x+alpha*p
        magma_daxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue );     // r=r+alpha*q
        magma_daxpy( dofs, c_neg_one * MAGMA_D_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue );     // r=r+alpha*q

        res = magma_dnrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK(  magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&rt, queue );
    magma_dmfree(&p, queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&q, queue );
    magma_dmfree(&qt, queue );
    magma_dmfree(&y, queue );
    magma_dmfree(&yt, queue );
    magma_dmfree(&z, queue );
    magma_dmfree(&zt, queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_dpbicg */