示例#1
0
extern "C" magma_int_t
magma_cpqmr_merge(
    magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x,
    magma_c_solver_par *solver_par,
    magma_c_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_QMR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    
    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
    // solver variables
    float nom0, r0, res=0.0, nomb;
    magmaFloatComplex rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one, 
                        thet = c_one, thet1 = c_one, epsilon = c_one, 
                        beta = c_one, delta = c_one, pde = c_one, rde = c_one,
                        gamm = c_one, gamm1 = c_one, psi = c_one;
    
    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    magma_c_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    // GPU workspace
    magma_c_matrix r={Magma_CSR}, r_tld={Magma_CSR},
                    v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                    d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR}, 
                    p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR},
                    vt={Magma_CSR}, yt={Magma_CSR}, zt={Magma_CSR};
    CHECK( magma_cvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &yt, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &vt, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &zt, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver setup
    CHECK(  magma_cresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_ccopy( dofs, r.dval, 1, r_tld.dval, 1, queue );   
    magma_ccopy( dofs, r.dval, 1, vt.dval, 1, queue );  
    magma_ccopy( dofs, r.dval, 1, wt.dval, 1, queue );   
     
    
    // transpose the matrix
    magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );
    
    nomb = magma_scnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
        // no precond: y = vt, z = wt
        // magma_ccopy( dofs, vt.dval, 1, y.dval, 1, queue );
        // magma_ccopy( dofs, wt.dval, 1, z.dval, 1, queue );
    CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, vt, &y, precond_par, queue ));
    CHECK( magma_c_applyprecond_right( MagmaTrans, A, wt, &z, precond_par, queue ));

    psi = magma_csqrt( magma_cdotc( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_csqrt( magma_cdotc( dofs, y.dval, 1, y.dval, 1, queue ));
        // v = vt / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
    magma_cqmr_8(  
    r.num_rows, 
    r.num_cols, 
    rho,
    psi,
    vt.dval,
    wt.dval,
    y.dval, 
    z.dval,
    v.dval,
    w.dval,
    queue );

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    
    solver_par->numiter = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( magma_c_isnan_inf( rho ) || magma_c_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
            // delta = z' * y;
        delta = magma_cdotc( dofs, z.dval, 1, y.dval, 1, queue );
        if( magma_c_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
            // no precond: yt = y, zt = z
        // magma_ccopy( dofs, y.dval, 1, yt.dval, 1, queue );
        // magma_ccopy( dofs, z.dval, 1, zt.dval, 1, queue );
        CHECK( magma_c_applyprecond_right( MagmaNoTrans, A, y, &yt, precond_par, queue ));
        CHECK( magma_c_applyprecond_left( MagmaTrans, A, z, &zt, precond_par, queue ));

        
        if( solver_par->numiter == 1 ){
                // p = y;
                // q = z;
            magma_ccopy( dofs, yt.dval, 1, p.dval, 1, queue );
            magma_ccopy( dofs, zt.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_C_CONJ(delta/epsilon);
                // p = yt - pde * p
                // q = zt - rde * q
            magma_cqmr_2(  
            r.num_rows, 
            r.num_cols, 
            pde,
            rde,
            yt.dval,
            zt.dval,
            p.dval, 
            q.dval, 
            queue );
        }
        if( magma_c_isnan_inf( rho ) || magma_c_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        CHECK( magma_c_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;
            // epsilon = q' * pt;
        epsilon = magma_cdotc( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;

        if( magma_c_isnan_inf( epsilon ) || magma_c_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
            // vt = pt - beta * v;
        magma_cqmr_7(  
        r.num_rows, 
        r.num_cols, 
        beta,
        pt.dval,
        v.dval,
        vt.dval,
        queue );

        magma_ccopy( dofs, v.dval, 1, vt.dval, 1, queue );
        magma_cscal( dofs, -beta, vt.dval, 1, queue ); 
        magma_caxpy( dofs, c_one, pt.dval, 1, vt.dval, 1, queue ); 
            // no precond: y = vt
        // magma_ccopy( dofs, vt.dval, 1, y.dval, 1, queue );
        CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, vt, &y, precond_par, queue ));

        rho1 = rho;      
            // rho = norm(y);
        rho = magma_csqrt( magma_cdotc( dofs, y.dval, 1, y.dval, 1, queue ));
            // wt = A' * q - beta' * w;
        CHECK( magma_c_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_caxpy( dofs, - MAGMA_C_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );  
            // no precond: z = wt
        // magma_ccopy( dofs, wt.dval, 1, z.dval, 1, queue );
        CHECK( magma_c_applyprecond_right( MagmaTrans, A, wt, &z, precond_par, queue ));

        thet1 = thet;        
        thet = rho / (gamm * MAGMA_C_MAKE( MAGMA_C_ABS(beta), 0.0 ));
        gamm1 = gamm;        
        
        gamm = c_one / magma_csqrt(c_one + thet*thet);        
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);        

        if( magma_c_isnan_inf( thet ) || magma_c_isnan_inf( gamm ) || magma_c_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        if( solver_par->numiter == 1 ){
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_cqmr_4(  
            r.num_rows, 
            r.num_cols, 
            eta,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
        else{
                // pds = (thet1 * gamm)^2;
            pds = (thet1 * gamm) * (thet1 * gamm);
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_cqmr_5(  
            r.num_rows, 
            r.num_cols, 
            eta,
            pds,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
            // psi = norm(z);
        psi = magma_csqrt( magma_cdotc( dofs, z.dval, 1, z.dval, 1, queue ) );
        
        res = magma_scnrm2( dofs, r.dval, 1, queue );
        
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
            // v = vt / rho
            // y = y / rho
            // w = wt / psi
            // z = z / psi
        magma_cqmr_8(  
        r.num_rows, 
        r.num_cols, 
        rho,
        psi,
        vt.dval,
        wt.dval,
        y.dval, 
        z.dval,
        v.dval,
        w.dval,
        queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_cresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_cmfree(&r, queue );
    magma_cmfree(&r_tld, queue );
    magma_cmfree(&v,  queue );
    magma_cmfree(&w,  queue );
    magma_cmfree(&wt, queue );
    magma_cmfree(&d,  queue );
    magma_cmfree(&s,  queue );
    magma_cmfree(&z,  queue );
    magma_cmfree(&q,  queue );
    magma_cmfree(&p,  queue );
    magma_cmfree(&zt, queue );
    magma_cmfree(&vt, queue );
    magma_cmfree(&yt, queue );
    magma_cmfree(&pt, queue );
    magma_cmfree(&y,  queue );
    magma_cmfree(&AT, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    
    solver_par->info = info;
    return info;
}   /* magma_cqmr */
示例#2
0
extern "C" magma_int_t
magma_cbombard(
    magma_c_matrix A, magma_c_matrix b, 
    magma_c_matrix *x, magma_c_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // 1=QMR, 2=CGS, 3+BiCGSTAB
    magma_int_t flag = 0;
    
    // prepare solver feedback
    solver_par->solver = Magma_BOMBARD;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
                        
    // solver variables
    float nom0, r0, res, Q_res, T_res, C_res, B_res, nomb;
    
    //QMR
    magmaFloatComplex Q_rho = c_one, Q_rho1 = c_one, Q_eta = -c_one , Q_pds = c_one, 
                        Q_thet = c_one, Q_thet1 = c_one, Q_epsilon = c_one, 
                        Q_beta = c_one, Q_delta = c_one, Q_pde = c_one, Q_rde = c_one,
                        Q_gamm = c_one, Q_gamm1 = c_one, Q_psi = c_one;
                        
    //TFQMR
    magmaFloatComplex T_rho = c_one, T_rho_l = c_one, T_eta = c_zero , T_c = c_zero , 
                        T_theta = c_zero , T_tau = c_zero, T_alpha = c_one, T_beta = c_zero,
                        T_sigma = c_zero;
                        
    //CGS
    magmaFloatComplex C_rho, C_rho_l = c_one, C_alpha, C_beta = c_zero;
    
    //BiCGSTAB
    magmaFloatComplex B_alpha, B_beta, B_omega, B_rho_old, B_rho_new;
    
    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    
    // GPU workspace
    // QMR
    magma_c_matrix AT = {Magma_CSR}, Ah1 = {Magma_CSR}, Ah2 = {Magma_CSR},
                    Q_r={Magma_CSR}, r_tld={Magma_CSR}, Q_x={Magma_CSR},
                    Q_v={Magma_CSR}, Q_w={Magma_CSR}, Q_wt={Magma_CSR},
                    Q_d={Magma_CSR}, Q_s={Magma_CSR}, Q_z={Magma_CSR}, Q_q={Magma_CSR}, 
                    Q_p={Magma_CSR}, Q_pt={Magma_CSR}, Q_y={Magma_CSR}, d1={Magma_CSR}, d2={Magma_CSR};
    //TFQMR
    // GPU workspace
    magma_c_matrix  T_r={Magma_CSR}, T_pu_m={Magma_CSR}, T_x={Magma_CSR},
                    T_d={Magma_CSR}, T_w={Magma_CSR}, T_v={Magma_CSR},
                    T_u_mp1={Magma_CSR}, T_u_m={Magma_CSR}, T_Au={Magma_CSR}, 
                    T_Ad={Magma_CSR}, T_Au_new={Magma_CSR};
                    
    // CGS
    magma_c_matrix C_r={Magma_CSR}, C_rt={Magma_CSR}, C_x={Magma_CSR},
                    C_p={Magma_CSR}, C_q={Magma_CSR}, C_u={Magma_CSR}, C_v={Magma_CSR},  C_t={Magma_CSR},
                    C_p_hat={Magma_CSR}, C_q_hat={Magma_CSR}, C_u_hat={Magma_CSR}, C_v_hat={Magma_CSR};
    //BiCGSTAB
    magma_c_matrix B_r={Magma_CSR}, B_x={Magma_CSR}, B_p={Magma_CSR}, B_v={Magma_CSR}, 
                    B_s={Magma_CSR}, B_t={Magma_CSR};

                    
    CHECK( magma_cvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d1, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d2, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // QMR
    CHECK( magma_cvinit( &Q_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &Q_x, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    // TFQMR
    CHECK( magma_cvinit( &T_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &T_u_mp1,Magma_DEV, A.num_rows, b.num_cols, c_one, queue ));
    CHECK( magma_cvinit( &T_u_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue ));
    CHECK( magma_cvinit( &T_pu_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue ));
    CHECK( magma_cvinit( &T_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &T_d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &T_w, Magma_DEV, A.num_rows, b.num_cols, c_one, queue ));
    CHECK( magma_cvinit( &T_Ad, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &T_Au_new, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &T_Au, Magma_DEV, A.num_rows, b.num_cols, c_one, queue ));
    CHECK( magma_cvinit( &T_x, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    // CGS
    CHECK( magma_cvinit( &C_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_p_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_q_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_u_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_v_hat, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &C_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    // BiCGSTAB
    CHECK( magma_cvinit( &B_r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &B_x,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &B_p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &B_v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &B_s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &B_t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver setup
    CHECK(  magma_cresidualvec( A, b, *x, &r_tld, &nom0, queue));
    solver_par->init_res = nom0;
    res = nom0;
    
    // QMR
    magma_ccopy( dofs, r_tld.dval, 1, Q_r.dval, 1, queue );   
    magma_ccopy( dofs, r_tld.dval, 1, Q_y.dval, 1, queue );   
    magma_ccopy( dofs, r_tld.dval, 1, Q_v.dval, 1, queue );  
    magma_ccopy( dofs, r_tld.dval, 1, Q_wt.dval, 1, queue );   
    magma_ccopy( dofs, r_tld.dval, 1, Q_z.dval, 1, queue ); 
    magma_ccopy( dofs, x->dval, 1, Q_x.dval, 1, queue ); 
    // transpose the matrix
    // transpose the matrix
    magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );
    
    // TFQMR
    solver_par->init_res = nom0;
    magma_ccopy( dofs, r_tld.dval, 1, T_r.dval, 1, queue );   
    magma_ccopy( dofs, T_r.dval, 1, T_w.dval, 1, queue );   
    magma_ccopy( dofs, T_r.dval, 1, T_u_m.dval, 1, queue );  
    magma_ccopy( dofs, T_r.dval, 1, T_u_mp1.dval, 1, queue ); 
    magma_ccopy( dofs, T_u_m.dval, 1, T_pu_m.dval, 1, queue );  
    CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_v, queue ));
    magma_ccopy( dofs, T_v.dval, 1, T_Au.dval, 1, queue );  
    
    // CGS
    magma_ccopy( dofs, r_tld.dval, 1, C_r.dval, 1, queue );   
    magma_ccopy( dofs, x->dval, 1, C_x.dval, 1, queue ); 
    
    // BiCGSTAB
    magma_ccopy( dofs, r_tld.dval, 1, B_r.dval, 1, queue );   
    magma_ccopy( dofs, x->dval, 1, B_x.dval, 1, queue ); 
    CHECK( magma_c_spmv( c_one, A, B_r, c_zero, B_v, queue ));     

    
    nomb = magma_scnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
    
    T_tau = magma_csqrt( magma_cdotc( dofs, T_r.dval, 1, r_tld.dval, 1, queue) );
    T_rho = magma_cdotc( dofs, T_r.dval, 1, r_tld.dval, 1, queue );
    T_rho_l = T_rho;
    

    Q_psi = magma_csqrt( magma_cdotc( dofs, Q_z.dval, 1, Q_z.dval, 1, queue ));
    Q_rho = magma_csqrt( magma_cdotc( dofs, Q_y.dval, 1, Q_y.dval, 1, queue ));
    
    // BiCGSTAB
    B_rho_new = magma_cdotc( dofs, B_r.dval, 1, B_r.dval, 1, queue );            
    B_rho_old = B_omega = B_alpha = MAGMA_C_MAKE( 1.0, 0. );
    
        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
    magma_cqmr_1(  
    b.num_rows, 
    b.num_cols, 
    Q_rho,
    Q_psi,
    Q_y.dval, 
    Q_z.dval,
    Q_v.dval,
    Q_w.dval,
    queue );
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        
            //QMR: delta = z' * y;
        Q_delta = magma_cdotc( dofs, Q_z.dval, 1, Q_y.dval, 1, queue );
        
        // TFQMR
        T_alpha = T_rho / magma_cdotc( dofs, T_v.dval, 1, r_tld.dval, 1, queue );
        T_sigma = T_theta * T_theta / T_alpha * T_eta; 
        
        
            //CGS: rho = r' * r_tld
        C_rho = magma_cdotc( dofs, C_r.dval, 1, r_tld.dval, 1, queue );
        
            // BiCGSTAB
        B_rho_old = B_rho_new;    
        B_rho_new = magma_cdotc( dofs, r_tld.dval, 1, B_r.dval, 1, queue );  // rho=<rr,r>
        B_beta = B_rho_new/B_rho_old * B_alpha/B_omega;   // beta=rho/rho_old *alpha/omega

        
        if( solver_par->numiter == 1 ){
                //QMR: p = y;
                //QMR: q = z;
            magma_ccopy( dofs, Q_y.dval, 1, Q_p.dval, 1, queue );
            magma_ccopy( dofs, Q_z.dval, 1, Q_q.dval, 1, queue );
            
                //QMR: u = r;
                //QMR: p = r;
            magma_ccgs_2(  
            b.num_rows, 
            b.num_cols, 
            C_r.dval,
            C_u.dval,
            C_p.dval,
            queue );
        }
        else{
            Q_pde = Q_psi * Q_delta / Q_epsilon;
            Q_rde = Q_rho * MAGMA_C_CONJ(Q_delta/Q_epsilon);
            
            C_beta = C_rho / C_rho_l;  
            
                //QMR p = y - pde * p
                //QMR q = z - rde * q
            magma_cqmr_2(  
            b.num_rows, 
            b.num_cols, 
            Q_pde,
            Q_rde,
            Q_y.dval,
            Q_z.dval,
            Q_p.dval, 
            Q_q.dval, 
            queue );
            
                  //CGS: u = r + beta*q;
                  //CGS: p = u + beta*( q + beta*p );
            magma_ccgs_1(  
            b.num_rows, 
            b.num_cols, 
            C_beta,
            C_r.dval,
            C_q.dval, 
            C_u.dval,
            C_p.dval,
            queue );
        }
        
        // TFQMR
        magma_ctfqmr_1(  
        b.num_rows, 
        b.num_cols, 
        T_alpha,
        T_sigma,
        T_v.dval, 
        T_Au.dval,
        T_u_m.dval,
        T_pu_m.dval,
        T_u_mp1.dval,
        T_w.dval, 
        T_d.dval,
        T_Ad.dval,
        queue );
        
        T_theta = magma_csqrt( magma_cdotc(dofs, T_w.dval, 1, T_w.dval, 1, queue) ) / T_tau;
        T_c = c_one / magma_csqrt( c_one + T_theta*T_theta );
        T_tau = T_tau * T_theta *T_c;
        T_eta = T_c * T_c * T_alpha;
        T_sigma = T_theta * T_theta / T_alpha * T_eta;  
        
        magma_ctfqmr_2(  
        b.num_rows, 
        b.num_cols, 
        T_eta,
        T_d.dval,
        T_Ad.dval,
        T_x.dval, 
        T_r.dval, 
        queue );
        magma_ccopy( dofs, T_u_mp1.dval, 1, T_pu_m.dval, 1, queue );
        
            // BiCGSTAB: p = r + beta * ( p - omega * v )
        magma_cbicgstab_1(  
        b.num_rows, 
        b.num_cols, 
        B_beta,
        B_omega,
        B_r.dval, 
        B_v.dval,
        B_p.dval,
        queue );
        
        //QMR
        CHECK( magma_c_spmv( c_one, A, Q_p, c_zero, Q_pt, queue ));
        //TFQMR
        CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_Au_new, queue ));
        //CGS
        CHECK( magma_c_spmv( c_one, A, C_p, c_zero, C_v_hat, queue ));
        // BiCGSTAB
        CHECK( magma_c_spmv( c_one, A, B_p, c_zero, B_v, queue ));      // v = Ap
        
        solver_par->spmv_count++;
        
            //QMR: epsilon = q' * pt;
        Q_epsilon = magma_cdotc( dofs, Q_q.dval, 1, Q_pt.dval, 1, queue );
        Q_beta = Q_epsilon / Q_delta;
            //TFQMR
        magma_ccopy( dofs, T_Au_new.dval, 1, T_Au.dval, 1, queue );  
        magma_ccopy( dofs, T_u_mp1.dval, 1, T_u_m.dval, 1, queue ); 
            //CGS: alpha = r_tld' * v_hat
        C_alpha = C_rho / magma_cdotc( dofs, r_tld.dval, 1, C_v_hat.dval, 1, queue );
            //BiCGSTAB
        B_alpha = B_rho_new / magma_cdotc( dofs, r_tld.dval, 1, B_v.dval, 1, queue );

        
            //QMR: v = pt - beta * v
            //QMR: y = v
        magma_cqmr_3(  
        b.num_rows, 
        b.num_cols, 
        Q_beta,
        Q_pt.dval,
        Q_v.dval,
        Q_y.dval,
        queue );
        
        // TFQMR
        magma_ctfqmr_5(  
        b.num_rows, 
        b.num_cols, 
        T_alpha,
        T_sigma,
        T_v.dval, 
        T_Au.dval,
        T_pu_m.dval,
        T_w.dval, 
        T_d.dval,
        T_Ad.dval,
        queue ); 
        
                // TFQMR
        T_sigma = T_theta * T_theta / T_alpha * T_eta;  
        
        T_theta = magma_csqrt( magma_cdotc(dofs, T_w.dval, 1, T_w.dval, 1, queue) ) / T_tau;
        T_c = c_one / magma_csqrt( c_one + T_theta*T_theta );
        T_tau = T_tau * T_theta *T_c;
        T_eta = T_c * T_c * T_alpha;
        
        // TFQMR
        magma_ctfqmr_2(  
        b.num_rows, 
        b.num_cols, 
        T_eta,
        T_d.dval,
        T_Ad.dval,
        T_x.dval, 
        T_r.dval, 
        queue );
        
        T_rho = magma_cdotc( dofs, T_w.dval, 1, r_tld.dval, 1, queue );
        T_beta = T_rho / T_rho_l;
        T_rho_l = T_rho;
        
        magma_ctfqmr_3(  
        b.num_rows, 
        b.num_cols, 
        T_beta,
        T_w.dval,
        T_u_m.dval,
        T_u_mp1.dval, 
        queue );
        magma_ccopy( dofs, T_u_mp1.dval, 1, T_pu_m.dval, 1, queue );  
        
        
            //CGS: q = u - alpha v_hat
            //CGS: t = u + q
        magma_ccgs_3(  
        b.num_rows, 
        b.num_cols, 
        C_alpha,
        C_v_hat.dval,
        C_u.dval, 
        C_q.dval,
        C_t.dval, 
        queue );
        
            // BiCGSTAB: s = r - alpha v
        magma_cbicgstab_2(  
        b.num_rows, 
        b.num_cols, 
        B_alpha,
        B_r.dval,
        B_v.dval,
        B_s.dval, 
        queue );
            
        
        Q_rho1 = Q_rho;      
            //QMR rho = norm(y);
        Q_rho = magma_csqrt( magma_cdotc( dofs, Q_y.dval, 1, Q_y.dval, 1, queue ) );
        
            //QMR wt = A' * q - beta' * w;
        CHECK( magma_c_spmv( c_one, AT, Q_q, c_zero, Q_wt, queue ));
        //TFQMR
        CHECK( magma_c_spmv( c_one, A, T_pu_m, c_zero, T_Au_new, queue ));
            //CGS t = A u_hat
        CHECK( magma_c_spmv( c_one, A, C_t, c_zero, C_rt, queue )); 
            //BiCGSTAB
        CHECK( magma_c_spmv( c_one, A, B_s, c_zero, B_t, queue ));       // t=As
        
        solver_par->spmv_count++;
        
        //BiCGSTAB
        B_omega = magma_cdotc( dofs, B_t.dval, 1, B_s.dval, 1, queue )   // omega = <s,t>/<t,t>
                   / magma_cdotc( dofs, B_t.dval, 1, B_t.dval, 1, queue );

                   
       // QMR
        magma_caxpy( dofs, - MAGMA_C_CONJ( Q_beta ), Q_w.dval, 1, Q_wt.dval, 1, queue );  
                    // no precond: z = wt
        magma_ccopy( dofs, Q_wt.dval, 1, Q_z.dval, 1, queue );
        
        
        //TFQMR
        magma_ctfqmr_4(  
        b.num_rows, 
        b.num_cols, 
        T_beta,
        T_Au_new.dval,
        T_v.dval,
        T_Au.dval, 
        queue );
        
        magma_ccopy( dofs, T_u_mp1.dval, 1, T_u_m.dval, 1, queue ); 
        
            
        // QMR
        Q_thet1 = Q_thet;        
        Q_thet = Q_rho / (Q_gamm * MAGMA_C_MAKE( MAGMA_C_ABS(Q_beta), 0.0 ));
        Q_gamm1 = Q_gamm;        
        
        Q_gamm = c_one / magma_csqrt(c_one + Q_thet*Q_thet);        
        Q_eta = - Q_eta * Q_rho1 * Q_gamm * Q_gamm / (Q_beta * Q_gamm1 * Q_gamm1);

        if ( solver_par->numiter == 1 ) {
                //QMR: d = eta * p + pds * d;
                //QMR: s = eta * pt + pds * d;
                //QMR: x = x + d;
                //QMR: r = r - s;
            magma_cqmr_4(  
            b.num_rows, 
            b.num_cols, 
            Q_eta,
            Q_p.dval,
            Q_pt.dval,
            Q_d.dval, 
            Q_s.dval, 
            Q_x.dval, 
            Q_r.dval, 
            queue );
        }
        else {
            Q_pds = (Q_thet1 * Q_gamm) * (Q_thet1 * Q_gamm);
            
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_cqmr_5(  
            b.num_rows, 
            b.num_cols, 
            Q_eta,
            Q_pds,
            Q_p.dval,
            Q_pt.dval,
            Q_d.dval, 
            Q_s.dval, 
            Q_x.dval, 
            Q_r.dval, 
            queue );
        }
        
        
        // CGS: r = r -alpha*A u_hat
        // CGS: x = x + alpha u_hat
        magma_ccgs_4(  
        b.num_rows, 
        b.num_cols, 
        C_alpha,
        C_t.dval,
        C_rt.dval,
        C_x.dval, 
        C_r.dval,
        queue );
        C_rho_l = C_rho;  
        
            // BiCGSTAB: x = x + alpha * p + omega * s
            // BiCGSTAB: r = s - omega * t
        magma_cbicgstab_3(  
        b.num_rows, 
        b.num_cols, 
        B_alpha,
        B_omega,
        B_p.dval,
        B_s.dval,
        B_t.dval,
        B_x.dval,
        B_r.dval,
        queue );
        
            //QMR: psi = norm(z);
        Q_psi = magma_csqrt( magma_cdotc( dofs, Q_z.dval, 1, Q_z.dval, 1, queue ) );
        
            //QMR: v = y / rho
            //QMR: y = y / rho
            //QMR: w = wt / psi
            //QMR: z = z / psi
        magma_cqmr_1(  
        b.num_rows, 
        b.num_cols, 
        Q_rho,
        Q_psi,
        Q_y.dval, 
        Q_z.dval,
        Q_v.dval,
        Q_w.dval,
        queue );
        
        
        
        
        Q_res = magma_scnrm2( dofs, Q_r.dval, 1, queue );
        T_res = magma_scnrm2( dofs, T_r.dval, 1, queue );
        C_res = magma_scnrm2( dofs, C_r.dval, 1, queue );
        B_res = magma_scnrm2( dofs, B_r.dval, 1, queue );

        
            // printf(" %e   %e   %e\n", Q_res, C_res, B_res);
        if( Q_res < res ){
            res = Q_res;
            flag = 1;
        }
        if( T_res < res ){
            res = Q_res;
            flag = 2;
        }
        if( C_res < res ){
            res = C_res;
            flag = 3;
        }
        if( B_res < res ){
            res = B_res;
            flag = 4;
        }

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            info = MAGMA_SUCCESS;
            break;
        }
        if( magma_c_isnan_inf( Q_beta ) && magma_c_isnan_inf( C_beta ) && magma_c_isnan_inf( B_beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        } 
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
        
    // copy back the best solver
    switch ( flag ) {
        case 1:
            printf("%% QMR fastest solver.\n");
            magma_ccopy( dofs, Q_x.dval, 1, x->dval, 1, queue ); 
            break;
       case 2:
            printf("%% TFQMR fastest solver.\n");
            magma_ccopy( dofs, T_x.dval, 1, x->dval, 1, queue ); 
            break;
       case 3:
            printf("%% CGS fastest solver.\n");
            magma_ccopy( dofs, C_x.dval, 1, x->dval, 1, queue ); 
            break;
       case 4:
            printf("%% BiCGSTAB fastest solver.\n");
            magma_ccopy( dofs, B_x.dval, 1, x->dval, 1, queue ); 
            break;
    }


    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_cresidualvec( A, b, *x, &r_tld, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter  && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_cmfree(&r_tld, queue );
    magma_cmfree(&d1, queue );
    magma_cmfree(&d2, queue );
    magma_cmfree(&AT,  queue );
    
    // QMR
    magma_cmfree(&Q_r,  queue );
    magma_cmfree(&Q_v,  queue );
    magma_cmfree(&Q_w,  queue );
    magma_cmfree(&Q_wt, queue );
    magma_cmfree(&Q_d,  queue );
    magma_cmfree(&Q_s,  queue );
    magma_cmfree(&Q_z,  queue );
    magma_cmfree(&Q_q,  queue );
    magma_cmfree(&Q_p,  queue );
    magma_cmfree(&Q_pt, queue );
    magma_cmfree(&Q_y,  queue );
    magma_cmfree(&Q_x,  queue );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );
    // TFQMR
    magma_cmfree(&T_r, queue );
    magma_cmfree(&T_x,  queue );
    magma_cmfree(&T_d, queue );
    magma_cmfree(&T_w, queue );
    magma_cmfree(&T_v, queue );
    magma_cmfree(&T_u_m, queue );
    magma_cmfree(&T_u_mp1, queue );
    magma_cmfree(&T_pu_m, queue );
    magma_cmfree(&T_d, queue );
    magma_cmfree(&T_Au, queue );
    magma_cmfree(&T_Au_new, queue );
    magma_cmfree(&T_Ad, queue );
    // CGS
    magma_cmfree(&C_r,  queue );
    magma_cmfree(&C_rt, queue );
    magma_cmfree(&C_x,  queue );
    magma_cmfree(&C_p,  queue );
    magma_cmfree(&C_q,  queue );
    magma_cmfree(&C_u,  queue );
    magma_cmfree(&C_v,  queue );
    magma_cmfree(&C_t,  queue );
    magma_cmfree(&C_p_hat, queue );
    magma_cmfree(&C_q_hat, queue );
    magma_cmfree(&C_u_hat, queue );
    magma_cmfree(&C_v_hat, queue );
    // BiCGSTAB
    magma_cmfree(&B_r, queue );
    magma_cmfree(&B_x, queue );
    magma_cmfree(&B_p, queue );
    magma_cmfree(&B_v, queue );
    magma_cmfree(&B_s, queue );
    magma_cmfree(&B_t, queue );
    
    solver_par->info = info;
    return info;
}   /* magma_cbombard */
示例#3
0
extern "C" magma_int_t
magma_clsqr(
    magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x,
    magma_c_solver_par *solver_par,
    magma_c_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_LSQR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    magma_int_t m = A.num_rows * b.num_cols;
    magma_int_t n = A.num_cols * b.num_cols;
    
    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
    // solver variables
    float s, nom0, r0, res=0, nomb, phibar, beta, alpha, c, rho, rhot, phi, thet, normr, normar, norma, sumnormd2, normd;

    // need to transpose the matrix
    magma_c_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    // GPU workspace
    magma_c_matrix r={Magma_CSR},
                    v={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR},
                    d={Magma_CSR}, vt={Magma_CSR}, q={Magma_CSR}, 
                    w={Magma_CSR}, u={Magma_CSR};
    CHECK( magma_cvinit( &r, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &v, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &z, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &d, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &vt,Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &q, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &w, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_cvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // transpose the matrix
    magma_cmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );
    

    
    // solver setup
    CHECK(  magma_cresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    nomb = magma_scnrm2( m, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
    magma_ccopy( m, b.dval, 1, u.dval, 1, queue );  
    beta = magma_scnrm2( m, u.dval, 1, queue );
    magma_cscal( m, MAGMA_C_MAKE(1./beta, 0.0 ), u.dval, 1, queue );
    normr = beta;
    c = 1.0;
    s = 0.0;
    phibar = beta;
    CHECK( magma_c_spmv( c_one, AT, u, c_zero, v, queue ));
    
    if( precond_par->solver == Magma_NONE ){
        ;
    } else {
      CHECK( magma_c_applyprecond_right( MagmaTrans, A, v, &zt, precond_par, queue ));
      CHECK( magma_c_applyprecond_left( MagmaTrans, A, zt, &v, precond_par, queue ));
    }
    alpha = magma_scnrm2( n, v.dval, 1, queue );
    magma_cscal( n, MAGMA_C_MAKE(1./alpha, 0.0 ), v.dval, 1, queue );
    normar = alpha * beta;
    norma = 0;
    sumnormd2 = 0;
        
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    solver_par->numiter = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( precond_par->solver == Magma_NONE || A.num_rows != A.num_cols ) {
            magma_ccopy( n, v.dval, 1 , z.dval, 1, queue );    
        } else {
            CHECK( magma_c_applyprecond_left( MagmaNoTrans, A, v, &zt, precond_par, queue ));
            CHECK( magma_c_applyprecond_right( MagmaNoTrans, A, zt, &z, precond_par, queue ));
        }
        //CHECK( magma_c_spmv( c_one, A, z, MAGMA_C_MAKE(-alpha,0.0), u, queue ));
        CHECK( magma_c_spmv( c_one, A, z, c_zero, zt, queue ));
        magma_cscal( m, MAGMA_C_MAKE(-alpha, 0.0 ), u.dval, 1, queue ); 
        magma_caxpy( m, c_one, zt.dval, 1, u.dval, 1, queue );
        
        solver_par->spmv_count++;
        beta = magma_scnrm2( m, u.dval, 1, queue );
        magma_cscal( m, MAGMA_C_MAKE(1./beta, 0.0 ), u.dval, 1, queue ); 
        // norma = norm([norma alpha beta]);
        norma = sqrt(norma*norma + alpha*alpha + beta*beta );
        
        //lsvec( solver_par->numiter-1 ) = normar / norma;
        
        thet = -s * alpha;
        rhot = c * alpha;
        rho = sqrt( rhot * rhot + beta * beta );
        c = rhot / rho;
        s = - beta / rho;
        phi = c * phibar;
        phibar = s * phibar;
        
        // d = (z - thet * d) / rho;
        magma_cscal( n, MAGMA_C_MAKE(-thet, 0.0 ), d.dval, 1, queue ); 
        magma_caxpy( n, c_one, z.dval, 1, d.dval, 1, queue );
        magma_cscal( n, MAGMA_C_MAKE(1./rho, 0.0 ), d.dval, 1, queue );
        normd = magma_scnrm2( n, d.dval, 1, queue );
        sumnormd2 = sumnormd2 + normd*normd;
        
        // convergence check
        res = normr;        
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        // check for convergence in A*x=b
        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            info = MAGMA_SUCCESS;
            break;
        }
        // check for convergence in min{|b-A*x|}
        if ( A.num_rows != A.num_cols &&
               ( normar/(norma*normr) <= solver_par->rtol || normar <= solver_par->atol ) ){
            printf("%% warning: quit from minimization convergence check.\n");
            info = MAGMA_SUCCESS;
            break;
        }
        
        magma_caxpy( n, MAGMA_C_MAKE( phi, 0.0 ), d.dval, 1, x->dval, 1, queue );
        normr = fabs(s) * normr;
        CHECK( magma_c_spmv( c_one, AT, u, c_zero, vt, queue ));
        solver_par->spmv_count++;
        if( precond_par->solver == Magma_NONE ){
            ;    
        } else {
            CHECK( magma_c_applyprecond_right( MagmaTrans, A, vt, &zt, precond_par, queue ));
            CHECK( magma_c_applyprecond_left( MagmaTrans, A, zt, &vt, precond_par, queue ));
        }

        magma_cscal( n, MAGMA_C_MAKE(-beta, 0.0 ), v.dval, 1, queue ); 
        magma_caxpy( n, c_one, vt.dval, 1, v.dval, 1, queue );
        alpha = magma_scnrm2( n, v.dval, 1, queue );
        magma_cscal( n, MAGMA_C_MAKE(1./alpha, 0.0 ), v.dval, 1, queue ); 
        normar = alpha * fabs(s*phi);
         
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_cresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_cmfree(&r, queue );
    magma_cmfree(&v,  queue );
    magma_cmfree(&z,  queue );
    magma_cmfree(&zt, queue );
    magma_cmfree(&d,  queue );
    magma_cmfree(&vt,  queue );
    magma_cmfree(&q,  queue );
    magma_cmfree(&u,  queue );
    magma_cmfree(&w,  queue );
    magma_cmfree(&AT, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    
    solver_par->info = info;
    return info;
}   /* magma_cqmr */
示例#4
0
extern "C" magma_int_t
magma_cmtransposeconjugate(
    magma_c_matrix A,
    magma_c_matrix *B,
    magma_queue_t queue )
{
    // for symmetric matrices: convert to csc using cusparse
    
    magma_int_t info = 0;
    cusparseHandle_t handle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrB=NULL;
    
    magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR};
    magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR};

    if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
        // fill in information for B
        B->storage_type    = A.storage_type;
        B->diagorder_type  = A.diagorder_type;
        B->memory_location = Magma_DEV;
        B->num_rows        = A.num_cols;  // transposed
        B->num_cols        = A.num_rows;  // transposed
        B->nnz             = A.nnz;
        B->true_nnz = A.true_nnz;
        if ( A.fill_mode == MagmaFull ) {
            B->fill_mode = MagmaFull;
        }
        else if ( A.fill_mode == MagmaLower ) {
            B->fill_mode = MagmaUpper;
        }
        else if ( A.fill_mode == MagmaUpper ) {
            B->fill_mode = MagmaLower;
        }
        B->dval = NULL;
        B->drow = NULL;
        B->dcol = NULL;
        
        // memory allocation
        CHECK( magma_cmalloc( &B->dval, B->nnz ));
        CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 ));
        CHECK( magma_index_malloc( &B->dcol, B->nnz ));
        // CUSPARSE context //
        CHECK_CUSPARSE( cusparseCreate( &handle ));
        CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
        CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE(
        cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz,
                          A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow,
                          CUSPARSE_ACTION_NUMERIC,
                          CUSPARSE_INDEX_BASE_ZERO) );
        CHECK( magma_cmconjugate( B, queue ));
    } else if ( A.memory_location == Magma_CPU ){
        CHECK( magma_cmtransfer( A, &A_d, A.memory_location, Magma_DEV, queue ));
        CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue ));
        CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue ));
    } else {
        CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ));
        CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue ));
        CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue ));
    }
cleanup:
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroy( handle );
    magma_cmfree( &A_d, queue );
    magma_cmfree( &B_d, queue );
    magma_cmfree( &ACSR, queue );
    magma_cmfree( &BCSR, queue );
    if( info != 0 ){
        magma_cmfree( B, queue );
    }
    return info;
}
示例#5
0
extern "C" magma_int_t
magma_ccumilusetup_transpose(
    magma_c_matrix A,
    magma_c_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_c_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR};
    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrLT=NULL;
    cusparseMatDescr_t descrUT=NULL;
    
    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // transpose the matrix
    magma_cmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );
    
    magma_cmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_cmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransposeconjugate( Ah2, &Ah1, queue );
    magma_cmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_cmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_cmfree(&Ah1, queue );
    magma_cmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue );
    magma_cmfree(&Ah2, queue );
   
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows,
        precond->LT.nnz, descrLT,
        precond->LT.dval, precond->LT.drow, precond->LT.dcol, precond->cuinfoLT ));
    
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT ));
    CHECK_CUSPARSE( cusparseCcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows,
        precond->UT.nnz, descrUT,
        precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT ));
cleanup:
    cusparseDestroyMatDescr( descrLT );
    cusparseDestroyMatDescr( descrUT );
    cusparseDestroy( cusparseHandle );
    magma_cmfree(&Ah1, queue );
    magma_cmfree(&Ah2, queue );

    return info;
}