コード例 #1
0
magma_int_t
magma_sbicgstab_merge( magma_s_sparse_matrix A, magma_s_vector b, 
        magma_s_vector *x, magma_s_solver_par *solver_par ){

    // prepare solver feedback
    solver_par->solver = Magma_BICGSTABMERGE;
    solver_par->numiter = 0;
    solver_par->info = 0;

    // some useful variables
    float c_zero = MAGMA_S_ZERO, c_one = MAGMA_S_ONE;
    
    magma_int_t dofs = A.num_rows;

    // GPU stream
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );

    // workspace
    magma_s_vector q, r,rr,p,v,s,t;
    float *d1, *d2, *skp;
    magma_smalloc( &d1, dofs*(2) );
    magma_smalloc( &d2, dofs*(2) );
    // array for the parameters
    magma_smalloc( &skp, 8 );       
    // skp = [alpha|beta|omega|rho_old|rho|nom|tmp1|tmp2]
    magma_s_vinit( &q, Magma_DEV, dofs*6, c_zero );

    // q = rr|r|p|v|s|t
    rr.memory_location = Magma_DEV; rr.val = NULL; rr.num_rows = rr.nnz = dofs;
    r.memory_location = Magma_DEV; r.val = NULL; r.num_rows = r.nnz = dofs;
    p.memory_location = Magma_DEV; p.val = NULL; p.num_rows = p.nnz = dofs;
    v.memory_location = Magma_DEV; v.val = NULL; v.num_rows = v.nnz = dofs;
    s.memory_location = Magma_DEV; s.val = NULL; s.num_rows = s.nnz = dofs;
    t.memory_location = Magma_DEV; t.val = NULL; t.num_rows = t.nnz = dofs;

    rr.val = q(0);
    r.val = q(1);
    p.val = q(2);
    v.val = q(3);
    s.val = q(4);
    t.val = q(5);
    
    // solver variables
    float alpha, beta, omega, rho_old, rho_new, *skp_h;
    float nom, nom0, betanom, r0, den;

    // solver setup
    magma_sscal( dofs, c_zero, x->val, 1) ;                            // x = 0
    magma_scopy( dofs, b.val, 1, q(0), 1 );                            // rr = b
    magma_scopy( dofs, b.val, 1, q(1), 1 );                            // r = b

    rho_new = magma_sdot( dofs, r.val, 1, r.val, 1 );             // rho=<rr,r>
    nom = MAGMA_S_REAL(magma_sdot( dofs, r.val, 1, r.val, 1 ));    
    nom0 = betanom = sqrt(nom);                                 // nom = || r ||                            
    rho_old = omega = alpha = MAGMA_S_MAKE( 1.0, 0. );
    beta = rho_new;
    solver_par->init_res = nom0;
    // array on host for the parameters    
    magma_smalloc_cpu( &skp_h, 8 );
    skp_h[0]=alpha; 
    skp_h[1]=beta; 
    skp_h[2]=omega; 
    skp_h[3]=rho_old; 
    skp_h[4]=rho_new; 
    skp_h[5]=MAGMA_S_MAKE(nom, 0.0);
    magma_ssetvector( 8, skp_h, 1, skp, 1 );
    magma_s_spmv( c_one, A, r, c_zero, v );                     // z = A r
    den = MAGMA_S_REAL( magma_sdot(dofs, v.val, 1, r.val, 1) );// den = z dot r

    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;
    if ( nom < r0 )
        return MAGMA_SUCCESS;
    // check positive definite  
    if (den <= 0.0) {
        printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
        return -100;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    magma_device_sync(); tempo1=magma_wtime();
    if( solver_par->verbose > 0 ){
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }

    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ){

        magmablasSetKernelStream(stream[0]);

        // computes p=r+beta*(p-omega*v)
        magma_sbicgmerge1( dofs, skp, v.val, r.val, p.val );

        magma_s_spmv( c_one, A, p, c_zero, v );                 // v = Ap

        magma_smdotc( dofs, 1, q.val, v.val, d1, d2, skp );                     
        magma_sbicgmerge4(  1, skp );
        magma_sbicgmerge2( dofs, skp, r.val, v.val, s.val );    // s=r-alpha*v

        magma_s_spmv( c_one, A, s, c_zero, t );                 // t=As

        magma_smdotc( dofs, 2, q.val+4*dofs, t.val, d1, d2, skp+6 );
        magma_sbicgmerge4(  2, skp );
        magma_sbicgmerge3( dofs, skp, p.val, s.val,     // x=x+alpha*p+omega*s
                            t.val, x->val, r.val );     // r=s-omega*t
        magma_smdotc( dofs, 2, q.val, r.val, d1, d2, skp+4);
        magma_sbicgmerge4(  3, skp );

        // check stopping criterion (asynchronous copy)
        magma_sgetvector_async( 1 , skp+5, 1, 
                                                        skp_h+5, 1, stream[1] );
        betanom = sqrt(MAGMA_S_REAL(skp_h[5]));

        if( solver_par->verbose > 0 ){
            magma_device_sync(); tempo2=magma_wtime();
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        
        if (  betanom  < r0 ) {
            break;
        }
    }
    magma_device_sync(); tempo2=magma_wtime();
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    magma_sresidual( A, b, *x, &residual );
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if( solver_par->numiter < solver_par->maxiter){
        solver_par->info = 0;
    }else if( solver_par->init_res > solver_par->final_res ){
        if( solver_par->verbose > 0 ){
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = -2;
    }
    else{
        if( solver_par->verbose > 0 ){
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = -1;
    }
    magma_s_vfree(&q);  // frees all vectors

    magma_free(d1);
    magma_free(d2);
    magma_free( skp );
    magma_free_cpu( skp_h );

    return MAGMA_SUCCESS;
}   /* sbicgstab_merge */
コード例 #2
0
ファイル: sbicgstab_merge3.cpp プロジェクト: maxhutch/magma
extern "C" magma_int_t
magma_sbicgstab_merge3(
    magma_s_matrix A, magma_s_matrix b,
    magma_s_matrix *x, magma_s_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_BICGSTABMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // solver variables
    float alpha, beta, omega, rho_old, rho_new, *skp_h={0};
    float nom, nom0, betanom, nomb;

    // some useful variables
    float c_zero = MAGMA_S_ZERO, c_one = MAGMA_S_ONE;
    
    magma_int_t dofs = A.num_rows;

    // workspace
    magma_s_matrix q={Magma_CSR}, r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR};
    float *d1=NULL, *d2=NULL, *skp=NULL;
    d1 = NULL;
    d2 = NULL;
    skp = NULL;
    CHECK( magma_smalloc( &d1, dofs*(2) ));
    CHECK( magma_smalloc( &d2, dofs*(2) ));
    // array for the parameters
    CHECK( magma_smalloc( &skp, 8 ));
    // skp = [alpha|beta|omega|rho_old|rho|nom|tmp1|tmp2]
    CHECK( magma_svinit( &q, Magma_DEV, dofs*6, 1, c_zero, queue ));

    // q = rr|r|p|v|s|t
    rr.memory_location = Magma_DEV; rr.dval = NULL; rr.num_rows = rr.nnz = dofs; rr.num_cols = 1; rr.storage_type = Magma_DENSE;
    r.memory_location = Magma_DEV; r.dval = NULL; r.num_rows = r.nnz = dofs; r.num_cols = 1; r.storage_type = Magma_DENSE;
    p.memory_location = Magma_DEV; p.dval = NULL; p.num_rows = p.nnz = dofs; p.num_cols = 1; p.storage_type = Magma_DENSE;
    v.memory_location = Magma_DEV; v.dval = NULL; v.num_rows = v.nnz = dofs; v.num_cols = 1; v.storage_type = Magma_DENSE;
    s.memory_location = Magma_DEV; s.dval = NULL; s.num_rows = s.nnz = dofs; s.num_cols = 1; s.storage_type = Magma_DENSE;
    t.memory_location = Magma_DEV; t.dval = NULL; t.num_rows = t.nnz = dofs; t.num_cols = 1; t.storage_type = Magma_DENSE;

    rr.dval = q(0);
    r.dval = q(1);
    p.dval = q(2);
    v.dval = q(3);
    s.dval = q(4);
    t.dval = q(5);

    // solver setup
    CHECK(  magma_sresidualvec( A, b, *x, &r, &nom0, queue));
    magma_scopy( dofs, r.dval, 1, q(0), 1, queue );                            // rr = r
    magma_scopy( dofs, r.dval, 1, q(1), 1, queue );                            // q = r
    betanom = nom0;
    nom = nom0*nom0;
    rho_new = magma_sdot( dofs, r.dval, 1, r.dval, 1, queue );             // rho=<rr,r>
    rho_old = omega = alpha = MAGMA_S_MAKE( 1.0, 0. );
    beta = rho_new;
    solver_par->init_res = nom0;
    // array on host for the parameters
    CHECK( magma_smalloc_cpu( &skp_h, 8 ));
    
    nomb = magma_snrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    
    skp_h[0]=alpha;
    skp_h[1]=beta;
    skp_h[2]=omega;
    skp_h[3]=rho_old;
    skp_h[4]=rho_new;
    skp_h[5]=MAGMA_S_MAKE(nom, 0.0);
    magma_ssetvector( 8, skp_h, 1, skp, 1, queue );
    CHECK( magma_s_spmv( c_one, A, r, c_zero, v, queue ));             // z = A r
    nomb = magma_snrm2( dofs, b.dval, 1, queue );
    if( nom0 < solver_par->atol ||
        nom0/nomb < solver_par->rtol ){
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // computes p=r+beta*(p-omega*v)
        CHECK( magma_sbicgmerge1( dofs, skp, v.dval, r.dval, p.dval, queue ));

        CHECK( magma_s_spmv( c_one, A, p, c_zero, v, queue ));         // v = Ap
        solver_par->spmv_count++;
        CHECK( magma_smdotc( dofs, 1, q.dval, v.dval, d1, d2, skp, queue ));
        CHECK( magma_sbicgmerge4(  1, skp, queue ));
        CHECK( magma_sbicgmerge2( dofs, skp, r.dval, v.dval, s.dval, queue )); // s=r-alpha*v

        CHECK( magma_s_spmv( c_one, A, s, c_zero, t, queue ));         // t=As
        solver_par->spmv_count++;
        CHECK( magma_smdotc( dofs, 2, q.dval+4*dofs, t.dval, d1, d2, skp+6, queue ));
        CHECK( magma_sbicgmerge4(  2, skp, queue ));

        CHECK( magma_sbicgmerge_xrbeta( dofs, d1, d2, q.dval, r.dval, p.dval,
                                                    s.dval, t.dval, x->dval, skp, queue ));

        // check stopping criterion
        magma_sgetvector_async( 1 , skp+5, 1, skp_h+5, 1, queue );
        betanom = sqrt(MAGMA_S_REAL(skp_h[5]));

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        
        if (  betanom  < solver_par->atol || 
              betanom/nomb < solver_par->rtol ) {
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_sresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->atol ||
            solver_par->iter_res/solver_par->init_res < solver_par->rtol ){
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_smfree(&q, queue );  // frees all vectors
    magma_free(d1);
    magma_free(d2);
    magma_free( skp );
    magma_free_cpu( skp_h );

    solver_par->info = info;
    return info;
}   /* sbicgstab_merge */