Exemplo n.º 1
0
extern "C" magma_int_t
magma_spcg_merge(
    magma_s_matrix A, magma_s_matrix b, magma_s_matrix *x,
    magma_s_solver_par *solver_par,
    magma_s_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_PCGMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // solver variables
    float alpha, beta, gamma, rho, tmp1, *skp_h={0};
    float nom, nom0, r0,  res, nomb;
    float den;

    // some useful variables
    float c_zero = MAGMA_S_ZERO, c_one = MAGMA_S_ONE;
    magma_int_t dofs = A.num_rows*b.num_cols;

    magma_s_matrix r={Magma_CSR}, d={Magma_CSR}, z={Magma_CSR}, h={Magma_CSR},
                    rt={Magma_CSR};
    float *d1=NULL, *d2=NULL, *skp=NULL;

    // GPU workspace
    CHECK( magma_svinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &rt, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &h, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    
    CHECK( magma_smalloc( &d1, dofs*(2) ));
    CHECK( magma_smalloc( &d2, dofs*(2) ));
    // array for the parameters
    CHECK( magma_smalloc( &skp, 7 ));
    // skp = [alpha|beta|gamma|rho|tmp1|tmp2|res]

    // solver setup
    CHECK(  magma_sresidualvec( A, b, *x, &r, &nom0, queue));
    
    // preconditioner
    CHECK( magma_s_applyprecond_left( MagmaNoTrans, A, r, &rt, precond_par, queue ));
    CHECK( magma_s_applyprecond_right( MagmaNoTrans, A, rt, &h, precond_par, queue ));
    
    magma_scopy( dofs, h.dval, 1, d.dval, 1, queue );  
    nom = MAGMA_S_ABS( magma_sdot( dofs, r.dval, 1, h.dval, 1, queue ));
    CHECK( magma_s_spmv( c_one, A, d, c_zero, z, queue ));              // z = A d
    den = magma_sdot( dofs, d.dval, 1, z.dval, 1, queue ); // den = d'* z
    solver_par->init_res = nom0;
    
    nomb = magma_snrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
    // check positive definite
    if ( MAGMA_S_ABS(den) <= 0.0 ) {
        info = MAGMA_NONSPD;
        goto cleanup;
    }    
    
    // array on host for the parameters
    CHECK( magma_smalloc_cpu( &skp_h, 7 ));
    
    alpha = rho = gamma = tmp1 = c_one;
    beta =  magma_sdot( dofs, h.dval, 1, r.dval, 1, queue );
    skp_h[0]=alpha;
    skp_h[1]=beta;
    skp_h[2]=gamma;
    skp_h[3]=rho;
    skp_h[4]=tmp1;
    skp_h[5]=MAGMA_S_MAKE(nom, 0.0);
    skp_h[6]=MAGMA_S_MAKE(nom, 0.0);

    magma_ssetvector( 7, skp_h, 1, skp, 1, queue );

    //Chronometry
    real_Double_t tempo1, tempo2, tempop1, tempop2;
    tempo1 = magma_sync_wtime( queue );
    
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        
        // computes SpMV and dot product
        CHECK( magma_scgmerge_spmv1(  A, d1, d2, d.dval, z.dval, skp, queue ));            
        solver_par->spmv_count++;
            
        
        if( precond_par->solver == Magma_JACOBI ){
                CHECK( magma_sjcgmerge_xrbeta( dofs, d1, d2, precond_par->d.dval, x->dval, r.dval, d.dval, z.dval, h.dval, skp, queue ));
        }
        else if( precond_par->solver == Magma_NONE ){
            // updates x, r
            CHECK( magma_spcgmerge_xrbeta1( dofs, x->dval, r.dval, d.dval, z.dval, skp, queue ));
            // computes scalars and updates d
            CHECK( magma_spcgmerge_xrbeta2( dofs, d1, d2, r.dval, r.dval, d.dval, skp, queue ));
        } else {
        
            // updates x, r
            CHECK( magma_spcgmerge_xrbeta1( dofs, x->dval, r.dval, d.dval, z.dval, skp, queue ));
            
            // preconditioner in between
            tempop1 = magma_sync_wtime( queue );
            CHECK( magma_s_applyprecond_left( MagmaNoTrans, A, r, &rt, precond_par, queue ));
            CHECK( magma_s_applyprecond_right( MagmaNoTrans, A, rt, &h, precond_par, queue ));
            //            magma_scopy( dofs, r.dval, 1, h.dval, 1 );  
            tempop2 = magma_sync_wtime( queue );
            precond_par->runtime += tempop2-tempop1;
            
            // computes scalars and updates d
            CHECK( magma_spcgmerge_xrbeta2( dofs, d1, d2, h.dval, r.dval, d.dval, skp, queue ));
        }
        
        //if( solver_par->numiter==1){
        //    magma_scopy( dofs, h.dval, 1, d.dval, 1 );   
        //}
        
                // updates x, r, computes scalars and updates d
        //CHECK( magma_scgmerge_xrbeta( dofs, d1, d2, x->dval, r.dval, d.dval, z.dval, skp, queue ));

        
        // check stopping criterion (asynchronous copy)
        magma_sgetvector( 1 , skp+6, 1, skp_h+6, 1, queue );
        res = sqrt(MAGMA_S_ABS(skp_h[6]));

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_sresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->atol ||
            solver_par->iter_res/solver_par->init_res < solver_par->rtol ){
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_smfree(&r, queue );
    magma_smfree(&z, queue );
    magma_smfree(&d, queue );
    magma_smfree(&rt, queue );
    magma_smfree(&h, queue );

    magma_free( d1 );
    magma_free( d2 );
    magma_free( skp );
    magma_free_cpu( skp_h );

    solver_par->info = info;
    return info;
}   /* magma_spcg_merge */
Exemplo n.º 2
0
extern "C" magma_int_t
magma_scg_merge(
    magma_s_sparse_matrix A, magma_s_vector b, magma_s_vector *x,  
    magma_s_solver_par *solver_par,
    magma_queue_t queue )
{
    // set queue for old dense routines
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_CGMERGE;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS; 

    // some useful variables
    float c_zero = MAGMA_S_ZERO, c_one = MAGMA_S_ONE;
    magma_int_t dofs = A.num_rows;

    // GPU stream
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );

    // GPU workspace
    magma_s_vector r, d, z;
    magma_s_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_s_vinit( &d, Magma_DEV, dofs, c_zero, queue );
    magma_s_vinit( &z, Magma_DEV, dofs, c_zero, queue );
    
    float *d1, *d2, *skp;
    d1 = NULL;
    d2 = NULL;
    skp = NULL;
    magma_int_t stat_dev = 0, stat_cpu = 0;
    stat_dev += magma_smalloc( &d1, dofs*(1) );
    stat_dev += magma_smalloc( &d2, dofs*(1) );
    // array for the parameters
    stat_dev += magma_smalloc( &skp, 6 );       
    // skp = [alpha|beta|gamma|rho|tmp1|tmp2]
    if( stat_dev != 0 ){
        magma_free( d1 );
        magma_free( d2 );
        magma_free( skp );
        printf("error: memory allocation.\n");
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // solver variables
    float alpha, beta, gamma, rho, tmp1, *skp_h;
    float nom, nom0, r0, betanom, den;

    // solver setup
    magma_sscal( dofs, c_zero, x->dval, 1) ;                     // x = 0
    magma_scopy( dofs, b.dval, 1, r.dval, 1 );                    // r = b
    magma_scopy( dofs, b.dval, 1, d.dval, 1 );                    // d = b
    nom0 = betanom = magma_snrm2( dofs, r.dval, 1 );               
    nom = nom0 * nom0;                                           // nom = r' * r
    magma_s_spmv( c_one, A, d, c_zero, z, queue );                      // z = A d
    den = MAGMA_S_REAL( magma_sdot(dofs, d.dval, 1, z.dval, 1) ); // den = d'* z
    solver_par->init_res = nom0;
    
    // array on host for the parameters
    stat_cpu += magma_smalloc_cpu( &skp_h, 6 );
    if( stat_cpu != 0 ){
        magma_free( d1 );
        magma_free( d2 );
        magma_free( skp );
        magma_free_cpu( skp_h );
        printf("error: memory allocation.\n");
        return MAGMA_ERR_HOST_ALLOC;
    }
    
    alpha = rho = gamma = tmp1 = c_one; 
    beta =  magma_sdot(dofs, r.dval, 1, r.dval, 1);
    skp_h[0]=alpha; 
    skp_h[1]=beta; 
    skp_h[2]=gamma; 
    skp_h[3]=rho; 
    skp_h[4]=tmp1; 
    skp_h[5]=MAGMA_S_MAKE(nom, 0.0);

    magma_ssetvector( 6, skp_h, 1, skp, 1 );
    
    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;
    if ( nom < r0 ) {
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }
    // check positive definite
    if (den <= 0.0) {
        printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
        magmablasSetKernelStream( orig_queue );
        return MAGMA_NONSPD;
        solver_par->info = MAGMA_NONSPD;;
    }
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) nom0;
        solver_par->timing[0] = 0.0;
    }
    
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ) {

        magmablasSetKernelStream(stream[0]);
        
        // computes SpMV and dot product
        magma_scgmerge_spmv1(  A, d1, d2, d.dval, z.dval, skp, queue ); 
            
        // updates x, r, computes scalars and updates d
        magma_scgmerge_xrbeta( dofs, d1, d2, x->dval, r.dval, d.dval, z.dval, skp, queue ); 

        // check stopping criterion (asynchronous copy)
        magma_sgetvector_async( 1 , skp+1, 1, 
                                                    skp_h+1, 1, stream[1] );
        betanom = sqrt(MAGMA_S_REAL(skp_h[1]));

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < r0 ) {
            break;
        }

    } 
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    magma_sresidual( A, b, *x, &residual, queue );
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter) {
        solver_par->info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_SLOW_CONVERGENCE;
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_DIVERGENCE;
    }
    magma_s_vfree(&r, queue );
    magma_s_vfree(&z, queue );
    magma_s_vfree(&d, queue );

    magma_free( d1 );
    magma_free( d2 );
    magma_free( skp );
    magma_free_cpu( skp_h );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_scg_merge */
Exemplo n.º 3
0
extern "C" magma_int_t
magma_scg_merge(
    magma_s_matrix A, magma_s_matrix b, magma_s_matrix *x,
    magma_s_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_CGMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // solver variables
    float alpha, beta, gamma, rho, tmp1, *skp_h={0};
    float nom, nom0, betanom, den, nomb;

    // some useful variables
    float c_zero = MAGMA_S_ZERO, c_one = MAGMA_S_ONE;
    magma_int_t dofs = A.num_rows*b.num_cols;

    magma_s_matrix r={Magma_CSR}, d={Magma_CSR}, z={Magma_CSR}, B={Magma_CSR}, C={Magma_CSR};
    float *d1=NULL, *d2=NULL, *skp=NULL;

    // GPU workspace
    CHECK( magma_svinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_svinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    
    CHECK( magma_smalloc( &d1, dofs*(1) ));
    CHECK( magma_smalloc( &d2, dofs*(1) ));
    // array for the parameters
    CHECK( magma_smalloc( &skp, 6 ));
    // skp = [alpha|beta|gamma|rho|tmp1|tmp2]
    
    // solver setup
    magma_sscal( dofs, c_zero, x->dval, 1, queue );                      // x = 0
    //CHECK(  magma_sresidualvec( A, b, *x, &r, nom0, queue));
    magma_scopy( dofs, b.dval, 1, r.dval, 1, queue );                    // r = b
    magma_scopy( dofs, r.dval, 1, d.dval, 1, queue );                    // d = r
    nom0 = betanom = magma_snrm2( dofs, r.dval, 1, queue );
    nom = nom0 * nom0;                                           // nom = r' * r
    CHECK( magma_s_spmv( c_one, A, d, c_zero, z, queue ));              // z = A d
    den = MAGMA_S_ABS( magma_sdot( dofs, d.dval, 1, z.dval, 1, queue ) ); // den = d'* z
    solver_par->init_res = nom0;
    
    nomb = magma_snrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    
    // array on host for the parameters
    CHECK( magma_smalloc_cpu( &skp_h, 6 ));
    
    alpha = rho = gamma = tmp1 = c_one;
    beta =  magma_sdot( dofs, r.dval, 1, r.dval, 1, queue );
    skp_h[0]=alpha;
    skp_h[1]=beta;
    skp_h[2]=gamma;
    skp_h[3]=rho;
    skp_h[4]=tmp1;
    skp_h[5]=MAGMA_S_MAKE(nom, 0.0);

    magma_ssetvector( 6, skp_h, 1, skp, 1, queue );

    if( nom0 < solver_par->atol ||
        nom0/nomb < solver_par->rtol ){
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) nom0;
        solver_par->timing[0] = 0.0;
    }
    // check positive definite
    if (den <= 0.0) {
        info = MAGMA_NONSPD; 
        goto cleanup;
    }
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // computes SpMV and dot product
        CHECK( magma_scgmerge_spmv1(  A, d1, d2, d.dval, z.dval, skp, queue ));
        solver_par->spmv_count++;
        // updates x, r, computes scalars and updates d
        CHECK( magma_scgmerge_xrbeta( dofs, d1, d2, x->dval, r.dval, d.dval, z.dval, skp, queue ));

        // check stopping criterion (asynchronous copy)
        magma_sgetvector( 1 , skp+1, 1, skp_h+1, 1, queue );
        betanom = sqrt(MAGMA_S_ABS(skp_h[1]));

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < solver_par->atol || 
              betanom/nomb < solver_par->rtol ) {
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    CHECK(  magma_sresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->atol ||
            solver_par->iter_res/solver_par->init_res < solver_par->rtol ){
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_smfree(&r, queue );
    magma_smfree(&z, queue );
    magma_smfree(&d, queue );
    magma_smfree(&B, queue );
    magma_smfree(&C, queue );

    magma_free( d1 );
    magma_free( d2 );
    magma_free( skp );
    magma_free_cpu( skp_h );

    solver_par->info = info;
    return info;
}   /* magma_scg_merge */