/*
    Block-asynchronous iteration driver.

    Copies A to the device, splits A with magma_zcsrsplit into two
    matrices (D and R), copies both parts to the device and then calls
    magma_zbajac_csr solver_par->maxiter times with one local iteration
    per call.

    A           system matrix; passed to magma_zcsrsplit and transferred
                from Magma_CPU to Magma_DEV
    b           right-hand side
    x           in/out: iterate updated by magma_zbajac_csr
    solver_par  receives solver, init_res, final_res, runtime, numiter and
                info (0 if the residual decreased, -1 otherwise);
                res_vec and timing are reset to NULL

    Always returns MAGMA_SUCCESS.
*/
magma_int_t
magma_zbaiter(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_solver_par *solver_par )
{
    magma_z_sparse_matrix dev_A, host_D, host_R, dev_D, dev_R;
    real_Double_t t_begin, t_end;
    double res_norm;

    // solver feedback
    solver_par->solver = Magma_BAITER;
    solver_par->info = 0;

    // device copy of the full matrix, used only for the residual checks
    magma_z_mtransfer( A, &dev_A, Magma_CPU, Magma_DEV );

    // residual of the incoming iterate
    magma_zresidual( dev_A, b, *x, &res_norm );
    solver_par->init_res = res_norm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // split A (first argument 256: presumably the block size -- confirm
    // against magma_zcsrsplit) and move both parts to the device
    magma_zcsrsplit( 256, A, &host_D, &host_R );
    magma_z_mtransfer( host_D, &dev_D, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( host_R, &dev_R, Magma_CPU, Magma_DEV );

    magma_int_t local_sweeps = 1;

    magma_device_sync();
    t_begin = magma_wtime();

    // block-asynchronous iteration iterator
    int sweep = 0;
    while ( sweep < solver_par->maxiter ) {
        magma_zbajac_csr( local_sweeps, dev_D, dev_R, b, x );
        sweep++;
    }

    magma_device_sync();
    t_end = magma_wtime();
    solver_par->runtime = t_end - t_begin;

    // residual of the final iterate
    magma_zresidual( dev_A, b, *x, &res_norm );
    solver_par->final_res = res_norm;
    solver_par->numiter = solver_par->maxiter;

    // info reports only whether the residual went down
    solver_par->info =
        ( solver_par->init_res > solver_par->final_res ) ? 0 : -1;

    magma_z_mfree( &host_D );
    magma_z_mfree( &host_R );
    magma_z_mfree( &dev_D );
    magma_z_mfree( &dev_R );
    magma_z_mfree( &dev_A );

    return MAGMA_SUCCESS;
}   /* magma_zbaiter */
/*
    Iterative refinement.

    Starts from x = 0, r = b and repeats: scale r by 1/nom, apply
    magma_z_precond as inner solver (A * z = r), scale z back by nom,
    x = x + z, r = b - A x, nom = || r ||.  The loop stops when nom drops
    below r0 = max( nom0^2 * solver_par->epsilon, ATOLERANCE ) or when
    solver_par->maxiter is reached.

    A            system matrix
    b            right-hand side (device vector, b.dval is read)
    x            in/out: overwritten with zero, then with the iterate
    solver_par   in: epsilon, maxiter, verbose, res_vec, timing
                 out: solver, numiter, init_res, final_res, iter_res,
                      runtime, info
    precond_par  forwarded to magma_z_precond
    queue        forwarded to the sparse routines

    Always returns MAGMA_SUCCESS; the outcome is reported through
    solver_par->info (MAGMA_SUCCESS, MAGMA_SLOW_CONVERGENCE or
    MAGMA_DIVERGENCE).
*/
extern "C" magma_int_t
magma_ziterref(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    // remember the kernel stream so it can be restored on exit
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_ITERREF;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->init_res = residual;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE,
                       c_mone = MAGMA_Z_NEG_ONE;
    magma_int_t dofs = A.num_rows;

    // workspace
    magma_z_vector r, z;
    magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_z_vinit( &z, Magma_DEV, dofs, c_zero, queue );

    // solver variables
    double nom, nom0, r0;

    // solver setup
    magma_zscal( dofs, c_zero, x->dval, 1 );            // x = 0
    magma_zcopy( dofs, b.dval, 1, r.dval, 1 );          // r = b
    nom0 = magma_dznrm2( dofs, r.dval, 1 );             // nom0 = || r ||
    nom = nom0 * nom0;
    solver_par->init_res = nom0;    // replaces the value stored above

    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) {
        r0 = ATOLERANCE;
    }
    if ( nom < r0 ) {
        // Already converged.  The workspace must be released here as well,
        // otherwise both device vectors leak on this path.
        magma_z_vfree( &r, queue );
        magma_z_vfree( &z, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }

    // Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }

    // start iteration
    for ( solver_par->numiter = 1;
          solver_par->numiter < solver_par->maxiter;
          solver_par->numiter++ )
    {
        magma_zscal( dofs, MAGMA_Z_MAKE(1./nom, 0.), r.dval, 1 );  // scale it
        magma_z_precond( A, r, &z, precond_par, queue );  // inner solver: A * z = r
        magma_zscal( dofs, MAGMA_Z_MAKE(nom, 0.), z.dval, 1 );     // scale it
        magma_zaxpy( dofs, c_one, z.dval, 1, x->dval, 1 );         // x = x + z
        magma_z_spmv( c_mone, A, *x, c_zero, r, queue );           // r = - A x
        magma_zaxpy( dofs, c_one, b.dval, 1, r.dval, 1 );          // r = r + b
        nom = magma_dznrm2( dofs, r.dval, 1 );                     // nom = || r ||

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter) % solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) nom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( nom < r0 ) {
            break;
        }
    }

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;
    solver_par->iter_res = nom;

    if ( solver_par->numiter < solver_par->maxiter ) {
        solver_par->info = MAGMA_SUCCESS;
    }
    else {
        // iteration limit reached: log the last residual, then classify
        if ( solver_par->verbose > 0
             && (solver_par->numiter) % solver_par->verbose == 0 ) {
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) nom;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) tempo2-tempo1;
        }
        if ( solver_par->init_res > solver_par->final_res ) {
            solver_par->info = MAGMA_SLOW_CONVERGENCE;
        }
        else {
            solver_par->info = MAGMA_DIVERGENCE;
        }
    }

    magma_z_vfree( &r, queue );
    magma_z_vfree( &z, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_ziterref */
/*
    Block-asynchronous iteration with overlapping blocks.

    Brings A to the host, converts it to CSR, and builds
    precond_par->levels splittings with magma_zcsrsplit, each shifted by
    i*overlap.  The splittings are copied to the device and handed to
    magma_zbajac_csr_overlap, which is called in batches of `iterinc`
    sweeps until solver_par->numiter reaches solver_par->maxiter.

    A            system matrix (any memory location / storage type accepted
                 by magma_zmtransfer and magma_zmconvert)
    b            right-hand side
    x            in/out: iterate
    solver_par   in: maxiter, verbose, res_vec, timing
                 out: solver, init_res, final_res, numiter, spmv_count,
                      runtime, info
    precond_par  in: maxiter (local iterations per sweep) and levels
                 (number of splittings: 1, 2, 4, 8, 16, 32, 64 or 128)
    queue        forwarded to all MAGMA calls

    Returns (and stores in solver_par->info) MAGMA_SUCCESS if the residual
    decreased, MAGMA_DIVERGENCE if it did not, MAGMA_NOTCONVERGED for an
    unsupported number of levels, or the error code of the first failing
    CHECK'ed call.
*/
extern "C" magma_int_t
magma_zbaiter_overlap(
    magma_z_matrix A,
    magma_z_matrix b,
    magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_BAITERO;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO;

    real_Double_t tempo1, tempo2, runtime=0;
    double residual;
    magma_int_t localiter = precond_par->maxiter;

    magma_z_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, A_d={Magma_CSR},
                   r={Magma_CSR}, D={Magma_CSR}, R={Magma_CSR};
    magma_z_matrix blank={Magma_CSR};

    // setup
    magma_int_t matrices = precond_par->levels;
    // number of D_d / R_d entries cleanup is allowed to free; stays 0 until
    // `matrices` has been validated against the array size
    magma_int_t num_blocks = 0;
    struct magma_z_matrix D_d[ 256 ];
    struct magma_z_matrix R_d[ 256 ];
    magma_int_t overlap;
    magma_int_t blocksize = 256;
    magma_int_t iterinc;

    // Every error path ends in cleanup, which frees D_d[i] and R_d[i].
    // Start from blank matrices so that entries never filled by
    // magma_zmtransfer are still safe to pass to magma_zmfree.
    for ( int i = 0; i < 256; i++ ) {
        D_d[i] = blank;
        R_d[i] = blank;
    }

    if ( matrices==2 || matrices==4 || matrices==8 || matrices==16
         || matrices==32 || matrices==64 || matrices==128 ) {
        overlap = blocksize/matrices;
    }
    else if ( matrices == 1 ) {
        overlap = 0;
    }
    else {
        printf("error: overlap ratio not supported.\n");
        goto cleanup;
    }
    num_blocks = matrices;      // validated: at most 128 <= 256

    CHECK( magma_zmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah, &ACSR, Ah.storage_type, Magma_CSR, queue ));
    CHECK( magma_zmtransfer( ACSR, &A_d, Magma_CPU, Magma_DEV, queue ));

    // initial residual
    CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
    solver_par->init_res = residual;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) residual;
    }

    // one splitting per level, each shifted by a multiple of `overlap`
    for ( int i = 0; i < matrices; i++ ) {
        CHECK( magma_zcsrsplit( i*overlap, blocksize, ACSR, &D, &R, queue ));
        CHECK( magma_zmtransfer( D, &D_d[i], Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_zmtransfer( R, &R_d[i], Magma_CPU, Magma_DEV, queue ));
        magma_zmfree( &D, queue );
        magma_zmfree( &R, queue );
    }

    // without verbose output all sweeps run in a single batch; otherwise the
    // residual is sampled every `verbose` sweeps
    if ( solver_par->verbose == 0 ) {
        iterinc = solver_par->maxiter;
    }
    else {
        iterinc = solver_par->verbose;
    }
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // block-asynchronous iteration iterator
    do
    {
        tempo1 = magma_sync_wtime( queue );
        solver_par->numiter += iterinc;
        for ( int z = 0; z < iterinc; z++ ) {
            CHECK( magma_zbajac_csr_overlap( localiter, matrices, overlap,
                                             D_d, R_d, b, x, queue ));
        }
        tempo2 = magma_sync_wtime( queue );
        runtime += tempo2-tempo1;   // residual evaluation is not timed
        if ( solver_par->verbose > 0 ) {
            CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) residual;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) runtime;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    solver_par->runtime = runtime;
    CHECK( magma_zresidual( A_d, b, *x, &residual, queue));
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    if ( solver_par->init_res > solver_par->final_res ) {
        info = MAGMA_SUCCESS;
    }
    else {
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_zmfree( &r, queue );
    magma_zmfree( &D, queue );
    magma_zmfree( &R, queue );
    for ( int i = 0; i < num_blocks; i++ ) {
        magma_zmfree( &D_d[i], queue );
        magma_zmfree( &R_d[i], queue );
    }
    magma_zmfree( &A_d, queue );
    magma_zmfree( &ACSR, queue );
    magma_zmfree( &Ah, queue );

    solver_par->info = info;
    return info;
}   /* magma_zbaiter_overlap */
/*
    Jacobi solver driver.

    Builds the iteration matrix and vector with magma_zjacobisetup, runs
    magma_zjacobiiter with solver_par->maxiter iterations and reports the
    residual before and after.

    A           system matrix
    b           right-hand side
    x           in/out: initial guess on entry, iterate on exit
    solver_par  in: maxiter
                out: solver, init_res, final_res, runtime, numiter, info
                     (MAGMA_SUCCESS if the residual decreased, otherwise
                     MAGMA_DIVERGENCE); res_vec and timing are set to NULL
    queue       forwarded to the sparse routines

    Always returns MAGMA_SUCCESS.
*/
extern "C" magma_int_t
magma_zjacobi(
    magma_z_sparse_matrix A,
    magma_z_vector b,
    magma_z_vector *x,
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // keep the current kernel stream so it can be put back at the end
    magma_queue_t saved_stream;
    magmablasGetKernelStream( &saved_stream );

    // solver feedback
    solver_par->solver = Magma_JACOBI;
    solver_par->info = MAGMA_SUCCESS;

    real_Double_t t_begin, t_end;
    double res_norm;

    magma_zresidual( A, b, *x, &res_norm, queue );
    solver_par->init_res = res_norm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // constants and problem size
    magmaDoubleComplex zero = MAGMA_Z_ZERO;
    magmaDoubleComplex plus_one = MAGMA_Z_ONE;
    magmaDoubleComplex minus_one = MAGMA_Z_NEG_ONE;
    magma_int_t n = A.num_rows;

    double start_norm;
    magma_z_sparse_matrix iter_matrix;
    magma_z_vector iter_rhs, work;

    // work = A x - b; its norm is computed but not used further
    magma_z_vinit( &work, Magma_DEV, n, zero, queue );
    magma_z_spmv( plus_one, A, *x, zero, work, queue );
    magma_zaxpy( n, minus_one, b.dval, 1, work.dval, 1 );
    start_norm = magma_dznrm2( n, work.dval, 1 );

    // Jacobi setup
    magma_zjacobisetup( A, b, &iter_matrix, &iter_rhs, queue );

    // only maxiter is handed on to the inner iterator
    magma_z_solver_par inner_par;
    inner_par.maxiter = solver_par->maxiter;

    // Jacobi iterator, timed
    t_begin = magma_sync_wtime( queue );
    magma_zjacobiiter( iter_matrix, iter_rhs, x, &inner_par, queue );
    t_end = magma_sync_wtime( queue );
    solver_par->runtime = t_end - t_begin;

    magma_zresidual( A, b, *x, &res_norm, queue );
    solver_par->final_res = res_norm;
    solver_par->numiter = solver_par->maxiter;

    solver_par->info =
        ( solver_par->init_res > solver_par->final_res )
            ? MAGMA_SUCCESS
            : MAGMA_DIVERGENCE;

    magma_z_mfree( &iter_matrix, queue );
    magma_z_vfree( &iter_rhs, queue );
    magma_z_vfree( &work, queue );

    magmablasSetKernelStream( saved_stream );
    return MAGMA_SUCCESS;
}   /* magma_zjacobi */
magma_int_t magma_zpcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par ){ // prepare solver feedback solver_par->solver = Magma_PCG; solver_par->numiter = 0; solver_par->info = 0; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, rt, p, q, h; magma_z_vinit( &r, Magma_DEV, dofs, c_zero ); magma_z_vinit( &rt, Magma_DEV, dofs, c_zero ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero ); magma_z_vinit( &h, Magma_DEV, dofs, c_zero ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, gammaold, gammanew, den, res; // solver setup magma_zscal( dofs, c_zero, x->val, 1) ; // x = 0 magma_zcopy( dofs, b.val, 1, r.val, 1 ); // r = b // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h nom = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); nom0 = magma_dznrm2( dofs, r.val, 1 ); magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.val, 1, q.val, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) return MAGMA_SUCCESS; // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. 
(Ar,r) = %f\n", den); return -100; } //Chronometry real_Double_t tempo1, tempo2; magma_device_sync(); tempo1=magma_wtime(); if( solver_par->verbose > 0 ){ solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ){ // preconditioner magma_z_applyprecond_left( A, r, &rt, precond_par ); magma_z_applyprecond_right( A, rt, &h, precond_par ); gammanew = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) ); // gn = < r,h> if( solver_par->numiter==1 ){ magma_zcopy( dofs, h.val, 1, p.val, 1 ); // p = h }else{ beta = MAGMA_Z_MAKE(gammanew/gammaold, 0.); // beta = gn/go magma_zscal(dofs, beta, p.val, 1); // p = beta*p magma_zaxpy(dofs, c_one, h.val, 1, p.val, 1); // p = p + h } magma_z_spmv( c_one, A, p, c_zero, q ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.val, 1, q.val, 1)); // den = p dot q alpha = MAGMA_Z_MAKE(gammanew/den, 0.); magma_zaxpy(dofs, alpha, p.val, 1, x->val, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.val, 1, r.val, 1); // r = r - alpha q gammaold = gammanew; res = magma_dznrm2( dofs, r.val, 1 ); if( solver_par->verbose > 0 ){ magma_device_sync(); tempo2=magma_wtime(); if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nom0 < solver_par->epsilon ) { break; } } magma_device_sync(); tempo2=magma_wtime(); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual ); solver_par->iter_res = res; solver_par->final_res = residual; if( solver_par->numiter < solver_par->maxiter){ solver_par->info = 0; }else if( solver_par->init_res > solver_par->final_res ){ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { 
solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -2; } else{ if( solver_par->verbose > 0 ){ if( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = -1; } magma_z_vfree(&r); magma_z_vfree(&rt); magma_z_vfree(&p); magma_z_vfree(&q); magma_z_vfree(&h); return MAGMA_SUCCESS; } /* magma_zcg */
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); magma_z_matrix A={Magma_CSR}, B_d={Magma_CSR}; magma_z_matrix x={Magma_CSR}, b={Magma_CSR}; int i=1; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_zm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_z_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); magma_int_t n = A.num_rows; CHECK( magma_zmtransfer( A, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess CHECK( magma_zvinit( &b, Magma_DEV, A.num_cols, 1, zero, queue )); CHECK( magma_zvinit( &x, Magma_DEV, A.num_cols, 1, one, queue )); CHECK( magma_zprint_vector( b, 90, 10, queue )); CHECK( magma_zprint_matrix( A, queue )); printf("\n\n\n"); CHECK( magma_zprint_matrix( B_d, queue )); double res; res = magma_dznrm2(n, b.dval, 1, queue ); printf("norm0: %f\n", res); CHECK( magma_z_spmv( one, B_d, x, zero, b, queue )); // b = A x CHECK( magma_zprint_vector( b, 0, 100, queue )); CHECK( magma_zprint_vector( b, b.num_rows-10, 10, queue )); res = magma_dznrm2( n, b.dval, 1, queue ); printf("norm: %f\n", res); CHECK( magma_zresidual( B_d, x, b, &res, queue)); printf("res: %f\n", res); magma_zmfree(&B_d, queue ); magma_zmfree(&A, queue ); magma_zmfree(&x, queue ); magma_zmfree(&b, queue ); i++; } cleanup: magma_zmfree(&A, queue ); magma_zmfree(&B_d, queue ); magma_zmfree(&x, queue ); magma_zmfree(&b, queue ); magma_queue_destroy( queue ); magma_finalize(); return info; }
/*
    BiCGSTAB with merged kernels.

    Starts from x = 0.  Each iteration runs the merged kernels
    magma_zbicgmerge1, magma_zbicgmerge_spmv1, magma_zbicgmerge2,
    magma_zbicgmerge_spmv2 and magma_zbicgmerge_xrbeta; the scalar
    parameters live in the 8-entry device array skp
    ( [alpha|beta|omega|rho_old|rho|nom|tmp1|tmp2] ).  After every iteration
    entry 5 is copied back to the host and its square root is used as the
    residual norm for the stopping test
    ( betanom < atol  or  betanom / || b || < rtol ).

    A           system matrix
    b           right-hand side (b.dval is read)
    x           in/out: overwritten with zero, then with the iterate
    solver_par  in: atol, rtol, maxiter, verbose, res_vec, timing
                out: solver, numiter, spmv_count, init_res, iter_res,
                     final_res, runtime, info
    queue       forwarded to all MAGMA calls

    Returns (and stores in solver_par->info) MAGMA_SUCCESS,
    MAGMA_SLOW_CONVERGENCE, MAGMA_DIVERGENCE, or the error code of the first
    failing CHECK'ed call.
*/
extern "C" magma_int_t
magma_zbicgstab_merge2(
    magma_z_matrix A,
    magma_z_matrix b,
    magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_BICGSTABMERGE2;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // solver variables
    magmaDoubleComplex alpha, beta, omega, rho_old, rho_new, *skp_h=NULL;
    double nom, nom0, betanom, nomb;
    double residual;
    real_Double_t tempo1, tempo2;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE;

    magma_int_t dofs = A.num_rows;

    // workspace: q owns the device memory, the other six are views into it
    magma_z_matrix q={Magma_CSR}, r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR},
                   v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR};
    magmaDoubleComplex *d1=NULL, *d2=NULL, *skp=NULL;

    CHECK( magma_zmalloc( &d1, dofs*(2) ));
    CHECK( magma_zmalloc( &d2, dofs*(2) ));
    // array for the parameters
    CHECK( magma_zmalloc( &skp, 8 ));
    // skp = [alpha|beta|omega|rho_old|rho|nom|tmp1|tmp2]
    CHECK( magma_zvinit( &q, Magma_DEV, dofs*6, 1, c_zero, queue ));
    // q = rr|r|p|v|s|t
    rr.memory_location = Magma_DEV; rr.dval = NULL; rr.num_rows = rr.nnz = dofs;
    r.memory_location  = Magma_DEV; r.dval  = NULL; r.num_rows  = r.nnz  = dofs;
    p.memory_location  = Magma_DEV; p.dval  = NULL; p.num_rows  = p.nnz  = dofs;
    v.memory_location  = Magma_DEV; v.dval  = NULL; v.num_rows  = v.nnz  = dofs;
    s.memory_location  = Magma_DEV; s.dval  = NULL; s.num_rows  = s.nnz  = dofs;
    t.memory_location  = Magma_DEV; t.dval  = NULL; t.num_rows  = t.nnz  = dofs;

    rr.dval = q(0);
    r.dval = q(1);
    p.dval = q(2);
    v.dval = q(3);
    s.dval = q(4);
    t.dval = q(5);

    // solver setup
    magma_zscal( dofs, c_zero, x->dval, 1, queue );         // x = 0
    CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue));
    magma_zcopy( dofs, r.dval, 1, q(0), 1, queue );         // rr = r
    magma_zcopy( dofs, r.dval, 1, q(1), 1, queue );         // q = r
    betanom = nom0;
    nom = nom0*nom0;
    rho_new = magma_zdotc( dofs, r.dval, 1, r.dval, 1, queue ); // rho=<rr,r>
    rho_old = omega = alpha = MAGMA_Z_MAKE( 1.0, 0. );
    beta = rho_new;
    solver_par->init_res = nom0;

    // array on host for the parameters
    CHECK( magma_zmalloc_cpu( &skp_h, 8 ));

    skp_h[0]=alpha;
    skp_h[1]=beta;
    skp_h[2]=omega;
    skp_h[3]=rho_old;
    skp_h[4]=rho_new;
    skp_h[5]=MAGMA_Z_MAKE(nom, 0.0);
    // all 8 entries are uploaded below; give the two scratch slots a
    // defined value instead of copying uninitialized host memory
    skp_h[6]=c_zero;
    skp_h[7]=c_zero;
    magma_zsetvector( 8, skp_h, 1, skp, 1, queue );

    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }

    nomb = magma_dznrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ) {
        // zero right-hand side: avoid 0/0 in the relative stopping tests
        nomb = 1.0;
    }
    if ( nom0 < solver_par->atol || nom0/nomb < solver_par->rtol ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    CHECK( magma_z_spmv( c_one, A, r, c_zero, v, queue ));  // z = A r

    // Chronometry
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // computes p=r+beta*(p-omega*v)
        CHECK( magma_zbicgmerge1( dofs, skp, v.dval, r.dval, p.dval, queue ));

        CHECK( magma_zbicgmerge_spmv1( A, d1, d2, q(2), q(0), q(3), skp, queue ));
        solver_par->spmv_count++;
        CHECK( magma_zbicgmerge2( dofs, skp, r.dval, v.dval, s.dval, queue )); // s=r-alpha*v
        CHECK( magma_zbicgmerge_spmv2( A, d1, d2, q(4), q(5), skp, queue ));
        solver_par->spmv_count++;
        CHECK( magma_zbicgmerge_xrbeta( dofs, d1, d2, q(0), q(1), q(2),
                                        q(4), q(5), x->dval, skp, queue ));

        // check stopping criterion: fetch skp[5] and take its square root
        magma_zgetvector( 1, skp+5, 1, skp_h+5, 1, queue );
        betanom = sqrt(MAGMA_Z_REAL(skp_h[5]));

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter) % solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                    = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( betanom < solver_par->atol || betanom/nomb < solver_par->rtol ) {
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    // use the caller's queue, like every other call in this routine
    CHECK( magma_zresidual( A, b, *x, &residual, queue ));
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    }
    else {
        // iteration limit reached: log the last residual, then classify
        if ( solver_par->verbose > 0
             && (solver_par->numiter) % solver_par->verbose == 0 ) {
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) betanom;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) tempo2-tempo1;
        }
        if ( solver_par->init_res > solver_par->final_res ) {
            info = MAGMA_SLOW_CONVERGENCE;
            // the last iteration may still have met the tolerance
            if ( solver_par->iter_res < solver_par->atol ||
                 solver_par->iter_res/solver_par->init_res < solver_par->rtol ) {
                info = MAGMA_SUCCESS;
            }
        }
        else {
            info = MAGMA_DIVERGENCE;
        }
    }

cleanup:
    magma_zmfree( &q, queue );  // frees all vectors
    magma_free( d1 );
    magma_free( d2 );
    magma_free( skp );
    magma_free_cpu( skp_h );

    solver_par->info = info;
    return info;
}   /* zbicgstab_merge2 */
extern "C" magma_int_t magma_zcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x, magma_z_solver_par *solver_par, magma_queue_t queue ) { // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_CG; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // local variables magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE; magma_int_t dofs = A.num_rows; // GPU workspace magma_z_vector r, p, q; magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &p, Magma_DEV, dofs, c_zero, queue ); magma_z_vinit( &q, Magma_DEV, dofs, c_zero, queue ); // solver variables magmaDoubleComplex alpha, beta; double nom, nom0, r0, betanom, betanomsq, den; // solver setup magma_zscal( dofs, c_zero, x->dval, 1) ; // x = 0 magma_zcopy( dofs, b.dval, 1, r.dval, 1 ); // r = b magma_zcopy( dofs, b.dval, 1, p.dval, 1 ); // p = b nom0 = betanom = magma_dznrm2( dofs, r.dval, 1 ); nom = nom0 * nom0; // nom = r' * r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL( magma_zdotc(dofs, p.dval, 1, q.dval, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. 
(Ar,r) = %f\n", den); magmablasSetKernelStream( orig_queue ); return MAGMA_NONSPD; solver_par->info = MAGMA_NONSPD; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { alpha = MAGMA_Z_MAKE(nom/den, 0.); magma_zaxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p magma_zaxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q betanom = magma_dznrm2(dofs, r.dval, 1); // betanom = || r || betanomsq = betanom * betanom; // betanoms = r' * r if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < r0 ) { break; } beta = MAGMA_Z_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom magma_zscal(dofs, beta, p.dval, 1); // p = beta*p magma_zaxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r magma_z_spmv( c_one, A, p, c_zero, q, queue ); // q = A p den = MAGMA_Z_REAL(magma_zdotc(dofs, p.dval, 1, q.dval, 1)); // den = p dot q nom = betanomsq; } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; magma_zresidual( A, b, *x, &residual, queue ); solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } 
solver_par->info = MAGMA_SLOW_CONVERGENCE; } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } magma_z_vfree(&r, queue ); magma_z_vfree(&p, queue ); magma_z_vfree(&q, queue ); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } /* magma_zcg */