static void ApplyPlaneRotation(double *dx, double *dy, double cs, double sn) { #if defined(PRECISION_s) | defined(PRECISION_d) double temp = (*dx); *dx = cs * (*dx) + sn * (*dy); *dy = -sn * temp + cs * (*dy); #else // below the code Joss Knight from MathWorks provided me with - this works. // No idea why the above code fails for real - maybe rounding. double temp = MAGMA_D_CONJ(cs) * (*dx) + MAGMA_D_CONJ(sn) * (*dy); *dy = -(sn) * (*dx) + cs * (*dy); *dx = temp; #endif }
static void GeneratePlaneRotation(double dx, double dy, double *cs, double *sn) { #if defined(PRECISION_s) | defined(PRECISION_d) if (dy == MAGMA_D_ZERO) { *cs = MAGMA_D_ONE; *sn = MAGMA_D_ZERO; } else if (MAGMA_D_ABS((dy)) > MAGMA_D_ABS((dx))) { double temp = dx / dy; *sn = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp)); *cs = temp * (*sn); } else { double temp = dy / dx; *cs = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp )); *sn = temp * (*cs); } #else // below the code Joss Knight from MathWorks provided me with - this works. // No idea why the above code fails for real - maybe rounding. real_Double_t rho = sqrt(MAGMA_D_REAL(MAGMA_D_CONJ(dx)*dx + MAGMA_D_CONJ(dy)*dy)); *cs = dx / rho; *sn = dy / rho; #endif }
/**
 * QMR-type iterative solver for A * x = b built on the merged kernels
 * magma_dqmr_1 .. magma_dqmr_5 (registers itself as Magma_QMRMERGE).
 *
 * The routine needs products with the conjugate transpose of A. It builds
 * that operator (AT) by copying A to the CPU, converting to CSR, transposing,
 * converting back to A's storage type and copying the result to the device.
 *
 * @param[in]     A           system matrix; used for the residual and the SpMV
 * @param[in]     b           right-hand side
 * @param[in,out] x           initial guess on entry, updated iterate on exit
 * @param[in,out] solver_par  reads rtol, atol, maxiter and verbose; receives
 *                            solver, numiter, spmv_count, init_res, iter_res,
 *                            final_res, runtime, info and (if verbose > 0)
 *                            the res_vec / timing histories
 * @param[in]     queue       queue handed to every MAGMA call
 *
 * @return the status that is also stored in solver_par->info.
 *
 * NOTE(review): CHECK is a project macro that is not visible in this block;
 * it is used together with the `cleanup` label, so it presumably stores a
 * non-zero return code in `info` and jumps to `cleanup` - confirm.
 */
extern "C" magma_int_t
magma_dqmr_merge(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_QMRMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;

    // solver variables
    double nom0, r0, res=0, nomb;
    double rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one,
           thet = c_one, thet1 = c_one, epsilon = c_one,
           beta = c_one, delta = c_one, pde = c_one, rde = c_one,
           gamm = c_one, gamm1 = c_one, psi = c_one;

    magma_int_t dofs = A.num_rows* b.num_cols;

    // conjugate transpose of A plus the two host-side staging matrices
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    // GPU workspace
    // (the former r_tld vector was only ever written, never read, and has
    //  been dropped together with its allocation, copy and free)
    magma_d_matrix r={Magma_CSR},
                   v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                   d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR},
                   p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR};
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // solver setup: r = residual of the initial guess, nom0 = its norm
    CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, y.dval, 1, queue );
    magma_dcopy( dofs, r.dval, 1, v.dval, 1, queue );
    magma_dcopy( dofs, r.dval, 1, wt.dval, 1, queue );
    magma_dcopy( dofs, r.dval, 1, z.dval, 1, queue );

    // transpose the matrix: device -> host CSR -> transpose -> original
    // storage type -> device. Every step that produces a matrix is checked,
    // so a failed conversion or transfer aborts the solve instead of
    // iterating with an empty AT.
    CHECK( magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_dmfree(&Ah1, queue );
    CHECK( magma_dmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_dmfree(&Ah1, queue );
    CHECK( magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ));
    magma_dmfree(&Ah2, queue );

    // stopping threshold: rtol * ||b|| (with ||b|| := 1 for b == 0),
    // but never below ATOLERANCE
    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    // initial guess already good enough
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));

    // v = y / rho
    // y = y / rho
    // w = wt / psi
    // z = z / psi
    magma_dqmr_1( r.num_rows, r.num_cols, rho, psi,
                  y.dval, z.dval, v.dval, w.dval, queue );

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        // rho and psi are divisors below; stop on a NaN/Inf value.
        // (rho and psi do not change before the first SpMV, so this single
        //  test replaces the two identical ones the loop used to contain.)
        if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // delta = z' * y;
        delta = magma_ddot( dofs, z.dval, 1, y.dval, 1, queue );
        if( magma_d_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // no precond: yt = y, zt = z
        if( solver_par->numiter == 1 ){
            // p = y;
            // q = z;
            magma_dcopy( dofs, y.dval, 1, p.dval, 1, queue );
            magma_dcopy( dofs, z.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_D_CONJ(delta/epsilon);

            // p = y - pde * p
            // q = z - rde * q
            magma_dqmr_2( r.num_rows, r.num_cols, pde, rde,
                          y.dval, z.dval, p.dval, q.dval, queue );
        }

        // pt = A * p
        CHECK( magma_d_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;

        // epsilon = q' * pt;
        epsilon = magma_ddot( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;
        if( magma_d_isnan_inf( epsilon ) || magma_d_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // v = pt - beta * v
        // y = v
        magma_dqmr_3( r.num_rows, r.num_cols, beta,
                      pt.dval, v.dval, y.dval, queue );

        rho1 = rho;
        // rho = norm(y);
        rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));

        // wt = A' * q - beta' * w;
        CHECK( magma_d_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_daxpy( dofs, - MAGMA_D_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );

        // no precond: z = wt
        magma_dcopy( dofs, wt.dval, 1, z.dval, 1, queue );

        thet1 = thet;
        thet = rho / (gamm * MAGMA_D_MAKE( MAGMA_D_ABS(beta), 0.0 ));
        gamm1 = gamm;

        gamm = c_one / magma_dsqrt(c_one + thet*thet);
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);

        if( magma_d_isnan_inf( thet ) || magma_d_isnan_inf( gamm ) || magma_d_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        if( solver_par->numiter == 1 ){
            // d = eta * p + pds * d;
            // s = eta * pt + pds * d;
            // x = x + d;
            // r = r - s;
            magma_dqmr_4( r.num_rows, r.num_cols, eta,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }
        else{
            pds = (thet1 * gamm) * (thet1 * gamm);

            // d = eta * p + pds * d;
            // s = eta * pt + pds * d;
            // x = x + d;
            // r = r - s;
            magma_dqmr_5( r.num_rows, r.num_cols, eta, pds,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }

        // psi = norm(z);
        psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ) );

        res = magma_dnrm2( dofs, r.dval, 1, queue );

        // record every verbose-th residual and elapsed time
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
        magma_dqmr_1( r.num_rows, r.num_cols, rho, psi,
                      y.dval, z.dval, v.dval, w.dval, queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;

    // recompute the true residual of the returned iterate
    double residual;
    CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    // final status: early exit with a clean status, otherwise classify by
    // comparing the initial and final residuals
    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&v, queue );
    magma_dmfree(&w, queue );
    magma_dmfree(&wt, queue );
    magma_dmfree(&d, queue );
    magma_dmfree(&s, queue );
    magma_dmfree(&z, queue );
    magma_dmfree(&q, queue );
    magma_dmfree(&p, queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&y, queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_dqmr_merge */
/**
 * Bulge-chasing kernel "type 2" on a band matrix: applies the reflector
 * stored at V/TAU from the right to the block below columns st..ed, then
 * generates a new reflector that eliminates the first column of the bulge
 * this creates and applies it from the left.
 *
 * Storage positions inside V and TAU are (sweep%2)*n + row when wantz == 0,
 * and are obtained from magma_bulge_findVTAUpos otherwise.
 *
 * @param[in]     n        matrix order (bounds the last row touched)
 * @param[in]     nb       band width (rows ed+1 .. min(ed+nb, n-1) are touched)
 * @param[in,out] A        band storage, accessed through the A(i,j) macro
 * @param[in]     lda      leading dimension of A; LAPACK calls use lda-1
 * @param[in,out] V        reflector vectors, accessed through the V(i) macro
 * @param[in]     ldv      forwarded to magma_bulge_findVTAUpos
 * @param[in,out] TAU      reflector scalars, accessed through the TAU(i) macro
 * @param[in]     st       first column of the current block
 * @param[in]     ed       last column of the current block
 * @param[in]     sweep    sweep index
 * @param[in]     Vblksiz  forwarded to magma_bulge_findVTAUpos
 * @param[in]     wantz    0 selects the compact V/TAU layout
 * @param[out]    work     workspace handed to lapackf77_dlarfx
 */
extern "C" void
magma_dsbtype2cb(magma_int_t n, magma_int_t nb,
                 double *A, magma_int_t lda,
                 double *V, magma_int_t ldv,
                 double *TAU,
                 magma_int_t st, magma_int_t ed, magma_int_t sweep,
                 magma_int_t Vblksiz, magma_int_t wantz,
                 double *work)
{
    const double c_one = MAGMA_D_ONE;
    magma_int_t ione = 1;
    magma_int_t vpos, taupos;

    /* Locate the reflector that was generated for the top block. */
    if ( wantz != 0 ) {
        magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, st, ldv, &vpos, &taupos);
    } else {
        vpos   = (sweep%2)*n + st;
        taupos = (sweep%2)*n + st;
    }

    magma_int_t ld_band   = lda-1;
    magma_int_t row_first = ed+1;
    magma_int_t row_last  = min(ed+nb,n-1);
    magma_int_t ncols     = ed-st+1;
    magma_int_t nrows     = row_last-row_first+1;

    /* No rows below the block: nothing to update. */
    if ( nrows <= 0 ) {
        return;
    }

    /* Apply the remaining right update coming from the top block. */
    lapackf77_dlarfx("R", &nrows, &ncols, V(vpos), TAU(taupos),
                     A(row_first, st), &ld_band, work);

    /* A single row cannot hold a bulge column to eliminate. */
    if ( nrows <= 1 ) {
        return;
    }

    /* Locate the slot for the reflector created at row_first. */
    if ( wantz != 0 ) {
        magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, row_first, ldv, &vpos, &taupos);
    } else {
        vpos   = (sweep%2)*n + row_first;
        taupos = (sweep%2)*n + row_first;
    }

    /* Remove the first column of the created bulge: move it into V
     * (leading entry set to one) and zero it in A. */
    *V(vpos) = c_one;
    memcpy(V(vpos+1), A(row_first+1, st), (nrows-1)*sizeof(double));
    memset(A(row_first+1, st), 0, (nrows-1)*sizeof(double));

    /* Eliminate the column at st. */
    lapackf77_dlarfg( &nrows, A(row_first, st), V(vpos+1), &ione, TAU(taupos) );

    /* Apply from the left on A(row_first:row_last, st+1:ed).
     * One column fewer is updated because we start at st+1:
     * column st is the one that has just been removed. */
    ncols = ncols-1;
    double tau_conj = MAGMA_D_CONJ(*TAU(taupos));
    lapackf77_dlarfx("L", &nrows, &ncols, V(vpos), &tau_conj,
                     A(row_first, st+1), &ld_band, work);
}
/**
 * Preconditioned BiCG-type iterative solver for A * x = b (registers itself
 * as Magma_PBICG).
 *
 * Each iteration applies the left/right preconditioner to the residual r and
 * its transposed form to the shadow residual rt, and performs one SpMV with A
 * and one with the conjugate transpose AT. AT is built by copying A to the
 * CPU, converting to CSR, transposing, converting back to A's storage type
 * and copying the result to the device.
 *
 * @param[in]     A            system matrix
 * @param[in]     b            right-hand side
 * @param[in,out] x            initial guess on entry, updated iterate on exit
 * @param[in,out] solver_par   reads rtol, atol, maxiter and verbose; receives
 *                             solver, numiter, spmv_count, init_res, iter_res,
 *                             final_res, runtime, info and (if verbose > 0)
 *                             the res_vec / timing histories
 * @param[in]     precond_par  preconditioner handed to magma_d_applyprecond_*
 * @param[in]     queue        queue handed to every MAGMA call
 *
 * @return the status that is also stored in solver_par->info.
 *
 * NOTE(review): CHECK is a project macro that is not visible in this block;
 * it is used together with the `cleanup` label, so it presumably stores a
 * non-zero return code in `info` and jumps to `cleanup` - confirm.
 */
extern "C" magma_int_t
magma_dpbicg(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_d_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_PBICG;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // some useful variables
    double c_zero = MAGMA_D_ZERO;
    double c_one = MAGMA_D_ONE;
    double c_neg_one = MAGMA_D_NEG_ONE;

    magma_int_t dofs = A.num_rows * b.num_cols;

    // workspace
    magma_d_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR},
                   z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR},
                   yt={Magma_CSR}, qt={Magma_CSR};

    // conjugate transpose of A plus the two host-side staging matrices
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // solver variables
    double alpha, rho, beta, rho_new, ptq;
    double res, nomb, nom0, r0;

    // transpose the matrix: device -> host CSR -> transpose -> original
    // storage type -> device. Every step that produces a matrix is checked,
    // so a failed conversion or transfer aborts the solve instead of
    // iterating with an empty AT.
    CHECK( magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_dmfree(&Ah1, queue );
    CHECK( magma_dmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_dmfree(&Ah1, queue );
    CHECK( magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ));
    magma_dmfree(&Ah2, queue );

    // solver setup: r = residual of the initial guess, nom0 = its norm
    CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    res = nom0;
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, rt.dval, 1, queue );                  // rt = r
    rho_new = magma_ddot( dofs, rt.dval, 1, r.dval, 1, queue );         // rho_new = <rt,r>
    rho = alpha = MAGMA_D_MAKE( 1.0, 0. );

    // stopping threshold: rtol * ||b|| (with ||b|| := 1 for b == 0),
    // but never below ATOLERANCE
    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    // initial guess already good enough
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // z = preconditioned r, zt = (transposed) preconditioned rt
        CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, r, &y, precond_par, queue ));
        CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, y, &z, precond_par, queue ));

        CHECK( magma_d_applyprecond_right( MagmaTrans, A, rt, &yt, precond_par, queue ));
        CHECK( magma_d_applyprecond_left( MagmaTrans, A, yt, &zt, precond_par, queue ));

        rho = rho_new;
        rho_new = magma_ddot( dofs, rt.dval, 1, z.dval, 1, queue );     // rho_new = <rt,z>
        if( magma_d_isnan_inf( rho_new ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        if( solver_par->numiter==1 ){
            magma_dcopy( dofs, z.dval, 1 , p.dval, 1, queue );          // p  = z
            magma_dcopy( dofs, zt.dval, 1 , pt.dval, 1, queue );        // pt = zt
        } else {
            beta = rho_new/rho;
            magma_dscal( dofs, beta, p.dval, 1, queue );                // p = beta*p
            magma_daxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue );  // p = z+beta*p
            magma_dscal( dofs, MAGMA_D_CONJ(beta), pt.dval, 1, queue ); // pt = beta'*pt
            magma_daxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue ); // pt = zt+beta'*pt
        }
        CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue ));          // q  = A*p
        CHECK( magma_d_spmv( c_one, AT, pt, c_zero, qt, queue ));       // qt = AT*pt
        solver_par->spmv_count++;
        solver_par->spmv_count++;
        ptq = magma_ddot( dofs, pt.dval, 1, q.dval, 1, queue );
        alpha = rho_new /ptq;
        // Breakdown (e.g. pt'q == 0): stop before a NaN/Inf step length is
        // applied, so x, r and rt keep their last finite values.
        if( magma_d_isnan_inf( alpha ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        magma_daxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue );      // x  = x + alpha*p
        magma_daxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue );
                                                                        // r  = r - alpha*q
        magma_daxpy( dofs, c_neg_one * MAGMA_D_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue );
                                                                        // rt = rt - alpha'*qt

        res = magma_dnrm2( dofs, r.dval, 1, queue );

        // record every verbose-th residual and elapsed time
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;

    // recompute the true residual of the returned iterate
    double residual;
    CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    // An early loop exit means convergence only if it was not caused by a
    // detected breakdown. Previously a NaN/Inf break was reported as
    // MAGMA_SUCCESS here; now such runs are classified from the residuals,
    // like runs that exhausted maxiter.
    if ( solver_par->numiter < solver_par->maxiter && info != MAGMA_DIVERGENCE ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&rt, queue );
    magma_dmfree(&p, queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&q, queue );
    magma_dmfree(&qt, queue );
    magma_dmfree(&y, queue );
    magma_dmfree(&yt, queue );
    magma_dmfree(&z, queue );
    magma_dmfree(&zt, queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_dpbicg */