/*
 * Compute the relaxation factor  *om = dot(t,s) / dot(t,t).
 *
 * t, s  : input vectors; only read through lis_vector_dot
 * angle : kept for interface compatibility; this implementation does not use it
 * om    : output, receives the quotient of the two dot products
 *
 * No guard is applied when dot(t,t) is zero.
 */
void lis_idrs_omega(LIS_VECTOR t, LIS_VECTOR s, LIS_SCALAR angle, LIS_SCALAR *om)
{
	/* both results are LIS_SCALAR so the pointers match lis_vector_dot's output argument */
	LIS_SCALAR tt, ts;

	(void)angle;

	lis_vector_dot(t,t,&tt);
	lis_vector_dot(t,s,&ts);
	*om = ts / tt;
}
/*
 * Thin wrapper around lis_vector_dot for callers that hold LIS_VECTOR_F handles
 * (presumably the Fortran binding -- confirm against the interface headers).
 *
 * x, y  : vector handles, converted with LIS_V2P before the call
 * value : receives the dot product
 * ierr  : receives the status returned by lis_vector_dot
 *
 * On a non-zero status the function returns without emitting LIS_DEBUG_FUNC_OUT.
 */
void lis_vector_dot_f(LIS_VECTOR_F *x, LIS_VECTOR_F *y, LIS_SCALAR *value, LIS_INT *ierr)
{
	LIS_VECTOR vx, vy;

	LIS_DEBUG_FUNC_IN;

	vx = (LIS_VECTOR)LIS_V2P(x);
	vy = (LIS_VECTOR)LIS_V2P(y);

	*ierr = lis_vector_dot(vx,vy,value);
	if( *ierr == 0 )
	{
		LIS_DEBUG_FUNC_OUT;
	}
}
/*
 * Orthonormalize the vectors P[0] .. P[s-1] in place with a Gram-Schmidt sweep:
 * each P[j] is scaled to unit 2-norm, then its component is removed from every
 * later vector P[i], i > j.
 *
 * s : number of vectors in P to process; nothing is touched when s <= 0
 * P : array of at least s vectors, modified in place
 *
 * No guard is applied for a zero-norm P[j].
 */
void lis_idrs_orth(LIS_INT s, LIS_VECTOR *P)
{
	LIS_INT i,j;
	LIS_REAL r;
	LIS_SCALAR d;

	for(j=0;j<s;j++)
	{
		/* P[j] = P[j] / ||P[j]||_2 */
		lis_vector_nrm2(P[j],&r);
		r = 1.0/r;
		lis_vector_scale(r,P[j]);

		for(i=j+1;i<s;i++)
		{
			/* P[i] = P[i] - dot(P[j],P[i]) * P[j] */
			lis_vector_dot(P[j],P[i],&d);
			lis_vector_axpy(-d,P[j],P[i]);
		}
	}
}
/*
 * Inverse iteration eigensolver.
 *
 * Reads A, x, the shift, the iteration limit, the tolerance and the output flags
 * from esolver, shifts the diagonal of A by lshift for the duration of the call,
 * and repeatedly solves with an inner linear solver ("-i bicg -p none" plus
 * whatever lis_solver_set_optionC adds) until the relative residual drops to tol
 * or emaxiter iterations have been performed.
 *
 * Results are written to esolver->retcode, iter[0], resid[0], evalue[0] and the
 * normalized eigenvector estimate is left in esolver->x.
 *
 * Returns LIS_SUCCESS on convergence, LIS_MAXITER when the iteration limit is
 * reached, or the error code of lis_precon_create when preconditioner setup fails.
 * On every exit the diagonal shift applied to A is undone and the inner solver
 * is destroyed.
 */
LIS_INT lis_eii(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_SCALAR evalue, ievalue;
	LIS_SCALAR lshift;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,iter2,output;
	LIS_REAL nrm2,resid;
	LIS_VECTOR z,q;
	LIS_SOLVER solver;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_INT err;
	LIS_PRECON precon;
	LIS_INT nsol, precon_type;
	char solvername[128], preconname[128];

	LIS_DEBUG_FUNC_IN;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	lshift = esolver->lshift;
	output = esolver->options[LIS_EOPTIONS_OUTPUT];

	A = esolver->A;
	x = esolver->x;
	if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,x);
	}
	evalue = 1.0;
	z = esolver->work[0];
	q = esolver->work[1];

	iter=0;
	ievalue = 1/(evalue);
	/* defined value for the result fields when the loop body never runs */
	resid = 0.0;

	/* NOTE(review): the header lines below are gated by a bitwise AND of output with
	   the rank test, unlike the logical test used for the residual history further
	   down -- confirm this is intended */
#ifdef _LONG__DOUBLE
	if( output & (A->my_rank==0) ) printf("local shift : %Le\n", lshift);
#else
#if defined(_COMPLEX)
	if( output & (A->my_rank==0) ) printf("local shift : "CFMT"\n", cfmt(lshift));
#else
	if( output & (A->my_rank==0) ) printf("local shift : %e\n", lshift);
#endif
#endif
	if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

	lis_solver_create(&solver);
	lis_solver_set_option("-i bicg -p none",solver);
	lis_solver_set_optionC(solver);
	lis_solver_get_solver(solver, &nsol);
	lis_solver_get_precon(solver, &precon_type);
	lis_solver_get_solvername(nsol, solvername);
	lis_solver_get_preconname(precon_type, preconname);
	if( output & (A->my_rank==0) ) printf("linear solver : %s\n", solvername);
	if( output & (A->my_rank==0) ) printf("preconditioner : %s\n", preconname);

	/* create preconditioner */
	solver->A = A;
	err = lis_precon_create(solver, &precon);
	if( err )
	{
		/* leave A and the inner solver in the same state as the regular exits do:
		   undo the shift and release the solver instead of returning with both live */
		if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
		lis_solver_destroy(solver);
		esolver->retcode = err;
		LIS_DEBUG_FUNC_OUT;
		return err;
	}

	while (iter<emaxiter)
	{
		iter = iter+1;

		/* x = x / ||x||_2 */
		lis_vector_nrm2(x, &nrm2);
		lis_vector_scale(1/nrm2, x);

		/* z = (A - lshift I)^-1 * x */
		lis_solve_kernel(A, x, z, solver, precon);
		lis_solver_get_iter(solver,&iter2);

		/* 1/evalue = <x,z> */
		lis_vector_dot(x, z, &ievalue);

		/* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
		lis_vector_axpyz(-ievalue,x,z,q);
		lis_vector_nrm2(q, &resid);
		resid = sabs(resid/ievalue);

		/* x = z */
		lis_vector_copy(z,x);

		/* accumulate the inner solver's timings into the eigensolver */
		lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
		esolver->ptime += solver->ptime;
		esolver->itime += solver->itime;
		esolver->p_c_time += solver->p_c_time;
		esolver->p_i_time += solver->p_i_time;

		/* convergence check */
		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
		}

		if( tol >= resid )
		{
			esolver->retcode = LIS_SUCCESS;
			esolver->iter[0] = iter;
			esolver->resid[0] = resid;
			esolver->evalue[0] = 1/ievalue;
			lis_vector_nrm2(x, &nrm2);
			lis_vector_scale(1/nrm2, x);
			if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
			lis_precon_destroy(precon);
			lis_solver_destroy(solver);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	lis_precon_destroy(precon);

	esolver->retcode = LIS_MAXITER;
	esolver->iter[0] = iter;
	esolver->resid[0] = resid;
	esolver->evalue[0] = 1/ievalue;
	lis_vector_nrm2(x, &nrm2);
	lis_vector_scale(1/nrm2, x);
	if (lshift != 0)
	{
		lis_matrix_shift_diagonal(A, -lshift);
	}
	lis_solver_destroy(solver);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * ORTHOMIN(m) iteration for the system held in solver.
 *
 * Work vectors are laid out as r, rtld, then three rings of (m+1) vectors each:
 * p[], ap[] and aptld[], where m is the restart option.  dotsave[] keeps the
 * reciprocals of the most recent <aptld,aptld> values, newest first.
 *
 * Writes solver->retcode, iter, resid (and ptime on success, rhistory when the
 * output flags ask for it); the solution is accumulated in solver->x.
 *
 * Returns LIS_SUCCESS when the initial-residual check reports completion or the
 * residual reaches tol, LIS_BREAKDOWN when <aptld,aptld> is exactly zero, and
 * LIS_MAXITER otherwise.  dotsave is released on every exit.
 */
LIS_INT lis_orthomin(LIS_SOLVER solver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld,*p,*ap,*aptld;
	LIS_SCALAR *dotsave;
	LIS_SCALAR alpha, beta;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;
	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A = solver->A;
	M = solver->precon;
	x = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	m = solver->options[LIS_OPTIONS_RESTART];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	ptime = 0.0;
	/* nrm2 is reported on the breakdown and max-iteration exits, which can be
	   reached before the first residual evaluation; give it a defined value */
	nrm2 = 0.0;

	r = solver->work[0];
	rtld = solver->work[1];
	p = &solver->work[2];
	ap = &solver->work[ (m+1)+2];
	aptld = &solver->work[2*(m+1)+2];

	dotsave = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (m+1),"lis_orthomin::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		/* dotsave is already allocated here; release it like the other exits do */
		lis_free(dotsave);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	iter=1;
	while( iter<=maxiter )
	{
		/* slot of the current direction inside the (m+1)-entry rings */
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copy(rtld,p[ip]);

		/* ap[ip] = A*p[ip] */
		/* aptld[ip] = M^-1 ap[ip] */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		/* orthogonalize against up to m previous directions, newest first */
		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			ip0 = (ip+m+1-l) % (m+1);

			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dot(aptld[ip],aptld[ip0],&beta);
			beta = -beta * dotsave[l-1];

			lis_vector_axpy(beta,p[ip0] ,p[ip]);
			lis_vector_axpy(beta,ap[ip0] ,ap[ip]);
			lis_vector_axpy(beta,aptld[ip0],aptld[ip]);
		}

		/* age the stored reciprocals by one position to make room at index 0 */
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dot(aptld[ip],aptld[ip],&dotsave[0]);

		/* test breakdown */
		if( dotsave[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		dotsave[0] = 1.0/dotsave[0];

		/* alpha = <rtld,Aptld[ip]> */
		lis_vector_dot(rtld,aptld[ip],&alpha);
		alpha = alpha * dotsave[0];

		lis_vector_axpy( alpha,p[ip],x);
		lis_vector_axpy(-alpha,ap[ip],r);
		lis_vector_axpy(-alpha,aptld[ip],rtld);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptime = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		iter++;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * CGS with a precision switch.
 *
 * Phase 1 runs up to maxiter2 CGS iterations using only the .hi part of each
 * LIS_QUAD_PTR scalar, with uhat/p/phat marked LIS_PRECISION_DEFAULT, and stops
 * early once the residual is <= solver->tol_switch.
 * Phase 2 marks those vectors LIS_PRECISION_QUAD, copies x into solver->xx,
 * recomputes the initial residual and continues from iteration iter+1 up to
 * maxiter using the lis_quad_* / *ex_* routines, stopping when tol > residual.
 *
 * Work-vector sharing (do not reorder the statements that rely on it):
 *   qhat and u  both refer to work[5];  uhat and vhat both refer to work[6].
 *
 * Writes solver->retcode, iter, iter2, resid, ptimes and, when requested by the
 * output flags, solver->residual[]; the solution is accumulated in solver->x.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (a zero rho or <rtld,vhat>), or LIS_MAXITER.
 */
LIS_INT lis_cgs_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes = 0.0;
	/* nrm2 is stored into solver->resid on the breakdown exits, which can trigger
	   before the first residual evaluation; give it a defined value */
	nrm2 = 0.0;

	r = solver->work[0];
	rtld = solver->work[1];
	p = solver->work[2];
	phat = solver->work[3];
	q = solver->work[4];
	qhat = solver->work[5];
	u = solver->work[5];
	uhat = solver->work[6];
	vhat = solver->work[6];

	/* attach each quad scalar to its storage slot (macro defined elsewhere) */
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	/* ---- phase 1: default precision, only the .hi parts are used ---- */
	uhat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]);

		/* u = r + beta*q */
		lis_vector_axpyz(beta.hi[0],q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpay(q,beta.hi[0],p);
		lis_vector_xpay(u,beta.hi[0],p);

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,vhat);

		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* q = u - alpha*vhat */
		lis_vector_axpyz(-alpha.hi[0],vhat,u,q);

		/* phat = u + q */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyz(1.0,u,q,phat);
		times = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptimes += lis_wtime()-times;

		/* x = x + alpha*uhat */
		lis_vector_axpy(alpha.hi[0],uhat,x);

		/* qhat = A * uhat */
		LIS_MATVEC(A,uhat,qhat);

		/* r = r - alpha*qhat */
		lis_vector_axpy(-alpha.hi[0],qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( nrm2 <= tol2 )
		{
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptimes = ptimes;
			break;
		}

		rho_old.hi[0] = rho.hi[0];
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	uhat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);
	rho_old.hi[0] = 1.0;
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* u = r + beta*q */
		lis_vector_axpyzex_mmmm(beta,q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,vhat);

		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* q = u - alpha*vhat */
		/* alpha's sign is flipped in place: it holds -alpha for this update */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

		/* phat = u + q */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyzex_mmmm(one,u,q,phat);
		times = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptimes += lis_wtime()-times;

		/* x = x + alpha*uhat */
		/* flipped back to +alpha */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uhat,x);

		/* qhat = A * uhat */
		LIS_MATVEC(A,uhat,qhat);

		/* r = r - alpha*qhat */
		/* flipped to -alpha again; alpha is recomputed on the next pass */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter2;
	solver->iter2 = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Restarted GMRES(m) with a precision switch.
 *
 * Phase 1 (z marked LIS_PRECISION_DEFAULT) runs up to maxiter2 iterations using
 * only the .hi part of each quad scalar and the LIS_SCALAR view hd of the
 * Hessenberg storage; it stops once the estimated residual is <= tol_switch.
 * Phase 2 (z marked LIS_PRECISION_QUAD) copies x into solver->xx, recomputes the
 * initial residual and continues up to maxiter with the lis_quad_* routines on
 * the LIS_QUAD view h.
 *
 * Storage h / hd, with h_dim = m+1:
 *   column i of the Hessenberg matrix starts at offset (i-1)*h_dim (iih),
 *   the rotation cosines start at offset cs, the sines at offset sn.
 *
 * Writes solver->retcode, iter, iter2, resid, ptime and, when requested by the
 * output flags, solver->rhistory[]; the solution is accumulated in solver->x.
 * h is released on every return.
 *
 * Returns LIS_SUCCESS or LIS_MAXITER.
 */
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2,nrm2,tol,tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double time,ptime;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	m = solver->options[LIS_OPTIONS_RESTART];
	h_dim = m+1;
	ptime = 0.0;

	s = solver->work[0];
	r = solver->work[1];
	z = solver->work[2];
	v = &solver->work[3];

	/* attach each quad scalar to its storage slot (macro defined elsewhere) */
	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" );
	/* phase 1 addresses the same buffer as plain LIS_SCALAR entries */
	hd = (LIS_SCALAR *)h;
	cs = (m+1)*h_dim;
	sn = (m+2)*h_dim;

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	z->precision = LIS_PRECISION_DEFAULT;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	/* ---- phase 1: default precision ---- */
	iter=0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii = i-1;
			i1 = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i] = <w,v[k]> */
				/* w = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k+iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w|| */
			/* v[i+1] = w / h[i+1,i] */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1+iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			/* apply the previously stored rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj = k-1;
				t.hi[0] = hd[jj+iih];
				aa.hi[0] = hd[jj+cs]*t.hi[0];
				aa.hi[0] += hd[jj+sn]*hd[k+iih];
				bb.hi[0] = -hd[jj+sn]*t.hi[0];
				bb.hi[0] += hd[jj+cs]*hd[k+iih];
				hd[jj+iih] = aa.hi[0];
				hd[k+iih] = bb.hi[0];
			}

			/* build the rotation that eliminates the subdiagonal entry */
			aa.hi[0] = hd[ii+iih];
			bb.hi[0] = hd[i1+iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii+cs] = aa.hi[0]/rr.hi[0];
			hd[ii+sn] = bb.hi[0]/rr.hi[0];
			s->value[i1] = -hd[ii+sn]*s->value[ii];
			s->value[ii] = hd[ii+cs]*s->value[ii];

			aa.hi[0] = hd[ii+cs]*hd[ii+iih];
			aa.hi[0] += hd[ii+sn]*hd[i1+iih];
			hd[ii+iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/hd[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0]/hd[jj+jj*h_dim];
		}

		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol2 >= nrm2 )
		{
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptime = ptime;
			break;
		}

		/* rebuild the residual in v[0] for the restart */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1+sn]*s->value[jj];
			s->value[jj] = hd[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	/* Initial Residual */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol = solver->tol;

	iter2=iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0] = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii = i-1;
			i1 = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i] = <w,v[k]> */
				/* w = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w|| */
			/* v[i+1] = w / h[i+1,i] */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			/* apply the previously stored rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj = k-1;
				t.hi[0] = h[jj+iih].hi;
				t.lo[0] = h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}

			/* build the rotation that eliminates the subdiagonal entry */
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
				/* report the running counter iter2; iter no longer advances in this phase */
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			t.lo[0] = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}

		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptime = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rebuild the residual in v[0] for the restart */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter2+1;
	solver->iter2 = iter;
	solver->resid = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Preconditioned BiCR iteration for the system held in solver.
 *
 * Uses ten work vectors (r, rtld, z, ztld, p, ptld, ap, az, map, aptld).  Each
 * iteration applies A and its transpose once and calls lis_psolve / lis_psolvet
 * once; the time spent in those two calls inside the loop is accumulated in
 * ptimes.
 *
 * Writes solver->retcode, iter, resid (ptimes on success, residual[] when the
 * output flags ask for it); the solution is accumulated in solver->x.
 *
 * Returns LIS_SUCCESS when the initial-residual check reports completion or the
 * residual reaches tol, LIS_BREAKDOWN when <map,aptld> or <az,ztld> is exactly
 * zero, and LIS_MAXITER otherwise.
 */
LIS_INT lis_bicr(LIS_SOLVER solver)
{
	LIS_MATRIX A,At;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
	LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	At = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes = 0.0;
	/* nrm2 is stored into solver->resid on the breakdown and max-iteration exits,
	   which can be reached before the first residual evaluation */
	nrm2 = 0.0;

	r = solver->work[0];
	rtld = solver->work[1];
	z = solver->work[2];
	ztld = solver->work[3];
	p = solver->work[4];
	ptld = solver->work[5];
	ap = solver->work[6];
	az = solver->work[7];
	map = solver->work[8];
	aptld = solver->work[9];

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* z = M^-1 * r,  ztld = M^-T * rtld,  p = z,  ptld = ztld */
	lis_psolve(solver, r, z);
	lis_psolvet(solver, rtld, ztld);
	lis_vector_copy(z,p);
	lis_vector_copy(ztld,ptld);

	/* ap = A * z,  rho_old = <ap,ztld> */
	LIS_MATVEC(A,z,ap);
	lis_vector_dot(ap,ztld,&rho_old);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* aptld = A^T * ptld */
		/* map = M^-1 * ap */
		LIS_MATVECT(A,ptld,aptld);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;

		/* tmpdot1 = <map,aptld> */
		lis_vector_dot(map,aptld,&tmpdot1);

		/* test breakdown */
		if( tmpdot1==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho_old / tmpdot1 */
		/* x = x + alpha*p */
		/* r = r - alpha*ap */
		alpha = rho_old / tmpdot1;
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(-alpha,ap,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*aptld */
		/* z = z - alpha*map */
		/* ztld = M^-T * rtld */
		/* az = A * z */
		/* rho = <az,ztld> */
		lis_vector_axpy(-alpha,aptld,rtld);
		lis_vector_axpy(-alpha,map,z);
		times = lis_wtime();
		lis_psolvet(solver, rtld, ztld);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,z,az);
		lis_vector_dot(az,ztld,&rho);

		/* test breakdown */
		if( rho==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = rho / rho_old */
		/* p = z + beta*p */
		/* ptld = ztld + beta*ptld */
		/* ap = az + beta*ap */
		beta = rho / rho_old;
		lis_vector_xpay(z,beta,p);
		lis_vector_xpay(ztld,beta,ptld);
		lis_vector_xpay(az,beta,ap);

		rho_old = rho;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Subspace iteration eigensolver, with the orthogonalization dot products
 * carried out through the quad-precision (*ex_*) vector routines.
 *
 * For each of the ss requested modes j = 1..ss the vector v[j] is repeatedly
 * orthogonalized against v[1..j-1] and pushed through the inner method chosen by
 * LIS_EOPTIONS_INNER_ESOLVER:
 *   LIS_ESOLVER_PI  : r = A * v[j]
 *   LIS_ESOLVER_II  : r = lis_solve_kernel(A, v[j]) with a per-mode preconditioner
 *   LIS_ESOLVER_AII : r = lis_psolve(v[j]) with a preconditioner built once
 *   LIS_ESOLVER_RQI : like II, with A temporarily shifted by -mu around the solve
 * until tol > resid or emaxiter iterations have been done.
 *
 * Writes esolver->evalue[j-1], esolver->evector[j-1] for every mode, iter /
 * resid / residual[] for the first mode, and finally copies the eigenvector
 * selected by LIS_EOPTIONS_MODE into esolver->x.
 *
 * Returns LIS_SUCCESS, or the error code of lis_precon_create if it fails.
 */
LIS_INT lis_esi_quad(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x, Ax;
	LIS_SCALAR xAx, xx, mu, lshift;
	LIS_INT ss;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT i,j,k;
	LIS_SCALAR evalue,dotvr;
	LIS_INT iter,giter,output,niesolver;
	LIS_INT nprocs,my_rank;
	LIS_REAL nrm2,dot,resid,resid0;
	LIS_QUAD_PTR qdot_vv, qdot_vr;
	LIS_VECTOR *v,r,q;
	LIS_SOLVER solver;
	LIS_PRECON precon;
	double times,itimes,ptimes,p_c_times,p_i_times;
	LIS_INT err;
	LIS_INT nsol, precon_type;
	char solvername[128], preconname[128];

	LIS_DEBUG_FUNC_IN;

	A = esolver->A;
	x = esolver->x;

	ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	lshift = esolver->lshift;
	output = esolver->options[LIS_EOPTIONS_OUTPUT];
	niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

	r = esolver->work[0];
	q = esolver->work[1];
	v = &esolver->work[2];
	/* NOTE(review): Ax is work[3], which is the same vector as v[1] -- confirm
	   that the RQI path is meant to overwrite v[1] here */
	Ax = esolver->work[3];

	LIS_QUAD_SCALAR_MALLOC(qdot_vv,0,1);
	LIS_QUAD_SCALAR_MALLOC(qdot_vr,1,1);

	/* start vector: all ones, scaled to unit 2-norm */
	lis_vector_set_all(1.0,r);
	lis_vector_nrm2(r, &nrm2);
	lis_vector_scale(1/nrm2,r);

	/* set up the inner linear solver and apply the shift; PI needs neither */
	switch ( niesolver )
	{
	case LIS_ESOLVER_II:
		lis_solver_create(&solver);
		lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
		lis_solver_set_optionC(solver);
		lis_solver_get_solver(solver, &nsol);
		lis_solver_get_precon(solver, &precon_type);
		lis_get_solvername(nsol, solvername);
		lis_get_preconname(precon_type, preconname);
		printf("solver : %s %d\n", solvername, nsol);
		printf("precon : %s %d\n", preconname, precon_type);
		if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
		if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
		break;
	case LIS_ESOLVER_AII:
		lis_solver_create(&solver);
		lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
		lis_solver_set_optionC(solver);
		lis_solver_get_solver(solver, &nsol);
		lis_solver_get_precon(solver, &precon_type);
		lis_get_solvername(nsol, solvername);
		lis_get_preconname(precon_type, preconname);
		printf("solver : %s %d\n", solvername, nsol);
		printf("precon : %s %d\n", preconname, precon_type);
		if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
		if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
		lis_vector_set_all(1.0,q);
		lis_solve(A, q, x, solver);
		lis_precon_create(solver, &precon);
		solver->precon = precon;
		break;
	case LIS_ESOLVER_RQI:
		lis_solver_create(&solver);
		lis_solver_set_option("-p ilu -precision quad -maxiter 10",solver);
		lis_solver_set_optionC(solver);
		lis_solver_get_solver(solver, &nsol);
		lis_solver_get_precon(solver, &precon_type);
		lis_get_solvername(nsol, solvername);
		lis_get_preconname(precon_type, preconname);
		printf("solver : %s %d\n", solvername, nsol);
		printf("precon : %s %d\n", preconname, precon_type);
		if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
		if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
		break;
	}

	giter=0;
	j=0;
	while (j<ss)
	{
		lis_vector_duplicate(A,&esolver->evector[j]);
		j = j+1;
		lis_vector_copy(r, v[j]);

		if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
		{
			/* create preconditioner */
			solver->A = A;
			err = lis_precon_create(solver, &precon);
			if( err )
			{
				lis_solver_work_destroy(solver);
				solver->retcode = err;
				return err;
			}
		}

		if (niesolver==LIS_ESOLVER_RQI)
		{
			/* initial Rayleigh quotient mu = <x,Ax> / <x,x> */
			lis_vector_nrm2(x, &nrm2);
			lis_vector_scale(1/nrm2, x);
			lis_matvec(A, x, Ax);
			lis_vector_dot(x, Ax, &xAx);
			lis_vector_dot(x, x, &xx);
			mu = xAx / xx;
		}

		iter = 0;
		while (iter<emaxiter)
		{
			/* diagonalization */
			iter = iter+1;
			giter = giter+1;

			/* v[j] = v[j] - <v[j],v[k]> * v[k]  for every earlier mode k */
			for (k=1;k<j;k++)
			{
				lis_vector_dotex_mmm(v[j], v[k], &qdot_vv);
				lis_quad_minus((LIS_QUAD *)qdot_vv.hi);
				lis_vector_axpyex_mmm(qdot_vv,v[k],v[j]);
			}

			switch( niesolver )
			{
			case LIS_ESOLVER_PI:
				lis_matvec(A,v[j],r);
				break;
			case LIS_ESOLVER_II:
				lis_solve_kernel(A, v[j], r, solver, precon);
				break;
			case LIS_ESOLVER_AII:
				lis_psolve(solver, v[j], r);
				break;
			case LIS_ESOLVER_RQI:
				lis_vector_nrm2(v[j], &nrm2);
				lis_vector_scale(1/nrm2, v[j]);
				lis_matrix_shift_diagonal(A, -mu);
				lis_solve_kernel(A, v[j], r, solver, precon);
				lis_matrix_shift_diagonal(A, mu);
				break;
			}

			/* timings are accumulated for the first mode only */
			if ( j==1 && ( niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_AII || niesolver==LIS_ESOLVER_RQI ))
			{
				lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
				esolver->ptimes += solver->ptimes;
				esolver->itimes += solver->itimes;
				esolver->p_c_times += solver->p_c_times;
				esolver->p_i_times += solver->p_i_times;
			}

			/* q = r - <v[j],r> * v[j];  resid = ||q||_2 / |<v[j],r>| */
			lis_vector_nrm2(r, &nrm2);
			lis_vector_dotex_mmm(v[j], r, &qdot_vr);
			lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
			lis_vector_axpyzex_mmmm(qdot_vr,v[j],r,q);
			/* restore the sign of the dot product before it is read */
			lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
			dotvr = qdot_vr.hi[0];
			mu = mu + 1/dotvr;

			lis_vector_nrm2(q, &resid);
			resid = fabs(resid / dotvr);

			/* v[j] = r / ||r||_2 */
			lis_vector_scale(1/nrm2,r);
			lis_vector_copy(r, v[j]);

			if ( j==1 )
			{
				if( output & LIS_PRINT_MEM ) esolver->residual[iter] = resid;
				if( output & LIS_PRINT_OUT ) printf("iter: %5d residual = %e\n", iter, resid);
				esolver->iter = iter;
				esolver->resid = resid;
			}
			if (tol>resid) break;
		}

		if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
		{
			lis_precon_destroy(precon);
		}

		switch ( niesolver )
		{
		case LIS_ESOLVER_PI:
			esolver->evalue[j-1] = dotvr;
			break;
		case LIS_ESOLVER_II:
			esolver->evalue[j-1] = 1/dotvr;
			break;
		case LIS_ESOLVER_AII:
			esolver->evalue[j-1] = 1/dotvr;
			break;
		case LIS_ESOLVER_RQI:
			esolver->evalue[j-1] = mu;
			break;
		}
		lis_vector_copy(v[j], esolver->evector[j-1]);

		if (A->my_rank==0 && ss>1)
		{
#ifdef _LONGLONG
			printf("Subspace: mode number = %lld\n", j-1);
#else
			printf("Subspace: mode number = %d\n", j-1);
#endif
			printf("Subspace: eigenvalue = %e\n", esolver->evalue[j-1]);
#ifdef _LONGLONG
			printf("Subspace: number of iterations = %lld\n",iter);
#else
			printf("Subspace: number of iterations = %d\n",iter);
#endif
			printf("Subspace: relative residual 2-norm = %e\n",resid);
		}
	}

	lis_vector_copy(esolver->evector[esolver->options[LIS_EOPTIONS_MODE]], esolver->x);

	/* undo the shift and release the inner solver */
	switch ( niesolver )
	{
	case LIS_ESOLVER_II:
		if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
		lis_solver_destroy(solver);
		break;
	case LIS_ESOLVER_AII:
		if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
		lis_precon_destroy(precon);
		lis_solver_destroy(solver);
		break;
	case LIS_ESOLVER_RQI:
		if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
		lis_solver_destroy(solver);
		break;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Conjugate-residual style iteration for one eigenpair of esolver->A.
 *
 * Starting from the normalized all-ones vector, each iteration
 *   - moves x along the search direction p by alpha,
 *   - recomputes the Rayleigh quotient evalue = <x,Ax> / ||x||_2^2,
 *   - forms r = evalue*x - Ax and Ar = A*r,
 *   - updates p = r + beta*p together with Ap = Ar + beta*Ap, so that Ap keeps
 *     matching A*p for the dot products of the next iteration.
 * The loop stops when resid = | ||r||_2 / evalue | drops below tol or after
 * emaxiter iterations.
 *
 * The diagonal of A is shifted by lshift for the duration of the call and
 * restored before returning.
 *
 * Writes esolver->iter, resid, evalue[0], retcode (and residual[] when the
 * output flags ask for it); the eigenvector estimate is left in esolver->x.
 *
 * Returns LIS_SUCCESS when resid < tol, LIS_MAXITER otherwise.
 */
LIS_INT lis_ecr(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_SCALAR evalue;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,output;
	LIS_REAL nrm2,resid;
	LIS_SCALAR lshift;
	LIS_VECTOR r,p,Ax,Ar,Ap;
	LIS_SCALAR alpha, beta;
	LIS_SCALAR rAp, rp, ApAp, pAp, pp, ArAp, pAr;

	A = esolver->A;
	x = esolver->x;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output = esolver->options[LIS_EOPTIONS_OUTPUT];
	lshift = esolver->lshift;

	if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
	if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

	r = esolver->work[0];
	p = esolver->work[1];
	Ax = esolver->work[2];
	Ar = esolver->work[3];
	Ap = esolver->work[4];

	/* x = ones / ||ones||_2 */
	lis_vector_set_all(1.0,x);
	lis_vector_nrm2(x,&nrm2);
	lis_vector_scale(1/nrm2,x);

	/* evalue = <x,Ax>,  r = evalue*x - Ax,  p = r,  Ap = A*p */
	lis_matvec(A,x,Ax);
	lis_vector_dot(x,Ax,&evalue);
	lis_vector_axpyz(-evalue,x,Ax,r);
	lis_vector_scale(-1.0,r);
	lis_vector_copy(r,p);
	lis_matvec(A,p,Ap);

	iter=0;
	while (iter<emaxiter)
	{
		iter = iter + 1;

		/* step length along p */
		lis_vector_dot(r,Ap,&rAp);
		lis_vector_dot(r,p,&rp);
		lis_vector_dot(Ap,Ap,&ApAp);
		lis_vector_dot(p,Ap,&pAp);
		lis_vector_dot(p,p,&pp);
		alpha = (rAp - evalue * rp) / (ApAp - evalue * (2.0 * pAp - evalue * pp));

		/* x = x + alpha*p,  evalue = <x,Ax> / ||x||_2^2 */
		lis_vector_axpy(alpha,p,x);
		lis_matvec(A,x,Ax);
		lis_vector_dot(x,Ax,&evalue);
		lis_vector_nrm2(x, &nrm2);
		evalue = evalue / (nrm2 * nrm2);

		/* r = evalue*x - Ax,  Ar = A*r */
		lis_vector_axpyz(-evalue,x,Ax,r);
		lis_vector_scale(-1.0,r);
		lis_matvec(A,r,Ar);

		lis_vector_dot(Ar,Ap,&ArAp);
		lis_vector_dot(p,Ar,&pAr);
		lis_vector_dot(r,Ap,&rAp);
		lis_vector_dot(r,p,&rp);
		beta = - (ArAp - evalue * ((pAr + rAp) - evalue * rp))/ (ApAp - evalue * (2.0 * pAp - evalue * pp));

		/* p = r + beta*p */
		lis_vector_xpay(r,beta,p);
		/* Ap = Ar + beta*Ap -- keeps Ap equal to A*p without another matvec */
		lis_vector_xpay(Ar,beta,Ap);

		lis_vector_nrm2(r,&nrm2);
		resid = fabs(nrm2 / (evalue));

		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, resid);
		}

		if (resid<tol) break;
	}

	esolver->iter = iter;
	esolver->resid = resid;
	esolver->evalue[0] = evalue;

	if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

	if (resid<tol)
	{
		esolver->retcode = LIS_SUCCESS;
		return LIS_SUCCESS;
	}
	else
	{
		esolver->retcode = LIS_MAXITER;
		return LIS_MAXITER;
	}
}
/*
 * lis_idrs: iterative solve of A x = b using s direction pairs (dX, dR)
 * and s test vectors P, with s read from LIS_OPTIONS_IDRS_RESTART.
 *
 * Work vector layout: work[0..3] = r, t, v, av; then s vectors each for
 * dX, P and dR.  The small arrays m, c (length s) and M, MM (s*s) are
 * allocated here and released on every return path.
 *
 * P is filled from genrand_real1() after seeding with a fixed array, so
 * the test vectors are the same on every call; lis_idrs_orth() then
 * normalizes them and removes each vector's component from the later ones.
 *
 * Returns LIS_SUCCESS when the residual reported by the selected
 * convergence routine drops to tol (or when the initial-residual helper
 * returns non-zero), and LIS_MAXITER when iter exceeds maxiter.
 *
 * NOTE(review): the preconditioner/matvec steps are compiled only when
 * PRE_RIGHT is defined; without it dX/dR, av and t are not computed in
 * this function.
 * NOTE(review): solver->ptimes is stored on the converged paths only.
 */
LIS_INT lis_idrs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR *M,*m,*c,*MM;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_REAL angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
	/* fixed seed for the generator that fills P */
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	s = solver->options[LIS_OPTIONS_IDRS_RESTART];
	ptimes = 0.0;

	r = solver->work[0];
	t = solver->work[1];
	v = solver->work[2];
	av = solver->work[3];
	dX = &solver->work[4];
	P = &solver->work[4+s];
	dR = &solver->work[4+2*s];

	angle = 0.7;

	m = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::m");
	c = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::c");
	M = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	MM = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");

	/* Initial residual; a non-zero return ends the solve immediately */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		lis_free2(4,m,c,M,MM);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* fill P with pseudo-random entries, then orthonormalize */
	init_by_array(init, length);
	for(k=0;k<s;k++)
	{
		for(i=0;i<n;i++)
		{
			P[k]->value[i] = genrand_real1();
		}
	}
	lis_idrs_orth(s,P);

	/* build the first s direction pairs, one residual-minimizing step each */
	for( k=0; k<s; k++ )
	{
#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, r, dX[k]);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,dX[k],dR[k]);
#endif
		/* om = <dR,r> / <dR,dR> */
		lis_vector_dot(dR[k],dR[k],&h);
		lis_vector_dot(dR[k],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[k]);
		lis_vector_scale(-om,dR[k]);
		/* x = x + dX, r = r + dR */
		lis_vector_axpy(1.0,dX[k],x);
		lis_vector_axpy(1.0,dR[k],r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[k+1] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", k+1, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter = k+1;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* M[k*s+i] = <P[i],dR[k]> */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[k],&M[k*s+i]);
		}
	}

	iter = s;
	oldest = 0;
	/* m[i] = <P[i],r> */
	for(i=0;i<s;i++)
	{
		lis_vector_dot(P[i],r,&m[i]);
	}

	while( iter<=maxiter )
	{
		tim = lis_wtime();
		lis_array_solve(s,M,m,c,MM); /* solve Mc=m */

		/* v = r - sum_j c[j]*dR[j] */
		lis_vector_copy(r,v);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],v);
		}

		if( (iter%(s+1))==s )
		{
			/* every (s+1)-th step: recompute om from t and v */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,av,t);
#endif
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
			om = om / h;
#if 0
			lis_vector_scale(-om,t);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dR[j],t);
			}
			lis_vector_copy(t,dR[oldest]);
			lis_vector_scale(om,av);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dX[j],av);
			}
			lis_vector_copy(av,dX[oldest]);
#else
			/* dX[oldest] = om*av - sum_j c[j]*dX[j]; each entry is fully
			   accumulated in h before the old dX[oldest] entry is overwritten */
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
			/* dR[oldest] = -om*t - sum_j c[j]*dR[j] */
			for(i=0;i<n;i++)
			{
				h = -om*t->value[i];
				for(j=0;j<s;j++)
				{
					h -= dR[j]->value[i] * c[j];
				}
				dR[oldest]->value[i] = h;
			}
#endif
		}
		else
		{
			/* other steps: reuse the om from the last recomputation */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
#endif
#if 0
			lis_vector_scale(om,av);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dX[j],av);
			}
			lis_vector_copy(av,dX[oldest]);
#else
			/* dX[oldest] = om*av - sum_j c[j]*dX[j] */
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
#endif
			/* dR[oldest] = -A*dX[oldest] */
			LIS_MATVEC(A,dX[oldest],dR[oldest]);
			lis_vector_scale(-1.0,dR[oldest]);
		}

		/* r = r + dR[oldest], x = x + dX[oldest] */
		lis_vector_axpy(1.0,dR[oldest],r);
		lis_vector_axpy(1.0,dX[oldest],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* update m and column `oldest` of M with h = <P[i],dR[oldest]> */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[oldest],&h);
			m[i] += h;
			M[oldest*s+i] = h;
		}

		/* advance the slot to be replaced next, wrapping at s */
		oldest++;
		if( oldest==s ) oldest = 0;

		tim = lis_wtime() - tim;
		/* printf("update m,M: %e\n",tim); */
	}

	lis_free2(4,m,c,M,MM);
	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_idr1: variant of the s-direction solver with s fixed to 1, so the
 * small system M c = m reduces to the scalar division c = m / M.
 *
 * Work vector layout: work[0..3] = r, t, v, av; P = work[4],
 * dX = work[4+s], dR = work[4+2*s].
 *
 * Each pass of the main loop performs two updates of x and r (two
 * iteration counts): the first recomputes om from t and v, the second
 * reuses that om and rebuilds dR[0] as -A*dX[0].
 *
 * Returns LIS_SUCCESS when the residual reported by the selected
 * convergence routine drops to tol (or when the initial-residual helper
 * returns non-zero), and LIS_MAXITER when iter exceeds maxiter.
 *
 * NOTE(review): preconditioner/matvec steps are compiled only under
 * PRE_RIGHT or PRE_BOTH; the PRE_BOTH start-up branch does not assign
 * dX[0]/dR[0] and contains a lis_vector_print(av) call -- confirm intent.
 * NOTE(review): h and om in the main loop are summed over the n entries
 * indexed here rather than through lis_vector_dot -- confirm this is
 * intended for multi-process runs.
 */
LIS_INT lis_idr1(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR M,m,c;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_REAL angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
	/* fixed seed for the generator that fills P[0] */
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	s = 1;
	ptimes = 0.0;

	r = solver->work[0];
	t = solver->work[1];
	v = solver->work[2];
	av = solver->work[3];
	P = &solver->work[4];
	dX = &solver->work[4+s];
	dR = &solver->work[4+2*s];

	angle = 0.7;

	/* Initial residual; a non-zero return ends the solve immediately */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* pseudo-random P[0], normalized by lis_idrs_orth */
	init_by_array(init, length);
	for(i=0;i<n;i++)
	{
		P[0]->value[i] = genrand_real1();
	}
	/* lis_vector_copy(r,P[0]); */
	lis_idrs_orth(s,P);

#ifdef PRE_RIGHT
	times = lis_wtime();
	lis_psolve(solver, r, dX[0]);
	ptimes += lis_wtime()-times;
	LIS_MATVEC(A,dX[0],dR[0]);
#else
#ifdef PRE_BOTH
	times = lis_wtime();
	lis_psolve_right(solver, r, t);
	ptimes += lis_wtime()-times;
	LIS_MATVEC(A,t,av);
	lis_vector_print(av);
	times = lis_wtime();
	lis_psolve_left(solver, av, v);
	ptimes += lis_wtime()-times;
#endif
#endif
	/* lis_idrs_omega(dR[k],r,angle,&om); */
	/* om = <dR,r> / <dR,dR> */
	lis_vector_dot(dR[0],dR[0],&h);
	lis_vector_dot(dR[0],r,&om);
	om = om / h;
	lis_vector_scale(om,dX[0]);
	lis_vector_scale(-om,dR[0]);
	/* x = x + dX, r = r + dR */
	lis_vector_axpy(1.0,dX[0],x);
	lis_vector_axpy(1.0,dR[0],r);

	/* convergence check */
	lis_solver_get_residual[conv](r,solver,&nrm2);
	if( output )
	{
		if( output & LIS_PRINT_MEM ) solver->residual[1] = nrm2;
		if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", 1, nrm2);
	}

	if( tol >= nrm2 )
	{
		solver->retcode = LIS_SUCCESS;
		solver->iter = 1;
		solver->resid = nrm2;
		solver->ptimes = ptimes;
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}

	/* M = <P,dR>, m = <P,r> */
	lis_vector_dot(P[0],dR[0],&M);

	iter = s;
	oldest = 0;
	lis_vector_dot(P[0],r,&m);

	while( iter<=maxiter )
	{
		tim = lis_wtime();

		/* ---- first update of the pass: om is recomputed ---- */

		/* solve Mc=m */
		c = m/M;
		/* v = r - c*dR */
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/* lis_vector_copy(r,v); lis_vector_axpy(-c,dR[0],v); */

#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, v, av);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,av,t);
#else
#ifdef PRE_BOTH
		times = lis_wtime();
		lis_psolve_right(solver, v, t);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,t,av);
		times = lis_wtime();
		lis_psolve_left(solver, av, t);
		ptimes += lis_wtime()-times;
#endif
#endif
		/* lis_idrs_omega(t,v,angle,&om); lis_vector_dot(t,t,&h); lis_vector_dot(t,v,&om); */
		/* h = sum t[i]^2 and om = sum t[i]*v[i], accumulated in one pass */
		h = t->value[0]*t->value[0];
		om = t->value[0]*v->value[0];
		for(i=1;i<n;i++)
		{
			h += t->value[i]*t->value[i];
			om += t->value[i]*v->value[i];
		}
		om = om / h;
		/* printf("i=%d om = %lf\n",iter,om); */
#if 0
		lis_vector_scale(-om,t);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],t);
		}
		lis_vector_copy(t,dR[oldest]);
		lis_vector_scale(om,av);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dX[j],av);
		}
		lis_vector_copy(av,dX[oldest]);
#else
		/* dX = om*av - c*dX and dR = -om*t - c*dR, fused in one loop */
		for(i=0;i<n;i++)
		{
			h = om*av->value[i];
			h -= dX[0]->value[i] * c;
			dX[0]->value[i] = h;
			h = -om*t->value[i];
			h -= dR[0]->value[i] * c;
			dR[0]->value[i] = h;
		}
#endif
		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* h = <P,dR>; m accumulates it, M is replaced by it */
		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;

		/* ---- second update of the pass: om is reused ---- */

		/* solve Mc=m */
		c = m/M;
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/* lis_vector_copy(r,v); lis_vector_axpy(-c,dR[0],v); */
#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, v, av);
		ptimes += lis_wtime()-times;
#endif
#if 0
		lis_vector_scale(om,av);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dX[j],av);
		}
		lis_vector_copy(av,dX[oldest]);
#else
		/* dX = om*av - c*dX */
		for(i=0;i<n;i++)
		{
			h = om*av->value[i];
			h -= dX[0]->value[i] * c;
			dX[0]->value[i] = h;
		}
#endif
		/* dR = -A*dX */
		LIS_MATVEC(A,dX[0],dR[0]);
		lis_vector_scale(-1.0,dR[0]);

		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;

		tim = lis_wtime() - tim;
		/* printf("update m,M: %e\n",tim); */
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicg: two-sided iteration on r and the shadow residual rtld, using
 * the preconditioner and its transpose (lis_psolve / lis_psolvet) and the
 * matrix and its transpose product (lis_matvec / lis_matvect).
 *
 * z/q share work[2] and ztld/qtld share work[3]: each z-type vector has
 * been folded into p or ptld before the matching q-type vector is written.
 *
 * Returns LIS_SUCCESS on convergence (or when the initial-residual helper
 * returns non-zero), LIS_BREAKDOWN when rho or <ptld,q> is exactly zero,
 * and LIS_MAXITER when the iteration limit is exhausted.
 */
LIS_INT lis_bicg(LIS_SOLVER solver)
{
	LIS_MATRIX A, At;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
	LIS_SCALAR alpha, beta, rho, rho_prev, ptld_q;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter, maxiter, output, conv;
	double t_start, ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	At      = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r    = solver->work[0];
	rtld = solver->work[1];
	z    = solver->work[2];
	ztld = solver->work[3];
	p    = solver->work[4];
	ptld = solver->work[5];
	q    = solver->work[2];
	qtld = solver->work[3];

	rho_prev = (LIS_SCALAR)1.0;

	/* set up r; a non-zero return from the helper ends the solve */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_all(0, p);
	lis_vector_set_all(0, ptld);

	iter = 1;
	while( iter<=maxiter )
	{
		/* z = M^-1 r, ztld = M^-T rtld */
		t_start = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-t_start;

		/* rho = <z,rtld>; zero means the recurrence cannot continue */
		lis_vector_dot(z,rtld,&rho);
		if( rho==0.0 ) goto breakdown;

		beta = rho / rho_prev;

		/* p = z + beta*p,          q    = A p      */
		/* ptld = ztld + beta*ptld, qtld = A^T ptld */
		lis_vector_xpay(z,beta,p);
		lis_matvec(A,p,q);
		lis_vector_xpay(ztld,beta,ptld);
		lis_matvect(At,ptld,qtld);

		/* alpha = rho / <ptld,q> */
		lis_vector_dot(ptld,q,&ptld_q);
		if( ptld_q==0.0 ) goto breakdown;
		alpha = rho / ptld_q;

		/* x += alpha*p, r -= alpha*q */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(-alpha,q,r);

		/* residual bookkeeping and stopping test */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}
		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld -= alpha*qtld */
		lis_vector_axpy(-alpha,qtld,rtld);

		rho_prev = rho;
		iter++;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;

breakdown:
	/* reports the most recent nrm2 as it stands at this point */
	solver->retcode = LIS_BREAKDOWN;
	solver->iter    = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_BREAKDOWN;
}
/*
 * lis_egpi: power-type iteration for the matrix pair (esolver->A, esolver->B).
 *
 * Each step normalizes v, forms w = A*v, rescales v and w by sqrt(<v,w>),
 * solves B*y = w with an inner "bicg" solver without preconditioning,
 * takes theta = <w,y> and uses ||y - theta*v||_2 / |theta| as the
 * residual.  The start vector is esolver->x, optionally reset to ones.
 *
 * The shift is taken from the SHIFT (and, under _COMPLEX, SHIFT_IM)
 * parameters, and is overridden by esolver->ishift when that is non-zero.
 * It is applied through lis_matrix_shift_matrix(A, B, oshift) and undone
 * on both normal exits.
 *
 * On return esolver->iter[0], esolver->resid[0] and esolver->evalue[0]
 * (= theta + oshift) are filled in and v has unit 2-norm.
 *
 * Returns LIS_SUCCESS on convergence, LIS_MAXITER when emaxiter steps are
 * used up, or the error code of a failing preconditioner/solver call.
 *
 * NOTE(review): the two error returns inside the function leave the
 * shift applied and do not destroy the inner solver -- confirm whether
 * callers rely on that.
 */
LIS_INT lis_egpi(LIS_ESOLVER esolver)
{
	LIS_Comm comm;
	LIS_MATRIX A,B;
	LIS_VECTOR w,v,y,q;
	LIS_SCALAR eta,theta;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,iter2,output;
	LIS_SCALAR oshift;
	LIS_REAL nrm2,resid;
	LIS_SOLVER solver;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_INT err;
	LIS_PRECON precon;
	LIS_INT nsol, precon_type;
	char solvername[128], preconname[128];

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output = esolver->options[LIS_EOPTIONS_OUTPUT];
#ifdef _COMPLEX
	oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
#else
	oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
#endif

	A = esolver->A;
	B = esolver->B;
	v = esolver->x;
	if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,v);
	}
	w = esolver->work[0];
	y = esolver->work[1];
	q = esolver->work[2];

	/* A non-zero esolver->ishift overrides the parameter shift.  The value
	   must come from the esolver field tested here; the previous code
	   copied a local that was never assigned. */
	if ( esolver->ishift != 0.0 ) oshift = esolver->ishift;
	if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, oshift);

	if( output )
	{
#ifdef _COMPLEX
		lis_printf(comm,"shift                 : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
#else
		lis_printf(comm,"shift                 : %e\n", (double)oshift);
#endif
	}

	/* inner linear solver for B*y = w */
	lis_solver_create(&solver);
	lis_solver_set_option("-i bicg -p none",solver);
	err = lis_solver_set_optionC(solver);
	CHKERR(err);
	lis_solver_get_solver(solver, &nsol);
	lis_solver_get_precon(solver, &precon_type);
	lis_solver_get_solvername(nsol, solvername);
	lis_solver_get_preconname(precon_type, preconname);
	if( output )
	{
		lis_printf(comm,"linear solver         : %s\n", solvername);
		lis_printf(comm,"preconditioner        : %s\n", preconname);
	}

	/* create preconditioner */
	solver->A = B;
	err = lis_precon_create(solver, &precon);
	if( err )
	{
		lis_solver_work_destroy(solver);
		solver->retcode = err;
		return err;
	}

	iter=0;
	while (iter<emaxiter)
	{
		iter = iter+1;

		/* v = v / ||v||_2 */
		lis_vector_nrm2(v, &nrm2);
		lis_vector_scale(1.0/nrm2, v);

		/* w = A * v */
		lis_matvec(A, v, w);

		/* v = v / <v,w>^1/2, w = w / <v,w>^1/2 */
		lis_vector_dot(v, w, &eta);
		eta = sqrt(eta);
		lis_vector_scale(1.0/eta, v);
		lis_vector_scale(1.0/eta, w);

		/* y = B^-1 * w */
		err = lis_solve_kernel(B, w, y, solver, precon);
		if( err )
		{
			lis_solver_work_destroy(solver);
			solver->retcode = err;
			return err;
		}
		lis_solver_get_iter(solver, &iter2);

		/* theta = <w,y> */
		lis_vector_dot(w, y, &theta);

		/* resid = ||y - theta * v||_2 / |theta| */
		lis_vector_axpyz(-theta, v, y, q);
		lis_vector_nrm2(q, &resid);
		resid = resid / fabs(theta);

		/* v = y */
		lis_vector_copy(y, v);

		/* accumulate the inner solver's timings */
		lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
		esolver->ptime += solver->ptime;
		esolver->itime += solver->itime;
		esolver->p_c_time += solver->p_c_time;
		esolver->p_i_time += solver->p_i_time;

		/* convergence check */
		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
			if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
		}

		if( tol >= resid )
		{
			esolver->retcode = LIS_SUCCESS;
			esolver->iter[0] = iter;
			esolver->resid[0] = resid;
			esolver->evalue[0] = theta + oshift;
			lis_vector_nrm2(v, &nrm2);
			lis_vector_scale(1.0/nrm2, v);
			if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
			lis_precon_destroy(precon);
			lis_solver_destroy(solver);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	lis_precon_destroy(precon);

	esolver->retcode = LIS_MAXITER;
	esolver->iter[0] = iter;
	esolver->resid[0] = resid;
	esolver->evalue[0] = theta + oshift;
	lis_vector_nrm2(v, &nrm2);
	lis_vector_scale(1.0/nrm2, v);
	if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
	lis_solver_destroy(solver);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_epi: power iteration on esolver->A starting from esolver->x
 * (reset to all ones when LIS_EOPTIONS_INITGUESS_ONES is set).
 *
 * Per step: normalize x, form Ax, take evalue = <x,Ax>, measure
 * |  ||Ax - evalue*x||_2 / evalue  | and replace x by Ax.
 *
 * Stores the last iteration count, residual and evalue in esolver and
 * returns LIS_SUCCESS once the residual is at or below the tolerance,
 * otherwise LIS_MAXITER.
 */
LIS_INT lis_epi(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x, Ax, diff;
	LIS_SCALAR evalue;
	LIS_REAL tol, xnorm, resid;
	LIS_INT emaxiter, output, iter;
	LIS_INT converged, status;

	LIS_DEBUG_FUNC_IN;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol      = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output   = esolver->options[LIS_EOPTIONS_OUTPUT];

	A = esolver->A;
	x = esolver->x;
	if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,x);
	}
	Ax   = esolver->work[0];
	diff = esolver->work[1];

	converged = 0;
	for( iter=0; iter<emaxiter; )
	{
		++iter;

		/* unit-length iterate */
		lis_vector_nrm2(x, &xnorm);
		lis_vector_scale(1/xnorm, x);

		/* Rayleigh quotient of the normalized iterate */
		lis_matvec(A,x,Ax);
		lis_vector_dot(x, Ax, &evalue);

		/* diff = Ax - evalue*x, measured relative to evalue */
		lis_vector_axpyz(-evalue,x,Ax,diff);
		lis_vector_nrm2(diff, &resid);
		resid = fabs(resid / evalue);

		/* next iterate */
		lis_vector_copy(Ax, x);

		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
		}

		if( tol >= resid )
		{
			converged = 1;
			break;
		}
	}

	status = converged ? LIS_SUCCESS : LIS_MAXITER;

	esolver->retcode   = status;
	esolver->iter      = iter;
	esolver->resid     = resid;
	esolver->evalue[0] = evalue;
	LIS_DEBUG_FUNC_OUT;
	return status;
}
/*
 * lis_ecg: eigenvalue iteration on esolver->A that, at every step, builds
 * two 3x3 matrices SA and SB from inner products of the vectors w, x, p
 * (and Aw, Ax, Ap), runs a small inner iteration on them to obtain the
 * coefficient vector v3, and recombines w, x, p with those coefficients.
 *
 * The matrix diagonal is shifted by esolver->lshift for the duration of
 * the call and shifted back before returning.  An inner solver configured
 * with "-i bicg -p ilu" is created; lis_solve() with a normalized all-ones
 * right-hand side provides the starting x, and the preconditioner created
 * afterwards is applied through lis_psolve() in the loop.
 *
 * On return esolver->iter, esolver->resid and esolver->evalue[0] hold the
 * last iteration count, relative residual and <x,Ax>.
 *
 * Returns LIS_SUCCESS when resid < tol, LIS_BREAKDOWN when the inner 3x3
 * iteration produces ievalue3 == 0, and LIS_MAXITER otherwise.
 *
 * NOTE(review): Ax is initialized from b and Aw from x rather than through
 * a matrix-vector product -- confirm against the intended algorithm.
 */
LIS_INT lis_ecg(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_SCALAR evalue;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,iter3,output;
	LIS_REAL nrm2,resid,resid3;
	LIS_SCALAR lshift;
	LIS_VECTOR b,D,r,w,p,Aw,Ax,Ap,Ds;
	LIS_SCALAR *SA, *SB, *SW, *v3, *SAv3, *SBv3, *z3, *q3, *SBz3, evalue3, ievalue3;
	LIS_SOLVER solver;
	LIS_PRECON precon;
	double times,itimes,ptimes,p_c_times,p_i_times;
	LIS_INT nsol, precon_type;
	char solvername[128], preconname[128];

	A = esolver->A;
	x = esolver->x;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output = esolver->options[LIS_EOPTIONS_OUTPUT];
	lshift = esolver->lshift;

	if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
	if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

	/* 3x3 projected matrices and length-3 scratch arrays */
	SA = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SA");
	SB = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SB");
	SW = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SW");
	v3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::v3");
	SAv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SAv3");
	SBv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBv3");
	SBz3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBz3");
	z3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::z3");
	q3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::q3");

	b = esolver->work[0];
	D = esolver->work[1];
	Ds = esolver->work[2];
	r = esolver->work[3];
	w = esolver->work[4];
	p = esolver->work[5];
	Aw = esolver->work[6];
	Ax = esolver->work[7];
	Ap = esolver->work[8];

	/* b = ones / ||ones||_2 */
	lis_vector_set_all(1.0,b);
	lis_vector_nrm2(b, &nrm2);
	lis_vector_scale(1/nrm2, b);

	lis_solver_create(&solver);
	lis_solver_set_option("-i bicg -p ilu",solver);
	lis_solver_set_optionC(solver);
	lis_solver_get_solver(solver, &nsol);
	lis_solver_get_precon(solver, &precon_type);
	lis_get_solvername(nsol, solvername);
	lis_get_preconname(precon_type, preconname);
	printf("solver     : %s %d\n", solvername, nsol);
	printf("precon     : %s %d\n", preconname, precon_type);

	/* starting vector: solve A x = b */
	lis_solve(A, b, x, solver);
	lis_vector_copy(b,Ax);
	lis_vector_nrm2(x, &nrm2);
	lis_vector_set_all(0.0,p);
	lis_vector_set_all(0.0,Ap);

	lis_precon_create(solver, &precon);
	solver->precon = precon;

	iter=0;
	while (iter<emaxiter)
	{
		iter = iter + 1;

		/* evalue = <x,Ax>, r = Ax - evalue*x, resid = | ||r|| / evalue | */
		lis_vector_dot(x,Ax,&evalue);
		lis_vector_axpyz(-(evalue),x,Ax,r);
		lis_vector_nrm2(r, &nrm2);
		resid = fabs(nrm2/(evalue));

		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, resid);
		}

		if (resid<tol) break;

		/* w from the preconditioner applied to x */
		lis_psolve(solver, x, w);
		lis_vector_copy(x,Aw);
		lis_vector_nrm2(w, &nrm2);

		/* SA: inner products of (w, x, p) with (Aw, Ax, Ap), mirrored
		   across the diagonal */
		lis_vector_dot(w,Aw,&SA[0]);
		lis_vector_dot(x,Aw,&SA[3]);
		lis_vector_dot(p,Aw,&SA[6]);
		SA[1] = SA[3];
		lis_vector_dot(x,Ax,&SA[4]);
		lis_vector_dot(p,Ax,&SA[7]);
		SA[2] = SA[6];
		SA[5] = SA[7];
		lis_vector_dot(p,Ap,&SA[8]);

		/* SB: inner products of (w, x, p) with themselves, mirrored */
		lis_vector_dot(w,w,&SB[0]);
		lis_vector_dot(x,w,&SB[3]);
		lis_vector_dot(p,w,&SB[6]);
		SB[1] = SB[3];
		lis_vector_dot(x,x,&SB[4]);
		lis_vector_dot(p,x,&SB[7]);
		SB[2] = SB[6];
		SB[5] = SB[7];
		lis_vector_dot(p,p,&SB[8]);

		/* inner iteration on the 3x3 pair: v3 <- SA^-1 (SB v3) */
		lis_array_set_all(3, 1.0, v3);

		iter3=0;
		while (iter3<emaxiter)
		{
			iter3 = iter3 + 1;
			lis_array_nrm2(3, v3, &nrm2);
			lis_array_scale(3, 1/nrm2, v3);
			lis_array_matvec(3, SB, v3, SBv3, LIS_INS_VALUE);
			lis_array_invvec(3, SA, SBv3, z3);
			lis_array_dot2(3, SBv3, z3, &ievalue3);
			if (ievalue3==0)
			{
				/* cannot divide by ievalue3: release everything and stop */
				printf("ievalue3 is zero\n");
				lis_precon_destroy(precon);
				lis_solver_destroy(solver);
				esolver->iter = iter;
				esolver->resid = resid;
				esolver->evalue[0] = evalue;
				if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
				lis_free(SA);
				lis_free(SB);
				lis_free(SW);
				lis_free(v3);
				lis_free(SAv3);
				lis_free(SBv3);
				lis_free(SBz3);
				lis_free(z3);
				lis_free(q3);
				return LIS_BREAKDOWN;
			}
			lis_array_axpyz(3, -ievalue3, SBv3, z3, q3);
			lis_array_nrm2(3, q3, &resid3);
			resid3 = fabs(resid3 / ievalue3);
			if (resid3<1e-12) break;
			lis_array_copy(3,z3,v3);
		}

		evalue3 = 1 / ievalue3;

		/* w = v3[0]*w + v3[2]*p;  x = w + v3[1]*x;  p = w */
		lis_vector_scale(v3[0],w);
		lis_vector_axpy(v3[2],p,w);
		lis_vector_xpay(w,v3[1],x);
		lis_vector_copy(w,p);

		/* same combination applied to Aw, Ax, Ap */
		lis_vector_scale(v3[0],Aw);
		lis_vector_axpy(v3[2],Ap,Aw);
		lis_vector_xpay(Aw,v3[1],Ax);
		lis_vector_copy(Aw,Ap);

		/* rescale (x, Ax) by 1/||x|| and (p, Ap) by 1/||p|| */
		lis_vector_nrm2(x,&nrm2);
		lis_vector_scale(1/nrm2,x);
		lis_vector_scale(1/nrm2,Ax);

		lis_vector_nrm2(p,&nrm2);
		lis_vector_scale(1/nrm2,p);
		lis_vector_scale(1/nrm2,Ap);

		/* accumulate the inner solver's timings; the first output
		   argument is the address of the local `times` */
		lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
		esolver->ptimes += solver->ptimes;
		esolver->itimes += solver->itimes;
		esolver->p_c_times += solver->p_c_times;
		esolver->p_i_times += solver->p_i_times;
	}

	lis_precon_destroy(precon);
	lis_solver_destroy(solver);

	esolver->iter = iter;
	esolver->resid = resid;
	esolver->evalue[0] = evalue;

	/* undo the diagonal shift applied above */
	if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

	lis_free(SA);
	lis_free(SB);
	lis_free(SW);
	lis_free(v3);
	lis_free(SAv3);
	lis_free(SBv3);
	lis_free(SBz3);
	lis_free(z3);
	lis_free(q3);

	if (resid<tol)
	{
		esolver->retcode = LIS_SUCCESS;
		return LIS_SUCCESS;
	}
	else
	{
		esolver->retcode = LIS_MAXITER;
		return LIS_MAXITER;
	}
}
/*
 * lis_bicgsafe: product-type iteration that, per step, computes alpha from
 * the shadow residual rtld and then picks the pair (qsi, eta) from five
 * inner products of y, amr and r (eta is forced to 0 on the first step).
 *
 * Work vectors 0..11 are, in order: rtld, r, mr, amr, p, ap, t, mt, y, u,
 * z, au.  y, u and z are read before this routine writes them, so their
 * incoming contents matter.
 *
 * Returns LIS_SUCCESS on convergence (or when the initial-residual helper
 * returns non-zero), LIS_BREAKDOWN when <rtld,r> is exactly zero, and
 * LIS_MAXITER when the iteration limit is exhausted.
 */
LIS_INT lis_bicgsafe(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, mr, amr, t, mt, p, ap;
	LIS_VECTOR y, u, au, z;
	LIS_SCALAR alpha, beta;
	LIS_REAL rho, rho_prev;
	LIS_SCALAR qsi, eta;
	LIS_SCALAR det, rtld_ap, yy, amr_r, y_r, amr_y, amr_amr;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter, maxiter, output, conv;
	double t_start, ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	rtld = solver->work[0];
	r    = solver->work[1];
	mr   = solver->work[2];
	amr  = solver->work[3];
	p    = solver->work[4];
	ap   = solver->work[5];
	t    = solver->work[6];
	mt   = solver->work[7];
	y    = solver->work[8];
	u    = solver->work[9];
	z    = solver->work[10];
	au   = solver->work[11];

	/* set up r; a non-zero return from the helper ends the solve */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* mr = M^-1 r, amr = A mr */
	t_start = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-t_start;
	lis_matvec(A,mr,amr);

	lis_vector_dot(rtld,r,&rho_prev);

	/* first search direction: p = mr, ap = amr */
	lis_vector_copy(amr,ap);
	lis_vector_copy(mr,p);
	beta = 0.0;

	iter = 1;
	while( iter<=maxiter )
	{
		/* alpha = rho_prev / <rtld,ap> */
		lis_vector_dot(rtld,ap,&rtld_ap);
		alpha = rho_prev / rtld_ap;

		/* the five inner products that determine qsi and eta */
		lis_vector_dot(y,y,&yy);
		lis_vector_dot(amr,r,&amr_r);
		lis_vector_dot(y,r,&y_r);
		lis_vector_dot(amr,y,&amr_y);
		lis_vector_dot(amr,amr,&amr_amr);
		if( iter==1 )
		{
			qsi = amr_r / amr_amr;
			eta = 0.0;
		}
		else
		{
			det = amr_amr*yy - amr_y*amr_y;
			qsi = (yy*amr_r - y_r*amr_y) / det;
			eta = (amr_amr*y_r - amr_y*amr_r) / det;
		}

		/* t = qsi*ap + eta*y */
		lis_vector_copy(y,t);
		lis_vector_scale(eta,t);
		lis_vector_axpy(qsi,ap,t);

		/* mt = M^-1 t */
		t_start = lis_wtime();
		lis_psolve(solver, t, mt);
		ptime += lis_wtime()-t_start;

		/* u = mt + eta*beta*u, au = A u */
		lis_vector_xpay(mt,eta*beta,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u */
		lis_vector_scale(eta,z);
		lis_vector_axpy(qsi,mr,z);
		lis_vector_axpy(-alpha,u,z);

		/* y = qsi*amr + eta*y - alpha*au */
		lis_vector_scale(eta,y);
		lis_vector_axpy(qsi,amr,y);
		lis_vector_axpy(-alpha,au,y);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(1.0,z,x);

		/* r = r - alpha*ap - y */
		lis_vector_axpy(-alpha,ap,r);
		lis_vector_axpy(-1.0,y,r);

		/* residual bookkeeping and stopping test */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}
		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rho = <rtld,r>; zero means the recurrence cannot continue */
		lis_vector_dot(rtld,r,&rho);
		if( rho==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_prev) * (alpha / qsi) */
		beta = (rho / rho_prev) * (alpha / qsi);

		/* mr = M^-1 r, amr = A mr */
		t_start = lis_wtime();
		lis_psolve(solver, r, mr);
		ptime += lis_wtime()-t_start;
		lis_matvec(A,mr,amr);

		/* p  = mr  + beta*(p  - u)  */
		/* ap = amr + beta*(ap - au) */
		lis_vector_axpy(-1.0,u,p);
		lis_vector_xpay(mr,beta,p);
		lis_vector_axpy(-1.0,au,ap);
		lis_vector_xpay(amr,beta,ap);

		rho_prev = rho;
		iter++;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_fgmres: restarted Krylov solve of A x = b in which the
 * preconditioned vectors z[k] = M^-1 v[k] are kept and the solution
 * update is built from them (x = x + sum_j y_j z[j]).
 *
 * m = LIS_OPTIONS_RESTART is the restart length.  Work vectors: s
 * (work[0]) carries the rotated right-hand side, z starts at work[2],
 * v at work[m+2].  h is a flat array with column stride h_dim = m+1;
 * the offsets cs and sn address the two plane-rotation coefficients
 * stored after the Hessenberg columns.
 *
 * Between restarts bnrm2 holds the reciprocal of rnorm: v[0] is scaled
 * by bnrm2 and s[0] is set to rnorm.
 *
 * Returns LIS_SUCCESS when |s[i+1]| drops to tol (or when the
 * initial-residual helper returns non-zero), LIS_MAXITER otherwise.
 * h is freed on every return path.
 *
 * NOTE(review): the MAXITER path stores iter+1 in solver->iter and does
 * not store ptimes -- confirm this is what callers expect.
 */
LIS_INT lis_fgmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s, *z, *v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;
	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,i1h,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	m = solver->options[LIS_OPTIONS_RESTART];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	h_dim = m+1;
	ptimes = 0.0;

	s = solver->work[0];
	r = solver->work[1];
	z = &solver->work[2];
	v = &solver->work[m+2];

	h = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (h_dim+1) * (h_dim+2),"lis_gmres::h" );
	/* offsets of the two rotation-coefficient columns inside h */
	cs = (m+1)*h_dim;
	sn = (m+2)*h_dim;

	/* Initial residual, written into v[0]; a non-zero return ends the solve */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;
	rnorm = 1.0 / bnrm2;

	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scale(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			/* ii / i1: current and next row; iih / i1h: offsets of the
			   current and next column of h */
			ii = i-1;
			i1 = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;
			i1h = i*h_dim;

			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z[iiv]);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z[iiv], v[i1v]);

			/* orthogonalize the new vector against v[0..i-1] */
			for(k=0;k<i;k++)
			{
				/* h[k,i] = <w,v[k]> */
				/* w = w - h[k,i]v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k + iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w|| */
			/* v[i+1] = w / h[i+1,i] */
			lis_vector_nrm2(v[i1v],&t);
			h[i1 + iih] = t;
			lis_vector_scale(1.0/t,v[i1v]);

			/* apply the previously stored rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj = k-1;
				t = h[jj + iih];
				aa = h[jj + cs]*t;
				aa += h[jj + sn]*h[k + iih];
				bb = -h[jj + sn]*t;
				bb += h[jj + cs]*h[k + iih];
				h[jj + iih] = aa;
				h[k + iih] = bb;
			}

			/* new rotation from (h[ii], h[i1]); rr is replaced by a tiny
			   value when it is exactly zero so the divisions stay defined */
			aa = h[ii + iih];
			bb = h[i1 + iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2 + b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii + cs] = aa / rr;
			h[ii + sn] = bb / rr;

			/* rotate the right-hand side; s[i1] must be set before s[ii]
			   is overwritten because both read the old s[ii] */
			s->value[i1] = -h[ii + sn]*s->value[ii];
			s->value[ii] = h[ii + cs]*s->value[ii];

			/* rotated diagonal entry of the current column */
			aa = h[ii + cs]*h[ii + iih];
			aa += h[ii + sn]*h[i1 + iih];
			h[ii + iih] = aa;

			/* convergence check */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H*Y =S for upper triangular H (back substitution, in s) */
		s->value[ii] = s->value[ii] / h[ii + iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj + j*h_dim]*s->value[j];
			}
			s->value[jj] = t / h[jj + jj*h_dim];
		}

		/* x = x + zy */
		for(j=0;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* restart: v[0] = b - A*x, rnorm = ||v[0]||, bnrm2 = 1/rnorm */
		LIS_MATVEC(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0 / rnorm;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter+1;
	solver->resid = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * BiCGSafe solver with a precision switch.
 *
 * Phase 1 runs the recurrences with the plain lis_vector_* kernels and
 * only the .hi word of every quad scalar, for at most
 * LIS_OPTIONS_SWITCH_MAXITER iterations or until the residual norm is
 * <= solver->tol_switch.  The work vectors are then flagged
 * LIS_PRECISION_QUAD, the residual is recomputed, and phase 2 repeats the
 * same recurrences with the *ex_* vector kernels and lis_quad_* scalar
 * arithmetic until the residual norm is < solver->tol or the overall
 * counter exceeds LIS_OPTIONS_MAXITER.
 *
 * Statistics are stored in solver->retcode / iter / iter2 / resid / ptime.
 * For exits taken in phase 2, solver->iter is the overall counter and
 * solver->iter2 the value the phase-1 counter stopped at; for a phase-1
 * breakdown both hold the phase-1 counter.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,r> == 0) or LIS_MAXITER.
 * LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 *
 * NOTE(review): nrm2 is still unset if a breakdown is hit before the
 * first convergence check; solver->resid is then meaningless.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld, phat;
    LIS_VECTOR t, ttld, that, t0, t0hat;
    LIS_VECTOR y, w, u, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, nrm2, tol, tol2;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptime    = 0.0;

    rtld  = solver->work[0];
    r     = solver->work[1];
    rhat  = solver->work[2];
    p     = solver->work[3];
    ptld  = solver->work[4];
    phat  = solver->work[5];
    t     = solver->work[6];
    ttld  = solver->work[7];
    that  = solver->work[8];
    t0    = solver->work[9];
    t0hat = solver->work[10];
    y     = solver->work[11];
    w     = solver->work[12];
    u     = solver->work[13];
    z     = solver->work[14];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    /* despite its name, "one" holds -1; it is flipped to +1 only around
       the x update in phase 2 */
    one.hi[0]     = -1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 1: .hi words only ---------------- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld,r,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

        /* w = ttld + beta*ptld */
        lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpy(-1,u,p);
        lis_vector_xpay(rhat,beta.hi[0],p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
        /* NOTE(review): no breakdown test on tmpdot[0] before the division */

        /* alpha = rho / tmpdot[0] */
        alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyz(-1,w,ptld,y);
        lis_vector_xpay(t,alpha.hi[0],y);
        lis_vector_axpy(-1,r,y);

        /* t = r - alpha*ptld */
        lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dot(y,y,&tmpdot[0].hi[0]);
        lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
        lis_vector_dot(y,t,&tmpdot[2].hi[0]);
        lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
        lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
        if( iter==1 )
        {
            qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
            eta.hi[0] = 0.0;
        }
        else
        {
            tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
            qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
            eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpay(t0hat,beta.hi[0],u);
        lis_vector_axpy(-1,rhat,u);
        lis_vector_scale(eta.hi[0],u);
        lis_vector_axpy(qsi.hi[0],phat,u);

        /* z = qsi*rhat + eta*z - alpha*u */
        lis_vector_scale(eta.hi[0],z);
        lis_vector_axpy(qsi.hi[0],rhat,z);
        lis_vector_axpy(-alpha.hi[0],u,z);

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha.hi[0],p,x);
        lis_vector_axpy(1,z,x);

        /* r = t - eta*y - qsi*ttld */
        lis_vector_axpyz(-eta.hi[0],y,t,r);
        lis_vector_axpy(-qsi.hi[0],ttld,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol2 >= nrm2 )
        {
            solver->iter  = iter;
            solver->iter2 = iter;
            solver->ptime = ptime;
            break;
        }

        lis_vector_copy(t,t0);
        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- switch to quad precision ---------------- */
    r->precision    = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    t->precision    = LIS_PRECISION_QUAD;
    t0->precision   = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;
    that->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    rho_old.hi[0] = 1.0;
    alpha.hi[0]   = 1.0;
    qsi.hi[0]     = 1.0;
    one.hi[0]     = -1.0;

    /* Initial Residual (restart from the phase-1 iterate) */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 2: quad precision ---------------- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

        /* w = ttld + beta*ptld */
        lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(rhat,beta,p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
        /* NOTE(review): no breakdown test on tmpdot[0] before the division */

        /* alpha = rho / tmpdot[0] */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyzex_mmmm(one,w,ptld,y);
        lis_vector_xpayex_mmm(t,alpha,y);
        lis_vector_axpyex_mmm(one,r,y);

        /* t = r - alpha*ptld   (alpha is held negated from here on) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dotex_mmm(y,y,&tmpdot[0]);
        lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
        lis_vector_dotex_mmm(y,t,&tmpdot[2]);
        lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
        lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);

        /* The first iteration after the restart needs the one-parameter
           form (eta = 0), exactly like iter==1 in phase 1: t, t0, ttld,
           ptld, p and u were just zeroed, so y == -t and the general
           two-parameter formula would be degenerate. */
        if( iter2==iter+1 )
        {
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = <ttld,ttld>*<y,y> - <ttld,y>^2 */
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

            /* qsi = (<y,y>*<ttld,t> - <y,t>*<ttld,y>) / tmp */
            lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

            /* eta = (<ttld,ttld>*<y,t> - <ttld,y>*<ttld,t>) / tmp
               (tmpdot[0] is reused as scratch; <y,y> is no longer needed) */
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpayex_mmm(t0hat,beta,u);
        lis_vector_axpyex_mmm(one,rhat,u);
        lis_vector_scaleex_mm(eta,u);
        lis_vector_axpyex_mmm(qsi,phat,u);

        /* z = qsi*rhat + eta*z - alpha*u   (alpha still negated) */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,rhat,z);
        lis_vector_axpyex_mmm(alpha,u,z);

        /* x = x + alpha*p + z   (restore +alpha, flip "one" to +1 and back) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_vector_axpyex_mmm(one,z,x);
        lis_quad_minus((LIS_QUAD *)one.hi);

        /* r = t - eta*y - qsi*ttld   (negate eta/qsi, then restore) */
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);
        lis_vector_axpyzex_mmmm(eta,y,t,r);
        lis_vector_axpyex_mmm(qsi,ttld,r);
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
            /* report the overall counter, matching the rhistory slot */
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        lis_vector_copyex_mm(t,t0);
        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    /* same iter / iter2 convention as the other phase-2 exits */
    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * BiCG solver with a precision switch.
 *
 * Phase 1 iterates with the plain lis_vector_* kernels and only the .hi
 * word of every quad scalar, for at most LIS_OPTIONS_SWITCH_MAXITER
 * iterations or until the residual norm is <= solver->tol_switch.  The
 * vectors r, rtld, p and ptld are then flagged LIS_PRECISION_QUAD, the
 * residual is recomputed, and phase 2 repeats the recurrences with the
 * *ex_* kernels and lis_quad_* arithmetic until the residual norm is
 * < solver->tol or the overall counter exceeds LIS_OPTIONS_MAXITER.
 *
 * Statistics are stored in solver->retcode / iter / iter2 / resid /
 * ptimes.  For exits taken in phase 2, solver->iter is the overall
 * counter and solver->iter2 the value the phase-1 counter stopped at.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<z,rtld> == 0 or <ptld,q> == 0) or
 * LIS_MAXITER.  LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 *
 * NOTE(review): nrm2 is still unset if a breakdown is hit before the
 * first convergence check; solver->resid is then meaningless.
 */
LIS_INT lis_bicg_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A, At;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
    LIS_REAL bnrm2, nrm2, tol, tol2;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    At       = solver->A;   /* same matrix; LIS_MATVECT is used for the A^T product */
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptimes   = 0.0;

    r    = solver->work[0];
    rtld = solver->work[1];
    z    = solver->work[2];
    ztld = solver->work[3];
    p    = solver->work[4];
    ptld = solver->work[5];
    /* q and qtld share storage with z and ztld: z/ztld are consumed by
       the p/ptld updates before q/qtld are written */
    q    = solver->work[2];
    qtld = solver->work[3];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, ptld);

    r->precision    = LIS_PRECISION_DEFAULT;
    rtld->precision = LIS_PRECISION_DEFAULT;
    p->precision    = LIS_PRECISION_DEFAULT;
    ptld->precision = LIS_PRECISION_DEFAULT;

    /* ---------------- phase 1: .hi words only ---------------- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* z    = M^-1 * r    */
        /* ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;

        /* rho = <z,rtld> */
        lis_vector_dot(z,rtld,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        beta.hi[0] = rho.hi[0] / rho_old.hi[0];

        /* p    = z    + beta*p    */
        /* ptld = ztld + beta*ptld */
        /* q    = A   * p          */
        /* qtld = A^T * ptld       */
        lis_vector_xpay(z,beta.hi[0],p);
        LIS_MATVEC(A,p,q);

        lis_vector_xpay(ztld,beta.hi[0],ptld);
        LIS_MATVECT(At,ptld,qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dot(ptld,q,&tmpdot1.hi[0]);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

        /* x = x + alpha*p */
        lis_vector_axpy(alpha.hi[0],p,x);

        /* r = r - alpha*q */
        lis_vector_axpy(-alpha.hi[0],q,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( nrm2 <= tol2 )
        {
            solver->iter   = iter;
            solver->iter2  = iter;
            solver->ptimes = ptimes;
            break;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpy(-alpha.hi[0],qtld,rtld);

        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- switch to quad precision ---------------- */
    r->precision    = LIS_PRECISION_QUAD;
    rtld->precision = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    rho_old.hi[0] = 1.0;

    /* Initial Residual (restart from the phase-1 iterate) */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, ptld);

    /* ---------------- phase 2: quad precision ---------------- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* z    = M^-1 * r    */
        /* ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;

        /* rho = <z,rtld> */
        lis_vector_dotex_mmm(z,rtld,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

        /* p    = z    + beta*p    */
        /* ptld = ztld + beta*ptld */
        /* q    = A   * p          */
        /* qtld = A^T * ptld       */
        lis_vector_xpayex_mmm(z,beta,p);
        LIS_MATVEC(A,p,q);

        lis_vector_xpayex_mmm(ztld,beta,ptld);
        LIS_MATVECT(At,ptld,qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dotex_mmm(ptld,q,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* x = x + alpha*p */
        lis_vector_axpyex_mmm(alpha,p,x);

        /* r = r - alpha*q   (alpha stays negated for the rtld update;
           it is recomputed at the top of the next iteration) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,q,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
            /* report the overall counter, matching the history slot */
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpyex_mmm(alpha,qtld,rtld);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Preconditioned BiCGSTAB.
 *
 * Runs at most LIS_OPTIONS_MAXITER iterations.  Each iteration checks the
 * residual twice: once on the intermediate vector s (early exit, x is
 * advanced by alpha*phat only) and once on the fully updated r.
 *
 * Statistics are stored in solver->retcode / iter / resid / ptimes.
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,r> == 0 or omega == 0) or
 * LIS_MAXITER.  LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_bicgstab(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b, x;
    LIS_VECTOR r, rtld, t, p, v, s, phat, shat;
    LIS_SCALAR alpha, beta, omega, rho, rho_prev;
    LIS_SCALAR rtld_v, dot_ts, dot_tt;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, n, output, conv;
    double tick, precon_time;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    M = solver->precon;
    b = solver->b;
    x = solver->x;
    n = A->n;

    maxiter     = solver->options[LIS_OPTIONS_MAXITER];
    output      = solver->options[LIS_OPTIONS_OUTPUT];
    conv        = solver->options[LIS_OPTIONS_CONV_COND];
    precon_time = 0.0;

    /* work-vector layout; s deliberately aliases r (updated in place) */
    rtld = solver->work[0];
    r    = solver->work[1];
    s    = solver->work[1];
    t    = solver->work[2];
    p    = solver->work[3];
    v    = solver->work[4];
    phat = solver->work[5];
    shat = solver->work[6];

    alpha    = (LIS_SCALAR)1.0;
    omega    = (LIS_SCALAR)1.0;
    rho_prev = (LIS_SCALAR)1.0;

    lis_vector_set_all(0.0,p);
    lis_vector_set_all(0.0,phat);
    lis_vector_set_all(0.0,s);
    lis_vector_set_all(0.0,shat);

    /* r <- initial residual; bail out if the helper says there is nothing to do */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* rho = <rtld,r>; a zero value is a breakdown */
        lis_vector_dot(rtld,r,&rho);
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        if( iter!=1 )
        {
            /* beta = (rho / rho_prev) * (alpha / omega)
               p    = r + beta*(p - omega*v)             */
            beta = (rho / rho_prev) * (alpha / omega);
            lis_vector_axpy(-omega,v,p);
            lis_vector_xpay(r,beta,p);
        }
        else
        {
            /* first pass: search direction is the residual itself */
            lis_vector_copy(r,p);
        }

        /* phat = M^-1 * p (timed) */
        tick = lis_wtime();
        lis_psolve(solver, p, phat);
        precon_time += lis_wtime()-tick;

        /* v = A * phat */
        LIS_MATVEC(A,phat,v);

        /* alpha = rho / <rtld,v>
           NOTE(review): no breakdown test on <rtld,v> before dividing */
        lis_vector_dot(rtld,v,&rtld_v);
        alpha = rho / rtld_v;

        /* s = r - alpha*v (in place, s aliases r) */
        lis_vector_axpy(-alpha,v,r);

        /* early convergence test on s */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( nrm2 <= tol )
        {
            if( output & LIS_PRINT_MEM )
            {
                solver->residual[iter] = nrm2;
            }
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
            {
                printf("iter: %5d residual = %e\n", iter, nrm2);
            }

            lis_vector_axpy(alpha,phat,x);
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* shat = M^-1 * s (timed) */
        tick = lis_wtime();
        lis_psolve(solver, s, shat);
        precon_time += lis_wtime()-tick;

        /* t = A * shat */
        LIS_MATVEC(A,shat,t);

        /* omega = <t,s> / <t,t> */
        lis_vector_dot(t,s,&dot_ts);
        lis_vector_dot(t,t,&dot_tt);
        omega = dot_ts / dot_tt;

        /* x = x + alpha*phat + omega*shat */
        lis_vector_axpy(alpha,phat,x);
        lis_vector_axpy(omega,shat,x);

        /* r = s - omega*t */
        lis_vector_axpy(-omega,t,r);

        /* full convergence test on r */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output & LIS_PRINT_MEM )
        {
            solver->residual[iter] = nrm2;
        }
        if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
        {
            printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* omega == 0 would make the next beta undefined */
        if( omega==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        rho_prev = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Restarted GMRES(m), m = LIS_OPTIONS_RESTART.
 *
 * One scratch array h holds the Hessenberg columns (column c starts at
 * c*h_dim) followed by the Givens cosines (offset cs_off) and sines
 * (offset sn_off).  Each inner step orthogonalises a new basis vector
 * against the previous ones, applies the stored rotations to the new
 * column, generates a new rotation and updates the right-hand side s.
 * After the inner loop the triangular system is solved, x is corrected,
 * and v[0] is rebuilt for the next restart cycle.
 *
 * Statistics are stored in solver->retcode / iter / resid / ptime.
 * Returns LIS_SUCCESS or LIS_MAXITER; LIS_SUCCESS is also returned
 * immediately when lis_solver_get_initial_residual() reports a non-zero
 * status.  h is released on every exit path.
 */
LIS_INT lis_gmres(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR b, x;
    LIS_VECTOR r, s, z, *v;
    LIS_SCALAR *h;
    LIS_SCALAR aa, bb, rr, a2, b2, tmp;
    LIS_REAL wnorm;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, n, output;
    double tick, precon_time;
    LIS_REAL rnorm;
    LIS_INT i, j, k, m;
    LIS_INT col, nxt, col_off, row;
    LIS_INT h_dim;
    LIS_INT cs_off, sn_off;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    b = solver->b;
    x = solver->x;
    n = A->n;

    maxiter     = solver->options[LIS_OPTIONS_MAXITER];
    output      = solver->options[LIS_OPTIONS_OUTPUT];
    m           = solver->options[LIS_OPTIONS_RESTART];
    h_dim       = m+1;
    precon_time = 0.0;

    s = solver->work[0];
    r = solver->work[1];
    z = solver->work[2];
    v = &solver->work[3];

    h = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
    cs_off = (m+1)*h_dim;
    sn_off = (m+2)*h_dim;

    /* v[0] = M^-1 * (b - A * x) */
    lis_matvec(A,x,z);
    lis_vector_xpay(b,-1.0,z);
    lis_psolve(solver,z,v[0]);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
    {
        lis_free(h);
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    iter = 0;
    while( iter<maxiter )
    {
        /* normalise the first basis vector: v[0] = r / ||r||_2 */
        lis_vector_nrm2(v[0],&rnorm);
        lis_vector_scale(1.0/rnorm,v[0]);

        /* s = ||r||_2 * e_1 */
        lis_vector_set_all(0,s);
        s->value[0] = rnorm;

        i = 0;
        do
        {
            iter++;
            i++;
            col     = i-1;           /* column being built (0-based) */
            nxt     = i;             /* index of the new basis vector */
            col_off = (i-1)*h_dim;   /* start of that column inside h */

            /* z = M^-1 * v[col] */
            tick = lis_wtime();
            lis_psolve(solver,v[col],z);
            precon_time += lis_wtime()-tick;

            /* w = A * z, stored directly in v[nxt] */
            lis_matvec(A,z,v[nxt]);

            /* h[k,col] = <w,v[k]>;  w -= h[k,col] * v[k] */
            for(k=0;k<i;k++)
            {
                lis_vector_dot(v[nxt],v[k],&tmp);
                h[k+col_off] = tmp;
                lis_vector_axpy(-tmp,v[k],v[nxt]);
            }

            /* h[nxt,col] = ||w||;  v[nxt] = w / h[nxt,col] */
            lis_vector_nrm2(v[nxt],&wnorm);
            h[nxt+col_off] = wnorm;
            lis_vector_scale(1.0/wnorm,v[nxt]);

            /* apply the rotations generated so far to the new column */
            for(k=1;k<=col;k++)
            {
                row = k-1;
                tmp = h[row+col_off];
                aa  =  h[row+cs_off]*tmp;
                aa +=  h[row+sn_off]*h[k+col_off];
                bb  = -h[row+sn_off]*tmp;
                bb +=  h[row+cs_off]*h[k+col_off];
                h[row+col_off] = aa;
                h[k+col_off]   = bb;
            }

            /* new rotation that zeroes h[nxt,col] */
            aa = h[col+col_off];
            bb = h[nxt+col_off];
            a2 = aa*aa;
            b2 = bb*bb;
            rr = sqrt(a2+b2);
            if( rr==0.0 )
            {
                rr = 1.0e-17;
            }
            h[col+cs_off] = aa/rr;
            h[col+sn_off] = bb/rr;

            /* rotate the right-hand side */
            s->value[nxt] = -h[col+sn_off]*s->value[col];
            s->value[col] =  h[col+cs_off]*s->value[col];

            /* rotated diagonal entry */
            aa  = h[col+cs_off]*h[col+col_off];
            aa += h[col+sn_off]*h[nxt+col_off];
            h[col+col_off] = aa;

            /* convergence check */
            nrm2 = sabs(s->value[nxt])*bnrm2;
            if( output & LIS_PRINT_MEM )
            {
                solver->rhistory[iter] = nrm2;
            }
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
            {
                lis_print_rhistory(iter,nrm2);
            }

            if( tol >= nrm2 ) break;
        } while( i<m && iter <maxiter );

        /* back substitution: solve H * y = s, y overwrites s */
        s->value[col] = s->value[col]/h[col+col_off];
        for(k=1;k<=col;k++)
        {
            row = col-k;
            tmp = s->value[row];
            for(j=row+1;j<=col;j++)
            {
                tmp -= h[row+j*h_dim]*s->value[j];
            }
            s->value[row] = tmp/h[row+row*h_dim];
        }

        /* z = sum_j y[j] * v[j] */
#ifdef _OPENMP
#pragma omp parallel for private(k)
#endif
        for(k=0;k<n;k++)
        {
            z->value[k] = s->value[0]*v[0]->value[k];
        }
        for(j=1;j<=col;j++)
        {
            lis_vector_axpy(s->value[j],v[j],z);
        }

        /* r = M^-1 * z */
        tick = lis_wtime();
        lis_psolve(solver,z,r);
        precon_time += lis_wtime()-tick;

        /* x = x + r */
        lis_vector_axpy(1,r,x);

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = precon_time;
            lis_free(h);
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* undo the rotations on s, last to first, then fold the result
           into v[0] as the starting vector of the next cycle */
        for(j=1;j<=i;j++)
        {
            row = nxt-j+1;
            s->value[row-1] = -h[row-1+sn_off]*s->value[row];
            s->value[row]   =  h[row-1+cs_off]*s->value[row];
        }

        for(j=0;j<=nxt;j++)
        {
            tmp = s->value[j];
            if( j==0 ) tmp = tmp-1.0;
            lis_vector_axpy(tmp,v[j],v[0]);
        }
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter+1;
    solver->resid   = nrm2;
    lis_free(h);
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * BiCGSTAB solver with a precision switch.
 *
 * Phase 1 iterates with the plain lis_vector_* kernels and only the .hi
 * word of every quad scalar, for at most LIS_OPTIONS_SWITCH_MAXITER
 * iterations or until the residual norm is <= solver->tol_switch.  The
 * vectors s, shat, p and phat are then flagged LIS_PRECISION_QUAD, the
 * recurrence state is reset, the residual is recomputed, and phase 2
 * repeats the method with the *ex_* kernels and lis_quad_* arithmetic
 * until the residual norm is < solver->tol or the overall counter exceeds
 * LIS_OPTIONS_MAXITER.
 *
 * Statistics are stored in solver->retcode / iter / iter2 / resid /
 * ptimes.  For exits taken in phase 2, solver->iter is the overall
 * counter and solver->iter2 the value the phase-1 counter stopped at.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,r> == 0 or omega == 0) or
 * LIS_MAXITER.  LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 *
 * NOTE(review): nrm2 is still unset if a breakdown is hit before the
 * first convergence check; solver->resid is then meaningless.
 */
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, t, p, v, s, phat, shat;
    LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
    LIS_REAL bnrm2, nrm2, tol, tol2;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptimes   = 0.0;

    /* s deliberately aliases r: "s = r - alpha*v" is done in place */
    rtld = solver->work[0];
    r    = solver->work[1];
    s    = solver->work[1];
    t    = solver->work[2];
    p    = solver->work[3];
    v    = solver->work[4];
    phat = solver->work[5];
    shat = solver->work[6];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(omega,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    omega.hi[0]   = 1.0;
    omega.lo[0]   = 0.0;

    /* must precede the residual computation: zeroing s also zeroes r */
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, phat);
    lis_vector_set_allex_nm(0.0, s);
    lis_vector_set_allex_nm(0.0, shat);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    s->precision    = LIS_PRECISION_DEFAULT;
    shat->precision = LIS_PRECISION_DEFAULT;
    p->precision    = LIS_PRECISION_DEFAULT;
    phat->precision = LIS_PRECISION_DEFAULT;

    /* ---------------- phase 1: .hi words only ---------------- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld,r,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        if( iter==1 )
        {
            lis_vector_copy(r,p);
        }
        else
        {
            /* beta = (rho / rho_old) * (alpha / omega) */
            beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]);

            /* p = r + beta*(p - omega*v) */
            lis_vector_axpy(-omega.hi[0],v,p);
            lis_vector_xpay(r,beta.hi[0],p);
        }

        /* phat = M^-1 * p */
        times = lis_wtime();
        lis_psolve(solver, p, phat);
        ptimes += lis_wtime()-times;

        /* v = A * phat */
        LIS_MATVEC(A,phat,v);

        /* tmpdot1 = <rtld,v> */
        lis_vector_dot(rtld,v,&tmpdot1.hi[0]);
        /* NOTE(review): no breakdown test on tmpdot1 before the division */

        /* alpha = rho / tmpdot1 */
        alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

        /* s = r - alpha*v */
        lis_vector_axpy(-alpha.hi[0],v,r);

        /* Early check for tolerance */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( nrm2 <= tol2 )
        {
            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
            }

            lis_vector_axpy(alpha.hi[0],phat,x);
            solver->iter   = iter;
            solver->iter2  = iter;
            solver->ptimes = ptimes;
            break;
        }

        /* shat = M^-1 * s */
        times = lis_wtime();
        lis_psolve(solver, s, shat);
        ptimes += lis_wtime()-times;

        /* t = A * shat */
        LIS_MATVEC(A,shat,t);

        /* tmpdot1 = <t,s> */
        /* tmpdot2 = <t,t> */
        /* omega   = tmpdot1 / tmpdot2 */
        lis_vector_dot(t,s,&tmpdot1.hi[0]);
        lis_vector_dot(t,t,&tmpdot2.hi[0]);
        omega.hi[0] = tmpdot1.hi[0] / tmpdot2.hi[0];

        /* x = x + alpha*phat + omega*shat */
        lis_vector_axpy(alpha.hi[0],phat,x);
        lis_vector_axpy(omega.hi[0],shat,x);

        /* r = s - omega*t */
        lis_vector_axpy(-omega.hi[0],t,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( nrm2 <= tol2 )
        {
            solver->iter   = iter;
            solver->iter2  = iter;
            solver->ptimes = ptimes;
            break;
        }

        if( omega.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }
        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- switch to quad precision ---------------- */
    s->precision    = LIS_PRECISION_QUAD;
    shat->precision = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    phat->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    rho_old.hi[0] = 1.0;
    alpha.hi[0]   = 1.0;
    omega.hi[0]   = 1.0;

    /* must precede the residual computation: zeroing s also zeroes r */
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, phat);
    lis_vector_set_allex_nm(0.0, s);
    lis_vector_set_allex_nm(0.0, shat);

    /* Initial Residual (restart from the phase-1 iterate) */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    /* ---------------- phase 2: quad precision ---------------- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* First iteration after the restart: seed p with the fresh
           residual, as phase 1 does for iter==1.  (iter2 starts at
           iter+1, so a test against 1 can never fire and the stale
           phase-1 v would leak into p.) */
        if( iter2==iter+1 )
        {
            lis_vector_copyex_mm(r,p);
        }
        else
        {
            /* beta = (rho / rho_old) * (alpha / omega) */
            lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
            lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
            lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);

            /* p = r + beta*(p - omega*v)
               (omega is left negated; it is recomputed before its next use) */
            lis_quad_minus((LIS_QUAD *)omega.hi);
            lis_vector_axpyex_mmm(omega,v,p);
            lis_vector_xpayex_mmm(r,beta,p);
        }

        /* phat = M^-1 * p */
        times = lis_wtime();
        lis_psolve(solver, p, phat);
        ptimes += lis_wtime()-times;

        /* v = A * phat */
        LIS_MATVEC(A,phat,v);

        /* tmpdot1 = <rtld,v> */
        lis_vector_dotex_mmm(rtld,v,&tmpdot1);
        /* NOTE(review): no breakdown test on tmpdot1 before the division */

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* s = r - alpha*v   (alpha is held negated until the x update) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,v,r);

        /* Early check for tolerance */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( tol > nrm2 )
        {
            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
                if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
            }

            /* x = x + alpha*phat */
            lis_quad_minus((LIS_QUAD *)alpha.hi);
            lis_vector_axpyex_mmm(alpha,phat,x);
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* shat = M^-1 * s */
        times = lis_wtime();
        lis_psolve(solver, s, shat);
        ptimes += lis_wtime()-times;

        /* t = A * shat */
        LIS_MATVEC(A,shat,t);

        /* tmpdot1 = <t,s> */
        /* tmpdot2 = <t,t> */
        /* omega   = tmpdot1 / tmpdot2 */
        lis_vector_dotex_mmm(t,s,&tmpdot1);
        lis_vector_dotex_mmm(t,t,&tmpdot2);
        lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

        /* x = x + alpha*phat + omega*shat   (restore +alpha first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,phat,x);
        lis_vector_axpyex_mmm(omega,shat,x);

        /* r = s - omega*t   (negate omega, then restore) */
        lis_quad_minus((LIS_QUAD *)omega.hi);
        lis_vector_axpyex_mmm(omega,t,r);
        lis_quad_minus((LIS_QUAD *)omega.hi);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        if( omega.hi[0]==0.0 && omega.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }
        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Preconditioned CGS (conjugate gradient squared).
 *
 * Runs at most LIS_OPTIONS_MAXITER iterations, stopping as soon as the
 * residual norm reported by lis_solver_get_residual[conv] is <= solver->tol.
 *
 * Statistics are stored in solver->retcode / iter / resid / ptime.
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,r> == 0 or <rtld,vhat> == 0)
 * or LIS_MAXITER.  LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_cgs(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, p, phat, q, qhat, u, uhat, vhat;
    LIS_SCALAR alpha, beta, rho, rho_prev, rtld_vhat;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double tick, precon_time;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    x = solver->x;

    maxiter     = solver->options[LIS_OPTIONS_MAXITER];
    output      = solver->options[LIS_OPTIONS_OUTPUT];
    conv        = solver->options[LIS_OPTIONS_CONV_COND];
    precon_time = 0.0;

    /* work-vector layout; qhat/u share work[5] and uhat/vhat share work[6] */
    r    = solver->work[0];
    rtld = solver->work[1];
    p    = solver->work[2];
    phat = solver->work[3];
    q    = solver->work[4];
    qhat = solver->work[5];
    u    = solver->work[5];
    uhat = solver->work[6];
    vhat = solver->work[6];

    alpha    = (LIS_SCALAR)1.0;
    rho_prev = (LIS_SCALAR)1.0;

    /* r <- initial residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_all(0,q);
    lis_vector_set_all(0,p);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* rho = <rtld,r>; zero means breakdown */
        lis_vector_dot(rtld,r,&rho);
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = rho / rho_prev */
        beta = (rho / rho_prev);

        /* u = r + beta*q */
        lis_vector_axpyz(beta,q,r,u);

        /* p = u + beta*(q + beta*p), built in two steps */
        lis_vector_xpay(q,beta,p);
        lis_vector_xpay(u,beta,p);

        /* phat = M^-1 * p (timed) */
        tick = lis_wtime();
        lis_psolve(solver, p, phat);
        precon_time += lis_wtime()-tick;

        /* vhat = A * phat */
        lis_matvec(A,phat,vhat);

        /* <rtld,vhat>; zero means breakdown */
        lis_vector_dot(rtld,vhat,&rtld_vhat);
        if( rtld_vhat==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / <rtld,vhat> */
        alpha = rho / rtld_vhat;

        /* q = u - alpha*vhat */
        lis_vector_axpyz(-alpha,vhat,u,q);

        /* phat = u + q, then uhat = M^-1 * phat (timed);
           uhat overwrites vhat, which is no longer needed */
        lis_vector_axpyz(1,u,q,phat);
        tick = lis_wtime();
        lis_psolve(solver, phat, uhat);
        precon_time += lis_wtime()-tick;

        /* x = x + alpha*uhat */
        lis_vector_axpy(alpha,uhat,x);

        /* qhat = A * uhat (overwrites u) */
        lis_matvec(A,uhat,qhat);

        /* r = r - alpha*qhat */
        lis_vector_axpy(-alpha,qhat,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output & LIS_PRINT_MEM )
        {
            solver->rhistory[iter] = nrm2;
        }
        if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
        {
            lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_prev = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Preconditioned BiCRSTAB.
 *
 * The shadow vector is built once as rtld = A^T * (shadow residual).
 * Each of at most LIS_OPTIONS_MAXITER iterations tests convergence twice:
 * on the intermediate vector s (early exit, x advanced by alpha*p only)
 * and on the fully updated residual r.
 *
 * Statistics are stored in solver->retcode / iter / resid / ptimes.
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,z> == 0) or LIS_MAXITER.
 * LIS_SUCCESS is also returned immediately when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_bicrstab(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b, x;
    LIS_VECTOR r, rtld, p, s, ap, ms, map, ams, z;
    LIS_SCALAR alpha, beta, omega, rho, rho_prev;
    LIS_SCALAR rtld_map, dot_ams_s, dot_ams_ams;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, n, output, conv;
    double tick, precon_time;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    M = solver->precon;
    b = solver->b;
    x = solver->x;
    n = A->n;

    maxiter     = solver->options[LIS_OPTIONS_MAXITER];
    output      = solver->options[LIS_OPTIONS_OUTPUT];
    conv        = solver->options[LIS_OPTIONS_CONV_COND];
    precon_time = 0.0;

    rtld = solver->work[0];
    r    = solver->work[1];
    s    = solver->work[2];
    ms   = solver->work[3];
    ams  = solver->work[4];
    p    = solver->work[5];
    ap   = solver->work[6];
    map  = solver->work[7];
    z    = solver->work[8];

    /* r <- initial residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /* rtld = A^T * shadow residual (p is used as scratch for the latter) */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    /* z = M^-1 * r (timed);  p = z;  rho_prev = <rtld,z> */
    tick = lis_wtime();
    lis_psolve(solver, r, z);
    precon_time += lis_wtime()-tick;

    lis_vector_copy(z,p);
    lis_vector_dot(rtld,z,&rho_prev);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* ap = A * p */
        LIS_MATVEC(A,p,ap);

        /* map = M^-1 * ap (timed) */
        tick = lis_wtime();
        lis_psolve(solver, ap, map);
        precon_time += lis_wtime()-tick;

        /* alpha = rho_prev / <rtld,map>
           NOTE(review): no breakdown test on <rtld,map> before dividing */
        lis_vector_dot(rtld,map,&rtld_map);
        alpha = rho_prev / rtld_map;

        /* s = r - alpha*ap */
        lis_vector_axpyz(-alpha,ap,r,s);

        /* early convergence test on s */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( nrm2 <= tol )
        {
            if( output & LIS_PRINT_MEM )
            {
                solver->residual[iter] = nrm2;
            }
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
            {
                printf("iter: %5d residual = %e\n", iter, nrm2);
            }

            lis_vector_axpy(alpha,p,x);
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* ms = z - alpha*map;  ams = A * ms */
        lis_vector_axpyz(-alpha,map,z,ms);
        LIS_MATVEC(A,ms,ams);

        /* omega = <ams,s> / <ams,ams> */
        lis_vector_dot(ams,s,&dot_ams_s);
        lis_vector_dot(ams,ams,&dot_ams_ams);
        omega = dot_ams_s / dot_ams_ams;

        /* x = x + alpha*p + omega*ms */
        lis_vector_axpy(alpha,p,x);
        lis_vector_axpy(omega,ms,x);

        /* r = s - omega*ams */
        lis_vector_axpyz(-omega,ams,s,r);

        /* full convergence test on r */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output & LIS_PRINT_MEM )
        {
            solver->residual[iter] = nrm2;
        }
        if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
        {
            printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* z = M^-1 * r (timed);  rho = <rtld,z> */
        tick = lis_wtime();
        lis_psolve(solver, r, z);
        precon_time += lis_wtime()-tick;

        lis_vector_dot(rtld,z,&rho);
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_prev) * (alpha / omega)
           p    = z + beta*(p - omega*map)           */
        beta = (rho / rho_prev) * (alpha / omega);
        lis_vector_axpy(-omega,map,p);
        lis_vector_xpay(z,beta,p);

        rho_prev = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_crs - preconditioned squared conjugate-residual style iteration on the
 * system stored in `solver` (matrix solver->A, solution solver->x updated in
 * place).
 *
 * Work vectors are deliberately shared between quantities that are never
 * live at the same time:
 *   work[0] r      work[1] rtld   work[2] p
 *   work[3] z / u / uq
 *   work[4] q / ap
 *   work[5] map / auq
 * Because of that sharing the statement order inside the loop must not be
 * changed.
 *
 * On return solver->retcode / iter / resid are filled in, and the time spent
 * in lis_psolve is stored in solver->ptimes on every exit taken after the
 * iteration has started.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual reported nonzero, or the
 *                   residual measure dropped to solver->tol or below;
 *   LIS_BREAKDOWN - <rtld,z> or <rtld,map> is exactly zero;
 *   LIS_MAXITER   - the loop ran maxiter times without converging.
 */
LIS_INT lis_crs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
	LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	z       = solver->work[3];
	u       = solver->work[3];
	uq      = solver->work[3];
	q       = solver->work[4];
	ap      = solver->work[4];
	map     = solver->work[5];
	auq     = solver->work[5];

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* shadow residual goes through p, then rtld is formed by LIS_MATVECT */
	lis_solver_set_shadowresidual(solver,r,p);
	LIS_MATVECT(A,p,rtld);

	rho_old = 1.0;
	lis_vector_set_all(0,q);
	lis_vector_set_all(0,p);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z   = M^-1 * r */
		/* rho = <rtld,z> */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dot(rtld,z,&rho);

		/* test breakdown */
		if( rho==0.0 )
		{
			/* x and r are not modified before this point of the iteration;
			   measure r here so resid is defined even in iteration 1 */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta    = rho / rho_old          */
		/* u       = z + beta*q             */
		/* p       = u + beta*(q + beta*p)  */
		/* ap      = A * p                  */
		/* map     = M^-1 * ap              */
		/* tmpdot1 = <rtld,map>             */
		beta = rho / rho_old;
		lis_vector_axpyz(beta,q,z,u);
		lis_vector_xpay(q,beta,p);
		lis_vector_xpay(u,beta,p);
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dot(rtld,map,&tmpdot1);

		/* test breakdown */
		if( tmpdot1==0.0 )
		{
			/* r is still the residual of the current x */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1   */
		/* q     = u - alpha*map   */
		/* uq    = u + q           */
		/* auq   = A * uq          */
		/* x     = x + alpha*uq    */
		/* r     = r - alpha*auq   */
		alpha = rho / tmpdot1;
		lis_vector_axpyz(-alpha,map,u,q);
		lis_vector_axpyz(1,u,q,uq);
		LIS_MATVEC(A,uq,auq);
		lis_vector_axpy(alpha,uq,x);
		lis_vector_axpy(-alpha,auq,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			/* print from one rank only, as lis_bicrstab does */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode   = LIS_SUCCESS;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptimes    = ptimes;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_epi - power iteration for one eigenpair of esolver->A.
 *
 * The iterate lives in esolver->x (optionally reset to all ones); work[0] and
 * work[1] hold y = A*v and the residual vector.  A diagonal shift is applied
 * to A for the duration of the iteration and removed again before returning:
 * the shift is taken from the SHIFT (and, for complex builds, SHIFT_IM)
 * parameters, and is overridden by esolver->ishift when that is nonzero.
 *
 * Each sweep normalises v, forms y = A*v, takes theta = <v,y> as the
 * eigenvalue estimate, measures ||y - theta*v||_2 / |theta|, and continues
 * with v = y.
 *
 * On return esolver->retcode, iter[0], resid[0] and evalue[0] (theta plus the
 * shift) are set and esolver->x holds the normalised vector.
 *
 * Returns LIS_SUCCESS when the residual measure reaches the tolerance,
 * LIS_MAXITER otherwise.
 */
LIS_INT lis_epi(LIS_ESOLVER esolver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_VECTOR v,y,q;
	LIS_SCALAR theta;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,output;
	LIS_SCALAR oshift;
	LIS_REAL nrm2,resid;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output  = esolver->options[LIS_EOPTIONS_OUTPUT];
#ifdef _COMPLEX
	oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
#else
	oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
#endif

	A = esolver->A;
	v = esolver->x;
	if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,v);
	}
	y = esolver->work[0];
	q = esolver->work[1];

	/* a nonzero ishift on the handle takes precedence over the parameter
	   shift; read it from the handle rather than from an unset local */
	if ( esolver->ishift != 0.0 ) oshift = esolver->ishift;
	if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, oshift);

	if( output )
	{
#ifdef _COMPLEX
		lis_printf(comm,"shift : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
#else
		lis_printf(comm,"shift : %e\n", (double)oshift);
#endif
	}

	iter=0;
	while (iter<emaxiter)
	{
		iter = iter+1;

		/* v = v / ||v||_2 */
		lis_vector_nrm2(v, &nrm2);
		lis_vector_scale(1.0/nrm2, v);

		/* y = A * v */
		lis_matvec(A,v,y);

		/* theta = <v,y> */
		lis_vector_dot(v, y, &theta);

		/* resid = ||y - theta * v||_2 / |theta| */
		lis_vector_axpyz(-theta, v, y, q);
		lis_vector_nrm2(q, &resid);
		resid = resid / fabs(theta);

		/* v = y */
		lis_vector_copy(y, v);

		/* convergence check */
		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
			if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
		}

		if( tol >= resid )
		{
			esolver->retcode    = LIS_SUCCESS;
			esolver->iter[0]    = iter;
			esolver->resid[0]   = resid;
			esolver->evalue[0]  = theta + oshift;
			lis_vector_nrm2(v, &nrm2);
			lis_vector_scale(1.0/nrm2, v);
			/* undo the diagonal shift applied above */
			if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	esolver->retcode    = LIS_MAXITER;
	esolver->iter[0]    = iter;
	esolver->resid[0]   = resid;
	esolver->evalue[0]  = theta + oshift;
	lis_vector_nrm2(v, &nrm2);
	lis_vector_scale(1.0/nrm2, v);
	/* undo the diagonal shift applied above */
	if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_minres - preconditioned MINRES-style iteration (Lanczos recurrence
 * plus rotations) on the system stored in `solver`; solver->x is updated in
 * place.
 *
 * Work vectors solver->work[0..6] are used as v1, v2, v3, v4, w0, w1, w2.
 * They belong to the solver: this routine only borrows them and never
 * destroys them, on any exit path.
 *
 * The convergence measure is r_euc / r0_euc, where r0_euc is the norm of the
 * preconditioned initial residual and r_euc is updated by |sigma3| each step
 * (no explicit residual vector is recomputed).
 *
 * On return solver->retcode / iter / resid / ptime are filled in.
 *
 * Returns LIS_SUCCESS when the measure reaches the RESID parameter,
 * LIS_MAXITER otherwise.
 */
LIS_INT lis_minres(LIS_SOLVER solver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR v1,v2,v3,v4,w0,w1,w2;
	LIS_REAL nrm2,tol;
	LIS_SCALAR alpha;
	LIS_REAL beta2,beta3;
	LIS_SCALAR gamma1,gamma2,gamma3;
	LIS_SCALAR delta,eta;
	LIS_SCALAR sigma1,sigma2,sigma3;
	LIS_SCALAR rho1,rho2,rho3;
	LIS_REAL r0_euc,r_euc;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;

	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	ptime   = 0.0;

	v1      = solver->work[0];
	v2      = solver->work[1];
	v3      = solver->work[2];
	v4      = solver->work[3];
	w0      = solver->work[4];
	w1      = solver->work[5];
	w2      = solver->work[6];

	/* Lanczos algorithm: v2 = M^-1 * (b - A*x) */
	lis_matvec(A,x,v2);
	lis_vector_xpay(b,-1.0,v2);

	time = lis_wtime();
	lis_psolve(solver,v2,v3);
	ptime += lis_wtime()-time;
	lis_vector_copy(v3,v2);

	/* Compute elements of Hermitian tridiagonal matrix */
	lis_vector_nrm2(v2,&r_euc);
	eta = beta2 = r0_euc = r_euc;
	gamma2 = gamma1 = 1.0;
	sigma2 = sigma1 = 0.0;

	lis_vector_set_all(0.0,v1);
	lis_vector_set_all(0.0,w0);
	lis_vector_set_all(0.0,w1);

	nrm2 = r_euc / r0_euc;

	for(iter=1;iter<=maxiter;iter++)
	{
		/* Lanczos algorithm */
		lis_vector_scale(1.0 / beta2,v2);
		lis_matvec(A,v2,v3);

		time = lis_wtime();
		lis_psolve(solver,v3,v4);
		ptime += lis_wtime()-time;

		lis_vector_dot(v2,v4,&alpha);
		lis_vector_axpy(-alpha,v2,v4);
		lis_vector_axpy(-beta2,v1,v4);
		lis_vector_nrm2(v4,&beta3);

		/* Compute elements of Hermitian tridiagonal matrix */
		delta = gamma2 * alpha - gamma1 * sigma2 * beta2;
		rho1 = sqrt(delta * delta + beta3 * beta3);
		rho2 = sigma2 * alpha + gamma1 * gamma2 * beta2;
		rho3 = sigma1 * beta2;
		gamma3 = delta / rho1;
		sigma3 = beta3 / rho1;

		/* w2 = (v2 - rho3*w0 - rho2*w1) / rho1, then x += gamma3*eta*w2 */
		lis_vector_axpyz(-rho3,w0,v2,w2);
		lis_vector_axpy(-rho2,w1,w2);
		lis_vector_scale(1.0 / rho1,w2);

		lis_vector_axpy(gamma3 * eta,w2,x);

		/* convergence check */
		r_euc *= fabs(sigma3);
		nrm2 = r_euc / r0_euc;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( nrm2 <= tol )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* shift the recurrence one step forward */
		eta *= -sigma3;

		lis_vector_copy(v2,v1);
		lis_vector_copy(v4,v2);
		lis_vector_copy(w1,w0);
		lis_vector_copy(w2,w1);

		beta2 = beta3;
		gamma1 = gamma2;
		gamma2 = gamma3;
		sigma1 = sigma2;
		sigma2 = sigma3;
	}

	/* v1..w2 are aliases of solver->work[] and stay owned by the solver */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptime     = ptime;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_eai_quad - Arnoldi iteration followed by per-mode refinement with an
 * inner eigensolver whose eprecision is set to LIS_PRECISION_QUAD.
 *
 * Steps:
 *  1. Build an ss x ss Hessenberg array h (column-major, h(i,j) = h[i+j*ss])
 *     by Arnoldi with reorthogonalisation; esolver->work[0] is the running
 *     vector w and esolver->work[1..] are the basis vectors v(j).
 *  2. Run lis_array_qr on h and read eigenvalue estimates off its diagonal,
 *     treating a 2x2 block with negative discriminant as a conjugate pair
 *     (only the real part is stored).  The estimates are stored in
 *     esolver->evalue[] on every rank; only rank 0 prints them.
 *  3. For each mode i, run the inner eigensolver (option INNER_ESOLVER) with
 *     lshift = -evalue[i] and store the refined eigenvalue, iteration count,
 *     residual and eigenvector.  Mode 0 additionally supplies the residual
 *     history and timing fields, and its eigenvector is copied to esolver->x.
 *
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_eai_quad(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_INT ss,ic;
	LIS_INT emaxiter,iter0,hqriter;
	LIS_REAL tol,hqrerr,D;
	LIS_INT i,j;
	LIS_INT output, niesolver;
	LIS_INT is_root;
	LIS_REAL nrm2,resid0;
	LIS_VECTOR *v,w;
	LIS_SCALAR *h,*hq,*hr,evalue,evalue0;
	LIS_SOLVER solver;
	LIS_ESOLVER esolver2;
	char esolvername[128],solvername[128],preconname[128];
	LIS_INT nsol,precon_type;

	LIS_DEBUG_FUNC_IN;

	ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output  = esolver->options[LIS_EOPTIONS_OUTPUT];
	niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

	h = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::h");
	hq = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hq");
	hr = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hr");

	A = esolver->A;
	is_root = (A->my_rank==0);
	w = esolver->work[0];
	v = &esolver->work[1];
	lis_vector_set_all(0.0,v[0]);
	lis_vector_set_all(1.0,w);
	lis_vector_nrm2(w, &nrm2);

	/* this solver handle is only queried for the names printed below */
	lis_solver_create(&solver);
	lis_solver_set_option("-i bicg -p none",solver);
	lis_solver_set_optionC(solver);
	lis_solver_get_solver(solver, &nsol);
	lis_solver_get_precon(solver, &precon_type);
	lis_solver_get_solvername(nsol, solvername);
	lis_solver_get_preconname(precon_type, preconname);
	lis_esolver_get_esolvername(niesolver, esolvername);
	if( A->my_rank==0 ) printf("inner eigensolver : %s\n", esolvername);
	if( A->my_rank==0 ) printf("linear solver : %s\n", solvername);
	if( A->my_rank==0 ) printf("preconditioner : %s\n", preconname);

	for (i=0;i<ss*ss;i++) h[i] = 0.0;

	j=-1;
	while (j<ss-1)
	{
		j = j+1;
		lis_vector_copy(w, v[j]);

		/* w = A * v(j) */
		lis_matvec(A, v[j], w);

		/* reorthogonalization */
		for (i=0;i<=j;i++)
		{
			/* h(i,j) = <v(i), w> */
			lis_vector_dot(v[i], w, &h[i+j*ss]);
			/* w = w - h(i,j) * v(i) */
			lis_vector_axpy(-h[i+j*ss], v[i], w);
		}

		/* For the last column there is no row j+1: h(j+1,j) would be
		   h[ss*ss], one element past the array, and v(j+1) would be v[ss].
		   Nothing below needs either, so stop here. */
		if (j+1>=ss) break;

		/* h(j+1,j) = ||w||_2 */
		lis_vector_nrm2(w, &h[j+1+j*ss]);

		/* convergence check */
		if (fabs(h[j+1+j*ss])<tol) break;

		/* v(j+1) = w / h(j+1,j) */
		lis_vector_scale(1/h[j+1+j*ss],w);
		lis_vector_copy(w,v[j+1]);
	}

	/* compute eigenvalues of a real upper Hessenberg matrix H(j) = SH'(j)S^* */
	lis_array_qr(ss,h,hq,hr,&hqriter,&hqrerr);

	if( is_root )
	{
#ifdef _LONG__LONG
		printf("size of subspace : %lld\n\n", ss);
#else
		printf("size of subspace : %d\n\n", ss);
#endif
		if( output ) printf("approximate eigenvalues in subspace:\n\n");
	}

	/* The estimates feed the refinement shifts below, so they are stored on
	   every rank; only the printing is restricted to rank 0. */
	i=0;
	while (i<ss-1)
	{
		i = i + 1;
		if (fabs(h[i+(i-1)*ss])<tol)
		{
			/* negligible subdiagonal: h(i-1,i-1) is an eigenvalue */
			if( is_root )
			{
#ifdef _LONG__LONG
				printf("Arnoldi: mode number = %lld\n",i-1);
#else
				printf("Arnoldi: mode number = %d\n",i-1);
#endif
#ifdef _LONG__DOUBLE
				printf("Arnoldi: eigenvalue = %Le\n",h[i-1+(i-1)*ss]);
#else
				printf("Arnoldi: eigenvalue = %e\n",h[i-1+(i-1)*ss]);
#endif
			}
			esolver->evalue[i-1] = h[i-1+(i-1)*ss];
		}
		else
		{
			/* discriminant of the 2x2 block on rows/columns i-1, i */
			D = (h[i-1+(i-1)*ss]-h[i+i*ss]) * (h[i-1+(i-1)*ss]-h[i+i*ss])
				+ 4 * h[i-1+i*ss] * h[i+(i-1)*ss];
			if (D<0)
			{
				/* conjugate pair: two modes, real part stored for both */
				if( is_root )
				{
#ifdef _LONG__LONG
					printf("Arnoldi: mode number = %lld\n",i-1);
#else
					printf("Arnoldi: mode number = %d\n",i-1);
#endif
#ifdef _LONG__DOUBLE
					printf("Arnoldi: eigenvalue = %Le + %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#else
					printf("Arnoldi: eigenvalue = %e + %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#endif
#ifdef _LONG__LONG
					printf("Arnoldi: mode number = %lld\n",i);
#else
					printf("Arnoldi: mode number = %d\n",i);
#endif
#ifdef _LONG__DOUBLE
					printf("Arnoldi: eigenvalue = %Le - %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#else
					printf("Arnoldi: eigenvalue = %e - %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#endif
				}
				esolver->evalue[i-1] = (h[i-1+(i-1)*ss]+h[i+i*ss])/2;
				esolver->evalue[i] = (h[i-1+(i-1)*ss]+h[i+i*ss])/2;
				i=i+1;
			}
			else
			{
				if( is_root )
				{
#ifdef _LONG__LONG
					printf("Arnoldi: mode number = %lld\n",i-1);
#else
					printf("Arnoldi: mode number = %d\n",i-1);
#endif
#ifdef _LONG__DOUBLE
					printf("Arnoldi: eigenvalue = %Le\n",h[i-1+(i-1)*ss]);
#else
					printf("Arnoldi: eigenvalue = %e\n",h[i-1+(i-1)*ss]);
#endif
				}
				esolver->evalue[i-1] = h[i-1+(i-1)*ss];
			}
		}
	}
	if (i<ss)
	{
		/* trailing mode not consumed by the loop above */
		if( is_root )
		{
#ifdef _LONG__LONG
			printf("Arnoldi: mode number = %lld\n",i);
#else
			printf("Arnoldi: mode number = %d\n",i);
#endif
#ifdef _LONG__DOUBLE
			printf("Arnoldi: eigenvalue = %Le\n",h[i+i*ss]);
#else
			printf("Arnoldi: eigenvalue = %e\n",h[i+i*ss]);
#endif
		}
		/* store it as well: the refinement loop shifts by evalue[i] */
		esolver->evalue[i] = h[i+i*ss];
	}
	if( is_root )
	{
		if( output ) printf("\n");
		if( output ) printf("compute refined (real) eigenpairs, where imaginary parts are currently neglected:\n\n");
	}

	lis_esolver_create(&esolver2);
	esolver2->options[LIS_EOPTIONS_ESOLVER] = niesolver;
	esolver2->options[LIS_EOPTIONS_SUBSPACE] = 1;
	esolver2->options[LIS_EOPTIONS_MAXITER] = emaxiter;
	esolver2->options[LIS_EOPTIONS_OUTPUT] = esolver->options[LIS_EOPTIONS_OUTPUT];
	esolver2->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN] = tol;
	esolver2->eprecision = LIS_PRECISION_QUAD;

	/* compute refined (real) eigenpairs, where imaginary parts are currently neglected */
	for (i=0;i<ss;i++)
	{
		lis_vector_duplicate(A, &esolver->evector[i]);
		esolver2->lshift = -(esolver->evalue[i]);
		lis_esolve(A, esolver->evector[i], &evalue, esolver2);
		lis_esolver_work_destroy(esolver2);
		esolver->evalue[i] = evalue - esolver2->lshift;
		esolver->iter[i] = esolver2->iter[0];
		esolver->resid[i] = esolver2->resid[0];

		if (i==0)
		{
			/* remember mode 0; it is written back after the loop */
			evalue0 = esolver->evalue[0];
			iter0 = esolver2->iter[0];
			resid0 = esolver2->resid[0];
			if( output & LIS_EPRINT_MEM )
			{
				for (ic=0;ic<iter0+1;ic++)
				{
					esolver->rhistory[ic] = esolver2->rhistory[ic];
				}
			}
			esolver->ptime = esolver2->ptime;
			esolver->itime = esolver2->itime;
			esolver->p_c_time = esolver2->p_c_time;
			esolver->p_i_time = esolver2->p_i_time;
		}

		if (A->my_rank==0)
		{
#ifdef _LONG__LONG
			if( output ) printf("Arnoldi: mode number = %lld\n", i);
#else
			if( output ) printf("Arnoldi: mode number = %d\n", i);
#endif
#ifdef _LONG__DOUBLE
			if( output ) printf("Arnoldi: eigenvalue = %Le\n", esolver->evalue[i]);
#else
			if( output ) printf("Arnoldi: eigenvalue = %e\n", esolver->evalue[i]);
#endif
#ifdef _LONG__LONG
			if( output ) printf("Arnoldi: number of iterations = %lld\n",esolver2->iter[0]);
#else
			if( output ) printf("Arnoldi: number of iterations = %d\n",esolver2->iter[0]);
#endif
#ifdef _LONG__DOUBLE
			if( output ) printf("Arnoldi: relative residual = %Le\n\n",esolver2->resid[0]);
#else
			if( output ) printf("Arnoldi: relative residual = %e\n\n",esolver2->resid[0]);
#endif
		}
	}
	esolver->evalue[0] = evalue0;
	esolver->iter[0] = iter0;
	esolver->resid[0] = resid0;

	lis_vector_copy(esolver->evector[0], esolver->x);

	lis_esolver_destroy(esolver2);

	lis_free(h);
	lis_free(hq);
	lis_free(hr);

	lis_solver_destroy(solver);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}