LIS_INT lis_cgs_switch(LIS_SOLVER solver) { LIS_MATRIX A; LIS_PRECON M; LIS_VECTOR b,x; LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat; LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one; LIS_REAL bnrm2, nrm2, tol, tol2; LIS_INT iter,maxiter,n,output,conv; LIS_INT iter2,maxiter2; double times,ptimes; LIS_DEBUG_FUNC_IN; A = solver->A; M = solver->precon; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN]; ptimes = 0.0; r = solver->work[0]; rtld = solver->work[1]; p = solver->work[2]; phat = solver->work[3]; q = solver->work[4]; qhat = solver->work[5]; u = solver->work[5]; uhat = solver->work[6]; vhat = solver->work[6]; LIS_QUAD_SCALAR_MALLOC(alpha,0,1); LIS_QUAD_SCALAR_MALLOC(beta,1,1); LIS_QUAD_SCALAR_MALLOC(rho,2,1); LIS_QUAD_SCALAR_MALLOC(rho_old,3,1); LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1); LIS_QUAD_SCALAR_MALLOC(one,6,1); rho_old.hi[0] = 1.0; rho_old.lo[0] = 0.0; alpha.hi[0] = 1.0; alpha.lo[0] = 0.0; one.hi[0] = 1.0; one.lo[0] = 0.0; /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol2 = solver->tol_switch; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, q); lis_vector_set_allex_nm(0.0, p); uhat->precision = LIS_PRECISION_DEFAULT; p->precision = LIS_PRECISION_DEFAULT; phat->precision = LIS_PRECISION_DEFAULT; for( iter=1; iter<=maxiter2; iter++ ) { /* rho = <rtld,r> */ lis_vector_dot(rtld,r,&rho.hi[0]); /* test breakdown */ if( rho.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) */ beta.hi[0] = (rho.hi[0] / rho_old.hi[0]); /* u = r + beta*q */ lis_vector_axpyz(beta.hi[0],q,r,u); /* p = u + beta*(q + beta*p) */ lis_vector_xpay(q,beta.hi[0],p); lis_vector_xpay(u,beta.hi[0],p); /* phat = M^-1 * p */ times = lis_wtime(); lis_psolve(solver, p, phat); ptimes += lis_wtime()-times; /* v = A * phat */ LIS_MATVEC(A,phat,vhat); /* tmpdot1 = <rtld,vhat> */ lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]); /* test breakdown */ if( tmpdot1.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho / tmpdot1 */ alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0]; /* q = u - alpha*vhat */ lis_vector_axpyz(-alpha.hi[0],vhat,u,q); /* phat = u + q */ /* uhat = M^-1 * (u + q) */ lis_vector_axpyz(1.0,u,q,phat); times = lis_wtime(); lis_psolve(solver, phat, uhat); ptimes += lis_wtime()-times; /* x = x + alpha*uhat */ lis_vector_axpy(alpha.hi[0],uhat,x); /* qhat = A * uhat */ LIS_MATVEC(A,uhat,qhat); /* r = r - alpha*qhat */ lis_vector_axpy(-alpha.hi[0],qhat,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2); } if( nrm2 <= tol2 ) { solver->iter = iter; solver->iter2 = iter; solver->ptimes = ptimes; break; } rho_old.hi[0] = rho.hi[0]; } uhat->precision = LIS_PRECISION_QUAD; p->precision = LIS_PRECISION_QUAD; phat->precision = LIS_PRECISION_QUAD; solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE; lis_vector_copyex_mn(x,solver->xx); rho_old.hi[0] = 1.0; lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2); tol = solver->tol; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, q); lis_vector_set_allex_nm(0.0, p); for( iter2=iter+1; iter2<=maxiter; iter2++ ) { /* rho = <rtld,r> */ lis_vector_dotex_mmm(rtld,r,&rho); /* test breakdown */ if( rho.hi[0]==0.0 && rho.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) */ lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi); /* u = r + beta*q */ lis_vector_axpyzex_mmmm(beta,q,r,u); /* p = u + beta*(q + beta*p) */ lis_vector_xpayex_mmm(q,beta,p); lis_vector_xpayex_mmm(u,beta,p); /* phat = M^-1 * p */ times = lis_wtime(); lis_psolve(solver, p, phat); ptimes += lis_wtime()-times; /* v = A * phat */ LIS_MATVEC(A,phat,vhat); /* tmpdot1 = <rtld,vhat> */ lis_vector_dotex_mmm(rtld,vhat,&tmpdot1); /* test breakdown */ if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho / tmpdot1 */ lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi); /* q = u - alpha*vhat */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyzex_mmmm(alpha,vhat,u,q); /* phat = u + q */ /* uhat = M^-1 * (u + q) */ lis_vector_axpyzex_mmmm(one,u,q,phat); times = lis_wtime(); lis_psolve(solver, phat, uhat); ptimes += lis_wtime()-times; /* x = x + alpha*uhat */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,uhat,x); /* qhat = A * uhat */ LIS_MATVEC(A,uhat,qhat); /* r = r - alpha*qhat */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,qhat,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2); } if( tol > nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } rho_old.hi[0] = rho.hi[0]; rho_old.lo[0] = rho.lo[0]; } solver->retcode = LIS_MAXITER; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_gmres_switch(LIS_SOLVER solver) { LIS_MATRIX A; LIS_VECTOR b,x; LIS_VECTOR r,s,z,*v; LIS_QUAD *h; LIS_SCALAR *hd; LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp; LIS_QUAD_PTR rnorm; LIS_REAL bnrm2,nrm2,tol,tol2; LIS_INT iter,maxiter,n,output; LIS_INT iter2,maxiter2; double time,ptime; LIS_INT i,j,k,m; LIS_INT ii,i1,iiv,i1v,iih,jj; LIS_INT h_dim; LIS_INT cs,sn; LIS_DEBUG_FUNC_IN; A = solver->A; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN]; m = solver->options[LIS_OPTIONS_RESTART]; h_dim = m+1; ptime = 0.0; s = solver->work[0]; r = solver->work[1]; z = solver->work[2]; v = &solver->work[3]; LIS_QUAD_SCALAR_MALLOC(aa,0,1); LIS_QUAD_SCALAR_MALLOC(bb,1,1); LIS_QUAD_SCALAR_MALLOC(rr,2,1); LIS_QUAD_SCALAR_MALLOC(a2,3,1); LIS_QUAD_SCALAR_MALLOC(b2,4,1); LIS_QUAD_SCALAR_MALLOC(t,5,1); LIS_QUAD_SCALAR_MALLOC(tmp,6,1); LIS_QUAD_SCALAR_MALLOC(one,7,1); LIS_QUAD_SCALAR_MALLOC(rnorm,8,1); h = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" ); hd = (LIS_SCALAR *)h; cs = (m+1)*h_dim; sn = (m+2)*h_dim; one.hi[0] = 1.0; one.lo[0] = 0.0; z->precision = LIS_PRECISION_DEFAULT; /* r = M^-1 * (b - A * x) */ lis_matvec(A,x,z); lis_vector_xpay(b,-1.0,z); lis_psolve(solver,z,v[0]); /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) ) { lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol2 = solver->tol_switch; iter=0; while( iter<maxiter2 ) { /* first column of V */ /* v = r / ||r||_2 */ lis_vector_nrm2(v[0],&rnorm.hi[0]); lis_vector_scale(1.0/rnorm.hi[0],v[0]); /* s = ||r||_2 e_1 */ lis_vector_set_all(0,s); s->value[0] = rnorm.hi[0]; i = 0; do { iter++; i++; ii = i-1; i1 = i; iiv = i-1; i1v = i; iih = (i-1)*h_dim; /* z = M^-1 * v */ time = lis_wtime(); lis_psolve(solver,v[iiv],z); ptime += lis_wtime()-time; /* w = A * z */ lis_matvec(A,z, v[i1v]); for(k=0;k<i;k++) { /* h[k,i] = <w,v[k]> */ /* w = w - h[k,i] * v[k] */ lis_vector_dot(v[i1v],v[k],&t.hi[0]); hd[k+iih] = t.hi[0]; lis_vector_axpy(-t.hi[0],v[k],v[i1v]); } /* h[i+1,i] = ||w|| */ /* v[i+1] = w / h[i+1,i] */ lis_vector_nrm2(v[i1v],&t.hi[0]); hd[i1+iih] = t.hi[0]; lis_vector_scale(1.0/t.hi[0],v[i1v]); for(k=1;k<=ii;k++) { jj = k-1; t.hi[0] = hd[jj+iih]; aa.hi[0] = hd[jj+cs]*t.hi[0]; aa.hi[0] += hd[jj+sn]*hd[k+iih]; bb.hi[0] = -hd[jj+sn]*t.hi[0]; bb.hi[0] += hd[jj+cs]*hd[k+iih]; hd[jj+iih] = aa.hi[0]; hd[k+iih] = bb.hi[0]; } aa.hi[0] = hd[ii+iih]; bb.hi[0] = hd[i1+iih]; a2.hi[0] = aa.hi[0]*aa.hi[0]; b2.hi[0] = bb.hi[0]*bb.hi[0]; rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]); if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17; hd[ii+cs] = aa.hi[0]/rr.hi[0]; hd[ii+sn] = bb.hi[0]/rr.hi[0]; s->value[i1] = -hd[ii+sn]*s->value[ii]; s->value[ii] = hd[ii+cs]*s->value[ii]; aa.hi[0] = hd[ii+cs]*hd[ii+iih]; aa.hi[0] += hd[ii+sn]*hd[i1+iih]; hd[ii+iih] = aa.hi[0]; /* convergence check */ nrm2 = fabs(s->value[i1])*bnrm2; if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol2 >= nrm2 ) break; } while( i<m && iter <maxiter2 ); /* Solve H * Y = S for upper Hessenberg matrix H */ s->value[ii] = s->value[ii]/hd[ii+iih]; for(k=1;k<=ii;k++) { jj = ii-k; t.hi[0] = s->value[jj]; for(j=jj+1;j<=ii;j++) { t.hi[0] -= hd[jj+j*h_dim]*s->value[j]; } s->value[jj] = t.hi[0]/hd[jj+jj*h_dim]; } /* z = z + y * v */ for(k=0;k<n;k++) { z->value[k] = s->value[0]*v[0]->value[k]; } for(j=1;j<=ii;j++) { lis_vector_axpy(s->value[j],v[j],z); } /* r = M^-1 * z */ time = lis_wtime(); lis_psolve(solver,z,r); ptime += lis_wtime()-time; /* x = x + r */ lis_vector_axpy(1,r,x); if( tol2 >= nrm2 ) { solver->iter = iter; solver->iter2 = iter; solver->ptime = ptime; break; } for(j=1;j<=i;j++) { jj = i1-j+1; s->value[jj-1] = -hd[jj-1+sn]*s->value[jj]; s->value[jj] = hd[jj-1+cs]*s->value[jj]; } for(j=0;j<=i1;j++) { t.hi[0] = s->value[j]; if( j==0 ) t.hi[0] = t.hi[0]-1.0; lis_vector_axpy(t.hi[0],v[j],v[0]); } } /* Initial Residual */ z->precision = LIS_PRECISION_QUAD; solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE; lis_vector_copyex_mn(x,solver->xx); lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2); tol = solver->tol; iter2=iter; while( iter2<maxiter ) { /* first column of V */ /* v = r / ||r||_2 */ lis_vector_nrm2ex_mm(v[0],&rnorm); lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi); lis_vector_scaleex_mm(tmp,v[0]); /* s = ||r||_2 e_1 */ lis_vector_set_allex_nm(0.0,s); s->value[0] = rnorm.hi[0]; s->value_lo[0] = rnorm.lo[0]; i = 0; do { iter2++; i++; ii = i-1; i1 = i; iiv = i-1; i1v = i; iih = (i-1)*h_dim; /* z = M^-1 * v */ time = lis_wtime(); lis_psolve(solver,v[iiv],z); ptime += lis_wtime()-time; /* w = A * z */ lis_matvec(A,z, v[i1v]); for(k=0;k<i;k++) { /* h[k,i] = <w,v[k]> */ /* w = w - h[k,i] * v[k] */ lis_vector_dotex_mmm(v[i1v],v[k],&t); h[k+iih].hi = t.hi[0]; h[k+iih].lo = t.lo[0]; lis_quad_minus((LIS_QUAD *)t.hi); lis_vector_axpyex_mmm(t,v[k],v[i1v]); } /* h[i+1,i] = ||w|| */ /* v[i+1] = w / h[i+1,i] */ lis_vector_nrm2ex_mm(v[i1v],&t); h[i1+iih].hi = t.hi[0]; h[i1+iih].lo = t.lo[0]; lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi); lis_vector_scaleex_mm(tmp,v[i1v]); for(k=1;k<=ii;k++) { jj = k-1; t.hi[0] = h[jj+iih].hi; t.lo[0] = h[jj+iih].lo; lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi); lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]); lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi); lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi); lis_quad_minus((LIS_QUAD *)bb.hi); lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]); lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi); h[jj+iih].hi = aa.hi[0]; h[jj+iih].lo = aa.lo[0]; h[k+iih].hi = bb.hi[0]; h[k+iih].lo = bb.lo[0]; } aa.hi[0] = h[ii+iih].hi; aa.lo[0] = h[ii+iih].lo; bb.hi[0] = h[i1+iih].hi; bb.lo[0] = h[i1+iih].lo; lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi); lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi); lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi); lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi); lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi); lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi); tmp.hi[0] = s->value[ii]; tmp.lo[0] = s->value_lo[ii]; lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi); lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi); lis_quad_minus((LIS_QUAD *)aa.hi); s->value[i1] = aa.hi[0]; s->value_lo[i1] = aa.lo[0]; s->value[ii] = bb.hi[0]; s->value_lo[ii] = bb.lo[0]; lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]); lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]); lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi); h[ii+iih].hi = aa.hi[0]; h[ii+iih].lo = aa.lo[0]; /* convergence check */ nrm2 = fabs(s->value[i1])*bnrm2; if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol >= nrm2 ) break; } while( i<m && iter2 <maxiter ); /* Solve H * Y = S for upper Hessenberg matrix H */ tmp.hi[0] = s->value[ii]; tmp.lo[0] = s->value_lo[ii]; lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]); s->value[ii] = tmp.hi[0]; s->value_lo[ii] = tmp.lo[0]; for(k=1;k<=ii;k++) { jj = ii-k; t.hi[0] = s->value[jj]; t.lo[0] = s->value_lo[jj]; for(j=jj+1;j<=ii;j++) { tmp.hi[0] = s->value[j]; tmp.lo[0] = s->value_lo[j]; lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]); lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi); } lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]); s->value[jj] = tmp.hi[0]; s->value_lo[jj] = tmp.lo[0]; } /* z = z + y * v */ for(k=0;k<n;k++) { aa.hi[0] = s->value[0]; aa.lo[0] = s->value_lo[0]; bb.hi[0] = v[0]->value[k]; bb.lo[0] = v[0]->value_lo[k]; lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi); z->value[k] = tmp.hi[0]; z->value_lo[k] = tmp.lo[0]; } for(j=1;j<=ii;j++) { aa.hi[0] = s->value[j]; aa.lo[0] = s->value_lo[j]; lis_vector_axpyex_mmm(aa,v[j],z); } /* r = M^-1 * z */ time = lis_wtime(); lis_psolve(solver,z,r); ptime += lis_wtime()-time; /* x = x + r */ lis_vector_axpyex_mmm(one,r,x); if( tol >= nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptime = ptime; lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } for(j=1;j<=i;j++) { jj = i1-j+1; tmp.hi[0] = s->value[jj]; tmp.lo[0] = s->value_lo[jj]; lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]); lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]); lis_quad_minus((LIS_QUAD *)aa.hi); s->value[jj-1] = aa.hi[0]; s->value_lo[jj-1] = aa.lo[0]; s->value[jj] = bb.hi[0]; s->value_lo[jj] = bb.lo[0]; } for(j=0;j<=i1;j++) { t.hi[0] = s->value[j]; t.lo[0] = s->value_lo[j]; if( j==0 ) { lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi); } lis_vector_axpyex_mmm(t,v[j],v[0]); } } solver->retcode = LIS_MAXITER; solver->iter = iter2+1; solver->iter2 = iter; solver->resid = nrm2; lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_bicg_switch(LIS_SOLVER solver) { LIS_MATRIX A,At; LIS_PRECON M; LIS_VECTOR b,x; LIS_VECTOR r,rtld, z,ztld,p, ptld, q, qtld; LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1; LIS_REAL bnrm2, nrm2, tol, tol2; LIS_INT iter,maxiter,n,output,conv; LIS_INT iter2,maxiter2; double times,ptimes; LIS_DEBUG_FUNC_IN; A = solver->A; At = solver->A; M = solver->precon; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN]; ptimes = 0.0; r = solver->work[0]; rtld = solver->work[1]; z = solver->work[2]; ztld = solver->work[3]; p = solver->work[4]; ptld = solver->work[5]; q = solver->work[2]; qtld = solver->work[3]; LIS_QUAD_SCALAR_MALLOC(alpha,0,1); LIS_QUAD_SCALAR_MALLOC(beta,1,1); LIS_QUAD_SCALAR_MALLOC(rho,2,1); LIS_QUAD_SCALAR_MALLOC(rho_old,3,1); LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1); rho_old.hi[0] = 1.0; rho_old.lo[0] = 0.0; /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol2 = solver->tol_switch; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, ptld); r->precision = LIS_PRECISION_DEFAULT; rtld->precision = LIS_PRECISION_DEFAULT; p->precision = LIS_PRECISION_DEFAULT; ptld->precision = LIS_PRECISION_DEFAULT; for( iter=1; iter<=maxiter2; iter++ ) { /* z = M^-1 * r */ /* ztld = M^-T * rtld */ times = lis_wtime(); lis_psolve(solver, r, z); lis_psolvet(solver, rtld, ztld); ptimes += lis_wtime()-times; /* rho = <z,rtld> */ lis_vector_dot(z,rtld,&rho.hi[0]); /* test breakdown */ if( rho.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) */ beta.hi[0] = rho.hi[0] / rho_old.hi[0]; /* p = z + beta*p */ /* ptld = ztld + beta*ptld */ /* q = A * p */ /* qtld = A^T * ptld */ lis_vector_xpay(z,beta.hi[0],p); LIS_MATVEC(A,p,q); lis_vector_xpay(ztld,beta.hi[0],ptld); LIS_MATVECT(At,ptld,qtld); /* tmpdot1 = <ptld,q> */ lis_vector_dot(ptld,q,&tmpdot1.hi[0]); /* test breakdown */ if( tmpdot1.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho / tmpdot1 */ alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0]; /* x = x + alpha*p */ lis_vector_axpy(alpha.hi[0],p,x); /* r = r - alpha*q */ lis_vector_axpy(-alpha.hi[0],q,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( nrm2 <= tol2 ) { solver->iter = iter; solver->iter2 = iter; solver->ptimes = ptimes; break; } /* rtld = rtld - alpha*qtld */ lis_vector_axpy(-alpha.hi[0],qtld,rtld); rho_old.hi[0] = rho.hi[0]; } r->precision = LIS_PRECISION_QUAD; rtld->precision = LIS_PRECISION_QUAD; p->precision = LIS_PRECISION_QUAD; ptld->precision = LIS_PRECISION_QUAD; /* solver->precon->precon_type = 0;*/ solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE; lis_vector_copyex_mn(x,solver->xx); rho_old.hi[0] = 1.0; lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2); tol = solver->tol; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, ptld); for( iter2=iter+1; iter2<=maxiter; iter2++ ) { /* z = M^-1 * r */ /* ztld = M^-T * rtld */ times = lis_wtime(); lis_psolve(solver, r, z); lis_psolvet(solver, rtld, ztld); /* memset(z->value_lo,0,n*sizeof(LIS_SCALAR)); memset(ztld->value_lo,0,n*sizeof(LIS_SCALAR));*/ ptimes += lis_wtime()-times; /* rho = <z,rtld> */ lis_vector_dotex_mmm(z,rtld,&rho); /* printf("rho = %e %e\n",rho.hi[0],rho.lo[0]);*/ /* test breakdown */ if( rho.hi[0]==0.0 && rho.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) */ lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi); /* p = z + beta*p */ /* ptld = ztld + beta*ptld */ /* q = A * p */ /* qtld = A^T * ptld */ lis_vector_xpayex_mmm(z,beta,p); LIS_MATVEC(A,p,q); lis_vector_xpayex_mmm(ztld,beta,ptld); LIS_MATVECT(At,ptld,qtld); /* tmpdot1 = <ptld,q> */ lis_vector_dotex_mmm(ptld,q,&tmpdot1); /* test breakdown */ if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho / tmpdot1 */ lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi); /* x = x + alpha*p */ lis_vector_axpyex_mmm(alpha,p,x); /* r = r - alpha*q */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,q,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol > nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } /* rtld = rtld - alpha*qtld */ lis_vector_axpyex_mmm(alpha,qtld,rtld); rho_old.hi[0] = rho.hi[0]; rho_old.lo[0] = rho.lo[0]; } solver->retcode = LIS_MAXITER; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT nsolver, precon_type, maxiter; LIS_INT err; LIS_SCALAR *residual; LIS_VECTOR xx; LIS_INT output; LIS_INT scale; LIS_INT conv_cond; LIS_INT precision,is_use_at,storage,block; LIS_INT i,n,np; double p_c_times, p_i_times,itimes; LIS_SCALAR nrm2,tol,tol_w; LIS_VECTOR t; LIS_VECTOR bb; LIS_MATRIX AA,B; LIS_MATRIX At; char buf[64]; LIS_DEBUG_FUNC_IN; nsolver = solver->options[LIS_OPTIONS_SOLVER]; precon_type = solver->options[LIS_OPTIONS_PRECON]; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; scale = solver->options[LIS_OPTIONS_SCALE]; precision = solver->options[LIS_OPTIONS_PRECISION]; is_use_at = solver->options[LIS_OPTIONS_USE_AT]; storage = solver->options[LIS_OPTIONS_STORAGE]; block = solver->options[LIS_OPTIONS_STORAGE_BLOCK]; conv_cond = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol_w = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN]; solver->precision = precision; if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN); return LIS_ERR_ILL_ARG; } if( precon_type < 0 || precon_type > precon_register_type ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1); return LIS_ERR_ILL_ARG; } if( maxiter<0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter); return LIS_ERR_ILL_ARG; } #ifdef USE_MPI if( precon_type == LIS_PRECON_TYPE_SAAMG && solver->A->nprocs < 2) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs); return LIS_ERR_ILL_ARG; } #endif #ifdef USE_QUAD_PRECISION if( precision==LIS_PRECISION_QUAD && lis_solver_execute_quad[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 ) { solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter; } #endif err = lis_solver_check_params[nsolver](solver); if( err ) { solver->retcode = err; return err; } /* end parameter check */ solver->A = A; solver->b = b; /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { solver->retcode = err; return err; } if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] ) { if( output ) lis_printf(A->comm,"initial vector x = 0\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(0.0,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(0.0,xx); } else { lis_vector_set_allex_nm(0.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x = user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* create residual history vector */ if( solver->residual ) lis_free(solver->residual); residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual"); if( residual==NULL ) { LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); solver->retcode = err; return err; } residual[0] = 1.0; n = A->n; np = A->np; t = NULL; At = NULL; p_c_times = lis_wtime(); if( precon_type==LIS_PRECON_TYPE_IS ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR ) { solver->options[LIS_OPTIONS_ISLEVEL] = 0; } } else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } } else if( scale ) { if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI ) { if( A->matrix_type!=LIS_MATRIX_BSR ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,block,block,NULL,NULL); lis_matrix_set_type(B,storage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } err = lis_matrix_split(A); if( err ) return err; err = lis_matrix_diag_duplicate(A->D,&solver->WD); if( err ) return err; lis_matrix_diag_copy(A->D,solver->WD); lis_matrix_diag_inverse(solver->WD); lis_matrix_bscaling_bsr(A,solver->WD); lis_vector_duplicate(A,&t); lis_matrix_diag_matvec(solver->WD,b,t); lis_vector_copy(t,b); lis_vector_destroy(t); t = NULL; } else { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG ) { scale = LIS_SCALE_SYMM_DIAG; } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,scale); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } } } /* precon_type = precon->precon_type;*/ if( precon_type==LIS_PRECON_TYPE_IS ) { if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR ) { AA = solver->A; bb = solver->b; } else { AA = precon->A; bb = precon->Pb; } } else { AA = A; bb = b; } p_c_times = lis_wtime() - p_c_times; itimes = lis_wtime(); /* Matrix Convert */ solver->A = AA; solver->b = bb; err = lis_matrix_convert_self(solver); if( err ) { lis_vector_destroy(xx); lis_solver_work_destroy(solver); lis_free(residual); solver->retcode = err; return err; } block = solver->A->bnr; if( A->my_rank==0 ) { if( output ) printf("precision : %s\n", lis_precisionname[precision]); if( output ) printf("solver : %s %d\n", lis_solvername[nsolver],nsolver); switch( precon_type ) { case LIS_PRECON_TYPE_ILU: i = solver->options[LIS_OPTIONS_FILL]; if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR ) { if( output ) sprintf(buf,"Block %s(%d)",lis_preconname[precon_type],i); } else { if( output ) sprintf(buf,"%s(%d)",lis_preconname[precon_type],i); } break; default: if( output ) sprintf(buf,"%s",lis_preconname[precon_type]); break; } if( solver->options[LIS_OPTIONS_ADDS] && precon_type ) { if( output ) printf("precon : %s + additive schwarz\n", buf); } else { if( output ) printf("precon : %s\n", buf); } } switch(conv_cond) { case LIS_CONV_COND_NRM2_R: case LIS_CONV_COND_NRM2_B: if( A->my_rank==0 ) { if( output ) ("CONV_COND : ||r||_2 <= %6.1e*||r_0||_2\n", tol); } break; case LIS_CONV_COND_NRM1_B: lis_vector_nrm1(b,&nrm2); nrm2 = nrm2*tol_w + tol; if( A->my_rank==0 ) { if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2); } break; } if( A->my_rank==0 ) { if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC ) { if( output ) printf("storage : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block); } else { if( output ) printf("storage : %s\n", lis_storagename[AA->matrix_type-1]); } } /* create work vector */ err = lis_solver_malloc_work[nsolver](solver); if( err ) { lis_vector_destroy(xx); lis_precon_destroy(precon); solver->retcode = err; return err; } if( nsolver==LIS_SOLVER_BICG && is_use_at ) { if( output ) lis_printf(A->comm,"Use At\n"); lis_matrix_duplicate(AA,&At); lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]); lis_matrix_convert(AA,At); solver->At = At; } solver->x = xx; solver->xx = x; solver->precon = precon; solver->residual = residual; /* execute solver */ #ifndef USE_QUAD_PRECISION err = lis_solver_execute[nsolver](solver); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_solver_execute[nsolver](solver); } else if( precision==LIS_PRECISION_QUAD ) { err = lis_solver_execute_quad[nsolver](solver); } else if( precision==LIS_PRECISION_SWITCH ) { err = lis_solver_execute_switch[nsolver](solver); } #endif solver->retcode = err; if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { x->value[i] = xx->value[i]*solver->d->value[i]; } } else { #ifndef USE_QUAD_PRECISION lis_vector_copy(xx,x); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(xx,x); } else { lis_vector_copyex_mn(xx,x); } #endif } itimes = lis_wtime() - itimes - solver->ptimes; p_i_times = solver->ptimes; solver->ptimes = p_c_times + p_i_times; solver->p_c_times = p_c_times; solver->p_i_times = p_i_times; solver->times = solver->ptimes + itimes; solver->itimes = itimes; lis_solver_work_destroy(solver); lis_vector_duplicate(A,&t); xx->precision = LIS_PRECISION_DEFAULT; lis_matvec(A,xx,t); lis_vector_xpay(b,-1.0,t); if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { t->value[i] = t->value[i]/solver->d->value[i]; } } lis_vector_nrm2(t,&nrm2); /* solver->resid = nrm2; */ if( A->my_rank==0 ) { if( err ) { if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err); } else { if( output ) printf("lis_solve : normal end\n\n"); } } if( precision==LIS_PRECISION_DOUBLE ) { solver->iter2 = solver->iter; } else if( precision==LIS_PRECISION_QUAD ) { solver->iter2 = 0; } lis_vector_destroy(t); /* lis_vector_destroy(d);*/ lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver) { LIS_MATRIX A; LIS_PRECON M; LIS_VECTOR b,x; LIS_VECTOR r,rtld, t,p,v, s, phat, shat; LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2; LIS_REAL bnrm2, nrm2, tol, tol2; LIS_INT iter,maxiter,n,output,conv; LIS_INT iter2,maxiter2; double times,ptimes; LIS_DEBUG_FUNC_IN; A = solver->A; M = solver->precon; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN]; ptimes = 0.0; rtld = solver->work[0]; r = solver->work[1]; s = solver->work[1]; t = solver->work[2]; p = solver->work[3]; v = solver->work[4]; phat = solver->work[5]; shat = solver->work[6]; LIS_QUAD_SCALAR_MALLOC(alpha,0,1); LIS_QUAD_SCALAR_MALLOC(beta,1,1); LIS_QUAD_SCALAR_MALLOC(rho,2,1); LIS_QUAD_SCALAR_MALLOC(rho_old,3,1); LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1); LIS_QUAD_SCALAR_MALLOC(omega,6,1); LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1); rho_old.hi[0] = 1.0; rho_old.lo[0] = 0.0; alpha.hi[0] = 1.0; alpha.lo[0] = 0.0; omega.hi[0] = 1.0; omega.lo[0] = 0.0; lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, phat); lis_vector_set_allex_nm(0.0, s); lis_vector_set_allex_nm(0.0, shat); /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol2 = solver->tol_switch; lis_solver_set_shadowresidual(solver,r,rtld); s->precision = LIS_PRECISION_DEFAULT; shat->precision = LIS_PRECISION_DEFAULT; p->precision = LIS_PRECISION_DEFAULT; phat->precision = LIS_PRECISION_DEFAULT; for( iter=1; iter<=maxiter2; iter++ ) { /* rho = <rtld,r> */ lis_vector_dot(rtld,r,&rho.hi[0]); /* test breakdown */ if( rho.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } if( iter==1 ) { lis_vector_copy(r,p); } else { /* beta = (rho / rho_old) * (alpha / omega) */ beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]); /* p = r + beta*(p - omega*v) */ lis_vector_axpy(-omega.hi[0],v,p); lis_vector_xpay(r,beta.hi[0],p); } /* phat = M^-1 * p */ times = lis_wtime(); lis_psolve(solver, p, phat); ptimes += lis_wtime()-times; /* v = A * phat */ LIS_MATVEC(A,phat,v); /* tmpdot1 = <rtld,v> */ lis_vector_dot(rtld,v,&tmpdot1.hi[0]); /* test breakdown */ /* */ /* alpha = rho / tmpdot1 */ alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0]; /* s = r - alpha*v */ lis_vector_axpy(-alpha.hi[0],v,r); /* Early check for tolerance */ lis_solver_get_residual[conv](s,solver,&nrm2); if( nrm2 <= tol2 ) { if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2); } lis_vector_axpy(alpha.hi[0],phat,x); solver->iter = iter; solver->iter2 = iter; solver->ptimes = ptimes; break; } /* shat = M^-1 * s */ times = lis_wtime(); lis_psolve(solver, s, shat); ptimes += lis_wtime()-times; /* t = A * shat */ LIS_MATVEC(A,shat,t); /* tmpdot1 = <t,s> */ /* tmpdot2 = <t,t> */ /* omega = tmpdot1 / tmpdot2 */ lis_vector_dot(t,s,&tmpdot1.hi[0]); lis_vector_dot(t,t,&tmpdot2.hi[0]); omega.hi[0] = tmpdot1.hi[0] / tmpdot2.hi[0]; /* x = x + alpha*phat + omega*shat */ lis_vector_axpy(alpha.hi[0],phat,x); lis_vector_axpy(omega.hi[0],shat,x); /* r = s - omega*t */ lis_vector_axpy(-omega.hi[0],t,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2); } if( nrm2 <= tol2 ) { solver->iter = iter; solver->iter2 = iter; solver->ptimes = ptimes; break; } if( omega.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } rho_old.hi[0] = rho.hi[0]; } s->precision = LIS_PRECISION_QUAD; shat->precision = LIS_PRECISION_QUAD; p->precision = LIS_PRECISION_QUAD; phat->precision = LIS_PRECISION_QUAD; solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE; lis_vector_copyex_mn(x,solver->xx); rho_old.hi[0] = 1.0; alpha.hi[0] = 1.0; omega.hi[0] = 1.0; lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, phat); lis_vector_set_allex_nm(0.0, s); lis_vector_set_allex_nm(0.0, shat); /* Initial Residual */ lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2); tol = solver->tol; lis_solver_set_shadowresidual(solver,r,rtld); for( iter2=iter+1; iter2<=maxiter; iter2++ ) { /* rho = <rtld,r> */ lis_vector_dotex_mmm(rtld,r,&rho); /* test breakdown */ if( rho.hi[0]==0.0 && rho.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } if( iter2==1 ) { lis_vector_copyex_mm(r,p); } else { /* beta = (rho / rho_old) * (alpha / omega) */ lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi); lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi); lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi); /* p = r + beta*(p - omega*v) */ lis_quad_minus((LIS_QUAD *)omega.hi); lis_vector_axpyex_mmm(omega,v,p); lis_vector_xpayex_mmm(r,beta,p); } /* phat = M^-1 * p */ times = lis_wtime(); lis_psolve(solver, p, phat); ptimes += lis_wtime()-times; /* v = A * phat */ LIS_MATVEC(A,phat,v); /* tmpdot1 = <rtld,v> */ lis_vector_dotex_mmm(rtld,v,&tmpdot1); /* test breakdown */ /* */ /* alpha = rho / tmpdot1 */ lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi); /* s = r - alpha*v */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,v,r); /* Early check for tolerance */ lis_solver_get_residual[conv](s,solver,&nrm2); if( tol > nrm2 ) { if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2); } lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,phat,x); solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } /* shat = M^-1 * s */ times = lis_wtime(); lis_psolve(solver, s, shat); ptimes += lis_wtime()-times; /* t = A * shat */ LIS_MATVEC(A,shat,t); /* tmpdot1 = <t,s> */ /* tmpdot2 = <t,t> */ /* omega = tmpdot1 / tmpdot2 */ lis_vector_dotex_mmm(t,s,&tmpdot1); lis_vector_dotex_mmm(t,t,&tmpdot2); lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi); /* x = x + alpha*phat + omega*shat */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyex_mmm(alpha,phat,x); lis_vector_axpyex_mmm(omega,shat,x); /* r = s - omega*t */ lis_quad_minus((LIS_QUAD *)omega.hi); lis_vector_axpyex_mmm(omega,t,r); lis_quad_minus((LIS_QUAD *)omega.hi); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2); } if( tol > nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } if( omega.hi[0]==0.0 && omega.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } rho_old.hi[0] = rho.hi[0]; rho_old.lo[0] = rho.lo[0]; } solver->retcode = LIS_MAXITER; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver) { LIS_MATRIX A; LIS_VECTOR x; LIS_VECTOR r, rtld, rhat, p, ptld, phat; LIS_VECTOR t, ttld, that, t0, t0hat; LIS_VECTOR y, w, u, z; LIS_QUAD_PTR alpha, beta, rho, rho_old; LIS_QUAD_PTR qsi, eta, one; LIS_QUAD_PTR tmp, tmpdot[5]; LIS_REAL bnrm2, nrm2, tol, tol2; LIS_INT iter,maxiter,output,conv; LIS_INT iter2,maxiter2; double time,ptime; LIS_DEBUG_FUNC_IN; A = solver->A; x = solver->x; maxiter = solver->options[LIS_OPTIONS_MAXITER]; maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN]; ptime = 0.0; rtld = solver->work[0]; r = solver->work[1]; rhat = solver->work[2]; p = solver->work[3]; ptld = solver->work[4]; phat = solver->work[5]; t = solver->work[6]; ttld = solver->work[7]; that = solver->work[8]; t0 = solver->work[9]; t0hat = solver->work[10]; y = solver->work[11]; w = solver->work[12]; u = solver->work[13]; z = solver->work[14]; LIS_QUAD_SCALAR_MALLOC(alpha,0,1); LIS_QUAD_SCALAR_MALLOC(beta,1,1); LIS_QUAD_SCALAR_MALLOC(rho,2,1); LIS_QUAD_SCALAR_MALLOC(rho_old,3,1); LIS_QUAD_SCALAR_MALLOC(qsi,4,1); LIS_QUAD_SCALAR_MALLOC(eta,5,1); LIS_QUAD_SCALAR_MALLOC(tmp,6,1); LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1); LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1); LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1); LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1); LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1); LIS_QUAD_SCALAR_MALLOC(one,13,1); rho_old.hi[0] = 1.0; rho_old.lo[0] = 0.0; alpha.hi[0] = 1.0; alpha.lo[0] = 0.0; qsi.hi[0] = 1.0; qsi.lo[0] = 0.0; one.hi[0] = -1.0; one.lo[0] = 0.0; /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol2 = solver->tol_switch; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, ttld); lis_vector_set_allex_nm(0.0, ptld); lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, u); lis_vector_set_allex_nm(0.0, t); lis_vector_set_allex_nm(0.0, t0); for( iter=1; iter<=maxiter2; iter++ ) { /* rho = <rtld,r> */ lis_vector_dot(rtld,r,&rho.hi[0]); /* test breakdown */ if( rho.hi[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) * (alpha / qsi) */ beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]); /* w = ttld + beta*ptld */ lis_vector_axpyz(beta.hi[0],ptld,ttld,w); /* rhat = M^-1 * r */ time = lis_wtime(); lis_psolve(solver, r, rhat); ptime += lis_wtime()-time; /* p = rhat + beta*(p - u) */ lis_vector_axpy(-1,u,p); lis_vector_xpay(rhat,beta.hi[0],p); /* ptld = A * p */ lis_matvec(A,p,ptld); /* tmpdot[0] = <rtld,ptld> */ lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]); /* test breakdown */ /* */ /* alpha = rho / tmpdot[0] */ alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0]; /* y = t - r + alpha*(-w + ptld) */ lis_vector_axpyz(-1,w,ptld,y); lis_vector_xpay(t,alpha.hi[0],y); lis_vector_axpy(-1,r,y); /* t = r - alpha*ptld */ lis_vector_axpyz(-alpha.hi[0],ptld,r,t); /* that = M^-1 * t */ /* phat = M^-1 * ptld */ /* t0hat = M^-1 * t0 */ time = lis_wtime(); lis_psolve(solver, t, that); lis_psolve(solver, ptld, phat); lis_psolve(solver, t0, t0hat); ptime += lis_wtime()-time; /* ttld = A * that */ lis_matvec(A,that,ttld); /* tmpdot[0] = <y,y> */ /* tmpdot[1] = <ttld,t> */ /* tmpdot[2] = <y,t> */ /* tmpdot[3] = <ttld,y> */ /* tmpdot[4] = <ttld,ttld> */ lis_vector_dot(y,y,&tmpdot[0].hi[0]); lis_vector_dot(ttld,t,&tmpdot[1].hi[0]); lis_vector_dot(y,t,&tmpdot[2].hi[0]); lis_vector_dot(ttld,y,&tmpdot[3].hi[0]); lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]); if(iter==1) { qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0]; eta.hi[0] = 0.0; } else { tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0]; qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0]; eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0]; } /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */ lis_vector_xpay(t0hat,beta.hi[0],u); lis_vector_axpy(-1,rhat,u); lis_vector_scale(eta.hi[0],u); lis_vector_axpy(qsi.hi[0],phat,u); /* z = qsi*rhat + eta*z - alpha*u */ lis_vector_scale(eta.hi[0],z); lis_vector_axpy(qsi.hi[0],rhat,z); lis_vector_axpy(-alpha.hi[0],u,z); /* x = x + alpha*p + z */ lis_vector_axpy(alpha.hi[0],p,x); lis_vector_axpy(1,z,x); /* r = t - eta*y - qsi*ttld */ lis_vector_axpyz(-eta.hi[0],y,t,r); lis_vector_axpy(-qsi.hi[0],ttld,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol2 >= nrm2 ) { solver->iter = iter; solver->iter2 = iter; solver->ptime = ptime; break; } lis_vector_copy(t,t0); rho_old.hi[0] = rho.hi[0]; } r->precision = LIS_PRECISION_QUAD; p->precision = LIS_PRECISION_QUAD; t->precision = LIS_PRECISION_QUAD; t0->precision = LIS_PRECISION_QUAD; ptld->precision = LIS_PRECISION_QUAD; that->precision = LIS_PRECISION_QUAD; solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE; lis_vector_copyex_mn(x,solver->xx); rho_old.hi[0] = 1.0; alpha.hi[0] = 1.0; qsi.hi[0] = 1.0; one.hi[0] = -1.0; /* Initial Residual */ lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2); tol = solver->tol; lis_solver_set_shadowresidual(solver,r,rtld); lis_vector_set_allex_nm(0.0, ttld); lis_vector_set_allex_nm(0.0, ptld); lis_vector_set_allex_nm(0.0, p); lis_vector_set_allex_nm(0.0, u); lis_vector_set_allex_nm(0.0, t); lis_vector_set_allex_nm(0.0, t0); for( iter2=iter+1; iter2<=maxiter; iter2++ ) { /* rho = <rtld,r> */ lis_vector_dotex_mmm(rtld,r,&rho); /* test breakdown */ if( rho.hi[0]==0.0 && rho.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) * (alpha / qsi) */ lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi); lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi); lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi); /* w = ttld + beta*ptld */ lis_vector_axpyzex_mmmm(beta,ptld,ttld,w); /* rhat = M^-1 * r */ time = lis_wtime(); lis_psolve(solver, r, rhat); ptime += lis_wtime()-time; /* p = rhat + beta*(p - u) */ lis_vector_axpyex_mmm(one,u,p); lis_vector_xpayex_mmm(rhat,beta,p); /* ptld = A * p */ lis_matvec(A,p,ptld); /* tmpdot[0] = <rtld,ptld> */ lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]); /* test breakdown */ /* */ /* alpha = rho / tmpdot[0] */ lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi); /* y = t - r + alpha*(-w + ptld) */ lis_vector_axpyzex_mmmm(one,w,ptld,y); lis_vector_xpayex_mmm(t,alpha,y); lis_vector_axpyex_mmm(one,r,y); /* t = r - alpha*ptld */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_vector_axpyzex_mmmm(alpha,ptld,r,t); /* that = M^-1 * t */ /* phat = M^-1 * ptld */ /* t0hat = M^-1 * t0 */ time = lis_wtime(); lis_psolve(solver, t, that); lis_psolve(solver, ptld, phat); lis_psolve(solver, t0, t0hat); ptime += lis_wtime()-time; /* ttld = A * that */ lis_matvec(A,that,ttld); /* tmpdot[0] = <y,y> */ /* tmpdot[1] = <ttld,t> */ /* tmpdot[2] = <y,t> */ /* tmpdot[3] = <ttld,y> */ /* tmpdot[4] = <ttld,ttld> */ lis_vector_dotex_mmm(y,y,&tmpdot[0]); lis_vector_dotex_mmm(ttld,t,&tmpdot[1]); lis_vector_dotex_mmm(y,t,&tmpdot[2]); lis_vector_dotex_mmm(ttld,y,&tmpdot[3]); lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]); if(iter==1) { lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi); eta.hi[0] = 0.0; eta.lo[0] = 0.0; } else { lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi); lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi); lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi); lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi); lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi); lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi); lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi); lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi); lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi); lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi); lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi); } /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */ lis_vector_xpayex_mmm(t0hat,beta,u); lis_vector_axpyex_mmm(one,rhat,u); lis_vector_scaleex_mm(eta,u); lis_vector_axpyex_mmm(qsi,phat,u); /* z = qsi*rhat + eta*z - alpha*u */ lis_vector_scaleex_mm(eta,z); lis_vector_axpyex_mmm(qsi,rhat,z); lis_vector_axpyex_mmm(alpha,u,z); /* x = x + alpha*p + z */ lis_quad_minus((LIS_QUAD *)alpha.hi); lis_quad_minus((LIS_QUAD *)one.hi); lis_vector_axpyex_mmm(alpha,p,x); lis_vector_axpyex_mmm(one,z,x); lis_quad_minus((LIS_QUAD *)one.hi); /* r = t - eta*y - qsi*ttld */ lis_quad_minus((LIS_QUAD *)eta.hi); lis_quad_minus((LIS_QUAD *)qsi.hi); lis_vector_axpyzex_mmmm(eta,y,t,r); lis_vector_axpyex_mmm(qsi,ttld,r); lis_quad_minus((LIS_QUAD *)eta.hi); lis_quad_minus((LIS_QUAD *)qsi.hi); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol > nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter2; solver->iter2 = iter; solver->resid = nrm2; solver->ptime = ptime; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } lis_vector_copyex_mm(t,t0); rho_old.hi[0] = rho.hi[0]; rho_old.lo[0] = rho.lo[0]; } solver->retcode = LIS_MAXITER; solver->iter = iter; solver->iter2 = iter2; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }