/*
 * lis_epi - power-iteration loop for one eigenpair of esolver->A.
 *
 * Every pass
 *   1. scales x to unit 2-norm,
 *   2. forms z = A*x,
 *   3. takes evalue = <x,z>,
 *   4. measures resid = | ||z - evalue*x||_2 / evalue |,
 *   5. replaces x by z (so x is NOT normalised on exit).
 * The loop ends as soon as resid <= tol, or after emaxiter passes.
 *
 * Reads:   esolver->options[LIS_EOPTIONS_MAXITER / OUTPUT / INITGUESS_ONES],
 *          esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN],
 *          esolver->A, esolver->x, esolver->work[0], esolver->work[1].
 * Writes:  x and the two work vectors, esolver->retcode, ->iter, ->resid,
 *          ->evalue[0], and ->residual[iter] when LIS_EPRINT_MEM is set.
 *
 * Returns: LIS_SUCCESS when the tolerance is met, LIS_MAXITER otherwise.
 *
 * NOTE(review): a zero vector x (nrm2 == 0) or evalue == 0 leads to a
 * division by zero below; neither case is guarded here.
 */
LIS_INT lis_epi(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR z,q;
	LIS_SCALAR evalue;
	LIS_INT emaxiter;
	LIS_INT iter,output;
	LIS_REAL tol;
	LIS_REAL nrm2,resid;

	LIS_DEBUG_FUNC_IN;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	output = esolver->options[LIS_EOPTIONS_OUTPUT];

	A = esolver->A;
	x = esolver->x;
	if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,x);
	}
	z = esolver->work[0];
	q = esolver->work[1];

	/*
	 * Give the reported quantities defined values: with emaxiter <= 0 the
	 * loop body never runs and the LIS_MAXITER exit below would otherwise
	 * store indeterminate values into the solver.
	 */
	evalue = 0.0;
	resid = 0.0;

	iter = 0;
	while( iter<emaxiter )
	{
		iter = iter+1;

		/* x = x / ||x||_2 */
		lis_vector_nrm2(x, &nrm2);
		lis_vector_scale(1/nrm2, x);

		/* z = A * x */
		lis_matvec(A,x,z);

		/* evalue = <x,z> */
		lis_vector_dot(x, z, &evalue);

		/* resid = ||z - evalue * x||_2 / |evalue| */
		lis_vector_axpyz(-evalue,x,z,q);
		lis_vector_nrm2(q, &resid);
		resid = fabs(resid / evalue);

		/* x = z */
		lis_vector_copy(z, x);

		/* residual history */
		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
		}

		/* convergence check */
		if( tol >= resid )
		{
			esolver->retcode = LIS_SUCCESS;
			esolver->iter = iter;
			esolver->resid = resid;
			esolver->evalue[0] = evalue;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	esolver->retcode = LIS_MAXITER;
	esolver->iter = iter;
	esolver->resid = resid;
	esolver->evalue[0] = evalue;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_idr1 - IDR-type iteration with s hard-wired to 1 (presumably the
 * s = 1 case of IDR(s); lis_idrs reads s from an option instead).
 *
 * With s == 1, P, dX and dR each address a single work vector:
 *   P  = work[4], dX = work[5], dR = work[6];
 *   r, t, v, av = work[0..3].
 *
 * Flow:
 *   - obtain the initial residual r (a non-zero return from
 *     lis_solver_get_initial_residual ends the solve with LIS_SUCCESS),
 *   - fill P[0] with values from the generator seeded by the constant
 *     init[] array and pass it through lis_idrs_orth,
 *   - do one start-up step (counted as iteration 1),
 *   - loop: every pass of the while loop performs TWO updates of x and r,
 *     each followed by its own residual evaluation, history record,
 *     convergence test and iter++.
 *
 * Writes solver->retcode, ->iter, ->resid on every exit after the initial
 * residual; ->ptimes only on the LIS_SUCCESS exits.
 *
 * Returns LIS_SUCCESS on convergence, LIS_MAXITER otherwise.
 *
 * NOTE(review): the preconditioned operator products are only compiled in
 *   when PRE_RIGHT (or partly PRE_BOTH) is defined. Without either macro
 *   dX[0]/dR[0], t and av are used without being computed here -- confirm
 *   how this file is built.
 * NOTE(review): the PRE_BOTH start-up branch writes t, av and v but not
 *   dX[0]/dR[0], and calls lis_vector_print(av), which looks like leftover
 *   debug output. The second half of the loop has no PRE_BOTH variant.
 * NOTE(review): b, j, k, oldest, angle and tim are not used by the compiled
 *   path (j and oldest appear only inside the "#if 0" alternatives).
 * NOTE(review): iter advances by 2 per pass while the loop tests
 *   iter<=maxiter, so solver->residual[iter] can be indexed up to
 *   maxiter+2 -- confirm the array is large enough.
 */
LIS_INT lis_idr1(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR M,m,c;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_REAL angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	s = 1;
	ptimes = 0.0;

	/* work-vector layout (s == 1, so each of P, dX, dR is one vector) */
	r = solver->work[0];
	t = solver->work[1];
	v = solver->work[2];
	av = solver->work[3];
	P = &solver->work[4];
	dX = &solver->work[4+s];
	dR = &solver->work[4+2*s];

	angle = 0.7;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* P[0]: generator output, seeded from the constant init[] array */
	init_by_array(init, length);
	for(i=0;i<n;i++)
	{
		P[0]->value[i] = genrand_real1();
	}
	/* disabled alternative: lis_vector_copy(r,P[0]); */
	lis_idrs_orth(s,P);

	/* start-up direction: dX[0] = M^-1 r, dR[0] = A dX[0] (PRE_RIGHT build) */
#ifdef PRE_RIGHT
	times = lis_wtime();
	lis_psolve(solver, r, dX[0]);
	ptimes += lis_wtime()-times;
	LIS_MATVEC(A,dX[0],dR[0]);
#else
#ifdef PRE_BOTH
	times = lis_wtime();
	lis_psolve_right(solver, r, t);
	ptimes += lis_wtime()-times;
	LIS_MATVEC(A,t,av);
	lis_vector_print(av);
	times = lis_wtime();
	lis_psolve_left(solver, av, v);
	ptimes += lis_wtime()-times;
#endif
#endif

	/* disabled alternative: lis_idrs_omega(dR[k],r,angle,&om); */
	/* om = <dR,r> / <dR,dR>;  dX *= om;  dR *= -om;  x += dX;  r += dR */
	lis_vector_dot(dR[0],dR[0],&h);
	lis_vector_dot(dR[0],r,&om);
	om = om / h;
	lis_vector_scale(om,dX[0]);
	lis_vector_scale(-om,dR[0]);
	lis_vector_axpy(1.0,dX[0],x);
	lis_vector_axpy(1.0,dR[0],r);

	/* convergence check (this start-up step is reported as iteration 1) */
	lis_solver_get_residual[conv](r,solver,&nrm2);
	if( output )
	{
		if( output & LIS_PRINT_MEM ) solver->residual[1] = nrm2;
		if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", 1, nrm2);
	}

	if( tol >= nrm2 )
	{
		solver->retcode = LIS_SUCCESS;
		solver->iter = 1;
		solver->resid = nrm2;
		solver->ptimes = ptimes;
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}

	/* M = <P[0],dR[0]>, m = <P[0],r>: the 1x1 system solved as c = m/M */
	lis_vector_dot(P[0],dR[0],&M);

	iter = s;
	oldest = 0;
	lis_vector_dot(P[0],r,&m);

	while( iter<=maxiter )
	{
		tim = lis_wtime();

		/* ---- first update of this pass: a new om is computed ---- */

		/* solve Mc=m */
		c = m/M;

		/* v = r - c*dR[0], written as an element loop */
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/* disabled equivalent: lis_vector_copy(r,v); lis_vector_axpy(-c,dR[0],v); */

		/* av = M^-1 v, t = A av (PRE_RIGHT build) */
#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, v, av);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,av,t);
#else
#ifdef PRE_BOTH
		times = lis_wtime();
		lis_psolve_right(solver, v, t);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,t,av);
		times = lis_wtime();
		lis_psolve_left(solver, av, t);
		ptimes += lis_wtime()-times;
#endif
#endif

		/* disabled alternative: lis_idrs_omega(t,v,angle,&om); lis_vector_dot(t,t,&h); lis_vector_dot(t,v,&om); */
		/*
		 * om = <t,v> / <t,t>, accumulated directly over the n entries of
		 * value[].
		 * NOTE(review): unlike the lis_vector_dot calls elsewhere this sums
		 * only the entries held in value[0..n-1]; confirm that is intended
		 * for distributed vectors.
		 */
		h = t->value[0]*t->value[0];
		om = t->value[0]*v->value[0];
		for(i=1;i<n;i++)
		{
			h += t->value[i]*t->value[i];
			om += t->value[i]*v->value[i];
		}
		om = om / h;
		/* disabled debug output: printf("i=%d om = %lf\n",iter,om); */

#if 0
		/* generic-s variant kept for reference; c is a scalar here, so this would not compile */
		lis_vector_scale(-om,t);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],t);
		}
		lis_vector_copy(t,dR[oldest]);
		lis_vector_scale(om,av);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dX[j],av);
		}
		lis_vector_copy(av,dX[oldest]);
#else
		/* dX[0] = om*av - c*dX[0];  dR[0] = -om*t - c*dR[0]  (h is reused as scratch) */
		for(i=0;i<n;i++)
		{
			h = om*av->value[i];
			h -= dX[0]->value[i] * c;
			dX[0]->value[i] = h;
			h = -om*t->value[i];
			h -= dR[0]->value[i] * c;
			dR[0]->value[i] = h;
		}
#endif

		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* refresh the 1x1 system: h = <P[0],dR[0]>, m += h, M = h */
		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;

		/* ---- second update of this pass: om is reused ---- */

		/* solve Mc=m */
		c = m/M;

		/* v = r - c*dR[0] */
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/* disabled equivalent: lis_vector_copy(r,v); lis_vector_axpy(-c,dR[0],v); */

#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, v, av);
		ptimes += lis_wtime()-times;
#endif

#if 0
		/* generic-s variant kept for reference */
		lis_vector_scale(om,av);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dX[j],av);
		}
		lis_vector_copy(av,dX[oldest]);
#else
		/* dX[0] = om*av - c*dX[0] */
		for(i=0;i<n;i++)
		{
			h = om*av->value[i];
			h -= dX[0]->value[i] * c;
			dX[0]->value[i] = h;
		}
#endif

		/* dR[0] = -A*dX[0], obtained by an explicit product this time */
		LIS_MATVEC(A,dX[0],dR[0]);
		lis_vector_scale(-1.0,dR[0]);

		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* refresh the 1x1 system for the next pass */
		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;

		/* elapsed time of this pass; only consumed by the disabled printf below */
		tim = lis_wtime() - tim;
		/* disabled debug output: printf("update m,M: %e\n",tim); */
	}

	/* iteration budget exhausted (ptimes is not stored on this exit) */
	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_idrs - IDR-type iteration with s taken from
 * solver->options[LIS_OPTIONS_IDRS_RESTART] (presumably IDR(s)).
 *
 * Work-vector layout:
 *   r, t, v, av = work[0..3]
 *   dX[0..s-1]  = work[4 .. 4+s-1]
 *   P[0..s-1]   = work[4+s .. 4+2s-1]
 *   dR[0..s-1]  = work[4+2s .. 4+3s-1]
 * Scratch arrays m, c (length s) and M, MM (length s*s) are allocated here
 * and released on every exit path.
 *
 * Flow:
 *   - obtain the initial residual (a non-zero return from
 *     lis_solver_get_initial_residual ends the solve with LIS_SUCCESS),
 *   - fill P[] from the generator seeded by the constant init[] array and
 *     pass it through lis_idrs_orth,
 *   - s start-up steps (reported as iterations 1..s) that build dX[k],
 *     dR[k] and column k of M (M[k*s+i] = <P[i],dR[k]>),
 *   - main loop: solve M c = m, form v = r - sum_j c[j]*dR[j], overwrite the
 *     slot `oldest` of dX/dR, update x and r, then refresh m and column
 *     `oldest` of M. A new om is computed only when iter % (s+1) == s.
 *
 * Writes solver->retcode, ->iter, ->resid on every exit after the initial
 * residual; ->ptimes only on the LIS_SUCCESS exits.
 *
 * Returns LIS_SUCCESS on convergence, LIS_MAXITER otherwise.
 *
 * NOTE(review): the preconditioner/operator products are compiled in only
 *   when PRE_RIGHT is defined; without it dX/dR, av and t are used without
 *   being computed here -- confirm how this file is built.
 * NOTE(review): the results of lis_malloc are not checked here; what
 *   lis_malloc does on failure is not visible from this function.
 */
LIS_INT lis_idrs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR *M,*m,*c,*MM;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	s = solver->options[LIS_OPTIONS_IDRS_RESTART];
	ptimes = 0.0;

	r = solver->work[0];
	t = solver->work[1];
	v = solver->work[2];
	av = solver->work[3];
	dX = &solver->work[4];
	P = &solver->work[4+s];
	dR = &solver->work[4+2*s];

	m = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::m");
	c = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::c");
	M = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	/* distinct tag: the original reused "lis_idrs::M" for this second matrix */
	MM = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::MM");

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		lis_free2(4,m,c,M,MM);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* P[k]: generator output, seeded from the constant init[] array */
	init_by_array(init, length);
	for(k=0;k<s;k++)
	{
		for(i=0;i<n;i++)
		{
			P[k]->value[i] = genrand_real1();
		}
	}
	lis_idrs_orth(s,P);

	/* s start-up steps, reported as iterations 1..s */
	for( k=0; k<s; k++ )
	{
#ifdef PRE_RIGHT
		/* dX[k] = M^-1 r, dR[k] = A dX[k] */
		times = lis_wtime();
		lis_psolve(solver, r, dX[k]);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,dX[k],dR[k]);
#endif
		/* om = <dR,r> / <dR,dR>;  dX *= om;  dR *= -om;  x += dX;  r += dR */
		lis_vector_dot(dR[k],dR[k],&h);
		lis_vector_dot(dR[k],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[k]);
		lis_vector_scale(-om,dR[k]);
		lis_vector_axpy(1.0,dX[k],x);
		lis_vector_axpy(1.0,dR[k],r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[k+1] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", k+1, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter = k+1;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* column k of M: M[k*s+i] = <P[i],dR[k]> */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[k],&M[k*s+i]);
		}
	}

	iter = s;
	oldest = 0;

	/* m[i] = <P[i],r> */
	for(i=0;i<s;i++)
	{
		lis_vector_dot(P[i],r,&m[i]);
	}

	while( iter<=maxiter )
	{
		/* solve Mc=m (MM is passed as scratch space) */
		lis_array_solve(s,M,m,c,MM);

		/* v = r - sum_j c[j]*dR[j] */
		lis_vector_copy(r,v);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],v);
		}

		if( (iter%(s+1))==s )
		{
			/* step that computes a fresh om */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,av,t);
#endif
			/* om = <t,v> / <t,t> */
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
			om = om / h;

			/* dX[oldest] = om*av - sum_j c[j]*dX[j]  (h is reused as scratch) */
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
			/* dR[oldest] = -om*t - sum_j c[j]*dR[j] */
			for(i=0;i<n;i++)
			{
				h = -om*t->value[i];
				for(j=0;j<s;j++)
				{
					h -= dR[j]->value[i] * c[j];
				}
				dR[oldest]->value[i] = h;
			}
		}
		else
		{
			/* step that reuses the previous om */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
#endif
			/* dX[oldest] = om*av - sum_j c[j]*dX[j] */
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
			/* dR[oldest] = -A*dX[oldest], by an explicit product */
			LIS_MATVEC(A,dX[oldest],dR[oldest]);
			lis_vector_scale(-1.0,dR[oldest]);
		}

		lis_vector_axpy(1.0,dR[oldest],r);
		lis_vector_axpy(1.0,dX[oldest],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* m[i] += <P[i],dR[oldest]> and column `oldest` of M is replaced */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[oldest],&h);
			m[i] += h;
			M[oldest*s+i] = h;
		}

		/* advance the slot to overwrite next, wrapping at s */
		oldest++;
		if( oldest==s ) oldest = 0;
	}

	lis_free2(4,m,c,M,MM);
	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicgsafe_switch - two-phase BiCGSafe iteration.
 *
 * Phase 1 (iterations 1..maxiter2, maxiter2 = LIS_OPTIONS_SWITCH_MAXITER)
 *   works on the .hi words only, with the plain lis_vector_* kernels, and
 *   stops early once the residual measure drops to solver->tol_switch.
 * Switch
 *   marks r, p, t, t0, ptld and that as LIS_PRECISION_QUAD, clears
 *   LIS_OPTIONS_INITGUESS_ZEROS, calls lis_vector_copyex_mn(x,solver->xx),
 *   recomputes the initial residual and the shadow residual, and resets
 *   the recurrence scalars and vectors.
 * Phase 2 (iterations iter+1..maxiter)
 *   repeats the same recurrence with the lis_quad_* / lis_vector_*ex_*
 *   kernels until the residual measure drops below solver->tol.
 *
 * The scalar named `one` holds -1.0; it is negated temporarily where +1 is
 * needed. alpha, eta and qsi are likewise negated in place around the
 * kernels that need the opposite sign and restored afterwards.
 *
 * Result fields: solver->iter is the total iteration count and
 * solver->iter2 the phase-1 count on the phase-2 exits.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (<rtld,r> == 0) or LIS_MAXITER.
 *
 * NOTE(review): <rtld,ptld> is used as a divisor without a breakdown test
 *   in both phases (the original left an empty "test breakdown" comment).
 * NOTE(review): phase 2 converges on tol > nrm2 whereas phase 1 uses >=;
 *   kept as found.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld, phat;
	LIS_VECTOR t, ttld, that, t0, t0hat;
	LIS_VECTOR y, w, u, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	x = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes = 0.0;

	/* placeholder so that a breakdown on the very first iteration does not
	   report an indeterminate residual */
	nrm2 = 0.0;

	rtld = solver->work[0];
	r = solver->work[1];
	rhat = solver->work[2];
	p = solver->work[3];
	ptld = solver->work[4];
	phat = solver->work[5];
	t = solver->work[6];
	ttld = solver->work[7];
	that = solver->work[8];
	t0 = solver->work[9];
	t0hat = solver->work[10];
	y = solver->work[11];
	w = solver->work[12];
	u = solver->work[13];
	z = solver->work[14];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	/* ------------------------------------------------------------------ */
	/* phase 1: .hi words only                                            */
	/* ------------------------------------------------------------------ */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

		/* w = ttld + beta*ptld */
		lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

		/* rhat = M^-1 * r */
		times = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptimes += lis_wtime()-times;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpy(-1,u,p);
		lis_vector_xpay(rhat,beta.hi[0],p);

		/* ptld = A * p */
		LIS_MATVEC(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);

		/* alpha = rho / tmpdot[0] */
		alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyz(-1,w,ptld,y);
		lis_vector_xpay(t,alpha.hi[0],y);
		lis_vector_axpy(-1,r,y);

		/* t = r - alpha*ptld */
		lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

		/* that  = M^-1 * t    */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0   */
		times = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptimes += lis_wtime()-times;

		/* ttld = A * that */
		LIS_MATVEC(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dot(y,y,&tmpdot[0].hi[0]);
		lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
		lis_vector_dot(y,t,&tmpdot[2].hi[0]);
		lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
		lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);

		if( iter==1 )
		{
			/* first step of the recurrence: one-parameter minimisation */
			qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
			eta.hi[0] = 0.0;
		}
		else
		{
			/* 2x2 system for (qsi, eta) */
			tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
			qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
			eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpay(t0hat,beta.hi[0],u);
		lis_vector_axpy(-1,rhat,u);
		lis_vector_scale(eta.hi[0],u);
		lis_vector_axpy(qsi.hi[0],phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scale(eta.hi[0],z);
		lis_vector_axpy(qsi.hi[0],rhat,z);
		lis_vector_axpy(-alpha.hi[0],u,z);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha.hi[0],p,x);
		lis_vector_axpy(1,z,x);

		/* r = t - eta*y - qsi*ttld */
		lis_vector_axpyz(-eta.hi[0],y,t,r);
		lis_vector_axpy(-qsi.hi[0],ttld,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol2 >= nrm2 )
		{
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptimes = ptimes;
			break;
		}

		lis_vector_copy(t,t0);
		rho_old.hi[0] = rho.hi[0];
	}

	/* ------------------------------------------------------------------ */
	/* switch to extended precision and restart the recurrence            */
	/* ------------------------------------------------------------------ */
	r->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	t->precision = LIS_PRECISION_QUAD;
	t0->precision = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;
	that->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	qsi.hi[0] = 1.0;
	one.hi[0] = -1.0;

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	/* ------------------------------------------------------------------ */
	/* phase 2: extended-precision kernels                                */
	/* ------------------------------------------------------------------ */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* w = ttld + beta*ptld */
		lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

		/* rhat = M^-1 * r */
		times = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptimes += lis_wtime()-times;

		/* p = rhat + beta*(p - u)   (`one` holds -1) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);

		/* ptld = A * p */
		LIS_MATVEC(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);

		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyzex_mmmm(one,w,ptld,y);
		lis_vector_xpayex_mmm(t,alpha,y);
		lis_vector_axpyex_mmm(one,r,y);

		/* t = r - alpha*ptld   (alpha stays negated until the x update) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

		/* that  = M^-1 * t    */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0   */
		times = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptimes += lis_wtime()-times;

		/* ttld = A * that */
		LIS_MATVEC(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
		lis_vector_dotex_mmm(y,t,&tmpdot[2]);
		lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
		lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);

		/*
		 * The recurrence was restarted above (fresh residual, t/t0/u/p
		 * zeroed), so its first step must use eta = 0 exactly as phase 1
		 * does at iter==1; that also discards the z left over from
		 * phase 1. The original tested the stale phase-1 counter
		 * (iter==1), which is almost never true here.
		 */
		if( iter2==iter+1 )
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			/* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2 */
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			/* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp */
			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			/* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp
			   (tmpdot[0] is reused as scratch) */
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpayex_mmm(t0hat,beta,u);
		lis_vector_axpyex_mmm(one,rhat,u);
		lis_vector_scaleex_mm(eta,u);
		lis_vector_axpyex_mmm(qsi,phat,u);

		/* z = qsi*rhat + eta*z - alpha*u   (alpha is still negated) */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* x = x + alpha*p + z   (restore alpha; flip `one` to +1 and back) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);

		/* r = t - eta*y - qsi*ttld   (eta, qsi negated and restored) */
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		lis_vector_axpyzex_mmmm(eta,y,t,r);
		lis_vector_axpyex_mmm(qsi,ttld,r);
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			/* print the running counter, matching the slot stored above
			   (the original printed the frozen phase-1 counter) */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_vector_copyex_mm(t,t0);
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	/* same field convention as the other phase-2 exits: iter = total count,
	   iter2 = phase-1 count (the original had the two swapped here) */
	solver->retcode = LIS_MAXITER;
	solver->iter = iter2;
	solver->iter2 = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_minres - MINRES-style iteration built on a Lanczos recurrence.
 *
 * Set-up:  v2 = M^-1 (b - A*x), r0_euc = ||v2||_2.
 * Each pass: normalise v2, form v4 = M^-1 A v2, orthogonalise it against
 * v2 and v1 (alpha, beta2), update the rotation quantities
 * (gamma*, sigma*, rho*), build the direction w2 and add gamma3*eta*w2
 * to x.
 *
 * The residual measure is not recomputed from x: r_euc is multiplied by
 * |sigma3| every pass and nrm2 = r_euc / r0_euc is compared with
 * tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN].
 *
 * Vectors v1..v4, w0..w2 are solver->work[0..6]; they are borrowed from the
 * solver and are not released here.
 *
 * Writes solver->retcode, ->iter, ->resid on every exit; ->ptime only on
 * the LIS_SUCCESS exits; ->rhistory[iter] when LIS_PRINT_MEM is set.
 *
 * Returns LIS_SUCCESS on convergence (including a zero initial residual,
 * reported as iter = 0), LIS_MAXITER otherwise.
 */
LIS_INT lis_minres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR v1,v2,v3,v4,w0,w1,w2;
	LIS_REAL nrm2,tol;
	LIS_SCALAR alpha,beta2,beta3;
	LIS_SCALAR gamma1,gamma2,gamma3;
	LIS_SCALAR delta,eta;
	LIS_SCALAR sigma1,sigma2,sigma3;
	LIS_SCALAR rho1,rho2,rho3;
	LIS_SCALAR r0_euc,r_euc;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	b = solver->b;
	x = solver->x;

	tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	ptime = 0.0;

	v1 = solver->work[0];
	v2 = solver->work[1];
	v3 = solver->work[2];
	v4 = solver->work[3];
	w0 = solver->work[4];
	w1 = solver->work[5];
	w2 = solver->work[6];

	/* Lanczos algorithm: v2 = M^-1 (b - A*x) */
	lis_matvec(A,x,v2);
	lis_vector_xpay(b,-1.0,v2);

	time = lis_wtime();
	lis_psolve(solver,v2,v3);
	ptime += lis_wtime()-time;
	lis_vector_copy(v3,v2);

	/* Compute elements of Hermitian tridiagonal matrix */
	lis_vector_nrm2(v2,&r_euc);

	/*
	 * A zero initial residual means x already satisfies the preconditioned
	 * system. Continuing would evaluate r_euc / r0_euc = 0/0 and scale v2
	 * by 1/0, filling x with NaN, so finish here instead.
	 */
	if( r_euc==0.0 )
	{
		solver->retcode = LIS_SUCCESS;
		solver->iter = 0;
		solver->resid = 0.0;
		solver->ptime = ptime;
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}

	eta = beta2 = r0_euc = r_euc;
	gamma2 = gamma1 = 1.0;
	sigma2 = sigma1 = 0.0;

	lis_vector_set_all(0.0,v1);
	lis_vector_set_all(0.0,w0);
	lis_vector_set_all(0.0,w1);

	nrm2 = r_euc / r0_euc;

	for(iter=1;iter<=maxiter;iter++)
	{
		/* Lanczos algorithm: v4 = M^-1 A v2, orthogonalised against v2, v1 */
		lis_vector_scale(1.0 / beta2,v2);
		lis_matvec(A,v2,v3);

		time = lis_wtime();
		lis_psolve(solver,v3,v4);
		ptime += lis_wtime()-time;

		lis_vector_dot(v2,v4,&alpha);
		lis_vector_axpy(-alpha,v2,v4);
		lis_vector_axpy(-beta2,v1,v4);
		lis_vector_nrm2(v4,&beta3);

		/* Compute elements of Hermitian tridiagonal matrix */
		delta = gamma2 * alpha - gamma1 * sigma2 * beta2;
		rho1 = sqrt(delta * delta + beta3 * beta3);
		rho2 = sigma2 * alpha + gamma1 * gamma2 * beta2;
		rho3 = sigma1 * beta2;
		gamma3 = delta / rho1;
		sigma3 = beta3 / rho1;

		/* w2 = (v2 - rho3*w0 - rho2*w1) / rho1;  x += gamma3*eta*w2 */
		lis_vector_axpyz(-rho3,w0,v2,w2);
		lis_vector_axpy(-rho2,w1,w2);
		lis_vector_scale(1.0 / rho1,w2);

		lis_vector_axpy(gamma3 * eta,w2,x);

		/* convergence check (recursive residual estimate) */
		r_euc *= fabs(sigma3);
		nrm2 = r_euc / r0_euc;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( nrm2 <= tol )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptime = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* shift the recurrence state for the next pass */
		eta *= -sigma3;

		lis_vector_copy(v2,v1);
		lis_vector_copy(v4,v2);
		lis_vector_copy(w1,w0);
		lis_vector_copy(w2,w1);

		beta2 = beta3;
		gamma1 = gamma2;
		gamma2 = gamma3;
		sigma1 = sigma2;
		sigma2 = sigma3;
	}

	/*
	 * v1..w2 alias solver->work[0..6]. The original destroyed them on this
	 * exit only, leaving solver->work pointing at released vectors while the
	 * success exits left them alive; they are now left to their owner on
	 * every path.
	 */
	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicgsafe - BiCGSafe iteration in working precision.
 *
 * Work vectors (solver->work[0..11]):
 *   shadow  shadow residual rtld        res     residual r
 *   m_res   M^-1 r                      am_res  A M^-1 r
 *   dir     search direction p          a_dir   A p
 *   tv      t = qsi*Ap + eta*y          m_tv    M^-1 t
 *   y, u, z, au                         auxiliary recurrence vectors
 *
 * One pass computes alpha from <rtld,Ap>, the pair (qsi, eta) from five
 * inner products (eta = 0 on the first pass), updates u, z, y, x and r,
 * evaluates the residual measure, and then prepares beta, M^-1 r, A M^-1 r,
 * p and Ap for the next pass.
 *
 * Returns LIS_SUCCESS when tol >= residual measure, LIS_BREAKDOWN when
 * <rtld,r> == 0 after an update, LIS_MAXITER when the budget runs out.
 * solver->ptimes is stored on the LIS_SUCCESS exit only.
 *
 * NOTE(review): y, u and z are read before this function writes them
 *   (they are multiplied by eta = 0 on the first pass), so they are expected
 *   to hold finite values on entry.
 */
LIS_INT lis_bicgsafe(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR shadow, res, m_res, am_res;
	LIS_VECTOR dir, a_dir, tv, m_tv;
	LIS_VECTOR y, u, au, z;
	LIS_SCALAR alpha, beta, qsi, eta;
	LIS_SCALAR rho_now, rho_prev;
	LIS_SCALAR shadow_ap, yy, amr_r, y_r, amr_y, amr_amr, det;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT it, it_limit, output, conv;
	double tic, precon_time;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	x = solver->x;
	it_limit = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	precon_time = 0.0;

	shadow = solver->work[0];
	res = solver->work[1];
	m_res = solver->work[2];
	am_res = solver->work[3];
	dir = solver->work[4];
	a_dir = solver->work[5];
	tv = solver->work[6];
	m_tv = solver->work[7];
	y = solver->work[8];
	u = solver->work[9];
	z = solver->work[10];
	au = solver->work[11];

	/* initial residual; a non-zero return ends the solve right away */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,res,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,res,shadow);

	/* m_res = M^-1 r, am_res = A m_res; first direction p = m_res, Ap = am_res */
	tic = lis_wtime();
	lis_psolve(solver, res, m_res);
	precon_time += lis_wtime()-tic;
	LIS_MATVEC(A,m_res,am_res);
	lis_vector_dot(shadow,res,&rho_prev);
	lis_vector_copy(am_res,a_dir);
	lis_vector_copy(m_res,dir);
	beta = 0.0;

	it = 1;
	while( it<=it_limit )
	{
		/* alpha = rho_prev / <rtld,Ap> */
		lis_vector_dot(shadow,a_dir,&shadow_ap);
		alpha = rho_prev / shadow_ap;

		/* inner products feeding the (qsi, eta) solve */
		lis_vector_dot(y,y,&yy);
		lis_vector_dot(am_res,res,&amr_r);
		lis_vector_dot(y,res,&y_r);
		lis_vector_dot(am_res,y,&amr_y);
		lis_vector_dot(am_res,am_res,&amr_amr);

		if( it!=1 )
		{
			/* 2x2 system */
			det = amr_amr*yy - amr_y*amr_y;
			qsi = (yy*amr_r - y_r*amr_y) / det;
			eta = (amr_amr*y_r - amr_y*amr_r) / det;
		}
		else
		{
			/* first pass: single-parameter form */
			qsi = amr_r / amr_amr;
			eta = 0.0;
		}

		/* t = qsi*Ap + eta*y */
		lis_vector_copy(y,tv);
		lis_vector_scale(eta,tv);
		lis_vector_axpy(qsi,a_dir,tv);

		/* m_tv = M^-1 t */
		tic = lis_wtime();
		lis_psolve(solver, tv, m_tv);
		precon_time += lis_wtime()-tic;

		/* u = m_tv + eta*beta*u,  au = A u */
		lis_vector_xpay(m_tv,eta*beta,u);
		LIS_MATVEC(A,u,au);

		/* z = qsi*m_res + eta*z - alpha*u */
		lis_vector_scale(eta,z);
		lis_vector_axpy(qsi,m_res,z);
		lis_vector_axpy(-alpha,u,z);

		/* y = qsi*am_res + eta*y - alpha*au */
		lis_vector_scale(eta,y);
		lis_vector_axpy(qsi,am_res,y);
		lis_vector_axpy(-alpha,au,y);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha,dir,x);
		lis_vector_axpy(1.0,z,x);

		/* r = r - alpha*Ap - y */
		lis_vector_axpy(-alpha,a_dir,res);
		lis_vector_axpy(-1.0,y,res);

		/* residual measure and optional history output */
		lis_solver_get_residual[conv](res,solver,&nrm2);
		if( output & LIS_PRINT_MEM )
		{
			solver->residual[it] = nrm2;
		}
		if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
		{
			lis_print_rhistory(it,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = it;
			solver->resid = nrm2;
			solver->ptimes = precon_time;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rho = <rtld,r>; a zero value stops the recurrence */
		lis_vector_dot(shadow,res,&rho_now);
		if( rho_now==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = it;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_prev) * (alpha / qsi) */
		beta = (rho_now / rho_prev) * (alpha / qsi);

		/* m_res = M^-1 r,  am_res = A m_res */
		tic = lis_wtime();
		lis_psolve(solver, res, m_res);
		precon_time += lis_wtime()-tic;
		LIS_MATVEC(A,m_res,am_res);

		/* p = m_res + beta*(p - u),  Ap = am_res + beta*(Ap - au) */
		lis_vector_axpy(-1.0,u,dir);
		lis_vector_xpay(m_res,beta,dir);
		lis_vector_axpy(-1.0,au,a_dir);
		lis_vector_xpay(am_res,beta,a_dir);

		rho_prev = rho_now;
		it++;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = it;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_gmres_switch - two-phase restarted GMRES.
 *
 * m = solver->options[LIS_OPTIONS_RESTART] is the cycle length and
 * h_dim = m+1 the column stride of the Hessenberg storage h. The rotation
 * coefficients live in the same buffer at offsets cs = (m+1)*h_dim and
 * sn = (m+2)*h_dim. s (work[0]) carries the rotated right-hand side in its
 * first entries; v[] (work[3..]) holds the basis vectors.
 *
 * Phase 1 (until maxiter2 = LIS_OPTIONS_SWITCH_MAXITER iterations or
 *   nrm2 <= solver->tol_switch) uses the plain lis_vector_* kernels and
 *   scalar arithmetic on the .hi words; the Hessenberg entries are accessed
 *   through hd, a LIS_SCALAR view of the same buffer.
 * Switch: z is marked LIS_PRECISION_QUAD, LIS_OPTIONS_INITGUESS_ZEROS is
 *   cleared, lis_vector_copyex_mn(x,solver->xx) is called and the initial
 *   residual is recomputed into v[0].
 * Phase 2 (until maxiter iterations in total or nrm2 <= solver->tol) repeats
 *   the same cycle with the lis_quad_* / lis_vector_*ex_* kernels and h
 *   addressed as LIS_QUAD entries.
 *
 * In both phases the residual measure is the estimate
 * |s[i+1]| * bnrm2, not a recomputed residual.
 *
 * Result fields on the phase-2 exits: solver->iter = total count,
 * solver->iter2 = phase-1 count. h is released on every return.
 *
 * Returns LIS_SUCCESS or LIS_MAXITER.
 *
 * NOTE(review): the result of lis_malloc for h is not checked here.
 * NOTE(review): phase 1 replaces a zero rotation norm by 1.0e-17; phase 2
 *   has no such guard.
 * NOTE(review): the LIS_MAXITER exit stores iter2+1 while the success exit
 *   stores iter2; kept as found.
 */
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,s, z, *v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double times,ptimes;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	m = solver->options[LIS_OPTIONS_RESTART];
	h_dim = m+1;
	ptimes = 0.0;

	s = solver->work[0];
	r = solver->work[1];
	z = solver->work[2];
	v = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (h_dim+1) * (h_dim+2),"lis_gmres_switch::h" );
	hd = (LIS_SCALAR *)h;	/* scalar view used by phase 1 */
	cs = (m+1)*h_dim;
	sn = (m+2)*h_dim;

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	z->precision = LIS_PRECISION_DEFAULT;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	/* ------------------------------------------------------------------ */
	/* phase 1: working precision                                         */
	/* ------------------------------------------------------------------ */
	iter = 0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii = i-1;
			i1 = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i] = <w,v[k]>    */
				/* w = w - h[k,i]v[k]   */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k + iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w||        */
			/* v[i+1] = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1 + iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			/* apply the earlier rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj = k-1;
				t.hi[0] = hd[jj + iih];
				aa.hi[0] = hd[jj + cs]*t.hi[0];
				aa.hi[0] += hd[jj + sn]*hd[k + iih];
				bb.hi[0] = -hd[jj + sn]*t.hi[0];
				bb.hi[0] += hd[jj + cs]*hd[k + iih];
				hd[jj + iih] = aa.hi[0];
				hd[k + iih] = bb.hi[0];
			}

			/* new rotation from (h[ii,ii], h[i1,ii]) */
			aa.hi[0] = hd[ii + iih];
			bb.hi[0] = hd[i1 + iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0] + b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii + cs] = aa.hi[0] / rr.hi[0];
			hd[ii + sn] = bb.hi[0] / rr.hi[0];

			/* rotate the right-hand side and the diagonal entry */
			s->value[i1] = -hd[ii + sn]*s->value[ii];
			s->value[ii] = hd[ii + cs]*s->value[ii];

			aa.hi[0] = hd[ii + cs]*hd[ii + iih];
			aa.hi[0] += hd[ii + sn]*hd[i1 + iih];
			hd[ii + iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1]) * bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H*Y =S for upper triangular H */
		s->value[ii] = s->value[ii] / hd[ii + iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj + j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0] / hd[jj + jj*h_dim];
		}

		/* z = sum_j y[j]*v[j] */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 z */
		times = lis_wtime();
		lis_psolve(solver, z, r);
		ptimes += lis_wtime()-times;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol2 >= nrm2 )
		{
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptimes = ptimes;
			break;
		}

		/* rebuild the residual direction in v[0] for the next cycle */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1 + sn] * s->value[jj];
			s->value[jj] = hd[jj-1 + cs] * s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* ------------------------------------------------------------------ */
	/* switch to extended precision                                       */
	/* ------------------------------------------------------------------ */
	/* Initial Residual */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol = solver->tol;

	/* ------------------------------------------------------------------ */
	/* phase 2: extended-precision kernels                                */
	/* ------------------------------------------------------------------ */
	iter2 = iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0] = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii = i-1;
			i1 = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i] = <w,v[k]>    */
				/* w = w - h[k,i]v[k]   */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k + iih].hi = t.hi[0];
				h[k + iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w||        */
			/* v[i+1] = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1 + iih].hi = t.hi[0];
			h[i1 + iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			/* apply the earlier rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj = k-1;
				t.hi[0] = h[jj + iih].hi;
				t.lo[0] = h[jj + iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj + iih].hi = aa.hi[0];
				h[jj + iih].lo = aa.lo[0];
				h[k + iih].hi = bb.hi[0];
				h[k + iih].lo = bb.lo[0];
			}

			/* new rotation from (h[ii,ii], h[i1,ii]) */
			aa.hi[0] = h[ii + iih].hi;
			aa.lo[0] = h[ii + iih].lo;
			bb.hi[0] = h[i1 + iih].hi;
			bb.lo[0] = h[i1 + iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);

			/* rotate the right-hand side */
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			/* rotate the diagonal entry */
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii + iih].hi = aa.hi[0];
			h[ii + iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1]) * bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				/* print the running counter, matching the slot stored
				   above (the original printed the frozen phase-1 counter) */
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H*Y =S for upper triangular H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii + iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			t.lo[0] = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj + j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj + jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}

		/* z = sum_j y[j]*v[j] */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}

		/* r = M^-1 z */
		times = lis_wtime();
		lis_psolve(solver, z, r);
		ptimes += lis_wtime()-times;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rebuild the residual direction in v[0] for the next cycle */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter2+1;
	solver->iter2 = iter;
	solver->resid = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_gmres - restarted GMRES(m) for the system held in `solver`.
 *
 * Inputs read from `solver`:
 *   A, x                    matrix and solution vector (x is updated in place)
 *   options[MAXITER]        upper bound on the total number of inner steps
 *   options[RESTART]        restart length m
 *   options[OUTPUT]         LIS_PRINT_MEM / LIS_PRINT_OUT flags
 *   work[0..2], work[3...]  s, r, z and the basis vectors v[0..m]
 *
 * Storage: h is column-major with leading dimension h_dim = m+1.  Column
 * (i-1) holds the Hessenberg column built at inner step i; the two extra
 * columns at offsets `cs` and `sn` hold the cosines/sines of the Givens
 * rotations.
 *
 * Returns LIS_SUCCESS when the residual estimate drops to `solver->tol`
 * (or when lis_solver_get_initial_residual() returns nonzero), and
 * LIS_MAXITER otherwise.  solver->retcode/iter/resid/ptimes are filled in
 * on both the converged and the LIS_MAXITER exits.
 */
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output;
	double times,ptimes;
	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptimes  = 0.0;

	s = solver->work[0];
	r = solver->work[1];
	z = solver->work[2];
	v = &solver->work[3];

	h  = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (h_dim+1) * (h_dim+2),"lis_gmres::h" );
	cs = (m+1)*h_dim;	/* offset of the rotation cosines inside h */
	sn = (m+2)*h_dim;	/* offset of the rotation sines inside h   */

	/* Initial Residual (stored in v[0]); a nonzero return ends the solve */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	iter = 0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i]v[k]    */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k + iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}

			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t);
			h[i1 + iih] = t;
			lis_vector_scale(1.0/t,v[i1v]);

			/* apply the rotations of the previous steps to the new column */
			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t   = h[jj + iih];
				aa  =  h[jj + cs]*t;
				aa +=  h[jj + sn]*h[k + iih];
				bb  = -h[jj + sn]*t;
				bb +=  h[jj + cs]*h[k + iih];
				h[jj + iih] = aa;
				h[k + iih]  = bb;
			}

			/* new rotation that eliminates h[i+1,i] */
			aa = h[ii + iih];
			bb = h[i1 + iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2 + b2);
			if( rr==0.0 ) rr=1.0e-17;	/* avoid dividing by zero below */
			h[ii + cs] = aa / rr;
			h[ii + sn] = bb / rr;

			/* rotate the right-hand side and the diagonal entry */
			s->value[i1] = -h[ii + sn]*s->value[ii];
			s->value[ii] =  h[ii + cs]*s->value[ii];
			aa  = h[ii + cs]*h[ii + iih];
			aa += h[ii + sn]*h[i1 + iih];
			h[ii + iih] = aa;

			/* convergence check */
			nrm2 = fabs(s->value[i1]) * bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H*Y =S for upper triangular H */
		s->value[ii] = s->value[ii] / h[ii + iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj + j*h_dim]*s->value[j];
			}
			s->value[jj] = t / h[jj + jj*h_dim];
		}

		/* x = x + yv */
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 z */
		times = lis_wtime();
		lis_psolve(solver, z, r);
		ptimes += lis_wtime()-times;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptimes  = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rebuild the residual in v[0] for the next restart cycle */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1 + sn] * s->value[jj];
			s->value[jj]   =  h[jj-1 + cs] * s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter+1;
	solver->resid   = nrm2;
	/* record the preconditioner time here too, as the converged exit does */
	solver->ptimes  = ptimes;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_eii_quad - inverse iteration where the inner product <x,z> and the
 * residual vector are formed with the quad-precision ("ex") kernels.
 *
 * Each step normalizes x, solves for z with lis_solve_kernel() using an
 * inner solver configured as "-i bicg -p none -precision quad", takes
 * 1/evalue = <x,z>, and measures resid = ||z - <x,z> x||_2 / |<x,z>|.
 * When esolver->lshift is nonzero the diagonal of A is shifted for the
 * duration of the call and restored on every exit path.
 *
 * Results are written to esolver->retcode, iter[0], resid[0], evalue[0],
 * and x is left normalized.  Returns LIS_SUCCESS when resid <= tol,
 * LIS_MAXITER when the iteration limit is reached, or the error code
 * returned by lis_precon_create().
 */
LIS_INT lis_eii_quad(LIS_ESOLVER esolver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_SCALAR ievalue;
	LIS_SCALAR lshift;
	LIS_INT emaxiter;
	LIS_REAL tol;
	LIS_INT iter,iter2,output;
	LIS_REAL nrm2,resid;
	LIS_QUAD_PTR qdot_xz;
	LIS_VECTOR z,q;
	LIS_SOLVER solver;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_INT err;
	LIS_INT retcode;
	LIS_PRECON precon;
	LIS_INT nsol, precon_type;
	char solvername[128], preconname[128];

	LIS_DEBUG_FUNC_IN;

	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	tol      = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
	lshift   = esolver->lshift;
	output   = esolver->options[LIS_EOPTIONS_OUTPUT];
	A        = esolver->A;
	x        = esolver->x;
	if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		lis_vector_set_all(1.0,x);
	}
	z = esolver->work[0];
	q = esolver->work[1];
	LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

	iter    = 0;
	ievalue = 1.0;

	/*
	 * Informational output: logical AND, so the lines appear on rank 0
	 * whenever any output flag is set (a bitwise '&' against the 0/1
	 * comparison result only tested bit 0 of `output`).
	 */
#ifdef _LONG__DOUBLE
	if( output && A->my_rank==0 ) printf("local shift : %Le\n", lshift);
#else
	if( output && A->my_rank==0 ) printf("local shift : %e\n", lshift);
#endif
	if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

	lis_solver_create(&solver);
	lis_solver_set_option("-i bicg -p none -precision quad",solver);
	lis_solver_set_optionC(solver);
	lis_solver_get_solver(solver, &nsol);
	lis_solver_get_precon(solver, &precon_type);
	lis_solver_get_solvername(nsol, solvername);
	lis_solver_get_preconname(precon_type, preconname);
	if( output && A->my_rank==0 ) printf("linear solver : %s\n", solvername);
	if( output && A->my_rank==0 ) printf("preconditioner : %s\n", preconname);

	/* create preconditioner */
	solver->A = A;
	err = lis_precon_create(solver, &precon);
	if( err )
	{
		/*
		 * Leave the caller's state as we found it: undo the diagonal
		 * shift, release the inner solver created above, and report the
		 * failure on the eigensolver handle the caller actually holds.
		 */
		if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
		lis_solver_destroy(solver);
		esolver->retcode = err;
		LIS_DEBUG_FUNC_OUT;
		return err;
	}

	retcode = LIS_MAXITER;
	while (iter<emaxiter)
	{
		iter = iter+1;

		/* x = x / ||x||_2 */
		lis_vector_nrm2(x, &nrm2);
		lis_vector_scale(1/nrm2, x);

		/* z = (A - lshift I)^-1 * x */
		lis_solve_kernel(A, x, z, solver, precon);
		lis_solver_get_iter(solver,&iter2);

		/* 1/evalue = <x,z>; negate it to form q = z - <x,z> x, then restore */
		lis_vector_dotex_mmm(x, z, &qdot_xz);
		lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
		lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
		lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
		ievalue = qdot_xz.hi[0];

		/* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
		lis_vector_nrm2(q, &resid);
		resid = fabs(resid/ievalue);

		/* x = z */
		lis_vector_copy(z,x);

		/* accumulate the inner solver's timings */
		lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
		esolver->ptime    += solver->ptime;
		esolver->itime    += solver->itime;
		esolver->p_c_time += solver->p_c_time;
		esolver->p_i_time += solver->p_i_time;

		/* convergence check */
		if( output )
		{
			if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
			if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
		}

		if( tol >= resid )
		{
			retcode = LIS_SUCCESS;
			break;
		}
	}

	/* common exit for both the converged and the LIS_MAXITER case */
	esolver->retcode   = retcode;
	esolver->iter[0]   = iter;
	esolver->resid[0]  = resid;
	esolver->evalue[0] = 1/ievalue;
	lis_vector_nrm2(x, &nrm2);
	lis_vector_scale(1/nrm2, x);
	if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
	lis_precon_destroy(precon);
	lis_solver_destroy(solver);
	LIS_DEBUG_FUNC_OUT;
	return retcode;
}
LIS_INT lis_eai_quad(LIS_ESOLVER esolver) { LIS_MATRIX A; LIS_INT ss,ic; LIS_INT emaxiter,iter0,hqriter; LIS_REAL tol,hqrerr,D; LIS_INT i,j; LIS_INT output, niesolver; LIS_REAL nrm2,resid0; LIS_VECTOR *v,w; LIS_SCALAR *h,*hq,*hr,evalue,evalue0; LIS_SOLVER solver; LIS_ESOLVER esolver2; char esolvername[128],solvername[128],preconname[128]; LIS_INT nsol,precon_type; ss = esolver->options[LIS_EOPTIONS_SUBSPACE]; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER]; h = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::h"); hq = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hq"); hr = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hr"); A = esolver->A; w = esolver->work[0]; v = &esolver->work[1]; lis_vector_set_all(0.0,v[0]); lis_vector_set_all(1.0,w); lis_vector_nrm2(w, &nrm2); lis_solver_create(&solver); lis_solver_set_option("-i bicg -p none",solver); lis_solver_set_optionC(solver); lis_solver_get_solver(solver, &nsol); lis_solver_get_precon(solver, &precon_type); lis_solver_get_solvername(nsol, solvername); lis_solver_get_preconname(precon_type, preconname); lis_esolver_get_esolvername(niesolver, esolvername); if( A->my_rank==0 ) printf("inner eigensolver : %s\n", esolvername); if( A->my_rank==0 ) printf("linear solver : %s\n", solvername); if( A->my_rank==0 ) printf("preconditioner : %s\n", preconname); for (i=0;i<ss*ss;i++) h[i] = 0.0; j=-1; while (j<ss-1) { j = j+1; lis_vector_copy(w, v[j]); /* w = A * v(j) */ lis_matvec(A, v[j], w); /* reorthogonalization */ for (i=0;i<=j;i++) { /* h(i,j) = <v(i), w> */ lis_vector_dot(v[i], w, &h[i+j*ss]); /* w = w - h(i,j) * v(i) */ lis_vector_axpy(-h[i+j*ss], v[i], w); } /* h(j+1,j) = ||w||_2 */ lis_vector_nrm2(w, &h[j+1+j*ss]); /* convergence check */ if (fabs(h[j+1+j*ss])<tol) break; /* v(j+1) = w / h(i+1,j) 
*/ lis_vector_scale(1/h[j+1+j*ss],w); lis_vector_copy(w,v[j+1]); } /* compute eigenvalues of a real upper Hessenberg matrix H(j) = SH'(j)S^* */ lis_array_qr(ss,h,hq,hr,&hqriter,&hqrerr); if( A->my_rank==0 ) { #ifdef _LONG__LONG printf("size of subspace : %lld\n\n", ss); #else printf("size of subspace : %d\n\n", ss); #endif if( output ) printf("approximate eigenvalues in subspace:\n\n"); i=0; while (i<ss-1) { i = i + 1; if (fabs(h[i+(i-1)*ss])<tol) { #ifdef _LONG__LONG printf("Arnoldi: mode number = %lld\n",i-1); #else printf("Arnoldi: mode number = %d\n",i-1); #endif #ifdef _LONG__DOUBLE printf("Arnoldi: eigenvalue = %Le\n",h[i-1+(i-1)*ss]); #else printf("Arnoldi: eigenvalue = %e\n",h[i-1+(i-1)*ss]); #endif esolver->evalue[i-1] = h[i-1+(i-1)*ss]; } else { D = (h[i-1+(i-1)*ss]-h[i+i*ss]) * (h[i-1+(i-1)*ss]-h[i+i*ss]) + 4 * h[i-1+i*ss] * h[i+(i-1)*ss]; if (D<0) { #ifdef _LONG__LONG printf("Arnoldi: mode number = %lld\n",i-1); #else printf("Arnoldi: mode number = %d\n",i-1); #endif #ifdef _LONG__DOUBLE printf("Arnoldi: eigenvalue = %Le + %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2); #else printf("Arnoldi: eigenvalue = %e + %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2); #endif #ifdef _LONG__LONG printf("Arnoldi: mode number = %lld\n",i); #else printf("Arnoldi: mode number = %d\n",i); #endif #ifdef _LONG__DOUBLE printf("Arnoldi: eigenvalue = %Le - %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2); #else printf("Arnoldi: eigenvalue = %e - %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2); #endif esolver->evalue[i-1] = (h[i-1+(i-1)*ss]+h[i+i*ss])/2; esolver->evalue[i] = (h[i-1+(i-1)*ss]+h[i+i*ss])/2; i=i+1; } else { #ifdef _LONG__LONG printf("Arnoldi: mode number = %lld\n",i-1); #else printf("Arnoldi: mode number = %d\n",i-1); #endif #ifdef _LONG__DOUBLE printf("Arnoldi: eigenvalue = %Le\n",h[i-1+(i-1)*ss]); #else printf("Arnoldi: eigenvalue = %e\n",h[i-1+(i-1)*ss]); #endif esolver->evalue[i-1] = h[i-1+(i-1)*ss]; } } } if (i<ss) { #ifdef _LONG__LONG 
printf("Arnoldi: mode number = %lld\n",i); #else printf("Arnoldi: mode number = %d\n",i); #endif #ifdef _LONG__DOUBLE printf("Arnoldi: eigenvalue = %Le\n",h[i+i*ss]); #else printf("Arnoldi: eigenvalue = %e\n",h[i+i*ss]); #endif } if( output ) printf("\n"); if( output ) printf("compute refined (real) eigenpairs, where imaginary parts are currently neglected:\n\n"); } lis_esolver_create(&esolver2); esolver2->options[LIS_EOPTIONS_ESOLVER] = niesolver; esolver2->options[LIS_EOPTIONS_SUBSPACE] = 1; esolver2->options[LIS_EOPTIONS_MAXITER] = emaxiter; esolver2->options[LIS_EOPTIONS_OUTPUT] = esolver->options[LIS_EOPTIONS_OUTPUT]; esolver2->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN] = tol; esolver2->eprecision = LIS_PRECISION_QUAD; /* compute refined (real) eigenpairs, where imaginary parts are currently neglected */ for (i=0;i<ss;i++) { lis_vector_duplicate(A, &esolver->evector[i]); esolver2->lshift = -(esolver->evalue[i]); lis_esolve(A, esolver->evector[i], &evalue, esolver2); lis_esolver_work_destroy(esolver2); esolver->evalue[i] = evalue - esolver2->lshift; esolver->iter[i] = esolver2->iter[0]; esolver->resid[i] = esolver2->resid[0]; if (i==0) { evalue0 = esolver->evalue[0]; iter0 = esolver2->iter[0]; resid0 = esolver2->resid[0]; if( output & LIS_EPRINT_MEM ) { for (ic=0;ic<iter0+1;ic++) { esolver->rhistory[ic] = esolver2->rhistory[ic]; } } esolver->ptime = esolver2->ptime; esolver->itime = esolver2->itime; esolver->p_c_time = esolver2->p_c_time; esolver->p_i_time = esolver2->p_i_time; } if (A->my_rank==0) { #ifdef _LONG__LONG if( output ) printf("Arnoldi: mode number = %lld\n", i); #else if( output ) printf("Arnoldi: mode number = %d\n", i); #endif #ifdef _LONG__DOUBLE if( output ) printf("Arnoldi: eigenvalue = %Le\n", esolver->evalue[i]); #else if( output ) printf("Arnoldi: eigenvalue = %e\n", esolver->evalue[i]); #endif #ifdef _LONG__LONG if( output ) printf("Arnoldi: number of iterations = %lld\n",esolver2->iter[0]); #else if( output ) printf("Arnoldi: number of 
iterations = %d\n",esolver2->iter[0]); #endif #ifdef _LONG__DOUBLE if( output ) printf("Arnoldi: relative residual = %Le\n\n",esolver2->resid[0]); #else if( output ) printf("Arnoldi: relative residual = %e\n\n",esolver2->resid[0]); #endif } } esolver->evalue[0] = evalue0; esolver->iter[0] = iter0; esolver->resid[0] = resid0; lis_vector_copy(esolver->evector[0], esolver->x); lis_esolver_destroy(esolver2); lis_free(h); lis_free(hq); lis_free(hr); lis_solver_destroy(solver); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }