/*
 * Pointer-argument wrapper around lis_matvect().
 *
 * A, x and y are converted to LIS_MATRIX / LIS_VECTOR handles with
 * LIS_V2P, lis_matvect() is called on the converted handles, and its
 * return code is stored in *ierr.  Nothing is returned directly.
 */
void lis_matvect_f(LIS_MATRIX_F *A, LIS_VECTOR_F *x, LIS_VECTOR_F *y, LIS_INT *ierr)
{
	LIS_MATRIX mat;
	LIS_VECTOR vx;
	LIS_VECTOR vy;

	LIS_DEBUG_FUNC_IN;

	mat = (LIS_MATRIX)LIS_V2P(A);
	vx  = (LIS_VECTOR)LIS_V2P(x);
	vy  = (LIS_VECTOR)LIS_V2P(y);

	*ierr = lis_matvect(mat, vx, vy);

	LIS_DEBUG_FUNC_OUT;
}
/*
 * lis_crs_quad: preconditioned CRS iteration using the extended
 * ("ex") vector kernels and LIS_QUAD scalar arithmetic.
 *
 * Reads A, x, the work vectors, the iteration limit, the output flags
 * and the convergence-condition index from `solver`.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (nothing else is written here in that case), or the
 *                   residual norm dropped to solver->tol or below;
 *   LIS_BREAKDOWN - <rtld,z> or <rtld,map> evaluated to exactly zero;
 *   LIS_MAXITER   - the loop ran past solver->options[LIS_OPTIONS_MAXITER].
 * On the three in-loop exits solver->retcode, solver->iter and
 * solver->resid are filled in; solver->ptime is written only on success.
 *
 * NOTE(review): if the first breakdown test fires in iteration 1, nrm2
 * has not been assigned yet when it is copied into solver->resid.
 */
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, p, q, u, z, ap, map, uq, auq;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter, maxiter, output, conv;
	double t0, ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	/*
	 * Ten vector names share six work slots:
	 * z/u/uq -> work[3], q/ap -> work[4], map/auq -> work[5].
	 * The statement order below depends on that aliasing.
	 */
	r    = solver->work[0];
	rtld = solver->work[1];
	p    = solver->work[2];
	z    = solver->work[3];
	u    = solver->work[3];
	uq   = solver->work[3];
	q    = solver->work[4];
	ap   = solver->work[4];
	map  = solver->work[5];
	auq  = solver->work[5];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);

	/* r <- initial residual; a non-zero return ends the solve here */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* shadow residual goes into p first, rtld = A^T * p, then p is cleared */
	lis_solver_set_shadowresidual(solver,r,p);
	lis_matvect(A,p,rtld);

	lis_vector_set_allex_nm(0.0,q);
	lis_vector_set_allex_nm(0.0,p);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	one.hi[0]     = 1.0;
	one.lo[0]     = 0.0;

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z = M^-1 * r (counted as preconditioner time) */
		t0 = lis_wtime();
		lis_psolve(solver, r, z);
		ptime += lis_wtime()-t0;

		/* rho = <rtld,z> */
		lis_vector_dotex_mmm(rtld,z,&rho);

		/* exact zero in both halves: cannot form beta/alpha */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = rho / rho_old */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* u = z + beta*q */
		lis_vector_axpyzex_mmmm(beta,q,z,u);

		/* p = u + beta*(q + beta*p), built in two xpay steps */
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);

		/* ap = A * p (overwrites q, which shares its storage) */
		lis_matvec(A,p,ap);

		/* map = M^-1 * ap */
		t0 = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-t0;

		/* tmpdot1 = <rtld,map> */
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);

		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1, then negated for the next update */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* q = u - alpha*map   (alpha currently holds -alpha) */
		lis_vector_axpyzex_mmmm(alpha,map,u,q);

		/* uq = u + q */
		lis_vector_axpyzex_mmmm(one,u,q,uq);

		/* auq = A * uq (overwrites map, which shares its storage) */
		lis_matvec(A,uq,auq);

		/* x = x + alpha*uq    (sign flipped back to +alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uq,x);

		/* r = r - alpha*auq   (sign flipped to -alpha again) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,auq,r);

		/* residual norm via the configured convergence routine */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output & LIS_PRINT_MEM )
		{
			solver->rhistory[iter] = nrm2;
		}
		if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
		{
			lis_print_rhistory(iter,nrm2);
		}

		if( nrm2 <= tol )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* carry rho into the next iteration */
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_psolvet_adds: iterated application of the selected
 * lis_psolvet_xxx[] preconditioner, accumulating into X.
 *
 * Steps performed (sweeps = solver->options[LIS_OPTIONS_ADDS_ITER]):
 *   X = 0, R = B
 *   for k = 0 .. sweeps:
 *       zero r[n .. np-1]
 *       W = lis_psolvet_xxx[ptype](R)
 *       x[0..n-1] += w[0..n-1]
 *       unless k is the last sweep:
 *           R = lis_matvect(precon->A, X);  r[0..n-1] = b - r
 *
 * When solver->precision is not LIS_PRECISION_DEFAULT, X->precision is
 * temporarily set to LIS_PRECISION_DEFAULT around lis_matvect() and then
 * set to LIS_PRECISION_QUAD; that branch has no OpenMP pragmas.
 *
 * Uses precon->work[0] and precon->work[1] as scratch.  Always returns
 * LIS_SUCCESS.
 */
LIS_INT lis_psolvet_adds(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
	LIS_INT i, k, n, np, sweeps, ptype;
	LIS_INT default_precision;
	LIS_SCALAR *b, *x, *w, *r;
	LIS_VECTOR W, R;
	LIS_PRECON precon;

	LIS_DEBUG_FUNC_IN;

	precon = solver->precon;
	n      = precon->A->n;
	np     = precon->A->np;
	W      = precon->work[0];
	R      = precon->work[1];
	b      = B->value;
	x      = X->value;
	w      = W->value;
	r      = R->value;
	sweeps = solver->options[LIS_OPTIONS_ADDS_ITER];
	ptype  = solver->options[LIS_OPTIONS_PRECON];

	/* decide the branch first, then run the prologue both branches share */
	default_precision = ( solver->precision==LIS_PRECISION_DEFAULT );

	lis_vector_set_all(0.0,X);
	lis_vector_copy(B,R);

	if( default_precision )
	{
		for( k=0; k<=sweeps; k++ )
		{
			/* clear the entries between n and np before the local solve */
			for( i=n; i<np; i++ )
			{
				r[i] = 0.0;
			}

			lis_psolvet_xxx[ptype](solver,R,W);

			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for( i=0; i<n; i++ )
			{
				x[i] += w[i];
			}

			/* no residual refresh after the final sweep */
			if( k==sweeps )
			{
				break;
			}

			lis_matvect(precon->A,X,R);

			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for( i=0; i<n; i++ )
			{
				r[i] = b[i] - r[i];
			}
		}
	}
	else
	{
		for( k=0; k<=sweeps; k++ )
		{
			for( i=n; i<np; i++ )
			{
				r[i] = 0.0;
			}

			lis_psolvet_xxx[ptype](solver,R,W);

			for( i=0; i<n; i++ )
			{
				x[i] += w[i];
			}

			if( k!=sweeps )
			{
				/* run the product with X flagged as default precision */
				X->precision = LIS_PRECISION_DEFAULT;
				lis_matvect(precon->A,X,R);
				X->precision = LIS_PRECISION_QUAD;

				for( i=0; i<n; i++ )
				{
					r[i] = b[i] - r[i];
				}
			}
		}
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * lis_bicrsafe_quad: BiCRSafe iteration using the extended ("ex")
 * vector kernels and LIS_QUAD scalar arithmetic.
 *
 * Reads A, x, thirteen work vectors, the iteration limit, the output
 * flags and the convergence-condition index from `solver`.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero,
 *                   or the residual norm dropped to solver->tol or below;
 *   LIS_BREAKDOWN - rho = <rtld,amr> evaluated to exactly zero;
 *   LIS_MAXITER   - the loop ran past solver->options[LIS_OPTIONS_MAXITER].
 * solver->retcode, solver->iter and solver->resid are set on the in-loop
 * exits; solver->ptime is written only on success.
 *
 * beta is zeroed before the loop: the first iteration multiplies it by
 * eta (= 0) to scale u, and it is not assigned until the end of that
 * iteration.  This mirrors the `beta = 0.0` in lis_bicrsafe().
 *
 * NOTE(review): y, z, u and my are read in iteration 1 before this
 * function writes them; presumably the work vectors arrive zeroed -
 * confirm in the work-vector allocation routine.
 */
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
	LIS_VECTOR y, my, u, au, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	mr      = solver->work[2];
	amr     = solver->work[3];
	p       = solver->work[4];
	ap      = solver->work[5];
	map     = solver->work[6];
	my      = solver->work[7];
	y       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	au      = solver->work[11];
	artld   = solver->work[12];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* rtld = shadow residual, artld = A^T * rtld */
	lis_solver_set_shadowresidual(solver,r,rtld);
	lis_matvect(A,rtld,artld);

	/* mr = M^-1 * r, amr = A * mr, rho_old = <rtld,amr> */
	time = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-time;
	lis_matvec(A,mr,amr);
	lis_vector_dotex_mmm(rtld,amr,&rho_old);

	/* ap = amr, p = mr */
	lis_vector_copyex_mm(amr,ap);
	lis_vector_copyex_mm(mr,p);

	/* beta must hold a defined value before eta*beta is formed below */
	beta.hi[0] = 0.0;
	beta.lo[0] = 0.0;

	/* `one` starts at -1 and is sign-flipped in place as needed */
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* map = M^-1 * ap */
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot[0] = <artld,map> */
		/* alpha = rho_old / tmpdot[0] */
		lis_vector_dotex_mmm(artld,map,&tmpdot[0]);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);

		/* tmpdot[0] = <y,y>     */
		/* tmpdot[1] = <amr,r>   */
		/* tmpdot[2] = <y,r>     */
		/* tmpdot[3] = <amr,y>   */
		/* tmpdot[4] = <amr,amr> */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
		lis_vector_dotex_mmm(y,r,&tmpdot[2]);
		lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
		lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);

		if(iter==1)
		{
			/* qsi = <amr,r> / <amr,amr>, eta = 0 */
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			/* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2 */
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			/* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp */
			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			/* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp */
			/* (tmpdot[0] is reused as scratch for the second product) */
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u    = qsi*map + eta*my + eta*beta*u */
		/* au   = A * u                         */
		lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
		lis_vector_scaleex_mm(tmp,u);
		lis_vector_axpyex_mmm(qsi,map,u);
		lis_vector_axpyex_mmm(eta,my,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u   (alpha negated in place here) */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,mr,z);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* y  = qsi*amr + eta*y - alpha*au   (alpha still holds -alpha) */
		/* my = M^-1 * y */
		lis_vector_scaleex_mm(eta,y);
		lis_vector_axpyex_mmm(qsi,amr,y);
		lis_vector_axpyex_mmm(alpha,au,y);
		time = lis_wtime();
		lis_psolve(solver, y, my);
		ptime += lis_wtime()-time;

		/* x = x + alpha*p + z   (alpha -> +alpha, one -> +1) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(one,z,x);

		/* r = r - alpha*ap - y  (alpha -> -alpha, one -> -1) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,ap,r);
		lis_vector_axpyex_mmm(one,y,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* mr  = mr - alpha*map - my   (alpha = -alpha, one = -1) */
		/* amr = A * mr                */
		/* rho = <rtld,amr>            */
		lis_vector_axpyex_mmm(alpha,map,mr);
		lis_vector_axpyex_mmm(one,my,mr);
		lis_matvec(A,mr,amr);
		lis_vector_dotex_mmm(rtld,amr,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi)   (alpha -> +alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* p   = mr + beta*(p - u)     */
		/* ap  = amr + beta*(ap - au)  */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(mr,beta,p);
		lis_vector_axpyex_mmm(one,au,ap);
		lis_vector_xpayex_mmm(amr,beta,ap);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicrsafe: BiCRSafe iteration in the solver's default precision.
 *
 * Reads A, x, thirteen work vectors, the iteration limit, the output
 * flags and the convergence-condition index from `solver`.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero,
 *                   or the residual norm dropped to solver->tol or below;
 *   LIS_BREAKDOWN - rho = <rtld,amr> evaluated to exactly zero;
 *   LIS_MAXITER   - the loop ran past solver->options[LIS_OPTIONS_MAXITER].
 * solver->retcode, solver->iter and solver->resid are set on the in-loop
 * exits; solver->ptime is written only on success.
 *
 * rho and rho_old are LIS_SCALAR because they are filled through
 * lis_vector_dot(), whose output argument is used as a LIS_SCALAR *
 * everywhere else in this file (e.g. tmpdot[] below).
 *
 * NOTE(review): y, z, u and my are read in iteration 1 before this
 * function writes them; presumably the work vectors arrive zeroed -
 * confirm in the work-vector allocation routine.
 * NOTE(review): <artld,map> is not tested for zero before the division
 * that forms alpha.
 */
LIS_INT lis_bicrsafe(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
	LIS_VECTOR y, my, u, au, z;
	LIS_SCALAR alpha, beta;
	LIS_SCALAR rho, rho_old;
	LIS_SCALAR qsi, eta;
	LIS_SCALAR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	mr      = solver->work[2];
	amr     = solver->work[3];
	p       = solver->work[4];
	ap      = solver->work[5];
	map     = solver->work[6];
	my      = solver->work[7];
	y       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	au      = solver->work[11];
	artld   = solver->work[12];

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* rtld = shadow residual, artld = A^T * rtld */
	lis_solver_set_shadowresidual(solver,r,rtld);
	lis_matvect(A,rtld,artld);

	/* mr = M^-1 * r, amr = A * mr, rho_old = <rtld,amr> */
	time = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-time;
	lis_matvec(A,mr,amr);
	lis_vector_dot(rtld,amr,&rho_old);

	/* ap = amr, p = mr */
	lis_vector_copy(amr,ap);
	lis_vector_copy(mr,p);

	/* beta is multiplied by eta in iteration 1 before it is computed */
	beta = 0.0;

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* map = M^-1 * ap */
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot[0] = <artld,map> */
		/* alpha = rho_old / tmpdot[0] */
		lis_vector_dot(artld,map,&tmpdot[0]);
		alpha = rho_old / tmpdot[0];

		/* tmpdot[0] = <y,y>     */
		/* tmpdot[1] = <amr,r>   */
		/* tmpdot[2] = <y,r>     */
		/* tmpdot[3] = <amr,y>   */
		/* tmpdot[4] = <amr,amr> */
		lis_vector_dot(y,y,&tmpdot[0]);
		lis_vector_dot(amr,r,&tmpdot[1]);
		lis_vector_dot(y,r,&tmpdot[2]);
		lis_vector_dot(amr,y,&tmpdot[3]);
		lis_vector_dot(amr,amr,&tmpdot[4]);

		if(iter==1)
		{
			qsi = tmpdot[1] / tmpdot[4];
			eta = 0.0;
		}
		else
		{
			/* shared denominator of the 2x2 solve for (qsi, eta) */
			tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
			qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
			eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
		}

		/* u    = qsi*map + eta*my + eta*beta*u */
		/* au   = A * u                         */
		lis_vector_scale(eta*beta,u);
		lis_vector_axpy(qsi,map,u);
		lis_vector_axpy(eta,my,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u */
		lis_vector_scale(eta,z);
		lis_vector_axpy(qsi,mr,z);
		lis_vector_axpy(-alpha,u,z);

		/* y  = qsi*amr + eta*y - alpha*au */
		/* my = M^-1 * y */
		lis_vector_scale(eta,y);
		lis_vector_axpy(qsi,amr,y);
		lis_vector_axpy(-alpha,au,y);
		time = lis_wtime();
		lis_psolve(solver, y, my);
		ptime += lis_wtime()-time;

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(1.0,z,x);

		/* r = r - alpha*ap - y */
		lis_vector_axpy(-alpha,ap,r);
		lis_vector_axpy(-1.0,y,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* mr  = mr - alpha*map - my */
		/* amr = A * mr              */
		/* rho = <rtld,amr>          */
		lis_vector_axpy(-alpha,map,mr);
		lis_vector_axpy(-1.0,my,mr);
		lis_matvec(A,mr,amr);
		lis_vector_dot(rtld,amr,&rho);
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta = (rho / rho_old) * (alpha / qsi);

		/* p   = mr + beta*(p - u)     */
		/* ap  = amr + beta*(ap - au)  */
		lis_vector_axpy(-1.0,u,p);
		lis_vector_xpay(mr,beta,p);
		lis_vector_axpy(-1.0,au,ap);
		lis_vector_xpay(amr,beta,ap);

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicr_quad: preconditioned BiCR iteration using the extended
 * ("ex") vector kernels and LIS_QUAD scalar arithmetic.
 *
 * Reads A, x, ten work vectors, the iteration limit, the output flags
 * and the convergence-condition index from `solver`.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero,
 *                   or the residual norm dropped to solver->tol or below;
 *   LIS_BREAKDOWN - <map,aptld> or <az,ztld> evaluated to exactly zero;
 *   LIS_MAXITER   - the loop ran past solver->options[LIS_OPTIONS_MAXITER].
 * solver->retcode, solver->iter and solver->resid are set on the in-loop
 * exits; solver->ptime is written only on success.
 *
 * Every lis_psolve()/lis_psolvet() call, including the two made during
 * setup, is accumulated into ptime.
 *
 * NOTE(review): if the first breakdown test fires in iteration 1, nrm2
 * has not been assigned yet when it is copied into solver->resid.
 */
LIS_INT lis_bicr_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	z       = solver->work[2];
	ztld    = solver->work[3];
	p       = solver->work[4];
	ptld    = solver->work[5];
	ap      = solver->work[6];
	az      = solver->work[7];
	map     = solver->work[8];
	aptld   = solver->work[9];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* z = M^-1 * r, ztld = M^-T * rtld (counted as preconditioner time) */
	time = lis_wtime();
	lis_psolve(solver, r, z);
	lis_psolvet(solver, rtld, ztld);
	ptime += lis_wtime()-time;

	/* p = z, ptld = ztld, ap = A * z, rho_old = <ap,ztld> */
	lis_vector_copyex_mm(z,p);
	lis_vector_copyex_mm(ztld,ptld);
	lis_matvec(A,z,ap);
	lis_vector_dotex_mmm(ap,ztld,&rho_old);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* aptld = A^T * ptld */
		/* map   = M^-1 * ap  */
		lis_matvect(A,ptld,aptld);
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot1 = <map,aptld> */
		lis_vector_dotex_mmm(map,aptld,&tmpdot1);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho_old / tmpdot1 */
		/* x     = x + alpha*p       */
		/* r     = r - alpha*ap      (alpha negated in place from here on) */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*aptld   (alpha holds -alpha) */
		/* z    = z - alpha*map        */
		/* ztld = M^-T * rtld          */
		/* az   = A * z                */
		/* rho  = <az,ztld>            */
		lis_vector_axpyex_mmm(alpha,aptld,rtld);
		lis_vector_axpyex_mmm(alpha,map,z);
		time = lis_wtime();
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-time;
		lis_matvec(A,z,az);
		lis_vector_dotex_mmm(az,ztld,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = rho / rho_old      */
		/* p    = z + beta*p         */
		/* ptld = ztld + beta*ptld   */
		/* ap   = az + beta*ap       */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_vector_xpayex_mmm(z,beta,p);
		lis_vector_xpayex_mmm(ztld,beta,ptld);
		lis_vector_xpayex_mmm(az,beta,ap);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicg_switch: two-phase BiCG.
 *
 * Phase 1 flags r, rtld, p and ptld as LIS_PRECISION_DEFAULT and runs
 * the plain (lis_vector_dot / axpy / xpay) BiCG recurrence for at most
 * solver->options[LIS_OPTIONS_SWITCH_MAXITER] iterations, leaving the
 * loop early once the residual norm is <= solver->tol_switch.
 *
 * Phase 2 flags those vectors as LIS_PRECISION_QUAD again, clears
 * LIS_OPTIONS_INITGUESS_ZEROS, copies x into solver->xx, recomputes the
 * initial residual and shadow residual, and continues with the "ex"
 * kernels and LIS_QUAD scalars.  Its counter iter2 starts at iter+1 and
 * runs up to solver->options[LIS_OPTIONS_MAXITER].
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   before phase 1, or phase 2 reached nrm2 < solver->tol;
 *   LIS_BREAKDOWN - <z,rtld> or <ptld,q> was exactly zero in either phase;
 *   LIS_MAXITER   - phase 2 ran out of iterations.
 * On the phase-2 success and max-iteration exits solver->iter is the
 * phase-2 counter and solver->iter2 is the phase-1 counter.
 *
 * z/q share work[2] and ztld/qtld share work[3].
 *
 * NOTE(review): a phase-1 breakdown returns while r, rtld, p and ptld
 * are still flagged LIS_PRECISION_DEFAULT - confirm callers do not
 * depend on the flag being restored.
 * NOTE(review): if a breakdown fires in the very first iteration, nrm2
 * has not been assigned yet when it is copied into solver->resid.
 */
LIS_INT lis_bicg_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A,At;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, q, qtld;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A        = solver->A;
	At       = solver->A;
	x        = solver->x;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv     = solver->options[LIS_OPTIONS_CONV_COND];
	ptime    = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	z       = solver->work[2];
	ztld    = solver->work[3];
	p       = solver->work[4];
	ptld    = solver->work[5];
	q       = solver->work[2];
	qtld    = solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, ptld);

	/* ---- phase 1: default-precision kernels, only the .hi parts used ---- */
	r->precision    = LIS_PRECISION_DEFAULT;
	rtld->precision = LIS_PRECISION_DEFAULT;
	p->precision    = LIS_PRECISION_DEFAULT;
	ptld->precision = LIS_PRECISION_DEFAULT;

	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* z    = M^-1 * r    */
		/* ztld = M^-T * rtld */
		time = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-time;

		/* rho = <z,rtld> */
		lis_vector_dot(z,rtld,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		beta.hi[0] = rho.hi[0] / rho_old.hi[0];

		/* p    = z + beta*p       */
		/* ptld = ztld + beta*ptld */
		/* q    = A   * p          */
		/* qtld = A^T * ptld       */
		lis_vector_xpay(z,beta.hi[0],p);
		lis_matvec(A,p,q);

		lis_vector_xpay(ztld,beta.hi[0],ptld);
		lis_matvect(At,ptld,qtld);

		/* tmpdot1 = <ptld,q> */
		lis_vector_dot(ptld,q,&tmpdot1.hi[0]);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* x = x + alpha*p */
		lis_vector_axpy(alpha.hi[0],p,x);

		/* r = r - alpha*q */
		lis_vector_axpy(-alpha.hi[0],q,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		/* switch threshold reached: leave phase 1 */
		if( nrm2 <= tol2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		/* rtld = rtld - alpha*qtld */
		lis_vector_axpy(-alpha.hi[0],qtld,rtld);

		rho_old.hi[0] = rho.hi[0];
	}

	/* ---- phase 2: restart from the current x with the quad kernels ---- */
	r->precision    = LIS_PRECISION_QUAD;
	rtld->precision = LIS_PRECISION_QUAD;
	p->precision    = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	/* rho_old.lo[0] is still 0.0: phase 1 only ever wrote the .hi part */
	rho_old.hi[0] = 1.0;

	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, ptld);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* z    = M^-1 * r    */
		/* ztld = M^-T * rtld */
		time = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-time;

		/* rho = <z,rtld> */
		lis_vector_dotex_mmm(z,rtld,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* p    = z + beta*p       */
		/* ptld = ztld + beta*ptld */
		/* q    = A   * p          */
		/* qtld = A^T * ptld       */
		lis_vector_xpayex_mmm(z,beta,p);
		lis_matvec(A,p,q);

		lis_vector_xpayex_mmm(ztld,beta,ptld);
		lis_matvect(At,ptld,qtld);

		/* tmpdot1 = <ptld,q> */
		lis_vector_dotex_mmm(ptld,q,&tmpdot1);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* x = x + alpha*p */
		lis_vector_axpyex_mmm(alpha,p,x);

		/* r = r - alpha*q   (alpha negated in place from here on) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,q,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
			/* report the phase-2 counter, matching the rhistory slot above */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*qtld   (alpha holds -alpha) */
		lis_vector_axpyex_mmm(alpha,qtld,rtld);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicg: preconditioned BiCG iteration in the solver's default
 * precision.
 *
 * Reads A, x, six work vectors, the iteration limit, the output flags
 * and the convergence-condition index from `solver`.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero,
 *                   or the residual norm dropped to solver->tol or below;
 *   LIS_BREAKDOWN - <z,rtld> or <ptld,q> evaluated to exactly zero;
 *   LIS_MAXITER   - the loop ran past solver->options[LIS_OPTIONS_MAXITER].
 * solver->retcode, solver->iter and solver->resid are set on the in-loop
 * exits; solver->ptime is written only on success.
 *
 * NOTE(review): if a breakdown fires in iteration 1, nrm2 has not been
 * assigned yet when it is copied into solver->resid.
 */
LIS_INT lis_bicg(LIS_SOLVER solver)
{
	LIS_MATRIX A, At;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
	LIS_SCALAR alpha, beta, rho, rho_old, ptld_q;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter, maxiter, output, conv;
	double t0, ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	At      = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	/* q reuses z's slot (work[2]); qtld reuses ztld's slot (work[3]) */
	r    = solver->work[0];
	rtld = solver->work[1];
	z    = solver->work[2];
	ztld = solver->work[3];
	p    = solver->work[4];
	ptld = solver->work[5];
	q    = solver->work[2];
	qtld = solver->work[3];

	rho_old = (LIS_SCALAR)1.0;

	/* r <- initial residual; a non-zero return ends the solve here */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* both search directions start at zero */
	lis_vector_set_all(0, p);
	lis_vector_set_all(0, ptld);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z = M^-1 * r and ztld = M^-T * rtld, timed together */
		t0 = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-t0;

		/* rho = <z,rtld> */
		lis_vector_dot(z,rtld,&rho);

		if( rho==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		beta = rho / rho_old;

		/* p = z + beta*p, then q = A * p (this overwrites z) */
		lis_vector_xpay(z,beta,p);
		lis_matvec(A,p,q);

		/* ptld = ztld + beta*ptld, then qtld = A^T * ptld (overwrites ztld) */
		lis_vector_xpay(ztld,beta,ptld);
		lis_matvect(At,ptld,qtld);

		/* ptld_q = <ptld,q> */
		lis_vector_dot(ptld,q,&ptld_q);

		if( ptld_q==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		alpha = rho / ptld_q;

		/* x = x + alpha*p,  r = r - alpha*q */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(-alpha,q,r);

		/* residual norm via the configured convergence routine */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output & LIS_PRINT_MEM )
		{
			solver->rhistory[iter] = nrm2;
		}
		if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
		{
			lis_print_rhistory(iter,nrm2);
		}

		if( nrm2 <= tol )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*qtld */
		lis_vector_axpy(-alpha,qtld,rtld);

		rho_old = rho;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}