/*
 * lis_bicr_quad - preconditioned BiCR iteration using the quad-precision
 * (hi/lo pair) scalar and vector kernels.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and ten work vectors (solver->work[0..9]).
 *         On exit solver->retcode, solver->iter and solver->resid are set;
 *         solver->ptimes is set only on the converged exit.
 *
 * Returns LIS_SUCCESS on convergence (tol >= residual measure) and also when
 * lis_solver_get_initial_residual() returns non-zero (no iteration is run),
 * LIS_BREAKDOWN when a recurrence denominator is exactly zero, and
 * LIS_MAXITER when the iteration limit is exhausted.
 */
LIS_INT lis_bicr_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, z, ztld, p, ptld, ap, map, az, aptld;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r       = solver->work[0];
    rtld    = solver->work[1];
    z       = solver->work[2];
    ztld    = solver->work[3];
    p       = solver->work[4];
    ptld    = solver->work[5];
    ap      = solver->work[6];
    az      = solver->work[7];
    map     = solver->work[8];
    aptld   = solver->work[9];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first convergence check (breakdown in
     * iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    lis_solver_set_shadowresidual(solver,r,rtld);

    /* z    = M^-1 * r    */
    /* ztld = M^-T * rtld */
    /* Timed like every other preconditioner solve in this routine. */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;

    /* p = z, ptld = ztld, ap = A * z, rho_old = <ap,ztld> */
    lis_vector_copyex_mm(z,p);
    lis_vector_copyex_mm(ztld,ptld);
    LIS_MATVEC(A,z,ap);
    lis_vector_dotex_mmm(ap,ztld,&rho_old);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* aptld = A^T * ptld */
        /* map   = M^-1 * ap  */
        LIS_MATVECT(A,ptld,aptld);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;

        /* tmpdot1 = <map,aptld> */
        lis_vector_dotex_mmm(map,aptld,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho_old / tmpdot1 */
        /* x     = x + alpha*p       */
        /* r     = r - alpha*ap      */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        /* alpha is negated in place and stays negated for the rest of the pass */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,ap,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rtld = rtld - alpha*aptld */
        /* z    = z - alpha*map      */
        /* ztld = M^-T * rtld        */
        /* az   = A * z              */
        /* rho  = <az,ztld>          */
        lis_vector_axpyex_mmm(alpha,aptld,rtld);
        lis_vector_axpyex_mmm(alpha,map,z);
        times = lis_wtime();
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;
        LIS_MATVEC(A,z,az);
        lis_vector_dotex_mmm(az,ztld,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = rho / rho_old      */
        /* p    = z + beta*p         */
        /* ptld = ztld + beta*ptld   */
        /* ap   = az + beta*ap       */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_vector_xpayex_mmm(z,beta,p);
        lis_vector_xpayex_mmm(ztld,beta,ptld);
        lis_vector_xpayex_mmm(az,beta,ap);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_crs_quad - preconditioned CRS iteration using the quad-precision
 * (hi/lo pair) scalar and vector kernels.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and six work vectors (solver->work[0..5]).
 *         On exit solver->retcode, solver->iter and solver->resid are set;
 *         solver->ptimes is set only on the converged exit.
 *
 * Returns LIS_SUCCESS on convergence (tol >= residual measure) and also when
 * lis_solver_get_initial_residual() returns non-zero (no iteration is run),
 * LIS_BREAKDOWN when rho or <rtld,map> is exactly zero, and LIS_MAXITER when
 * the iteration limit is exhausted.
 *
 * Several logical vectors deliberately share storage (z/u/uq, q/ap, map/auq),
 * so the order of the updates inside the loop must not be changed.
 */
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, p, q, u, z, ap, map, uq, auq;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r       = solver->work[0];
    rtld    = solver->work[1];
    p       = solver->work[2];
    /* z, u and uq live in the same work vector and are updated in place */
    z       = solver->work[3];
    u       = solver->work[3];
    uq      = solver->work[3];
    /* ap overwrites q once q has been consumed; q is rebuilt afterwards */
    q       = solver->work[4];
    ap      = solver->work[4];
    /* auq overwrites map once map has been consumed */
    map     = solver->work[5];
    auq     = solver->work[5];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first convergence check (breakdown in
     * iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    /* p temporarily holds the shadow residual; rtld = A^T * (shadow residual) */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    lis_vector_set_allex_nm(0.0,q);
    lis_vector_set_allex_nm(0.0,p);
    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* z   = M^-1 * r  */
        /* rho = <rtld,z>  */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,z,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta    = rho / rho_old          */
        /* u       = z + beta*q             */
        /* p       = u + beta*(q + beta*p)  */
        /* ap      = A * p                  */
        /* map     = M^-1 * ap              */
        /* tmpdot1 = <rtld,map>             */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_vector_axpyzex_mmmm(beta,q,z,u);
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);
        LIS_MATVEC(A,p,ap);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,map,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1   */
        /* q     = u - alpha*map   */
        /* uq    = u + q           */
        /* auq   = A * uq          */
        /* x     = x + alpha*uq    */
        /* r     = r - alpha*auq   */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* -alpha */
        lis_vector_axpyzex_mmmm(alpha,map,u,q);
        lis_vector_axpyzex_mmmm(one,u,q,uq);
        LIS_MATVEC(A,uq,auq);
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* +alpha */
        lis_vector_axpyex_mmm(alpha,uq,x);
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* -alpha */
        lis_vector_axpyex_mmm(alpha,auq,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            /*
             * Only rank 0 reports, matching the guard used by the other
             * solvers; the casts make the arguments match %d / %e whatever
             * LIS_INT and LIS_REAL are configured to be.
             */
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
            {
                printf("iter: %5d residual = %e\n", (int)iter, (double)nrm2);
            }
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicg_switch - two-phase preconditioned BiCG.
 *
 * Phase 1 marks r, rtld, p and ptld as LIS_PRECISION_DEFAULT and iterates
 * with the plain kernels (only the .hi part of the scalars is used) for at
 * most options[LIS_OPTIONS_SWITCH_MAXITER] iterations or until the residual
 * measure is <= solver->tol_switch.
 * Phase 2 marks the same vectors as LIS_PRECISION_QUAD, copies x into
 * solver->xx, clears options[LIS_OPTIONS_INITGUESS_ZEROS], recomputes the
 * residual and continues with the quad kernels, counting iterations from
 * iter+1 up to options[LIS_OPTIONS_MAXITER].
 *
 * solver: provides matrix, iterate, options and six work vectors
 *         (solver->work[0..5]).  On exit solver->retcode, solver->iter,
 *         solver->iter2 (the phase-1 counter) and solver->resid are set;
 *         solver->ptimes is set when phase 1 or phase 2 converges.
 *
 * Returns LIS_SUCCESS on convergence in phase 2 (and when the first
 * lis_solver_get_initial_residual() call returns non-zero), LIS_BREAKDOWN
 * when rho or <ptld,q> is exactly zero, LIS_MAXITER otherwise.
 *
 * q/qtld share storage with z/ztld (work[2], work[3]); each xpay that
 * consumes z or ztld therefore has to precede the product that writes q or
 * qtld.
 */
LIS_INT lis_bicg_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A, At;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
    LIS_REAL bnrm2, nrm2, tol, tol2;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    At       = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes   = 0.0;

    r        = solver->work[0];
    rtld     = solver->work[1];
    z        = solver->work[2];
    ztld     = solver->work[3];
    p        = solver->work[4];
    ptld     = solver->work[5];
    q        = solver->work[2];
    qtld     = solver->work[3];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, ptld);

    /* ---- Phase 1: default precision ---- */
    r->precision    = LIS_PRECISION_DEFAULT;
    rtld->precision = LIS_PRECISION_DEFAULT;
    p->precision    = LIS_PRECISION_DEFAULT;
    ptld->precision = LIS_PRECISION_DEFAULT;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when an exit happens before the first convergence check (breakdown in
     * iteration 1, or no iteration being run at all).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* z    = M^-1 * r    */
        /* ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;

        /* rho = <z,rtld> */
        lis_vector_dot(z,rtld,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        beta.hi[0] = rho.hi[0] / rho_old.hi[0];

        /* p    = z + beta*p       */
        /* ptld = ztld + beta*ptld */
        /* q    = A * p            */
        /* qtld = A^T * ptld       */
        lis_vector_xpay(z,beta.hi[0],p);
        LIS_MATVEC(A,p,q);

        lis_vector_xpay(ztld,beta.hi[0],ptld);
        LIS_MATVECT(At,ptld,qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dot(ptld,q,&tmpdot1.hi[0]);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

        /* x = x + alpha*p */
        lis_vector_axpy(alpha.hi[0],p,x);

        /* r = r - alpha*q */
        lis_vector_axpy(-alpha.hi[0],q,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        /* switch tolerance reached: leave phase 1 with iter unchanged */
        if( nrm2 <= tol2 )
        {
            solver->iter   = iter;
            solver->iter2  = iter;
            solver->ptimes = ptimes;
            break;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpy(-alpha.hi[0],qtld,rtld);

        rho_old.hi[0] = rho.hi[0];
    }

    /* ---- Phase 2: quad precision, restarted from the current x ---- */
    r->precision    = LIS_PRECISION_QUAD;
    rtld->precision = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;

    /*
     * NOTE(review): the return value is ignored here, whereas the first call
     * above treats non-zero as "nothing to iterate".  Confirm whether a
     * non-zero status at the restart should end the solve as well.
     */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, ptld);

    /*
     * iter keeps the phase-1 counter (the converged iteration after a break,
     * maxiter2+1 when phase 1 ran out of iterations); iter2 is the running
     * total.
     */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* z    = M^-1 * r    */
        /* ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;

        /* rho = <z,rtld> */
        lis_vector_dotex_mmm(z,rtld,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

        /* p    = z + beta*p       */
        /* ptld = ztld + beta*ptld */
        /* q    = A * p            */
        /* qtld = A^T * ptld       */
        lis_vector_xpayex_mmm(z,beta,p);
        LIS_MATVEC(A,p,q);

        lis_vector_xpayex_mmm(ztld,beta,ptld);
        LIS_MATVECT(At,ptld,qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dotex_mmm(ptld,q,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* x = x + alpha*p */
        lis_vector_axpyex_mmm(alpha,p,x);

        /* r = r - alpha*q (alpha stays negated until the end of the pass) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,q,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
            /* report the running counter, the same index used for residual[] */
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
        }

        /* same acceptance test (tol >= nrm2) as the other solvers */
        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpyex_mmm(alpha,qtld,rtld);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicg - preconditioned BiCG iteration in working precision.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and six work vectors (solver->work[0..5]).
 *         On exit solver->retcode, solver->iter and solver->resid are set;
 *         solver->ptimes is set only on the converged exit.
 *
 * Returns LIS_SUCCESS on convergence (tol >= residual measure) and also when
 * lis_solver_get_initial_residual() returns non-zero (no iteration is run),
 * LIS_BREAKDOWN when rho or <ptld,q> is exactly zero, and LIS_MAXITER when
 * the iteration limit is exhausted.
 *
 * q/qtld share storage with z/ztld (work[2], work[3]); each xpay that
 * consumes z or ztld therefore has to precede the product that writes q or
 * qtld.
 */
LIS_INT lis_bicg(LIS_SOLVER solver)
{
    LIS_MATRIX A, At;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
    LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    At      = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r       = solver->work[0];
    rtld    = solver->work[1];
    z       = solver->work[2];
    ztld    = solver->work[3];
    p       = solver->work[4];
    ptld    = solver->work[5];
    q       = solver->work[2];
    qtld    = solver->work[3];
    rho_old = (LIS_SCALAR)1.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first convergence check (breakdown in
     * iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_all(0, p);
    lis_vector_set_all(0, ptld);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* z    = M^-1 * r    */
        /* ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime()-times;

        /* rho = <z,rtld> */
        lis_vector_dot(z,rtld,&rho);

        /* test breakdown */
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        beta = rho / rho_old;

        /* p    = z + beta*p       */
        /* ptld = ztld + beta*ptld */
        /* q    = A * p            */
        /* qtld = A^T * ptld       */
        lis_vector_xpay(z,beta,p);
        LIS_MATVEC(A,p,q);

        lis_vector_xpay(ztld,beta,ptld);
        LIS_MATVECT(At,ptld,qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dot(ptld,q,&tmpdot1);

        /* test breakdown */
        if( tmpdot1==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha = rho / tmpdot1;

        /* x = x + alpha*p */
        lis_vector_axpy(alpha,p,x);

        /* r = r - alpha*q */
        lis_vector_axpy(-alpha,q,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpy(-alpha,qtld,rtld);

        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicrstab - preconditioned BiCRSTAB iteration in working precision.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and nine work vectors (solver->work[0..8]).
 *         On exit solver->retcode, solver->iter and solver->resid are set;
 *         solver->ptimes is set only on the converged exits.
 *
 * Returns LIS_SUCCESS on convergence (either the early check on s or the
 * regular check on r, and also when lis_solver_get_initial_residual()
 * returns non-zero), LIS_BREAKDOWN when <rtld,map>, <ams,s>, <ams,ams> or
 * rho is exactly zero, and LIS_MAXITER when the iteration limit is
 * exhausted.
 */
LIS_INT lis_bicrstab(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, p, s, ap, ms, map, ams, z;
    LIS_SCALAR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    rtld    = solver->work[0];
    r       = solver->work[1];
    s       = solver->work[2];
    ms      = solver->work[3];
    ams     = solver->work[4];
    p       = solver->work[5];
    ap      = solver->work[6];
    map     = solver->work[7];
    z       = solver->work[8];

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first residual evaluation (breakdown on
     * alpha in iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    /* p temporarily holds the shadow residual; rtld = A^T * (shadow residual) */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    /* z = M^-1 * r, p = z, rho_old = <rtld,z> */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    ptimes += lis_wtime()-times;
    lis_vector_copy(z,p);
    lis_vector_dot(rtld,z,&rho_old);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* ap      = A * p             */
        /* map     = M^-1 * ap         */
        /* tmpdot1 = <rtld,map>        */
        LIS_MATVEC(A,p,ap);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;
        lis_vector_dot(rtld,map,&tmpdot1);

        /*
         * test breakdown: alpha's denominator.  x and nrm2 still describe
         * the previous iterate, so nothing needs to be undone.
         */
        if( tmpdot1==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho_old / tmpdot1   */
        /* s     = r - alpha*ap        */
        alpha = rho_old / tmpdot1;
        lis_vector_axpyz(-alpha,ap,r,s);

        /* Early check for tolerance */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( nrm2 <= tol )
        {
            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                /* casts make the arguments match %d / %e for any LIS_INT / LIS_REAL */
                if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
                {
                    printf("iter: %5d residual = %e\n", (int)iter, (double)nrm2);
                }
            }

            /* s is the residual of x + alpha*p */
            lis_vector_axpy(alpha,p,x);
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* ms      = z - alpha*map     */
        /* ams     = A * ms            */
        /* tmpdot1 = <ams,s>           */
        /* tmpdot2 = <ams,ams>         */
        lis_vector_axpyz(-alpha,map,z,ms);
        LIS_MATVEC(A,ms,ams);
        lis_vector_dot(ams,s,&tmpdot1);
        lis_vector_dot(ams,ams,&tmpdot2);

        /*
         * test breakdown: omega = tmpdot1 / tmpdot2 would be undefined
         * (tmpdot2 == 0) or zero (tmpdot1 == 0), and beta below divides by
         * omega.  As in the early exit above, x + alpha*p is the iterate
         * whose residual measure is the nrm2 just computed from s.
         */
        if( tmpdot1==0.0 || tmpdot2==0.0 )
        {
            lis_vector_axpy(alpha,p,x);
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* omega = tmpdot1 / tmpdot2 */
        omega = tmpdot1 / tmpdot2;

        /* x = x + alpha*p + omega*ms */
        /* r = s - omega*ams          */
        lis_vector_axpy(alpha,p,x);
        lis_vector_axpy(omega,ms,x);
        lis_vector_axpyz(-omega,ams,s,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
            {
                printf("iter: %5d residual = %e\n", (int)iter, (double)nrm2);
            }
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* z   = M^-1 * r */
        /* rho = <rtld,z> */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        ptimes += lis_wtime()-times;
        lis_vector_dot(rtld,z,&rho);

        /* test breakdown */
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / omega) */
        /* p    = z + beta*(p - omega*map)          */
        beta = (rho / rho_old) * (alpha / omega);
        lis_vector_axpy(-omega,map,p);
        lis_vector_xpay(z,beta,p);

        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicrsafe_quad - preconditioned BiCRSafe iteration using the
 * quad-precision (hi/lo pair) scalar and vector kernels.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and thirteen work vectors
 *         (solver->work[0..12]).  On exit solver->retcode, solver->iter and
 *         solver->resid are set; solver->ptimes is set only on the converged
 *         exit.
 *
 * Returns LIS_SUCCESS on convergence (tol >= residual measure) and also when
 * lis_solver_get_initial_residual() returns non-zero (no iteration is run),
 * LIS_BREAKDOWN when <artld,map> or rho is exactly zero, and LIS_MAXITER
 * when the iteration limit is exhausted.
 *
 * The quad scalars `alpha` and `one` are sign-flipped in place with
 * lis_quad_minus() several times per pass; the comments next to each flip
 * state the sign that results.
 */
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
    LIS_VECTOR y, my, u, au, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    rtld    = solver->work[0];
    r       = solver->work[1];
    mr      = solver->work[2];
    amr     = solver->work[3];
    p       = solver->work[4];
    ap      = solver->work[5];
    map     = solver->work[6];
    my      = solver->work[7];
    y       = solver->work[8];
    u       = solver->work[9];
    z       = solver->work[10];
    au      = solver->work[11];
    artld   = solver->work[12];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first convergence check (breakdown on
     * alpha in iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    /* artld = A^T * rtld, mr = M^-1 * r, amr = A * mr, rho_old = <rtld,amr> */
    lis_solver_set_shadowresidual(solver,r,rtld);
    LIS_MATVECT(A,rtld,artld);
    times = lis_wtime();
    lis_psolve(solver, r, mr);
    ptimes += lis_wtime()-times;
    LIS_MATVEC(A,mr,amr);
    lis_vector_dotex_mmm(rtld,amr,&rho_old);

    /* ap = amr, p = mr */
    lis_vector_copyex_mm(amr,ap);
    lis_vector_copyex_mm(mr,p);

    /* `one` starts out as -1 and is toggled with lis_quad_minus() below */
    one.hi[0]  = -1.0;
    one.lo[0]  = 0.0;

    /*
     * beta is multiplied into eta*beta in the very first pass, before the
     * recurrence has assigned it; give it a defined value (0, as the
     * working-precision variant does).
     */
    beta.hi[0] = 0.0;
    beta.lo[0] = 0.0;

    /*
     * NOTE(review): y, u, z and my are read in the first pass (dot products
     * whose results are discarded, scaling by eta == 0, axpy with eta == 0)
     * before this routine has written them.  That is harmless only if the
     * work vectors arrive holding finite values - confirm against the code
     * that allocates solver->work[].
     */

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* map = M^-1 * ap */
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;

        /* tmpdot[0] = <artld,map> */
        lis_vector_dotex_mmm(artld,map,&tmpdot[0]);

        /* test breakdown: alpha's denominator */
        if( tmpdot[0].hi[0]==0.0 && tmpdot[0].lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho_old / tmpdot[0] */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);

        /* tmpdot[0] = <y,y>     */
        /* tmpdot[1] = <amr,r>   */
        /* tmpdot[2] = <y,r>     */
        /* tmpdot[3] = <amr,y>   */
        /* tmpdot[4] = <amr,amr> */
        lis_vector_dotex_mmm(y,y,&tmpdot[0]);
        lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
        lis_vector_dotex_mmm(y,r,&tmpdot[2]);
        lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
        lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);

        if( iter==1 )
        {
            /* qsi = <amr,r> / <amr,amr>, eta = 0 */
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2 (qsi used as scratch) */
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

            /* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp (eta used as scratch) */
            lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

            /* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp (tmpdot[0] used as scratch) */
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
        }

        /* u  = qsi*map + eta*my + eta*beta*u */
        /* au = A * u                          */
        lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
        lis_vector_scaleex_mm(tmp,u);
        lis_vector_axpyex_mmm(qsi,map,u);
        lis_vector_axpyex_mmm(eta,my,u);
        LIS_MATVEC(A,u,au);

        /* z = qsi*mr + eta*z - alpha*u */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,mr,z);
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* alpha -> -alpha */
        lis_vector_axpyex_mmm(alpha,u,z);

        /* y  = qsi*amr + eta*y - alpha*au */
        /* my = M^-1 * y                   */
        lis_vector_scaleex_mm(eta,y);
        lis_vector_axpyex_mmm(qsi,amr,y);
        lis_vector_axpyex_mmm(alpha,au,y);
        times = lis_wtime();
        lis_psolve(solver, y, my);
        ptimes += lis_wtime()-times;

        /* x = x + alpha*p + z */
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* alpha -> +alpha */
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_quad_minus((LIS_QUAD *)one.hi);          /* one   -> +1     */
        lis_vector_axpyex_mmm(one,z,x);

        /* r = r - alpha*ap - y */
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* alpha -> -alpha */
        lis_quad_minus((LIS_QUAD *)one.hi);          /* one   -> -1     */
        lis_vector_axpyex_mmm(alpha,ap,r);
        lis_vector_axpyex_mmm(one,y,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* mr  = mr - alpha*map - my */
        /* amr = A * mr              */
        /* rho = <rtld,amr>          */
        lis_vector_axpyex_mmm(alpha,map,mr);
        lis_vector_axpyex_mmm(one,my,mr);
        LIS_MATVEC(A,mr,amr);
        lis_vector_dotex_mmm(rtld,amr,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);        /* alpha -> +alpha */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

        /* p  = mr + beta*(p - u)    */
        /* ap = amr + beta*(ap - au) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(mr,beta,p);
        lis_vector_axpyex_mmm(one,au,ap);
        lis_vector_xpayex_mmm(amr,beta,ap);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicrsafe - preconditioned BiCRSafe iteration in working precision.
 *
 * solver: provides the matrix (solver->A), the iterate (solver->x), the
 *         iteration limit, output flags and convergence-condition index
 *         (solver->options[]), and thirteen work vectors
 *         (solver->work[0..12]).  On exit solver->retcode, solver->iter and
 *         solver->resid are set; solver->ptimes is set only on the converged
 *         exit.
 *
 * Returns LIS_SUCCESS on convergence (tol >= residual measure) and also when
 * lis_solver_get_initial_residual() returns non-zero (no iteration is run),
 * LIS_BREAKDOWN when <artld,map> or rho is exactly zero, and LIS_MAXITER
 * when the iteration limit is exhausted.
 */
LIS_INT lis_bicrsafe(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
    LIS_VECTOR y, my, u, au, z;
    LIS_SCALAR alpha, beta, rho, rho_old;
    LIS_SCALAR qsi, eta;
    LIS_SCALAR tmp, tmpdot[5];
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    rtld    = solver->work[0];
    r       = solver->work[1];
    mr      = solver->work[2];
    amr     = solver->work[3];
    p       = solver->work[4];
    ap      = solver->work[5];
    map     = solver->work[6];
    my      = solver->work[7];
    y       = solver->work[8];
    u       = solver->work[9];
    z       = solver->work[10];
    au      = solver->work[11];
    artld   = solver->work[12];

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /*
     * Seed nrm2 from the starting residual so that solver->resid is defined
     * when the loop exits before its first convergence check (breakdown on
     * alpha in iteration 1, or maxiter < 1).
     */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    /* artld = A^T * rtld, mr = M^-1 * r, amr = A * mr, rho_old = <rtld,amr> */
    lis_solver_set_shadowresidual(solver,r,rtld);
    LIS_MATVECT(A,rtld,artld);
    times = lis_wtime();
    lis_psolve(solver, r, mr);
    ptimes += lis_wtime()-times;
    LIS_MATVEC(A,mr,amr);
    lis_vector_dot(rtld,amr,&rho_old);

    /* ap = amr, p = mr */
    lis_vector_copy(amr,ap);
    lis_vector_copy(mr,p);
    beta = 0.0;

    /*
     * NOTE(review): y, u, z and my are read in the first pass (dot products
     * whose results are discarded, scaling by eta == 0, axpy with eta == 0)
     * before this routine has written them.  That is harmless only if the
     * work vectors arrive holding finite values - confirm against the code
     * that allocates solver->work[].
     */

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* map = M^-1 * ap */
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;

        /* tmpdot[0] = <artld,map> */
        lis_vector_dot(artld,map,&tmpdot[0]);

        /* test breakdown: alpha's denominator */
        if( tmpdot[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho_old / tmpdot[0] */
        alpha = rho_old / tmpdot[0];

        /* tmpdot[0] = <y,y>     */
        /* tmpdot[1] = <amr,r>   */
        /* tmpdot[2] = <y,r>     */
        /* tmpdot[3] = <amr,y>   */
        /* tmpdot[4] = <amr,amr> */
        lis_vector_dot(y,y,&tmpdot[0]);
        lis_vector_dot(amr,r,&tmpdot[1]);
        lis_vector_dot(y,r,&tmpdot[2]);
        lis_vector_dot(amr,y,&tmpdot[3]);
        lis_vector_dot(amr,amr,&tmpdot[4]);

        if( iter==1 )
        {
            qsi = tmpdot[1] / tmpdot[4];
            eta = 0.0;
        }
        else
        {
            tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
            qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
            eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
        }

        /* u  = qsi*map + eta*my + eta*beta*u */
        /* au = A * u                          */
        lis_vector_scale(eta*beta,u);
        lis_vector_axpy(qsi,map,u);
        lis_vector_axpy(eta,my,u);
        LIS_MATVEC(A,u,au);

        /* z = qsi*mr + eta*z - alpha*u */
        lis_vector_scale(eta,z);
        lis_vector_axpy(qsi,mr,z);
        lis_vector_axpy(-alpha,u,z);

        /* y  = qsi*amr + eta*y - alpha*au */
        /* my = M^-1 * y                   */
        lis_vector_scale(eta,y);
        lis_vector_axpy(qsi,amr,y);
        lis_vector_axpy(-alpha,au,y);
        times = lis_wtime();
        lis_psolve(solver, y, my);
        ptimes += lis_wtime()-times;

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha,p,x);
        lis_vector_axpy(1.0,z,x);

        /* r = r - alpha*ap - y */
        lis_vector_axpy(-alpha,ap,r);
        lis_vector_axpy(-1.0,y,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* mr  = mr - alpha*map - my */
        /* amr = A * mr              */
        /* rho = <rtld,amr>          */
        lis_vector_axpy(-alpha,map,mr);
        lis_vector_axpy(-1.0,my,mr);
        LIS_MATVEC(A,mr,amr);
        lis_vector_dot(rtld,amr,&rho);

        /* test breakdown */
        if( rho==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta = (rho / rho_old) * (alpha / qsi);

        /* p  = mr + beta*(p - u)    */
        /* ap = amr + beta*(ap - au) */
        lis_vector_axpy(-1.0,u,p);
        lis_vector_xpay(mr,beta,p);
        lis_vector_axpy(-1.0,au,ap);
        lis_vector_xpay(amr,beta,ap);

        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}