/*
 * lis_crs_quad
 *
 * Preconditioned CRS iteration in which every recurrence scalar is a
 * LIS_QUAD_PTR (hi/lo pair) and every vector update goes through the
 * extended-precision ("ex") vector kernels.
 *
 * Input (read from `solver`):
 *   A, x, options[MAXITER|OUTPUT|CONV_COND], work[0..5], tol (read after the
 *   initial-residual call).
 *
 * Work-vector sharing visible below:
 *   work[3] : z, u, uq      work[4] : q, ap      work[5] : map, auq
 * The statement order inside the loop relies on that sharing, so it must not
 * be rearranged.
 *
 * Returns:
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (nothing else is written), or tol >= residual.
 *   LIS_BREAKDOWN - rho or <rtld,map> evaluated to exactly zero.
 *   LIS_MAXITER   - loop ran out; solver->iter is then maxiter+1.
 * solver->ptimes is only stored on the converged exit.
 */
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b,x;
    LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, tol;
    /* Initialised so that a breakdown on the very first iteration (or a
     * non-positive maxiter) never stores an indeterminate value in
     * solver->resid.  NOTE(review): 0.0 is only a deterministic placeholder,
     * no residual has been measured on those paths. */
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter,maxiter,n,output,conv;
    double times,ptimes;

    LIS_DEBUG_FUNC_IN;

    /* M, b and n are not referenced by name below; they are retained because
     * the LIS_MATVEC/LIS_MATVECT macro definitions are outside this view. */
    A       = solver->A;
    M       = solver->precon;
    b       = solver->b;
    x       = solver->x;
    n       = A->n;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r       = solver->work[0];
    rtld    = solver->work[1];
    p       = solver->work[2];
    z       = solver->work[3];
    u       = solver->work[3];
    uq      = solver->work[3];
    q       = solver->work[4];
    ap      = solver->work[4];
    map     = solver->work[5];
    auq     = solver->work[5];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /* p is used as scratch for the shadow residual; rtld = A^T * p */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    lis_vector_set_allex_nm(0.0,q);
    lis_vector_set_allex_nm(0.0,p);
    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* z   = M^-1 * r  */
        /* rho = <rtld,z>  */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,z,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta    = rho / rho_old          */
        /* u       = z + beta*q             */
        /* p       = u + beta*(q + beta*p)  */
        /* ap      = A * p                  */
        /* map     = M^-1 * ap              */
        /* tmpdot1 = <rtld,map>             */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_vector_axpyzex_mmmm(beta,q,z,u);
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);
        LIS_MATVEC(A,p,ap);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,map,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1  */
        /* q     = u - alpha*map  */
        /* uq    = u + q          */
        /* auq   = A * uq         */
        /* x     = x + alpha*uq   */
        /* r     = r - alpha*auq  */
        /* alpha is negated in place before each update that needs the
         * opposite sign: -alpha, +alpha, -alpha. */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,map,u,q);
        lis_vector_axpyzex_mmmm(one,u,q,uq);
        LIS_MATVEC(A,uq,auq);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,uq,x);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,auq,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            /* Only rank 0 prints, matching the other solvers in this file. */
            if( (output & LIS_PRINT_OUT) && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_cgs_quad
 *
 * Preconditioned CGS iteration with quad-precision (hi/lo) recurrence
 * scalars and extended-precision vector kernels.
 *
 * Input (read from `solver`):
 *   A, x, options[MAXITER|OUTPUT|CONV_COND], work[0..6], tol (read after the
 *   initial-residual call).
 *
 * Work-vector sharing visible below:
 *   work[5] : qhat, u       work[6] : uhat, vhat
 *
 * Returns:
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (nothing else is written), or tol > residual.
 *   LIS_BREAKDOWN - <rtld,r> or <rtld,vhat> evaluated to exactly zero.
 *   LIS_MAXITER   - loop ran out; solver->iter is then maxiter+1.
 * solver->ptime is only stored on the converged exit.
 */
LIS_INT lis_cgs_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, tol;
    /* Initialised so that a breakdown on the first iteration (or a
     * non-positive maxiter) never stores an indeterminate value in
     * solver->resid.  NOTE(review): 0.0 is only a deterministic placeholder,
     * no residual has been measured on those paths. */
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter,maxiter,output,conv;
    double time,ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    r       = solver->work[0];
    rtld    = solver->work[1];
    p       = solver->work[2];
    phat    = solver->work[3];
    q       = solver->work[4];
    qhat    = solver->work[5];
    u       = solver->work[5];
    uhat    = solver->work[6];
    vhat    = solver->work[6];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, q);
    lis_vector_set_allex_nm(0.0, p);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

        /* u = r + beta*q */
        lis_vector_axpyzex_mmmm(beta,q,r,u);

        /* p = u + beta*(q + beta*p) */
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);

        /* phat = M^-1 * p */
        time = lis_wtime();
        lis_psolve(solver, p, phat);
        ptime += lis_wtime()-time;

        /* v = A * phat */
        lis_matvec(A,phat,vhat);

        /* tmpdot1 = <rtld,vhat> */
        lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* q = u - alpha*vhat  (alpha is negated in place for the update) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

        /* phat = u + q          */
        /* uhat = M^-1 * (u + q) */
        lis_vector_axpyzex_mmmm(one,u,q,phat);
        time = lis_wtime();
        lis_psolve(solver, phat, uhat);
        ptime += lis_wtime()-time;

        /* x = x + alpha*uhat  (sign of alpha restored first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,uhat,x);

        /* qhat = A * uhat  (overwrites u, which shares work[5]) */
        lis_matvec(A,uhat,qhat);

        /* r = r - alpha*qhat  (alpha is left negated; it is recomputed by
         * lis_quad_div on the next iteration) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,qhat,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_cgs_switch
 *
 * Two-phase preconditioned CGS:
 *   phase 1 - at most options[LIS_OPTIONS_SWITCH_MAXITER] iterations using
 *             the plain vector kernels and only the .hi word of each scalar,
 *             stopped early once residual <= solver->tol_switch;
 *   phase 2 - the recurrence is restarted from the current x (copied to
 *             solver->xx, INITGUESS_ZEROS cleared, residual recomputed) and
 *             continued with quad scalars / "ex" kernels until
 *             tol > residual or the iteration counter exceeds
 *             options[LIS_OPTIONS_MAXITER].
 *
 * Work-vector sharing visible below:
 *   work[5] : qhat, u       work[6] : uhat, vhat
 *
 * On exit solver->iter holds the total iteration counter and solver->iter2
 * the phase-1 counter.
 *
 * Returns LIS_SUCCESS (also when the first lis_solver_get_initial_residual()
 * call returns non-zero), LIS_BREAKDOWN, or LIS_MAXITER.
 */
LIS_INT lis_cgs_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b,x;
    LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, tol, tol2;
    /* Initialised so that a breakdown on the first phase-1 iteration never
     * stores an indeterminate value in solver->resid.  NOTE(review): 0.0 is
     * only a deterministic placeholder. */
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter,maxiter,n,output,conv;
    LIS_INT iter2,maxiter2;
    double times,ptimes;

    LIS_DEBUG_FUNC_IN;

    /* M, b and n are not referenced by name below; they are retained because
     * the LIS_MATVEC macro definition is outside this view. */
    A        = solver->A;
    M        = solver->precon;
    b        = solver->b;
    x        = solver->x;
    n        = A->n;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    /* Both tolerances are overwritten further down before they are read. */
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptimes   = 0.0;

    r        = solver->work[0];
    rtld     = solver->work[1];
    p        = solver->work[2];
    phat     = solver->work[3];
    q        = solver->work[4];
    qhat     = solver->work[5];
    u        = solver->work[5];
    uhat     = solver->work[6];
    vhat     = solver->work[6];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, q);
    lis_vector_set_allex_nm(0.0, p);

    /* Phase 1 flags these vectors as default precision.
     * NOTE(review): the BREAKDOWN returns inside the phase-1 loop leave the
     * flags at LIS_PRECISION_DEFAULT; confirm callers do not depend on them. */
    uhat->precision = LIS_PRECISION_DEFAULT;
    p->precision    = LIS_PRECISION_DEFAULT;
    phat->precision = LIS_PRECISION_DEFAULT;

    /* ---------------- phase 1: .hi word only ---------------- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld,r,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        beta.hi[0] = (rho.hi[0] / rho_old.hi[0]);

        /* u = r + beta*q */
        lis_vector_axpyz(beta.hi[0],q,r,u);

        /* p = u + beta*(q + beta*p) */
        lis_vector_xpay(q,beta.hi[0],p);
        lis_vector_xpay(u,beta.hi[0],p);

        /* phat = M^-1 * p */
        times = lis_wtime();
        lis_psolve(solver, p, phat);
        ptimes += lis_wtime()-times;

        /* v = A * phat */
        LIS_MATVEC(A,phat,vhat);

        /* tmpdot1 = <rtld,vhat> */
        lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

        /* q = u - alpha*vhat */
        lis_vector_axpyz(-alpha.hi[0],vhat,u,q);

        /* phat = u + q          */
        /* uhat = M^-1 * (u + q) */
        lis_vector_axpyz(1.0,u,q,phat);
        times = lis_wtime();
        lis_psolve(solver, phat, uhat);
        ptimes += lis_wtime()-times;

        /* x = x + alpha*uhat */
        lis_vector_axpy(alpha.hi[0],uhat,x);

        /* qhat = A * uhat */
        LIS_MATVEC(A,uhat,qhat);

        /* r = r - alpha*qhat */
        lis_vector_axpy(-alpha.hi[0],qhat,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        /* switch criterion reached: leave phase 1 */
        if( nrm2 <= tol2 )
        {
            solver->iter   = iter;
            solver->iter2  = iter;
            solver->ptimes = ptimes;
            break;
        }

        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- restart for phase 2 ---------------- */
    uhat->precision = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    phat->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);
    rho_old.hi[0] = 1.0;

    /* the return value is deliberately not tested here (as in the original) */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, q);
    lis_vector_set_allex_nm(0.0, p);

    /* ---------------- phase 2: quad scalars ---------------- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

        /* u = r + beta*q */
        lis_vector_axpyzex_mmmm(beta,q,r,u);

        /* p = u + beta*(q + beta*p) */
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);

        /* phat = M^-1 * p */
        times = lis_wtime();
        lis_psolve(solver, p, phat);
        ptimes += lis_wtime()-times;

        /* v = A * phat */
        LIS_MATVEC(A,phat,vhat);

        /* tmpdot1 = <rtld,vhat> */
        lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* q = u - alpha*vhat  (alpha is negated in place for the update) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

        /* phat = u + q          */
        /* uhat = M^-1 * (u + q) */
        lis_vector_axpyzex_mmmm(one,u,q,phat);
        times = lis_wtime();
        lis_psolve(solver, phat, uhat);
        ptimes += lis_wtime()-times;

        /* x = x + alpha*uhat  (sign of alpha restored first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,uhat,x);

        /* qhat = A * uhat */
        LIS_MATVEC(A,uhat,qhat);

        /* r = r - alpha*qhat */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,qhat,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_esi_quad
 *
 * Subspace iteration for options[LIS_EOPTIONS_SUBSPACE] eigenpairs.  Each
 * mode j is iterated with the inner scheme selected by
 * options[LIS_EOPTIONS_INNER_ESOLVER] (PI / II / AII / RQI) after the current
 * vector has been orthogonalised against modes 1..j-1 using quad-precision
 * dot products.
 *
 * Per mode the function stores esolver->evalue[j-1] and
 * esolver->evector[j-1]; for the first mode it also records esolver->iter,
 * esolver->resid and (when requested) esolver->residual[].  At the end the
 * eigenvector selected by options[LIS_EOPTIONS_MODE] is copied to esolver->x.
 *
 * A is shifted in place by esolver->lshift for II / AII / RQI and shifted
 * back before the normal return.
 *
 * Returns LIS_SUCCESS, or the error code of lis_precon_create().
 * NOTE(review): on that error path the diagonal shift is not undone, the
 * inner solver is not destroyed and LIS_DEBUG_FUNC_OUT is not emitted; this
 * is unchanged from the original.
 * NOTE(review): v = &work[2], so v[1] is work[3], the same vector as Ax.
 */
LIS_INT lis_esi_quad(LIS_ESOLVER esolver)
{
    LIS_MATRIX A;
    LIS_VECTOR x, Ax;
    LIS_SCALAR xAx, xx, lshift;
    /* mu is only given a value in the RQI branch, but "mu = mu + 1/dotvr"
     * runs for every inner solver; start from a defined value. */
    LIS_SCALAR mu = 0.0;
    LIS_INT ss;
    LIS_INT emaxiter;
    LIS_REAL tol;
    LIS_INT j,k;
    /* Defined start values in case emaxiter < 1 and the inner loop never
     * assigns them before they are stored / printed. */
    LIS_SCALAR dotvr = 0.0;
    LIS_INT iter,giter,output,niesolver;
    LIS_REAL nrm2;
    LIS_REAL resid = 0.0;
    LIS_QUAD_PTR qdot_vv, qdot_vr;
    LIS_VECTOR *v,r,q;
    LIS_SOLVER solver;
    LIS_PRECON precon;
    double times,itimes,ptimes,p_c_times,p_i_times;
    LIS_INT err;
    LIS_INT nsol, precon_type;
    char solvername[128], preconname[128];

    LIS_DEBUG_FUNC_IN;

    A         = esolver->A;
    x         = esolver->x;

    ss        = esolver->options[LIS_EOPTIONS_SUBSPACE];
    emaxiter  = esolver->options[LIS_EOPTIONS_MAXITER];
    tol       = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
    lshift    = esolver->lshift;
    output    = esolver->options[LIS_EOPTIONS_OUTPUT];
    niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

    r  = esolver->work[0];
    q  = esolver->work[1];
    v  = &esolver->work[2];
    Ax = esolver->work[3];

    LIS_QUAD_SCALAR_MALLOC(qdot_vv,0,1);
    LIS_QUAD_SCALAR_MALLOC(qdot_vr,1,1);

    /* start vector: normalised vector of ones */
    lis_vector_set_all(1.0,r);
    lis_vector_nrm2(r, &nrm2);
    lis_vector_scale(1/nrm2,r);

    /* set up the inner linear solver for the schemes that need one */
    switch ( niesolver )
    {
    case LIS_ESOLVER_II:
        lis_solver_create(&solver);
        lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
        lis_solver_set_optionC(solver);
        lis_solver_get_solver(solver, &nsol);
        lis_solver_get_precon(solver, &precon_type);
        lis_get_solvername(nsol, solvername);
        lis_get_preconname(precon_type, preconname);
        printf("solver : %s %d\n", solvername, nsol);
        printf("precon : %s %d\n", preconname, precon_type);
        if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
        if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
        break;
    case LIS_ESOLVER_AII:
        lis_solver_create(&solver);
        lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
        lis_solver_set_optionC(solver);
        lis_solver_get_solver(solver, &nsol);
        lis_solver_get_precon(solver, &precon_type);
        lis_get_solvername(nsol, solvername);
        lis_get_preconname(precon_type, preconname);
        printf("solver : %s %d\n", solvername, nsol);
        printf("precon : %s %d\n", preconname, precon_type);
        if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
        if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
        /* one solve with a ones right-hand side, then a preconditioner is
         * created and attached to the solver for lis_psolve() below */
        lis_vector_set_all(1.0,q);
        lis_solve(A, q, x, solver);
        lis_precon_create(solver, &precon);
        solver->precon = precon;
        break;
    case LIS_ESOLVER_RQI:
        lis_solver_create(&solver);
        lis_solver_set_option("-p ilu -precision quad -maxiter 10",solver);
        lis_solver_set_optionC(solver);
        lis_solver_get_solver(solver, &nsol);
        lis_solver_get_precon(solver, &precon_type);
        lis_get_solvername(nsol, solvername);
        lis_get_preconname(precon_type, preconname);
        printf("solver : %s %d\n", solvername, nsol);
        printf("precon : %s %d\n", preconname, precon_type);
        if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
        if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
        break;
    }

    giter=0;
    j=0;
    while (j<ss)
    {
        lis_vector_duplicate(A,&esolver->evector[j]);
        j = j+1;
        lis_vector_copy(r, v[j]);

        if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
        {
            /* create preconditioner */
            solver->A = A;
            err = lis_precon_create(solver, &precon);
            if( err )
            {
                lis_solver_work_destroy(solver);
                solver->retcode = err;
                return err;
            }
        }

        if (niesolver==LIS_ESOLVER_RQI)
        {
            /* initial Rayleigh quotient mu = <x,Ax> / <x,x> */
            lis_vector_nrm2(x, &nrm2);
            lis_vector_scale(1/nrm2, x);
            lis_matvec(A, x, Ax);
            lis_vector_dot(x, Ax, &xAx);
            lis_vector_dot(x, x, &xx);
            mu = xAx / xx;
        }

        iter = 0;
        while (iter<emaxiter)
        {
            iter = iter+1;
            giter = giter+1;

            /* orthogonalise v[j] against the modes already found */
            for (k=1;k<j;k++)
            {
                lis_vector_dotex_mmm(v[j], v[k], &qdot_vv);
                lis_quad_minus((LIS_QUAD *)qdot_vv.hi);
                lis_vector_axpyex_mmm(qdot_vv,v[k],v[j]);
            }

            /* r = (inner operator) * v[j] */
            switch( niesolver )
            {
            case LIS_ESOLVER_PI:
                lis_matvec(A,v[j],r);
                break;
            case LIS_ESOLVER_II:
                lis_solve_kernel(A, v[j], r, solver, precon);
                break;
            case LIS_ESOLVER_AII:
                lis_psolve(solver, v[j], r);
                break;
            case LIS_ESOLVER_RQI:
                lis_vector_nrm2(v[j], &nrm2);
                lis_vector_scale(1/nrm2, v[j]);
                /* solve with A - mu*I, then put the diagonal back */
                lis_matrix_shift_diagonal(A, -mu);
                lis_solve_kernel(A, v[j], r, solver, precon);
                lis_matrix_shift_diagonal(A, mu);
                break;
            }

            /* accumulate inner-solver timings for the first mode only */
            if ( j==1 && ( niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_AII || niesolver==LIS_ESOLVER_RQI ))
            {
                /* first out-argument restored to &times (the source showed a
                 * mis-decoded multiplication sign here) */
                lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
                esolver->ptimes    += solver->ptimes;
                esolver->itimes    += solver->itimes;
                esolver->p_c_times += solver->p_c_times;
                esolver->p_i_times += solver->p_i_times;
            }

            /* dotvr = <v[j],r>;  q = r - dotvr*v[j] */
            lis_vector_nrm2(r, &nrm2);
            lis_vector_dotex_mmm(v[j], r, &qdot_vr);
            lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
            lis_vector_axpyzex_mmmm(qdot_vr,v[j],r,q);
            lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
            dotvr = qdot_vr.hi[0];
            mu = mu + 1/dotvr;

            /* resid = ||q||_2 / |dotvr| */
            lis_vector_nrm2(q, &resid);
            resid = fabs(resid / dotvr);

            /* v[j] = r / ||r||_2 */
            lis_vector_scale(1/nrm2,r);
            lis_vector_copy(r, v[j]);

            if ( j==1 )
            {
                if( output & LIS_PRINT_MEM ) esolver->residual[iter] = resid;
                if( output & LIS_PRINT_OUT ) printf("iter: %5d residual = %e\n", iter, resid);
                esolver->iter  = iter;
                esolver->resid = resid;
            }
            if (tol>resid) break;
        }

        if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
        {
            lis_precon_destroy(precon);
        }

        switch ( niesolver )
        {
        case LIS_ESOLVER_PI:
            esolver->evalue[j-1] = dotvr;
            break;
        case LIS_ESOLVER_II:
            esolver->evalue[j-1] = 1/dotvr;
            break;
        case LIS_ESOLVER_AII:
            esolver->evalue[j-1] = 1/dotvr;
            break;
        case LIS_ESOLVER_RQI:
            esolver->evalue[j-1] = mu;
            break;
        }
        lis_vector_copy(v[j], esolver->evector[j-1]);

        if (A->my_rank==0 && ss>1)
        {
#ifdef _LONGLONG
            printf("Subspace: mode number = %lld\n", j-1);
#else
            printf("Subspace: mode number = %d\n", j-1);
#endif
            printf("Subspace: eigenvalue = %e\n", esolver->evalue[j-1]);
#ifdef _LONGLONG
            printf("Subspace: number of iterations = %lld\n",iter);
#else
            printf("Subspace: number of iterations = %d\n",iter);
#endif
            printf("Subspace: relative residual 2-norm = %e\n",resid);
        }
    }

    lis_vector_copy(esolver->evector[esolver->options[LIS_EOPTIONS_MODE]], esolver->x);

    /* undo the diagonal shift and release the inner solver */
    switch ( niesolver )
    {
    case LIS_ESOLVER_II:
        if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
        lis_solver_destroy(solver);
        break;
    case LIS_ESOLVER_AII:
        if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
        lis_precon_destroy(precon);
        lis_solver_destroy(solver);
        break;
    case LIS_ESOLVER_RQI:
        if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
        lis_solver_destroy(solver);
        break;
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * lis_bicrstab_quad
 *
 * Preconditioned BiCRSTAB iteration with quad-precision (hi/lo) recurrence
 * scalars and extended-precision vector kernels.
 *
 * Input (read from `solver`):
 *   A, x, options[MAXITER|OUTPUT|CONV_COND], work[0..8], tol (read after the
 *   initial-residual call).
 *
 * Each iteration checks convergence twice: once on the intermediate residual
 * s (in which case only the alpha*p part of the update is applied to x) and
 * once on the full residual r.
 *
 * Returns:
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (nothing else is written), or a convergence check passed.
 *   LIS_BREAKDOWN - <rtld,z> evaluated to exactly zero.
 *   LIS_MAXITER   - loop ran out; solver->iter is then maxiter+1.
 * solver->ptimes is only stored on the converged exits.
 */
LIS_INT lis_bicrstab_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b,x;
    LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z;
    LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
    LIS_REAL bnrm2, tol;
    /* Initialised so that the MAXITER exit taken with a non-positive maxiter
     * never stores an indeterminate value in solver->resid.
     * NOTE(review): 0.0 is only a deterministic placeholder. */
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter,maxiter,n,output,conv;
    double times,ptimes;

    LIS_DEBUG_FUNC_IN;

    /* M, b and n are not referenced by name below; they are retained because
     * the LIS_MATVEC/LIS_MATVECT macro definitions are outside this view. */
    A       = solver->A;
    M       = solver->precon;
    b       = solver->b;
    x       = solver->x;
    n       = A->n;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    rtld    = solver->work[0];
    r       = solver->work[1];
    s       = solver->work[2];
    ms      = solver->work[3];
    ams     = solver->work[4];
    p       = solver->work[5];
    ap      = solver->work[6];
    map     = solver->work[7];
    z       = solver->work[8];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(omega,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /* p is used as scratch for the shadow residual; rtld = A^T * p */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    /* z = M^-1 * r,  p = z,  rho_old = <rtld,z> */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    ptimes += lis_wtime()-times;
    lis_vector_copyex_mm(z,p);
    lis_vector_dotex_mmm(rtld,z,&rho_old);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* ap      = A * p             */
        /* map     = M^-1 * ap         */
        /* tmpdot1 = <rtld,map>        */
        /* alpha   = rho_old / tmpdot1 */
        /* s       = r - alpha*ap      */
        LIS_MATVEC(A,p,ap);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,map,&tmpdot1);
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,ap,r,s);

        /* Early check for tolerance */
        lis_solver_get_residual[conv](s,solver,&nrm2);
        if( nrm2 <= tol )
        {
            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
            }

            /* x = x + alpha*p  (sign of alpha restored first) */
            lis_quad_minus((LIS_QUAD *)alpha.hi);
            lis_vector_axpyex_mmm(alpha,p,x);
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* ms      = z - alpha*map     (alpha still holds -alpha here) */
        /* ams     = A * ms            */
        /* tmpdot1 = <ams,s>           */
        /* tmpdot2 = <ams,ams>         */
        /* omega   = tmpdot1 / tmpdot2 */
        lis_vector_axpyzex_mmmm(alpha,map,z,ms);
        LIS_MATVEC(A,ms,ams);
        lis_vector_dotex_mmm(ams,s,&tmpdot1);
        lis_vector_dotex_mmm(ams,ams,&tmpdot2);
        lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

        /* x = x + alpha*p + omega*ms */
        /* r = s - omega*ams          */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_vector_axpyex_mmm(omega,ms,x);
        lis_quad_minus((LIS_QUAD *)omega.hi);
        lis_vector_axpyzex_mmmm(omega,ams,s,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* z   = M^-1 * r */
        /* rho = <rtld,z> */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,z,&rho);
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / omega) */
        /* p    = z + beta*(p - omega*map)          */
        /* omega is flipped back to +omega for the quotient and to -omega
         * again for the p update. */
        lis_quad_minus((LIS_QUAD *)omega.hi);
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_quad_minus((LIS_QUAD *)omega.hi);
        lis_vector_axpyex_mmm(omega,map,p);
        lis_vector_xpayex_mmm(z,beta,p);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicgsafe_switch
 *
 * Two-phase preconditioned BiCGSafe:
 *   phase 1 - at most options[LIS_OPTIONS_SWITCH_MAXITER] iterations using
 *             the plain vector kernels and only the .hi word of each scalar,
 *             stopped early once tol_switch >= residual;
 *   phase 2 - the recurrence is restarted from the current x (copied to
 *             solver->xx, INITGUESS_ZEROS cleared, residual recomputed,
 *             recurrence vectors zeroed) and continued with quad scalars and
 *             "ex" kernels until tol > residual or the iteration counter
 *             exceeds options[LIS_OPTIONS_MAXITER].
 *
 * On every exit that happens after the restart, solver->iter holds the total
 * iteration counter (iter2) and solver->iter2 the phase-1 counter (iter).
 *
 * Returns LIS_SUCCESS (also when the first lis_solver_get_initial_residual()
 * call returns non-zero), LIS_BREAKDOWN, or LIS_MAXITER.
 *
 * NOTE(review): z is never explicitly zeroed in this function; the first
 * iteration of each phase scales it by eta == 0.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld, phat;
    LIS_VECTOR t, ttld, that, t0, t0hat;
    LIS_VECTOR y, w, u, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, tol, tol2;
    /* Initialised so that a breakdown on the first phase-1 iteration never
     * stores an indeterminate value in solver->resid.  NOTE(review): 0.0 is
     * only a deterministic placeholder. */
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter,maxiter,output,conv;
    LIS_INT iter2,maxiter2;
    double time,ptime;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    /* Both tolerances are overwritten further down before they are read. */
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptime    = 0.0;

    rtld     = solver->work[0];
    r        = solver->work[1];
    rhat     = solver->work[2];
    p        = solver->work[3];
    ptld     = solver->work[4];
    phat     = solver->work[5];
    t        = solver->work[6];
    ttld     = solver->work[7];
    that     = solver->work[8];
    t0       = solver->work[9];
    t0hat    = solver->work[10];
    y        = solver->work[11];
    w        = solver->work[12];
    u        = solver->work[13];
    z        = solver->work[14];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    /* "one" holds -1 by default; it is flipped to +1 only around the x
     * update in phase 2. */
    one.hi[0]     = -1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 1: .hi word only ---------------- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld,r,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

        /* w = ttld + beta*ptld */
        lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpy(-1,u,p);
        lis_vector_xpay(rhat,beta.hi[0],p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
        /* (no breakdown test on tmpdot[0] in the original) */

        /* alpha = rho / tmpdot[0] */
        alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyz(-1,w,ptld,y);
        lis_vector_xpay(t,alpha.hi[0],y);
        lis_vector_axpy(-1,r,y);

        /* t = r - alpha*ptld */
        lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dot(y,y,&tmpdot[0].hi[0]);
        lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
        lis_vector_dot(y,t,&tmpdot[2].hi[0]);
        lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
        lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
        if(iter==1)
        {
            /* first step: one-parameter form, eta = 0 */
            qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
            eta.hi[0] = 0.0;
        }
        else
        {
            tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
            qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
            eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpay(t0hat,beta.hi[0],u);
        lis_vector_axpy(-1,rhat,u);
        lis_vector_scale(eta.hi[0],u);
        lis_vector_axpy(qsi.hi[0],phat,u);

        /* z = qsi*rhat + eta*z - alpha*u */
        lis_vector_scale(eta.hi[0],z);
        lis_vector_axpy(qsi.hi[0],rhat,z);
        lis_vector_axpy(-alpha.hi[0],u,z);

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha.hi[0],p,x);
        lis_vector_axpy(1,z,x);

        /* r = t - eta*y - qsi*ttld */
        lis_vector_axpyz(-eta.hi[0],y,t,r);
        lis_vector_axpy(-qsi.hi[0],ttld,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        /* switch criterion reached: leave phase 1 */
        if( tol2 >= nrm2 )
        {
            solver->iter  = iter;
            solver->iter2 = iter;
            solver->ptime = ptime;
            break;
        }

        lis_vector_copy(t,t0);
        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- restart for phase 2 ---------------- */
    r->precision    = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    t->precision    = LIS_PRECISION_QUAD;
    t0->precision   = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;
    that->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    /* only the .hi words are reset; phase 1 never wrote the .lo words */
    rho_old.hi[0] = 1.0;
    alpha.hi[0]   = 1.0;
    qsi.hi[0]     = 1.0;
    one.hi[0]     = -1.0;

    /* Initial Residual (return value deliberately not tested, as in the
     * original) */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 2: quad scalars ---------------- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

        /* w = ttld + beta*ptld */
        lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(rhat,beta,p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
        /* (no breakdown test on tmpdot[0] in the original) */

        /* alpha = rho / tmpdot[0] */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyzex_mmmm(one,w,ptld,y);
        lis_vector_xpayex_mmm(t,alpha,y);
        lis_vector_axpyex_mmm(one,r,y);

        /* t = r - alpha*ptld  (alpha stays negated until the x update) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dotex_mmm(y,y,&tmpdot[0]);
        lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
        lis_vector_dotex_mmm(y,t,&tmpdot[2]);
        lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
        lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);

        /* The recurrence was restarted above (u, t, t0 zeroed), so the first
         * quad iteration must take the eta = 0 branch exactly as iteration 1
         * of phase 1 does.  The original tested the phase-1 counter
         * (iter==1), which only matched when phase 1 stopped after a single
         * step. */
        if( iter2==iter+1 )
        {
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2  (qsi used as scratch) */
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

            /* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp
             * (eta used as scratch) */
            lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

            /* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp
             * (tmpdot[0] used as scratch) */
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpayex_mmm(t0hat,beta,u);
        lis_vector_axpyex_mmm(one,rhat,u);
        lis_vector_scaleex_mm(eta,u);
        lis_vector_axpyex_mmm(qsi,phat,u);

        /* z = qsi*rhat + eta*z - alpha*u  (alpha currently holds -alpha) */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,rhat,z);
        lis_vector_axpyex_mmm(alpha,u,z);

        /* x = x + alpha*p + z  (alpha and one flipped to positive; one is
         * flipped back to -1 afterwards) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_vector_axpyex_mmm(one,z,x);
        lis_quad_minus((LIS_QUAD *)one.hi);

        /* r = t - eta*y - qsi*ttld  (eta and qsi negated for the update and
         * restored afterwards) */
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);
        lis_vector_axpyzex_mmmm(eta,y,t,r);
        lis_vector_axpyex_mmm(qsi,ttld,r);
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
            /* print the same index that is stored in rhistory[] */
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        lis_vector_copyex_mm(t,t0);
        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    /* Same iter / iter2 convention as the SUCCESS and BREAKDOWN exits above
     * (the original had the two counters swapped here). */
    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_epi_quad - power iteration eigensolver, variant that accumulates the
 * Rayleigh-quotient dot product through the extended ("ex") vector kernels
 * with a LIS_QUAD_PTR accumulator.
 *
 * Each sweep normalizes x, forms z = A*x, takes evalue = <x,z>, measures
 * resid = ||z - evalue*x||_2 / |evalue| and then replaces x by z.
 *
 * Reads from esolver:  options[LIS_EOPTIONS_MAXITER], options[LIS_EOPTIONS_OUTPUT],
 *                      options[LIS_EOPTIONS_INITGUESS_ONES],
 *                      params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN], A, x, work[0..1].
 * Writes to esolver:   retcode, iter, resid, evalue[0], and residual[iter]
 *                      when LIS_EPRINT_MEM is set in the output option.
 *
 * Returns LIS_SUCCESS as soon as resid <= tol, LIS_MAXITER otherwise.
 */
LIS_INT lis_epi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX   A;
  LIS_VECTOR   x;
  LIS_VECTOR   z,q;
  LIS_INT      emaxiter;
  LIS_INT      iter,output;
  LIS_REAL     tol;
  LIS_REAL     nrm2;
  LIS_QUAD_PTR qdot_xz;
  /* Defined up front so the LIS_MAXITER exit never reads indeterminate
     values when the iteration limit is zero or negative and the loop body
     is skipped entirely. */
  LIS_SCALAR   evalue = 0.0;
  LIS_REAL     resid  = 0.0;

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol      = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
  output   = esolver->options[LIS_EOPTIONS_OUTPUT];
  A        = esolver->A;
  x        = esolver->x;

  if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
  {
    lis_vector_set_all(1.0,x);
  }

  z = esolver->work[0];
  q = esolver->work[1];

  /* Kept as a stand-alone statement: the macro is project-defined and may
     expand to more than one statement. */
  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter = 0;
  while( iter<emaxiter )
  {
    iter = iter+1;

    /* x = x / ||x||_2 */
    lis_vector_nrm2(x, &nrm2);
    lis_vector_scale(1/nrm2, x);

    /* z = A * x */
    lis_matvec(A,x,z);

    /* evalue = <x,z> */
    lis_vector_dotex_mmm(x, z, &qdot_xz);

    /* resid = ||z - evalue * x||_2 / |evalue|
       The accumulator is negated around the axpyz call so that
       q = z - evalue*x, then negated back before evalue is read. */
    lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
    lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
    lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
    lis_vector_nrm2(q, &resid);
    evalue = qdot_xz.hi[0];
    resid  = fabs(resid / evalue);

    /* x = z */
    lis_vector_copy(z, x);

    /* residual history / progress output */
    if( output )
    {
      if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
      if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
    }

    /* convergence check */
    if( tol >= resid )
    {
      esolver->retcode   = LIS_SUCCESS;
      esolver->iter      = iter;
      esolver->resid     = resid;
      esolver->evalue[0] = evalue;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }
  }

  esolver->retcode   = LIS_MAXITER;
  esolver->iter      = iter;
  esolver->resid     = resid;
  esolver->evalue[0] = evalue;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
/*
 * lis_eii_quad - inverse iteration eigensolver, variant that accumulates the
 * <x,z> dot product through the extended ("ex") vector kernels with a
 * LIS_QUAD_PTR accumulator.
 *
 * The matrix diagonal is shifted by esolver->lshift for the duration of the
 * call and shifted back by -lshift on every exit path. Each sweep normalizes
 * x, obtains z from lis_solve_kernel(A, x, z, ...) using an inner solver
 * configured with "-i bicg -p none -precision quad", takes ievalue = <x,z>,
 * measures resid = ||z - ievalue*x||_2 / |ievalue| and replaces x by z.
 *
 * Writes to esolver: retcode, iter[0], resid[0], evalue[0] (= 1/ievalue),
 *                    the accumulated ptime/itime/p_c_time/p_i_time, and
 *                    rhistory[iter] when LIS_EPRINT_MEM is set.
 *
 * Returns LIS_SUCCESS once resid <= tol, LIS_MAXITER when the iteration
 * limit is exhausted, or the error code of lis_precon_create on failure.
 */
LIS_INT lis_eii_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX   A;
  LIS_VECTOR   x;
  LIS_VECTOR   z,q;
  LIS_SCALAR   ievalue;
  LIS_SCALAR   lshift;
  LIS_INT      emaxiter;
  LIS_REAL     tol;
  LIS_INT      iter,iter2,output;
  LIS_INT      verbose;
  LIS_REAL     nrm2;
  /* Defined up front so the final report never reads an indeterminate
     value when the iteration limit is zero or negative. */
  LIS_REAL     resid = 0.0;
  LIS_QUAD_PTR qdot_xz;
  LIS_SOLVER   solver;
  LIS_PRECON   precon;
  double       time,itime,ptime,p_c_time,p_i_time;
  LIS_INT      err;
  LIS_INT      retcode;
  LIS_INT      nsol, precon_type;
  char         solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol      = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
  lshift   = esolver->lshift;
  output   = esolver->options[LIS_EOPTIONS_OUTPUT];
  A        = esolver->A;
  x        = esolver->x;

  if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
  {
    lis_vector_set_all(1.0,x);
  }

  z = esolver->work[0];
  q = esolver->work[1];

  /* Kept as a stand-alone statement: the macro is project-defined and may
     expand to more than one statement. */
  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter    = 0;
  ievalue = 1.0;

  /* Logical AND: print whenever any output is requested and this is rank 0.
     The former bitwise form `output & (A->my_rank==0)` only looked at bit 0
     of the output option. */
  verbose = (output && A->my_rank==0);

#ifdef _LONG__DOUBLE
  if( verbose ) printf("local shift : %Le\n", lshift);
#else
  if( verbose ) printf("local shift : %e\n", lshift);
#endif

  if( lshift != 0 ) lis_matrix_shift_diagonal(A, lshift);

  /* inner linear solver used for every inverse-iteration step */
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none -precision quad",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  if( verbose ) printf("linear solver : %s\n", solvername);
  if( verbose ) printf("preconditioner : %s\n", preconname);

  /* create preconditioner */
  solver->A = A;
  err = lis_precon_create(solver, &precon);
  if( err )
  {
    /* Undo everything done so far: hand the matrix back unshifted, release
       the locally owned solver (same call the normal exit uses), and report
       the failure on the eigensolver object the caller can inspect. */
    if( lshift != 0 ) lis_matrix_shift_diagonal(A, -lshift);
    lis_solver_destroy(solver);
    esolver->retcode = err;
    LIS_DEBUG_FUNC_OUT;
    return err;
  }

  retcode = LIS_MAXITER;
  while( iter<emaxiter )
  {
    iter = iter+1;

    /* x = x / ||x||_2 */
    lis_vector_nrm2(x, &nrm2);
    lis_vector_scale(1/nrm2, x);

    /* z = (A - lshift I)^-1 * x */
    lis_solve_kernel(A, x, z, solver, precon);
    lis_solver_get_iter(solver,&iter2);

    /* 1/evalue = <x,z> */
    lis_vector_dotex_mmm(x, z, &qdot_xz);

    /* The accumulator is negated around the axpyz call so that
       q = z - <x,z>*x, then negated back before it is read. */
    lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
    lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
    lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
    ievalue = qdot_xz.hi[0];

    /* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
    lis_vector_nrm2(q, &resid);
    resid = fabs(resid/ievalue);

    /* x = z */
    lis_vector_copy(z,x);

    /* accumulate inner-solver timings */
    lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
    esolver->ptime    += solver->ptime;
    esolver->itime    += solver->itime;
    esolver->p_c_time += solver->p_c_time;
    esolver->p_i_time += solver->p_i_time;

    /* residual history / progress output */
    if( output )
    {
      if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
      if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
    }

    /* convergence check */
    if( tol >= resid )
    {
      retcode = LIS_SUCCESS;
      break;
    }
  }

  /* Single exit for both the converged and the max-iteration outcome. */
  esolver->retcode   = retcode;
  esolver->iter[0]   = iter;
  esolver->resid[0]  = resid;
  esolver->evalue[0] = 1/ievalue;

  /* return a unit-norm vector */
  lis_vector_nrm2(x, &nrm2);
  lis_vector_scale(1/nrm2, x);

  /* restore the caller's matrix */
  if( lshift != 0 ) lis_matrix_shift_diagonal(A, -lshift);

  lis_precon_destroy(precon);
  lis_solver_destroy(solver);
  LIS_DEBUG_FUNC_OUT;
  return retcode;
}