/*
 * lis_wtime_f - thin wrapper that returns the value of lis_wtime().
 * (Presumably the Fortran-interface entry point, judging by the _f suffix.)
 *
 * The timestamp is captured in a local so that LIS_DEBUG_FUNC_OUT runs
 * before the function returns; placed after the return statement it would
 * never execute and the debug trace would be left unbalanced.
 */
double lis_wtime_f(void)
{
	double t;

	LIS_DEBUG_FUNC_IN;

	t = lis_wtime();

	LIS_DEBUG_FUNC_OUT;
	return t;
}
/*
 * lis_gs - Gauss-Seidel iteration driver.
 *
 * Reads from `solver`: matrix A, right-hand side b, iterate x,
 * options[LIS_OPTIONS_MAXITER], options[LIS_OPTIONS_OUTPUT], the tolerance
 * params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN] and the scratch vectors work[0..2].
 *
 * Each pass computes s = psolve(x), r = b - A*s, the scaled residual
 * ||r|| / ||b||, and then adds the LIS_MATRIX_LOWER solve of r to x.
 * When the loop ends (converged or not) x is replaced by psolve(x).
 *
 * Returns
 *   LIS_SUCCESS  scaled residual <= tol; retcode, iter, resid and ptime
 *                are stored in the solver.
 *   LIS_MAXITER  iteration limit reached; retcode, iter and resid are stored.
 *   err          the non-zero code reported by lis_matrix_split() or
 *                lis_matrix_diag_duplicate().
 *
 * Every exit, including the two error returns, emits LIS_DEBUG_FUNC_OUT so
 * the trace opened by LIS_DEBUG_FUNC_IN is always closed.
 */
LIS_INT lis_gs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,s;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output;
	double time,ptime;
	LIS_INT err;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	s       = solver->work[2];

	/* Keep 1/||b|| so the residual can be scaled with a multiplication. */
	/* NOTE(review): a zero right-hand side makes this a division by zero. */
	lis_vector_nrm2(b,&bnrm2);
	bnrm2   = 1.0 / bnrm2;

	err = lis_matrix_split(A);
	if( err )
	{
		LIS_DEBUG_FUNC_OUT;
		return err;
	}

	/* Prepare the inverted diagonal copy A->WD once per matrix; use_wd marks
	   which solver the cached copy was prepared for. */
	if( A->use_wd!=LIS_SOLVER_GS )
	{
		if( !A->WD )
		{
			err = lis_matrix_diag_duplicate(A->D,&A->WD);
			if( err )
			{
				LIS_DEBUG_FUNC_OUT;
				return err;
			}
		}
		lis_matrix_diag_copy(A->D,A->WD);
		lis_matrix_diag_inverse(A->WD);
		A->use_wd = LIS_SOLVER_GS;
	}

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* x += (D-L)^{-1}(b - Ax) */
		time = lis_wtime();
		lis_psolve(solver,x,s);
		ptime += lis_wtime() - time;
		lis_matvec(A,s,t);
		lis_vector_axpyz(-1,t,b,r);          /* r = b - t */
		lis_vector_nrm2(r,&nrm2);
		lis_matrix_solve(A,r,t,LIS_MATRIX_LOWER);
		lis_vector_axpy(1,t,x);

		/* convergence check */
		nrm2 = nrm2 * bnrm2;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			/* Convert the iterate back through the preconditioner before
			   handing it to the caller. */
			time = lis_wtime();
			lis_psolve(solver,x,s);
			ptime += lis_wtime() - time;
			lis_vector_copy(s,x);

			solver->retcode   = LIS_SUCCESS;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptime     = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	lis_psolve(solver,x,s);
	lis_vector_copy(s,x);

	/* NOTE(review): if maxiter < 1 the loop never runs and nrm2 is stored
	   without ever having been assigned. */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_orthomin_quad - Orthomin(m) iteration using the extended-precision
 * ("ex"/quad) vector kernels.
 *
 * Reads from `solver`: A, precon, x, options MAXITER / OUTPUT / RESTART (m) /
 * CONV_COND, and the work array laid out as
 *   work[0]               r
 *   work[1]               rtld
 *   work[2 .. ]           p[0..m]
 *   work[(m+1)+2 .. ]     ap[0..m]
 *   work[2*(m+1)+2 .. ]   aptld[0..m]
 * The direction vectors are used as a ring buffer indexed modulo (m+1).
 *
 * dotsave[k] holds 1/<aptld,aptld> for the direction k steps back.  It is
 * heap-allocated here and released on EVERY exit path, including the early
 * return taken when the initial residual check reports nothing to do.
 *
 * Returns LIS_SUCCESS (tol > residual, or initial-residual shortcut),
 * LIS_BREAKDOWN (<aptld,aptld> is exactly zero) or LIS_MAXITER.
 * retcode/iter/resid are stored on the three loop exits; ptime on success.
 */
LIS_INT lis_orthomin_quad(LIS_SOLVER solver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, *p, *ap, *aptld;
	LIS_QUAD *dotsave;
	LIS_QUAD_PTR alpha, beta, tmp, one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;
	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	M       = solver->precon;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,3,1);
	LIS_QUAD_SCALAR_MALLOC(one,4,1);

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = &solver->work[2];
	ap      = &solver->work[  (m+1)+2];
	aptld   = &solver->work[2*(m+1)+2];

	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* NOTE(review): the allocation result is not checked for NULL. */
	dotsave = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (m+1),"lis_orthomin_quad::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		/* dotsave is already allocated at this point; release it so the
		   shortcut exit does not leak. */
		lis_free(dotsave);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	iter=1;
	while( iter<=maxiter )
	{
		/* slot of the ring buffer used by this iteration */
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copyex_mm(rtld,p[ip]);

		/* ap[ip]    = A*p[ip] */
		/* aptld[ip] = M^-1 ap[ip] */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		/* Orthogonalise against at most m previous directions. */
		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			ip0 = (ip+m+1-l) % (m+1);
			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dotex_mmm(aptld[ip],aptld[ip0],&beta);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,&dotsave[l-1]);
			lis_quad_minus((LIS_QUAD *)beta.hi);

			lis_vector_axpyex_mmm(beta,p[ip0]    ,p[ip]);
			lis_vector_axpyex_mmm(beta,ap[ip0]   ,ap[ip]);
			lis_vector_axpyex_mmm(beta,aptld[ip0],aptld[ip]);
		}

		/* Age the saved reciprocals by one step to make room in slot 0. */
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dotex_mmm(aptld[ip],aptld[ip],&tmp);
		dotsave[0].hi = tmp.hi[0];
		dotsave[0].lo = tmp.lo[0];

		/* test breakdown */
		if( tmp.hi[0]==0.0 && tmp.lo[0]==0.0 )
		{
			/* NOTE(review): on the very first iteration nrm2 has not been
			   computed yet when it is stored here. */
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		/* dotsave[0] = 1 / <aptld[ip],aptld[ip]> */
		lis_quad_div(&dotsave[0],(LIS_QUAD *)one.hi,&dotsave[0]);

		/* alpha = <rtld,Aptld[ip]> */
		lis_vector_dotex_mmm(rtld,aptld[ip],&alpha);
		lis_quad_mul((LIS_QUAD *)alpha.hi,(LIS_QUAD *)alpha.hi,&dotsave[0]);

		/* x += alpha*p; then flip the sign of alpha for the two residual
		   updates and flip it back afterwards. */
		lis_vector_axpyex_mmm( alpha,p[ip],x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap[ip],r);
		lis_vector_axpyex_mmm(alpha,aptld[ip],rtld);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode   = LIS_SUCCESS;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptime     = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		iter++;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_cgs_switch - CGS iteration run in two phases.
 *
 * Phase 1 (iter = 1..maxiter2) uses the plain vector kernels
 * (lis_vector_dot / axpy / axpyz / xpay) and only the .hi part of the quad
 * scalars; it stops as soon as nrm2 <= tol2 (solver->tol_switch).
 * Phase 2 (iter2 = iter+1..maxiter) recomputes the residual from the
 * current x and continues with the "ex" kernels and full hi/lo scalars until
 * tol > nrm2 (tol = solver->tol).
 *
 * The precision field of uhat, p and phat is set to LIS_PRECISION_DEFAULT
 * for phase 1 and to LIS_PRECISION_QUAD before phase 2.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (a dot product used as a divisor is
 * exactly zero) or LIS_MAXITER.  solver->iter receives the overall iteration
 * count and solver->iter2 the count reached in phase 1.
 */
LIS_INT lis_cgs_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	/* M, b and n are assigned here but not referenced again in this function. */
	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	/* Both tolerances are overwritten (from solver->tol_switch / solver->tol)
	   before they are first compared against. */
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	/* Work-vector layout.  qhat/u share work[5] and uhat/vhat share work[6];
	   each pair is written and consumed at different points of an iteration. */
	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];
	uhat    = solver->work[6];
	vhat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* With q = p = 0 and rho_old = 1 the first pass yields u = p = r. */
	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	uhat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	/* ---- Phase 1: plain kernels, .hi parts only ---- */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]);

		/* u = r + beta*q */
		lis_vector_axpyz(beta.hi[0],q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpay(q,beta.hi[0],p);
		lis_vector_xpay(u,beta.hi[0],p);

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,vhat);

		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* q = u - alpha*vhat */
		lis_vector_axpyz(-alpha.hi[0],vhat,u,q);

		/* phat = u + q          */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyz(1.0,u,q,phat);
		times = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptimes += lis_wtime()-times;

		/* x = x + alpha*uhat */
		lis_vector_axpy(alpha.hi[0],uhat,x);

		/* qhat = A * uhat */
		LIS_MATVEC(A,uhat,qhat);

		/* r = r - alpha*qhat */
		lis_vector_axpy(-alpha.hi[0],qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		/* Switch threshold reached: leave phase 1 (no return code is set
		   here; phase 2 decides the final outcome). */
		if( nrm2 <= tol2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptimes     = ptimes;
			break;
		}

		rho_old.hi[0] = rho.hi[0];
	}

	/* ---- Restart for phase 2 ---- */
	uhat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;

	/* The return value is not examined for this second residual setup. */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	/* ---- Phase 2: "ex" kernels, iteration numbering continues after iter ---- */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* u = r + beta*q */
		lis_vector_axpyzex_mmmm(beta,q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,vhat);

		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* q = u - alpha*vhat  (alpha is negated in place: now -alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

		/* phat = u + q          */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyzex_mmmm(one,u,q,phat);
		times = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptimes += lis_wtime()-times;

		/* x = x + alpha*uhat  (sign flipped back: +alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uhat,x);

		/* qhat = A * uhat */
		LIS_MATVEC(A,uhat,qhat);

		/* r = r - alpha*qhat  (sign flipped again: -alpha; alpha is
		   recomputed from scratch next iteration) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_crs(LIS_SOLVER solver) { LIS_MATRIX A; LIS_VECTOR x; LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq; LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1; LIS_REAL bnrm2, nrm2, tol; LIS_INT iter,maxiter,output,conv; double time,ptime; LIS_DEBUG_FUNC_IN; A = solver->A; x = solver->x; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; ptime = 0.0; r = solver->work[0]; rtld = solver->work[1]; p = solver->work[2]; z = solver->work[3]; u = solver->work[3]; uq = solver->work[3]; q = solver->work[4]; ap = solver->work[4]; map = solver->work[5]; auq = solver->work[5]; /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol = solver->tol; lis_solver_set_shadowresidual(solver,r,p); lis_matvect(A,p,rtld); rho_old = 1.0; lis_vector_set_all(0,q); lis_vector_set_all(0,p); for( iter=1; iter<=maxiter; iter++ ) { /* z = M^-1 * r */ /* rho = <rtld,z> */ time = lis_wtime(); lis_psolve(solver, r, z); ptime += lis_wtime()-time; lis_vector_dot(rtld,z,&rho); /* test breakdown */ if( rho==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = rho / rho_old */ /* u = z + beta*q */ /* p = u + beta*(q + beta*p) */ /* ap = A * p */ /* map = M^-1 * ap */ /* tmpdot1 = <rtld,map> */ beta = rho / rho_old; lis_vector_axpyz(beta,q,z,u); lis_vector_xpay(q,beta,p); lis_vector_xpay(u,beta,p); lis_matvec(A,p,ap); time = lis_wtime(); lis_psolve(solver, ap, map); ptime += lis_wtime()-time; lis_vector_dot(rtld,map,&tmpdot1); /* test breakdown */ if( tmpdot1==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho / tmpdot1 */ /* q = u - alpha*map */ /* uq = u + q */ /* auq = A * uq */ /* x = x + alpha*uq */ /* r = r - alpha*auq */ alpha = rho / tmpdot1; 
lis_vector_axpyz(-alpha,map,u,q); lis_vector_axpyz(1,u,q,uq); lis_matvec(A,uq,auq); lis_vector_axpy(alpha,uq,x); lis_vector_axpy(-alpha,auq,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol >= nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter; solver->resid = nrm2; solver->ptime = ptime; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } rho_old = rho; } solver->retcode = LIS_MAXITER; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
/*
 * Test driver: builds the dense (m*n) x (m*n) matrix of the 5-point
 * two-dimensional Laplacian (4 on the diagonal, -1 for each grid neighbour),
 * runs lis_array_qr() on it and prints the iteration count, the elapsed time
 * and the error value reported by the routine.
 *
 * Usage: <program> m n      (both must be positive integers)
 *
 * The three nn x nn work arrays are checked after allocation and released
 * before lis_finalize(); their byte size is computed in size_t so that
 * nn*nn is not multiplied in LIS_INT before being widened.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_SCALAR *a,*q,*r;
	LIS_INT m,n,nn;
	LIS_INT i,j,ii,jj,nnz,qriter;
	size_t nelem;
	double time,time0;
	LIS_REAL qrerr;

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

	if( argc < 3 )
	{
		printf("Usage: %s m n\n", argv[0]);
		CHKERR(1);
	}

	m  = atoi(argv[1]);
	n  = atoi(argv[2]);
	if( m<=0 || n<=0 )
	{
		printf("m=%d <=0 or n=%d <=0\n", m,n);
		CHKERR(1);
	}

	printf("\n");

	/* create arrays */

	nn = m*n;
	nelem = (size_t)nn * (size_t)nn;

	a = (LIS_SCALAR *)malloc(nelem*sizeof(LIS_SCALAR));
	q = (LIS_SCALAR *)malloc(nelem*sizeof(LIS_SCALAR));
	r = (LIS_SCALAR *)malloc(nelem*sizeof(LIS_SCALAR));
	if( a==NULL || q==NULL || r==NULL )
	{
		printf("failed to allocate work arrays\n");
		free(a);
		free(q);
		free(r);
		CHKERR(1);
		/* Not reached if CHKERR terminates the program; guards against
		   continuing with released buffers otherwise. */
		return 1;
	}

	/* define two-dimensional Laplacian */

	lis_array_set_all(nn*nn,(LIS_SCALAR)0.0,a);

	nnz = 0;
	for(ii=0;ii<nn;ii++)
	{
		/* (i,j) are the grid coordinates of unknown ii; entries are stored
		   at a[row + col*nn]. */
		i = ii/m;
		j = ii - i*m;
		if( i>0 )   { jj = ii - m; a[ii + jj * nn] = -1.0; nnz++;}
		if( i<n-1 ) { jj = ii + m; a[ii + jj * nn] = -1.0; nnz++;}
		if( j>0 )   { jj = ii - 1; a[ii + jj * nn] = -1.0; nnz++;}
		if( j<m-1 ) { jj = ii + 1; a[ii + jj * nn] = -1.0; nnz++;}
		jj = ii; a[ii + jj * nn] = 4.0; nnz++;
	}

	printf("matrix size = %d x %d (%d nonzero entries)\n\n", nn,nn,nnz);

	/* solve eigenproblem */

	time0 = lis_wtime();
	lis_array_qr(nn,a,q,r,&qriter,&qrerr);
	time = lis_wtime() - time0;

	printf("QR    : number of iterations = %d\n", qriter);
	printf("QR    : elapsed time         = %e sec.\n", time);
	printf("QR    :   eigensolver        = %e sec.\n", time);
#ifdef _LONG__DOUBLE
	printf("QR    : 2-norm of A(2,1)     = %Le\n\n", qrerr);
#else
	printf("QR    : 2-norm of A(2,1)     = %e\n\n", qrerr);
#endif

	free(a);
	free(q);
	free(r);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
/*
 * lis_gmres - restarted GMRES; the restart length m comes from
 * options[LIS_OPTIONS_RESTART].
 *
 * Storage:
 *   work[0] = s  (right-hand side of the small least-squares problem)
 *   work[1] = r, work[2] = z  (scratch)
 *   work[3..] = v[0..]        (basis vectors)
 *   h        flat array with leading dimension h_dim = m+1: entry (row,col)
 *            of the Hessenberg matrix lives at h[row + col*h_dim]; the
 *            rotation coefficients aa/rr and bb/rr of step ii are kept at
 *            h[ii+cs] and h[ii+sn].
 *
 * Returns LIS_SUCCESS (tol >= nrm2, or the initial-residual shortcut) or
 * LIS_MAXITER.  h is released on every exit.
 */
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL tnrm2;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	/* The allocation result is used without a NULL check. */
	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
	/* Offsets of the two extra columns that hold the rotation coefficients. */
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Outer loop: one pass per restart cycle. */
	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		/* Inner loop: extend the basis by one vector per step, at most m
		   steps or until the iteration limit / tolerance is reached. */
		i = 0;
		do
		{
			iter++;
			i++;
			/* ii: index of the current column, i1: index of the new row;
			   iih: offset of column ii inside h. */
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z,v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k+iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&tnrm2);
			h[i1+iih] = tnrm2;
			lis_vector_scale(1.0/tnrm2,v[i1v]);

			/* Apply the rotations stored by the previous steps to the
			   new column. */
			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t   =  h[jj+iih];
				aa  =  h[jj+cs]*t;
				aa +=  h[jj+sn]*h[k+iih];
				bb  = -h[jj+sn]*t;
				bb +=  h[jj+cs]*h[k+iih];
				h[jj+iih] = aa;
				h[k+iih] = bb;
			}
			/* Build the rotation that removes h[i1,ii]; a zero radius is
			   replaced by 1.0e-17 to keep the divisions defined. */
			aa = h[ii+iih];
			bb = h[i1+iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2+b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii+cs] = aa/rr;
			h[ii+sn] = bb/rr;
			/* Rotate the right-hand side s and the diagonal entry. */
			s->value[i1] = -h[ii+sn]*s->value[ii];
			s->value[ii] =  h[ii+cs]*s->value[ii];
			aa  =  h[ii+cs]*h[ii+iih];
			aa +=  h[ii+sn]*h[i1+iih];
			h[ii+iih] = aa;

			/* convergence check */
			nrm2 = sabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		/* (back substitution; the result overwrites s->value[0..ii]) */
		s->value[ii] = s->value[ii]/h[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t/h[jj+jj*h_dim];
		}
		/* z = z + y * v */
		/* (the first term overwrites z directly, the rest are accumulated) */
#ifdef _OPENMP
#pragma omp parallel for private(k)
#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* Not converged: rebuild the residual for the next cycle in v[0]
		   from the stored rotation coefficients and the basis vectors. */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1+sn]*s->value[jj];
			s->value[jj]   =  h[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	/* Note: the stored count on this path is iter+1, not iter. */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_bicr_quad(LIS_SOLVER solver) { LIS_MATRIX A,At; LIS_PRECON M; LIS_VECTOR b,x; LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld; LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1; LIS_REAL bnrm2, nrm2, tol; LIS_INT iter,maxiter,n,output,conv; double times,ptimes; LIS_DEBUG_FUNC_IN; A = solver->A; At = solver->A; M = solver->precon; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; ptimes = 0.0; r = solver->work[0]; rtld = solver->work[1]; z = solver->work[2]; ztld = solver->work[3]; p = solver->work[4]; ptld = solver->work[5]; ap = solver->work[6]; az = solver->work[7]; map = solver->work[8]; aptld = solver->work[9]; LIS_QUAD_SCALAR_MALLOC(alpha,0,1); LIS_QUAD_SCALAR_MALLOC(beta,1,1); LIS_QUAD_SCALAR_MALLOC(rho,2,1); LIS_QUAD_SCALAR_MALLOC(rho_old,3,1); LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1); /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol = solver->tol; lis_solver_set_shadowresidual(solver,r,rtld); lis_psolve(solver, r, z); lis_psolvet(solver, rtld, ztld); lis_vector_copyex_mm(z,p); lis_vector_copyex_mm(ztld,ptld); LIS_MATVEC(A,z,ap); lis_vector_dotex_mmm(ap,ztld,&rho_old); for( iter=1; iter<=maxiter; iter++ ) { /* aptld = A^T * ptld */ /* map = M^-1 * ap */ LIS_MATVECT(A,ptld,aptld); times = lis_wtime(); lis_psolve(solver, ap, map); ptimes += lis_wtime()-times; /* tmpdot1 = <map,aptld> */ lis_vector_dotex_mmm(map,aptld,&tmpdot1); /* test breakdown */ if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* alpha = rho_old / tmpdot1 */ /* x = x + alpha*p */ /* r = r - alpha*ap */ lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi); lis_vector_axpyex_mmm(alpha,p,x); lis_quad_minus((LIS_QUAD 
*)alpha.hi); lis_vector_axpyex_mmm(alpha,ap,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol >= nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } /* rtld = rtld - alpha*aptld */ /* z = z - alpha*map */ /* ztld = M^-T * rtld */ /* az = A * z */ /* rho = <az,ztld> */ lis_vector_axpyex_mmm(alpha,aptld,rtld); lis_vector_axpyex_mmm(alpha,map,z); times = lis_wtime(); lis_psolvet(solver, rtld, ztld); ptimes += lis_wtime()-times; LIS_MATVEC(A,z,az); lis_vector_dotex_mmm(az,ztld,&rho); /* test breakdown */ if( rho.hi[0]==0.0 && rho.lo[0]==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = rho / rho_old */ /* p = z + beta*p */ /* ptld = ztld + beta*ptld */ /* ap = az + beta*ap */ lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi); lis_vector_xpayex_mmm(z,beta,p); lis_vector_xpayex_mmm(ztld,beta,ptld); lis_vector_xpayex_mmm(az,beta,ap); rho_old.hi[0] = rho.hi[0]; rho_old.lo[0] = rho.lo[0]; } solver->retcode = LIS_MAXITER; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_bicrstab(LIS_SOLVER solver) { LIS_MATRIX A; LIS_PRECON M; LIS_VECTOR b,x; LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z; LIS_SCALAR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2; LIS_REAL bnrm2, nrm2, tol; LIS_INT iter,maxiter,n,output,conv; double times,ptimes; LIS_DEBUG_FUNC_IN; A = solver->A; M = solver->precon; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; conv = solver->options[LIS_OPTIONS_CONV_COND]; ptimes = 0.0; rtld = solver->work[0]; r = solver->work[1]; s = solver->work[2]; ms = solver->work[3]; ams = solver->work[4]; p = solver->work[5]; ap = solver->work[6]; map = solver->work[7]; z = solver->work[8]; /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) ) { LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol = solver->tol; lis_solver_set_shadowresidual(solver,r,p); LIS_MATVECT(A,p,rtld); times = lis_wtime(); lis_psolve(solver, r, z); ptimes += lis_wtime()-times; lis_vector_copy(z,p); lis_vector_dot(rtld,z,&rho_old); for( iter=1; iter<=maxiter; iter++ ) { /* ap = A * p */ /* map = M^-1 * ap */ /* tmpdot1 = <rtld,map> */ /* alpha = rho_old / tmpdot1 */ /* s = r - alpha*ap */ LIS_MATVEC(A,p,ap); times = lis_wtime(); lis_psolve(solver, ap, map); ptimes += lis_wtime()-times; lis_vector_dot(rtld,map,&tmpdot1); alpha = rho_old / tmpdot1; lis_vector_axpyz(-alpha,ap,r,s); /* Early check for tolerance */ lis_solver_get_residual[conv](s,solver,&nrm2); if( nrm2 <= tol ) { if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2); } lis_vector_axpy(alpha,p,x); solver->retcode = LIS_SUCCESS; solver->iter = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } /* ms = z - alpha*map */ /* ams = A * ms */ /* tmpdot1 = <ams,s> */ /* tmpdot2 = <ams,ams> */ /* omega = tmpdot1 / tmpdot2 */ 
lis_vector_axpyz(-alpha,map,z,ms); LIS_MATVEC(A,ms,ams); lis_vector_dot(ams,s,&tmpdot1); lis_vector_dot(ams,ams,&tmpdot2); omega = tmpdot1 / tmpdot2; /* x = x + alpha*p + omega*ms */ /* r = s - omega*ams */ lis_vector_axpy(alpha,p,x); lis_vector_axpy(omega,ms,x); lis_vector_axpyz(-omega,ams,s,r); /* convergence check */ lis_solver_get_residual[conv](r,solver,&nrm2); if( output ) { if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2); } if( tol >= nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter; solver->resid = nrm2; solver->ptimes = ptimes; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } /* z = M^-1 * r */ /* rho = <rtld,z> */ times = lis_wtime(); lis_psolve(solver, r, z); ptimes += lis_wtime()-times; lis_vector_dot(rtld,z,&rho); if( rho==0.0 ) { solver->retcode = LIS_BREAKDOWN; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_BREAKDOWN; } /* beta = (rho / rho_old) * (alpha / omega) */ /* p = z + beta*(p - omega*map) */ beta = (rho / rho_old) * (alpha / omega); lis_vector_axpy(-omega,map,p); lis_vector_xpay(z,beta,p); rho_old = rho; } solver->retcode = LIS_MAXITER; solver->iter = iter; solver->resid = nrm2; LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
/*
 * lis_bicgstab_switch - BiCGSTAB iteration run in two phases.
 *
 * Phase 1 (iter = 1..maxiter2) uses the plain vector kernels and only the
 * .hi part of the quad scalars; it stops as soon as nrm2 <= tol2
 * (solver->tol_switch).
 * Phase 2 (iter2 = iter+1..maxiter) recomputes the residual from the
 * current x and continues with the "ex" kernels and full hi/lo scalars until
 * tol > nrm2 (tol = solver->tol).
 *
 * r and s share work[1]: "s = r - alpha*v" and "r = s - omega*t" are both
 * done as in-place axpy updates of that vector.
 *
 * The first iteration of each phase starts from p = r.  In phase 2 that
 * iteration is iter2 == iter+1 (iter no longer changes once phase 1 is
 * left); testing for iter2 == 1 there could never be true and would send the
 * restart through the general update, which reads the v left over from
 * phase 1.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (rho or omega exactly zero) or
 * LIS_MAXITER.  solver->iter receives the overall iteration count and
 * solver->iter2 the count reached in phase 1.
 */
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, t,p,v, s, phat, shat;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[1];
	t       = solver->work[2];
	p       = solver->work[3];
	v       = solver->work[4];
	phat    = solver->work[5];
	shat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	omega.hi[0] = 1.0;
	omega.lo[0] = 0.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	s->precision = LIS_PRECISION_DEFAULT;
	shat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	/* ---- Phase 1: plain kernels, .hi parts only ---- */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		if( iter==1 )
		{
			lis_vector_copy(r,p);
		}
		else
		{
			/* beta = (rho / rho_old) * (alpha / omega) */
			beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]);

			/* p = r + beta*(p - omega*v) */
			lis_vector_axpy(-omega.hi[0],v,p);
			lis_vector_xpay(r,beta.hi[0],p);
		}

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,v);

		/* tmpdot1 = <rtld,v> */
		lis_vector_dot(rtld,v,&tmpdot1.hi[0]);
		/* NOTE(review): no breakdown test guards the division below. */

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* s = r - alpha*v  (in place: s and r are the same vector) */
		lis_vector_axpy(-alpha.hi[0],v,r);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol2 )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			lis_vector_axpy(alpha.hi[0],phat,x);
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptimes     = ptimes;
			break;
		}

		/* shat = M^-1 * s */
		times = lis_wtime();
		lis_psolve(solver, s, shat);
		ptimes += lis_wtime()-times;

		/* t = A * shat */
		LIS_MATVEC(A,shat,t);

		/* tmpdot1 = <t,s> */
		/* tmpdot2 = <t,t> */
		/* omega   = tmpdot1 / tmpdot2 */
		lis_vector_dot(t,s,&tmpdot1.hi[0]);
		lis_vector_dot(t,t,&tmpdot2.hi[0]);
		omega.hi[0] = tmpdot1.hi[0] / tmpdot2.hi[0];

		/* x = x + alpha*phat + omega*shat */
		lis_vector_axpy(alpha.hi[0],phat,x);
		lis_vector_axpy(omega.hi[0],shat,x);

		/* r = s - omega*t */
		lis_vector_axpy(-omega.hi[0],t,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		/* Switch threshold reached: leave phase 1 (no return code is set
		   here; phase 2 decides the final outcome). */
		if( nrm2 <= tol2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptimes     = ptimes;
			break;
		}

		if( omega.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		rho_old.hi[0] = rho.hi[0];
	}

	/* ---- Restart for phase 2 ---- */
	s->precision = LIS_PRECISION_QUAD;
	shat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	omega.hi[0] = 1.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual (the return value is not examined here) */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* ---- Phase 2: "ex" kernels, iteration numbering continues after iter ---- */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* First pass of the restarted iteration: p = r.  v still holds the
		   phase-1 vector at this point and must not enter the update. */
		if( iter2==iter+1 )
		{
			lis_vector_copyex_mm(r,p);
		}
		else
		{
			/* beta = (rho / rho_old) * (alpha / omega) */
			lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
			lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);

			/* p = r + beta*(p - omega*v)
			   (omega is negated in place; it is recomputed further down) */
			lis_quad_minus((LIS_QUAD *)omega.hi);
			lis_vector_axpyex_mmm(omega,v,p);
			lis_vector_xpayex_mmm(r,beta,p);
		}

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,v);

		/* tmpdot1 = <rtld,v> */
		lis_vector_dotex_mmm(rtld,v,&tmpdot1);
		/* NOTE(review): no breakdown test guards the division below. */

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* s = r - alpha*v  (alpha negated in place: now -alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,v,r);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( tol > nrm2 )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
			}

			/* restore +alpha before the final x update */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,phat,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* shat = M^-1 * s */
		times = lis_wtime();
		lis_psolve(solver, s, shat);
		ptimes += lis_wtime()-times;

		/* t = A * shat */
		LIS_MATVEC(A,shat,t);

		/* tmpdot1 = <t,s> */
		/* tmpdot2 = <t,t> */
		/* omega   = tmpdot1 / tmpdot2 */
		lis_vector_dotex_mmm(t,s,&tmpdot1);
		lis_vector_dotex_mmm(t,t,&tmpdot2);
		lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

		/* x = x + alpha*phat + omega*shat  (alpha restored to +alpha) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,phat,x);
		lis_vector_axpyex_mmm(omega,shat,x);

		/* r = s - omega*t  (omega negated for the update, then restored) */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyex_mmm(omega,t,r);
		lis_quad_minus((LIS_QUAD *)omega.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		if( omega.hi[0]==0.0 && omega.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicgstab - preconditioned BiCGSTAB iteration in working precision.
 *
 * Inputs are taken from `solver`: the matrix (solver->A), the solution
 * vector (solver->x, updated in place), the iteration limit, the output
 * flags and the index of the residual routine in lis_solver_get_residual[].
 * solver->work[0..6] are used as scratch vectors; r and s intentionally
 * share work[1], so "s = r - alpha*v" is an in-place update of r.
 *
 * Return value (also stored in solver->retcode on the loop exits):
 *   LIS_SUCCESS   - residual <= solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero;
 *   LIS_BREAKDOWN - rho, <rtld,v> or omega is exactly zero;
 *   LIS_MAXITER   - the loop ran to completion without converging.
 * solver->iter and solver->resid are set on every loop exit; solver->ptimes
 * (accumulated preconditioner time) is set on the successful ones.
 *
 * nrm2 starts at 0.0 so that a breakdown detected before the first
 * residual evaluation does not store an indeterminate value in
 * solver->resid.
 */
LIS_INT lis_bicgstab(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b, x;
    LIS_VECTOR r, rtld, t, p, v, s, phat, shat;
    LIS_SCALAR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
    LIS_REAL bnrm2, tol;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, n, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    M       = solver->precon;
    b       = solver->b;
    x       = solver->x;
    n       = A->n;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    rtld = solver->work[0];
    r    = solver->work[1];
    s    = solver->work[1];   /* alias of r */
    t    = solver->work[2];
    p    = solver->work[3];
    v    = solver->work[4];
    phat = solver->work[5];
    shat = solver->work[6];

    alpha   = (LIS_SCALAR)1.0;
    omega   = (LIS_SCALAR)1.0;
    rho_old = (LIS_SCALAR)1.0;

    lis_vector_set_all(0.0, p);
    lis_vector_set_all(0.0, phat);
    lis_vector_set_all(0.0, s);
    lis_vector_set_all(0.0, shat);

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    for( iter = 1; iter <= maxiter; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld, r, &rho);

        /* breakdown: rho vanished */
        if( rho == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        if( iter == 1 )
        {
            lis_vector_copy(r, p);
        }
        else
        {
            /* beta = (rho / rho_old) * (alpha / omega) */
            beta = (rho / rho_old) * (alpha / omega);

            /* p = r + beta*(p - omega*v) */
            lis_vector_axpy(-omega, v, p);
            lis_vector_xpay(r, beta, p);
        }

        /* phat = M^-1 * p */
        times = lis_wtime();
        lis_psolve(solver, p, phat);
        ptimes += lis_wtime() - times;

        /* v = A * phat */
        LIS_MATVEC(A, phat, v);

        /* tmpdot1 = <rtld,v> */
        lis_vector_dot(rtld, v, &tmpdot1);

        /* breakdown: alpha = rho / tmpdot1 would divide by zero */
        if( tmpdot1 == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha = rho / tmpdot1;

        /* s = r - alpha*v  (in place, s aliases r) */
        lis_vector_axpy(-alpha, v, r);

        /* Early convergence check on s */
        lis_solver_get_residual[conv](s, solver, &nrm2);
        if( nrm2 <= tol )
        {
            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
            }

            /* x = x + alpha*phat */
            lis_vector_axpy(alpha, phat, x);

            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* shat = M^-1 * s */
        times = lis_wtime();
        lis_psolve(solver, s, shat);
        ptimes += lis_wtime() - times;

        /* t = A * shat */
        LIS_MATVEC(A, shat, t);

        /* omega = <t,s> / <t,t> */
        lis_vector_dot(t, s, &tmpdot1);
        lis_vector_dot(t, t, &tmpdot2);
        omega = tmpdot1 / tmpdot2;

        /* x = x + alpha*phat + omega*shat */
        lis_vector_axpy(alpha, phat, x);
        lis_vector_axpy(omega, shat, x);

        /* r = s - omega*t */
        lis_vector_axpy(-omega, t, r);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* breakdown: the next beta would divide by omega */
        if( omega == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }
        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicgsafe_switch - BiCGSafe iteration run in two phases.
 *
 * Phase 1 iterates with the plain vector routines, using only the .hi
 * part of the scalars, for at most solver->options[LIS_OPTIONS_SWITCH_MAXITER]
 * iterations or until the residual reaches solver->tol_switch.
 * Phase 2 marks the work vectors LIS_PRECISION_QUAD, copies x into
 * solver->xx, recomputes the initial residual, resets the recurrence
 * (scalars to 1, vectors to 0) and continues with the lis_quad_* and
 * *ex_* routines until solver->tol or solver->options[LIS_OPTIONS_MAXITER]
 * is reached.
 *
 * Return value (also stored in solver->retcode):
 *   LIS_SUCCESS   - phase-2 residual < solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero
 *                   at the very start;
 *   LIS_BREAKDOWN - rho is exactly zero in either phase;
 *   LIS_MAXITER   - phase 2 ran out of iterations.
 * After the switch, solver->iter holds the total iteration counter
 * (iter2) and solver->iter2 the phase-1 counter (iter) on every exit.
 *
 * Changes relative to the previous version of this routine:
 *   - phase 2 selects the "first iteration" formula (eta = 0) on its own
 *     first pass (iter2 == iter + 1) instead of testing the frozen
 *     phase-1 counter; because t and w are zero on that pass, y == -t and
 *     the general formula would give qsi = 0, eta = -1 and a spurious
 *     zero residual;
 *   - the phase-2 history print uses the same index (iter2) as the
 *     rhistory store next to it;
 *   - the LIS_MAXITER exit stores iter/iter2 in the same order as the
 *     other phase-2 exits;
 *   - nrm2 starts at 0.0 so an exit taken before any residual has been
 *     evaluated does not store an indeterminate value.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld, phat;
    LIS_VECTOR t, ttld, that, t0, t0hat;
    LIS_VECTOR y, w, u, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, tol, tol2;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptime    = 0.0;

    rtld  = solver->work[0];
    r     = solver->work[1];
    rhat  = solver->work[2];
    p     = solver->work[3];
    ptld  = solver->work[4];
    phat  = solver->work[5];
    t     = solver->work[6];
    ttld  = solver->work[7];
    that  = solver->work[8];
    t0    = solver->work[9];
    t0hat = solver->work[10];
    y     = solver->work[11];
    w     = solver->work[12];
    u     = solver->work[13];
    z     = solver->work[14];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    one.hi[0]     = -1.0;   /* "one" holds -1 except where it is flipped below */
    one.lo[0]     = 0.0;

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver, r, rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 1: plain arithmetic on the .hi parts ---------------- */
    for( iter = 1; iter <= maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld, r, &rho.hi[0]);

        /* breakdown: rho vanished */
        if( rho.hi[0] == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

        /* w = ttld + beta*ptld */
        lis_vector_axpyz(beta.hi[0], ptld, ttld, w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime() - time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpy(-1, u, p);
        lis_vector_xpay(rhat, beta.hi[0], p);

        /* ptld = A * p */
        lis_matvec(A, p, ptld);

        /* alpha = rho / <rtld,ptld> */
        lis_vector_dot(rtld, ptld, &tmpdot[0].hi[0]);
        alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyz(-1, w, ptld, y);
        lis_vector_xpay(t, alpha.hi[0], y);
        lis_vector_axpy(-1, r, y);

        /* t = r - alpha*ptld */
        lis_vector_axpyz(-alpha.hi[0], ptld, r, t);

        /* that = M^-1 * t,  phat = M^-1 * ptld,  t0hat = M^-1 * t0 */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime() - time;

        /* ttld = A * that */
        lis_matvec(A, that, ttld);

        /* tmpdot[0..4] = <y,y>, <ttld,t>, <y,t>, <ttld,y>, <ttld,ttld> */
        lis_vector_dot(y, y, &tmpdot[0].hi[0]);
        lis_vector_dot(ttld, t, &tmpdot[1].hi[0]);
        lis_vector_dot(y, t, &tmpdot[2].hi[0]);
        lis_vector_dot(ttld, y, &tmpdot[3].hi[0]);
        lis_vector_dot(ttld, ttld, &tmpdot[4].hi[0]);

        if( iter == 1 )
        {
            qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
            eta.hi[0] = 0.0;
        }
        else
        {
            tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
            qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
            eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpay(t0hat, beta.hi[0], u);
        lis_vector_axpy(-1, rhat, u);
        lis_vector_scale(eta.hi[0], u);
        lis_vector_axpy(qsi.hi[0], phat, u);

        /* z = qsi*rhat + eta*z - alpha*u */
        lis_vector_scale(eta.hi[0], z);
        lis_vector_axpy(qsi.hi[0], rhat, z);
        lis_vector_axpy(-alpha.hi[0], u, z);

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha.hi[0], p, x);
        lis_vector_axpy(1, z, x);

        /* r = t - eta*y - qsi*ttld */
        lis_vector_axpyz(-eta.hi[0], y, t, r);
        lis_vector_axpy(-qsi.hi[0], ttld, r);

        /* convergence check against the switch tolerance */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter, nrm2);
        }

        if( tol2 >= nrm2 )
        {
            solver->iter  = iter;
            solver->iter2 = iter;
            solver->ptime = ptime;
            break;
        }

        lis_vector_copy(t, t0);
        rho_old.hi[0] = rho.hi[0];
    }

    /* ---------------- switch to quad precision ---------------- */
    r->precision    = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    t->precision    = LIS_PRECISION_QUAD;
    t0->precision   = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;
    that->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x, solver->xx);

    /* Restart the recurrence; the .lo parts of these were never touched in phase 1. */
    rho_old.hi[0] = 1.0;
    alpha.hi[0]   = 1.0;
    qsi.hi[0]     = 1.0;
    one.hi[0]     = -1.0;

    /* Initial residual for phase 2 */
    lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---------------- phase 2: quad-precision arithmetic ---------------- */
    for( iter2 = iter+1; iter2 <= maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld, r, &rho);

        /* breakdown: rho vanished */
        if( rho.hi[0] == 0.0 && rho.lo[0] == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div((LIS_QUAD *)beta.hi, (LIS_QUAD *)rho.hi, (LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi, (LIS_QUAD *)alpha.hi, (LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi, (LIS_QUAD *)beta.hi, (LIS_QUAD *)tmp.hi);

        /* w = ttld + beta*ptld */
        lis_vector_axpyzex_mmmm(beta, ptld, ttld, w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime() - time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one, u, p);
        lis_vector_xpayex_mmm(rhat, beta, p);

        /* ptld = A * p */
        lis_matvec(A, p, ptld);

        /* alpha = rho / <rtld,ptld> */
        lis_vector_dotex_mmm(rtld, ptld, &tmpdot[0]);
        lis_quad_div((LIS_QUAD *)alpha.hi, (LIS_QUAD *)rho.hi, (LIS_QUAD *)tmpdot[0].hi);

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyzex_mmmm(one, w, ptld, y);
        lis_vector_xpayex_mmm(t, alpha, y);
        lis_vector_axpyex_mmm(one, r, y);

        /* t = r - alpha*ptld   (alpha holds -alpha from here until the x update) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha, ptld, r, t);

        /* that = M^-1 * t,  phat = M^-1 * ptld,  t0hat = M^-1 * t0 */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime() - time;

        /* ttld = A * that */
        lis_matvec(A, that, ttld);

        /* tmpdot[0..4] = <y,y>, <ttld,t>, <y,t>, <ttld,y>, <ttld,ttld> */
        lis_vector_dotex_mmm(y, y, &tmpdot[0]);
        lis_vector_dotex_mmm(ttld, t, &tmpdot[1]);
        lis_vector_dotex_mmm(y, t, &tmpdot[2]);
        lis_vector_dotex_mmm(ttld, y, &tmpdot[3]);
        lis_vector_dotex_mmm(ttld, ttld, &tmpdot[4]);

        /*
         * First pass of the restarted recurrence: t and w were zero on
         * entry, so y == -t and only the one-parameter formula is usable.
         */
        if( iter2 == iter+1 )
        {
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[1].hi, (LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = <ttld,ttld>*<y,y> - <ttld,y>^2 */
            lis_quad_mul((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmp.hi, (LIS_QUAD *)qsi.hi);

            /* qsi = (<y,y>*<ttld,t> - <y,t>*<ttld,y>) / tmp */
            lis_quad_mul((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[2].hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmp.hi);

            /* eta = (<ttld,ttld>*<y,t> - <ttld,y>*<ttld,t>) / tmp; tmpdot[0] is reused as scratch */
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[3].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmp.hi);
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpayex_mmm(t0hat, beta, u);
        lis_vector_axpyex_mmm(one, rhat, u);
        lis_vector_scaleex_mm(eta, u);
        lis_vector_axpyex_mmm(qsi, phat, u);

        /* z = qsi*rhat + eta*z - alpha*u   (alpha still negated) */
        lis_vector_scaleex_mm(eta, z);
        lis_vector_axpyex_mmm(qsi, rhat, z);
        lis_vector_axpyex_mmm(alpha, u, z);

        /* x = x + alpha*p + z   (restore +alpha, flip "one" to +1 and back) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha, p, x);
        lis_vector_axpyex_mmm(one, z, x);
        lis_quad_minus((LIS_QUAD *)one.hi);

        /* r = t - eta*y - qsi*ttld   (negate eta/qsi for the update, then restore) */
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);
        lis_vector_axpyzex_mmmm(eta, y, t, r);
        lis_vector_axpyex_mmm(qsi, ttld, r);
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter2, nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        lis_vector_copyex_mm(t, t0);
        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicgsafe_quad - BiCGSafe iteration using the quad-precision
 * (LIS_QUAD_PTR hi/lo) scalar routines lis_quad_* and the *ex_* vector
 * routines.
 *
 * Inputs are taken from `solver`: solver->A, solver->x (updated in
 * place), the iteration limit, the output flags and the index of the
 * residual routine. solver->work[0..11] are used as scratch vectors.
 *
 * Return value (also stored in solver->retcode on the loop exits):
 *   LIS_SUCCESS   - residual < solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero;
 *   LIS_BREAKDOWN - rho = <rtld,r> is exactly zero;
 *   LIS_MAXITER   - the loop ran to completion without converging.
 * solver->iter and solver->resid are set on every loop exit; solver->ptime
 * (accumulated preconditioner time) is set on success.
 *
 * The scalar "one" holds -1 and alpha is temporarily negated with
 * lis_quad_minus() wherever a subtraction is expressed through an axpy.
 *
 * nrm2 starts at 0.0 so that a breakdown on the first iteration (which is
 * tested before any residual has been evaluated) does not store an
 * indeterminate value in solver->resid.
 */
LIS_INT lis_bicgsafe_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld;
    LIS_VECTOR t, ttld;
    LIS_VECTOR y, v, u, utld, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta;
    LIS_QUAD_PTR tmp, tmpdot[5], one;
    LIS_REAL bnrm2, tol;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, output, conv;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    rtld = solver->work[0];
    r    = solver->work[1];
    rhat = solver->work[2];
    p    = solver->work[3];
    ptld = solver->work[4];
    t    = solver->work[5];
    ttld = solver->work[6];
    y    = solver->work[7];
    v    = solver->work[8];
    u    = solver->work[9];
    z    = solver->work[10];
    utld = solver->work[11];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    one.hi[0]     = -1.0;
    one.lo[0]     = 0.0;

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, utld);

    for( iter = 1; iter <= maxiter; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld, r, &rho);

        /* breakdown: rho vanished */
        if( rho.hi[0] == 0.0 && rho.lo[0] == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div((LIS_QUAD *)beta.hi, (LIS_QUAD *)rho.hi, (LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi, (LIS_QUAD *)alpha.hi, (LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi, (LIS_QUAD *)beta.hi, (LIS_QUAD *)tmp.hi);

        /* rhat = M^-1 * r,  v = A * rhat */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime() - time;
        lis_matvec(A, rhat, v);

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one, u, p);
        lis_vector_xpayex_mmm(rhat, beta, p);

        /* ptld = v + beta*(ptld - utld) */
        lis_vector_axpyex_mmm(one, utld, ptld);
        lis_vector_xpayex_mmm(v, beta, ptld);

        /* alpha = rho / <rtld,ptld> */
        lis_vector_dotex_mmm(rtld, ptld, &tmpdot[0]);
        lis_quad_div((LIS_QUAD *)alpha.hi, (LIS_QUAD *)rho.hi, (LIS_QUAD *)tmpdot[0].hi);

        /* tmpdot[0..4] = <y,y>, <v,r>, <y,r>, <v,y>, <v,v> */
        lis_vector_dotex_mmm(y, y, &tmpdot[0]);
        lis_vector_dotex_mmm(v, r, &tmpdot[1]);
        lis_vector_dotex_mmm(y, r, &tmpdot[2]);
        lis_vector_dotex_mmm(v, y, &tmpdot[3]);
        lis_vector_dotex_mmm(v, v, &tmpdot[4]);

        if( iter == 1 )
        {
            /* qsi = <v,r> / <v,v>,  eta = 0 */
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[1].hi, (LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = <v,v>*<y,y> - <v,y>^2 */
            lis_quad_mul((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmp.hi, (LIS_QUAD *)qsi.hi);

            /* qsi = (<y,y>*<v,r> - <y,r>*<v,y>) / tmp */
            lis_quad_mul((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[2].hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmp.hi);

            /* eta = (<v,v>*<y,r> - <v,y>*<v,r>) / tmp; tmpdot[0] is reused as scratch */
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[3].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmp.hi);
        }

        /* t = qsi*ptld + eta*y */
        lis_vector_copyex_mm(y, t);
        lis_vector_scaleex_mm(eta, t);
        lis_vector_axpyex_mmm(qsi, ptld, t);

        /* ttld = M^-1 * t */
        time = lis_wtime();
        lis_psolve(solver, t, ttld);
        ptime += lis_wtime() - time;

        /* u = ttld + eta*beta*u,  utld = A * u */
        lis_quad_mul((LIS_QUAD *)tmp.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)beta.hi);
        lis_vector_xpayex_mmm(ttld, tmp, u);
        lis_matvec(A, u, utld);

        /* z = qsi*rhat + eta*z - alpha*u   (alpha negated from here ...) */
        lis_vector_scaleex_mm(eta, z);
        lis_vector_axpyex_mmm(qsi, rhat, z);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha, u, z);

        /* y = qsi*v + eta*y - alpha*utld   (... to here) */
        lis_vector_scaleex_mm(eta, y);
        lis_vector_axpyex_mmm(qsi, v, y);
        lis_vector_axpyex_mmm(alpha, utld, y);
        lis_quad_minus((LIS_QUAD *)alpha.hi);

        /* x = x + alpha*p + z   ("one" flipped to +1 for the z term) */
        lis_vector_axpyex_mmm(alpha, p, x);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(one, z, x);
        lis_quad_minus((LIS_QUAD *)one.hi);

        /* r = r - alpha*ptld - y */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha, ptld, r);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(one, y, r);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter, nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicgsafe - BiCGSafe iteration in working precision.
 *
 * Inputs are taken from `solver`: solver->A, solver->x (updated in
 * place), the iteration limit, the output flags and the index of the
 * residual routine. solver->work[0..11] are used as scratch vectors.
 *
 * Return value (also stored in solver->retcode on the loop exits):
 *   LIS_SUCCESS   - residual <= solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero;
 *   LIS_BREAKDOWN - rho = <rtld,r> is exactly zero after an update;
 *   LIS_MAXITER   - the loop ran to completion without converging.
 * solver->iter and solver->resid are set on every loop exit; solver->ptime
 * (accumulated preconditioner time) is set on success.
 *
 * rho and rho_old are LIS_SCALAR: their addresses are passed to
 * lis_vector_dot(), which in this routine also receives LIS_SCALAR
 * destinations (tmpdot[]), so all dot-product results use one type.
 * nrm2 starts at 0.0 so that the LIS_MAXITER exit stores a defined value
 * even if the loop body never runs.
 */
LIS_INT lis_bicgsafe(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, mr, amr, t, mt, p, ap;
    LIS_VECTOR y, u, au, z;
    LIS_SCALAR alpha, beta;
    LIS_SCALAR rho, rho_old;
    LIS_SCALAR qsi, eta;
    LIS_SCALAR tmp, tmpdot[5];
    LIS_REAL bnrm2, tol;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, output, conv;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    rtld = solver->work[0];
    r    = solver->work[1];
    mr   = solver->work[2];
    amr  = solver->work[3];
    p    = solver->work[4];
    ap   = solver->work[5];
    t    = solver->work[6];
    mt   = solver->work[7];
    y    = solver->work[8];
    u    = solver->work[9];
    z    = solver->work[10];
    au   = solver->work[11];

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    /* mr = M^-1 * r,  amr = A * mr */
    time = lis_wtime();
    lis_psolve(solver, r, mr);
    ptime += lis_wtime() - time;
    lis_matvec(A, mr, amr);

    /* rho_old = <rtld,r>,  ap = amr,  p = mr */
    lis_vector_dot(rtld, r, &rho_old);
    lis_vector_copy(amr, ap);
    lis_vector_copy(mr, p);
    beta = 0.0;

    for( iter = 1; iter <= maxiter; iter++ )
    {
        /* alpha = rho_old / <rtld,ap> */
        lis_vector_dot(rtld, ap, &tmpdot[0]);
        alpha = rho_old / tmpdot[0];

        /* tmpdot[0..4] = <y,y>, <amr,r>, <y,r>, <amr,y>, <amr,amr> */
        lis_vector_dot(y, y, &tmpdot[0]);
        lis_vector_dot(amr, r, &tmpdot[1]);
        lis_vector_dot(y, r, &tmpdot[2]);
        lis_vector_dot(amr, y, &tmpdot[3]);
        lis_vector_dot(amr, amr, &tmpdot[4]);

        if( iter == 1 )
        {
            qsi = tmpdot[1] / tmpdot[4];
            eta = 0.0;
        }
        else
        {
            tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
            qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
            eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
        }

        /* t = qsi*ap + eta*y */
        lis_vector_copy(y, t);
        lis_vector_scale(eta, t);
        lis_vector_axpy(qsi, ap, t);

        /* mt = M^-1 * t */
        time = lis_wtime();
        lis_psolve(solver, t, mt);
        ptime += lis_wtime() - time;

        /* u = mt + eta*beta*u,  au = A * u */
        lis_vector_xpay(mt, eta*beta, u);
        lis_matvec(A, u, au);

        /* z = qsi*mr + eta*z - alpha*u */
        lis_vector_scale(eta, z);
        lis_vector_axpy(qsi, mr, z);
        lis_vector_axpy(-alpha, u, z);

        /* y = qsi*amr + eta*y - alpha*au */
        lis_vector_scale(eta, y);
        lis_vector_axpy(qsi, amr, y);
        lis_vector_axpy(-alpha, au, y);

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha, p, x);
        lis_vector_axpy(1.0, z, x);

        /* r = r - alpha*ap - y */
        lis_vector_axpy(-alpha, ap, r);
        lis_vector_axpy(-1.0, y, r);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rho = <rtld,r>; breakdown if it vanished */
        lis_vector_dot(rtld, r, &rho);
        if( rho == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta = (rho / rho_old) * (alpha / qsi);

        /* mr = M^-1 * r,  amr = A * mr */
        time = lis_wtime();
        lis_psolve(solver, r, mr);
        ptime += lis_wtime() - time;
        lis_matvec(A, mr, amr);

        /* p = mr + beta*(p - u) */
        lis_vector_axpy(-1.0, u, p);
        lis_vector_xpay(mr, beta, p);

        /* ap = amr + beta*(ap - au) */
        lis_vector_axpy(-1.0, au, ap);
        lis_vector_xpay(amr, beta, ap);

        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicrsafe_quad - BiCRSafe iteration using the quad-precision
 * (LIS_QUAD_PTR hi/lo) scalar routines lis_quad_* and the *ex_* vector
 * routines.
 *
 * Inputs are taken from `solver`: solver->A, solver->x (updated in
 * place), the iteration limit, the output flags and the index of the
 * residual routine. solver->work[0..12] are used as scratch vectors.
 *
 * Return value (also stored in solver->retcode on the loop exits):
 *   LIS_SUCCESS   - residual <= solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero;
 *   LIS_BREAKDOWN - rho = <rtld,amr> is exactly zero after an update;
 *   LIS_MAXITER   - the loop ran to completion without converging.
 * solver->iter and solver->resid are set on every loop exit; solver->ptime
 * (accumulated preconditioner time) is set on success.
 *
 * The scalar "one" holds -1 and alpha is temporarily negated with
 * lis_quad_minus() wherever a subtraction is expressed through an axpy;
 * the inline notes track the current sign.
 *
 * beta is set to zero before the loop because the first iteration forms
 * eta*beta before beta has been computed (lis_bicgsafe does the same with
 * "beta = 0.0"). nrm2 starts at 0.0 so that the LIS_MAXITER exit stores a
 * defined value even if the loop body never runs.
 */
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
    LIS_VECTOR y, my, u, au, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, tol;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, output, conv;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    rtld  = solver->work[0];
    r     = solver->work[1];
    mr    = solver->work[2];
    amr   = solver->work[3];
    p     = solver->work[4];
    ap    = solver->work[5];
    map   = solver->work[6];
    my    = solver->work[7];
    y     = solver->work[8];
    u     = solver->work[9];
    z     = solver->work[10];
    au    = solver->work[11];
    artld = solver->work[12];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    /* artld = A^T * rtld */
    lis_matvect(A, rtld, artld);

    /* mr = M^-1 * r,  amr = A * mr */
    time = lis_wtime();
    lis_psolve(solver, r, mr);
    ptime += lis_wtime() - time;
    lis_matvec(A, mr, amr);

    /* rho_old = <rtld,amr>,  ap = amr,  p = mr */
    lis_vector_dotex_mmm(rtld, amr, &rho_old);
    lis_vector_copyex_mm(amr, ap);
    lis_vector_copyex_mm(mr, p);

    one.hi[0]  = -1.0;
    one.lo[0]  = 0.0;
    beta.hi[0] = 0.0;   /* read as eta*beta in iteration 1 before being computed */
    beta.lo[0] = 0.0;

    for( iter = 1; iter <= maxiter; iter++ )
    {
        /* map = M^-1 * ap */
        time = lis_wtime();
        lis_psolve(solver, ap, map);
        ptime += lis_wtime() - time;

        /* alpha = rho_old / <artld,map> */
        lis_vector_dotex_mmm(artld, map, &tmpdot[0]);
        lis_quad_div((LIS_QUAD *)alpha.hi, (LIS_QUAD *)rho_old.hi, (LIS_QUAD *)tmpdot[0].hi);

        /* tmpdot[0..4] = <y,y>, <amr,r>, <y,r>, <amr,y>, <amr,amr> */
        lis_vector_dotex_mmm(y, y, &tmpdot[0]);
        lis_vector_dotex_mmm(amr, r, &tmpdot[1]);
        lis_vector_dotex_mmm(y, r, &tmpdot[2]);
        lis_vector_dotex_mmm(amr, y, &tmpdot[3]);
        lis_vector_dotex_mmm(amr, amr, &tmpdot[4]);

        if( iter == 1 )
        {
            /* qsi = <amr,r> / <amr,amr>,  eta = 0 */
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[1].hi, (LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = <amr,amr>*<y,y> - <amr,y>^2 */
            lis_quad_mul((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi, (LIS_QUAD *)tmp.hi, (LIS_QUAD *)qsi.hi);

            /* qsi = (<y,y>*<amr,r> - <y,r>*<amr,y>) / tmp */
            lis_quad_mul((LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[2].hi, (LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi, (LIS_QUAD *)qsi.hi, (LIS_QUAD *)tmp.hi);

            /* eta = (<amr,amr>*<y,r> - <amr,y>*<amr,r>) / tmp; tmpdot[0] is reused as scratch */
            lis_quad_mul((LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[4].hi, (LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi, (LIS_QUAD *)tmpdot[3].hi, (LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)tmp.hi);
        }

        /* u = qsi*map + eta*my + eta*beta*u,  au = A * u */
        lis_quad_mul((LIS_QUAD *)tmp.hi, (LIS_QUAD *)eta.hi, (LIS_QUAD *)beta.hi);
        lis_vector_scaleex_mm(tmp, u);
        lis_vector_axpyex_mmm(qsi, map, u);
        lis_vector_axpyex_mmm(eta, my, u);
        lis_matvec(A, u, au);

        /* z = qsi*mr + eta*z - alpha*u   (alpha -> -alpha) */
        lis_vector_scaleex_mm(eta, z);
        lis_vector_axpyex_mmm(qsi, mr, z);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha, u, z);

        /* y = qsi*amr + eta*y - alpha*au,  my = M^-1 * y */
        lis_vector_scaleex_mm(eta, y);
        lis_vector_axpyex_mmm(qsi, amr, y);
        lis_vector_axpyex_mmm(alpha, au, y);
        time = lis_wtime();
        lis_psolve(solver, y, my);
        ptime += lis_wtime() - time;

        /* x = x + alpha*p + z   (alpha -> +alpha, one -> +1) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha, p, x);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(one, z, x);

        /* r = r - alpha*ap - y   (alpha -> -alpha, one -> -1) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha, ap, r);
        lis_vector_axpyex_mmm(one, y, r);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* mr = mr - alpha*map - my   (alpha still negated),  amr = A * mr */
        lis_vector_axpyex_mmm(alpha, map, mr);
        lis_vector_axpyex_mmm(one, my, mr);
        lis_matvec(A, mr, amr);

        /* rho = <rtld,amr>; breakdown if it vanished */
        lis_vector_dotex_mmm(rtld, amr, &rho);
        if( rho.hi[0] == 0.0 && rho.lo[0] == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi)   (alpha -> +alpha first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_div((LIS_QUAD *)beta.hi, (LIS_QUAD *)rho.hi, (LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi, (LIS_QUAD *)alpha.hi, (LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi, (LIS_QUAD *)beta.hi, (LIS_QUAD *)tmp.hi);

        /* p = mr + beta*(p - u) */
        lis_vector_axpyex_mmm(one, u, p);
        lis_vector_xpayex_mmm(mr, beta, p);

        /* ap = amr + beta*(ap - au) */
        lis_vector_axpyex_mmm(one, au, ap);
        lis_vector_xpayex_mmm(amr, beta, ap);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * lis_bicg - preconditioned BiCG iteration in working precision.
 *
 * Inputs are taken from `solver`: the matrix (solver->A, used both for
 * LIS_MATVEC and, through the At handle, for LIS_MATVECT), the solution
 * vector solver->x (updated in place), the iteration limit, the output
 * flags and the index of the residual routine. solver->work[0..5] are
 * used as scratch vectors; q shares work[2] with z and qtld shares
 * work[3] with ztld, which is safe because z/ztld are consumed by the
 * p/ptld updates before q/qtld are written.
 *
 * Return value (also stored in solver->retcode on the loop exits):
 *   LIS_SUCCESS   - residual <= solver->tol, or
 *                   lis_solver_get_initial_residual() returned non-zero;
 *   LIS_BREAKDOWN - rho = <z,rtld> or <ptld,q> is exactly zero;
 *   LIS_MAXITER   - the loop ran to completion without converging.
 * solver->iter and solver->resid are set on every loop exit; solver->ptimes
 * (accumulated preconditioner time) is set on success.
 *
 * nrm2 starts at 0.0 so that a breakdown in the first iteration (both
 * breakdown tests precede the first residual evaluation) does not store
 * an indeterminate value in solver->resid.
 */
LIS_INT lis_bicg(LIS_SOLVER solver)
{
    LIS_MATRIX A, At;
    LIS_PRECON M;
    LIS_VECTOR b, x;
    LIS_VECTOR r, rtld, z, ztld, p, ptld, q, qtld;
    LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
    LIS_REAL bnrm2, tol;
    LIS_REAL nrm2 = 0.0;
    LIS_INT iter, maxiter, n, output, conv;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    At      = solver->A;
    M       = solver->precon;
    b       = solver->b;
    x       = solver->x;
    n       = A->n;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r    = solver->work[0];
    rtld = solver->work[1];
    z    = solver->work[2];
    ztld = solver->work[3];
    p    = solver->work[4];
    ptld = solver->work[5];
    q    = solver->work[2];   /* shares storage with z */
    qtld = solver->work[3];   /* shares storage with ztld */

    rho_old = (LIS_SCALAR)1.0;

    /* Initial residual; a non-zero return ends the solve immediately. */
    if( lis_solver_get_initial_residual(solver, NULL, NULL, r, &bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver, r, rtld);

    lis_vector_set_all(0, p);
    lis_vector_set_all(0, ptld);

    for( iter = 1; iter <= maxiter; iter++ )
    {
        /* z = M^-1 * r,  ztld = M^-T * rtld */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        lis_psolvet(solver, rtld, ztld);
        ptimes += lis_wtime() - times;

        /* rho = <z,rtld> */
        lis_vector_dot(z, rtld, &rho);

        /* breakdown: rho vanished */
        if( rho == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = rho / rho_old */
        beta = rho / rho_old;

        /* p = z + beta*p,  q = A * p */
        lis_vector_xpay(z, beta, p);
        LIS_MATVEC(A, p, q);

        /* ptld = ztld + beta*ptld,  qtld = A^T * ptld */
        lis_vector_xpay(ztld, beta, ptld);
        LIS_MATVECT(At, ptld, qtld);

        /* tmpdot1 = <ptld,q> */
        lis_vector_dot(ptld, q, &tmpdot1);

        /* breakdown: alpha would divide by zero */
        if( tmpdot1 == 0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        alpha = rho / tmpdot1;

        /* x = x + alpha*p */
        lis_vector_axpy(alpha, p, x);

        /* r = r - alpha*q */
        lis_vector_axpy(-alpha, q, r);

        /* convergence check */
        lis_solver_get_residual[conv](r, solver, &nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            if( (output & LIS_PRINT_OUT) && A->my_rank == 0 ) lis_print_rhistory(iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* rtld = rtld - alpha*qtld */
        lis_vector_axpy(-alpha, qtld, rtld);

        rho_old = rho;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * BiCG with a precision switch.
 *
 * Phase 1 marks r, rtld, p and ptld as LIS_PRECISION_DEFAULT and iterates with
 * the plain kernels (only the high words of the scalars are used) for at most
 * LIS_OPTIONS_SWITCH_MAXITER iterations, leaving the loop as soon as the
 * residual reported by lis_solver_get_residual[conv] is <= solver->tol_switch.
 *
 * Phase 2 marks the same vectors as LIS_PRECISION_QUAD, recomputes the initial
 * residual from the current x and continues with the quad ("ex") kernels until
 * the residual is below solver->tol or LIS_OPTIONS_MAXITER is exceeded.
 *
 * Return value (also stored in solver->retcode on these three paths):
 *   LIS_SUCCESS   - phase 2 reached tol
 *   LIS_BREAKDOWN - <z,rtld> or <ptld,q> evaluated to exactly zero
 *   LIS_MAXITER   - iteration budget exhausted
 * LIS_SUCCESS is additionally returned right away when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_bicg_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A,At;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, q, qtld;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A        = solver->A;
	At       = solver->A;
	x        = solver->x;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv     = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes   = 0.0;

	/* No residual has been measured yet; give the breakdown paths a
	   defined value to report instead of an indeterminate one. */
	nrm2     = 0.0;

	r    = solver->work[0];
	rtld = solver->work[1];
	z    = solver->work[2];
	ztld = solver->work[3];
	p    = solver->work[4];
	ptld = solver->work[5];
	/* q and qtld deliberately share storage with z and ztld */
	q    = solver->work[2];
	qtld = solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, ptld);

	r->precision    = LIS_PRECISION_DEFAULT;
	rtld->precision = LIS_PRECISION_DEFAULT;
	p->precision    = LIS_PRECISION_DEFAULT;
	ptld->precision = LIS_PRECISION_DEFAULT;

	/* ---- phase 1: default precision ---- */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* z    = M^-1 * r */
		/* ztld = M^-T * rtld */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptimes += lis_wtime()-times;

		/* rho = <z,rtld> */
		lis_vector_dot(z,rtld,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->iter2   = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		beta.hi[0] = rho.hi[0] / rho_old.hi[0];

		/* p    = z    + beta*p    */
		/* ptld = ztld + beta*ptld */
		/* q    = A   * p    */
		/* qtld = A^T * ptld */
		lis_vector_xpay(z,beta.hi[0],p);
		LIS_MATVEC(A,p,q);

		lis_vector_xpay(ztld,beta.hi[0],ptld);
		LIS_MATVECT(At,ptld,qtld);

		/* tmpdot1 = <ptld,q> */
		lis_vector_dot(ptld,q,&tmpdot1.hi[0]);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter;
			solver->iter2   = iter;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* x = x + alpha*p */
		lis_vector_axpy(alpha.hi[0],p,x);

		/* r = r - alpha*q */
		lis_vector_axpy(-alpha.hi[0],q,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( nrm2 <= tol2 )
		{
			solver->iter   = iter;
			solver->iter2  = iter;
			solver->ptimes = ptimes;
			break;
		}

		/* rtld = rtld - alpha*qtld */
		lis_vector_axpy(-alpha.hi[0],qtld,rtld);

		rho_old.hi[0] = rho.hi[0];
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	r->precision    = LIS_PRECISION_QUAD;
	rtld->precision = LIS_PRECISION_QUAD;
	p->precision    = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	/* restart the recurrence with rho_old = 1 in both words */
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;

	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, ptld);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* z    = M^-1 * r */
		/* ztld = M^-T * rtld */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		lis_psolvet(solver, rtld, ztld);
		ptimes += lis_wtime()-times;

		/* rho = <z,rtld> */
		lis_vector_dotex_mmm(z,rtld,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter2;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* p    = z    + beta*p    */
		/* ptld = ztld + beta*ptld */
		/* q    = A   * p    */
		/* qtld = A^T * ptld */
		lis_vector_xpayex_mmm(z,beta,p);
		LIS_MATVEC(A,p,q);

		lis_vector_xpayex_mmm(ztld,beta,ptld);
		LIS_MATVECT(At,ptld,qtld);

		/* tmpdot1 = <ptld,q> */
		lis_vector_dotex_mmm(ptld,q,&tmpdot1);

		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter    = iter2;
			solver->resid   = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* x = x + alpha*p */
		lis_vector_axpyex_mmm(alpha,p,x);

		/* r = r - alpha*q (alpha stays negated for the rtld update below) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,q,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			/* print with the running counter of this phase, matching the
			   index used for solver->residual[] just above */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter2;
			solver->iter2   = iter;
			solver->resid   = nrm2;
			solver->ptimes  = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*qtld */
		lis_vector_axpyex_mmm(alpha,qtld,rtld);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter2;
	solver->iter2   = iter;
	solver->resid   = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_esolve(LIS_MATRIX A, LIS_VECTOR x, LIS_SCALAR *evalue0, LIS_ESOLVER esolver) { LIS_INT nesolver,niesolver,emaxiter; LIS_SCALAR *evalue; LIS_VECTOR *evector; LIS_SCALAR *resid; LIS_SCALAR *rhistory; LIS_INT *iter,*iter2; LIS_INT err; LIS_INT output; LIS_INT ss, mode; double time; double gshift; LIS_INT estorage,eblock; LIS_MATRIX B; LIS_INT eprecision; LIS_VECTOR xx; LIS_DEBUG_FUNC_IN; /* begin parameter check */ err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL); if( err ) return err; if( x==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector x is undefined\n"); return LIS_ERR_ILL_ARG; } if( A->n!=x->n ) { return LIS_ERR_ILL_ARG; } if( A->gn<=0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A is less than 0\n",A->gn); return LIS_ERR_ILL_ARG; } nesolver = esolver->options[LIS_EOPTIONS_ESOLVER]; niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER]; ss = esolver->options[LIS_EOPTIONS_SUBSPACE]; mode = esolver->options[LIS_EOPTIONS_MODE]; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; gshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; estorage = esolver->options[LIS_EOPTIONS_STORAGE]; eblock = esolver->options[LIS_EOPTIONS_STORAGE_BLOCK]; eprecision = esolver->options[LIS_EOPTIONS_PRECISION]; esolver->eprecision = eprecision; if( nesolver < 1 || nesolver > LIS_ESOLVER_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_ESOLVER is %d (Set between 1 to %d)\n",nesolver, LIS_ESOLVER_LEN); return LIS_ERR_ILL_ARG; } if( niesolver < 1 || niesolver > 7 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 7)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && niesolver > 4 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 4 for Subspace)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI && niesolver == 
LIS_ESOLVER_PI ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 7 for Lanczos)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI && (( niesolver == LIS_ESOLVER_PI ) || ( niesolver == LIS_ESOLVER_CG) || ( niesolver == LIS_ESOLVER_JD)) ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 4 or 6 for Arnoldi)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Subspace)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if (( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI ) && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Lanczos and Arnoldi)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Subspace)\n", mode, ss); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == ( LIS_ESOLVER_LI || LIS_ESOLVER_AI ) && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Lanczos or Arnoldi)\n", mode, ss); return LIS_ERR_ILL_ARG; } #ifdef USE_QUAD_PRECISION if( eprecision==LIS_PRECISION_QUAD && lis_esolver_execute_quad[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision eigensolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( eprecision==LIS_PRECISION_SWITCH && lis_esolver_execute_switch[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch esolver %s is not 
implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( esolver->options[LIS_EOPTIONS_SWITCH_MAXITER]==-1 ) { esolver->options[LIS_EOPTIONS_SWITCH_MAXITER] = emaxiter; } #endif /* create eigenvalue array */ if( esolver->evalue ) lis_free(esolver->evalue); evalue = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::evalue"); if( evalue==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } evalue[0] = 1.0; evalue[ss-1] = 1.0; /* create residual norm array */ if( esolver->resid ) lis_free(esolver->resid); resid = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::resid"); if( resid==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create number of iterations array */ if( esolver->iter ) lis_free(esolver->iter); iter = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter"); if( iter==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create quad precision number of iterations array */ if( esolver->iter2 ) lis_free(esolver->iter2); iter2 = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter2"); if( iter2==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { esolver->retcode = err; return err; } if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) { if( output ) lis_printf(A->comm,"initial vector x : 1\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(1.0,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(1.0,xx); } else { lis_vector_set_allex_nm(1.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x : user defined\n"); #ifndef 
USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* global shift */ if ( output ) if( A->my_rank==0 ) printf("shift : %e\n", gshift); /* create eigenvector array */ if( esolver->evector ) lis_free(esolver->evector); evector = (LIS_VECTOR *)lis_malloc((ss+2)*sizeof(LIS_VECTOR),"lis_esolve::evector"); if( evector==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_VECTOR)); esolver->retcode = err; return err; } /* create residual history array */ if( esolver->rhistory ) lis_free(esolver->rhistory); rhistory = (LIS_SCALAR *)lis_malloc((emaxiter+2)*sizeof(LIS_SCALAR),"lis_esolve::rhistory"); if( rhistory==NULL ) { LIS_SETERR_MEM((emaxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); esolver->retcode = err; return err; } /* convert matrix */ if( estorage>0 && A->matrix_type!=estorage ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,eblock,eblock,NULL,NULL); lis_matrix_set_type(B,estorage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } esolver->A = A; esolver->evalue = evalue; esolver->x = x; esolver->evector = evector; rhistory[0] = 1.0; esolver->rhistory = rhistory; esolver->resid = resid; esolver->iter = iter; esolver->iter2 = iter2; if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("precision : long double\n"); #else if ( output ) printf("precision : %s\n", lis_eprecisionname[eprecision]); #endif #ifdef _LONG__LONG if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #else if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #endif } if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE 
if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1Le * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #else if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1e * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #endif } if( A->my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { #ifdef _LONG__LONG if ( output ) printf("matrix storage format : %s(%lld x %lld)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #else if ( output ) printf("matrix storage format : %s(%d x %d)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #endif } else { if ( output ) printf("matrix storage format : %s\n", lis_estoragename[A->matrix_type-1]); } } time = lis_wtime(); esolver->ptime = 0; esolver->itime = 0; esolver->p_c_time = 0; esolver->p_i_time = 0; if (gshift != 0.0) lis_matrix_shift_diagonal(A, gshift); /* create work vector */ err = lis_esolver_malloc_work[nesolver](esolver); if( err ) { lis_vector_destroy(xx); esolver->retcode = err; return err; } esolver->x = xx; esolver->xx = x; /* execute esolver */ #ifndef USE_QUAD_PRECISION err = lis_esolver_execute[nesolver](esolver); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_esolver_execute[nesolver](esolver); } else if( eprecision==LIS_PRECISION_QUAD ) { err = lis_esolver_execute_quad[nesolver](esolver); } else if( eprecision==LIS_PRECISION_SWITCH ) { err = lis_esolver_execute_switch[nesolver](esolver); } #endif esolver->retcode = err; *evalue0 = esolver->evalue[0]; lis_vector_copy(esolver->x, x); esolver->time = lis_wtime() - time; lis_matrix_shift_diagonal(A, -gshift); if( A->my_rank==0 ) { if( err ) { #ifdef _LONG__LONG if ( output ) printf("eigensolver status : %s(code=%lld)\n\n",lis_ereturncode[err],err); #else if ( output ) printf("eigensolver status : %s(code=%d)\n\n",lis_ereturncode[err],err); #endif } else { if ( output ) printf("eigensolver status : normal end\n\n"); } } if( 
eprecision==LIS_PRECISION_DOUBLE ) { esolver->iter2[mode] = esolver->iter[mode]; } else if( eprecision==LIS_PRECISION_QUAD ) { esolver->iter2[mode] = 0; } lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
/*
 * Restarted flexible GMRES using the quad ("ex") vector kernels.
 *
 * Work vectors: s = work[0] (right-hand side of the small least-squares
 * problem), z[] = work[2..] (preconditioned directions), v[] = work[m+2..]
 * (Krylov basis), with m = LIS_OPTIONS_RESTART.
 *
 * h is a scratch array of LIS_QUAD with column stride h_dim = m+1:
 *   h[row + col*h_dim]   Hessenberg entries
 *   h[cs_off + k]        first coefficient of rotation k  (a/r)
 *   h[sn_off + k]        second coefficient of rotation k (b/r)
 *
 * Returns LIS_SUCCESS when tol >= nrm2 (or immediately when
 * lis_solver_get_initial_residual() reports a non-zero status) and
 * LIS_MAXITER once iter reaches maxiter.
 */
LIS_INT lis_fgmres_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR s,*z,*v;
	LIS_QUAD *h;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_REAL bnrm2,nrm2,tol;
	LIS_REAL rnorm;
	LIS_INT iter,maxiter,n,output;
	LIS_INT i,j,k,m;
	LIS_INT cur,nxt,prev,row,col_off;
	LIS_INT h_dim;
	LIS_INT cs_off,sn_off;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s = solver->work[0];
	z = &solver->work[2];
	v = &solver->work[m+2];

	h      = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_fgmres_quad::h" );
	cs_off = (m+1)*h_dim;
	sn_off = (m+2)*h_dim;

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	/* initial residual goes into v[0] */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol   = solver->tol;
	rnorm = 1.0/bnrm2;

	for( iter=0; iter<maxiter; )
	{
		/* start of a restart cycle: v[0] *= bnrm2, s = rnorm * e_1 */
		lis_vector_scaleex_nm(bnrm2,v[0]);
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm;
		s->value_lo[0] = 0.0;

		i = 0;
		do
		{
			iter++;
			i++;
			cur     = i-1;          /* column being built          */
			nxt     = i;            /* index of the new basis vector */
			col_off = cur*h_dim;    /* start of column cur inside h  */

			/* z[cur] = M^-1 * v[cur] */
			time = lis_wtime();
			lis_psolve(solver,v[cur],z[cur]);
			ptime += lis_wtime()-time;

			/* w = A * z[cur], stored in v[nxt] */
			lis_matvec(A,z[cur],v[nxt]);

			/* orthogonalise w against v[0..cur], recording the coefficients */
			for(k=0;k<i;k++)
			{
				lis_vector_dotex_mmm(v[nxt],v[k],&t);
				h[col_off+k].hi = t.hi[0];
				h[col_off+k].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[nxt]);
			}

			/* h[nxt,cur] = ||w||, v[nxt] = w / ||w|| */
			lis_vector_nrm2ex_mm(v[nxt],&t);
			h[col_off+nxt].hi = t.hi[0];
			h[col_off+nxt].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[nxt]);

			/* apply the rotations of the earlier columns to this column */
			for(prev=0;prev<cur;prev++)
			{
				k = prev+1;
				t.hi[0] = h[col_off+prev].hi;
				t.lo[0] = h[col_off+prev].lo;

				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[cs_off+prev],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[sn_off+prev],(LIS_QUAD *)&h[col_off+k]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);

				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[sn_off+prev],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[cs_off+prev],(LIS_QUAD *)&h[col_off+k]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);

				h[col_off+prev].hi = aa.hi[0];
				h[col_off+prev].lo = aa.lo[0];
				h[col_off+k].hi    = bb.hi[0];
				h[col_off+k].lo    = bb.lo[0];
			}

			/* new rotation from (h[cur,cur], h[nxt,cur]) */
			aa.hi[0] = h[col_off+cur].hi;
			aa.lo[0] = h[col_off+cur].lo;
			bb.hi[0] = h[col_off+nxt].hi;
			bb.lo[0] = h[col_off+nxt].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			if( rr.hi[0]==0.0 )
			{
				/* keep the two divisions below finite */
				rr.hi[0] = 1.0e-17;
				rr.lo[0] = 0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[cs_off+cur],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[sn_off+cur],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);

			/* rotate s: s[nxt] = -sn*s[cur], s[cur] = cs*s[cur] */
			tmp.hi[0] = s->value[cur];
			tmp.lo[0] = s->value_lo[cur];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[sn_off+cur],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[cs_off+cur],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[nxt]    = aa.hi[0];
			s->value_lo[nxt] = aa.lo[0];
			s->value[cur]    = bb.hi[0];
			s->value_lo[cur] = bb.lo[0];

			/* h[cur,cur] = cs*h[cur,cur] + sn*h[nxt,cur] */
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[cs_off+cur],(LIS_QUAD *)&h[col_off+cur]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[sn_off+cur],(LIS_QUAD *)&h[col_off+nxt]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[col_off+cur].hi = aa.hi[0];
			h[col_off+cur].lo = aa.lo[0];

			/* convergence check on the high word of s[nxt] */
			nrm2 = fabs(s->value[nxt]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* back substitution: solve H * y = s, y overwrites s[0..cur] */
		tmp.hi[0] = s->value[cur];
		tmp.lo[0] = s->value_lo[cur];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[col_off+cur]);
		s->value[cur]    = tmp.hi[0];
		s->value_lo[cur] = tmp.lo[0];
		for(row=cur-1;row>=0;row--)
		{
			t.hi[0] = s->value[row];
			t.lo[0] = s->value_lo[row];
			for(j=row+1;j<=cur;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[row+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[row+row*h_dim]);
			s->value[row]    = tmp.hi[0];
			s->value_lo[row] = tmp.lo[0];
		}

		/* x = x + sum_j y[j] * z[j] */
		for(j=0;j<=cur;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* restart: v[0] = b - A*x with the low words cleared, then
		   refresh rnorm and its reciprocal for the next cycle */
		lis_matvec(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		memset(v[0]->value_lo,0,n*sizeof(LIS_SCALAR));
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0/rnorm;
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter+1;
	solver->resid   = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Sparse matrix-vector product benchmark.
 *
 * Usage: <prog> matrix_filename_list iter [block]
 *
 * argv[1] names a text file containing one matrix file name per
 * whitespace-separated token. Each matrix is read, converted to storage
 * formats 1..10 in turn (format 9 is skipped when more than one process is
 * used) and `iter` calls of lis_matvec() are timed per format. Rank 0 prints
 * the timings, the MFLOPS figure and the 2-norm of the product. `block`
 * (default 2) is the block size reported for BSR/BSC.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_MATRIX A,A0;
	LIS_VECTOR b,x;
	LIS_INT nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT nthreads, maxthreads;
	LIS_INT nnz;
	LIS_INT i,n,np;
	LIS_INT block;
	LIS_INT is,ie;
	LIS_INT err,iter,matrix_type;
	double time,time2,nnzs,nnzap,nnzt;
	LIS_SCALAR val;
	double commtime,comptime,flops;
	char path[1024];
	FILE *file;

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

#ifdef USE_MPI
	MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
	nprocs = int_nprocs;
	my_rank = int_my_rank;
#else
	nprocs = 1;
	my_rank = 0;
#endif

	if( argc < 3 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s matrix_filename_list iter [block] \n", argv[0]);
		}
		lis_finalize();
		exit(0);
	}

	file = fopen(argv[1], "r");
	if( file==NULL ) CHKERR(1);

	iter = atoi(argv[2]);
	/* argv[argc] is a null pointer, so this is safe when argc == 3 */
	if (argv[3] == NULL)
	{
		block = 2;
	}
	else
	{
		block = atoi(argv[3]);
	}

	if( iter<=0 )
	{
#ifdef _LONG__LONG
		printf("iter=%lld <= 0\n",iter);
#else
		printf("iter=%d <= 0\n",iter);
#endif
		CHKERR(1);
	}

	if( my_rank==0 )
	{
		printf("\n");
#ifdef _LONG__LONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		nthreads = omp_get_num_procs();
		maxthreads = omp_get_max_threads();
#ifdef _LONG__LONG
		printf("max number of threads = %lld\n", nthreads);
		printf("number of threads = %lld\n", maxthreads);
#else
		printf("max number of threads = %d\n", nthreads);
		printf("number of threads = %d\n", maxthreads);
#endif
	}
#else
	nthreads = 1;
	maxthreads = 1;
#endif

	/* create matrix and vectors */
	/* The field width keeps an over-long token in the list file from writing
	   past the end of path[1024] (1023 characters plus the terminator). */
	while( fscanf(file, "%1023s\n", path)==1 )
	{
		if( my_rank==0 )
		{
			printf("matrix_filename = %s\n", path);
		}
		lis_matrix_create(LIS_COMM_WORLD,&A0);
		err = lis_input(A0,NULL,NULL,path);
		if( err ) CHKERR(err);

		n   = A0->n;
		nnz = A0->nnz;
		np  = A0->np-n;
#ifdef USE_MPI
		/* global non-zero count plus a spread measure of the per-process
		   counts (nnzs), and the global value of np */
		MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
		nnzap = (double)i / (double)nprocs;
		nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
		nnz   = i;
		MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
		nnzs  = (nnzs / (double)nprocs)/nnzap;
		MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
		np = i;
#endif

		if( my_rank==0 )
		{
#ifdef _LONG__LONG
			printf("block size of BSR and BSC = %lld x %lld\n",block,block);
			printf("number of iterations = %lld\n\n",iter);
#else
			printf("block size of BSR and BSC = %d x %d\n",block,block);
			printf("number of iterations = %d\n\n",iter);
#endif
		}

		err = lis_vector_duplicate(A0,&x);
		if( err ) CHKERR(err);
		err = lis_vector_duplicate(A0,&b);
		if( err ) CHKERR(err);

		/* x = (1,...,1) on the locally owned range */
		lis_matrix_get_range(A0,&is,&ie);
		for(i=0;i<n;i++)
		{
			err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
		}

		/*
		  MPI version of VBR is not implemented.
		  DNS is also excluded to reduce memory usage.
		*/
		for (matrix_type=1;matrix_type<11;matrix_type++)
		{
			if ( nprocs>1 && matrix_type==9 ) continue;
			lis_matrix_duplicate(A0,&A);
			lis_matrix_set_type(A,matrix_type);
			err = lis_matrix_convert(A0,A);
			if( err ) CHKERR(err);

			/* NOTE(review): the block size is written after the conversion
			   and on rank 0 only - presumably it should be requested before
			   lis_matrix_convert() on every rank; confirm against the
			   matrix API before changing. */
			if( my_rank==0 )
			{
				if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
				{
					A->bnr = block;
					A->bnc = block;
				}
			}

			comptime = 0.0;
			commtime = 0.0;

			for(i=0;i<iter;i++)
			{
#ifdef USE_MPI
				MPI_Barrier(A->comm);
				time = lis_wtime();
				lis_send_recv(A->commtable,x->value);
				commtime += lis_wtime() - time;
#endif
				time2 = lis_wtime();
				lis_matvec(A,x,b);
				comptime += lis_wtime() - time2;
			}
			lis_vector_nrm2(b,&val);

			if( my_rank==0 )
			{
				flops = 2.0*nnz*iter*1.0e-6 / comptime;
#ifdef USE_MPI
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
				printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
				printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#else
#ifdef _LONG__LONG
				printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
				printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#endif
#else
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
				printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
				printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#else
#ifdef _LONG__LONG
				printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
				printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#endif
#endif
			}
			lis_matrix_destroy(A);
		}

		lis_matrix_destroy(A0);
		lis_vector_destroy(b);
		lis_vector_destroy(x);
	}

	fclose(file);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
/*
 * Restarted GMRES with a precision switch.
 *
 * Phase 1 works on the high words only (plain kernels, the scratch array h is
 * viewed as an array of LIS_SCALAR through hd) for at most
 * LIS_OPTIONS_SWITCH_MAXITER iterations or until nrm2 <= solver->tol_switch.
 * Phase 2 marks z as LIS_PRECISION_QUAD, recomputes the initial residual from
 * the current x and continues with the quad ("ex") kernels until
 * nrm2 <= solver->tol or LIS_OPTIONS_MAXITER iterations have been spent.
 *
 * Work vectors: s = work[0], r = work[1], z = work[2], v[] = work[3..].
 * Layout of h / hd (column stride h_dim = m+1, m = LIS_OPTIONS_RESTART):
 *   [row + col*h_dim]  Hessenberg entries
 *   [cs + k], [sn + k] the two coefficients of rotation k
 *
 * Returns LIS_SUCCESS on convergence in phase 2 (or immediately when
 * lis_solver_get_initial_residual() reports a non-zero status), otherwise
 * LIS_MAXITER. solver->iter2 receives the phase-1 iteration count.
 */
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2,nrm2,tol,tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double time,ptime;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A        = solver->A;
	b        = solver->b;
	x        = solver->x;
	n        = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	m        = solver->options[LIS_OPTIONS_RESTART];
	h_dim    = m+1;
	ptime    = 0.0;

	/* Defined value for solver->resid in case neither loop below executes. */
	nrm2     = 0.0;

	s = solver->work[0];
	r = solver->work[1];
	z = solver->work[2];
	v = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h  = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" );
	hd = (LIS_SCALAR *)h;
	cs = (m+1)*h_dim;
	sn = (m+2)*h_dim;

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	z->precision = LIS_PRECISION_DEFAULT;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	/* ---- phase 1: default precision ---- */
	iter=0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>       */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k+iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1+iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			/* apply the previous rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj        = k-1;
				t.hi[0]   = hd[jj+iih];
				aa.hi[0]  = hd[jj+cs]*t.hi[0];
				aa.hi[0] += hd[jj+sn]*hd[k+iih];
				bb.hi[0]  = -hd[jj+sn]*t.hi[0];
				bb.hi[0] += hd[jj+cs]*hd[k+iih];
				hd[jj+iih] = aa.hi[0];
				hd[k+iih]  = bb.hi[0];
			}

			/* new rotation from (h[ii,ii], h[i1,ii]) */
			aa.hi[0] = hd[ii+iih];
			bb.hi[0] = hd[i1+iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii+cs] = aa.hi[0]/rr.hi[0];
			hd[ii+sn] = bb.hi[0]/rr.hi[0];
			s->value[i1] = -hd[ii+sn]*s->value[ii];
			s->value[ii] = hd[ii+cs]*s->value[ii];

			aa.hi[0]  = hd[ii+cs]*hd[ii+iih];
			aa.hi[0] += hd[ii+sn]*hd[i1+iih];
			hd[ii+iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/hd[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0]/hd[jj+jj*h_dim];
		}

		/* z = sum_j y[j] * v[j] */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol2 >= nrm2 )
		{
			solver->iter  = iter;
			solver->iter2 = iter;
			solver->ptime = ptime;
			break;
		}

		/* rebuild the residual in v[0] from the rotations for the restart */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1+sn]*s->value[jj];
			s->value[jj]   = hd[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol = solver->tol;

	iter2=iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;

			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			/* apply the previous rotations to the new column */
			for(k=1;k<=ii;k++)
			{
				jj      = k-1;
				t.hi[0] = h[jj+iih].hi;
				t.lo[0] = h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi  = bb.hi[0];
				h[k+iih].lo  = bb.lo[0];
			}

			/* new rotation from (h[ii,ii], h[i1,ii]) */
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			/* same zero guard as the default-precision phase above, so the
			   two divisions below never divide by an exact zero */
			if( rr.hi[0]==0.0 )
			{
				rr.hi[0] = 1.0e-17;
				rr.lo[0] = 0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);

			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1]    = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii]    = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
				/* print with the running counter of this phase, matching the
				   index used for solver->rhistory[] just above */
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii]    = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0] = s->value[jj];
			t.lo[0] = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj]    = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}

		/* z = sum_j y[j] * v[j] */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k]    = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter2;
			solver->iter2   = iter;
			solver->resid   = nrm2;
			solver->ptime   = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rebuild the residual in v[0] from the rotations for the restart */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1]    = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj]      = bb.hi[0];
			s->value_lo[jj]   = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode = LIS_MAXITER;
	solver->iter    = iter2+1;
	solver->iter2   = iter;
	solver->resid   = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Restarted flexible GMRES.
 *
 * Reads from `solver`: the matrix A, right-hand side b, iterate x, the
 * options MAXITER, OUTPUT and RESTART (restart length m), the tolerance
 * solver->tol, and the work vectors
 *     work[0]        -> s (right-hand side of the small least-squares problem)
 *     work[2 .. ]    -> z[0..]  (preconditioned directions, one per step)
 *     work[m+2 .. ]  -> v[0..]  (orthonormalised basis vectors)
 *
 * h is a column-major array with leading dimension h_dim = m+1: column
 * (i-1) holds the Hessenberg entries of step i, and the two extra columns
 * at offsets cs and sn hold the cosines and sines of the plane rotations.
 *
 * On exit solver->retcode / iter / resid are filled in; solver->ptimes is
 * stored on the converged path only (as before).
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual() returns
 * non-zero or when the residual estimate drops to tol, LIS_MAXITER otherwise.
 *
 * NOTE(review): the result of lis_malloc() is not checked here; the
 * out-of-memory return code is not visible from this block.
 */
LIS_INT lis_fgmres(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR b, x;
    LIS_VECTOR s, *z, *v;
    LIS_SCALAR *h;
    LIS_SCALAR aa, bb, rr, a2, b2, t;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_REAL rnorm;
    LIS_INT iter, maxiter, output;
    LIS_INT i, j, k, m;
    LIS_INT ii, i1, iiv, i1v, iih, jj;
    LIS_INT h_dim;
    LIS_INT cs, sn;
    double times, ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    b       = solver->b;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    m       = solver->options[LIS_OPTIONS_RESTART];
    h_dim   = m+1;
    ptimes  = 0.0;

    s = solver->work[0];
    z = &solver->work[2];
    v = &solver->work[m+2];

    /* Tag names this function (it used to carry the lis_gmres tag). */
    h  = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (h_dim+1) * (h_dim+2),"lis_fgmres::h" );
    cs = (m+1)*h_dim;
    sn = (m+2)*h_dim;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
    {
        lis_free(h);
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol   = solver->tol;
    rnorm = 1.0 / bnrm2;

    /*
     * The residual estimate is |s[i]| and s starts as rnorm * e_1, so rnorm
     * is its value before any step.  Seeding nrm2 here keeps solver->resid
     * defined when maxiter is 0 and the loop below never runs.
     */
    nrm2 = rnorm;

    iter = 0;
    while( iter<maxiter )
    {
        /* first column of V: v = r / ||r||_2 */
        lis_vector_scale(bnrm2,v[0]);

        /* s = ||r||_2 e_1 */
        lis_vector_set_all(0,s);
        s->value[0] = rnorm;

        i = 0;
        do
        {
            iter++;
            i++;
            ii  = i-1;
            i1  = i;
            iiv = i-1;
            i1v = i;
            iih = (i-1)*h_dim;

            /* z = M^-1 v (kept per step so it can be reused for x) */
            times = lis_wtime();
            lis_psolve(solver, v[iiv], z[iiv]);
            ptimes += lis_wtime()-times;

            /* w = A z, stored in v[i] */
            LIS_MATVEC(A,z[iiv], v[i1v]);

            /* Gram-Schmidt: h[k,i] = <w,v[k]>, w = w - h[k,i] v[k] */
            for(k=0;k<i;k++)
            {
                lis_vector_dot(v[i1v],v[k],&t);
                h[k + iih] = t;
                lis_vector_axpy(-t,v[k],v[i1v]);
            }

            /* h[i+1,i] = ||w||, v[i+1] = w / h[i+1,i] */
            lis_vector_nrm2(v[i1v],&t);
            h[i1 + iih] = t;
            lis_vector_scale(1.0/t,v[i1v]);

            /* apply the rotations of the previous steps to the new column */
            for(k=1;k<=ii;k++)
            {
                jj  = k-1;
                t   = h[jj + iih];
                aa  = h[jj + cs]*t;
                aa += h[jj + sn]*h[k + iih];
                bb  = -h[jj + sn]*t;
                bb += h[jj + cs]*h[k + iih];
                h[jj + iih] = aa;
                h[k + iih]  = bb;
            }

            /* new rotation that zeroes the sub-diagonal entry */
            aa = h[ii + iih];
            bb = h[i1 + iih];
            a2 = aa*aa;
            b2 = bb*bb;
            rr = sqrt(a2 + b2);
            if( rr==0.0 ) rr=1.0e-17;   /* avoid dividing by zero below */
            h[ii + cs] = aa / rr;
            h[ii + sn] = bb / rr;

            /* rotate s; s[i1] must be written before s[ii] is overwritten */
            s->value[i1] = -h[ii + sn]*s->value[ii];
            s->value[ii] =  h[ii + cs]*s->value[ii];

            aa  = h[ii + cs]*h[ii + iih];
            aa += h[ii + sn]*h[i1 + iih];
            h[ii + iih] = aa;

            /* convergence check */
            nrm2 = fabs(s->value[i1]);

            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
            }

            if( tol >= nrm2 ) break;
        } while( i<m && iter <maxiter );

        /* Solve H*Y = S by back substitution (H is upper triangular now) */
        s->value[ii] = s->value[ii] / h[ii + iih];
        for(k=1;k<=ii;k++)
        {
            jj = ii-k;
            t  = s->value[jj];
            for(j=jj+1;j<=ii;j++)
            {
                t -= h[jj + j*h_dim]*s->value[j];
            }
            s->value[jj] = t / h[jj + jj*h_dim];
        }

        /* x = x + sum_j y[j] z[j] */
        for(j=0;j<=ii;j++)
        {
            lis_vector_axpy(s->value[j],z[j],x);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            lis_free(h);
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* restart: recompute the residual b - A x explicitly into v[0] */
        LIS_MATVEC(A,x,v[0]);
        lis_vector_xpay(b,-1.0,v[0]);
        lis_vector_nrm2(v[0],&rnorm);
        bnrm2 = 1.0 / rnorm;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter+1;
    solver->resid   = nrm2;
    lis_free(h);
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * CGS iteration using the quad (hi/lo double-double) vector and scalar
 * kernels.
 *
 * Reads from `solver`: matrix A, iterate x, options MAXITER / OUTPUT /
 * CONV_COND, tolerance solver->tol and work vectors work[0..6].  Several
 * vectors deliberately share storage: qhat and u both use work[5], uhat and
 * vhat both use work[6].
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   the residual measure dropped to tol (tol >= nrm2);
 *   LIS_BREAKDOWN - <rtld,r> or <rtld,A*phat> is exactly zero;
 *   LIS_MAXITER   - maxiter iterations were done without converging.
 * solver->retcode / iter / resid are set on every iterative exit, and
 * solver->ptime on the converged one.
 *
 * NOTE(review): nrm2 has not been assigned yet if a breakdown happens in
 * the first iteration (or if maxiter is 0); solver->resid is then
 * indeterminate.  A suitable initial value is not visible from this block.
 */
LIS_INT lis_cgs_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter,maxiter,output,conv;
    double time,ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    r    = solver->work[0];
    rtld = solver->work[1];
    p    = solver->work[2];
    phat = solver->work[3];
    q    = solver->work[4];
    qhat = solver->work[5];
    u    = solver->work[5];
    uhat = solver->work[6];
    vhat = solver->work[6];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, q);
    lis_vector_set_allex_nm(0.0, p);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

        /* u = r + beta*q */
        lis_vector_axpyzex_mmmm(beta,q,r,u);

        /* p = u + beta*(q + beta*p) */
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);

        /* phat = M^-1 * p */
        time = lis_wtime();
        lis_psolve(solver, p, phat);
        ptime += lis_wtime()-time;

        /* v = A * phat */
        lis_matvec(A,phat,vhat);

        /* tmpdot1 = <rtld,vhat> */
        lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

        /* q = u - alpha*vhat   (alpha is negated in place for the axpy) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

        /* phat = u + q */
        /* uhat = M^-1 * (u + q) */
        lis_vector_axpyzex_mmmm(one,u,q,phat);
        time = lis_wtime();
        lis_psolve(solver, phat, uhat);
        ptime += lis_wtime()-time;

        /* x = x + alpha*uhat   (flip alpha back to +alpha first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,uhat,x);

        /* qhat = A * uhat */
        lis_matvec(A,uhat,qhat);

        /* r = r - alpha*qhat   (alpha negated again) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,qhat,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        /*
         * Inclusive comparison, matching the other solvers in this file.
         * With the former strict test an exactly met tolerance (e.g. a zero
         * residual with tol == 0) was not accepted, and the next iteration
         * then reported LIS_BREAKDOWN because <rtld,r> had become zero.
         */
        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Restarted GMRES using the quad (hi/lo double-double) kernels.
 *
 * Reads from `solver`: matrix A, iterate x, options MAXITER / OUTPUT /
 * RESTART (restart length m), tolerance solver->tol, and the work vectors
 *     work[0] -> s   right-hand side of the small least-squares problem
 *     work[1] -> r   preconditioned correction
 *     work[2] -> z   scratch vector
 *     work[3..] -> v[0..]  basis vectors
 *
 * h is a column-major LIS_QUAD array with leading dimension h_dim = m+1;
 * column (i-1) holds the Hessenberg entries of step i and the two extra
 * columns at offsets cs and sn hold the rotation cosines and sines.
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual() returns
 * non-zero or when the scaled residual estimate reaches tol, otherwise
 * LIS_MAXITER.  solver->retcode / iter / iter2 / resid are set on exit and
 * solver->ptimes on the converged path.
 *
 * NOTE(review): M, b, conv and i1h are assigned but not used afterwards;
 * the result of lis_malloc() is not checked; nrm2 is unassigned if maxiter
 * is 0.  LIS_QUAD_SCALAR_MALLOC presumably binds each quad scalar to a
 * scratch slot - confirm against the macro definition.
 */
LIS_INT lis_gmres_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_PRECON M;
    LIS_VECTOR b,x;
    LIS_VECTOR r,s, z, *v;
    LIS_QUAD *h;
    LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
    LIS_QUAD_PTR rnorm;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter,maxiter,n,output,conv;
    double times,ptimes;
    LIS_INT i,j,k,m;
    LIS_INT ii,i1,iiv,i1v,iih,i1h,jj;
    LIS_INT h_dim;
    LIS_INT cs,sn;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    M = solver->precon;
    b = solver->b;
    x = solver->x;
    n = A->n;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output = solver->options[LIS_OPTIONS_OUTPUT];
    m = solver->options[LIS_OPTIONS_RESTART];
    conv = solver->options[LIS_OPTIONS_CONV_COND];
    h_dim = m+1;
    ptimes = 0.0;

    s = solver->work[0];
    r = solver->work[1];
    z = solver->work[2];
    v = &solver->work[3];

    LIS_QUAD_SCALAR_MALLOC(aa,0,1);
    LIS_QUAD_SCALAR_MALLOC(bb,1,1);
    LIS_QUAD_SCALAR_MALLOC(rr,2,1);
    LIS_QUAD_SCALAR_MALLOC(a2,3,1);
    LIS_QUAD_SCALAR_MALLOC(b2,4,1);
    LIS_QUAD_SCALAR_MALLOC(t,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(one,7,1);
    LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

    h = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (h_dim+1) * (h_dim+2),"lis_gmres_quad::h" );
    cs = (m+1)*h_dim;
    sn = (m+2)*h_dim;
    one.hi[0] = 1.0;
    one.lo[0] = 0.0;

    /* Initial residual goes into v[0]; a non-zero return ends the solve */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
    {
        lis_free(h);
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    iter=0;
    while( iter<maxiter )
    {
        /* first column of V */
        /* v = r / ||r||_2 */
        lis_vector_nrm2ex_mm(v[0],&rnorm);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
        lis_vector_scaleex_mm(tmp,v[0]);

        /* s = ||r||_2 e_1 */
        lis_vector_set_allex_nm(0.0,s);
        s->value[0] = rnorm.hi[0];
        s->value_lo[0] = rnorm.lo[0];

        i = 0;
        do
        {
            iter++;
            i++;
            ii = i-1;
            i1 = i;
            iiv = i-1;
            i1v = i;
            iih = (i-1)*h_dim;
            i1h = i*h_dim;

            /* z = M^-1 v */
            times = lis_wtime();
            lis_psolve(solver, v[iiv], z);
            ptimes += lis_wtime()-times;

            /* w = Az, stored in v[i] */
            LIS_MATVEC(A,z, v[i1v]);

            for(k=0;k<i;k++)
            {
                /* h[k,i] = <w,v[k]> */
                /* w = w - h[k,i]v[k]  (t is negated in place for the axpy) */
                lis_vector_dotex_mmm(v[i1v],v[k],&t);
                h[k + iih].hi = t.hi[0];
                h[k + iih].lo = t.lo[0];
                lis_quad_minus((LIS_QUAD *)t.hi);
                lis_vector_axpyex_mmm(t,v[k],v[i1v]);
            }

            /* h[i+1,i] = ||w|| */
            /* v[i+1] = w / h[i+1,i] */
            lis_vector_nrm2ex_mm(v[i1v],&t);
            h[i1 + iih].hi = t.hi[0];
            h[i1 + iih].lo = t.lo[0];
            lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
            lis_vector_scaleex_mm(tmp,v[i1v]);

            /* apply the rotations of the previous steps to the new column:
               aa = cs*h[jj] + sn*h[k], bb = -sn*h[jj] + cs*h[k] */
            for(k=1;k<=ii;k++)
            {
                jj = k-1;
                t.hi[0] = h[jj + iih].hi;
                t.lo[0] = h[jj + iih].lo;
                lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
                lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
                lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
                lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
                lis_quad_minus((LIS_QUAD *)bb.hi);
                lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
                lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
                h[jj + iih].hi = aa.hi[0];
                h[jj + iih].lo = aa.lo[0];
                h[k + iih].hi = bb.hi[0];
                h[k + iih].lo = bb.lo[0];
            }

            /* new rotation: rr = sqrt(aa^2 + bb^2), cs = aa/rr, sn = bb/rr */
            aa.hi[0] = h[ii + iih].hi;
            aa.lo[0] = h[ii + iih].lo;
            bb.hi[0] = h[i1 + iih].hi;
            bb.lo[0] = h[i1 + iih].lo;
            lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
            lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
            lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
            lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
            /* avoid dividing by zero below */
            if( rr.hi[0]==0.0 )
            {
                rr.hi[0]=1.0e-17;
                rr.lo[0]=0.0;
            }
            lis_quad_div((LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
            lis_quad_div((LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);

            /* rotate s: s[i1] = -sn*s[ii], s[ii] = cs*s[ii] */
            tmp.hi[0] = s->value[ii];
            tmp.lo[0] = s->value_lo[ii];
            lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)tmp.hi);
            lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)tmp.hi);
            lis_quad_minus((LIS_QUAD *)aa.hi);
            s->value[i1] = aa.hi[0];
            s->value_lo[i1] = aa.lo[0];
            s->value[ii] = bb.hi[0];
            s->value_lo[ii] = bb.lo[0];

            /* h[ii,i] = cs*h[ii,i] + sn*h[i1,i] */
            lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
            lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
            h[ii + iih].hi = aa.hi[0];
            h[ii + iih].lo = aa.lo[0];

            /* convergence check (only the hi part of s[i1] is used) */
            nrm2 = fabs(s->value[i1]) * bnrm2;

            if( output )
            {
                if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
                if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
            }

            if( tol >= nrm2 ) break;
        } while( i<m && iter <maxiter );

        /* Solve H*Y = S by back substitution; Y overwrites s */
        tmp.hi[0] = s->value[ii];
        tmp.lo[0] = s->value_lo[ii];
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii + iih]);
        s->value[ii] = tmp.hi[0];
        s->value_lo[ii] = tmp.lo[0];
        for(k=1;k<=ii;k++)
        {
            jj = ii-k;
            t.hi[0] = s->value[jj];
            t.lo[0] = s->value_lo[jj];
            for(j=jj+1;j<=ii;j++)
            {
                tmp.hi[0] = s->value[j];
                tmp.lo[0] = s->value_lo[j];
                lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj + j*h_dim]);
                lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
            }
            lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj + jj*h_dim]);
            s->value[jj] = tmp.hi[0];
            s->value_lo[jj] = tmp.lo[0];
        }

        /* z = sum_j y[j] v[j]: element-wise product for j = 0 ... */
        for(k=0;k<n;k++)
        {
            aa.hi[0] = s->value[0];
            aa.lo[0] = s->value_lo[0];
            bb.hi[0] = v[0]->value[k];
            bb.lo[0] = v[0]->value_lo[k];
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
            z->value[k] = tmp.hi[0];
            z->value_lo[k] = tmp.lo[0];
        }
        /* ... then the remaining columns accumulated with axpy */
        for(j=1;j<=ii;j++)
        {
            aa.hi[0] = s->value[j];
            aa.lo[0] = s->value_lo[j];
            lis_vector_axpyex_mmm(aa,v[j],z);
        }

        /* r = M^-1 z */
        times = lis_wtime();
        lis_psolve(solver, z, r);
        ptimes += lis_wtime()-times;

        /* x = x + r */
        lis_vector_axpyex_mmm(one,r,x);

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter = iter;
            solver->iter2 = 0;
            solver->resid = nrm2;
            solver->ptimes = ptimes;
            lis_free(h);
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* Not converged: walk the stored rotations backwards over s
           (s[jj-1] = -sn*s[jj], s[jj] = cs*s[jj]) ... */
        for(j=1;j<=i;j++)
        {
            jj = i1-j+1;
            tmp.hi[0] = s->value[jj];
            tmp.lo[0] = s->value_lo[jj];
            lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + sn]);
            lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + cs]);
            lis_quad_minus((LIS_QUAD *)aa.hi);
            s->value[jj-1] = aa.hi[0];
            s->value_lo[jj-1] = aa.lo[0];
            s->value[jj] = bb.hi[0];
            s->value_lo[jj] = bb.lo[0];
        }

        /* ... and fold the basis back into v[0] for the next cycle:
           v[0] += (s[0]-1)*v[0] + sum_{j>=1} s[j]*v[j] */
        for(j=0;j<=i1;j++)
        {
            t.hi[0] = s->value[j];
            t.lo[0] = s->value_lo[j];
            if( j==0 )
            {
                lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
            }
            lis_vector_axpyex_mmm(t,v[j],v[0]);
        }
    }

    solver->retcode = LIS_MAXITER;
    solver->iter = iter+1;
    solver->iter2 = 0;
    solver->resid = nrm2;
    lis_free(h);
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Preconditioned CGS iteration in working precision.
 *
 * Takes matrix, iterate, options (MAXITER, OUTPUT, CONV_COND), tolerance
 * and work vectors work[0..6] from `solver`.  qhat/u share work[5] and
 * uhat/vhat share work[6].
 *
 * Result codes: LIS_SUCCESS (initial-residual check returned non-zero, or
 * tol >= nrm2), LIS_BREAKDOWN (a required inner product is exactly zero),
 * LIS_MAXITER (iteration budget used up).  Every iterative exit stores
 * retcode, iter and resid in `solver`; ptimes is stored on success only.
 */
LIS_INT lis_cgs(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, p, phat, q, qhat, u, uhat, vhat;
    LIS_SCALAR alpha, beta, rho, rho_prev, sigma;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT status;
    double t_start, precon_time;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    x = solver->x;

    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];

    r    = solver->work[0];
    rtld = solver->work[1];
    p    = solver->work[2];
    phat = solver->work[3];
    q    = solver->work[4];
    qhat = solver->work[5];
    u    = solver->work[5];
    uhat = solver->work[6];
    vhat = solver->work[6];

    precon_time = 0.0;
    alpha       = (LIS_SCALAR)1.0;
    rho_prev    = (LIS_SCALAR)1.0;

    /* A non-zero return from the initial-residual check ends the solve. */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);
    lis_vector_set_all(0,q);
    lis_vector_set_all(0,p);

    status = LIS_MAXITER;
    iter   = 1;
    while( iter <= maxiter )
    {
        /* rho = <rtld,r>; a zero value stops the recurrence */
        lis_vector_dot(rtld,r,&rho);
        if( rho == 0.0 )
        {
            status = LIS_BREAKDOWN;
            goto finish;
        }

        beta = rho / rho_prev;

        /* u = r + beta*q,  p = u + beta*(q + beta*p) */
        lis_vector_axpyz(beta,q,r,u);
        lis_vector_xpay(q,beta,p);
        lis_vector_xpay(u,beta,p);

        /* phat = M^-1 * p */
        t_start = lis_wtime();
        lis_psolve(solver, p, phat);
        precon_time += lis_wtime()-t_start;

        /* vhat = A * phat,  sigma = <rtld,vhat> */
        LIS_MATVEC(A,phat,vhat);
        lis_vector_dot(rtld,vhat,&sigma);
        if( sigma == 0.0 )
        {
            status = LIS_BREAKDOWN;
            goto finish;
        }

        alpha = rho / sigma;

        /* q = u - alpha*vhat */
        lis_vector_axpyz(-alpha,vhat,u,q);

        /* phat = u + q,  uhat = M^-1 * phat */
        lis_vector_axpyz(1,u,q,phat);
        t_start = lis_wtime();
        lis_psolve(solver, phat, uhat);
        precon_time += lis_wtime()-t_start;

        /* x = x + alpha*uhat */
        lis_vector_axpy(alpha,uhat,x);

        /* qhat = A * uhat,  r = r - alpha*qhat */
        LIS_MATVEC(A,uhat,qhat);
        lis_vector_axpy(-alpha,qhat,r);

        /* residual measure selected by the convergence-condition option */
        lis_solver_get_residual[conv](r,solver,&nrm2);

        if( output & LIS_PRINT_MEM )
        {
            solver->residual[iter] = nrm2;
        }
        if( (output & LIS_PRINT_OUT) && A->my_rank == 0 )
        {
            printf("iter: %5d  residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            status = LIS_SUCCESS;
            goto finish;
        }

        rho_prev = rho;
        iter++;
    }

finish:
    /* single exit: iter is the current step, or maxiter+1 when exhausted */
    solver->retcode = status;
    solver->iter    = iter;
    solver->resid   = nrm2;
    if( status == LIS_SUCCESS )
    {
        solver->ptimes = precon_time;
    }
    LIS_DEBUG_FUNC_OUT;
    return status;
}
/*
 * Jacobi iteration: x += D^{-1} (b - A*s), where s is x passed through
 * lis_psolve() and D is the diagonal of A.
 *
 * Uses work[0..3] of `solver` as r, t, s and d.  The residual norm is
 * scaled by 1/||b||_2 before it is compared with the RESID parameter.
 * After the loop (converged or not) x is replaced by lis_psolve(x).
 *
 * Returns LIS_SUCCESS when the scaled residual is <= tol, LIS_MAXITER
 * otherwise; retcode, iter and resid are stored in `solver`, and ptime is
 * stored on success only.
 */
LIS_INT lis_jacobi(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR b, x;
    LIS_VECTOR diag_inv, resid, av, xs;
    LIS_REAL b_norm, rel_scale, nrm2, tol;
    LIS_INT iter, maxiter, output;
    LIS_INT status;
    double t0, precon_time;

    LIS_DEBUG_FUNC_IN;

    A = solver->A;
    b = solver->b;
    x = solver->x;

    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];

    resid    = solver->work[0];
    av       = solver->work[1];
    xs       = solver->work[2];
    diag_inv = solver->work[3];

    precon_time = 0.0;

    /* residual norms are reported relative to ||b||_2 */
    lis_vector_nrm2(b,&b_norm);
    rel_scale = 1.0 / b_norm;

    /* diag_inv = 1 / diag(A) */
    lis_matrix_get_diagonal(A,diag_inv);
    lis_vector_reciprocal(diag_inv);

    status = LIS_MAXITER;
    iter   = 1;
    while( iter <= maxiter )
    {
        /* xs = M^-1 x */
        t0 = lis_wtime();
        lis_psolve(solver,x,xs);
        precon_time += lis_wtime() - t0;

        /* resid = b - A*xs; its norm is taken before the diagonal scaling */
        lis_matvec(A,xs,av);
        lis_vector_axpyz(-1,av,b,resid);
        lis_vector_nrm2(resid,&nrm2);

        /* x += D^{-1} resid */
        lis_vector_pmul(resid,diag_inv,resid);
        lis_vector_axpy(1,resid,x);

        /* convergence check */
        nrm2 *= rel_scale;

        if( output & LIS_PRINT_MEM )
        {
            solver->rhistory[iter] = nrm2;
        }
        if( (output & LIS_PRINT_OUT) && A->my_rank == 0 )
        {
            lis_print_rhistory(iter,nrm2);
        }

        if( nrm2 <= tol )
        {
            status = LIS_SUCCESS;
            break;
        }
        iter++;
    }

    /* final x = M^-1 x; only the converged path adds this call to the timer */
    if( status == LIS_SUCCESS )
    {
        t0 = lis_wtime();
        lis_psolve(solver,x,xs);
        precon_time += lis_wtime() - t0;
    }
    else
    {
        lis_psolve(solver,x,xs);
    }
    lis_vector_copy(xs,x);

    solver->retcode = status;
    solver->iter    = iter;
    solver->resid   = nrm2;
    if( status == LIS_SUCCESS )
    {
        solver->ptime = precon_time;
    }
    LIS_DEBUG_FUNC_OUT;
    return status;
}
/*
 * CRS iteration (presumably Conjugate Residual Squared) using the quad
 * (hi/lo double-double) vector and scalar kernels.
 *
 * Reads from `solver`: matrix A, iterate x, options MAXITER / OUTPUT /
 * CONV_COND, tolerance solver->tol and work vectors work[0..5].  Storage is
 * shared on purpose: z, u and uq all use work[3]; q and ap use work[4];
 * map and auq use work[5].
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   tol >= nrm2;
 *   LIS_BREAKDOWN - <rtld,z> or <rtld,map> is exactly zero;
 *   LIS_MAXITER   - maxiter iterations were done without converging.
 * solver->retcode / iter / resid are set on every iterative exit, and
 * solver->ptimes on the converged one.
 *
 * NOTE(review): nrm2 has not been assigned yet if a breakdown happens in
 * the first iteration (or if maxiter is 0); solver->resid is then
 * indeterminate.
 */
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
    LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter,maxiter,output,conv;
    double times,ptimes;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptimes  = 0.0;

    r    = solver->work[0];
    rtld = solver->work[1];
    p    = solver->work[2];
    z    = solver->work[3];
    u    = solver->work[3];
    uq   = solver->work[3];
    q    = solver->work[4];
    ap   = solver->work[4];
    map  = solver->work[5];
    auq  = solver->work[5];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
    LIS_QUAD_SCALAR_MALLOC(one,6,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    /* shadow residual goes to p first, then rtld = A^T * p */
    lis_solver_set_shadowresidual(solver,r,p);
    LIS_MATVECT(A,p,rtld);

    lis_vector_set_allex_nm(0.0,q);
    lis_vector_set_allex_nm(0.0,p);
    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    one.hi[0]     = 1.0;
    one.lo[0]     = 0.0;

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* z = M^-1 * r */
        /* rho = <rtld,z> */
        times = lis_wtime();
        lis_psolve(solver, r, z);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,z,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = rho / rho_old */
        /* u = z + beta*q */
        /* p = u + beta*(q + beta*p) */
        /* ap = A * p   (overwrites q, which has been consumed above) */
        /* map = M^-1 * ap */
        /* tmpdot1 = <rtld,map> */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_vector_axpyzex_mmmm(beta,q,z,u);
        lis_vector_xpayex_mmm(q,beta,p);
        lis_vector_xpayex_mmm(u,beta,p);
        LIS_MATVEC(A,p,ap);
        times = lis_wtime();
        lis_psolve(solver, ap, map);
        ptimes += lis_wtime()-times;
        lis_vector_dotex_mmm(rtld,map,&tmpdot1);

        /* test breakdown */
        if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* alpha = rho / tmpdot1 */
        /* q = u - alpha*map */
        /* uq = u + q */
        /* auq = A * uq */
        /* x = x + alpha*uq */
        /* r = r - alpha*auq */
        /* alpha is negated in place before each axpy that needs -alpha */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,map,u,q);
        lis_vector_axpyzex_mmmm(one,u,q,uq);
        LIS_MATVEC(A,uq,auq);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,uq,x);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,auq,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
            /* Print from rank 0 only, like the other solvers in this file;
               without the guard every process prints the same line. */
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptimes  = ptimes;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * Sparse matrix-vector product benchmark.
 *
 * Usage: <prog> l m n iter [matrix_type]
 *
 * Builds a CSR matrix for an l x m x n grid in which every point is coupled
 * to its (up to 26) neighbours in the surrounding 3x3x3 cube: 26.0 on the
 * diagonal and -1.0 elsewhere.  The matrix is then converted to each
 * requested storage format, and `iter` products b = A*x (x = all ones) are
 * timed.  With matrix_type 0 (or omitted) formats 1..10 are run; otherwise
 * only the given one.  Rank 0 prints time, MFLOPS and the 2-norm of b.
 *
 * Every failure goes through CHKERR().
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
    LIS_MATRIX A,A0;
    LIS_VECTOR b,x;
    LIS_SCALAR *value;
    LIS_INT nprocs,my_rank;
    int int_nprocs,int_my_rank;
    LIS_INT nthreads,maxthreads;
    LIS_INT gn,nnz,np;
    LIS_INT i,j,k,si,sj,sk,ii,jj,ctr;
    LIS_INT l,m,n,nn;
    LIS_INT is,ie;
    LIS_INT err,iter,matrix_type,storage,ss,se;
    LIS_INT *ptr,*index;
    double time,time2,nnzs,nnzap,nnzt;
    LIS_SCALAR val;
    double commtime,comptime,flops;

    LIS_DEBUG_FUNC_IN;

    lis_initialize(&argc, &argv);

#ifdef USE_MPI
    MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
    nprocs = int_nprocs;
    my_rank = int_my_rank;
#else
    nprocs = 1;
    my_rank = 0;
#endif

    if( argc < 5 )
    {
        if( my_rank==0 )
        {
            printf("Usage: %s l m n iter [matrix_type]\n", argv[0]);
        }
        CHKERR(1);
    }

    l = atoi(argv[1]);
    m = atoi(argv[2]);
    n = atoi(argv[3]);
    iter = atoi(argv[4]);
    /* argv[argc] is NULL, so this also covers the "exactly 4 arguments" case */
    if (argv[5] == NULL)
    {
        storage = 0;
    }
    else
    {
        storage = atoi(argv[5]);
    }

    if( iter<=0 )
    {
#ifdef _LONG__LONG
        if( my_rank==0 ) printf("iter=%lld <= 0\n",iter);
#else
        if( my_rank==0 ) printf("iter=%d <= 0\n",iter);
#endif
        CHKERR(1);
    }
    if( l<=0 || m<=0 || n<=0 )
    {
#ifdef _LONG__LONG
        if( my_rank==0 ) printf("l=%lld <=0, m=%lld <=0 or n=%lld <=0\n",l,m,n);
#else
        if( my_rank==0 ) printf("l=%d <=0, m=%d <=0 or n=%d <=0\n",l,m,n);
#endif
        CHKERR(1);
    }
    if( storage<0 || storage>11 )
    {
#ifdef _LONG__LONG
        if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",storage,storage);
#else
        if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",storage,storage);
#endif
        CHKERR(1);
    }

    if( my_rank==0 )
    {
        printf("\n");
#ifdef _LONG__LONG
        printf("number of processes = %lld\n",nprocs);
#else
        printf("number of processes = %d\n",nprocs);
#endif
    }

#ifdef _OPENMP
    nthreads = omp_get_num_procs();
    maxthreads = omp_get_max_threads();
    if( my_rank==0 )
    {
#ifdef _LONG__LONG
        printf("max number of threads = %lld\n", nthreads);
        printf("number of threads = %lld\n", maxthreads);
#else
        printf("max number of threads = %d\n", nthreads);
        printf("number of threads = %d\n", maxthreads);
#endif
    }
#else
    nthreads = 1;
    maxthreads = 1;
#endif

    /* create matrix and vectors */
    nn = l*m*n;
    err = lis_matrix_create(LIS_COMM_WORLD,&A0);
    /* This result used to be overwritten unchecked by the next call;
       A0 is dereferenced below, so stop here if creation failed. */
    CHKERR(err);
    err = lis_matrix_set_size(A0,0,nn);
    CHKERR(err);

    /* at most 27 entries per row: the point itself plus 26 neighbours */
    ptr = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
    if( ptr==NULL ) CHKERR(1);
    index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT));
    if( index==NULL ) CHKERR(1);
    value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR));
    if( value==NULL ) CHKERR(1);

    lis_matrix_get_range(A0,&is,&ie);
    ctr = 0;
    for(ii=is;ii<ie;ii++)
    {
        /* global row ii -> grid coordinates (i,j,k), k running fastest */
        i = ii/(m*n);
        j = (ii - i*m*n)/n;
        k = ii - i*m*n - j*n;
        for(si=-1;si<=1;si++)
        {
            if( i+si>-1 && i+si<l )
            {
                for(sj=-1;sj<=1;sj++)
                {
                    if( j+sj>-1 && j+sj<m )
                    {
                        for(sk=-1;sk<=1;sk++)
                        {
                            if( k+sk>-1 && k+sk<n )
                            {
                                jj = ii + si*m*n + sj*n + sk;
                                index[ctr] = jj;
                                if( jj==ii ) { value[ctr++] = 26.0;}
                                else { value[ctr++] = -1.0;}
                            }
                        }
                    }
                }
            }
        }
        ptr[ii-is+1] = ctr;
    }
    ptr[0] = 0;

    err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
    CHKERR(err);
    err = lis_matrix_assemble(A0);
    CHKERR(err);

    /* from here on n is the local row count, not the grid extent */
    n   = A0->n;
    gn  = A0->gn;
    nnz = A0->nnz;
    np  = A0->np-n;

#ifdef USE_MPI
    MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
    nnzap = (double)i / (double)nprocs;
    nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
    nnz   = i;
    MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
    nnzs  = (nnzs / (double)nprocs)/nnzap;
    MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
    np = i;
#endif

    if( my_rank==0 )
    {
#ifdef _LONG__LONG
        printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz);
        printf("number of iterations = %lld\n\n",iter);
#else
        printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz);
        printf("number of iterations = %d\n\n",iter);
#endif
    }

    err = lis_vector_duplicate(A0,&x);
    if( err ) CHKERR(err);
    err = lis_vector_duplicate(A0,&b);
    if( err ) CHKERR(err);

    lis_matrix_get_range(A0,&is,&ie);
    for(i=0;i<n;i++)
    {
        err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
        CHKERR(err);    /* previously assigned but never examined */
    }
    for(i=0;i<n;i++)
    {
        lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
    }

    /*
       MPI version of VBR is not implemented.
       DNS is also excluded to reduce memory usage.
    */
    if (storage==0)
    {
        ss = 1;
        se = 11;
    }
    else
    {
        ss = storage;
        se = storage+1;
    }

    for (matrix_type=ss;matrix_type<se;matrix_type++)
    {
        if ( nprocs>1 && matrix_type==9 ) continue;

        err = lis_matrix_duplicate(A0,&A);
        CHKERR(err);    /* A is used immediately below */
        lis_matrix_set_type(A,matrix_type);
        err = lis_matrix_convert(A0,A);
        if( err ) CHKERR(err);

        comptime = 0.0;
        commtime = 0.0;

        for(i=0;i<iter;i++)
        {
#ifdef USE_MPI
            MPI_Barrier(A->comm);
            time = lis_wtime();
            lis_send_recv(A->commtable,x->value);
            commtime += lis_wtime() - time;
#endif
            time2 = lis_wtime();
            lis_matvec(A,x,b);
            comptime += lis_wtime() - time2;
        }
        lis_vector_nrm2(b,&val);

        if( my_rank==0 )
        {
            /* 2 flops (multiply + add) per stored non-zero per product */
            flops = 2.0*nnz*iter*1.0e-6 / comptime;
#ifdef USE_MPI
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
            printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
            printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#else
#ifdef _LONG__LONG
            printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
            printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#endif
#else
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
            printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
            printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#else
#ifdef _LONG__LONG
            printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
            printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#endif
#endif
        }
        lis_matrix_destroy(A);
    }

    lis_matrix_destroy(A0);
    lis_vector_destroy(b);
    lis_vector_destroy(x);

    lis_finalize();

    LIS_DEBUG_FUNC_OUT;

    return 0;
}
/*
 * Preconditioned MINRES iteration (Lanczos recurrence plus plane rotations).
 *
 * Reads from `solver`: matrix A, right-hand side b, iterate x, the RESID
 * parameter (tol), options MAXITER / OUTPUT, and work vectors work[0..6]
 * used as v1, v2, v3, v4, w0, w1, w2.
 *
 * The reported residual is relative to the norm of the preconditioned
 * initial residual: nrm2 = r_euc / r0_euc, with r_euc multiplied by
 * |sigma3| at every step.
 *
 * Returns LIS_SUCCESS when nrm2 <= tol, LIS_MAXITER otherwise.
 * solver->retcode / iter / resid are set on exit and solver->ptime on the
 * converged path.
 *
 * The work vectors belong to `solver` and are left intact on both exits.
 * The MAXITER path used to call lis_vector_destroy() on all seven, which
 * the SUCCESS path never did and which left dangling pointers in
 * solver->work.
 */
LIS_INT lis_minres(LIS_SOLVER solver)
{
    LIS_Comm comm;
    LIS_MATRIX A;
    LIS_VECTOR b,x;
    LIS_VECTOR v1,v2,v3,v4,w0,w1,w2;
    LIS_REAL nrm2,tol;
    LIS_SCALAR alpha;
    LIS_REAL beta2,beta3;
    LIS_SCALAR gamma1,gamma2,gamma3;
    LIS_SCALAR delta,eta;
    LIS_SCALAR sigma1,sigma2,sigma3;
    LIS_SCALAR rho1,rho2,rho3;
    LIS_REAL r0_euc,r_euc;
    LIS_INT iter,maxiter,output;
    double time,ptime;

    LIS_DEBUG_FUNC_IN;

    comm = LIS_COMM_WORLD;

    A       = solver->A;
    b       = solver->b;
    x       = solver->x;
    tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    ptime   = 0.0;

    v1 = solver->work[0];
    v2 = solver->work[1];
    v3 = solver->work[2];
    v4 = solver->work[3];
    w0 = solver->work[4];
    w1 = solver->work[5];
    w2 = solver->work[6];

    /* Lanczos start-up: v2 = M^-1 (b - A x) */
    lis_matvec(A,x,v2);
    lis_vector_xpay(b,-1.0,v2);

    time = lis_wtime();
    lis_psolve(solver,v2,v3);
    ptime += lis_wtime()-time;
    lis_vector_copy(v3,v2);

    /* Compute elements of Hermitian tridiagonal matrix */
    lis_vector_nrm2(v2,&r_euc);
    eta = beta2 = r0_euc = r_euc;
    gamma2 = gamma1 = 1.0;
    sigma2 = sigma1 = 0.0;

    lis_vector_set_all(0.0,v1);
    lis_vector_set_all(0.0,w0);
    lis_vector_set_all(0.0,w1);

    nrm2 = r_euc / r0_euc;

    for(iter=1;iter<=maxiter;iter++)
    {
        /* Lanczos step: v4 = M^-1 A v2 - alpha v2 - beta2 v1 */
        lis_vector_scale(1.0 / beta2,v2);
        lis_matvec(A,v2,v3);

        time = lis_wtime();
        lis_psolve(solver,v3,v4);
        ptime += lis_wtime()-time;

        lis_vector_dot(v2,v4,&alpha);
        lis_vector_axpy(-alpha,v2,v4);
        lis_vector_axpy(-beta2,v1,v4);
        lis_vector_nrm2(v4,&beta3);

        /* Compute elements of Hermitian tridiagonal matrix */
        delta = gamma2 * alpha - gamma1 * sigma2 * beta2;
        rho1 = sqrt(delta * delta + beta3 * beta3);
        rho2 = sigma2 * alpha + gamma1 * gamma2 * beta2;
        rho3 = sigma1 * beta2;
        gamma3 = delta / rho1;
        sigma3 = beta3 / rho1;

        /* w2 = (v2 - rho3 w0 - rho2 w1) / rho1,  x += gamma3 eta w2 */
        lis_vector_axpyz(-rho3,w0,v2,w2);
        lis_vector_axpy(-rho2,w1,w2);
        lis_vector_scale(1.0 / rho1,w2);

        lis_vector_axpy(gamma3 * eta,w2,x);

        /* convergence check */
        r_euc *= fabs(sigma3);
        nrm2 = r_euc / r0_euc;

        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
        }

        if( nrm2 <= tol )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* shift the recurrences by one step */
        eta *= -sigma3;

        lis_vector_copy(v2,v1);
        lis_vector_copy(v4,v2);
        lis_vector_copy(w1,w0);
        lis_vector_copy(w2,w1);

        beta2 = beta3;
        gamma1 = gamma2;
        gamma2 = gamma3;
        sigma1 = sigma2;
        sigma2 = sigma3;
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT nsolver, precon_type, maxiter; LIS_INT err; LIS_SCALAR *residual; LIS_VECTOR xx; LIS_INT output; LIS_INT scale; LIS_INT conv_cond; LIS_INT precision,is_use_at,storage,block; LIS_INT i,n,np; double p_c_times, p_i_times,itimes; LIS_SCALAR nrm2,tol,tol_w; LIS_VECTOR t; LIS_VECTOR bb; LIS_MATRIX AA,B; LIS_MATRIX At; char buf[64]; LIS_DEBUG_FUNC_IN; nsolver = solver->options[LIS_OPTIONS_SOLVER]; precon_type = solver->options[LIS_OPTIONS_PRECON]; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; scale = solver->options[LIS_OPTIONS_SCALE]; precision = solver->options[LIS_OPTIONS_PRECISION]; is_use_at = solver->options[LIS_OPTIONS_USE_AT]; storage = solver->options[LIS_OPTIONS_STORAGE]; block = solver->options[LIS_OPTIONS_STORAGE_BLOCK]; conv_cond = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol_w = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN]; solver->precision = precision; if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN); return LIS_ERR_ILL_ARG; } if( precon_type < 0 || precon_type > precon_register_type ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1); return LIS_ERR_ILL_ARG; } if( maxiter<0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter); return LIS_ERR_ILL_ARG; } #ifdef USE_MPI if( precon_type == LIS_PRECON_TYPE_SAAMG && solver->A->nprocs < 2) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs); return LIS_ERR_ILL_ARG; } #endif #ifdef USE_QUAD_PRECISION if( precision==LIS_PRECISION_QUAD && 
lis_solver_execute_quad[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 ) { solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter; } #endif err = lis_solver_check_params[nsolver](solver); if( err ) { solver->retcode = err; return err; } /* end parameter check */ solver->A = A; solver->b = b; /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { solver->retcode = err; return err; } if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] ) { if( output ) lis_printf(A->comm,"initial vector x = 0\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(0.0,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(0.0,xx); } else { lis_vector_set_allex_nm(0.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x = user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* create residual history vector */ if( solver->residual ) lis_free(solver->residual); residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual"); if( residual==NULL ) { LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); solver->retcode = err; return err; } residual[0] = 1.0; n = A->n; np = A->np; t = NULL; At = NULL; p_c_times = lis_wtime(); if( precon_type==LIS_PRECON_TYPE_IS ) { if( solver->d==NULL ) { err = 
lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR ) { solver->options[LIS_OPTIONS_ISLEVEL] = 0; } } else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } } else if( scale ) { if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI ) { if( A->matrix_type!=LIS_MATRIX_BSR ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,block,block,NULL,NULL); lis_matrix_set_type(B,storage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } err = lis_matrix_split(A); if( err ) return err; err = lis_matrix_diag_duplicate(A->D,&solver->WD); if( err ) return err; lis_matrix_diag_copy(A->D,solver->WD); lis_matrix_diag_inverse(solver->WD); lis_matrix_bscaling_bsr(A,solver->WD); lis_vector_duplicate(A,&t); lis_matrix_diag_matvec(solver->WD,b,t); lis_vector_copy(t,b); lis_vector_destroy(t); t = NULL; } else { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG ) { scale = LIS_SCALE_SYMM_DIAG; } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,scale); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { 
b->value[i] = b->value[i]*solver->d->value[i]; } } } } /* precon_type = precon->precon_type;*/ if( precon_type==LIS_PRECON_TYPE_IS ) { if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR ) { AA = solver->A; bb = solver->b; } else { AA = precon->A; bb = precon->Pb; } } else { AA = A; bb = b; } p_c_times = lis_wtime() - p_c_times; itimes = lis_wtime(); /* Matrix Convert */ solver->A = AA; solver->b = bb; err = lis_matrix_convert_self(solver); if( err ) { lis_vector_destroy(xx); lis_solver_work_destroy(solver); lis_free(residual); solver->retcode = err; return err; } block = solver->A->bnr; if( A->my_rank==0 ) { if( output ) printf("precision : %s\n", lis_precisionname[precision]); if( output ) printf("solver : %s %d\n", lis_solvername[nsolver],nsolver); switch( precon_type ) { case LIS_PRECON_TYPE_ILU: i = solver->options[LIS_OPTIONS_FILL]; if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR ) { if( output ) sprintf(buf,"Block %s(%d)",lis_preconname[precon_type],i); } else { if( output ) sprintf(buf,"%s(%d)",lis_preconname[precon_type],i); } break; default: if( output ) sprintf(buf,"%s",lis_preconname[precon_type]); break; } if( solver->options[LIS_OPTIONS_ADDS] && precon_type ) { if( output ) printf("precon : %s + additive schwarz\n", buf); } else { if( output ) printf("precon : %s\n", buf); } } switch(conv_cond) { case LIS_CONV_COND_NRM2_R: case LIS_CONV_COND_NRM2_B: if( A->my_rank==0 ) { if( output ) ("CONV_COND : ||r||_2 <= %6.1e*||r_0||_2\n", tol); } break; case LIS_CONV_COND_NRM1_B: lis_vector_nrm1(b,&nrm2); nrm2 = nrm2*tol_w + tol; if( A->my_rank==0 ) { if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2); } break; } if( A->my_rank==0 ) { if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC ) { if( output ) printf("storage : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block); } else { if( output ) printf("storage : %s\n", lis_storagename[AA->matrix_type-1]); } } /* 
create work vector */ err = lis_solver_malloc_work[nsolver](solver); if( err ) { lis_vector_destroy(xx); lis_precon_destroy(precon); solver->retcode = err; return err; } if( nsolver==LIS_SOLVER_BICG && is_use_at ) { if( output ) lis_printf(A->comm,"Use At\n"); lis_matrix_duplicate(AA,&At); lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]); lis_matrix_convert(AA,At); solver->At = At; } solver->x = xx; solver->xx = x; solver->precon = precon; solver->residual = residual; /* execute solver */ #ifndef USE_QUAD_PRECISION err = lis_solver_execute[nsolver](solver); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_solver_execute[nsolver](solver); } else if( precision==LIS_PRECISION_QUAD ) { err = lis_solver_execute_quad[nsolver](solver); } else if( precision==LIS_PRECISION_SWITCH ) { err = lis_solver_execute_switch[nsolver](solver); } #endif solver->retcode = err; if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { x->value[i] = xx->value[i]*solver->d->value[i]; } } else { #ifndef USE_QUAD_PRECISION lis_vector_copy(xx,x); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(xx,x); } else { lis_vector_copyex_mn(xx,x); } #endif } itimes = lis_wtime() - itimes - solver->ptimes; p_i_times = solver->ptimes; solver->ptimes = p_c_times + p_i_times; solver->p_c_times = p_c_times; solver->p_i_times = p_i_times; solver->times = solver->ptimes + itimes; solver->itimes = itimes; lis_solver_work_destroy(solver); lis_vector_duplicate(A,&t); xx->precision = LIS_PRECISION_DEFAULT; lis_matvec(A,xx,t); lis_vector_xpay(b,-1.0,t); if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { t->value[i] = t->value[i]/solver->d->value[i]; } } lis_vector_nrm2(t,&nrm2); /* solver->resid = nrm2; */ if( A->my_rank==0 ) { if( err ) { if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err); } else { if( 
output ) printf("lis_solve : normal end\n\n"); } } if( precision==LIS_PRECISION_DOUBLE ) { solver->iter2 = solver->iter; } else if( precision==LIS_PRECISION_QUAD ) { solver->iter2 = 0; } lis_vector_destroy(t); /* lis_vector_destroy(d);*/ lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }