/*
 * Fill the shadow residual rs0.
 *
 * solver : read for options[LIS_OPTIONS_INIT_SHADOW_RESID], A->n and, in
 *          USE_QUAD_PRECISION builds, solver->precision.
 * r0     : source vector; copied into rs0 unless the option is LIS_RANDOM.
 * rs0    : destination vector.
 *
 * With LIS_RANDOM the first A->n entries of rs0->value are drawn from
 * genrand_real1() after re-seeding with a fixed key, so the sequence is the
 * same on every call.  In the quad-precision path the low words are zeroed.
 *
 * Returns LIS_SUCCESS.
 */
LIS_INT lis_solver_set_shadowresidual(LIS_SOLVER solver, LIS_VECTOR r0, LIS_VECTOR rs0)
{
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;
	LIS_INT i,n,resid;

	LIS_DEBUG_FUNC_IN;

	resid = solver->options[LIS_OPTIONS_INIT_SHADOW_RESID];
	if( resid==LIS_RANDOM )
	{
		n = solver->A->n;
		init_by_array(init, length);

		/*
		 * genrand_real1() takes no state argument, so every call advances
		 * the generator state seeded just above.  The loops are therefore
		 * run serially: calling the generator from an OpenMP work-sharing
		 * loop would race on that state and make rs0 depend on thread
		 * scheduling.
		 */
		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		#endif
		{
			for(i=0;i<n;i++)
			{
				rs0->value[i] = genrand_real1();
			}
		}
		#ifdef USE_QUAD_PRECISION
		else
		{
			for(i=0;i<n;i++)
			{
				rs0->value[i]    = genrand_real1();
				rs0->value_lo[i] = 0.0;
			}
		}
		#endif
	}
	else
	{
		/* any other option value: the shadow residual is a copy of r0 */
		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		#endif
		{
			lis_vector_copy(r0,rs0);
		}
		#ifdef USE_QUAD_PRECISION
		else
		{
			lis_vector_copyex_mm(r0,rs0);
		}
		#endif
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * "No preconditioner" transposed solve: x receives a copy of b.
 *
 * In USE_QUAD_PRECISION builds the extended copy is used whenever
 * solver->precision is not LIS_PRECISION_DOUBLE; otherwise (and in all
 * non-quad builds) the plain copy is used.
 *
 * Returns LIS_SUCCESS.
 */
LIS_INT lis_psolvet_none(LIS_SOLVER solver, LIS_VECTOR b, LIS_VECTOR x)
{
	LIS_DEBUG_FUNC_IN;

#ifdef USE_QUAD_PRECISION
	if( solver->precision!=LIS_PRECISION_DOUBLE )
	{
		lis_vector_copyex_mm(b,x);
	}
	else
#endif
	{
		lis_vector_copy(b,x);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Restarted ORTHOMIN(m) iteration using the extended-precision ("ex")
 * vector kernels and hi/lo scalar pairs.
 *
 * solver : supplies A, precon, x, the work vectors, tol and the options
 *          MAXITER, OUTPUT, RESTART (m) and CONV_COND.
 *
 * Work-vector layout (m = restart length):
 *   work[0]              r
 *   work[1]              rtld
 *   work[2 .. m+2]       p[0..m]
 *   work[m+3 .. 2m+3]    ap[0..m]
 *   work[2m+4 .. 3m+4]   aptld[0..m]
 *
 * dotsave[] holds the values one / <aptld,aptld> of the most recent
 * directions, newest first; it is heap-allocated here and released on
 * every exit path.
 *
 * Returns LIS_SUCCESS (converged, or the initial-residual check reported
 * non-zero), LIS_BREAKDOWN (<aptld,aptld> was exactly zero) or LIS_MAXITER.
 * solver->retcode / iter / resid are set on the in-loop exits.
 *
 * NOTE(review): nrm2 is first assigned by the in-loop residual check, so a
 * breakdown in the very first iteration stores an unset value in
 * solver->resid — confirm what callers expect there.
 */
LIS_INT lis_orthomin_quad(LIS_SOLVER solver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, *p, *ap, *aptld;
	LIS_QUAD *dotsave;
	LIS_QUAD_PTR alpha, beta, tmp, one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;
	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	M       = solver->precon;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,3,1);
	LIS_QUAD_SCALAR_MALLOC(one,4,1);

	r     = solver->work[0];
	rtld  = solver->work[1];
	p     = &solver->work[2];
	ap    = &solver->work[  (m+1)+2];
	aptld = &solver->work[2*(m+1)+2];

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	dotsave = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (m+1),"lis_orthomin_quad::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		/* dotsave is already allocated: release it like every other exit */
		lis_free(dotsave);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	iter=1;
	while( iter<=maxiter )
	{
		/* slot of the current direction in the circular buffers */
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copyex_mm(rtld,p[ip]);

		/* ap[ip]    = A*p[ip]       */
		/* aptld[ip] = M^-1 ap[ip]   */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		/* orthogonalise against at most m previous directions */
		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			ip0 = (ip+m+1-l) % (m+1);

			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dotex_mmm(aptld[ip],aptld[ip0],&beta);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,&dotsave[l-1]);
			lis_quad_minus((LIS_QUAD *)beta.hi);

			lis_vector_axpyex_mmm(beta,p[ip0]    ,p[ip]);
			lis_vector_axpyex_mmm(beta,ap[ip0]   ,ap[ip]);
			lis_vector_axpyex_mmm(beta,aptld[ip0],aptld[ip]);
		}

		/* age the saved values by one slot to make room for the new one */
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dotex_mmm(aptld[ip],aptld[ip],&tmp);
		dotsave[0].hi = tmp.hi[0];
		dotsave[0].lo = tmp.lo[0];
		/* test breakdown */
		if( tmp.hi[0]==0.0 && tmp.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		/* dotsave[0] = one / dotsave[0] */
		lis_quad_div(&dotsave[0],(LIS_QUAD *)one.hi,&dotsave[0]);

		/* alpha = <rtld,Aptld[ip]> * dotsave[0] */
		lis_vector_dotex_mmm(rtld,aptld[ip],&alpha);
		lis_quad_mul((LIS_QUAD *)alpha.hi,(LIS_QUAD *)alpha.hi,&dotsave[0]);

		/* x += alpha*p[ip]; then with alpha negated: r and rtld updates */
		lis_vector_axpyex_mmm( alpha,p[ip],x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap[ip],r);
		lis_vector_axpyex_mmm(alpha,aptld[ip],rtld);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		iter++;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * BiCR iteration written with the extended-precision ("ex") vector kernels
 * and hi/lo scalar pairs.
 *
 * solver : supplies A, precon, b, x, work[0..9], tol and the options
 *          MAXITER, OUTPUT and CONV_COND.
 *
 * Returns LIS_SUCCESS (converged, or the initial-residual check reported
 * non-zero), LIS_BREAKDOWN (a denominator dot product was exactly zero) or
 * LIS_MAXITER.  solver->retcode / iter / resid are set on the in-loop exits;
 * solver->ptimes only on convergence.
 *
 * The scalar alpha is negated in place part-way through each iteration and
 * stays negated until it is recomputed, so the order of the axpy calls
 * below is significant.
 *
 * NOTE(review): nrm2 is first assigned by the in-loop residual check, so a
 * breakdown on tmpdot1 in the first iteration stores an unset value in
 * solver->resid.  At, M, b and n are assigned but not used afterwards.
 */
LIS_INT lis_bicr_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A,At;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	At = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes = 0.0;

	r = solver->work[0];
	rtld = solver->work[1];
	z = solver->work[2];
	ztld = solver->work[3];
	p = solver->work[4];
	ptld = solver->work[5];
	ap = solver->work[6];
	az = solver->work[7];
	map = solver->work[8];
	aptld = solver->work[9];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* z = M^-1 * r, ztld = M^-T * rtld, p = z, ptld = ztld, ap = A * z */
	/* rho_old = <ap,ztld>                                              */
	lis_psolve(solver, r, z);
	lis_psolvet(solver, rtld, ztld);
	lis_vector_copyex_mm(z,p);
	lis_vector_copyex_mm(ztld,ptld);
	LIS_MATVEC(A,z,ap);
	lis_vector_dotex_mmm(ap,ztld,&rho_old);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* aptld = A^T * ptld */
		/* map = M^-1 * ap */
		LIS_MATVECT(A,ptld,aptld);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;

		/* tmpdot1 = <map,aptld> */
		lis_vector_dotex_mmm(map,aptld,&tmpdot1);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho_old / tmpdot1 */
		/* x = x + alpha*p */
		/* r = r - alpha*ap */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		/* alpha is negated here and stays negated for the rest of the pass */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rtld = rtld - alpha*aptld */
		/* z = z - alpha*map */
		/* ztld = M^-T * rtld */
		/* az = A * z */
		/* rho = <az,ztld> */
		lis_vector_axpyex_mmm(alpha,aptld,rtld);
		lis_vector_axpyex_mmm(alpha,map,z);
		times = lis_wtime();
		lis_psolvet(solver, rtld, ztld);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,z,az);
		lis_vector_dotex_mmm(az,ztld,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = rho / rho_old */
		/* p = z + beta*p */
		/* ptld = ztld + beta*ptld */
		/* ap = az + beta*ap */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_vector_xpayex_mmm(z,beta,p);
		lis_vector_xpayex_mmm(ztld,beta,ptld);
		lis_vector_xpayex_mmm(az,beta,ap);

		/* carry rho into the next iteration */
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Transposed triangular / SSOR-type sweep on the split CSR parts of A.
 *
 * A    : matrix; reads A->n, the strict parts A->L and A->U (ptr/index/value)
 *        and the per-row scaling A->WD->value[].
 * B    : right-hand side; copied into X before the sweep (extended copy
 *        when B->precision is not LIS_PRECISION_DEFAULT in quad builds).
 * X    : result, updated in place.
 * flag : LIS_MATRIX_LOWER - forward sweep scattering through U
 *        LIS_MATRIX_UPPER - backward sweep scattering through L
 *        LIS_MATRIX_SSOR  - forward sweep through U followed by a backward
 *                           sweep through L
 *        any other value leaves X equal to the copy of B.
 *
 * In the OpenMP variants each thread owns the row range [is,ie) obtained
 * from LIS_GET_ISIE and only updates entries whose index lies inside that
 * range.  All four OpenMP sweeps now use the same guard
 * (jj<is || jj>=ie); the double-precision backward sweep previously
 * checked only jj<is, which would let a thread write into another
 * thread's rows if a stored index were >= ie.
 *
 * Returns LIS_SUCCESS.
 */
LIS_INT lis_matrix_solvet_csr(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
{
	LIS_INT i,j,jj,n;
	LIS_SCALAR t;
	LIS_SCALAR *x;
	#ifdef _OPENMP
		LIS_INT is,ie,my_rank,nprocs;
	#endif
	#ifdef USE_QUAD_PRECISION
		LIS_QUAD w1,w2;
		LIS_SCALAR *xl;
	#endif
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	n  = A->n;
	x  = X->value;
	#ifdef USE_QUAD_PRECISION
		xl = X->value_lo;
	#endif

	/* X = B */
	#ifdef USE_QUAD_PRECISION
		if( B->precision==LIS_PRECISION_DEFAULT )
		{
	#endif
			lis_vector_copy(B,X);
	#ifdef USE_QUAD_PRECISION
		}
		else
		{
			lis_vector_copyex_mm(B,X);
		}
	#endif

	switch(flag)
	{
	case LIS_MATRIX_LOWER:
		for(i=0;i<n;i++)
		{
			x[i] = x[i] * A->WD->value[i];
			for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
			{
				x[A->U->index[j]] -= A->U->value[j] * x[i];
			}
		}
		break;
	case LIS_MATRIX_UPPER:
		for(i=n-1;i>=0;i--)
		{
			x[i] = x[i] * A->WD->value[i];
			for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
			{
				x[A->L->index[j]] -= A->L->value[j] * x[i];
			}
		}
		break;
	case LIS_MATRIX_SSOR:
	#ifdef USE_QUAD_PRECISION
		if( B->precision==LIS_PRECISION_DEFAULT )
		{
	#endif
	#ifdef _OPENMP
			nprocs = omp_get_max_threads();
			#pragma omp parallel private(i,j,jj,t,is,ie,my_rank)
			{
				my_rank = omp_get_thread_num();
				LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
				/* forward sweep: x[i] itself is not overwritten here */
				for(i=is;i<ie;i++)
				{
					t = x[i] * A->WD->value[i];
					for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
					{
						jj = A->U->index[j];
						if( jj<is || jj>=ie ) continue;
						x[jj] -= A->U->value[j] * t;
					}
				}
				/* backward sweep: the scaled value is stored back */
				for(i=ie-1;i>=is;i--)
				{
					t    = x[i] * A->WD->value[i];
					x[i] = t;
					for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
					{
						jj = A->L->index[j];
						/* stay inside this thread's rows, as in the other sweeps */
						if( jj<is || jj>=ie ) continue;
						x[jj] -= A->L->value[j] * t;
					}
				}
			}
	#else
			for(i=0;i<n;i++)
			{
				t = x[i] * A->WD->value[i];
				for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
				{
					x[A->U->index[j]] -= A->U->value[j] * t;
				}
			}
			for(i=n-1;i>=0;i--)
			{
				t    = x[i] * A->WD->value[i];
				x[i] = t;
				for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
				{
					x[A->L->index[j]] -= A->L->value[j] * t;
				}
			}
	#endif
	#ifdef USE_QUAD_PRECISION
		}
		else
		{
	#ifdef _OPENMP
			nprocs = omp_get_max_threads();
			#ifndef USE_SSE2
				#pragma omp parallel private(i,j,jj,is,ie,w1,my_rank,p1,p2,tq,bhi,blo,chi,clo,sh,sl,th,tl,eh,el)
			#else
				#pragma omp parallel private(i,j,jj,is,ie,w1,my_rank,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
			#endif
			{
				my_rank = omp_get_thread_num();
				LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
				for(i=is;i<ie;i++)
				{
					/* w1 = x[i] * A->WD->value[i] */
					#ifndef USE_SSE2
						LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#else
						LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#endif
					for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
					{
						jj = A->U->index[j];
						if( jj<is || jj>=ie ) continue;
						/* x[jj] -= A->U->value[j] * w1 */
						#ifndef USE_SSE2
							LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
						#else
							LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
						#endif
					}
				}
				for(i=ie-1;i>=is;i--)
				{
					/* w1 = x[i] * A->WD->value[i]; x[i] = w1 */
					#ifndef USE_SSE2
						LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#else
						LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#endif
					x[i]  = w1.hi;
					xl[i] = w1.lo;
					for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
					{
						jj = A->L->index[j];
						if( jj<is || jj>=ie ) continue;
						/* x[jj] -= A->L->value[j] * w1 */
						#ifndef USE_SSE2
							LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
						#else
							LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
						#endif
					}
				}
			}
	#else
			for(i=0;i<n;i++)
			{
				/* w1 = x[i] * A->WD->value[i] */
				#ifndef USE_SSE2
					LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
				#else
					LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
				#endif
				for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
				{
					jj = A->U->index[j];
					/* x[jj] -= A->U->value[j] * w1 */
					#ifndef USE_SSE2
						LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
					#else
						LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
					#endif
				}
			}
			for(i=n-1;i>=0;i--)
			{
				/* w1 = x[i] * A->WD->value[i]; x[i] = w1 */
				#ifndef USE_SSE2
					LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
				#else
					LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
				#endif
				x[i]  = w1.hi;
				xl[i] = w1.lo;
				for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
				{
					jj = A->L->index[j];
					/* x[jj] -= A->L->value[j] * w1 */
					#ifndef USE_SSE2
						LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
					#else
						LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
					#endif
				}
			}
	#endif
		}
	#endif
		break;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Iterated preconditioner application:
 *
 *   X = 0, R = B
 *   repeat (options[LIS_OPTIONS_ADDS_ITER] + 1) times:
 *       R[n..np) = 0
 *       W  = lis_psolve_xxx[ptype](R)
 *       X += W                      (first n entries)
 *       after the last pass stop; otherwise R = B - A*X
 *
 * solver : supplies precon (A, work[0], work[1]) and the options
 *          ADDS_ITER and PRECON; in quad builds solver->precision selects
 *          the extended-precision path.
 * B      : right-hand side.
 * X      : result.
 *
 * Returns LIS_SUCCESS.
 */
LIS_INT lis_psolve_adds(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
	LIS_INT i,pass,n,np,last,ptype;
	LIS_SCALAR *b,*x,*w,*r,*rl;
	LIS_VECTOR W,R;
	LIS_PRECON precon;
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	precon = solver->precon;
	n      = precon->A->n;
	np     = precon->A->np;
	W      = precon->work[0];
	R      = precon->work[1];
	b      = B->value;
	x      = X->value;
	w      = W->value;
	r      = R->value;
	rl     = R->value_lo;
	last   = solver->options[LIS_OPTIONS_ADDS_ITER];
	ptype  = solver->options[LIS_OPTIONS_PRECON];

#ifdef USE_QUAD_PRECISION
	if( solver->precision!=LIS_PRECISION_DEFAULT )
	{
		/* extended-precision path */
		lis_vector_set_allex_nm(0.0,X);
		lis_vector_copyex_mm(B,R);
		for(pass=0;pass<=last;pass++)
		{
			/* clear the entries in [n,np), both words */
			for(i=n;i<np;i++)
			{
				r[i]  = 0.0;
				rl[i] = 0.0;
			}

			lis_psolve_xxx[ptype](solver,R,W);

			/* X += W */
			for(i=0;i<n;i++)
			{
				#ifndef USE_SSE2
					LIS_QUAD_ADD(X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],W->value[i],W->value_lo[i]);
				#else
					LIS_QUAD_ADD_SSE2(X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],W->value[i],W->value_lo[i]);
				#endif
			}

			if( pass==last ) break;

			/* R = B - A*X */
			lis_matvec(precon->A,X,R);
			for(i=0;i<n;i++)
			{
				#ifndef USE_SSE2
					LIS_QUAD_ADD(R->value[i],R->value_lo[i],B->value[i],B->value_lo[i],-R->value[i],-R->value_lo[i]);
				#else
					LIS_QUAD_ADD_SSE2(R->value[i],R->value_lo[i],B->value[i],B->value_lo[i],-R->value[i],-R->value_lo[i]);
				#endif
			}
		}
	}
	else
#endif
	{
		/* plain path */
		lis_vector_set_all(0.0,X);
		lis_vector_copy(B,R);
		for(pass=0;pass<=last;pass++)
		{
			/* clear the entries in [n,np) */
			for(i=n;i<np;i++)
			{
				r[i] = 0.0;
			}

			lis_psolve_xxx[ptype](solver,R,W);

			/* X += W */
			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<n;i++)
			{
				x[i] += w[i];
			}

			if( pass==last ) break;

			/* R = B - A*X */
			lis_matvec(precon->A,X,R);
			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<n;i++)
			{
				r[i] = b[i] - r[i];
			}
		}
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Transposed application of the ILUT factors stored in the preconditioner.
 *
 * solver : supplies precon->L, precon->U (per-row nnz/index/value arrays),
 *          precon->D (per-row scaling) and A->n.
 * B      : right-hand side; copied into X first.
 * X      : result, updated in place.
 *
 * Sweep performed on x (after x = B):
 *   forward  i = 0..n-1 : x[i] *= D[i];  x[jj] -= U[i][j] * x[i]
 *   backward i = n-1..0 :                x[jj] -= L[i][j] * x[i]
 *
 * With _OPENMP each thread runs both sweeps over its own row range [is,ie)
 * from LIS_GET_ISIE.  In USE_QUAD_PRECISION builds, when B->precision is
 * not LIS_PRECISION_DEFAULT the hi/lo macros are used on (x, xl).
 *
 * The quad path reads xl[] immediately, so B must be copied with the
 * extended copy lis_vector_copyex_mm in BOTH build variants; the serial
 * variant previously used lis_vector_copy there, leaving the low words of
 * X not copied from B.
 *
 * Returns LIS_SUCCESS.
 */
LIS_INT lis_psolvet_ilut_csr(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
#ifdef _OPENMP
	LIS_INT i,j,jj,n;
	LIS_INT is,ie,my_rank,nprocs;
	LIS_SCALAR *b,*x;
	LIS_MATRIX_ILU L,U;
	LIS_VECTOR D;
	LIS_PRECON precon;
	LIS_QUAD_DECLAR;
	#ifdef USE_QUAD_PRECISION
		LIS_SCALAR *xl;
	#endif

	LIS_DEBUG_FUNC_IN;

	precon = solver->precon;
	L = precon->L;
	U = precon->U;
	D = precon->D;
	b = B->value;
	x = X->value;
	#ifdef USE_QUAD_PRECISION
		xl = X->value_lo;
	#endif
	n = solver->A->n;
	nprocs = omp_get_max_threads();

	#ifdef USE_QUAD_PRECISION
	if( B->precision==LIS_PRECISION_DEFAULT )
	{
	#endif
		lis_vector_copy(B,X);
		#pragma omp parallel private(i,j,jj,is,ie,my_rank)
		{
			my_rank = omp_get_thread_num();
			LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
			for(i=is;i<ie;i++)
			{
				x[i] = D->value[i]*x[i];
				for(j=0;j<U->nnz[i];j++)
				{
					jj     = U->index[i][j];
					x[jj] -= U->value[i][j] * x[i];
				}
			}
			for(i=ie-1;i>=is;i--)
			{
				for(j=0;j<L->nnz[i];j++)
				{
					jj     = L->index[i][j];
					x[jj] -= L->value[i][j] * x[i];
				}
			}
		}
	#ifdef USE_QUAD_PRECISION
	}
	else
	{
		lis_vector_copyex_mm(B,X);
		nprocs = omp_get_max_threads();
		#ifndef USE_SSE2
			#pragma omp parallel private(i,j,jj,is,ie,my_rank,p1,p2,tq,bhi,blo,chi,clo,sh,sl,th,tl,eh,el)
		#else
			#pragma omp parallel private(i,j,jj,is,ie,my_rank,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
		#endif
		{
			my_rank = omp_get_thread_num();
			LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
			for(i=is;i<ie;i++)
			{
				/* x[i] = D->value[i]*x[i] */
				#ifndef USE_SSE2
					LIS_QUAD_MULD(x[i],xl[i],x[i],xl[i],D->value[i]);
				#else
					LIS_QUAD_MULD_SSE2(x[i],xl[i],x[i],xl[i],D->value[i]);
				#endif
				for(j=0;j<U->nnz[i];j++)
				{
					jj = U->index[i][j];
					/* x[jj] -= U->value[i][j] * x[i] */
					#ifndef USE_SSE2
						LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
					#else
						LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
					#endif
				}
			}
			for(i=ie-1;i>=is;i--)
			{
				for(j=0;j<L->nnz[i];j++)
				{
					jj = L->index[i][j];
					/* x[jj] -= L->value[i][j] * x[i] */
					#ifndef USE_SSE2
						LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
					#else
						LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
					#endif
				}
			}
		}
	}
	#endif

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
#else
	LIS_INT i,j,jj,n;
	LIS_SCALAR *b,*x;
	LIS_MATRIX_ILU L,U;
	LIS_VECTOR D;
	LIS_PRECON precon;
	LIS_QUAD_DECLAR;
	#ifdef USE_QUAD_PRECISION
		LIS_SCALAR *xl;
	#endif

	LIS_DEBUG_FUNC_IN;

	precon = solver->precon;
	L = precon->L;
	U = precon->U;
	D = precon->D;
	b = B->value;
	x = X->value;
	#ifdef USE_QUAD_PRECISION
		xl = X->value_lo;
	#endif
	n = solver->A->n;

	#ifdef USE_QUAD_PRECISION
	if( B->precision==LIS_PRECISION_DEFAULT )
	{
	#endif
		lis_vector_copy(B,X);
		for(i=0; i<n; i++)
		{
			x[i] = D->value[i]*x[i];
			for(j=0;j<U->nnz[i];j++)
			{
				jj     = U->index[i][j];
				x[jj] -= U->value[i][j] * x[i];
			}
		}
		for(i=n-1; i>=0; i--)
		{
			for(j=0;j<L->nnz[i];j++)
			{
				jj     = L->index[i][j];
				x[jj] -= L->value[i][j] * x[i];
			}
		}
	#ifdef USE_QUAD_PRECISION
	}
	else
	{
		/* extended copy: the macros below read xl[] as well as x[] */
		lis_vector_copyex_mm(B,X);
		for(i=0; i<n; i++)
		{
			/* x[i] = D->value[i]*x[i] */
			#ifndef USE_SSE2
				LIS_QUAD_MULD(x[i],xl[i],x[i],xl[i],D->value[i]);
			#else
				LIS_QUAD_MULD_SSE2(x[i],xl[i],x[i],xl[i],D->value[i]);
			#endif
			for(j=0;j<U->nnz[i];j++)
			{
				jj = U->index[i][j];
				/* x[jj] -= U->value[i][j] * x[i] */
				#ifndef USE_SSE2
					LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
				#else
					LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
				#endif
			}
		}
		for(i=n-1; i>=0; i--)
		{
			for(j=0;j<L->nnz[i];j++)
			{
				jj = L->index[i][j];
				/* x[jj] -= L->value[i][j] * x[i] */
				#ifndef USE_SSE2
					LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
				#else
					LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
				#endif
			}
		}
	}
	#endif

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
#endif
}
/*
 * Transposed "hybrid" preconditioner step: run the inner solver stored in
 * the preconditioner on  M x = B  (M = A) and copy its solution into X.
 *
 * For the duration of the inner solve the global hooks LIS_MATVEC and
 * LIS_MATVECT are exchanged (LIS_MATVEC -> lis_matvect,
 * LIS_MATVECT -> lis_matvec); they are reset to lis_matvec / lis_matvect
 * before returning.
 *
 * The inner solver's x is started from zero when its INITGUESS_ZEROS
 * option is set, otherwise from a copy of B.  In quad builds the extended
 * vector routines are used whenever solver->precision is not
 * LIS_PRECISION_DEFAULT.
 *
 * Returns LIS_SUCCESS; the inner solver's return value is not inspected.
 */
LIS_INT lis_psolvet_hybrid(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
	LIS_PRECON precon;
	LIS_SOLVER solver2;
	LIS_VECTOR xx;
	LIS_INT nsolver;

	LIS_DEBUG_FUNC_IN;

	precon     = solver->precon;
	solver2    = precon->solver;
	xx         = solver2->x;
	nsolver    = solver2->options[LIS_OPTIONS_SOLVER];
	solver2->b = B;

	/* exchange the product hooks for the inner solve */
	LIS_MATVEC  = lis_matvect;
	LIS_MATVECT = lis_matvec;

	/* starting guess of the inner solver */
	if( solver2->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
#ifdef USE_QUAD_PRECISION
		if( solver->precision!=LIS_PRECISION_DEFAULT )
		{
			lis_vector_set_allex_nm(0,xx);
		}
		else
#endif
		{
			lis_vector_set_all(0,xx);
		}
	}
	else
	{
#ifdef USE_QUAD_PRECISION
		if( solver->precision!=LIS_PRECISION_DEFAULT )
		{
			lis_vector_copyex_mm(B,xx);
		}
		else
#endif
		{
			lis_vector_copy(B,xx);
		}
	}

	/* execute solver */
	lis_solver_execute[nsolver](solver2);

	/* hand the inner solution back to the caller */
#ifdef USE_QUAD_PRECISION
	if( solver->precision!=LIS_PRECISION_DEFAULT )
	{
		lis_vector_copyex_mm(solver2->x,X);
	}
	else
#endif
	{
		lis_vector_copy(solver2->x,X);
	}

	/* put the product hooks back */
	LIS_MATVEC  = lis_matvec;
	LIS_MATVECT = lis_matvect;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * BiCGSTAB in two phases ("switch"):
 *
 *   phase 1 - at most options[LIS_OPTIONS_SWITCH_MAXITER] iterations using
 *             the plain vector kernels (lis_vector_dot/axpy/xpay) and only
 *             the .hi word of each scalar, stopping at solver->tol_switch;
 *   phase 2 - continues from iteration iter+1 up to
 *             options[LIS_OPTIONS_MAXITER] using the extended ("ex")
 *             kernels and hi/lo scalar pairs, stopping at solver->tol.
 *
 * solver : supplies A, precon, b, x, xx, work[0..6], tol, tol_switch,
 *          params and options.
 *
 * r and s are the SAME work vector (work[1]); "s = r - alpha*v" is done in
 * place on r and the later "r = s - omega*t" updates it again.
 *
 * During phase 1 the precision field of s, shat, p and phat is set to
 * LIS_PRECISION_DEFAULT; it is set back to LIS_PRECISION_QUAD once the
 * phase-1 loop has been left.
 *
 * Returns LIS_SUCCESS (phase-2 convergence, or the first initial-residual
 * check reported non-zero), LIS_BREAKDOWN or LIS_MAXITER.  On the in-loop
 * exits solver->iter holds the total iteration count and solver->iter2 the
 * phase-1 count.
 *
 * NOTE(review): the phase-1 LIS_BREAKDOWN returns leave the four vectors
 * at LIS_PRECISION_DEFAULT — confirm whether callers rely on a reset.
 * NOTE(review): nrm2 is first assigned by a residual check, so a breakdown
 * on rho in the very first iteration stores an unset value in
 * solver->resid.
 * NOTE(review): phase 2 starts at iter2 = iter+1 (>= 2), so its
 * "iter2==1" branch is never taken and the first phase-2 update of p uses
 * whatever v holds from phase 1 — verify this is intended.
 * M, b and n are assigned but not used afterwards.
 */
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, t,p,v, s, phat, shat;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	/* both tolerances are overwritten from solver->tol / tol_switch below */
	tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes = 0.0;

	rtld = solver->work[0];
	r = solver->work[1];
	s = solver->work[1];	/* alias of r */
	t = solver->work[2];
	p = solver->work[3];
	v = solver->work[4];
	phat = solver->work[5];
	shat = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	omega.hi[0] = 1.0;
	omega.lo[0] = 0.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2 = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* phase 1 treats these vectors as LIS_PRECISION_DEFAULT */
	s->precision = LIS_PRECISION_DEFAULT;
	shat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	/* ---------------- phase 1: plain kernels, .hi words only ---------------- */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		if( iter==1 )
		{
			lis_vector_copy(r,p);
		}
		else
		{
			/* beta = (rho / rho_old) * (alpha / omega) */
			beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]);

			/* p = r + beta*(p - omega*v) */
			lis_vector_axpy(-omega.hi[0],v,p);
			lis_vector_xpay(r,beta.hi[0],p);
		}

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,v);

		/* tmpdot1 = <rtld,v> */
		lis_vector_dot(rtld,v,&tmpdot1.hi[0]);
		/* test breakdown */
		/* (no test is performed on tmpdot1 here) */

		/* alpha = rho / tmpdot1 */
		alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

		/* s = r - alpha*v (in place: s aliases r) */
		lis_vector_axpy(-alpha.hi[0],v,r);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol2 )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
			}

			lis_vector_axpy(alpha.hi[0],phat,x);
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptimes = ptimes;
			break;
		}

		/* shat = M^-1 * s */
		times = lis_wtime();
		lis_psolve(solver, s, shat);
		ptimes += lis_wtime()-times;

		/* t = A * shat */
		LIS_MATVEC(A,shat,t);

		/* tmpdot1 = <t,s> */
		/* tmpdot2 = <t,t> */
		/* omega = tmpdot1 / tmpdot2 */
		lis_vector_dot(t,s,&tmpdot1.hi[0]);
		lis_vector_dot(t,t,&tmpdot2.hi[0]);
		omega.hi[0] = tmpdot1.hi[0] / tmpdot2.hi[0];

		/* x = x + alpha*phat + omega*shat */
		lis_vector_axpy(alpha.hi[0],phat,x);
		lis_vector_axpy(omega.hi[0],shat,x);

		/* r = s - omega*t */
		lis_vector_axpy(-omega.hi[0],t,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( nrm2 <= tol2 )
		{
			solver->iter = iter;
			solver->iter2 = iter;
			solver->ptimes = ptimes;
			break;
		}

		if( omega.hi[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		rho_old.hi[0] = rho.hi[0];
	}

	/* ---------------- switch: back to LIS_PRECISION_QUAD ---------------- */
	s->precision = LIS_PRECISION_QUAD;
	shat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	/* clear INITGUESS_ZEROS and copy x into solver->xx before re-initialising */
	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	/* only the .hi words are reset here */
	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	omega.hi[0] = 1.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* ---------------- phase 2: extended kernels, hi/lo scalars ---------------- */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		if( iter2==1 )
		{
			lis_vector_copyex_mm(r,p);
		}
		else
		{
			/* beta = (rho / rho_old) * (alpha / omega) */
			lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
			lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);

			/* p = r + beta*(p - omega*v) */
			/* omega is left negated; it is recomputed before its next use */
			lis_quad_minus((LIS_QUAD *)omega.hi);
			lis_vector_axpyex_mmm(omega,v,p);
			lis_vector_xpayex_mmm(r,beta,p);
		}

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,v);

		/* tmpdot1 = <rtld,v> */
		lis_vector_dotex_mmm(rtld,v,&tmpdot1);
		/* test breakdown */
		/* (no test is performed on tmpdot1 here) */

		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

		/* s = r - alpha*v (alpha negated in place; s aliases r) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,v,r);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( tol > nrm2 )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
			}

			/* flip alpha back before the x update */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,phat,x);
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* shat = M^-1 * s */
		times = lis_wtime();
		lis_psolve(solver, s, shat);
		ptimes += lis_wtime()-times;

		/* t = A * shat */
		LIS_MATVEC(A,shat,t);

		/* tmpdot1 = <t,s> */
		/* tmpdot2 = <t,t> */
		/* omega = tmpdot1 / tmpdot2 */
		lis_vector_dotex_mmm(t,s,&tmpdot1);
		lis_vector_dotex_mmm(t,t,&tmpdot2);
		lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

		/* x = x + alpha*phat + omega*shat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,phat,x);
		lis_vector_axpyex_mmm(omega,shat,x);

		/* r = s - omega*t (omega negated for the axpy, then flipped back) */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyex_mmm(omega,t,r);
		lis_quad_minus((LIS_QUAD *)omega.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter2, nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		if( omega.hi[0]==0.0 && omega.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter2;
			solver->iter2 = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter2;
	solver->iter2 = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * BiCRSTAB iteration written with the extended-precision ("ex") vector
 * kernels and hi/lo scalar pairs.
 *
 * solver : supplies A, precon, b, x, work[0..8], tol and the options
 *          MAXITER, OUTPUT and CONV_COND.
 *
 * Returns LIS_SUCCESS (converged, or the initial-residual check reported
 * non-zero), LIS_BREAKDOWN (rho was exactly zero) or LIS_MAXITER.
 * solver->retcode / iter / resid are set on the in-loop exits;
 * solver->ptimes only on convergence.
 *
 * alpha and omega are negated in place and flipped back several times per
 * iteration so that plain axpy/axpyz calls can subtract; the order of the
 * lis_quad_minus calls below is therefore significant.
 *
 * M, b and n are assigned but not used afterwards.
 */
LIS_INT lis_bicrstab_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A = solver->A;
	M = solver->precon;
	b = solver->b;
	x = solver->x;
	n = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output = solver->options[LIS_OPTIONS_OUTPUT];
	conv = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes = 0.0;

	rtld = solver->work[0];
	r = solver->work[1];
	s = solver->work[2];
	ms = solver->work[3];
	ams = solver->work[4];
	p = solver->work[5];
	ap = solver->work[6];
	map = solver->work[7];
	z = solver->work[8];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* p is used as scratch for the shadow residual: rtld = A^T * shadow */
	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);

	/* z = M^-1 * r, p = z, rho_old = <rtld,z> */
	times = lis_wtime();
	lis_psolve(solver, r, z);
	ptimes += lis_wtime()-times;
	lis_vector_copyex_mm(z,p);
	lis_vector_dotex_mmm(rtld,z,&rho_old);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* ap = A * p */
		/* map = M^-1 * ap */
		/* tmpdot1 = <rtld,map> */
		/* alpha = rho_old / tmpdot1 */
		/* s = r - alpha*ap */
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
		/* alpha is negated from here until the x update */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ap,r,s);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
			}

			/* flip alpha back before the x update */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,p,x);
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* ms = z - alpha*map (alpha is still negated here) */
		/* ams = A * ms */
		/* tmpdot1 = <ams,s> */
		/* tmpdot2 = <ams,ams> */
		/* omega = tmpdot1 / tmpdot2 */
		lis_vector_axpyzex_mmmm(alpha,map,z,ms);
		LIS_MATVEC(A,ms,ams);
		lis_vector_dotex_mmm(ams,s,&tmpdot1);
		lis_vector_dotex_mmm(ams,ams,&tmpdot2);
		lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

		/* x = x + alpha*p + omega*ms */
		/* r = s - omega*ams */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(omega,ms,x);
		/* omega is negated from here until the beta computation */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyzex_mmmm(omega,ams,s,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode = LIS_SUCCESS;
			solver->iter = iter;
			solver->resid = nrm2;
			solver->ptimes = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* z = M^-1 * r */
		/* rho = <rtld,z> */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,z,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode = LIS_BREAKDOWN;
			solver->iter = iter;
			solver->resid = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / omega) */
		/* p = z + beta*(p - omega*map) */
		/* omega is flipped back for the division, then negated again */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyex_mmm(omega,map,p);
		lis_vector_xpayex_mmm(z,beta,p);

		/* carry rho into the next iteration */
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode = LIS_MAXITER;
	solver->iter = iter;
	solver->resid = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * BiCGSafe with precision switching.
 *
 * Phase 1 runs the recurrences with the non-"ex" vector kernels, touching
 * only the high word of every scalar, for at most
 * LIS_OPTIONS_SWITCH_MAXITER steps or until the residual norm drops to
 * solver->tol_switch.  Phase 2 then flags several work vectors as
 * LIS_PRECISION_QUAD, calls lis_solver_get_initial_residual() again for the
 * current x, resets the recurrence state and continues with the
 * double-double ("ex") kernels until the residual norm is below solver->tol
 * or the overall counter exceeds LIS_OPTIONS_MAXITER.
 *
 * solver : solver object supplying A, x, the work vectors and the options.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (rho == 0) or LIS_MAXITER.  Except for
 * the early return taken when lis_solver_get_initial_residual() reports a
 * non-zero status, the same code is stored in solver->retcode.  On every
 * exit of phase 2, solver->iter holds the overall iteration counter and
 * solver->iter2 the phase-1 counter.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld, phat;
    LIS_VECTOR t, ttld, that, t0, t0hat;
    LIS_VECTOR y, w, u, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, nrm2, tol, tol2;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT iter2, maxiter2;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A        = solver->A;
    x        = solver->x;
    maxiter  = solver->options[LIS_OPTIONS_MAXITER];
    maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
    output   = solver->options[LIS_OPTIONS_OUTPUT];
    conv     = solver->options[LIS_OPTIONS_CONV_COND];
    tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
    tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
    ptime    = 0.0;

    rtld  = solver->work[0];
    r     = solver->work[1];
    rhat  = solver->work[2];
    p     = solver->work[3];
    ptld  = solver->work[4];
    phat  = solver->work[5];
    t     = solver->work[6];
    ttld  = solver->work[7];
    that  = solver->work[8];
    t0    = solver->work[9];
    t0hat = solver->work[10];
    y     = solver->work[11];
    w     = solver->work[12];
    u     = solver->work[13];
    z     = solver->work[14];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    /* "one" is kept at -1 so that axpy(one, a, b) computes b - a. */
    one.hi[0]     = -1.0;
    one.lo[0]     = 0.0;

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol2 = solver->tol_switch;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---- Phase 1: high words only, non-"ex" kernels ---- */
    for( iter=1; iter<=maxiter2; iter++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dot(rtld,r,&rho.hi[0]);

        /* test breakdown */
        if( rho.hi[0]==0.0 )
        {
            /* NOTE(review): on the very first iteration nrm2 has not been
               assigned yet, so the stored residual is indeterminate. */
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

        /* w = ttld + beta*ptld */
        lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpy(-1,u,p);
        lis_vector_xpay(rhat,beta.hi[0],p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);

        /* alpha = rho / tmpdot[0]  (no breakdown test on tmpdot[0]) */
        alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyz(-1,w,ptld,y);
        lis_vector_xpay(t,alpha.hi[0],y);
        lis_vector_axpy(-1,r,y);

        /* t = r - alpha*ptld */
        lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dot(y,y,&tmpdot[0].hi[0]);
        lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
        lis_vector_dot(y,t,&tmpdot[2].hi[0]);
        lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
        lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
        if( iter==1 )
        {
            /* first step: one-parameter minimisation, eta = 0 */
            qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
            eta.hi[0] = 0.0;
        }
        else
        {
            /* two-parameter minimisation of ||t - eta*y - qsi*ttld|| */
            tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
            qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
            eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpay(t0hat,beta.hi[0],u);
        lis_vector_axpy(-1,rhat,u);
        lis_vector_scale(eta.hi[0],u);
        lis_vector_axpy(qsi.hi[0],phat,u);

        /* z = qsi*rhat + eta*z - alpha*u */
        lis_vector_scale(eta.hi[0],z);
        lis_vector_axpy(qsi.hi[0],rhat,z);
        lis_vector_axpy(-alpha.hi[0],u,z);

        /* x = x + alpha*p + z */
        lis_vector_axpy(alpha.hi[0],p,x);
        lis_vector_axpy(1,z,x);

        /* r = t - eta*y - qsi*ttld */
        lis_vector_axpyz(-eta.hi[0],y,t,r);
        lis_vector_axpy(-qsi.hi[0],ttld,r);

        /* convergence check against the switching tolerance */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol2 >= nrm2 )
        {
            solver->iter  = iter;
            solver->iter2 = iter;
            solver->ptime = ptime;
            break;
        }

        lis_vector_copy(t,t0);
        rho_old.hi[0] = rho.hi[0];
    }

    /* ---- Switch: flag work vectors as quad and restart from current x ---- */
    r->precision    = LIS_PRECISION_QUAD;
    p->precision    = LIS_PRECISION_QUAD;
    t->precision    = LIS_PRECISION_QUAD;
    t0->precision   = LIS_PRECISION_QUAD;
    ptld->precision = LIS_PRECISION_QUAD;
    that->precision = LIS_PRECISION_QUAD;

    solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
    lis_vector_copyex_mn(x,solver->xx);

    /* Only the high words are reset; phase 1 never wrote the low words,
       so they still hold the 0.0 stored before phase 1. */
    rho_old.hi[0] = 1.0;
    alpha.hi[0]   = 1.0;
    qsi.hi[0]     = 1.0;
    one.hi[0]     = -1.0;

    /* Initial Residual */
    lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0, ttld);
    lis_vector_set_allex_nm(0.0, ptld);
    lis_vector_set_allex_nm(0.0, p);
    lis_vector_set_allex_nm(0.0, u);
    lis_vector_set_allex_nm(0.0, t);
    lis_vector_set_allex_nm(0.0, t0);

    /* ---- Phase 2: double-double scalars and "ex" kernels ---- */
    for( iter2=iter+1; iter2<=maxiter; iter2++ )
    {
        /* rho = <rtld,r> */
        lis_vector_dotex_mmm(rtld,r,&rho);

        /* test breakdown */
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

        /* w = ttld + beta*ptld */
        lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

        /* rhat = M^-1 * r */
        time = lis_wtime();
        lis_psolve(solver, r, rhat);
        ptime += lis_wtime()-time;

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(rhat,beta,p);

        /* ptld = A * p */
        lis_matvec(A,p,ptld);

        /* tmpdot[0] = <rtld,ptld> */
        lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);

        /* alpha = rho / tmpdot[0]  (no breakdown test on tmpdot[0]) */
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

        /* y = t - r + alpha*(-w + ptld) */
        lis_vector_axpyzex_mmmm(one,w,ptld,y);
        lis_vector_xpayex_mmm(t,alpha,y);
        lis_vector_axpyex_mmm(one,r,y);

        /* t = r - alpha*ptld; alpha stays sign-flipped until the x update */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

        /* that  = M^-1 * t    */
        /* phat  = M^-1 * ptld */
        /* t0hat = M^-1 * t0   */
        time = lis_wtime();
        lis_psolve(solver, t, that);
        lis_psolve(solver, ptld, phat);
        lis_psolve(solver, t0, t0hat);
        ptime += lis_wtime()-time;

        /* ttld = A * that */
        lis_matvec(A,that,ttld);

        /* tmpdot[0] = <y,y>       */
        /* tmpdot[1] = <ttld,t>    */
        /* tmpdot[2] = <y,t>       */
        /* tmpdot[3] = <ttld,y>    */
        /* tmpdot[4] = <ttld,ttld> */
        lis_vector_dotex_mmm(y,y,&tmpdot[0]);
        lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
        lis_vector_dotex_mmm(y,t,&tmpdot[2]);
        lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
        lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);

        /*
         * First step after the restart.  The test must use the phase-2
         * counter: "iter" is frozen at its phase-1 value here.  Because t,
         * ttld and ptld were just zeroed, w == 0 and y == -t on this step,
         * so the two-parameter formula would give eta = -1, qsi = 0 and
         * collapse r = t - eta*y - qsi*ttld to zero regardless of x.
         */
        if( iter2==iter+1 )
        {
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2 (qsi used as scratch) */
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

            /* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp (eta used as scratch) */
            lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

            /* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp (tmpdot[0] used as scratch) */
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
        }

        /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
        lis_vector_xpayex_mmm(t0hat,beta,u);
        lis_vector_axpyex_mmm(one,rhat,u);
        lis_vector_scaleex_mm(eta,u);
        lis_vector_axpyex_mmm(qsi,phat,u);

        /* z = qsi*rhat + eta*z - alpha*u  (alpha is still sign-flipped) */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,rhat,z);
        lis_vector_axpyex_mmm(alpha,u,z);

        /* x = x + alpha*p + z  (restore alpha, temporarily flip one to +1) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_vector_axpyex_mmm(one,z,x);
        lis_quad_minus((LIS_QUAD *)one.hi);

        /* r = t - eta*y - qsi*ttld  (eta and qsi flipped, then restored) */
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);
        lis_vector_axpyzex_mmmm(eta,y,t,r);
        lis_vector_axpyex_mmm(qsi,ttld,r);
        lis_quad_minus((LIS_QUAD *)eta.hi);
        lis_quad_minus((LIS_QUAD *)qsi.hi);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
            /* report under the same index the history entry is stored at */
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter2;
            solver->iter2   = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        lis_vector_copyex_mm(t,t0);
        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    /* Same counter convention as the success and breakdown exits above:
       iter = overall counter, iter2 = phase-1 counter. */
    solver->retcode = LIS_MAXITER;
    solver->iter    = iter2;
    solver->iter2   = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * BiCGSafe iteration carried out with double-double scalars and the "ex"
 * vector kernels.
 *
 * solver : solver object supplying A, x, the work vectors and the options.
 *
 * Returns LIS_SUCCESS once the residual norm is below solver->tol,
 * LIS_BREAKDOWN when <rtld,r> is exactly zero, and LIS_MAXITER when
 * LIS_OPTIONS_MAXITER steps did not converge.  The same code is stored in
 * solver->retcode, except on the early LIS_SUCCESS return taken when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_bicgsafe_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, rhat, p, ptld;
    LIS_VECTOR t, ttld;
    LIS_VECTOR y, v, u, utld, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta;
    LIS_QUAD_PTR tmp, dot[5], one;
    /* (hi,lo) views of the scalars above, as consumed by lis_quad_* */
    LIS_QUAD *q_alpha, *q_beta, *q_rho, *q_rho_old;
    LIS_QUAD *q_qsi, *q_eta, *q_tmp, *q_one;
    LIS_QUAD *q_dot[5];
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    LIS_INT k;
    double tstart, precon_time;

    LIS_DEBUG_FUNC_IN;

    A           = solver->A;
    x           = solver->x;
    maxiter     = solver->options[LIS_OPTIONS_MAXITER];
    output      = solver->options[LIS_OPTIONS_OUTPUT];
    conv        = solver->options[LIS_OPTIONS_CONV_COND];
    precon_time = 0.0;

    rtld = solver->work[0];
    r    = solver->work[1];
    rhat = solver->work[2];
    p    = solver->work[3];
    ptld = solver->work[4];
    t    = solver->work[5];
    ttld = solver->work[6];
    y    = solver->work[7];
    v    = solver->work[8];
    u    = solver->work[9];
    z    = solver->work[10];
    utld = solver->work[11];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(dot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(dot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(dot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(dot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(dot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    q_alpha   = (LIS_QUAD *)alpha.hi;
    q_beta    = (LIS_QUAD *)beta.hi;
    q_rho     = (LIS_QUAD *)rho.hi;
    q_rho_old = (LIS_QUAD *)rho_old.hi;
    q_qsi     = (LIS_QUAD *)qsi.hi;
    q_eta     = (LIS_QUAD *)eta.hi;
    q_tmp     = (LIS_QUAD *)tmp.hi;
    q_one     = (LIS_QUAD *)one.hi;
    for( k=0; k<5; k++ )
    {
        q_dot[k] = (LIS_QUAD *)dot[k].hi;
    }

    rho_old.hi[0] = 1.0;
    rho_old.lo[0] = 0.0;
    alpha.hi[0]   = 1.0;
    alpha.lo[0]   = 0.0;
    qsi.hi[0]     = 1.0;
    qsi.lo[0]     = 0.0;
    /* "one" is held at -1 so that axpy(one, a, b) yields b - a */
    one.hi[0]     = -1.0;
    one.lo[0]     = 0.0;

    /* starting residual; a non-zero status ends the solve right away */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    lis_vector_set_allex_nm(0.0,p);
    lis_vector_set_allex_nm(0.0,u);
    lis_vector_set_allex_nm(0.0,ptld);
    lis_vector_set_allex_nm(0.0,utld);

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* rho = <rtld,r>; an exact zero is treated as breakdown */
        lis_vector_dotex_mmm(rtld,r,&rho);
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            /* NOTE(review): nrm2 is still unassigned if this happens on
               the first pass. */
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi) */
        lis_quad_div(q_beta,q_rho,q_rho_old);
        lis_quad_div(q_tmp,q_alpha,q_qsi);
        lis_quad_mul(q_beta,q_beta,q_tmp);

        /* rhat = M^-1 * r,  v = A * rhat */
        tstart = lis_wtime();
        lis_psolve(solver, r, rhat);
        precon_time += lis_wtime()-tstart;
        lis_matvec(A,rhat,v);

        /* p = rhat + beta*(p - u) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(rhat,beta,p);

        /* ptld = v + beta*(ptld - utld) */
        lis_vector_axpyex_mmm(one,utld,ptld);
        lis_vector_xpayex_mmm(v,beta,ptld);

        /* alpha = rho / <rtld,ptld>  (denominator is not tested) */
        lis_vector_dotex_mmm(rtld,ptld,&dot[0]);
        lis_quad_div(q_alpha,q_rho,q_dot[0]);

        /* dot[0..4] = <y,y>, <v,r>, <y,r>, <v,y>, <v,v> */
        lis_vector_dotex_mmm(y,y,&dot[0]);
        lis_vector_dotex_mmm(v,r,&dot[1]);
        lis_vector_dotex_mmm(y,r,&dot[2]);
        lis_vector_dotex_mmm(v,y,&dot[3]);
        lis_vector_dotex_mmm(v,v,&dot[4]);

        if( iter!=1 )
        {
            /* tmp = dot[4]*dot[0] - dot[3]^2  (qsi doubles as scratch) */
            lis_quad_mul(q_tmp,q_dot[4],q_dot[0]);
            lis_quad_sqr(q_qsi,q_dot[3]);
            lis_quad_sub(q_tmp,q_tmp,q_qsi);

            /* qsi = (dot[0]*dot[1] - dot[2]*dot[3]) / tmp  (eta as scratch) */
            lis_quad_mul(q_qsi,q_dot[0],q_dot[1]);
            lis_quad_mul(q_eta,q_dot[2],q_dot[3]);
            lis_quad_sub(q_qsi,q_qsi,q_eta);
            lis_quad_div(q_qsi,q_qsi,q_tmp);

            /* eta = (dot[4]*dot[2] - dot[3]*dot[1]) / tmp  (dot[0] as scratch) */
            lis_quad_mul(q_eta,q_dot[4],q_dot[2]);
            lis_quad_mul(q_dot[0],q_dot[3],q_dot[1]);
            lis_quad_sub(q_eta,q_eta,q_dot[0]);
            lis_quad_div(q_eta,q_eta,q_tmp);
        }
        else
        {
            /* opening step: qsi = dot[1]/dot[4], eta = 0 */
            lis_quad_div(q_qsi,q_dot[1],q_dot[4]);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }

        /* t = qsi*ptld + eta*y */
        lis_vector_copyex_mm(y,t);
        lis_vector_scaleex_mm(eta,t);
        lis_vector_axpyex_mmm(qsi,ptld,t);

        /* ttld = M^-1 * t */
        tstart = lis_wtime();
        lis_psolve(solver, t, ttld);
        precon_time += lis_wtime()-tstart;

        /* u = ttld + eta*beta*u,  utld = A * u */
        lis_quad_mul(q_tmp,q_eta,q_beta);
        lis_vector_xpayex_mmm(ttld,tmp,u);
        lis_matvec(A,u,utld);

        /* z = qsi*rhat + eta*z - alpha*u  (alpha sign-flipped from here) */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,rhat,z);
        lis_quad_minus(q_alpha);
        lis_vector_axpyex_mmm(alpha,u,z);

        /* y = qsi*v + eta*y - alpha*utld  (alpha restored afterwards) */
        lis_vector_scaleex_mm(eta,y);
        lis_vector_axpyex_mmm(qsi,v,y);
        lis_vector_axpyex_mmm(alpha,utld,y);
        lis_quad_minus(q_alpha);

        /* x = x + alpha*p + z  ("one" briefly flipped to +1) */
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_quad_minus(q_one);
        lis_vector_axpyex_mmm(one,z,x);
        lis_quad_minus(q_one);

        /* r = r - alpha*ptld - y */
        lis_quad_minus(q_alpha);
        lis_vector_axpyex_mmm(alpha,ptld,r);
        lis_quad_minus(q_alpha);
        lis_vector_axpyex_mmm(one,y,r);

        /* residual norm, optional history/printing, stopping test */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM )
            {
                solver->rhistory[iter] = nrm2;
            }
            if( output & LIS_PRINT_OUT && A->my_rank==0 )
            {
                lis_print_rhistory(iter,nrm2);
            }
        }

        if( tol > nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = precon_time;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}
/*
 * BiCRSafe iteration carried out with double-double scalars and the "ex"
 * vector kernels.
 *
 * solver : solver object supplying A, x, the work vectors and the options.
 *
 * Returns LIS_SUCCESS once the residual norm is <= solver->tol,
 * LIS_BREAKDOWN when <rtld,amr> is exactly zero, and LIS_MAXITER when
 * LIS_OPTIONS_MAXITER steps did not converge.  The same code is stored in
 * solver->retcode, except on the early LIS_SUCCESS return taken when
 * lis_solver_get_initial_residual() reports a non-zero status.
 */
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
    LIS_VECTOR y, my, u, au, z;
    LIS_QUAD_PTR alpha, beta, rho, rho_old;
    LIS_QUAD_PTR qsi, eta, one;
    LIS_QUAD_PTR tmp, tmpdot[5];
    LIS_REAL bnrm2, nrm2, tol;
    LIS_INT iter, maxiter, output, conv;
    double time, ptime;

    LIS_DEBUG_FUNC_IN;

    A       = solver->A;
    x       = solver->x;
    maxiter = solver->options[LIS_OPTIONS_MAXITER];
    output  = solver->options[LIS_OPTIONS_OUTPUT];
    conv    = solver->options[LIS_OPTIONS_CONV_COND];
    ptime   = 0.0;

    rtld  = solver->work[0];
    r     = solver->work[1];
    mr    = solver->work[2];
    amr   = solver->work[3];
    p     = solver->work[4];
    ap    = solver->work[5];
    map   = solver->work[6];
    my    = solver->work[7];
    y     = solver->work[8];
    u     = solver->work[9];
    z     = solver->work[10];
    au    = solver->work[11];
    artld = solver->work[12];

    LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
    LIS_QUAD_SCALAR_MALLOC(beta,1,1);
    LIS_QUAD_SCALAR_MALLOC(rho,2,1);
    LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
    LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
    LIS_QUAD_SCALAR_MALLOC(eta,5,1);
    LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
    LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
    LIS_QUAD_SCALAR_MALLOC(one,13,1);

    /* Initial Residual */
    if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
    {
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }
    tol = solver->tol;

    lis_solver_set_shadowresidual(solver,r,rtld);

    /* artld = A^T * rtld */
    lis_matvect(A,rtld,artld);

    /* mr = M^-1 * r,  amr = A * mr */
    time = lis_wtime();
    lis_psolve(solver, r, mr);
    ptime += lis_wtime()-time;
    lis_matvec(A,mr,amr);

    /* rho_old = <rtld,amr>,  ap = amr,  p = mr */
    lis_vector_dotex_mmm(rtld,amr,&rho_old);
    lis_vector_copyex_mm(amr,ap);
    lis_vector_copyex_mm(mr,p);

    /* "one" is kept at -1 so that axpy(one, a, b) computes b - a. */
    one.hi[0] = -1.0;
    one.lo[0] = 0.0;

    /*
     * beta is only assigned at the end of an iteration, yet the first
     * iteration already forms eta*beta.  eta is zero there, but a stale
     * NaN/Inf in the scalar slot would still propagate into u, so give beta
     * a defined value up front.
     */
    beta.hi[0] = 0.0;
    beta.lo[0] = 0.0;

    for( iter=1; iter<=maxiter; iter++ )
    {
        /* map = M^-1 * ap */
        time = lis_wtime();
        lis_psolve(solver, ap, map);
        ptime += lis_wtime()-time;

        /* tmpdot[0] = <artld,map> */
        /* alpha = rho_old / tmpdot[0]  (denominator is not tested) */
        lis_vector_dotex_mmm(artld,map,&tmpdot[0]);
        lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);

        /* tmpdot[0] = <y,y>     */
        /* tmpdot[1] = <amr,r>   */
        /* tmpdot[2] = <y,r>     */
        /* tmpdot[3] = <amr,y>   */
        /* tmpdot[4] = <amr,amr> */
        lis_vector_dotex_mmm(y,y,&tmpdot[0]);
        lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
        lis_vector_dotex_mmm(y,r,&tmpdot[2]);
        lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
        lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);
        if( iter==1 )
        {
            /* first step: qsi = tmpdot[1]/tmpdot[4], eta = 0 */
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
            eta.hi[0] = 0.0;
            eta.lo[0] = 0.0;
        }
        else
        {
            /* tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]^2 (qsi used as scratch) */
            lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

            /* qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp (eta used as scratch) */
            lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
            lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
            lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

            /* eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp (tmpdot[0] used as scratch) */
            lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
            lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
            lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
            lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
        }

        /* u  = qsi*map + eta*my + eta*beta*u */
        /* au = A * u */
        lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
        lis_vector_scaleex_mm(tmp,u);
        lis_vector_axpyex_mmm(qsi,map,u);
        lis_vector_axpyex_mmm(eta,my,u);
        lis_matvec(A,u,au);

        /* z = qsi*mr + eta*z - alpha*u  (alpha sign-flipped from here) */
        lis_vector_scaleex_mm(eta,z);
        lis_vector_axpyex_mmm(qsi,mr,z);
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,u,z);

        /* y  = qsi*amr + eta*y - alpha*au */
        /* my = M^-1 * y */
        lis_vector_scaleex_mm(eta,y);
        lis_vector_axpyex_mmm(qsi,amr,y);
        lis_vector_axpyex_mmm(alpha,au,y);
        time = lis_wtime();
        lis_psolve(solver, y, my);
        ptime += lis_wtime()-time;

        /* x = x + alpha*p + z  (alpha restored, one flipped to +1) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_vector_axpyex_mmm(alpha,p,x);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(one,z,x);

        /* r = r - alpha*ap - y  (alpha flipped again, one back to -1) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_minus((LIS_QUAD *)one.hi);
        lis_vector_axpyex_mmm(alpha,ap,r);
        lis_vector_axpyex_mmm(one,y,r);

        /* convergence check */
        lis_solver_get_residual[conv](r,solver,&nrm2);
        if( output )
        {
            if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
            if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
        }

        if( tol >= nrm2 )
        {
            solver->retcode = LIS_SUCCESS;
            solver->iter    = iter;
            solver->resid   = nrm2;
            solver->ptime   = ptime;
            LIS_DEBUG_FUNC_OUT;
            return LIS_SUCCESS;
        }

        /* mr  = mr - alpha*map - my  (alpha is still sign-flipped) */
        /* amr = A * mr */
        /* rho = <rtld,amr> */
        lis_vector_axpyex_mmm(alpha,map,mr);
        lis_vector_axpyex_mmm(one,my,mr);
        lis_matvec(A,mr,amr);
        lis_vector_dotex_mmm(rtld,amr,&rho);
        if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
        {
            solver->retcode = LIS_BREAKDOWN;
            solver->iter    = iter;
            solver->resid   = nrm2;
            LIS_DEBUG_FUNC_OUT;
            return LIS_BREAKDOWN;
        }

        /* beta = (rho / rho_old) * (alpha / qsi)  (alpha restored first) */
        lis_quad_minus((LIS_QUAD *)alpha.hi);
        lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
        lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
        lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

        /* p  = mr  + beta*(p  - u)  */
        /* ap = amr + beta*(ap - au) */
        lis_vector_axpyex_mmm(one,u,p);
        lis_vector_xpayex_mmm(mr,beta,p);
        lis_vector_axpyex_mmm(one,au,ap);
        lis_vector_xpayex_mmm(amr,beta,ap);

        rho_old.hi[0] = rho.hi[0];
        rho_old.lo[0] = rho.lo[0];
    }

    solver->retcode = LIS_MAXITER;
    solver->iter    = iter;
    solver->resid   = nrm2;
    LIS_DEBUG_FUNC_OUT;
    return LIS_MAXITER;
}