LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT err; LIS_INT i,j,k,kk,bnr,bs; LIS_INT n,nr,annz,lfil,len; LIS_SCALAR gamma,t,tol,m; LIS_MATRIX A; LIS_MATRIX_ILU L,U; LIS_MATRIX_DIAG D; LIS_SCALAR tnorm, tolnorm; LIS_SCALAR buf_ns[16],buf_fact[16],*xnrm,*wn,*w; LIS_INT lenu,lenl,col,jpos,jrow,upos,para; LIS_INT *jbuf,*iw; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; nr = A->nr; bnr = A->bnr; bs = bnr*bnr; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; gamma = solver->params[LIS_PARAMS_GAMMA-LIS_OPTIONS_LEN]; annz = 10+A->bnnz / A->nr; lfil = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m); L = NULL; U = NULL; err = lis_matrix_ilu_create(nr,bnr,&L); if( err ) return err; err = lis_matrix_ilu_create(nr,bnr,&U); if( err ) return err; err = lis_matrix_ilu_setCR(L); if( err ) return err; err = lis_matrix_ilu_setCR(U); if( err ) return err; err = lis_matrix_diag_duplicateM(A,&D); if( err ) { return err; } w = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w"); if( w==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w"); if( xnrm==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w"); if( wn==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } jbuf = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw"); if( jbuf==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iw = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw"); if( iw==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } for(i=0;i<nr;i++) iw[i] = -1; for(i=0;i<nr;i++) { tnorm = 0; for(j=A->bptr[i];j<A->bptr[i+1];j++) { lis_array_nrm2(bs,&A->value[bs*j],&t); tnorm = _max(t,tnorm); } tolnorm = tol * tnorm; lenu = 1; lenl = 0; jbuf[i] = i; memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR)); iw[i] = i; for(j=A->bptr[i];j<A->bptr[i+1];j++) { col = A->bindex[j]; lis_array_nrm2(bs,&A->value[bs*j],&t); if( t<tolnorm && col!=i ) continue; if( col < i ) { jbuf[lenl] = col; iw[col] = lenl; memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR)); lenl++; } else if( col == i ) { memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR)); } else { jpos = i + lenu; jbuf[jpos] = col; iw[col] = jpos; memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR)); lenu++; } } j = -1; len = 0; while( ++j < lenl ) { jrow = jbuf[j]; jpos = j; for(k=j+1;k<lenl;k++) { if( jbuf[k]<jrow ) { jrow = jbuf[k]; jpos = k; } } if( jpos!=j ) { col = jbuf[j]; jbuf[j] = jbuf[jpos]; jbuf[jpos] = col; iw[jrow] = j; iw[col] = jpos; memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR)); memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR)); memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR)); } /* lis_array_matmat(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact,LIS_INS_VALUE);*/ lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact); iw[jrow] = -1; lis_array_nrm2(bs,buf_fact,&t); if( t * xnrm[jrow] <= tolnorm ) continue; for(k=0;k<U->nnz[jrow];k++) { col = U->index[jrow][k]; lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE); jpos = iw[col]; lis_array_nrm2(bs,buf_ns,&t); if( t < tolnorm && jpos == -1 ) { continue; } if( col >= i ) { if( jpos == -1 ) { upos = i + lenu; jbuf[upos] = col; iw[col] = upos; memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR)); lenu++; } else { for(kk=0;kk<bs;kk++) { w[bs*jpos+kk] += buf_ns[kk]; } } } else { if( jpos == -1 ) { jbuf[lenl] = col; iw[col] = lenl; memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR)); lenl++; } else { for(kk=0;kk<bs;kk++) { w[bs*jpos+kk] += buf_ns[kk]; } } } } for(kk=0;kk<bs;kk++) { w[bs*len+kk] = -buf_fact[kk]; } jbuf[len] = jrow; len++; } lenl = len; len = _min(lfil,lenl); for(j=0;j<lenl;j++) { lis_array_nrm2(bs,&w[bs*j],&wn[j]); iw[j] = j; } lis_sort_di(0,lenl-1,wn,iw); lis_sort_i(0,len-1,iw); L->nnz[i] = len; if( len>0 ) { L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[j]; L->index[i][j] = jbuf[jpos]; memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR)); } for(j=0;j<lenl;j++) iw[j] = -1; len = _min(lfil,lenu); for(j=1;j<lenu;j++) { jpos = i+j; lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]); iw[j-1] = jpos; } para = lenu - 1; lis_sort_di(0,para-1,wn,iw); lis_sort_i(0,len-2,iw); U->nnz[i] = len-1; if( len>1 ) { U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT)); U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR)); } lis_array_nrm2(bs,&w[bs*i],&t); for(j=0;j<len-1;j++) { jpos = iw[j]; U->index[i][j] = jbuf[jpos]; memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR)); t = _max(t,wn[j]); } for(j=0;j<lenu-1;j++) iw[j] = -1; xnrm[i] = t; memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR)); if( i==nr-1 ) { switch(bnr) { case 2: if( n%2!=0 ) { D->value[4*(nr-1)+3] = 1.0; } break; case 3: if( n%3==1 ) { D->value[9*(nr-1)+4] = 1.0; D->value[9*(nr-1)+8] = 1.0; } else if( n%3==2 ) { D->value[9*(nr-1)+8] = 1.0; } break; } } /* lis_array_invGauss(bnr,&D->value[bs*i]);*/ lis_array_LUdecomp(bnr,&D->value[bs*i]); for(j=0;j<lenu;j++) { iw[ jbuf[i+j] ] = -1; } } precon->L = L; precon->U = U; precon->WD = D; lis_free2(5,w,iw,xnrm,wn,jbuf); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_ecg(LIS_ESOLVER esolver) { LIS_MATRIX A; LIS_VECTOR x; LIS_SCALAR evalue; LIS_INT emaxiter; LIS_REAL tol; LIS_INT iter,iter3,nsolver,i,j,output; LIS_INT nprocs,my_rank; LIS_REAL nrm2,resid,resid3; LIS_SCALAR lshift; LIS_VECTOR b,D,r,w,p,Aw,Ax,Ap,ones,Ds; LIS_SCALAR *SA, *SB, *SW, *v3, *SAv3, *SBv3, *z3, *q3, *SBz3, evalue3, ievalue3; LIS_SOLVER solver; LIS_PRECON precon; LIS_MATRIX A0; LIS_VECTOR x0,z,q; double times,itimes,ptimes,p_c_times,p_i_times; LIS_INT nsol, precon_type; char solvername[128], preconname[128]; A = esolver->A; x = esolver->x; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; lshift = esolver->lshift; if( A->my_rank==0 ) printf("local shift = %e\n", lshift); if (lshift != 0) lis_matrix_shift_diagonal(A, lshift); SA = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SA"); SB = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SB"); SW = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SW"); v3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::v3"); SAv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SAv3"); SBv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBv3"); SBz3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBz3"); z3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::z3"); q3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::q3"); b = esolver->work[0]; D = esolver->work[1]; Ds = esolver->work[2]; r = esolver->work[3]; w = esolver->work[4]; p = esolver->work[5]; Aw = esolver->work[6]; Ax = esolver->work[7]; Ap = esolver->work[8]; lis_vector_set_all(1.0,b); lis_vector_nrm2(b, &nrm2); lis_vector_scale(1/nrm2, b); lis_solver_create(&solver); lis_solver_set_option("-i bicg -p ilu",solver); lis_solver_set_optionC(solver); lis_solver_get_solver(solver, &nsol); lis_solver_get_precon(solver, &precon_type); lis_get_solvername(nsol, solvername); lis_get_preconname(precon_type, preconname); printf("solver : %s %d\n", solvername, nsol); printf("precon : %s %d\n", preconname, precon_type); lis_solve(A, b, x, solver); lis_vector_copy(b,Ax); lis_vector_nrm2(x, &nrm2); lis_vector_set_all(0.0,p); lis_vector_set_all(0.0,Ap); lis_precon_create(solver, &precon); solver->precon = precon; iter=0; while (iter<emaxiter) { iter = iter + 1; lis_vector_dot(x,Ax,&evalue); lis_vector_axpyz(-(evalue),x,Ax,r); lis_vector_nrm2(r, &nrm2); resid = fabs(nrm2/(evalue)); if( output ) { if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid; if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, resid); } if (resid<tol) break; lis_psolve(solver, x, w); lis_vector_copy(x,Aw); lis_vector_nrm2(w, &nrm2); lis_vector_dot(w,Aw,&SA[0]); lis_vector_dot(x,Aw,&SA[3]); lis_vector_dot(p,Aw,&SA[6]); SA[1] = SA[3]; lis_vector_dot(x,Ax,&SA[4]); lis_vector_dot(p,Ax,&SA[7]); SA[2] = SA[6]; SA[5] = SA[7]; lis_vector_dot(p,Ap,&SA[8]); lis_vector_dot(w,w,&SB[0]); lis_vector_dot(x,w,&SB[3]); lis_vector_dot(p,w,&SB[6]); SB[1] = SB[3]; lis_vector_dot(x,x,&SB[4]); lis_vector_dot(p,x,&SB[7]); SB[2] = SB[6]; SB[5] = SB[7]; lis_vector_dot(p,p,&SB[8]); lis_array_set_all(3, 1.0, v3); iter3=0; while (iter3<emaxiter) { iter3 = iter3 + 1; lis_array_nrm2(3, v3, &nrm2); lis_array_scale(3, 1/nrm2, v3); lis_array_matvec(3, SB, v3, SBv3, LIS_INS_VALUE); lis_array_invvec(3, SA, SBv3, z3); lis_array_dot2(3, SBv3, z3, &ievalue3); if (ievalue3==0) { printf("ievalue3 is zero\n"); lis_precon_destroy(precon); lis_solver_destroy(solver); esolver->iter = iter; esolver->resid = resid; esolver->evalue[0] = evalue; if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift); lis_free(SA); lis_free(SB); lis_free(SW); lis_free(v3); lis_free(SAv3); lis_free(SBv3); lis_free(SBz3); lis_free(z3); lis_free(q3); return LIS_BREAKDOWN; } lis_array_axpyz(3, -ievalue3, SBv3, z3, q3); lis_array_nrm2(3, q3, &resid3); resid3 = fabs(resid3 / ievalue3); if (resid3<1e-12) break; lis_array_copy(3,z3,v3); } evalue3 = 1 / ievalue3; lis_vector_scale(v3[0],w); lis_vector_axpy(v3[2],p,w); lis_vector_xpay(w,v3[1],x); lis_vector_copy(w,p); lis_vector_scale(v3[0],Aw); lis_vector_axpy(v3[2],Ap,Aw); lis_vector_xpay(Aw,v3[1],Ax); lis_vector_copy(Aw,Ap); lis_vector_nrm2(x,&nrm2); lis_vector_scale(1/nrm2,x); lis_vector_scale(1/nrm2,Ax); lis_vector_nrm2(p,&nrm2); lis_vector_scale(1/nrm2,p); lis_vector_scale(1/nrm2,Ap); lis_solver_get_timeex(solver,×,&itimes,&ptimes,&p_c_times,&p_i_times); esolver->ptimes += solver->ptimes; esolver->itimes += solver->itimes; esolver->p_c_times += solver->p_c_times; esolver->p_i_times += solver->p_i_times; } lis_precon_destroy(precon); lis_solver_destroy(solver); esolver->iter = iter; esolver->resid = resid; esolver->evalue[0] = evalue; if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift); lis_free(SA); lis_free(SB); lis_free(SW); lis_free(v3); lis_free(SAv3); lis_free(SBv3); lis_free(SBz3); lis_free(z3); lis_free(q3); if (resid<tol) { esolver->retcode = LIS_SUCCESS; return LIS_SUCCESS; } else { esolver->retcode = LIS_MAXITER; return LIS_MAXITER; } }