/*
 * Apply the block ILUT factors held in solver->precon to B and store the
 * result in X.
 *
 * The factorisation has the form
 *     LU = (D + L*A) * (I + D^-1 * U*A)
 * and this routine solves LUx = b in two sweeps over the nr block rows of
 * solver->A, each block being bnr x bnr:
 *   1. ascending rows: every stored L block of the row is applied to the
 *      matching segment of x and combined into the row's own segment with
 *      LIS_SUB_VALUE;
 *   2. descending rows: the same with the U blocks, after which the row's
 *      segment is passed through lis_array_invvec with the row's block of
 *      precon->WD and the result is copied back.
 *
 * NOTE(review): the scratch buffer holds 9 scalars; this presumably limits
 * the supported block size (lis_array_invvec is expected to write bnr
 * values into it) -- confirm against the callers.
 *
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_psolve_ilut_bsr(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
    LIS_PRECON      precon;
    LIS_MATRIX_ILU  L, U;
    LIS_MATRIX_DIAG D;
    LIS_SCALAR      *x;
    LIS_SCALAR      *xrow;
    LIS_SCALAR      w[9];
    LIS_INT         row, k, col;
    LIS_INT         nr, bnr, bs;

    LIS_DEBUG_FUNC_IN;

    precon = solver->precon;
    L      = precon->L;
    U      = precon->U;
    D      = precon->WD;
    x      = X->value;
    nr     = solver->A->nr;
    bnr    = solver->A->bnr;
    bs     = bnr * bnr;

    /* Work in place on X, starting from a copy of the right-hand side. */
    lis_vector_copy(B, X);

    /* Sweep 1: block rows in ascending order, L factor. */
    for (row = 0; row < nr; row++)
    {
        xrow = &x[bnr * row];
        for (k = 0; k < L->nnz[row]; k++)
        {
            col = L->index[row][k];
            lis_array_matvec(bnr, &L->value[row][bs * k], &x[bnr * col], xrow, LIS_SUB_VALUE);
        }
    }

    /* Sweep 2: block rows in descending order, U factor then diagonal block. */
    row = nr;
    while (row-- > 0)
    {
        xrow = &x[bnr * row];
        for (k = 0; k < U->nnz[row]; k++)
        {
            col = U->index[row][k];
            lis_array_matvec(bnr, &U->value[row][bs * k], &x[bnr * col], xrow, LIS_SUB_VALUE);
        }
        /* Result goes to the scratch buffer first, then back into x. */
        lis_array_invvec(bnr, &D->value[bs * row], xrow, w);
        memcpy(xrow, w, bnr * sizeof(LIS_SCALAR));
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Conjugate-gradient style eigensolver working on the three-vector
 * subspace {w, x, p}.
 *
 * Inputs are taken from the esolver object: the matrix (esolver->A), the
 * eigenvector iterate (esolver->x), the iteration limit
 * (options[LIS_EOPTIONS_MAXITER]), the residual tolerance
 * (params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]), the output flags
 * (options[LIS_EOPTIONS_OUTPUT]), the diagonal shift (esolver->lshift) and
 * the work vectors esolver->work[0], [3], [4], [5], [6], [7], [8].
 *
 * Outline:
 *   - if lshift is non-zero the diagonal of A is shifted by lshift for the
 *     duration of the call and shifted back before returning;
 *   - an inner linear solver is created with "-i bicg -p ilu" and used once
 *     (lis_solve) to produce the starting x, then its preconditioner is
 *     used through lis_psolve in every outer iteration;
 *   - each outer iteration evaluates evalue = dot(x, Ax), the residual
 *     r = Ax - evalue*x and resid = |nrm2(r) / evalue|, and stops when
 *     resid < tol;
 *   - otherwise the 3x3 arrays SA and SB are filled with dot products of
 *     {w, x, p} against {Aw, Ax, Ap} and against {w, x, p}, a small
 *     iteration on (SA, SB) yields the coefficient vector v3, and x, p, Ax
 *     and Ap are recombined from it and rescaled by nrm2(x) and nrm2(p).
 *
 * Outputs written to esolver: iter, resid, evalue[0], accumulated timing
 * fields, optionally residual[iter], and retcode.
 *
 * Returns LIS_SUCCESS when resid < tol was reached, LIS_MAXITER when the
 * iteration limit was hit first, and LIS_BREAKDOWN when the small 3x3
 * iteration produced ievalue3 == 0. As before, esolver->retcode is left
 * untouched on the LIS_BREAKDOWN path.
 */
LIS_INT lis_ecg(LIS_ESOLVER esolver)
{
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_VECTOR b, r, w, p, Aw, Ax, Ap;
    LIS_SCALAR evalue = 0.0;
    LIS_SCALAR lshift;
    LIS_SCALAR *SA, *SB, *v3, *SBv3, *z3, *q3;
    LIS_SCALAR ievalue3;
    LIS_REAL   tol;
    LIS_REAL   nrm2, resid3;
    LIS_REAL   resid = 0.0;   /* defined even if the loop never runs */
    LIS_INT    emaxiter;
    LIS_INT    iter, iter3, output;
    LIS_INT    nsol, precon_type;
    LIS_INT    converged = 0;
    LIS_INT    breakdown = 0;
    LIS_SOLVER solver;
    LIS_PRECON precon;
    double     times, itimes, ptimes, p_c_times, p_i_times;
    char       solvername[128], preconname[128];

    A        = esolver->A;
    x        = esolver->x;
    emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
    tol      = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
    output   = esolver->options[LIS_EOPTIONS_OUTPUT];
    lshift   = esolver->lshift;

    if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
    if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

    /* 3x3 projected matrices and length-3 work arrays for the small problem. */
    SA   = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SA");
    SB   = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SB");
    v3   = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::v3");
    SBv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBv3");
    z3   = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::z3");
    q3   = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::q3");

    b  = esolver->work[0];
    r  = esolver->work[3];
    w  = esolver->work[4];
    p  = esolver->work[5];
    Aw = esolver->work[6];
    Ax = esolver->work[7];
    Ap = esolver->work[8];

    /* Normalised all-ones right-hand side for the initial solve. */
    lis_vector_set_all(1.0,b);
    lis_vector_nrm2(b, &nrm2);
    lis_vector_scale(1/nrm2, b);

    lis_solver_create(&solver);
    lis_solver_set_option("-i bicg -p ilu",solver);
    lis_solver_set_optionC(solver);
    lis_solver_get_solver(solver, &nsol);
    lis_solver_get_precon(solver, &precon_type);
    lis_get_solvername(nsol, solvername);
    lis_get_preconname(precon_type, preconname);
    printf("solver : %s %d\n", solvername, nsol);
    printf("precon : %s %d\n", preconname, precon_type);

    /* x is obtained from solving with b, so b is stored as the matching Ax. */
    lis_solve(A, b, x, solver);
    lis_vector_copy(b,Ax);
    lis_vector_nrm2(x, &nrm2);
    lis_vector_set_all(0.0,p);
    lis_vector_set_all(0.0,Ap);

    lis_precon_create(solver, &precon);
    solver->precon = precon;

    iter = 0;
    while (iter<emaxiter)
    {
        iter = iter + 1;

        /* Current eigenvalue estimate and relative residual. */
        lis_vector_dot(x,Ax,&evalue);
        lis_vector_axpyz(-(evalue),x,Ax,r);
        lis_vector_nrm2(r, &nrm2);
        resid = fabs(nrm2/(evalue));

        if( output )
        {
            if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
            if( (output & LIS_EPRINT_OUT) && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, resid);
        }

        if (resid<tol)
        {
            converged = 1;
            break;
        }

        /* New direction w from the preconditioner; x is kept as its Aw. */
        lis_psolve(solver, x, w);
        lis_vector_copy(x,Aw);
        lis_vector_nrm2(w, &nrm2);

        /* SA: dot products of {w,x,p} with {Aw,Ax,Ap}, filled symmetrically. */
        lis_vector_dot(w,Aw,&SA[0]);
        lis_vector_dot(x,Aw,&SA[3]);
        lis_vector_dot(p,Aw,&SA[6]);
        SA[1] = SA[3];
        lis_vector_dot(x,Ax,&SA[4]);
        lis_vector_dot(p,Ax,&SA[7]);
        SA[2] = SA[6];
        SA[5] = SA[7];
        lis_vector_dot(p,Ap,&SA[8]);

        /* SB: dot products of {w,x,p} with themselves, filled symmetrically. */
        lis_vector_dot(w,w,&SB[0]);
        lis_vector_dot(x,w,&SB[3]);
        lis_vector_dot(p,w,&SB[6]);
        SB[1] = SB[3];
        lis_vector_dot(x,x,&SB[4]);
        lis_vector_dot(p,x,&SB[7]);
        SB[2] = SB[6];
        SB[5] = SB[7];
        lis_vector_dot(p,p,&SB[8]);

        /* Small iteration on (SA, SB) for the coefficient vector v3. */
        lis_array_set_all(3, 1.0, v3);
        iter3 = 0;
        while (iter3<emaxiter)
        {
            iter3 = iter3 + 1;
            lis_array_nrm2(3, v3, &nrm2);
            lis_array_scale(3, 1/nrm2, v3);
            lis_array_matvec(3, SB, v3, SBv3, LIS_INS_VALUE);
            lis_array_invvec(3, SA, SBv3, z3);
            lis_array_dot2(3, SBv3, z3, &ievalue3);
            if (ievalue3==0)
            {
                printf("ievalue3 is zero\n");
                breakdown = 1;
                goto finish;
            }
            lis_array_axpyz(3, -ievalue3, SBv3, z3, q3);
            lis_array_nrm2(3, q3, &resid3);
            resid3 = fabs(resid3 / ievalue3);
            if (resid3<1e-12) break;
            lis_array_copy(3,z3,v3);
        }

        /* w <- v3[0]*w + v3[2]*p ; x <- w + v3[1]*x ; p <- w */
        lis_vector_scale(v3[0],w);
        lis_vector_axpy(v3[2],p,w);
        lis_vector_xpay(w,v3[1],x);
        lis_vector_copy(w,p);

        /* The same combination applied to the A-images. */
        lis_vector_scale(v3[0],Aw);
        lis_vector_axpy(v3[2],Ap,Aw);
        lis_vector_xpay(Aw,v3[1],Ax);
        lis_vector_copy(Aw,Ap);

        /* Rescale x/Ax by nrm2(x) and p/Ap by nrm2(p). */
        lis_vector_nrm2(x,&nrm2);
        lis_vector_scale(1/nrm2,x);
        lis_vector_scale(1/nrm2,Ax);

        lis_vector_nrm2(p,&nrm2);
        lis_vector_scale(1/nrm2,p);
        lis_vector_scale(1/nrm2,Ap);

        lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
        esolver->ptimes    += solver->ptimes;
        esolver->itimes    += solver->itimes;
        esolver->p_c_times += solver->p_c_times;
        esolver->p_i_times += solver->p_i_times;
    }

finish:
    /* Single cleanup path shared by normal exit and breakdown. */
    lis_precon_destroy(precon);
    lis_solver_destroy(solver);

    esolver->iter      = iter;
    esolver->resid     = resid;
    esolver->evalue[0] = evalue;

    /* Undo the diagonal shift applied on entry. */
    if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

    lis_free(SA);
    lis_free(SB);
    lis_free(v3);
    lis_free(SBv3);
    lis_free(z3);
    lis_free(q3);

    if (breakdown)
    {
        return LIS_BREAKDOWN;
    }
    if (converged)
    {
        esolver->retcode = LIS_SUCCESS;
        return LIS_SUCCESS;
    }
    esolver->retcode = LIS_MAXITER;
    return LIS_MAXITER;
}
/*
 * Transposed matrix-vector product for a matrix in VBR (variable block row)
 * storage: every stored entry of block (bi, bj) is multiplied by the x
 * entry of its row and accumulated into the y entry of its column.
 *
 * A : matrix; either the combined arrays (bptr, bindex, ptr, row, col,
 *     value) are used, or, when A->is_splited is set, the separate parts
 *     A->D (diagonal blocks), A->L and A->U.
 * x : input array, indexed by row.
 * y : output array, indexed by column; all A->np entries are defined on
 *     return.
 *
 * With OpenMP each thread accumulates into its own np-long slice of a
 * temporary buffer and the slices are summed into y at the end. Without
 * OpenMP the accumulation is done directly in y.
 *
 * NOTE(review): in the split case the diagonal block is applied as
 * sum_j v[j*bn+i] * x[j] (OpenMP) / through lis_array_matvec (serial),
 * i.e. without exchanging the roles of i and j as the off-diagonal loops
 * do -- confirm this is what is wanted for a transposed product.
 */
void lis_matvect_vbr(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
{
    LIS_INT i, j, k;
    LIS_INT bi, bj, bc, bn;
    LIS_INT nr, np;
#ifdef _OPENMP
    LIS_INT nprocs, my_rank;
    LIS_SCALAR t;
    LIS_SCALAR *w;
#endif

    np = A->np;
    nr = A->nr;

    if( A->is_splited )
    {
#ifdef _OPENMP
        nprocs = omp_get_max_threads();
        w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_vbr::w" );
        #pragma omp parallel private(bi,bc,bj,i,j,k,bn,my_rank,t)
        {
            my_rank = omp_get_thread_num();

            /* Clear every thread's slice of the accumulation buffer. */
            #pragma omp for
            for(j=0;j<nprocs;j++)
            {
                memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
            }

            #pragma omp for
            for(bi=0;bi<nr;bi++)
            {
                /* Diagonal block of block row bi. */
                bn = A->D->bns[bi];
                k  = A->L->row[bi];
                for(i=0;i<bn;i++)
                {
                    t = 0.0;
                    for(j=0;j<bn;j++)
                    {
                        t += A->D->v_value[bi][j*bn+i] * x[k+j];
                    }
                    w[my_rank*np + k+i] += t;
                }

                /* Strictly lower blocks: scatter into column positions. */
                for(bc=A->L->bptr[bi];bc<A->L->bptr[bi+1];bc++)
                {
                    bj = A->L->bindex[bc];
                    k  = A->L->ptr[bc];
                    for(j=A->L->col[bj];j<A->L->col[bj+1];j++)
                    {
                        for(i=A->L->row[bi];i<A->L->row[bi+1];i++)
                        {
                            w[my_rank*np + j] += A->L->value[k] * x[i];
                            k++;
                        }
                    }
                }

                /* Strictly upper blocks: same scatter. */
                for(bc=A->U->bptr[bi];bc<A->U->bptr[bi+1];bc++)
                {
                    bj = A->U->bindex[bc];
                    k  = A->U->ptr[bc];
                    for(j=A->U->col[bj];j<A->U->col[bj+1];j++)
                    {
                        for(i=A->U->row[bi];i<A->U->row[bi+1];i++)
                        {
                            w[my_rank*np + j] += A->U->value[k] * x[i];
                            k++;
                        }
                    }
                }
            }
            #pragma omp barrier

            /* Sum the per-thread slices into y. */
            #pragma omp for
            for(i=0;i<np;i++)
            {
                t = 0.0;
                for(j=0;j<nprocs;j++)
                {
                    t += w[j*np+i];
                }
                y[i] = t;
            }
        }
        lis_free(w);
#else
        /*
         * The scatter loops below add into y[j] for every stored column j,
         * and the OpenMP branch defines all np entries of y. Clear the
         * entries past A->n, which the diagonal pass does not write, so the
         * additions do not start from stale values.
         */
        for(i=A->n; i<np; i++)
        {
            y[i] = 0.0;
        }
        for(i=0; i<nr; i++)
        {
            bn = A->D->bns[i];
            k  = A->L->row[i];
            lis_array_matvec(bn,A->D->v_value[i],&x[k],&y[k],LIS_INS_VALUE);
        }
        for(bi=0;bi<nr;bi++)
        {
            for(bc=A->L->bptr[bi];bc<A->L->bptr[bi+1];bc++)
            {
                bj = A->L->bindex[bc];
                k  = A->L->ptr[bc];
                for(j=A->L->col[bj];j<A->L->col[bj+1];j++)
                {
                    for(i=A->L->row[bi];i<A->L->row[bi+1];i++)
                    {
                        y[j] += A->L->value[k] * x[i];
                        k++;
                    }
                }
            }
            for(bc=A->U->bptr[bi];bc<A->U->bptr[bi+1];bc++)
            {
                bj = A->U->bindex[bc];
                k  = A->U->ptr[bc];
                for(j=A->U->col[bj];j<A->U->col[bj+1];j++)
                {
                    for(i=A->U->row[bi];i<A->U->row[bi+1];i++)
                    {
                        y[j] += A->U->value[k] * x[i];
                        k++;
                    }
                }
            }
        }
#endif
    }
    else
    {
#ifdef _OPENMP
        nprocs = omp_get_max_threads();
        w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_vbr::w" );
        /* t must be private: it is the per-thread accumulator of the final
           reduction loop and would otherwise be shared between threads. */
        #pragma omp parallel private(bi,bc,bj,i,j,k,my_rank,t)
        {
            my_rank = omp_get_thread_num();

            /* Clear every thread's slice of the accumulation buffer. */
            #pragma omp for
            for(j=0;j<nprocs;j++)
            {
                memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
            }

            #pragma omp for
            for(bi=0;bi<nr;bi++)
            {
                for(bc=A->bptr[bi];bc<A->bptr[bi+1];bc++)
                {
                    bj = A->bindex[bc];
                    k  = A->ptr[bc];
                    for(j=A->col[bj];j<A->col[bj+1];j++)
                    {
                        for(i=A->row[bi];i<A->row[bi+1];i++)
                        {
                            w[my_rank*np + j] += A->value[k] * x[i];
                            k++;
                        }
                    }
                }
            }
            #pragma omp barrier

            /* Sum the per-thread slices into y. */
            #pragma omp for
            for(i=0;i<np;i++)
            {
                t = 0.0;
                for(j=0;j<nprocs;j++)
                {
                    t += w[j*np+i];
                }
                y[i] = t;
            }
        }
        lis_free(w);
#else
        /* Clear all np entries, matching what the OpenMP branch defines. */
        for(i=0; i<np; i++)
        {
            y[i] = 0.0;
        }
        for(bi=0;bi<nr;bi++)
        {
            for(bc=A->bptr[bi];bc<A->bptr[bi+1];bc++)
            {
                bj = A->bindex[bc];
                k  = A->ptr[bc];
                for(j=A->col[bj];j<A->col[bj+1];j++)
                {
                    for(i=A->row[bi];i<A->row[bi+1];i++)
                    {
                        y[j] += A->value[k] * x[i];
                        k++;
                    }
                }
            }
        }
#endif
    }
}