/*
 * Build a sparse approximate inverse preconditioner for the CSR matrix
 * solver->A, one row at a time.
 *
 * For every row i the routine
 *   1. derives a row-relative drop threshold
 *        tol_dd = max|A[i,:]| * params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN],
 *   2. starts W_i and Z_i from the unit vector e_i and subtracts multiples
 *      of the previously built rows W_j / Z_j (j < i), skipping every
 *      contribution whose magnitude does not exceed tol_dd,
 *   3. stores d[i] = 1 / (W_i^T * A * Z_i).
 *
 * On success precon->L, precon->U and precon->D receive W, Z and D.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the first
 * failing Lis call.  All scratch arrays (and the CSC copy of A) are
 * released on every exit path once their allocation has started.
 *
 * NOTE(review): W, Z and D are still not released when the routine fails;
 * their destructors are not visible from this function.
 * NOTE(review): unlike other routines in this file, column indices are not
 * filtered under USE_MPI here; confirm A->index never exceeds n-1.
 * NOTE(review): LIS_PARAMS_RATE was read by the previous revision but never
 * influenced the result (the fill-limit code was commented out), so it is
 * no longer read.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT err;
    LIS_INT i,j,k,ii,jj;
    LIS_INT n,cl,cu;
    LIS_INT *wu,*wl,*il,*iu;
    LIS_SCALAR t,v;
    LIS_REAL tol,tol_dd,nrm;
    LIS_SCALAR *d,*r,*c,*l,*u;
    LIS_MATRIX A,B;
    LIS_MATRIX_ILU W,Z;
    LIS_VECTOR D;

    LIS_DEBUG_FUNC_IN;

    A   = solver->A;
    n   = A->n;
    tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];

    W  = NULL;
    Z  = NULL;
    B  = NULL;
    wu = NULL;
    wl = NULL;
    r  = NULL;
    c  = NULL;
    l  = NULL;
    u  = NULL;
    il = NULL;
    iu = NULL;

    err = lis_matrix_ilu_create(n,1,&W);
    if( err ) return err;
    err = lis_matrix_ilu_create(n,1,&Z);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(W);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(Z);
    if( err ) return err;
    err = lis_vector_duplicate(A,&D);
    if( err ) return err;
    d = D->value;

    /* Scratch storage; from here on every failure goes through cleanup. */
    r = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( r==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    c = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( c==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( l==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( u==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
    if( il==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
    if( iu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    wu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( wu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    wl = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( wl==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }

    lis_matrix_sort_csr(A);

    /* B becomes the CSC form of A so that columns of A can be walked. */
    err = lis_matrix_duplicate(A,&B);
    if( err )
    {
        /* The state of B after a failed duplicate is not visible here,
           so it is deliberately not handed to lis_matrix_destroy. */
        B = NULL;
        goto cleanup;
    }
    err = lis_matrix_convert_csr2csc(A,B);
    if( err ) goto cleanup;

    /*
     * r and c are dense work vectors that are only ever written at the
     * pattern of the current row/column, yet the dot products below read
     * them at arbitrary positions.  They must therefore start out as zero.
     */
    for(i=0;i<n;i++)
    {
        wu[i] = 0;
        wl[i] = 0;
        r[i]  = 0.0;
        c[i]  = 0.0;
    }

    for(i=0;i<n;i++)
    {
        /* nrm_inf(A[i,:]) scales the drop tolerance for this row */
        nrm = 0.0;
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            nrm = _max(nrm,fabs(A->value[j]));
        }
        tol_dd = nrm * tol;

        /* l = e_i, u = e_i; wl/wu flag which entries of l/u are live */
        l[i]  = 1.0;
        u[i]  = 1.0;
        il[0] = i;
        iu[0] = i;
        cl    = 1;
        cu    = 1;
        wu[i] = 1;
        wl[i] = 1;

        /* r = e_i^T * A  (row i of A) */
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            jj    = A->index[j];
            r[jj] = A->value[j];
        }
        /* c = A * e_i  (column i of A, read from the CSC copy) */
        for(j=B->ptr[i];j<B->ptr[i+1];j++)
        {
            jj    = B->index[j];
            c[jj] = B->value[j];
        }

        /* W_i = W_i - (r*Z_j/D_jj)*W_j   for all j < i */
        for(j=0;j<i;j++)
        {
            t = 0.0;
            for(k=0;k<Z->nnz[j];k++)
            {
                t += r[Z->index[j][k]]*Z->value[j][k];
            }
            t = t * d[j];
            if( fabs(t) > tol_dd )
            {
                for(k=0;k<W->nnz[j];k++)
                {
                    v = t * W->value[j][k];
                    if( fabs(v) > tol_dd )
                    {
                        jj = W->index[j][k];
                        if( wl[jj]==1 )
                        {
                            l[jj] -= v;
                        }
                        else
                        {
                            /* first touch: overwrite whatever l held */
                            l[jj]    = -v;
                            il[cl++] = jj;
                            wl[jj]   = 1;
                        }
                    }
                }
            }
        }

        /* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j   for all j < i */
        for(j=0;j<i;j++)
        {
            t = 0.0;
            for(k=0;k<W->nnz[j];k++)
            {
                t += c[W->index[j][k]]*W->value[j][k];
            }
            t = t * d[j];
            if( fabs(t) > tol_dd )
            {
                for(k=0;k<Z->nnz[j];k++)
                {
                    v = t * Z->value[j][k];
                    if( fabs(v) > tol_dd )
                    {
                        jj = Z->index[j][k];
                        if( wu[jj]==1 )
                        {
                            u[jj] -= v;
                        }
                        else
                        {
                            u[jj]    = -v;
                            iu[cu++] = jj;
                            wu[jj]   = 1;
                        }
                    }
                }
            }
        }

        /* Store row i of W */
        W->nnz[i] = cl;
        if( cl > 0 )
        {
            W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT));
            W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR));
            if( W->index[i]==NULL || W->value[i]==NULL )
            {
                LIS_SETERR_MEM(cl*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
            memcpy(W->index[i],il,cl*sizeof(LIS_INT));
            for(j=0;j<cl;j++)
            {
                W->value[i][j] = l[il[j]];
            }
        }

        /* Store row i of Z */
        Z->nnz[i] = cu;
        if( cu > 0 )
        {
            Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT));
            Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR));
            if( Z->index[i]==NULL || Z->value[i]==NULL )
            {
                LIS_SETERR_MEM(cu*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
            memcpy(Z->index[i],iu,cu*sizeof(LIS_INT));
            for(j=0;j<cu;j++)
            {
                Z->value[i][j] = u[iu[j]];
            }
        }

        /* Return the work vectors to their neutral state. */
        for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0;
        for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0;
        for(j=0;j<cl;j++)
        {
            /* l must be exactly zero on the pattern of W_i: the pivot
               computation below reads l there even where A*Z_i is empty. */
            wl[il[j]] = 0;
            l[il[j]]  = 0.0;
        }
        for(j=0;j<cu;j++)
        {
            wu[iu[j]] = 0;
        }

        /* D_ii = W_i^T * A * Z_i ; l is reused to hold A*Z_i */
        cl = 0;
        for(k=0;k<Z->nnz[i];k++)
        {
            ii = Z->index[i][k];
            for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
            {
                jj = B->index[j];
                if( wl[jj]==0 )
                {
                    l[jj]    = B->value[j]*Z->value[i][k];
                    wl[jj]   = 1;
                    il[cl++] = jj;
                }
                else
                {
                    l[jj] += B->value[j]*Z->value[i][k];
                }
            }
        }
        t = 0.0;
        for(j=0;j<W->nnz[i];j++)
        {
            k  = W->index[i][j];
            t += W->value[i][j] * l[k];
        }
        /* The reciprocal is stored; a zero pivot is not guarded against. */
        d[i] = 1.0 / t;
        for(j=0;j<cl;j++) wl[il[j]] = 0;
    }

cleanup:
    if( B!=NULL )
    {
        lis_matrix_destroy(B);
    }
    /* Entries may still be NULL when an allocation failed part-way;
       presumably lis_free2 tolerates that, as free() does - TODO confirm. */
    lis_free2(8,r,c,il,l,wl,iu,u,wu);
    if( err ) return err;

    precon->L = W;
    precon->U = Z;
    precon->D = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Build a threshold-based incomplete LU factorization of the CSR matrix
 * solver->A.
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP  tol - drop tolerance, scaled per row by the mean
 *                          absolute value of the row entries;
 *   LIS_PARAMS_RATE  m   - fill factor; at most
 *                          lfil = (LIS_INT)(nnz/(2n) * m)
 *                          entries are stored per row of L and per row of U.
 *
 * Each row is expanded into the dense work vector w (strict lower part at
 * positions 0..lenl-1, diagonal at i, strict upper part at i+1..i+lenu),
 * eliminated against earlier rows of U in increasing column order, then
 * trimmed with lis_sort_di / lis_sort_i and copied into L and U.
 * D->value[i] holds the reciprocal of the pivot.
 *
 * With _OPENMP defined every thread factorizes its own contiguous range of
 * rows [is,ie) obtained from LIS_GET_ISIE and ignores matrix entries whose
 * column lies outside that range.  Without _OPENMP all rows are processed
 * in order; under USE_MPI columns beyond n-1 are skipped.
 *
 * On success precon->L, precon->U and precon->D are set.
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the first
 * failing Lis call.  The scratch buffers are released on every exit path
 * once their allocation has started.
 *
 * NOTE(review): L, U and D are still not released on failure; their
 * destructors are not visible from this function.
 * NOTE(review): a row without usable entries yields a 0/0 row norm and a
 * zero pivot; neither case is guarded.
 */
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT err;
    LIS_INT i,j,k;
    LIS_INT n,lfil,len,nprocs;
    LIS_SCALAR t,tol,m;
    LIS_MATRIX A;
    LIS_MATRIX_ILU L,U;
    LIS_VECTOR D;
    LIS_SCALAR tnorm,tolnorm;
    LIS_SCALAR fact,lxu,*wn,*w;
    LIS_INT lenu,lenl,col,jpos,jrow,upos;
    LIS_INT *jbuf,*iw;
#ifdef _OPENMP
    LIS_INT jj,is,ie,my_rank;
#endif

    LIS_DEBUG_FUNC_IN;

    A   = solver->A;
    n   = A->n;
    tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
    m   = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
    /* Scale first, truncate last.  The OpenMP branch used to truncate the
       average row length before multiplying by m, which gave a different
       fill limit than the serial branch for the same input. */
    lfil = (LIS_INT)(((double)A->nnz/(2.0*n))*m);
#ifdef _OPENMP
    nprocs = omp_get_max_threads();
#else
    nprocs = 1;
#endif

    L    = NULL;
    U    = NULL;
    w    = NULL;
    wn   = NULL;
    jbuf = NULL;
    iw   = NULL;

    err = lis_matrix_ilu_create(n,1,&L);
    if( err ) return err;
    err = lis_matrix_ilu_create(n,1,&U);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(L);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(U);
    if( err ) return err;
    err = lis_vector_duplicate(A,&D);
    if( err ) return err;

    /* One slice of each work array per thread (a single slice when serial). */
    w = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
    if( w==NULL )
    {
        LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
    if( wn==NULL )
    {
        LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    jbuf = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
    if( jbuf==NULL )
    {
        LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    iw = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
    if( iw==NULL )
    {
        LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }

#ifdef _OPENMP
    #pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos)
    {
        /* Per-thread views of the shared scratch arrays (stride n). */
        LIS_SCALAR *tw,*twn;
        LIS_INT    *tjbuf,*tiw;

        my_rank = omp_get_thread_num();
        LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
        tw    = &w[my_rank*n];
        twn   = &wn[my_rank*n];
        tjbuf = &jbuf[my_rank*n];
        tiw   = &iw[my_rank*n];

        /* tiw[col] == -1 means "col has no slot in tw yet" */
        for(i=is;i<ie;i++) tiw[i] = -1;

        for(i=is;i<ie;i++)
        {
            /* mean |a_ij| over the entries that fall inside [is,ie) */
            tnorm = 0;
            k     = 0;
            for(j=A->ptr[i];j<A->ptr[i+1];j++)
            {
                jj = A->index[j];
                if( jj<is || jj>=ie ) continue;
                tnorm += fabs(A->value[j]);
                k++;
            }
            tnorm   = tnorm / (double)k;
            tolnorm = tol * tnorm;

            lenu     = 0;
            lenl     = 0;
            tjbuf[i] = i;
            tw[i]    = 0;
            tiw[i]   = i;

            /* scatter row i into the work vector */
            for(j=A->ptr[i];j<A->ptr[i+1];j++)
            {
                col = A->index[j];
                if( col<is || col>=ie ) continue;
                t = A->value[j];
                if( col < i )
                {
                    tjbuf[lenl] = col;
                    tiw[col]    = lenl;
                    tw[lenl]    = t;
                    lenl++;
                }
                else if( col == i )
                {
                    tw[i] = t;
                }
                else
                {
                    lenu++;
                    jpos        = i + lenu;
                    tjbuf[jpos] = col;
                    tiw[col]    = jpos;
                    tw[jpos]    = t;
                }
            }

            /* eliminate the lower part, smallest column first */
            j = -1;
            while( ++j < lenl )
            {
                /* selection step: bring the smallest remaining column to j */
                jrow = tjbuf[j];
                jpos = j;
                for(k=j+1;k<lenl;k++)
                {
                    if( tjbuf[k]<jrow )
                    {
                        jrow = tjbuf[k];
                        jpos = k;
                    }
                }
                if( jpos!=j )
                {
                    col         = tjbuf[j];
                    tjbuf[j]    = tjbuf[jpos];
                    tjbuf[jpos] = col;
                    tiw[jrow]   = j;
                    tiw[col]    = jpos;
                    t           = tw[j];
                    tw[j]       = tw[jpos];
                    tw[jpos]    = t;
                }

                fact      = tw[j] * D->value[jrow];
                tw[j]     = fact;
                tiw[jrow] = -1;

                /* w -= fact * U[jrow,:] ; tiny new fill-in is dropped */
                for(k=0;k<U->nnz[jrow];k++)
                {
                    col  = U->index[jrow][k];
                    jpos = tiw[col];
                    lxu  = -fact * U->value[jrow][k];
                    if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
                    if( col >= i )
                    {
                        if( jpos == -1 )
                        {
                            lenu++;
                            upos        = i + lenu;
                            tjbuf[upos] = col;
                            tiw[col]    = upos;
                            tw[upos]    = lxu;
                        }
                        else
                        {
                            tw[jpos] += lxu;
                        }
                    }
                    else
                    {
                        if( jpos == -1 )
                        {
                            tjbuf[lenl] = col;
                            tiw[col]    = lenl;
                            tw[lenl]    = lxu;
                            lenl++;
                        }
                        else
                        {
                            tw[jpos] += lxu;
                        }
                    }
                }
            }

            /* clear the column map of the diagonal and upper part */
            tiw[i] = -1;
            for(j=0;j<lenu;j++)
            {
                tiw[ tjbuf[i+j+1] ] = -1;
            }

            D->value[i] = 1.0 / tw[i];

            /* keep at most lfil entries of the lower part */
            len = _min(lfil,lenl);
            for(j=0;j<lenl;j++)
            {
                twn[j] = fabs(tw[j]);
                tiw[j] = j;
            }
            lis_sort_di(0,lenl-1,twn,tiw);
            lis_sort_i(0,len-1,tiw);
            L->nnz[i] = len;
            if( len>0 )
            {
                L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
                L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
                if( L->index[i]==NULL || L->value[i]==NULL )
                {
                    #pragma omp critical
                    {
                        LIS_SETERR_MEM(len*sizeof(LIS_SCALAR));
                        err = LIS_OUT_OF_MEMORY;
                    }
                    /* abandon this thread's remaining rows */
                    break;
                }
            }
            for(j=0;j<len;j++)
            {
                jpos           = tiw[j];
                L->index[i][j] = tjbuf[jpos];
                L->value[i][j] = tw[jpos];
            }
            for(j=0;j<lenl;j++) tiw[j] = -1;

            /* keep at most lfil entries of the upper part */
            len = _min(lfil,lenu);
            for(j=0;j<lenu;j++)
            {
                twn[j] = fabs(tw[i+j+1]);
                tiw[j] = i+j+1;
            }
            lis_sort_di(0,lenu-1,twn,tiw);
            lis_sort_i(0,len-1,tiw);
            U->nnz[i] = len;
            if( len>0 )
            {
                U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
                U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
                if( U->index[i]==NULL || U->value[i]==NULL )
                {
                    #pragma omp critical
                    {
                        LIS_SETERR_MEM(len*sizeof(LIS_SCALAR));
                        err = LIS_OUT_OF_MEMORY;
                    }
                    break;
                }
            }
            for(j=0;j<len;j++)
            {
                jpos           = tiw[j];
                U->index[i][j] = tjbuf[jpos];
                U->value[i][j] = tw[jpos];
            }
            for(j=0;j<lenu;j++) tiw[j] = -1;
        }
    }
    if( err ) goto cleanup;
#else
    /* iw[col] == -1 means "col has no slot in w yet" */
    for(i=0;i<n;i++) iw[i] = -1;

    for(i=0;i<n;i++)
    {
        /* mean |a_ij| over the stored entries of row i */
        tnorm = 0;
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            tnorm += fabs(A->value[j]);
        }
        tnorm   = tnorm / (double)(A->ptr[i+1]-A->ptr[i]);
        tolnorm = tol * tnorm;

        lenu    = 0;
        lenl    = 0;
        jbuf[i] = i;
        w[i]    = 0;
        iw[i]   = i;

        /* scatter row i into the work vector */
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            col = A->index[j];
#ifdef USE_MPI
            if( col>n-1 ) continue;
#endif
            t = A->value[j];
            if( col < i )
            {
                jbuf[lenl] = col;
                iw[col]    = lenl;
                w[lenl]    = t;
                lenl++;
            }
            else if( col == i )
            {
                w[i] = t;
            }
            else
            {
                lenu++;
                jpos       = i + lenu;
                jbuf[jpos] = col;
                iw[col]    = jpos;
                w[jpos]    = t;
            }
        }

        /* eliminate the lower part, smallest column first */
        j = -1;
        while( ++j < lenl )
        {
            /* selection step: bring the smallest remaining column to j */
            jrow = jbuf[j];
            jpos = j;
            for(k=j+1;k<lenl;k++)
            {
                if( jbuf[k]<jrow )
                {
                    jrow = jbuf[k];
                    jpos = k;
                }
            }
            if( jpos!=j )
            {
                col        = jbuf[j];
                jbuf[j]    = jbuf[jpos];
                jbuf[jpos] = col;
                iw[jrow]   = j;
                iw[col]    = jpos;
                t          = w[j];
                w[j]       = w[jpos];
                w[jpos]    = t;
            }

            fact     = w[j] * D->value[jrow];
            w[j]     = fact;
            iw[jrow] = -1;

            /* w -= fact * U[jrow,:] ; tiny new fill-in is dropped */
            for(k=0;k<U->nnz[jrow];k++)
            {
                col  = U->index[jrow][k];
                jpos = iw[col];
                lxu  = -fact * U->value[jrow][k];
                if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
                if( col >= i )
                {
                    if( jpos == -1 )
                    {
                        lenu++;
                        upos       = i + lenu;
                        jbuf[upos] = col;
                        iw[col]    = upos;
                        w[upos]    = lxu;
                    }
                    else
                    {
                        w[jpos] += lxu;
                    }
                }
                else
                {
                    if( jpos == -1 )
                    {
                        jbuf[lenl] = col;
                        iw[col]    = lenl;
                        w[lenl]    = lxu;
                        lenl++;
                    }
                    else
                    {
                        w[jpos] += lxu;
                    }
                }
            }
        }

        /* clear the column map of the diagonal and upper part */
        iw[i] = -1;
        for(j=0;j<lenu;j++)
        {
            iw[ jbuf[i+j+1] ] = -1;
        }

        D->value[i] = 1.0 / w[i];

        /* keep at most lfil entries of the lower part */
        len = _min(lfil,lenl);
        for(j=0;j<lenl;j++)
        {
            wn[j] = fabs(w[j]);
            iw[j] = j;
        }
        lis_sort_di(0,lenl-1,wn,iw);
        lis_sort_i(0,len-1,iw);
        L->nnz[i] = len;
        if( len>0 )
        {
            L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
            L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
            if( L->index[i]==NULL || L->value[i]==NULL )
            {
                LIS_SETERR_MEM(len*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
        }
        for(j=0;j<len;j++)
        {
            jpos           = iw[j];
            L->index[i][j] = jbuf[jpos];
            L->value[i][j] = w[jpos];
        }
        for(j=0;j<lenl;j++) iw[j] = -1;

        /* keep at most lfil entries of the upper part */
        len = _min(lfil,lenu);
        for(j=0;j<lenu;j++)
        {
            wn[j] = fabs(w[i+j+1]);
            iw[j] = i+j+1;
        }
        lis_sort_di(0,lenu-1,wn,iw);
        lis_sort_i(0,len-1,iw);
        U->nnz[i] = len;
        if( len>0 )
        {
            U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
            U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
            if( U->index[i]==NULL || U->value[i]==NULL )
            {
                LIS_SETERR_MEM(len*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
        }
        for(j=0;j<len;j++)
        {
            jpos           = iw[j];
            U->index[i][j] = jbuf[jpos];
            U->value[i][j] = w[jpos];
        }
        for(j=0;j<lenu;j++) iw[j] = -1;
    }
#endif

cleanup:
    /* Entries may still be NULL when an allocation failed part-way;
       presumably lis_free2 tolerates that, as free() does - TODO confirm. */
    lis_free2(4,w,iw,wn,jbuf);
    if( err ) return err;

    precon->L = L;
    precon->U = U;
    precon->D = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Build a sparse approximate inverse preconditioner for the CSR matrix
 * solver->A using right-looking updates.
 *
 * W and Z start as the identity (one unit entry per row).  For every
 * row i the routine
 *   1. forms l = A*Z_i and u = W_i^T*A, remembering the positions j > i
 *      that were touched (il / iu),
 *   2. sets d[i] = 1 / (u * Z_i),
 *   3. for each remembered j updates  W_j -= (l_j * d_i) * W_i  and
 *      Z_j -= (u_j * d_i) * Z_i, applying a contribution t only when
 *      |t| / max|A[i,:]| exceeds params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN].
 *
 * Rows of W and Z grow in steps of annz entries through
 * lis_matrix_ilu_realloc.
 *
 * On success precon->L, precon->U and precon->D receive W, Z and D.
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the first
 * failing Lis call.  The scratch arrays and the CSC copy of A are released
 * on every exit path once their allocation has started.
 *
 * NOTE(review): W, Z and D are still not released on failure; their
 * destructors are not visible from this function.
 * NOTE(review): an all-zero row makes nrm infinite and a zero pivot makes
 * d[i] infinite; neither case is guarded.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT err;
    LIS_INT i,j,k,ii,jj,ik,jk;
    LIS_INT n,annz,cl,cu;
    LIS_INT *ww,*il,*iu;
    LIS_SCALAR t,dd;
    LIS_REAL tol,nrm;
    LIS_SCALAR *d,*l,*u;
    LIS_MATRIX A,B;
    LIS_MATRIX_ILU W,Z;
    LIS_VECTOR D;

    LIS_DEBUG_FUNC_IN;

    A   = solver->A;
    n   = A->n;
    tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];

    /*
     * annz is both the initial row capacity handed to
     * lis_matrix_ilu_premalloc and the growth step used below.  n/10 is 0
     * for n < 10, which would leave rows without room for the unit entry
     * and make "nnz_ma += annz" a no-op, so appended entries would be
     * written past the end of the row.  Use at least one slot.
     */
    annz = n / 10;
    if( annz < 1 ) annz = 1;

    W  = NULL;
    Z  = NULL;
    B  = NULL;
    ww = NULL;
    l  = NULL;
    u  = NULL;
    il = NULL;
    iu = NULL;

    err = lis_matrix_ilu_create(n,1,&W);
    if( err ) return err;
    err = lis_matrix_ilu_create(n,1,&Z);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(W);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(Z);
    if( err ) return err;
    err = lis_vector_duplicate(A,&D);
    if( err ) return err;
    d = D->value;
    err = lis_matrix_ilu_premalloc(annz,W);
    if( err ) return err;
    err = lis_matrix_ilu_premalloc(annz,Z);
    if( err ) return err;

    /* Scratch storage; from here on every failure goes through cleanup. */
    l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( l==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( u==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
    if( il==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
    if( iu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    ww = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( ww==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }

    /* B becomes the CSC form of A so that columns of A can be walked. */
    err = lis_matrix_duplicate(A,&B);
    if( err )
    {
        /* The state of B after a failed duplicate is not visible here,
           so it is deliberately not handed to lis_matrix_destroy. */
        B = NULL;
        goto cleanup;
    }
    err = lis_matrix_convert_csr2csc(A,B);
    if( err ) goto cleanup;

    /* ww is a marker / position map that is all-zero between uses */
    for(i=0;i<n;i++) ww[i] = 0;

    /* W = Z = I */
    for(i=0;i<n;i++)
    {
        W->value[i][0] = 1.0;
        W->index[i][0] = i;
        W->nnz[i]      = 1;
        Z->value[i][0] = 1.0;
        Z->index[i][0] = i;
        Z->nnz[i]      = 1;
    }

    for(i=0;i<n;i++)
    {
        /* 1 / nrm_inf(A[i,:]) makes the drop test relative to the row */
        nrm = 0.0;
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            nrm = _max(nrm,fabs(A->value[j]));
        }
        nrm = 1.0/nrm;

        /* l = A*Z_i, restricted to positions below the diagonal (jj > i) */
        cl = 0;
        memset(l,0,n*sizeof(LIS_SCALAR));
        for(k=0;k<Z->nnz[i];k++)
        {
            ii = Z->index[i][k];
            for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
            {
                jj = B->index[j];
                if( jj>i )
                {
                    l[jj] += B->value[j]*Z->value[i][k];
                    if( ww[jj]==0 )
                    {
                        ww[jj]   = 1;
                        il[cl++] = jj;
                    }
                }
            }
        }
        for(k=0;k<cl;k++) ww[il[k]] = 0;

        /* u = W_i^T*A ; all positions are accumulated, only jj > i listed */
        cu = 0;
        memset(u,0,n*sizeof(LIS_SCALAR));
        for(k=0;k<W->nnz[i];k++)
        {
            ii = W->index[i][k];
            for(j=A->ptr[ii];j<A->ptr[ii+1];j++)
            {
                jj = A->index[j];
#ifdef USE_MPI
                if( jj>n-1 ) continue;
#endif
                u[jj] += A->value[j]*W->value[i][k];
                if( jj>i && ww[jj]==0 )
                {
                    ww[jj]   = 1;
                    iu[cu++] = jj;
                }
            }
        }
        for(k=0;k<cu;k++) ww[iu[k]] = 0;

        /* d_ii = u*Z_i (equivalently W_i^T*l) ; the reciprocal is stored */
        t = 0.0;
        for(k=0;k<Z->nnz[i];k++)
        {
            t += u[Z->index[i][k]]*Z->value[i][k];
        }
        d[i] = 1.0/t;

        /* for j>i with l_j!=0:  w_j = w_j - (l_j/d_ii)*w_i */
        for(jj=0;jj<cl;jj++)
        {
            j  = il[jj];
            dd = l[j]*d[i];
            /* ww[col] = 1-based position of col inside row j, 0 if absent */
            for(k=0;k<W->nnz[j];k++)
            {
                ww[W->index[j][k]] = k+1;
            }
            for(ik=0;ik<W->nnz[i];ik++)
            {
                jk = ww[W->index[i][ik]];
                t  = dd*W->value[i][ik];
                if( fabs(t)*nrm > tol )
                {
                    if( jk!=0 )
                    {
                        W->value[j][jk-1] -= t;
                    }
                    else
                    {
                        if( W->nnz[j] == W->nnz_ma[j] )
                        {
                            W->nnz_ma[j] += annz;
                            err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W);
                            if( err ) goto cleanup;
                        }
                        jk              = W->nnz[j];
                        W->index[j][jk] = W->index[i][ik];
                        W->value[j][jk] = -t;
                        W->nnz[j]++;
                    }
                }
            }
            for(k=0;k<W->nnz[j];k++)
            {
                ww[W->index[j][k]] = 0;
            }
        }

        /* for j>i with u_j!=0:  z_j = z_j - (u_j/d_ii)*z_i */
        for(jj=0;jj<cu;jj++)
        {
            j  = iu[jj];
            dd = u[j]*d[i];
            for(k=0;k<Z->nnz[j];k++)
            {
                ww[Z->index[j][k]] = k+1;
            }
            for(ik=0;ik<Z->nnz[i];ik++)
            {
                jk = ww[Z->index[i][ik]];
                t  = dd*Z->value[i][ik];
                if( fabs(t)*nrm > tol )
                {
                    if( jk!=0 )
                    {
                        Z->value[j][jk-1] -= t;
                    }
                    else
                    {
                        if( Z->nnz[j] == Z->nnz_ma[j] )
                        {
                            Z->nnz_ma[j] += annz;
                            err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z);
                            if( err ) goto cleanup;
                        }
                        jk              = Z->nnz[j];
                        Z->index[j][jk] = Z->index[i][ik];
                        Z->value[j][jk] = -t;
                        Z->nnz[j]++;
                    }
                }
            }
            for(k=0;k<Z->nnz[j];k++)
            {
                ww[Z->index[j][k]] = 0;
            }
        }
    }

cleanup:
    if( B!=NULL )
    {
        lis_matrix_destroy(B);
    }
    /* Entries may still be NULL when an allocation failed part-way;
       presumably lis_free2 tolerates that, as free() does - TODO confirm. */
    lis_free2(5,l,u,ww,il,iu);
    if( err ) return err;

    precon->L = W;
    precon->U = Z;
    precon->D = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Build a threshold-based incomplete block LU factorization of the BSR
 * matrix solver->A (nr block rows of bnr x bnr blocks, bs = bnr*bnr
 * scalars per block).
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP  tol - drop tolerance, scaled per block row by the
 *                          largest block norm (lis_array_nrm2) in the row;
 *   LIS_PARAMS_RATE  m   - fill factor; lfil = (LIS_INT)(bnnz/(2nr) * m)
 *                          bounds the blocks kept per row of L, and per
 *                          row of U with the diagonal block counted in.
 *
 * Each block row is expanded into the work array w (strict lower blocks at
 * positions 0..lenl-1, diagonal at i, strict upper blocks after it),
 * eliminated against earlier rows of U in increasing block-column order,
 * trimmed with lis_sort_di / lis_sort_i and copied into L and U.  The
 * diagonal block is copied into D and passed to lis_array_LUdecomp.
 * For bnr 2 and 3 the last diagonal block gets unit entries on the rows
 * that lie beyond n, so that it stays non-singular.
 *
 * On success precon->L, precon->U and precon->WD are set.
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the first
 * failing Lis call.  The scratch buffers are released on every exit path
 * once their allocation has started.
 *
 * NOTE(review): L, U and D are still not released on failure; their
 * destructors are not visible from this function.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT err;
    LIS_INT i,j,k,kk,bnr,bs;
    LIS_INT n,nr,lfil,len,ulen;
    LIS_SCALAR t,tol,m;
    LIS_MATRIX A;
    LIS_MATRIX_ILU L,U;
    LIS_MATRIX_DIAG D;
    LIS_SCALAR tnorm,tolnorm;
    LIS_SCALAR *blk,*buf_ns,*buf_fact,*xnrm,*wn,*w;
    LIS_INT lenu,lenl,col,jpos,jrow,upos,para;
    LIS_INT *jbuf,*iw;

    LIS_DEBUG_FUNC_IN;

    A    = solver->A;
    n    = A->n;
    nr   = A->nr;
    bnr  = A->bnr;
    bs   = bnr*bnr;
    tol  = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
    m    = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
    lfil = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

    L    = NULL;
    U    = NULL;
    blk  = NULL;
    w    = NULL;
    xnrm = NULL;
    wn   = NULL;
    jbuf = NULL;
    iw   = NULL;

    err = lis_matrix_ilu_create(nr,bnr,&L);
    if( err ) return err;
    err = lis_matrix_ilu_create(nr,bnr,&U);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(L);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(U);
    if( err ) return err;
    err = lis_matrix_diag_duplicateM(A,&D);
    if( err ) return err;

    /*
     * Two block-sized temporaries.  They used to be fixed 16-element stack
     * arrays, which overflow as soon as bnr exceeds 4; size them from bs.
     */
    blk = (LIS_SCALAR *)lis_malloc(2*bs*sizeof(LIS_SCALAR),"lis_precon_create_ilut_bsr::buf");
    if( blk==NULL )
    {
        LIS_SETERR_MEM(2*bs*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    buf_ns   = blk;
    buf_fact = blk + bs;

    w = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( w==NULL )
    {
        LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( xnrm==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( wn==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    jbuf = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
    if( jbuf==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }
    iw = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
    if( iw==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto cleanup;
    }

    /* iw[col] == -1 means "block column col has no slot in w yet" */
    for(i=0;i<nr;i++) iw[i] = -1;

    for(i=0;i<nr;i++)
    {
        /* largest block norm of row i scales the drop tolerance */
        tnorm = 0;
        for(j=A->bptr[i];j<A->bptr[i+1];j++)
        {
            lis_array_nrm2(bs,&A->value[bs*j],&t);
            tnorm = _max(t,tnorm);
        }
        tolnorm = tol * tnorm;

        /* lenu counts the diagonal block as well, hence it starts at 1 */
        lenu    = 1;
        lenl    = 0;
        jbuf[i] = i;
        memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
        iw[i]   = i;

        /* scatter block row i, skipping small off-diagonal blocks */
        for(j=A->bptr[i];j<A->bptr[i+1];j++)
        {
            col = A->bindex[j];
            lis_array_nrm2(bs,&A->value[bs*j],&t);
            if( t<tolnorm && col!=i ) continue;
            if( col < i )
            {
                jbuf[lenl] = col;
                iw[col]    = lenl;
                memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
                lenl++;
            }
            else if( col == i )
            {
                memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
            }
            else
            {
                jpos       = i + lenu;
                jbuf[jpos] = col;
                iw[col]    = jpos;
                memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
                lenu++;
            }
        }

        /* eliminate the lower part; len counts the multipliers that survive */
        j   = -1;
        len = 0;
        while( ++j < lenl )
        {
            /* selection step: bring the smallest remaining column to j */
            jrow = jbuf[j];
            jpos = j;
            for(k=j+1;k<lenl;k++)
            {
                if( jbuf[k]<jrow )
                {
                    jrow = jbuf[k];
                    jpos = k;
                }
            }
            if( jpos!=j )
            {
                col        = jbuf[j];
                jbuf[j]    = jbuf[jpos];
                jbuf[jpos] = col;
                iw[jrow]   = j;
                iw[col]    = jpos;
                memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
                memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
                memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
            }

            /* buf_fact: block multiplier from the factored diagonal block
               of row jrow and the current block (see lis_array_matinv) */
            lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);
            iw[jrow] = -1;

            /* drop the whole multiplier when it is negligible */
            lis_array_nrm2(bs,buf_fact,&t);
            if( t * xnrm[jrow] <= tolnorm ) continue;

            for(k=0;k<U->nnz[jrow];k++)
            {
                col = U->index[jrow][k];
                lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
                jpos = iw[col];
                lis_array_nrm2(bs,buf_ns,&t);
                if( t < tolnorm && jpos == -1 )
                {
                    continue;
                }
                if( col >= i )
                {
                    if( jpos == -1 )
                    {
                        upos       = i + lenu;
                        jbuf[upos] = col;
                        iw[col]    = upos;
                        memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
                        lenu++;
                    }
                    else
                    {
                        for(kk=0;kk<bs;kk++)
                        {
                            w[bs*jpos+kk] += buf_ns[kk];
                        }
                    }
                }
                else
                {
                    if( jpos == -1 )
                    {
                        jbuf[lenl] = col;
                        iw[col]    = lenl;
                        memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
                        lenl++;
                    }
                    else
                    {
                        for(kk=0;kk<bs;kk++)
                        {
                            w[bs*jpos+kk] += buf_ns[kk];
                        }
                    }
                }
            }

            /* compact the kept multipliers to the front (len <= j) */
            for(kk=0;kk<bs;kk++)
            {
                w[bs*len+kk] = -buf_fact[kk];
            }
            jbuf[len] = jrow;
            len++;
        }

        /* keep at most lfil blocks of the lower part */
        lenl = len;
        len  = _min(lfil,lenl);
        for(j=0;j<lenl;j++)
        {
            lis_array_nrm2(bs,&w[bs*j],&wn[j]);
            iw[j] = j;
        }
        lis_sort_di(0,lenl-1,wn,iw);
        lis_sort_i(0,len-1,iw);
        L->nnz[i] = len;
        if( len>0 )
        {
            L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
            L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
            if( L->index[i]==NULL || L->value[i]==NULL )
            {
                LIS_SETERR_MEM(bs*len*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
        }
        for(j=0;j<len;j++)
        {
            jpos           = iw[j];
            L->index[i][j] = jbuf[jpos];
            memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
        }
        for(j=0;j<lenl;j++) iw[j] = -1;

        /*
         * Upper part: len includes the diagonal block, so len-1 strict
         * upper blocks are stored.  With lfil <= 0 that used to produce
         * U->nnz[i] == -1; clamp the stored count at zero.
         */
        len  = _min(lfil,lenu);
        ulen = (len>1) ? len-1 : 0;
        for(j=1;j<lenu;j++)
        {
            jpos = i+j;
            lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
            iw[j-1] = jpos;
        }
        para = lenu - 1;
        lis_sort_di(0,para-1,wn,iw);
        lis_sort_i(0,ulen-1,iw);
        U->nnz[i] = ulen;
        if( ulen>0 )
        {
            U->index[i] = (LIS_INT *)malloc(ulen*sizeof(LIS_INT));
            U->value[i] = (LIS_SCALAR *)malloc(bs*ulen*sizeof(LIS_SCALAR));
            if( U->index[i]==NULL || U->value[i]==NULL )
            {
                LIS_SETERR_MEM(bs*ulen*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto cleanup;
            }
        }

        /* xnrm[i]: largest block norm among the diagonal and kept U blocks;
           it feeds the multiplier drop test of later rows */
        lis_array_nrm2(bs,&w[bs*i],&t);
        for(j=0;j<ulen;j++)
        {
            jpos           = iw[j];
            U->index[i][j] = jbuf[jpos];
            memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
            t = _max(t,wn[j]);
        }
        for(j=0;j<lenu-1;j++) iw[j] = -1;
        xnrm[i] = t;

        memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));

        /* pad the last diagonal block when n is not a multiple of bnr */
        if( i==nr-1 )
        {
            switch(bnr)
            {
            case 2:
                if( n%2!=0 )
                {
                    D->value[4*(nr-1)+3] = 1.0;
                }
                break;
            case 3:
                if( n%3==1 )
                {
                    D->value[9*(nr-1)+4] = 1.0;
                    D->value[9*(nr-1)+8] = 1.0;
                }
                else if( n%3==2 )
                {
                    D->value[9*(nr-1)+8] = 1.0;
                }
                break;
            }
        }
        lis_array_LUdecomp(bnr,&D->value[bs*i]);

        /* clear the column map of the diagonal and upper part */
        for(j=0;j<lenu;j++)
        {
            iw[ jbuf[i+j] ] = -1;
        }
    }

cleanup:
    /* Entries may still be NULL when an allocation failed part-way;
       presumably lis_free2 tolerates that, as free() does - TODO confirm. */
    lis_free2(6,w,iw,xnrm,wn,jbuf,blk);
    if( err ) return err;

    precon->L  = L;
    precon->U  = U;
    precon->WD = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}