/*
 * lis_matrix_split_csr - split a CSR matrix into strictly-lower, diagonal
 * and strictly-upper parts.
 *
 * Every stored entry (row i, column A->index[j]) of A is routed by its
 * column index:
 *   column <  i : appended to the lower CSR arrays  (stored in A->L)
 *   column >  i : appended to the upper CSR arrays  (stored in A->U)
 *   column == i : written to D->value[i]            (stored in A->D)
 *
 * The work is done in two passes: the first counts the lower/upper entries
 * so the arrays can be sized, the second copies the entries.  In the OpenMP
 * build the per-row counts are kept in the scratch arrays liw/uiw and turned
 * into row pointers by a prefix sum, so that the copy pass can run in
 * parallel over rows.
 *
 * On success A->L, A->U, A->D are filled in and A->is_splited is set to
 * LIS_TRUE.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when a scratch array cannot be
 * allocated, or the error code of lis_matrix_LU_create,
 * lis_matrix_malloc_csr or lis_matrix_diag_duplicateM.  On every failure
 * path the scratch arrays are released (the original leaked them in the
 * OpenMP build).
 */
LIS_INT lis_matrix_split_csr(LIS_MATRIX A)
{
    LIS_INT i,j,n;
    LIS_INT nnzl,nnzu;
    LIS_INT err;
    LIS_INT *lptr,*lindex,*uptr,*uindex;
    LIS_SCALAR *lvalue,*uvalue;
    LIS_MATRIX_DIAG D;
#ifdef _OPENMP
    LIS_INT kl,ku;
    LIS_INT *liw,*uiw;
#endif

    LIS_DEBUG_FUNC_IN;

    n      = A->n;
    nnzl   = 0;
    nnzu   = 0;
    D      = NULL;
    lptr   = NULL;
    lindex = NULL;
    lvalue = NULL;
    uptr   = NULL;
    uindex = NULL;
    uvalue = NULL;

    /* Pass 1: count strictly-lower and strictly-upper entries. */
#ifdef _OPENMP
    liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_csr::liw");
    if( liw==NULL )
    {
        LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_csr::uiw");
    if( uiw==NULL )
    {
        LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
        lis_free(liw);
        return LIS_OUT_OF_MEMORY;
    }
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
        liw[i] = 0;
        uiw[i] = 0;
    }
    /* Row i only touches slot i+1, so rows can be counted independently. */
    #pragma omp parallel for private(i,j)
    for(i=0;i<n;i++)
    {
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            if( A->index[j]<i )
            {
                liw[i+1]++;
            }
            else if( A->index[j]>i )
            {
                uiw[i+1]++;
            }
        }
    }
    /* Prefix sum: liw/uiw become the row-pointer arrays of L and U. */
    for(i=0;i<n;i++)
    {
        liw[i+1] += liw[i];
        uiw[i+1] += uiw[i];
    }
    nnzl = liw[n];
    nnzu = uiw[n];
#else
    for(i=0;i<n;i++)
    {
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            if( A->index[j]<i )
            {
                nnzl++;
            }
            else if( A->index[j]>i )
            {
                nnzu++;
            }
        }
    }
#endif

    err = lis_matrix_LU_create(A);
    if( err )
    {
        goto fail;
    }
    err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
    if( err )
    {
        /* As in the original, the L arrays are not freed on this path. */
        goto fail;
    }
    err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
    if( err )
    {
        goto fail_arrays;
    }
    err = lis_matrix_diag_duplicateM(A,&D);
    if( err )
    {
        goto fail_arrays;
    }

    /* Pass 2: copy the entries into L, U and D. */
#ifdef _OPENMP
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
        lptr[i] = liw[i];
        uptr[i] = uiw[i];
    }
    #pragma omp parallel for private(i,j,kl,ku)
    for(i=0;i<n;i++)
    {
        /* Each row writes only into its own [ptr[i], ptr[i+1]) range. */
        kl = lptr[i];
        ku = uptr[i];
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            if( A->index[j]<i )
            {
                lindex[kl] = A->index[j];
                lvalue[kl] = A->value[j];
                kl++;
            }
            else if( A->index[j]>i )
            {
                uindex[ku] = A->index[j];
                uvalue[ku] = A->value[j];
                ku++;
            }
            else
            {
                D->value[i] = A->value[j];
            }
        }
    }
    lis_free2(2,liw,uiw);
#else
    /* The counters are reused as running write positions. */
    nnzl    = 0;
    nnzu    = 0;
    lptr[0] = 0;
    uptr[0] = 0;
    for(i=0;i<n;i++)
    {
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            if( A->index[j]<i )
            {
                lindex[nnzl] = A->index[j];
                lvalue[nnzl] = A->value[j];
                nnzl++;
            }
            else if( A->index[j]>i )
            {
                uindex[nnzu] = A->index[j];
                uvalue[nnzu] = A->value[j];
                nnzu++;
            }
            else
            {
                D->value[i] = A->value[j];
            }
        }
        lptr[i+1] = nnzl;
        uptr[i+1] = nnzu;
    }
#endif

    A->L->nnz     = nnzl;
    A->L->ptr     = lptr;
    A->L->index   = lindex;
    A->L->value   = lvalue;
    A->U->nnz     = nnzu;
    A->U->ptr     = uptr;
    A->U->index   = uindex;
    A->U->value   = uvalue;
    A->D          = D;
    A->is_splited = LIS_TRUE;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;

fail_arrays:
    lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
fail:
#ifdef _OPENMP
    /* Scratch row counters are no longer needed once the split is abandoned. */
    lis_free2(2,liw,uiw);
#endif
    return err;
}
/*
 * lis_precon_create_ilut_bsr - build a threshold-based incomplete block
 * factorization of solver->A, which is read through its block arrays
 * (A->bptr, A->bindex, A->value with bnr x bnr blocks, bs = bnr*bnr scalars
 * per block).
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP : tol, relative drop tolerance (scaled per block row by
 *                     the largest block norm of that row)
 *   LIS_PARAMS_RATE : m, used to derive the per-row fill limit
 *                     lfil = (bnnz / (2*nr)) * m
 *
 * For each block row i the row is scattered into the work array w (lower
 * blocks at positions 0..lenl-1, the diagonal block at position i, upper
 * blocks at positions i+1..i+lenu-1), eliminated against the already
 * computed rows of U in increasing column order, and then the largest (by
 * block norm) at most lfil lower blocks and lfil-1 upper blocks are stored
 * in L and U.  The diagonal block is stored in D and handed to
 * lis_array_LUdecomp in place.
 *
 * On success the factors are published as precon->L, precon->U and
 * precon->WD.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when a work buffer or a row of
 * L/U cannot be allocated, or the error code of lis_matrix_ilu_create,
 * lis_matrix_ilu_setCR or lis_matrix_diag_duplicateM.
 *
 * Changes relative to the previous version: work buffers are released on
 * every failure path, LIS_SETERR_MEM reports the size actually requested,
 * the per-row malloc results are checked before use, and the unused locals
 * annz/gamma are gone (the former performed an integer division by A->nr).
 *
 * NOTE(review): buf_ns/buf_fact hold 16 scalars, so the code relies on
 * bnr <= 4 - confirm callers guarantee that.
 * NOTE(review): L, U and D are not released on failure; no destroy routine
 * for them is visible from this block, so that is left for a follow-up.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT err;
    LIS_INT i,j,k,kk,bnr,bs;
    LIS_INT n,nr,lfil,len;
    LIS_SCALAR t,tol,m;
    LIS_MATRIX A;
    LIS_MATRIX_ILU L,U;
    LIS_MATRIX_DIAG D;
    LIS_SCALAR tnorm, tolnorm;
    LIS_SCALAR buf_ns[16],buf_fact[16],*xnrm,*wn,*w;
    LIS_INT lenu,lenl,col,jpos,jrow,upos,para;
    LIS_INT *jbuf,*iw;

    LIS_DEBUG_FUNC_IN;

    A    = solver->A;
    n    = A->n;
    nr   = A->nr;
    bnr  = A->bnr;
    bs   = bnr*bnr;
    tol  = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
    m    = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
    lfil = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

    L    = NULL;
    U    = NULL;
    w    = NULL;
    xnrm = NULL;
    wn   = NULL;
    jbuf = NULL;
    iw   = NULL;

    err = lis_matrix_ilu_create(nr,bnr,&L);
    if( err ) return err;
    err = lis_matrix_ilu_create(nr,bnr,&U);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(L);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(U);
    if( err ) return err;
    err = lis_matrix_diag_duplicateM(A,&D);
    if( err )
    {
        return err;
    }

    /* w: one block per possible column plus one spare block. */
    w = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( w==NULL )
    {
        LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto fail;
    }
    /* xnrm[r]: largest block norm kept in row r of U (including the diagonal). */
    xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( xnrm==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto fail;
    }
    /* wn: block norms of the current row, used as sort keys. */
    wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
    if( wn==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto fail;
    }
    /* jbuf[p]: block column stored at position p of w. */
    jbuf = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
    if( jbuf==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto fail;
    }
    /* iw[c]: position of block column c in w, or -1 when absent. */
    iw = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
    if( iw==NULL )
    {
        LIS_SETERR_MEM(nr*sizeof(LIS_INT));
        err = LIS_OUT_OF_MEMORY;
        goto fail;
    }

    for(i=0;i<nr;i++) iw[i] = -1;

    for(i=0;i<nr;i++)
    {
        /* Row-relative drop threshold: tol times the largest block norm. */
        tnorm = 0;
        for(j=A->bptr[i];j<A->bptr[i+1];j++)
        {
            lis_array_nrm2(bs,&A->value[bs*j],&t);
            tnorm = _max(t,tnorm);
        }
        tolnorm = tol * tnorm;

        /* Scatter row i of A into w; the diagonal block is always kept. */
        lenu    = 1;
        lenl    = 0;
        jbuf[i] = i;
        memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
        iw[i]   = i;

        for(j=A->bptr[i];j<A->bptr[i+1];j++)
        {
            col = A->bindex[j];
            lis_array_nrm2(bs,&A->value[bs*j],&t);
            if( t<tolnorm && col!=i ) continue;
            if( col < i )
            {
                jbuf[lenl] = col;
                iw[col]    = lenl;
                memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
                lenl++;
            }
            else if( col == i )
            {
                memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
            }
            else
            {
                jpos       = i + lenu;
                jbuf[jpos] = col;
                iw[col]    = jpos;
                memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
                lenu++;
            }
        }

        /*
         * Eliminate the lower part.  lenl can grow inside the loop because
         * fill-in blocks with column < i are appended to the lower list.
         * len counts the multipliers that survive the drop test; they are
         * compacted to the front of w/jbuf.
         */
        j   = -1;
        len = 0;
        while( ++j < lenl )
        {
            /* Select the smallest remaining column and swap it to slot j. */
            jrow = jbuf[j];
            jpos = j;
            for(k=j+1;k<lenl;k++)
            {
                if( jbuf[k]<jrow )
                {
                    jrow = jbuf[k];
                    jpos = k;
                }
            }
            if( jpos!=j )
            {
                col        = jbuf[j];
                jbuf[j]    = jbuf[jpos];
                jbuf[jpos] = col;
                iw[jrow]   = j;
                iw[col]    = jpos;
                memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
                memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
                memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
            }

            /*
             * Block multiplier for pivot row jrow, computed from the stored
             * diagonal block of that row (presumably w_j times the inverse
             * of D_jrow via its LU factors - confirm against
             * lis_array_matinv).
             */
            lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);

            iw[jrow] = -1;
            lis_array_nrm2(bs,buf_fact,&t);
            /* Multiplier too small relative to the pivot row: drop it. */
            if( t * xnrm[jrow] <= tolnorm ) continue;

            /* Combine row jrow of U with the current row. */
            for(k=0;k<U->nnz[jrow];k++)
            {
                col = U->index[jrow][k];
                lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
                jpos = iw[col];

                lis_array_nrm2(bs,buf_ns,&t);
                /* Small contribution to a position not yet present: skip. */
                if( t < tolnorm && jpos == -1 )
                {
                    continue;
                }

                if( col >= i )
                {
                    if( jpos == -1 )
                    {
                        /* New fill-in block in the upper part. */
                        upos       = i + lenu;
                        jbuf[upos] = col;
                        iw[col]    = upos;
                        memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
                        lenu++;
                    }
                    else
                    {
                        for(kk=0;kk<bs;kk++)
                        {
                            w[bs*jpos+kk] += buf_ns[kk];
                        }
                    }
                }
                else
                {
                    if( jpos == -1 )
                    {
                        /* New fill-in block in the lower part. */
                        jbuf[lenl] = col;
                        iw[col]    = lenl;
                        memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
                        lenl++;
                    }
                    else
                    {
                        for(kk=0;kk<bs;kk++)
                        {
                            w[bs*jpos+kk] += buf_ns[kk];
                        }
                    }
                }
            }
            /* Keep the negated multiplier as the L entry for column jrow. */
            for(kk=0;kk<bs;kk++)
            {
                w[bs*len+kk] = -buf_fact[kk];
            }
            jbuf[len] = jrow;
            len++;
        }

        /* Store at most lfil lower blocks, chosen by block norm. */
        lenl = len;
        len  = _min(lfil,lenl);
        for(j=0;j<lenl;j++)
        {
            lis_array_nrm2(bs,&w[bs*j],&wn[j]);
            iw[j] = j;
        }
        /* presumably: order by norm, then restore ascending positions - confirm */
        lis_sort_di(0,lenl-1,wn,iw);
        lis_sort_i(0,len-1,iw);

        L->nnz[i] = len;
        if( len>0 )
        {
            L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
            L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
            if( L->index[i]==NULL || L->value[i]==NULL )
            {
                LIS_SETERR_MEM(bs*len*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto fail;
            }
        }
        for(j=0;j<len;j++)
        {
            jpos = iw[j];
            L->index[i][j] = jbuf[jpos];
            memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
        }
        /* iw was borrowed as a permutation; restore the "absent" marker. */
        for(j=0;j<lenl;j++) iw[j] = -1;

        /* Store at most lfil-1 off-diagonal upper blocks, chosen by norm. */
        len = _min(lfil,lenu);
        for(j=1;j<lenu;j++)
        {
            jpos = i+j;
            lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
            iw[j-1] = jpos;
        }
        para = lenu - 1;
        lis_sort_di(0,para-1,wn,iw);
        lis_sort_i(0,len-2,iw);

        U->nnz[i] = len-1;
        if( len>1 )
        {
            U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT));
            U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR));
            if( U->index[i]==NULL || U->value[i]==NULL )
            {
                LIS_SETERR_MEM(bs*(len-1)*sizeof(LIS_SCALAR));
                err = LIS_OUT_OF_MEMORY;
                goto fail;
            }
        }
        /* t accumulates the row norm bound saved in xnrm[i]. */
        lis_array_nrm2(bs,&w[bs*i],&t);
        for(j=0;j<len-1;j++)
        {
            jpos = iw[j];
            U->index[i][j] = jbuf[jpos];
            memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
            t = _max(t,wn[j]);
        }
        for(j=0;j<lenu-1;j++) iw[j] = -1;

        xnrm[i] = t;
        memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));

        if( i==nr-1 )
        {
            /*
             * When n is not a multiple of bnr, put 1.0 on the trailing
             * diagonal entries of the last diagonal block (presumably the
             * padding rows, so that the block stays invertible).
             */
            switch(bnr)
            {
            case 2:
                if( n%2!=0 )
                {
                    D->value[4*(nr-1)+3] = 1.0;
                }
                break;
            case 3:
                if( n%3==1 )
                {
                    D->value[9*(nr-1)+4] = 1.0;
                    D->value[9*(nr-1)+8] = 1.0;
                }
                else if( n%3==2 )
                {
                    D->value[9*(nr-1)+8] = 1.0;
                }
                break;
            default:
                break;
            }
        }
        lis_array_LUdecomp(bnr,&D->value[bs*i]);

        /* Clear the column map for every upper position used by this row. */
        for(j=0;j<lenu;j++)
        {
            iw[ jbuf[i+j] ] = -1;
        }
    }

    precon->L  = L;
    precon->U  = U;
    precon->WD = D;

    lis_free2(5,w,iw,xnrm,wn,jbuf);

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;

fail:
    /* Buffers not yet allocated are still NULL here. */
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return err;
}
/*
 * lis_matrix_copyDLU_csr - copy the split parts of a CSR matrix into three
 * freshly created objects.
 *
 * Ain : source matrix; its Ain->L, Ain->U and Ain->D members are read.
 * D   : receives a diagonal object created with lis_matrix_diag_duplicateM
 *       whose value array is replaced by a copy of Ain->D->value[0..n-1]
 *       (the array is allocated with np entries).
 * L,U : receive matrices created with lis_matrix_duplicate, filled with
 *       copies of the CSR arrays of Ain->L / Ain->U via lis_matrix_set_csr
 *       and then assembled.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when the diagonal copy cannot be
 * allocated, or the error code of the failing Lis call.
 *
 * Failure handling (changed from the previous version):
 *   - a failed allocation of the diagonal copy now reports
 *     LIS_OUT_OF_MEMORY instead of returning the stale, zero err;
 *   - the value array of *D is only swapped once everything else has
 *     succeeded, so *D never carries a freed pointer;
 *   - arrays handed to *L / *U through lis_matrix_set_csr are no longer
 *     freed a second time;
 *   - *L and *U are destroyed and reset to NULL on every failure after
 *     both have been created.
 *
 * NOTE(review): Ain->L, Ain->U and Ain->D are dereferenced without testing
 * Ain->is_splited (the original had an empty "if( Ain->is_splited ) { }"
 * here) - callers presumably must pass an already split matrix; confirm.
 * NOTE(review): *D is not released on failure because no destroy routine
 * for LIS_MATRIX_DIAG is visible from this block.
 */
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U)
{
    LIS_INT err;
    LIS_INT i,n,np,lnnz,unnz;
    LIS_INT *lptr,*lindex;
    LIS_INT *uptr,*uindex;
    LIS_SCALAR *lvalue,*uvalue,*diag;

    LIS_DEBUG_FUNC_IN;

    *D = NULL;
    *L = NULL;
    *U = NULL;

    /* Everything that may reach lis_free2 starts out as NULL. */
    lptr   = NULL;
    lindex = NULL;
    lvalue = NULL;
    uptr   = NULL;
    uindex = NULL;
    uvalue = NULL;
    diag   = NULL;

    err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
    if( err ) return err;

    n  = Ain->n;
    np = Ain->np;

    err = lis_matrix_duplicate(Ain,L);
    if( err )
    {
        return err;
    }
    err = lis_matrix_duplicate(Ain,U);
    if( err )
    {
        lis_matrix_destroy(*L);
        *L = NULL;
        return err;
    }
    err = lis_matrix_diag_duplicateM(Ain,D);
    if( err )
    {
        goto fail_matrices;
    }

    lnnz = Ain->L->nnz;
    unnz = Ain->U->nnz;

    err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
    if( err )
    {
        /* As in the original, the L arrays are not freed on this path. */
        goto fail_matrices;
    }
    err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
    if( err )
    {
        goto fail_arrays;
    }
    diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag");
    if( diag==NULL )
    {
        LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
        err = LIS_OUT_OF_MEMORY;
        goto fail_arrays;
    }

#ifdef _OPENMP
    #pragma omp parallel for private(i)
#endif
    for(i=0;i<n;i++)
    {
        diag[i] = Ain->D->value[i];
    }
    lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
    lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

    err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L);
    if( err )
    {
        goto fail_arrays;
    }
    /*
     * *L now references the lower arrays; forget the local handles so the
     * failure path cannot free them behind its back (they are presumably
     * released together with *L by lis_matrix_destroy - confirm).
     */
    lptr   = NULL;
    lindex = NULL;
    lvalue = NULL;

    err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U);
    if( err )
    {
        goto fail_arrays;
    }
    uptr   = NULL;
    uindex = NULL;
    uvalue = NULL;

    err = lis_matrix_assemble(*L);
    if( err )
    {
        goto fail_arrays;
    }
    err = lis_matrix_assemble(*U);
    if( err )
    {
        goto fail_arrays;
    }

    /* Nothing can fail any more: replace the value array of *D. */
    lis_free((*D)->value);
    (*D)->value = diag;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;

fail_arrays:
    /* Only arrays still owned locally are non-NULL at this point. */
    lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
fail_matrices:
    lis_matrix_destroy(*L);
    lis_matrix_destroy(*U);
    *L = NULL;
    *U = NULL;
    return err;
}
/*
 * lis_matrix_split_msr - split an MSR matrix into strictly-lower, diagonal
 * and strictly-upper parts.
 *
 * As read by this function, the MSR layout is:
 *   A->value[0..n-1]   diagonal entries
 *   A->index[0..n]     row pointers into the same index/value arrays
 *   A->index[j], A->value[j] for j in [A->index[i], A->index[i+1])
 *                      column and value of the off-diagonal entries of row i
 * The L and U outputs use the same layout, with their first off-diagonal
 * slot at position n+1 (lindex[0] = uindex[0] = n+1).
 *
 * Off-diagonal entries with column < i go to L, those with column > i go
 * to U, and A->value[i] is copied to D->value[i].  A first pass counts the
 * entries, a second pass copies them; in the OpenMP build the per-row
 * counts live in the scratch arrays liw/uiw, which a prefix sum seeded with
 * n+1 turns into the row pointers of L and U.
 *
 * On success A->L, A->U, A->D are filled in (with ndz = n for both L and U)
 * and A->is_splited is set to LIS_TRUE.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when a scratch array cannot be
 * allocated, or the error code of lis_matrix_LU_create,
 * lis_matrix_malloc_msr or lis_matrix_diag_duplicateM.  On every failure
 * path the scratch arrays are released (the original leaked them in the
 * OpenMP build).
 *
 * NOTE(review): in the OpenMP build lnnz/unnz already contain the n+1
 * offset when they are passed to lis_matrix_malloc_msr, whereas the serial
 * build passes the plain entry counts; the final nnz values agree because
 * both builds subtract n+1 at the end.
 */
LIS_INT lis_matrix_split_msr(LIS_MATRIX A)
{
    LIS_INT i,j,n;
    LIS_INT lnnz,unnz;
    LIS_INT lndz,undz;
    LIS_INT err;
    LIS_INT *lindex,*uindex;
    LIS_SCALAR *lvalue,*uvalue;
#ifdef _OPENMP
    LIS_INT kl,ku;
    LIS_INT *liw,*uiw;
#endif
    LIS_MATRIX_DIAG D;

    LIS_DEBUG_FUNC_IN;

    n      = A->n;
    lnnz   = 0;
    unnz   = 0;
    lndz   = n;
    undz   = n;
    D      = NULL;
    lindex = NULL;
    lvalue = NULL;
    uindex = NULL;
    uvalue = NULL;

    /* Pass 1: count strictly-lower and strictly-upper entries. */
#ifdef _OPENMP
    liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::liw");
    if( liw==NULL )
    {
        LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::uiw");
    if( uiw==NULL )
    {
        LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
        lis_free(liw);
        return LIS_OUT_OF_MEMORY;
    }
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
        liw[i] = 0;
        uiw[i] = 0;
    }
    /* Row i only touches slot i+1, so rows can be counted independently. */
    #pragma omp parallel for private(i,j)
    for(i=0;i<n;i++)
    {
        for(j=A->index[i];j<A->index[i+1];j++)
        {
            if( A->index[j]<i )
            {
                liw[i+1]++;
            }
            else if( A->index[j]>i )
            {
                uiw[i+1]++;
            }
        }
    }
    /* Off-diagonal storage starts at n+1; the prefix sum carries that offset. */
    liw[0] = n+1;
    uiw[0] = n+1;
    for(i=0;i<n;i++)
    {
        liw[i+1] += liw[i];
        uiw[i+1] += uiw[i];
    }
    lnnz = liw[n];
    unnz = uiw[n];
#else
    for(i=0;i<n;i++)
    {
        for(j=A->index[i];j<A->index[i+1];j++)
        {
            if( A->index[j]<i )
            {
                lnnz++;
            }
            else if( A->index[j]>i )
            {
                unnz++;
            }
        }
    }
#endif

    err = lis_matrix_LU_create(A);
    if( err )
    {
        goto fail;
    }
    err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
    if( err )
    {
        /* As in the original, the L arrays are not freed on this path. */
        goto fail;
    }
    err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
    if( err )
    {
        goto fail_arrays;
    }
    err = lis_matrix_diag_duplicateM(A,&D);
    if( err )
    {
        goto fail_arrays;
    }

    /* Pass 2: copy the entries into L, U and D. */
#ifdef _OPENMP
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
        lindex[i] = liw[i];
        uindex[i] = uiw[i];
    }
    #pragma omp parallel for private(i,j,kl,ku)
    for(i=0;i<n;i++)
    {
        /* Each row writes only into its own [index[i], index[i+1]) range. */
        kl = lindex[i];
        ku = uindex[i];
        D->value[i] = A->value[i];
        for(j=A->index[i];j<A->index[i+1];j++)
        {
            if( A->index[j]<i )
            {
                lindex[kl] = A->index[j];
                lvalue[kl] = A->value[j];
                kl++;
            }
            else if( A->index[j]>i )
            {
                uindex[ku] = A->index[j];
                uvalue[ku] = A->value[j];
                ku++;
            }
        }
    }
    lis_free2(2,liw,uiw);
#else
    /* The counters are reused as running write positions, starting at n+1. */
    lnnz      = n+1;
    unnz      = n+1;
    lindex[0] = n+1;
    uindex[0] = n+1;
    for(i=0;i<n;i++)
    {
        D->value[i] = A->value[i];
        for(j=A->index[i];j<A->index[i+1];j++)
        {
            if( A->index[j]<i )
            {
                lindex[lnnz] = A->index[j];
                lvalue[lnnz] = A->value[j];
                lnnz++;
            }
            else if( A->index[j]>i )
            {
                uindex[unnz] = A->index[j];
                uvalue[unnz] = A->value[j];
                unnz++;
            }
        }
        lindex[i+1] = lnnz;
        uindex[i+1] = unnz;
    }
#endif

    /* lnnz/unnz are end positions here; subtract the n+1 offset to get counts. */
    A->L->nnz     = lnnz - (n+1);
    A->L->ndz     = lndz;
    A->L->index   = lindex;
    A->L->value   = lvalue;
    A->U->nnz     = unnz - (n+1);
    A->U->ndz     = undz;
    A->U->index   = uindex;
    A->U->value   = uvalue;
    A->D          = D;
    A->is_splited = LIS_TRUE;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;

fail_arrays:
    lis_free2(4,lindex,lvalue,uindex,uvalue);
fail:
#ifdef _OPENMP
    /* Scratch row counters are no longer needed once the split is abandoned. */
    lis_free2(2,liw,uiw);
#endif
    return err;
}