/*
 * Allocate the three arrays of a CSR matrix.
 *
 *   n      number of rows; *ptr receives n+1 LIS_INT entries
 *   nnz    number of stored entries; *index and *value receive nnz entries each
 *   ptr, index, value   outputs, overwritten unconditionally
 *
 * Returns LIS_SUCCESS with all three outputs set, or LIS_OUT_OF_MEMORY.
 * On failure everything that was already allocated is released and all
 * three outputs are reset to NULL, so that a caller which frees them again
 * in its own error path does not release the same memory twice.
 */
LIS_INT lis_matrix_malloc_csr(LIS_INT n, LIS_INT nnz, LIS_INT **ptr, LIS_INT **index, LIS_SCALAR **value)
{
	LIS_DEBUG_FUNC_IN;

	*ptr   = NULL;
	*index = NULL;
	*value = NULL;

	*ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_malloc_csr::ptr" );
	if( *ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		goto fail;
	}
	*index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_malloc_csr::index" );
	if( *index==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
		goto fail;
	}
	*value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_malloc_csr::value" );
	if( *value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		goto fail;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	lis_free2(3,*ptr,*index,*value);
	/* do not hand freed addresses back to the caller */
	*ptr   = NULL;
	*index = NULL;
	*value = NULL;
	return LIS_OUT_OF_MEMORY;
}
/*
 * Release an ILU factor container and the per-row storage it owns.
 *
 * Nothing is done unless lis_is_malloc(A) is true. When A->bsz is set the
 * rows hold an index array plus one value block per stored entry
 * (A->values); otherwise each non-empty row holds one index array and one
 * value array (A->value). Always returns LIS_SUCCESS.
 */
LIS_INT lis_matrix_ilu_destroy(LIS_MATRIX_ILU A)
{
	LIS_INT row,blk;

	LIS_DEBUG_FUNC_IN;

	if( !(lis_is_malloc(A)) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}

	if( A->bsz )
	{
		/* block storage */
		row = 0;
		while( row<A->n )
		{
			free(A->index[row]);
			for(blk=0;blk<A->nnz[row];blk++)
			{
				free(A->values[row][blk]);
			}
			if( A->nnz[row]>0 )
			{
				free(A->values[row]);
			}
			row++;
		}
		lis_free2(5,A->bsz,A->nnz,A->index,A->values,A->nnz_ma);
	}
	else
	{
		/* scalar storage: only non-empty rows own arrays */
		for(row=0;row<A->n;row++)
		{
			if( A->nnz[row]<=0 )
			{
				continue;
			}
			free(A->index[row]);
			free(A->value[row]);
		}
		lis_free2(4,A->nnz,A->index,A->value,A->nnz_ma);
	}
	lis_free(A);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Counting sort of positions is..ie-1 by the key held in i1, largest key
 * first. The bucket arrays have maxnzr+2 slots and are indexed with
 * maxnzr - key, so keys must lie in 0..maxnzr.
 *
 * On return i2[p] is the original position of the entry ranked p and
 * i1[p] is that entry's key, for p in is..ie-1. Entries with equal keys
 * keep their original relative order.
 */
void lis_sort_jds(int is, int ie, int maxnzr, int *i1, int *i2)
{
	int pos,bucket;
	int nslots;
	int *next,*start;

	nslots = maxnzr+2;
	next   = (int *)lis_malloc(nslots*sizeof(int),"lis_sort_jds::iw");
	start  = (int *)lis_malloc(nslots*sizeof(int),"lis_sort_jds::iw2");

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nslots;bucket++)
	{
		next[bucket] = 0;
	}

	/* histogram, stored one slot to the right */
	for(pos=is;pos<ie;pos++)
	{
		bucket = maxnzr - i1[pos];
		next[bucket+1] += 1;
	}

	/* running sum turns counts into first-free positions, starting at is */
	next[0] = is;
	for(bucket=1;bucket<nslots;bucket++)
	{
		next[bucket] += next[bucket-1];
	}

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nslots;bucket++)
	{
		start[bucket] = next[bucket];
	}

	/* scatter original positions into rank order */
	for(pos=is;pos<ie;pos++)
	{
		bucket = maxnzr - i1[pos];
		i2[next[bucket]] = pos;
		next[bucket]++;
	}

	/* rewrite the keys in sorted order */
	for(bucket=0;bucket<=maxnzr;bucket++)
	{
		#ifdef USE_VEC_COMP
		#pragma cdir nodep
		#endif
		for(pos=start[bucket];pos<start[bucket+1];pos++)
		{
			i1[pos] = maxnzr - bucket;
		}
	}

	lis_free2(2,next,start);
}
/*
 * Counting sort of positions is..ie-1 by the key held in i1, largest key
 * first. The bucket arrays have maxnzr+2 slots and are indexed with
 * maxnzr - key, so keys must lie in 0..maxnzr.
 *
 * On return i2[p] is the original position of the entry ranked p and
 * i1[p] is that entry's key, for p in is..ie-1. Entries with equal keys
 * keep their original relative order.
 */
void lis_sort_jad(LIS_INT is, LIS_INT ie, LIS_INT maxnzr, LIS_INT *i1, LIS_INT *i2)
{
	LIS_INT pos,bucket;
	LIS_INT nslots;
	LIS_INT *next,*start;

	nslots = maxnzr+2;
	next   = (LIS_INT *)lis_malloc(nslots*sizeof(LIS_INT),"lis_sort_jad::iw");
	start  = (LIS_INT *)lis_malloc(nslots*sizeof(LIS_INT),"lis_sort_jad::iw2");

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nslots;bucket++)
	{
		next[bucket] = 0;
	}

	/* histogram, stored one slot to the right */
	for(pos=is;pos<ie;pos++)
	{
		bucket = maxnzr - i1[pos];
		next[bucket+1] += 1;
	}

	/* running sum turns counts into first-free positions, starting at is */
	next[0] = is;
	for(bucket=1;bucket<nslots;bucket++)
	{
		next[bucket] += next[bucket-1];
	}

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nslots;bucket++)
	{
		start[bucket] = next[bucket];
	}

	/* scatter original positions into rank order */
	for(pos=is;pos<ie;pos++)
	{
		bucket = maxnzr - i1[pos];
		i2[next[bucket]] = pos;
		next[bucket]++;
	}

	/* rewrite the keys in sorted order */
	for(bucket=0;bucket<=maxnzr;bucket++)
	{
		#ifdef USE_VEC_COMP
		#pragma cdir nodep
		#endif
		for(pos=start[bucket];pos<start[bucket+1];pos++)
		{
			i1[pos] = maxnzr - bucket;
		}
	}

	lis_free2(2,next,start);
}
/*
 * Allocate the index and value arrays of a DIA matrix, n*nnd entries each.
 *
 *   n      number of rows
 *   nnd    number of stored diagonals
 *   index, value   outputs, overwritten unconditionally
 *
 * Returns LIS_SUCCESS with both outputs set, or LIS_OUT_OF_MEMORY.
 * On failure whatever was already allocated is released and both outputs
 * are reset to NULL, so that a caller which frees them again in its own
 * error path does not release the same memory twice.
 */
LIS_INT lis_matrix_malloc_dia(LIS_INT n, LIS_INT nnd, LIS_INT **index, LIS_SCALAR **value)
{
	LIS_DEBUG_FUNC_IN;

	*index = NULL;
	*value = NULL;

	*index = (LIS_INT *)lis_malloc( n*nnd*sizeof(LIS_INT),"lis_matrix_malloc_dia::index" );
	if( *index==NULL )
	{
		LIS_SETERR_MEM(n*nnd*sizeof(LIS_INT));
		goto fail;
	}
	*value = (LIS_SCALAR *)lis_malloc( n*nnd*sizeof(LIS_SCALAR),"lis_matrix_malloc_dia::value" );
	if( *value==NULL )
	{
		LIS_SETERR_MEM(n*nnd*sizeof(LIS_SCALAR));
		goto fail;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	lis_free2(2,*index,*value);
	/* do not hand freed addresses back to the caller */
	*index = NULL;
	*value = NULL;
	return LIS_OUT_OF_MEMORY;
}
/*
 * Free a circular, doubly linked argument list.
 *
 * args is the list head: every other node is unlinked, its arg1/arg2
 * strings are released together with the node, and finally the head
 * itself is released. A NULL args is accepted and is a no-op.
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_args_free(LIS_ARGS args)
{
	LIS_ARGS arg,t;

	LIS_DEBUG_FUNC_IN;

	/* check before touching args->next */
	if( args==NULL )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}

	arg = args->next;
	while( arg!=args )
	{
		t   = arg;
		arg = arg->next;
		lis_free2(2,t->arg1,t->arg2);
		t->next->prev = t->prev;
		t->prev->next = t->next;
		lis_free(t);
	}
	lis_free(args);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Convert the row-wise work storage of Ain (w_row / w_index / w_value)
 * into CSR arrays and install them in Aout.
 *
 * Row lengths are summed to size the arrays, turned into row pointers by
 * a running sum, and each row's entries are copied in stored order.
 * Returns LIS_SUCCESS, or the error reported by the allocation,
 * lis_matrix_set_csr or lis_matrix_assemble.
 */
LIS_INT lis_matrix_convert_rco2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT row,col,dst,nrows,total,err;
	LIS_INT *rowptr,*colind;
	LIS_SCALAR *val;

	LIS_DEBUG_FUNC_IN;

	rowptr = NULL;
	colind = NULL;
	val    = NULL;
	nrows  = Ain->n;

	total = 0;
	#ifdef _OPENMP
	#pragma omp parallel for reduction(+:total) private(row)
	#endif
	for(row=0;row<nrows;row++)
	{
		total += Ain->w_row[row];
	}

	err = lis_matrix_malloc_csr(nrows,total,&rowptr,&colind,&val);
	if( err )
	{
		return err;
	}

	#ifdef _NUMA
		#pragma omp parallel for private(row)
		for(row=0;row<nrows+1;row++) rowptr[row] = 0;
	#else
		rowptr[0] = 0;
	#endif
	for(row=0;row<nrows;row++)
	{
		rowptr[row+1] = rowptr[row] + Ain->w_row[row];
	}

	#ifdef _OPENMP
	#pragma omp parallel for private(row,col,dst)
	#endif
	for(row=0;row<nrows;row++)
	{
		dst = rowptr[row];
		for(col=0;col<Ain->w_row[row];col++,dst++)
		{
			colind[dst] = Ain->w_index[row][col];
			val[dst]    = Ain->w_value[row][col];
		}
	}

	err = lis_matrix_set_csr(total,rowptr,colind,val,Aout);
	if( !err )
	{
		err = lis_matrix_assemble(Aout);
		if( !err )
		{
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	/* the arrays were not accepted by Aout, so they are still ours */
	lis_free2(3,rowptr,colind,val);
	return err;
}
/*
 * Build the factors W, Z and the diagonal D for solver->A (CSR) and store
 * them in precon->L, precon->U and precon->D.
 *
 * For every row i:
 *   - l and u start as the unit vector e_i;
 *   - r holds row i of A, c holds column i of A (taken from the CSC copy B);
 *   - for every j < i,  W_i -= (r*Z_j * d_j) * W_j  and
 *                       Z_i -= (W_j^T*c * d_j) * Z_j,
 *     where updates whose magnitude does not exceed
 *     tol_dd = tol * max|A[i,:]| are skipped;
 *   - d_i = 1 / (W_i^T * A * Z_i).
 *
 * Changes relative to the previous revision:
 *   - r and c are zero-filled after allocation. They are read at the
 *     indices stored in Z_j / W_j, which need not have been written by any
 *     row of A processed so far.
 *   - The per-row malloc() results are checked before use.
 *   - The scratch buffers and the CSC copy B are released on every error
 *     path that is reached after they were obtained.
 *   - Locals and buffers that were allocated but never read were removed.
 *
 * NOTE(review): W, Z and D are still not released on error paths; the
 * state lis_matrix_ilu_create/setCR leave the containers in is not visible
 * here, so destroying a partially filled factor was not attempted.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT err;
	LIS_INT i,j,k,ii,jj;
	LIS_INT n,cl,cu;
	LIS_INT *wu,*wl,*il,*iu;
	LIS_SCALAR t,v;
	LIS_REAL tol,tol_dd,nrm;
	LIS_SCALAR *d,*r,*c,*l,*u;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;

	A   = solver->A;
	n   = A->n;
	tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];

	W  = NULL;
	Z  = NULL;
	B  = NULL;
	d  = NULL;
	r  = NULL;
	c  = NULL;
	l  = NULL;
	u  = NULL;
	il = NULL;
	iu = NULL;
	wu = NULL;
	wl = NULL;

	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;

	r = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::r");
	if( r==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	c = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::c");
	if( c==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	wu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::wu");
	if( wu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	wl = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::wl");
	if( wl==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	lis_matrix_sort_csr(A);
	err = lis_matrix_duplicate(A,&B);
	if( err )
	{
		B = NULL;
		goto fail;
	}
	err = lis_matrix_convert_csr2csc(A,B);
	if( err ) goto fail;

	/* markers start cleared; r and c start as all-zero dense vectors */
	for(i=0;i<n;i++)
	{
		wu[i] = 0;
		wl[i] = 0;
		r[i]  = 0.0;
		c[i]  = 0.0;
	}

	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		tol_dd = nrm * tol;

		/* l = e_i */
		/* u = e_i */
		l[i]  = 1.0;
		u[i]  = 1.0;
		il[0] = i;
		iu[0] = i;
		cl    = 1;
		cu    = 1;
		wu[i] = 1;
		wl[i] = 1;

		/* r = e_i^T*A */
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			jj    = A->index[j];
			r[jj] = A->value[j];
		}
		/* c = A_i = A*e_i */
		for(j=B->ptr[i];j<B->ptr[i+1];j++)
		{
			jj    = B->index[j];
			c[jj] = B->value[j];
		}

		/* W_i = W_i - (r*Z_j/D_jj)*W_j */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<Z->nnz[j];k++)
			{
				t += r[Z->index[j][k]]*Z->value[j][k];
			}
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<W->nnz[j];k++)
				{
					v = t * W->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj = W->index[j][k];
						if( wl[jj]==1 )
						{
							l[jj] -= v;
						}
						else
						{
							l[jj]    = -v;
							il[cl++] = jj;
							wl[jj]   = 1;
						}
					}
				}
			}
		}

		/* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<W->nnz[j];k++)
			{
				t += c[W->index[j][k]]*W->value[j][k];
			}
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<Z->nnz[j];k++)
				{
					v = t * Z->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj = Z->index[j][k];
						if( wu[jj]==1 )
						{
							u[jj] -= v;
						}
						else
						{
							u[jj]    = -v;
							iu[cu++] = jj;
							wu[jj]   = 1;
						}
					}
				}
			}
		}

		/* store row i of W */
		W->nnz[i] = cl;
		if( cl > 0 )
		{
			W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT));
			W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR));
			if( W->index[i]==NULL || W->value[i]==NULL )
			{
				LIS_SETERR_MEM(cl*sizeof(LIS_SCALAR));
				err = LIS_OUT_OF_MEMORY;
				goto fail;
			}
			memcpy(W->index[i],il,cl*sizeof(LIS_INT));
			for(j=0;j<cl;j++)
			{
				W->value[i][j] = l[il[j]];
			}
		}
		/* store row i of Z */
		Z->nnz[i] = cu;
		if( cu > 0 )
		{
			Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT));
			Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR));
			if( Z->index[i]==NULL || Z->value[i]==NULL )
			{
				LIS_SETERR_MEM(cu*sizeof(LIS_SCALAR));
				err = LIS_OUT_OF_MEMORY;
				goto fail;
			}
			memcpy(Z->index[i],iu,cu*sizeof(LIS_INT));
			for(j=0;j<cu;j++)
			{
				Z->value[i][j] = u[iu[j]];
			}
		}

		/* reset the dense work vectors at the positions used by this row */
		for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0;
		for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0;
		for(j=0;j<cl;j++)
		{
			wl[il[j]] = 0;
			l[il[j]]  = 0.0;
		}
		for(j=0;j<cu;j++)
		{
			wu[iu[j]] = 0;
		}

		/* D_ii = W_i^T * A * Z_i */
		cl = 0;
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj = B->index[j];
				if( wl[jj]==0 )
				{
					l[jj]    = B->value[j]*Z->value[i][k];
					wl[jj]   = 1;
					il[cl++] = jj;
				}
				else
				{
					l[jj] += B->value[j]*Z->value[i][k];
				}
			}
		}
		t = 0.0;
		for(j=0;j<W->nnz[i];j++)
		{
			k  = W->index[i][j];
			t += W->value[i][j] * l[k];
		}
		d[i] = 1.0 / t;
		for(j=0;j<cl;j++) wl[il[j]] = 0;
	}

	lis_matrix_destroy(B);
	lis_free2(8,r,c,il,l,wl,iu,u,wu);

	precon->L = W;
	precon->U = Z;
	precon->D = D;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	if( B ) lis_matrix_destroy(B);
	lis_free2(8,r,c,il,l,wl,iu,u,wu);
	return err;
}
/*
 * Build the factors W, Z and the diagonal D for solver->A (CSR) and store
 * them in precon->L, precon->U and precon->D. This variant keeps every row
 * of W and Z in pre-sized storage that is grown on demand.
 *
 * W and Z start as the identity. For every row i:
 *   - l = A*Z_i and u = W_i'*A are formed as dense vectors; the positions
 *     jj > i that were touched are recorded in il / iu;
 *   - d_i = 1 / (u*Z_i);
 *   - for every recorded j, w_j -= (l_j*d_i)*w_i and z_j -= (u_j*d_i)*z_i,
 *     where an update t is applied only if |t| / max|A[i,:]| > tol.
 *
 * Changes relative to the previous revision:
 *   - annz (the initial size passed to lis_matrix_ilu_premalloc and the
 *     growth step added to nnz_ma) is forced to be at least 1. With
 *     annz = n/10 it was 0 for n < 10, so "nnz_ma += annz" never enlarged
 *     a row although entry 0 of every row is written unconditionally.
 *   - The scratch buffers and the CSC copy B are released on every error
 *     path reached after they were obtained, including a failing
 *     lis_matrix_ilu_realloc inside the main loop.
 *
 * NOTE(review): W, Z and D are still not released on error paths.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT err;
	LIS_INT i,j,k,ii,jj,ik,jk;
	LIS_INT n,annz,cl,cu;
	LIS_INT *ww,*il,*iu;
	LIS_SCALAR t,dd;
	LIS_REAL tol,nrm;
	LIS_SCALAR *d,*l,*u;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;

	A    = solver->A;
	n    = A->n;
	tol  = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	annz = A->n / 10;
	if( annz<1 )
	{
		/* every row needs room for its unit entry and must be able to grow */
		annz = 1;
	}

	W  = NULL;
	Z  = NULL;
	B  = NULL;
	ww = NULL;
	d  = NULL;
	l  = NULL;
	u  = NULL;
	il = NULL;
	iu = NULL;

	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;
	err = lis_matrix_ilu_premalloc(annz,W);
	if( err ) return err;
	err = lis_matrix_ilu_premalloc(annz,Z);
	if( err ) return err;

	l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	ww = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( ww==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	err = lis_matrix_duplicate(A,&B);
	if( err )
	{
		B = NULL;
		goto fail;
	}
	err = lis_matrix_convert_csr2csc(A,B);
	if( err ) goto fail;

	for(i=0;i<n;i++) ww[i] = 0;

	/* W = Z = I */
	for(i=0;i<n;i++)
	{
		W->value[i][0] = 1.0;
		W->index[i][0] = i;
		W->nnz[i]      = 1;
		Z->value[i][0] = 1.0;
		Z->index[i][0] = i;
		Z->nnz[i]      = 1;
	}

	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		nrm = 1.0/nrm;

		/* l = AZ_i */
		cl = 0;
		memset(l,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj = B->index[j];
				if( jj>i )
				{
					l[jj] += B->value[j]*Z->value[i][k];
					if( ww[jj]==0 )
					{
						ww[jj]   = 1;
						il[cl++] = jj;
					}
				}
			}
		}
		for(k=0;k<cl;k++) ww[il[k]] = 0;

		/* u = W_i'A */
		cu = 0;
		memset(u,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<W->nnz[i];k++)
		{
			ii = W->index[i][k];
			for(j=A->ptr[ii];j<A->ptr[ii+1];j++)
			{
				jj = A->index[j];
				#ifdef USE_MPI
					if( jj>n-1 ) continue;
				#endif
				u[jj] += A->value[j]*W->value[i][k];
				if( jj>i && ww[jj]==0 )
				{
					ww[jj]   = 1;
					iu[cu++] = jj;
				}
			}
		}
		for(k=0;k<cu;k++) ww[iu[k]] = 0;

		/* d_ii = uZ_i or W_i'l */
		t = 0.0;
		for(k=0;k<Z->nnz[i];k++)
		{
			t += u[Z->index[i][k]]*Z->value[i][k];
		}
		d[i] = 1.0/t;

		/* for j>i, l_j!=0 */
		/* w_j = w_j - (l_j/d_ii)*w_i */
		for(jj=0;jj<cl;jj++)
		{
			j  = il[jj];
			dd = l[j]*d[i];
			/* ww maps a column to (position in row j)+1, 0 meaning absent */
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = k+1;
			}
			for(ik=0;ik<W->nnz[i];ik++)
			{
				jk = ww[W->index[i][ik]];
				if( jk!=0 )
				{
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						W->value[j][jk-1] -= t;
					}
				}
				else
				{
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						if( W->nnz[j] == W->nnz_ma[j] )
						{
							W->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W);
							if( err ) goto fail;
						}
						jk              = W->nnz[j];
						W->index[j][jk] = W->index[i][ik];
						W->value[j][jk] = -t;
						W->nnz[j]++;
					}
				}
			}
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = 0;
			}
		}

		/* for j>i, u_j!=0 */
		/* z_j = z_j - (u_j/d_ii)*z_i */
		for(jj=0;jj<cu;jj++)
		{
			j  = iu[jj];
			dd = u[j]*d[i];
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = k+1;
			}
			for(ik=0;ik<Z->nnz[i];ik++)
			{
				jk = ww[Z->index[i][ik]];
				if( jk!=0 )
				{
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						Z->value[j][jk-1] -= t;
					}
				}
				else
				{
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						if( Z->nnz[j] == Z->nnz_ma[j] )
						{
							Z->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z);
							if( err ) goto fail;
						}
						jk              = Z->nnz[j];
						Z->index[j][jk] = Z->index[i][ik];
						Z->value[j][jk] = -t;
						Z->nnz[j]++;
					}
				}
			}
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = 0;
			}
		}
	}

	lis_matrix_destroy(B);
	lis_free2(5,l,u,ww,il,iu);

	precon->L = W;
	precon->U = Z;
	precon->D = D;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	if( B ) lis_matrix_destroy(B);
	lis_free2(5,l,u,ww,il,iu);
	return err;
}
/*
 * Split a CSR matrix by column index: entries with column < n go to A->L,
 * all others to A->U. Both parts keep the row structure of A. On success
 * A->is_splited is set to LIS_TRUE.
 *
 * Changes relative to the previous revision:
 *   - With OpenMP, the counting arrays liw/uiw are released on every
 *     error path (they used to leak).
 *   - When the second lis_matrix_malloc_csr fails, only the L arrays are
 *     freed here: that function has already released its own partial
 *     allocations, so freeing the U arrays again was a double free.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_matrix_split2_csr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT nnzl,nnzu;
	LIS_INT err;
	LIS_INT *lptr,*lindex,*uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif

	LIS_DEBUG_FUNC_IN;

	n      = A->n;
	nnzl   = 0;
	nnzu   = 0;
	lptr   = NULL;
	lindex = NULL;
	lvalue = NULL;
	uptr   = NULL;
	uindex = NULL;
	uvalue = NULL;

	#ifdef _OPENMP
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		/* per-row counts, stored one slot to the right */
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					liw[i+1]++;
				}
				else
				{
					uiw[i+1]++;
				}
			}
		}
		/* running sum turns the counts into row pointers */
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		nnzl = liw[n];
		nnzu = uiw[n];
	#else
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					nnzl++;
				}
				else
				{
					nnzu++;
				}
			}
		}
	#endif

	err = lis_matrix_LU_create(A);
	if( err )
	{
		goto fail;
	}
	err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
	if( err )
	{
		/* the callee released whatever it had obtained */
		lptr   = NULL;
		lindex = NULL;
		lvalue = NULL;
		goto fail;
	}
	err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
	if( err )
	{
		/* U arrays were released by the callee; L arrays are freed below */
		goto fail;
	}

	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lptr[i] = liw[i];
			uptr[i] = uiw[i];
		}
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			kl = lptr[i];
			ku = uptr[i];
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[kl] = A->index[j];
					lvalue[kl] = A->value[j];
					kl++;
				}
				else
				{
					uindex[ku] = A->index[j];
					uvalue[ku] = A->value[j];
					ku++;
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		nnzl    = 0;
		nnzu    = 0;
		lptr[0] = 0;
		uptr[0] = 0;
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[nnzl] = A->index[j];
					lvalue[nnzl] = A->value[j];
					nnzl++;
				}
				else
				{
					uindex[nnzu] = A->index[j];
					uvalue[nnzu] = A->value[j];
					nnzu++;
				}
			}
			lptr[i+1] = nnzl;
			uptr[i+1] = nnzu;
		}
	#endif

	A->L->nnz     = nnzl;
	A->L->ptr     = lptr;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = nnzu;
	A->U->ptr     = uptr;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->is_splited = LIS_TRUE;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	lis_free2(3,lptr,lindex,lvalue);
	#ifdef _OPENMP
		lis_free2(2,liw,uiw);
	#endif
	return err;
}
/*
 * Copy the split parts of a CSR matrix (Ain->D, Ain->L, Ain->U) into three
 * newly created objects returned through D, L and U.
 *
 * NOTE(review): Ain->L, Ain->U and Ain->D are read unconditionally; the
 * previous revision contained an empty "if( Ain->is_splited )" block, so
 * the caller is presumably expected to pass an already split matrix.
 *
 * Changes relative to the previous revision:
 *   - A failed allocation of the diagonal now reports LIS_OUT_OF_MEMORY;
 *     it used to return err, which is 0 at that point.
 *   - Error paths free only what this function still owns: arrays already
 *     released by lis_matrix_malloc_csr, the diagonal once it is attached
 *     to *D, and the L arrays once they are installed in *L are no longer
 *     freed a second time.
 *   - (*D)->value is cleared after its old buffer is released, and *L / *U
 *     are cleared after being destroyed, so no dangling pointer is
 *     returned on failure.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U)
{
	LIS_INT err;
	LIS_INT i,n,np,lnnz,unnz;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;

	*D = NULL;
	*L = NULL;
	*U = NULL;

	err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
	if( err ) return err;

	n  = Ain->n;
	np = Ain->np;

	err = lis_matrix_duplicate(Ain,L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_duplicate(Ain,U);
	if( err )
	{
		lis_matrix_destroy(*L);
		*L = NULL;
		return err;
	}
	err = lis_matrix_diag_duplicateM(Ain,D);
	if( err )
	{
		lis_matrix_destroy(*L);
		lis_matrix_destroy(*U);
		*L = NULL;
		*U = NULL;
		return err;
	}
	/* the duplicate's own buffer is replaced by diag below */
	lis_free((*D)->value);
	(*D)->value = NULL;

	lnnz   = Ain->L->nnz;
	unnz   = Ain->U->nnz;
	lptr   = NULL;
	lindex = NULL;
	lvalue = NULL;
	uptr   = NULL;
	uindex = NULL;
	uvalue = NULL;
	diag   = NULL;

	err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
	if( err )
	{
		return err;
	}
	err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
	if( err )
	{
		/* the U arrays were already released by the callee */
		lis_free2(3,lptr,lindex,lvalue);
		return err;
	}
	diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag");
	if( diag==NULL )
	{
		LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		return LIS_OUT_OF_MEMORY;
	}

	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n;i++)
	{
		diag[i] = Ain->D->value[i];
	}
	lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
	lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

	/* from here on diag belongs to *D */
	(*D)->value = diag;

	err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L);
	if( err )
	{
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		return err;
	}
	/* from here on the L arrays belong to *L */
	err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U);
	if( err )
	{
		lis_free2(3,uptr,uindex,uvalue);
		return err;
	}

	err = lis_matrix_assemble(*L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_assemble(*U);
	if( err )
	{
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Deep-copy a CSR (or CSC) matrix from Ain into Aout.
 *
 * If Ain is split, its D/L/U parts are copied and installed with
 * lis_matrix_setDLU_csr. If Ain is not split, or is split with is_save
 * set, the unsplit ptr/index/value arrays are copied as well. Aout takes
 * over the CSC type from Ain before it is assembled.
 *
 * Changes relative to the previous revision:
 *   - A failed allocation of the diagonal now reports LIS_OUT_OF_MEMORY;
 *     it used to return err, which is 0 at that point.
 *   - When the second lis_matrix_malloc_csr fails, only the L arrays are
 *     freed here, because that function has already released its own
 *     partial allocations.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_matrix_copy_csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT err;
	LIS_INT i,n,nnz,lnnz,unnz;
	LIS_INT *ptr,*index;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *value,*lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;

	n = Ain->n;

	if( Ain->is_splited )
	{
		lnnz   = Ain->L->nnz;
		unnz   = Ain->U->nnz;
		lptr   = NULL;
		lindex = NULL;
		lvalue = NULL;
		uptr   = NULL;
		uindex = NULL;
		uvalue = NULL;
		diag   = NULL;

		err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
		if( err )
		{
			return err;
		}
		err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
		if( err )
		{
			/* the U arrays were already released by the callee */
			lis_free2(3,lptr,lindex,lvalue);
			return err;
		}
		diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_csr::diag");
		if( diag==NULL )
		{
			LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
			lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
			return LIS_OUT_OF_MEMORY;
		}

		#ifdef _OPENMP
		#pragma omp parallel for private(i)
		#endif
		for(i=0;i<n;i++)
		{
			diag[i] = Ain->D->value[i];
		}
		lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
		lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

		err = lis_matrix_setDLU_csr(lnnz,unnz,diag,lptr,lindex,lvalue,uptr,uindex,uvalue,Aout);
		if( err )
		{
			lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
			return err;
		}
	}

	if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
	{
		ptr   = NULL;
		index = NULL;
		value = NULL;
		nnz   = Ain->nnz;

		err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value);
		if( err )
		{
			return err;
		}
		lis_matrix_elements_copy_csr(n,Ain->ptr,Ain->index,Ain->value,ptr,index,value);

		err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
		if( err )
		{
			lis_free2(3,ptr,index,value);
			return err;
		}
	}

	if( Ain->matrix_type==LIS_MATRIX_CSC )
	{
		/* the same arrays describe a CSC matrix; carry the type over */
		Aout->matrix_type = LIS_MATRIX_CSC;
		Aout->status      = -LIS_MATRIX_CSC;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Split an MSR matrix into its diagonal D, strictly lower part A->L and
 * strictly upper part A->U. In the MSR layout used here the first n+1
 * entries of index[] are row pointers, value[i] for i < n is the diagonal
 * entry of row i, and off-diagonal entries start at position n+1.
 * On success A->D is set and A->is_splited becomes LIS_TRUE.
 *
 * Change relative to the previous revision: with OpenMP, the counting
 * arrays liw/uiw are released on every error path (they used to leak).
 *
 * NOTE(review): in the OpenMP branch lnnz/unnz already include the n+1
 * offset when they are passed to lis_matrix_malloc_msr, whereas the serial
 * branch passes plain counts; this looks like over-allocation only, but
 * should be confirmed against lis_matrix_malloc_msr.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_matrix_split_msr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT lnnz,unnz;
	LIS_INT lndz,undz;
	LIS_INT err;
	LIS_INT *lindex,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif
	LIS_MATRIX_DIAG D;

	LIS_DEBUG_FUNC_IN;

	n      = A->n;
	lnnz   = 0;
	unnz   = 0;
	lndz   = n;
	undz   = n;
	D      = NULL;
	lindex = NULL;
	lvalue = NULL;
	uindex = NULL;
	uvalue = NULL;

	#ifdef _OPENMP
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		/* per-row counts, stored one slot to the right */
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->index[i];j<A->index[i+1];j++)
			{
				if( A->index[j]<i )
				{
					liw[i+1]++;
				}
				else if( A->index[j]>i )
				{
					uiw[i+1]++;
				}
			}
		}
		/* off-diagonal data starts behind the n+1 pointer slots */
		liw[0] = n+1;
		uiw[0] = n+1;
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		lnnz = liw[n];
		unnz = uiw[n];
	#else
		for(i=0;i<n;i++)
		{
			for(j=A->index[i];j<A->index[i+1];j++)
			{
				if( A->index[j]<i )
				{
					lnnz++;
				}
				else if( A->index[j]>i )
				{
					unnz++;
				}
			}
		}
	#endif

	err = lis_matrix_LU_create(A);
	if( err )
	{
		goto fail;
	}
	err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
	if( err )
	{
		goto fail;
	}
	err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
	if( err )
	{
		goto fail_arrays;
	}
	err = lis_matrix_diag_duplicateM(A,&D);
	if( err )
	{
		goto fail_arrays;
	}

	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lindex[i] = liw[i];
			uindex[i] = uiw[i];
		}
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			kl = lindex[i];
			ku = uindex[i];
			D->value[i] = A->value[i];
			for(j=A->index[i];j<A->index[i+1];j++)
			{
				if( A->index[j]<i )
				{
					lindex[kl] = A->index[j];
					lvalue[kl] = A->value[j];
					kl++;
				}
				else if( A->index[j]>i )
				{
					uindex[ku] = A->index[j];
					uvalue[ku] = A->value[j];
					ku++;
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		lnnz      = n+1;
		unnz      = n+1;
		lindex[0] = n+1;
		uindex[0] = n+1;
		for(i=0;i<n;i++)
		{
			D->value[i] = A->value[i];
			for(j=A->index[i];j<A->index[i+1];j++)
			{
				if( A->index[j]<i )
				{
					lindex[lnnz] = A->index[j];
					lvalue[lnnz] = A->value[j];
					lnnz++;
				}
				else if( A->index[j]>i )
				{
					uindex[unnz] = A->index[j];
					uvalue[unnz] = A->value[j];
					unnz++;
				}
			}
			lindex[i+1] = lnnz;
			uindex[i+1] = unnz;
		}
	#endif

	/* lnnz/unnz include the n+1 offset at this point in both branches */
	A->L->nnz     = lnnz - (n+1);
	A->L->ndz     = lndz;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = unnz - (n+1);
	A->U->ndz     = undz;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->D          = D;
	A->is_splited = LIS_TRUE;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail_arrays:
	lis_free2(4,lindex,lvalue,uindex,uvalue);
fail:
	#ifdef _OPENMP
		lis_free2(2,liw,uiw);
	#endif
	return err;
}
/*
 * Convert the row-wise work storage of Ain (w_row / w_index / w_value)
 * into CSC arrays and install them in Aout.
 *
 * Entries are first counted per column, the counts are turned into column
 * pointers, and then every entry is scattered to the next free slot of its
 * column, so the rows inside each column appear in ascending order.
 *
 * Change relative to the previous revision: the work array iw is released
 * as soon as the scatter is finished. It used to leak when
 * lis_matrix_assemble failed.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error reported by
 * lis_matrix_set_csc / lis_matrix_assemble.
 */
LIS_INT lis_matrix_convert_rco2csc(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT i,j,k,l,n,nnz,err;
	LIS_INT *ptr,*index,*iw;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	ptr   = NULL;
	index = NULL;
	value = NULL;
	iw    = NULL;
	n     = Ain->n;

	iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::iw");
	if( iw==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}
	ptr = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::ptr");
	if( ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}

	/* count the entries of every column */
	for(i=0;i<n;i++) iw[i] = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<Ain->w_row[i];j++)
		{
			iw[Ain->w_index[i][j]]++;
		}
	}
	/* column pointers; iw becomes the next free slot of each column */
	ptr[0] = 0;
	for(i=0;i<n;i++)
	{
		ptr[i+1] = ptr[i] + iw[i];
		iw[i]    = ptr[i];
	}
	nnz = ptr[n];

	index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::index" );
	if( index==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}
	value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2csc::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}

	for(i=0;i<n;i++)
	{
		for(j=0;j<Ain->w_row[i];j++)
		{
			k        = Ain->w_index[i][j];
			l        = iw[k];
			value[l] = Ain->w_value[i][j];
			index[l] = i;
			iw[k]++;
		}
	}

	/* iw is not needed past this point */
	lis_free(iw);
	iw = NULL;

	err = lis_matrix_set_csc(nnz,ptr,index,value,Aout);
	if( err )
	{
		lis_free2(3,ptr,index,value);
		return err;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Deep-copy a DIA matrix from Ain into Aout.
 *
 * If Ain is split, its D/L/U parts are copied and installed with
 * lis_matrix_setDLU_dia. If Ain is not split, or is split with is_save
 * set, the unsplit index/value arrays are copied as well. Aout is then
 * assembled.
 *
 * Changes relative to the previous revision:
 *   - A failed allocation of the diagonal now reports LIS_OUT_OF_MEMORY;
 *     it used to return err, which is 0 at that point.
 *   - When the second lis_matrix_malloc_dia fails, only the L arrays are
 *     freed here, because that function has already released its own
 *     partial allocations.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error of a failing helper.
 */
LIS_INT lis_matrix_copy_dia(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT err;
	LIS_INT i,n,nnd,lnnd,unnd;
	LIS_INT *index;
	LIS_INT *lindex;
	LIS_INT *uindex;
	LIS_SCALAR *value,*lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;

	n = Ain->n;

	if( Ain->is_splited )
	{
		lnnd   = Ain->L->nnd;
		unnd   = Ain->U->nnd;
		lindex = NULL;
		lvalue = NULL;
		uindex = NULL;
		uvalue = NULL;
		diag   = NULL;

		err = lis_matrix_malloc_dia(n,lnnd,&lindex,&lvalue);
		if( err )
		{
			return err;
		}
		err = lis_matrix_malloc_dia(n,unnd,&uindex,&uvalue);
		if( err )
		{
			/* the U arrays were already released by the callee */
			lis_free2(2,lindex,lvalue);
			return err;
		}
		diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_dia::diag");
		if( diag==NULL )
		{
			LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
			lis_free2(4,lindex,lvalue,uindex,uvalue);
			return LIS_OUT_OF_MEMORY;
		}

		#ifdef _OPENMP
		#pragma omp parallel for private(i)
		#endif
		for(i=0;i<n;i++)
		{
			diag[i] = Ain->D->value[i];
		}
		lis_matrix_elements_copy_dia(n,lnnd,Ain->L->index,Ain->L->value,lindex,lvalue);
		lis_matrix_elements_copy_dia(n,unnd,Ain->U->index,Ain->U->value,uindex,uvalue);

		err = lis_matrix_setDLU_dia(lnnd,unnd,diag,lindex,lvalue,uindex,uvalue,Aout);
		if( err )
		{
			lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
			return err;
		}
	}

	if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
	{
		index = NULL;
		value = NULL;
		nnd   = Ain->nnd;

		err = lis_matrix_malloc_dia(n,nnd,&index,&value);
		if( err )
		{
			return err;
		}
		lis_matrix_elements_copy_dia(n,nnd,Ain->index,Ain->value,index,value);

		err = lis_matrix_set_dia(nnd,index,value,Aout);
		if( err )
		{
			lis_free2(2,index,value);
			return err;
		}
	}

	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_matrix_malloc_rco(LIS_INT n, LIS_INT nnz[], LIS_INT **row, LIS_INT ***index, LIS_SCALAR ***value) { LIS_INT i,j; LIS_INT *w_row,**w_index; LIS_SCALAR **w_value; LIS_DEBUG_FUNC_IN; w_row = NULL; w_index = NULL; w_value = NULL; w_row = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_malloc_rco::w_row" ); if( w_row==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } w_index = (LIS_INT **)lis_malloc( n*sizeof(LIS_INT *),"lis_matrix_malloc_rco::w_index" ); if( w_index==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT *)); lis_free2(3,w_row,w_index,w_value); return LIS_OUT_OF_MEMORY; } w_value = (LIS_SCALAR **)lis_malloc( n*sizeof(LIS_SCALAR *),"lis_matrix_malloc_rco::w_value" ); if( w_value==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR *)); lis_free2(3,w_row,w_index,w_value); return LIS_OUT_OF_MEMORY; } if( nnz!=NULL ) { for(i=0;i<n;i++) { w_index[i] = NULL; w_value[i] = NULL; if( nnz[i]==0 ) continue; w_index[i] = (LIS_INT *)lis_malloc( nnz[i]*sizeof(LIS_INT),"lis_matrix_malloc_rco::w_index[i]" ); if( w_index[i]==NULL ) { LIS_SETERR_MEM(nnz[i]*sizeof(LIS_INT)); break; } w_value[i] = (LIS_SCALAR *)lis_malloc( nnz[i]*sizeof(LIS_SCALAR),"lis_matrix_malloc_rco::w_value[i]" ); if( w_value[i]==NULL ) { LIS_SETERR_MEM(nnz[i]*sizeof(LIS_SCALAR)); break; } } if(i<n) { for(j=0;j<i;j++) { if( w_index[i] ) lis_free(w_index[i]); if( w_value[i] ) lis_free(w_value[i]); } lis_free2(3,w_row,w_index,w_value); return LIS_OUT_OF_MEMORY; } } #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n;i++) w_row[i] = 0; *row = w_row; *index = w_index; *value = w_value; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon) { #ifdef _OPENMP LIS_INT err; LIS_INT i,j,k,ii,jj,kk; LIS_INT is,ie,my_rank,nprocs; LIS_INT n,nr,nnz,lfil,len; LIS_SCALAR gamma,t,tol,toldd,m; LIS_MATRIX A; LIS_MATRIX_ILU L,U; LIS_VECTOR D; LIS_SCALAR tnorm, tolnorm; LIS_SCALAR fact,lxu,*wn,*w; LIS_INT lenu,lenl,col,jpos,jrow,upos,para; LIS_INT *jbuf,*iw; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; gamma = solver->params[LIS_PARAMS_GAMMA-LIS_OPTIONS_LEN]; lfil = (LIS_INT)((double)A->nnz/(2.0*n))*m; nprocs = omp_get_max_threads(); L = NULL; U = NULL; err = lis_matrix_ilu_create(n,1,&L); if( err ) return err; err = lis_matrix_ilu_create(n,1,&U); if( err ) return err; err = lis_matrix_ilu_setCR(L); if( err ) return err; err = lis_matrix_ilu_setCR(U); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) { return err; } w = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( w==NULL ) { LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( wn==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } jbuf = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( jbuf==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iw = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( iw==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } #pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos) { my_rank = omp_get_thread_num(); LIS_GET_ISIE(my_rank,nprocs,n,is,ie); for(i=is;i<ie;i++) iw[my_rank*n+i] = -1; for(i=is;i<ie;i++) { tnorm = 0; k = 0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { jj = 
A->index[j]; if( jj<is || jj>=ie ) continue; tnorm += fabs(A->value[j]); k++; } tnorm = tnorm / (double)k; tolnorm = tol * tnorm; lenu = 0; lenl = 0; jbuf[my_rank*n+i] = i; w[my_rank*n+i] = 0; iw[my_rank*n+i] = i; for(j=A->ptr[i];j<A->ptr[i+1];j++) { col = A->index[j]; if( col<is || col>=ie ) continue; t = A->value[j]; if( col < i ) { jbuf[my_rank*n+lenl] = col; iw[my_rank*n+col] = lenl; w[my_rank*n+lenl] = t; lenl++; } else if( col == i ) { w[my_rank*n+i] = t; } else { lenu++; jpos = i + lenu; jbuf[my_rank*n+jpos] = col; iw[my_rank*n+col] = jpos; w[my_rank*n+jpos] = t; } } j = -1; len = 0; while( ++j < lenl ) { jrow = jbuf[my_rank*n+j]; jpos = j; for(k=j+1;k<lenl;k++) { if( jbuf[my_rank*n+k]<jrow ) { jrow = jbuf[my_rank*n+k]; jpos = k; } } if( jpos!=j ) { col = jbuf[my_rank*n+j]; jbuf[my_rank*n+j] = jbuf[my_rank*n+jpos]; jbuf[my_rank*n+jpos] = col; iw[my_rank*n+jrow] = j; iw[my_rank*n+col] = jpos; t = w[my_rank*n+j]; w[my_rank*n+j] = w[my_rank*n+jpos]; w[my_rank*n+jpos] = t; } fact = w[my_rank*n+j] * D->value[jrow]; w[my_rank*n+j] = fact; iw[my_rank*n+jrow] = -1; for(k=0;k<U->nnz[jrow];k++) { col = U->index[jrow][k]; jpos = iw[my_rank*n+col]; lxu = -fact * U->value[jrow][k]; if( fabs(lxu) < tolnorm && jpos==-1 ) continue; if( col >= i ) { if( jpos == -1 ) { lenu++; upos = i + lenu; jbuf[my_rank*n+upos] = col; iw[my_rank*n+col] = upos; w[my_rank*n+upos] = lxu; } else { w[my_rank*n+jpos] += lxu; } } else { if( jpos == -1 ) { jbuf[my_rank*n+lenl] = col; iw[my_rank*n+col] = lenl; w[my_rank*n+lenl] = lxu; lenl++; } else { w[my_rank*n+jpos] += lxu; } } } } iw[my_rank*n+i] = -1; for(j=0;j<lenu;j++) { iw[ my_rank*n+jbuf[my_rank*n+i+j+1] ] = -1; } D->value[i] = 1.0 / w[my_rank*n+i]; len = _min(lfil,lenl); for(j=0;j<lenl;j++) { wn[my_rank*n+j] = fabs(w[my_rank*n+j]); iw[my_rank*n+j] = j; } lis_sort_di(0,lenl-1,&wn[my_rank*n],&iw[my_rank*n]); lis_sort_i(0,len-1,&iw[my_rank*n]); L->nnz[i] = len; if( len>0 ) { L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); L->value[i] = 
(LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[my_rank*n+j]; L->index[i][j] = jbuf[my_rank*n+jpos]; L->value[i][j] = w[my_rank*n+jpos]; } for(j=0;j<lenl;j++) iw[my_rank*n+j] = -1; len = _min(lfil,lenu); for(j=0;j<lenu;j++) { wn[my_rank*n+j] = fabs(w[my_rank*n+i+j+1]); iw[my_rank*n+j] = i+j+1; } lis_sort_di(0,lenu-1,&wn[my_rank*n],&iw[my_rank*n]); lis_sort_i(0,len-1,&iw[my_rank*n]); U->nnz[i] = len; if( len>0 ) { U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[my_rank*n+j]; U->index[i][j] = jbuf[my_rank*n+jpos]; U->value[i][j] = w[my_rank*n+jpos]; } for(j=0;j<lenu;j++) iw[my_rank*n+j] = -1; } } precon->L = L; precon->U = U; precon->D = D; lis_free2(4,w,iw,wn,jbuf); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #else LIS_INT err; LIS_INT i,j,k; LIS_INT n,lfil,len; LIS_SCALAR gamma,t,tol,m; LIS_MATRIX A; LIS_MATRIX_ILU L,U; LIS_VECTOR D; LIS_SCALAR tnorm, tolnorm; LIS_SCALAR fact,lxu,*wn,*w; LIS_INT lenu,lenl,col,jpos,jrow,upos; LIS_INT *jbuf,*iw; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; gamma = solver->params[LIS_PARAMS_GAMMA-LIS_OPTIONS_LEN]; lfil = (LIS_INT)(((double)A->nnz/(2.0*n))*m); L = NULL; U = NULL; err = lis_matrix_ilu_create(n,1,&L); if( err ) return err; err = lis_matrix_ilu_create(n,1,&U); if( err ) return err; err = lis_matrix_ilu_setCR(L); if( err ) return err; err = lis_matrix_ilu_setCR(U); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) { return err; } w = (LIS_SCALAR *)lis_malloc((n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( w==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } wn = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( wn==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } jbuf = (LIS_INT 
*)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( jbuf==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( iw==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } for(i=0;i<n;i++) iw[i] = -1; for(i=0;i<n;i++) { tnorm = 0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { tnorm += fabs(A->value[j]); } tnorm = tnorm / (double)(A->ptr[i+1]-A->ptr[i]); tolnorm = tol * tnorm; lenu = 0; lenl = 0; jbuf[i] = i; w[i] = 0; iw[i] = i; for(j=A->ptr[i];j<A->ptr[i+1];j++) { col = A->index[j]; #ifdef USE_MPI if( col>n-1 ) continue; #endif t = A->value[j]; if( col < i ) { jbuf[lenl] = col; iw[col] = lenl; w[lenl] = t; lenl++; } else if( col == i ) { w[i] = t; } else { lenu++; jpos = i + lenu; jbuf[jpos] = col; iw[col] = jpos; w[jpos] = t; } } j = -1; len = 0; while( ++j < lenl ) { jrow = jbuf[j]; jpos = j; for(k=j+1;k<lenl;k++) { if( jbuf[k]<jrow ) { jrow = jbuf[k]; jpos = k; } } if( jpos!=j ) { col = jbuf[j]; jbuf[j] = jbuf[jpos]; jbuf[jpos] = col; iw[jrow] = j; iw[col] = jpos; t = w[j]; w[j] = w[jpos]; w[jpos] = t; } fact = w[j] * D->value[jrow]; w[j] = fact; iw[jrow] = -1; for(k=0;k<U->nnz[jrow];k++) { col = U->index[jrow][k]; jpos = iw[col]; lxu = -fact * U->value[jrow][k]; if( fabs(lxu) < tolnorm && jpos==-1 ) continue; if( col >= i ) { if( jpos == -1 ) { lenu++; upos = i + lenu; jbuf[upos] = col; iw[col] = upos; w[upos] = lxu; } else { w[jpos] += lxu; } } else { if( jpos == -1 ) { jbuf[lenl] = col; iw[col] = lenl; w[lenl] = lxu; lenl++; } else { w[jpos] += lxu; } } } /* for(kk=0;kk<bs;kk++) { w[bs*len+kk] = -buf_fact[kk]; } jbuf[len] = jrow; len++;*/ } iw[i] = -1; for(j=0;j<lenu;j++) { iw[ jbuf[i+j+1] ] = -1; } D->value[i] = 1.0 / w[i]; len = _min(lfil,lenl); for(j=0;j<lenl;j++) { wn[j] = fabs(w[j]); iw[j] = j; } lis_sort_di(0,lenl-1,wn,iw); lis_sort_i(0,len-1,iw); L->nnz[i] = len; if( len>0 ) { L->index[i] = (LIS_INT 
*)malloc(len*sizeof(LIS_INT)); L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[j]; L->index[i][j] = jbuf[jpos]; L->value[i][j] = w[jpos]; } for(j=0;j<lenl;j++) iw[j] = -1; len = _min(lfil,lenu); for(j=0;j<lenu;j++) { wn[j] = fabs(w[i+j+1]); iw[j] = i+j+1; } lis_sort_di(0,lenu-1,wn,iw); lis_sort_i(0,len-1,iw); U->nnz[i] = len; if( len>0 ) { U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[j]; U->index[i][j] = jbuf[jpos]; U->value[i][j] = w[jpos]; } for(j=0;j<lenu;j++) iw[j] = -1; } precon->L = L; precon->U = U; precon->D = D; lis_free2(4,w,iw,wn,jbuf); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #endif }
/*
 * Build the incomplete-LU factors (with threshold dropping and a per-row
 * fill limit) for the block-row matrix solver->A.
 *
 * Inputs read from the solver:
 *   - A->n, A->nr, A->bnr, A->bnnz, A->bptr, A->bindex, A->value
 *   - params[LIS_PARAMS_DROP - LIS_OPTIONS_LEN]  : drop tolerance `tol`
 *   - params[LIS_PARAMS_RATE - LIS_OPTIONS_LEN]  : fill ratio `m`
 *
 * On success precon->L, precon->U and precon->WD receive the strictly
 * lower factor, the strictly upper factor and the LU-decomposed diagonal
 * blocks, and LIS_SUCCESS is returned.  On failure the error code of the
 * failing call, or LIS_OUT_OF_MEMORY, is returned and precon is untouched.
 *
 * Working layout for the row i being eliminated (all indices are block
 * positions, each block holding bs = bnr*bnr scalars in w):
 *   - positions [0, lenl)       : entries left of the diagonal
 *   - position  i               : the diagonal block
 *   - positions [i+1, i+lenu)   : entries right of the diagonal
 *   - jbuf[pos]                 : block column stored at `pos`
 *   - iw[col]                   : position of block column `col`, or -1
 * iw is also reused as the permutation array handed to the sort helpers,
 * which is why it is reset to -1 after each use.
 *
 * NOTE(review): buf_ns/buf_fact hold 16 scalars, so this presumably
 * expects bnr <= 4 -- confirm against callers.
 * NOTE(review): L, U and D are not released on the error paths below; the
 * teardown rules for partially built factors are not visible here.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT err;
	LIS_INT i,j,k,kk,bnr,bs;
	LIS_INT n,nr,lfil,len;
	LIS_SCALAR t,tol,m;
	LIS_MATRIX A;
	LIS_MATRIX_ILU L,U;
	LIS_MATRIX_DIAG D;
	LIS_SCALAR tnorm, tolnorm;
	LIS_SCALAR buf_ns[16],buf_fact[16],*xnrm,*wn,*w;
	LIS_INT lenu,lenl,col,jpos,jrow,upos,para;
	LIS_INT *jbuf,*iw;

	LIS_DEBUG_FUNC_IN;

	A   = solver->A;
	n   = A->n;
	nr  = A->nr;
	bnr = A->bnr;
	bs  = bnr*bnr;
	tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	m   = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
	/* maximum number of blocks kept per row in each factor */
	lfil = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

	L = NULL;
	U = NULL;
	w = NULL;
	xnrm = NULL;
	wn = NULL;
	jbuf = NULL;
	iw = NULL;

	err = lis_matrix_ilu_create(nr,bnr,&L);
	if( err ) return err;
	err = lis_matrix_ilu_create(nr,bnr,&U);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(L);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(U);
	if( err ) return err;
	err = lis_matrix_diag_duplicateM(A,&D);
	if( err )
	{
		return err;
	}

	/* Scratch buffers: every failure path releases whatever was already
	   obtained and reports the size that was actually requested. */
	w = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
	if( w==NULL )
	{
		LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
		lis_free2(5,w,iw,xnrm,wn,jbuf);
		return LIS_OUT_OF_MEMORY;
	}
	xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
	if( xnrm==NULL )
	{
		LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
		lis_free2(5,w,iw,xnrm,wn,jbuf);
		return LIS_OUT_OF_MEMORY;
	}
	wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
	if( wn==NULL )
	{
		LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
		lis_free2(5,w,iw,xnrm,wn,jbuf);
		return LIS_OUT_OF_MEMORY;
	}
	jbuf = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
	if( jbuf==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		lis_free2(5,w,iw,xnrm,wn,jbuf);
		return LIS_OUT_OF_MEMORY;
	}
	iw = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
	if( iw==NULL )
	{
		LIS_SETERR_MEM(nr*sizeof(LIS_INT));
		lis_free2(5,w,iw,xnrm,wn,jbuf);
		return LIS_OUT_OF_MEMORY;
	}

	for(i=0;i<nr;i++) iw[i] = -1;

	for(i=0;i<nr;i++)
	{
		/* drop threshold for this row: tol times the largest block norm */
		tnorm = 0;
		for(j=A->bptr[i];j<A->bptr[i+1];j++)
		{
			lis_array_nrm2(bs,&A->value[bs*j],&t);
			tnorm = _max(t,tnorm);
		}
		tolnorm = tol * tnorm;

		/* scatter row i of A into the working row; lenu counts the
		   diagonal as well, so the first upper entry lands at i+1 */
		lenu = 1;
		lenl = 0;
		jbuf[i] = i;
		memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
		iw[i] = i;
		for(j=A->bptr[i];j<A->bptr[i+1];j++)
		{
			col = A->bindex[j];
			lis_array_nrm2(bs,&A->value[bs*j],&t);
			if( t<tolnorm && col!=i ) continue;
			if( col < i )
			{
				jbuf[lenl] = col;
				iw[col] = lenl;
				memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
				lenl++;
			}
			else if( col == i )
			{
				memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
			}
			else
			{
				jpos = i + lenu;
				jbuf[jpos] = col;
				iw[col] = jpos;
				memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
				lenu++;
			}
		}

		/* eliminate the lower entries in increasing column order; `len`
		   counts the multipliers that survive and are packed to the front */
		j = -1;
		len = 0;
		while( ++j < lenl )
		{
			/* selection step: bring the smallest remaining column to j */
			jrow = jbuf[j];
			jpos = j;
			for(k=j+1;k<lenl;k++)
			{
				if( jbuf[k]<jrow )
				{
					jrow = jbuf[k];
					jpos = k;
				}
			}
			if( jpos!=j )
			{
				col = jbuf[j];
				jbuf[j] = jbuf[jpos];
				jbuf[jpos] = col;
				iw[jrow] = j;
				iw[col] = jpos;
				memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
				memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
				memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
			}
			/* lis_array_matmat(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact,LIS_INS_VALUE);*/
			lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);
			iw[jrow] = -1;
			lis_array_nrm2(bs,buf_fact,&t);
			/* multiplier too small relative to row jrow of U: drop it */
			if( t * xnrm[jrow] <= tolnorm ) continue;

			for(k=0;k<U->nnz[jrow];k++)
			{
				col = U->index[jrow][k];
				lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
				jpos = iw[col];
				lis_array_nrm2(bs,buf_ns,&t);
				/* small fill-in at a position not yet present is dropped */
				if( t < tolnorm && jpos == -1 )
				{
					continue;
				}
				if( col >= i )
				{
					if( jpos == -1 )
					{
						upos = i + lenu;
						jbuf[upos] = col;
						iw[col] = upos;
						memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
						lenu++;
					}
					else
					{
						for(kk=0;kk<bs;kk++)
						{
							w[bs*jpos+kk] += buf_ns[kk];
						}
					}
				}
				else
				{
					if( jpos == -1 )
					{
						jbuf[lenl] = col;
						iw[col] = lenl;
						memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
						lenl++;
					}
					else
					{
						for(kk=0;kk<bs;kk++)
						{
							w[bs*jpos+kk] += buf_ns[kk];
						}
					}
				}
			}
			/* len <= j, so this overwrites an already processed slot */
			for(kk=0;kk<bs;kk++)
			{
				w[bs*len+kk] = -buf_fact[kk];
			}
			jbuf[len] = jrow;
			len++;
		}

		/* store row i of L: at most lfil of the lenl surviving blocks */
		lenl = len;
		len = _min(lfil,lenl);
		for(j=0;j<lenl;j++)
		{
			lis_array_nrm2(bs,&w[bs*j],&wn[j]);
			iw[j] = j;
		}
		lis_sort_di(0,lenl-1,wn,iw);
		lis_sort_i(0,len-1,iw);
		L->nnz[i] = len;
		if( len>0 )
		{
			L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
			L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
			if( L->index[i]==NULL || L->value[i]==NULL )
			{
				LIS_SETERR_MEM(bs*len*sizeof(LIS_SCALAR));
				lis_free2(5,w,iw,xnrm,wn,jbuf);
				return LIS_OUT_OF_MEMORY;
			}
		}
		for(j=0;j<len;j++)
		{
			jpos = iw[j];
			L->index[i][j] = jbuf[jpos];
			memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
		}
		for(j=0;j<lenl;j++) iw[j] = -1;

		/* store row i of U: lenu and len include the diagonal, hence the
		   "-1" on every count that refers to off-diagonal blocks only */
		len = _min(lfil,lenu);
		for(j=1;j<lenu;j++)
		{
			jpos = i+j;
			lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
			iw[j-1] = jpos;
		}
		para = lenu - 1;
		lis_sort_di(0,para-1,wn,iw);
		lis_sort_i(0,len-2,iw);
		U->nnz[i] = len-1;
		if( len>1 )
		{
			U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT));
			U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR));
			if( U->index[i]==NULL || U->value[i]==NULL )
			{
				LIS_SETERR_MEM(bs*(len-1)*sizeof(LIS_SCALAR));
				lis_free2(5,w,iw,xnrm,wn,jbuf);
				return LIS_OUT_OF_MEMORY;
			}
		}
		/* xnrm[i] = largest block norm among the diagonal and kept U blocks */
		lis_array_nrm2(bs,&w[bs*i],&t);
		for(j=0;j<len-1;j++)
		{
			jpos = iw[j];
			U->index[i][j] = jbuf[jpos];
			memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
			t = _max(t,wn[j]);
		}
		for(j=0;j<lenu-1;j++) iw[j] = -1;
		xnrm[i] = t;

		memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));
		/* last block row: when n is not a multiple of bnr, put 1.0 on the
		   trailing diagonal entries of the final diagonal block */
		if( i==nr-1 )
		{
			switch(bnr)
			{
			case 2:
				if( n%2!=0 )
				{
					D->value[4*(nr-1)+3] = 1.0;
				}
				break;
			case 3:
				if( n%3==1 )
				{
					D->value[9*(nr-1)+4] = 1.0;
					D->value[9*(nr-1)+8] = 1.0;
				}
				else if( n%3==2 )
				{
					D->value[9*(nr-1)+8] = 1.0;
				}
				break;
			default:
				break;
			}
		}
		/* lis_array_invGauss(bnr,&D->value[bs*i]);*/
		lis_array_LUdecomp(bnr,&D->value[bs*i]);

		/* clear the column->position map for the diagonal and upper part */
		for(j=0;j<lenu;j++)
		{
			iw[ jbuf[i+j] ] = -1;
		}
	}

	precon->L  = L;
	precon->U  = U;
	precon->WD = D;

	lis_free2(5,w,iw,xnrm,wn,jbuf);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Read a coordinate-format matrix from `file` into A as CSR and, when both
 * b and x are non-NULL, read the accompanying vectors through
 * lis_input_mm_vec().
 *
 * The file is scanned twice: the first pass counts the entries that fall
 * in this process's row range [is, ie) to build the row pointer, the
 * second pass (after rewind) fills the column indices and values.  For a
 * matrix flagged MM_SYMM every off-diagonal entry is also mirrored into
 * the transposed position.  Row/column indices in the file are 1-based.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, LIS_ERR_FILE_IO, or the error
 * code of the failing helper.  On any error all buffers allocated here
 * are released (via lis_matrix_storage_destroy() once A owns them).
 */
LIS_INT lis_input_mm_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
	char buf[BUFSIZE];
	LIS_INT nr,nc,nnz;
	LIS_INT i,j,my_rank;
	LIS_INT err;
	LIS_INT mmtype,mode;
	LIS_INT n,is,ie;
	LIS_INT ridx,cidx;
	LIS_INT *ptr, *index;
	LIS_INT *work;
	LIS_INT isb,isx,isbin;
	LIS_SCALAR val;
	LIS_SCALAR *value;
	LIS_MM_MATFMT matfmt;

	LIS_DEBUG_FUNC_IN;

#ifdef USE_MPI
	my_rank = A->my_rank;
#else
	my_rank = 0;
#endif

	/* check banner */
	err = lis_input_mm_banner(file,&mmtype);
	if( err ) return err;

	/* check size */
	err = lis_input_mm_size(file,&nr,&nc,&nnz,&isb,&isx,&isbin);
	if( err ) return err;

	err = lis_matrix_set_size(A,0,nr);
	if( err ) return err;

#ifdef _LONGLONG
	if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nr,nc,nnz);
#else
	if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nr,nc,nnz);
#endif

	n     = A->n;
	ptr   = NULL;
	index = NULL;
	value = NULL;
	work  = NULL;

	lis_matrix_get_range(A,&is,&ie);

	ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::ptr" );
	if( ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}
	work = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}

#ifdef _OPENMP
#pragma omp parallel for private(i)
#endif
	for(i=0;i<n+1;i++)
	{
		ptr[i]  = 0;
		work[i] = 0;
	}

	/* read data */
	/* mode becomes LIS_TRUE when the host byte order (first byte of the
	   integer 1) differs from isbin-1, i.e. binary records need swapping */
	mode = 1;
	mode = *(char *)&mode;
	if( mode!=(isbin-1) )
	{
		mode = LIS_TRUE;
	}
	else
	{
		mode = LIS_FALSE;
	}

	/* pass 1: count entries per local row (indices still 1-based here) */
	for( i=0; i<nnz; i++ )
	{
		if( isbin )
		{
			if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
			ridx = matfmt.i;
			cidx = matfmt.j;
			if( mode )
			{
				lis_bswap_int(1,&ridx);
				lis_bswap_int(1,&cidx);
			}
		}
		else
		{
			if( fgets(buf, BUFSIZE, file)==NULL )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 )
#endif
#else
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 )
#endif
#endif
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
		}
		/* if( val!=0.0 )*/
		{
			/* mirrored entry of a symmetric matrix lands in row cidx */
			if( mmtype==MM_SYMM && ridx!=cidx )
			{
				if( cidx>is && cidx<=ie ) work[cidx-is-1]++;
			}
			if( ridx>is && ridx<=ie )
			{
				ptr[ridx-is]++;
			}
		}
	}

	/* turn the per-row counts into row offsets; work is reset so it can
	   serve as the per-row fill cursor in pass 2 */
	ptr[0] = 0;
	for( i=0; i<n; i++ )
	{
		if( mmtype==MM_SYMM )
		{
			ptr[i+1] += ptr[i] + work[i];
		}
		else
		{
			ptr[i+1] += ptr[i];
		}
		work[i] = 0;
	}

	index = (LIS_INT *)lis_malloc( ptr[n]*sizeof(LIS_INT),"lis_input_mm_csr::index" );
	if( index==NULL )
	{
		LIS_SETERR_MEM(ptr[n]*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}
	value = (LIS_SCALAR *)lis_malloc( ptr[n]*sizeof(LIS_SCALAR),"lis_input_mm_csr::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(ptr[n]*sizeof(LIS_SCALAR));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}

#ifdef _OPENMP
#pragma omp parallel for private(i,j)
#endif
	for(i=0;i<n;i++)
	{
		for(j=ptr[i];j<ptr[i+1];j++)
		{
			index[j] = 0;
			value[j] = 0.0;
		}
	}

	/* pass 2: rewind, skip the first line and every following line that
	   starts with '%', plus the first line that does not (the size line) */
	rewind(file);
	if( fgets(buf, BUFSIZE, file) == NULL )
	{
		LIS_SETERR_FIO;
		lis_free2(4,ptr,index,value,work);
		return LIS_ERR_FILE_IO;
	}
	do
	{
		if( fgets(buf, BUFSIZE, file) == NULL )
		{
			LIS_SETERR_FIO;
			lis_free2(4,ptr,index,value,work);
			return LIS_ERR_FILE_IO;
		}
	}while( buf[0]=='%' );

	for( i=0; i<nnz; i++ )
	{
		if( isbin )
		{
			if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
			ridx = matfmt.i;
			cidx = matfmt.j;
			val  = matfmt.value;
			if( mode )
			{
				lis_bswap_int(1,&ridx);
				lis_bswap_int(1,&cidx);
				lis_bswap_scalar(1,&val);
			}
		}
		else
		{
			if( fgets(buf, BUFSIZE, file) == NULL )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 )
#endif
#else
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 )
#endif
#endif
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
		}
		/* switch to 0-based indices */
		ridx--;
		cidx--;
		if( ridx==cidx && val==0.0 )
		{
#ifdef _LONGLONG
			printf("diagonal element is zero (i=%lld)\n",ridx);
#else
			printf("diagonal element is zero (i=%d)\n",ridx);
#endif
		}
		/* if( val!=0.0 )*/
		{
			if( mmtype==MM_SYMM && ridx!=cidx )
			{
				if( cidx>=is && cidx<ie )
				{
					value[ptr[cidx-is]+work[cidx-is]] = val;
					index[ptr[cidx-is]+work[cidx-is]] = ridx;
					work[cidx-is]++;
				}
			}
			if( ridx>=is && ridx<ie )
			{
				value[ptr[ridx-is]+work[ridx-is]] = val;
				index[ptr[ridx-is]+work[ridx-is]] = cidx;
				work[ridx-is]++;
			}
		}
	}
#ifdef USE_MPI
	MPI_Barrier(A->comm);
#endif

	err = lis_matrix_set_csr(ptr[n],ptr,index,value,A);
	if( err )
	{
		lis_free2(4,ptr,index,value,work);
		return err;
	}
	err = lis_matrix_assemble(A);
	if( err )
	{
		lis_matrix_storage_destroy(A);
		lis_free(work);
		return err;
	}

	if( b!=NULL && x!=NULL )
	{
		err = lis_input_mm_vec(A,b,x,file,isb,isx,isbin);
		if( err )
		{
			/* Return here: falling through would free `work` a second
			   time and report success for a destroyed matrix. */
			lis_matrix_storage_destroy(A);
			lis_free(work);
			return err;
		}
	}
	lis_free(work);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Convert the CSR matrix Ain into MSR storage and attach it to Aout.
 *
 * Layout produced (as written by the loops below):
 *   - value[0..n-1]  : diagonal entries
 *   - index[0..n]    : row offsets into the off-diagonal part, index[0]=n+1
 *   - index[k], value[k] for k >= n+1 : off-diagonal column / value pairs
 *
 * ndz is the number of rows that have no stored diagonal entry.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY if the work array cannot be
 * allocated, or the error code of the failing helper.
 */
LIS_INT lis_matrix_convert_csr2msr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT i,j,k;
	LIS_INT err;
	LIS_INT n,nnz,ndz;
	LIS_INT count;
	LIS_INT *iw;
	LIS_INT *index;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	n     = Ain->n;
	nnz   = Ain->nnz;
	iw    = NULL;
	index = NULL;
	value = NULL;

	iw = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2msr::iw" );
	if( iw==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		return LIS_ERR_OUT_OF_MEMORY;
	}

	/* check ndz */
	for(i=0;i<n+1;i++) iw[i] = 0;
	count = 0;
#ifdef _OPENMP
#pragma omp parallel private(i,j)
#endif
	{
		/* iw[i+1] = 1 when row i stores a diagonal entry */
#ifdef _OPENMP
#pragma omp for
#endif
		for(i=0;i<n;i++)
		{
			iw[i+1] = 0;
			for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
			{
				if( i==Ain->index[j] )
				{
					iw[i+1] = 1;
				}
			}
		}
#ifdef _OPENMP
#pragma omp for reduction(+:count)
#endif
		for(i=0;i<n;i++)
		{
			count += iw[i+1];
		}
		/* iw[i+1] = number of off-diagonal entries in row i */
#ifdef _OPENMP
#pragma omp for
#endif
		for(i=0;i<n;i++)
		{
			iw[i+1] = Ain->ptr[i+1]-Ain->ptr[i]-iw[i+1];
		}
	}
	ndz = n - count;

	err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value);
	if( err )
	{
		lis_free2(3,index,value,iw);
		return err;
	}

	/* convert msr */
	iw[0] = n+1;
	for(i=0;i<n;i++)
	{
		iw[i+1] = iw[i+1] + iw[i];
	}
#ifdef _OPENMP
#pragma omp parallel private(i,j,k)
#endif
	{
#ifdef _OPENMP
#pragma omp for
#endif
		for(i=0;i<n+1;i++)
		{
			index[i] = iw[i];
		}
#ifdef _OPENMP
#pragma omp for
#endif
		for(i=0;i<n;i++)
		{
			k = index[i];
			/* Rows without a stored diagonal would otherwise leave the
			   diagonal slot uninitialised. */
			value[i] = 0.0;
			for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
			{
				/* The column is compared directly: the former temporary
				   was shared between threads (missing from the private
				   list) and therefore raced. */
				if( Ain->index[j]==i )
				{
					value[i] = Ain->value[j];
				}
				else
				{
					value[k] = Ain->value[j];
					index[k] = Ain->index[j];
					k++;
				}
			}
		}
	}

	err = lis_matrix_set_msr(nnz,ndz,index,value,Aout);
	if( err )
	{
		lis_free2(3,index,value,iw);
		return err;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_free(iw);
		lis_matrix_storage_destroy(Aout);
		return err;
	}
	lis_free(iw);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Convert the row-wise work storage of Ain (w_row / w_index / w_value)
 * into block storage with bnr x bnc blocks and attach it to Aout.
 *
 * Pass 1 counts, for every block row, the distinct block columns touched
 * by its scalar rows and turns the counts into bptr.  Pass 2 fills bindex
 * and value; inside a block the scalar at local (row ii, column j) is
 * stored at offset j*bnr + ii.
 *
 * Each thread uses its own marker arrays of length nc (number of block
 * columns).  In pass 2 the marker iw[bj] holds (value offset of block + 1)
 * so that 0 can mean "block column not seen yet in this block row".
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the failing
 * helper.  Buffers allocated here are released on every error path that
 * occurs before Aout takes them over.
 */
LIS_INT lis_matrix_convert_rco2bsr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT i,j,k,n,gn,nnz,bnnz,nr,nc,bnr,bnc,err;
	LIS_INT ii,jj,kk,bj,jpos,ij,kv,bi;
	LIS_INT alloc_failed;
	LIS_INT *iw,*iw2;
	LIS_INT *bptr,*bindex;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	bnr = Ain->conv_bnr;
	bnc = Ain->conv_bnc;
	n   = Ain->n;
	gn  = Ain->gn;
	nr  = 1 + (n-1)/bnr;
	nc  = 1 + (gn-1)/bnc;

	bptr   = NULL;
	bindex = NULL;
	value  = NULL;
	iw     = NULL;
	iw2    = NULL;

	bptr = (LIS_INT *)lis_malloc( (nr+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bptr" );
	if( bptr==NULL )
	{
		LIS_SETERR_MEM((nr+1)*sizeof(LIS_INT));
		lis_free2(5,bptr,bindex,value,iw,iw2);
		return LIS_OUT_OF_MEMORY;
	}

	/* pass 1: count block columns per block row */
	alloc_failed = LIS_FALSE;
#ifdef _OPENMP
#pragma omp parallel private(i,k,ii,j,bj,kk,ij,jj,iw,iw2,kv,jpos)
#endif
	{
		iw  = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" );
		iw2 = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw2" );
		if( iw==NULL || iw2==NULL )
		{
			/* A thread without scratch space records the failure and
			   skips its share of the loop; the caller bails out below. */
#ifdef _OPENMP
#pragma omp critical
#endif
			{
				alloc_failed = LIS_TRUE;
			}
		}
		else
		{
			memset(iw,0,nc*sizeof(LIS_INT));
		}

#ifdef _OPENMP
#pragma omp for
#endif
		for(i=0;i<nr;i++)
		{
			if( iw==NULL || iw2==NULL ) continue;

			k  = 0;
			kk = bnr*i;
			jj = 0;
			for(ii=0;ii+kk<n&&ii<bnr;ii++)
			{
				for(j=0;j<Ain->w_row[kk+ii];j++)
				{
					bj   = Ain->w_index[kk+ii][j]/bnc;
					jpos = iw[bj];
					if( jpos==0 )
					{
						iw[bj]  = 1;
						iw2[jj] = bj;
						jj++;
					}
				}
			}
			/* count the marked block columns and clear their markers */
			for(bj=0;bj<jj;bj++)
			{
				k++;
				ii = iw2[bj];
				iw[ii]=0;
			}
			bptr[i+1] = k;
		}
		lis_free2(2,iw,iw2);
	}
	if( alloc_failed )
	{
		LIS_SETERR_MEM(nc*sizeof(LIS_INT));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}

	bptr[0] = 0;
	for(i=0;i<nr;i++)
	{
		bptr[i+1] += bptr[i];
	}
	bnnz = bptr[nr];
	nnz  = bnnz*bnr*bnc;

	bindex = (LIS_INT *)lis_malloc( bnnz*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bindex" );
	if( bindex==NULL )
	{
		LIS_SETERR_MEM(bnnz*sizeof(LIS_INT));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}
	value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2bsr::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}

	/* convert bsr */
#ifdef _OPENMP
#pragma omp parallel private(bi,i,ii,k,j,bj,jpos,kv,kk,ij,jj,iw)
#endif
	{
		iw = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" );
		if( iw==NULL )
		{
#ifdef _OPENMP
#pragma omp critical
#endif
			{
				alloc_failed = LIS_TRUE;
			}
		}
		else
		{
			memset(iw,0,nc*sizeof(LIS_INT));
		}

#ifdef _OPENMP
#pragma omp for
#endif
		for(bi=0;bi<nr;bi++)
		{
			if( iw==NULL ) continue;

			i  = bi*bnr;
			ii = 0;
			kk = bptr[bi];
			while( i+ii<n && ii<=bnr-1 )
			{
				for( k=0;k<Ain->w_row[i+ii];k++)
				{
					j    = Ain->w_index[i+ii][k];
					bj   = j/bnc;
					j    = j%bnc;
					jpos = iw[bj];
					if( jpos==0 )
					{
						/* first entry of this block: open and zero it */
						kv         = kk * bnr * bnc;
						iw[bj]     = kv+1;
						bindex[kk] = bj;
						for(jj=0;jj<bnr*bnc;jj++) value[kv+jj] = 0.0;
						ij = j*bnr + ii;
						value[kv+ij] = Ain->w_value[i+ii][k];
						kk = kk+1;
					}
					else
					{
						/* jpos-1 is the value offset of the open block */
						ij = j*bnr + ii;
						value[jpos+ij-1] = Ain->w_value[i+ii][k];
					}
				}
				ii = ii+1;
			}
			for(j=bptr[bi];j<bptr[bi+1];j++)
			{
				iw[bindex[j]] = 0;
			}
		}
		lis_free2(1,iw);
	}
	if( alloc_failed )
	{
		LIS_SETERR_MEM(nc*sizeof(LIS_INT));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}

	err = lis_matrix_set_bsr(bnr,bnc,bnnz,bptr,bindex,value,Aout);
	if( err )
	{
		lis_free2(3,bptr,bindex,value);
		return err;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Iterative solve of A x = b driven by the data held in `solver`.
 *
 * Read from solver:
 *   - A, b, x, tol, work[] (4 + 3*s vectors are used)
 *   - options: MAXITER, OUTPUT, CONV_COND, IDRS_RESTART (the value `s`)
 *
 * Work vector layout: work[0..3] = r, t, v, av; work[4 .. 4+s) = dX;
 * work[4+s .. 4+2s) = P; work[4+2s .. 4+3s) = dR.
 *
 * Outline as implemented below:
 *   1. P is filled with pseudo-random numbers from a fixed seed and passed
 *      to lis_idrs_orth().
 *   2. s start-up steps build dX[k], dR[k] and the s-by-s matrix M with
 *      M[k*s+i] = <P[i], dR[k]>.
 *   3. Main loop: solve M c = m, form v = r - sum c[j] dR[j], update the
 *      slot `oldest` of dX/dR, then r and x, then refresh m and the
 *      `oldest` row of M.  Every (s+1)-th iteration recomputes `om`.
 *
 * Returns LIS_SUCCESS on convergence (or when the initial-residual helper
 * reports nothing to do), LIS_MAXITER when the iteration limit is hit,
 * and LIS_OUT_OF_MEMORY when the small work arrays cannot be allocated.
 * solver->retcode, iter, resid and ptimes are set on the convergence and
 * max-iteration exits.
 *
 * NOTE(review): dX[k]/dR[k] in the start-up loop and av/t in the main
 * loop are only produced inside the PRE_RIGHT sections, so this code
 * presumably always builds with PRE_RIGHT defined -- confirm.
 */
LIS_INT lis_idrs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR *M,*m,*c,*MM;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_REAL angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	s       = solver->options[LIS_OPTIONS_IDRS_RESTART];
	ptimes  = 0.0;

	r  = solver->work[0];
	t  = solver->work[1];
	v  = solver->work[2];
	av = solver->work[3];
	dX = &solver->work[4];
	P  = &solver->work[4+s];
	dR = &solver->work[4+2*s];

	angle = 0.7;

	m  = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::m");
	c  = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::c");
	M  = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	MM = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	/* Fail cleanly instead of dereferencing a NULL work array later. */
	if( m==NULL || c==NULL || M==NULL || MM==NULL )
	{
		LIS_SETERR_MEM(s*s*sizeof(LIS_SCALAR));
		lis_free2(4,m,c,M,MM);
		return LIS_OUT_OF_MEMORY;
	}

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		lis_free2(4,m,c,M,MM);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol = solver->tol;

	/* fixed seed, so P is the same on every call */
	init_by_array(init, length);
	for(k=0;k<s;k++)
	{
		for(i=0;i<n;i++)
		{
			P[k]->value[i] = genrand_real1();
		}
	}
	lis_idrs_orth(s,P);

	/* start-up: s steps that populate dX, dR and M */
	for( k=0; k<s; k++ )
	{
#ifdef PRE_RIGHT
		times = lis_wtime();
		lis_psolve(solver, r, dX[k]);
		ptimes += lis_wtime()-times;
		LIS_MATVEC(A,dX[k],dR[k]);
#endif
		lis_vector_dot(dR[k],dR[k],&h);
		lis_vector_dot(dR[k],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[k]);
		lis_vector_scale(-om,dR[k]);

		lis_vector_axpy(1.0,dX[k],x);
		lis_vector_axpy(1.0,dR[k],r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[k+1] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", k+1, nrm2);
		}
		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter    = k+1;
			solver->resid   = nrm2;
			solver->ptimes  = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[k],&M[k*s+i]);
		}
	}

	iter   = s;
	oldest = 0;
	for(i=0;i<s;i++)
	{
		lis_vector_dot(P[i],r,&m[i]);
	}

	while( iter<=maxiter )
	{
		tim = lis_wtime();
		lis_array_solve(s,M,m,c,MM); /* solve Mc=m */
		lis_vector_copy(r,v);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],v);
		}

		if( (iter%(s+1))==s )
		{
			/* every (s+1)-th step: recompute om from t and v */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,av,t);
#endif
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
			om = om / h;
#if 0
			lis_vector_scale(-om,t);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dR[j],t);
			}
			lis_vector_copy(t,dR[oldest]);
			lis_vector_scale(om,av);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dX[j],av);
			}
			lis_vector_copy(av,dX[oldest]);
#else
			/* dX[oldest] = om*av - sum_j c[j]*dX[j]; element i is read
			   from every dX[j] before dX[oldest] is overwritten */
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
			/* dR[oldest] = -om*t - sum_j c[j]*dR[j] */
			for(i=0;i<n;i++)
			{
				h = -om*t->value[i];
				for(j=0;j<s;j++)
				{
					h -= dR[j]->value[i] * c[j];
				}
				dR[oldest]->value[i] = h;
			}
#endif
		}
		else
		{
			/* intermediate step: reuse the previous om */
#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, v, av);
			ptimes += lis_wtime()-times;
#endif
#if 0
			lis_vector_scale(om,av);
			for(j=0;j<s;j++)
			{
				lis_vector_axpy(-c[j],dX[j],av);
			}
			lis_vector_copy(av,dX[oldest]);
#else
			for(i=0;i<n;i++)
			{
				h = om*av->value[i];
				for(j=0;j<s;j++)
				{
					h -= dX[j]->value[i] * c[j];
				}
				dX[oldest]->value[i] = h;
			}
#endif
			LIS_MATVEC(A,dX[oldest],dR[oldest]);
			lis_vector_scale(-1.0,dR[oldest]);
		}

		lis_vector_axpy(1.0,dR[oldest],r);
		lis_vector_axpy(1.0,dX[oldest],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d residual = %e\n", iter, nrm2);
		}
		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);
			solver->retcode = LIS_SUCCESS;
			solver->iter    = iter;
			solver->resid   = nrm2;
			solver->ptimes  = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* refresh m and the row of M that belongs to the replaced slot */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[oldest],&h);
			m[i] += h;
			M[oldest*s+i] = h;
		}

		oldest++;
		if( oldest==s ) oldest = 0;
		tim = lis_wtime() - tim;
		/* printf("update m,M: %e\n",tim); */
	}

	lis_free2(4,m,c,M,MM);
	solver->retcode = LIS_MAXITER;
	solver->iter    = iter;
	solver->resid   = nrm2;
	/* record the preconditioner time here as well, like the two
	   convergence exits above */
	solver->ptimes  = ptimes;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Convert the MSR matrix Ain into CSR storage and attach it to Aout.
 *
 * For row r the input keeps the diagonal in Ain->value[r] and the
 * off-diagonal entries in positions [Ain->index[r], Ain->index[r+1]) of
 * Ain->index / Ain->value.  The output row starts with the diagonal entry
 * (only when it is non-zero) followed by the off-diagonal entries in
 * their stored order.
 *
 * Returns LIS_SUCCESS or the error code of the failing helper; the CSR
 * arrays are released here if they could not be handed over to Aout.
 */
LIS_INT lis_matrix_convert_msr2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT row, src, dst;
	LIS_INT status;
	LIS_INT nrows, nonzeros;
	LIS_INT *rowptr, *colind;
	LIS_SCALAR *vals;

	LIS_DEBUG_FUNC_IN;

	nrows    = Ain->n;
	nonzeros = Ain->nnz;
	rowptr   = NULL;
	colind   = NULL;
	vals     = NULL;

	status = lis_matrix_malloc_csr(nrows,nonzeros,&rowptr,&colind,&vals);
	if( status ) return status;

	/* convert csr */
	/* per-row length: off-diagonal count plus one for a non-zero diagonal */
#ifdef _OPENMP
#pragma omp parallel for private(row)
#endif
	for(row=0;row<nrows;row++)
	{
		rowptr[row+1] = Ain->index[row+1] - Ain->index[row];
		if( Ain->value[row]!=0.0 ) rowptr[row+1]++;
	}

	/* running sum turns the lengths into row offsets */
	rowptr[0] = 0;
	for(row=1;row<=nrows;row++)
	{
		rowptr[row] += rowptr[row-1];
	}

#ifdef _OPENMP
#pragma omp parallel for private(row,src,dst)
#endif
	for(row=0;row<nrows;row++)
	{
		dst = rowptr[row];
		if( Ain->value[row]!=(LIS_SCALAR)0.0 )
		{
			vals[dst]   = Ain->value[row];
			colind[dst] = row;
			dst++;
		}
		for(src=Ain->index[row];src<Ain->index[row+1];src++,dst++)
		{
			vals[dst]   = Ain->value[src];
			colind[dst] = Ain->index[src];
		}
	}

	status = lis_matrix_set_csr(nonzeros,rowptr,colind,vals,Aout);
	if( status )
	{
		lis_free2(3,rowptr,colind,vals);
		return status;
	}

	status = lis_matrix_assemble(Aout);
	if( status )
	{
		lis_matrix_storage_destroy(Aout);
		return status;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}