LIS_INT lis_matrix_setDLU_dia(LIS_INT lnnd, LIS_INT unnd, LIS_SCALAR *diag, LIS_INT *lindex, LIS_SCALAR *lvalue, LIS_INT *uindex, LIS_SCALAR *uvalue, LIS_MATRIX A) { LIS_INT err; LIS_MATRIX_DIAG D; LIS_DEBUG_FUNC_IN; #if 0 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET); if( err ) return err; #else if(lis_matrix_is_assembled(A)) return LIS_SUCCESS; else { err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET); if( err ) return err; } #endif A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_dia::A->L"); if( A->L==NULL ) { LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT)); return LIS_OUT_OF_MEMORY; } A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_dia::A->U"); if( A->U==NULL ) { LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT)); lis_matrix_DLU_destroy(A); return LIS_OUT_OF_MEMORY; } err = lis_matrix_diag_create(A->n,0,A->comm,&D); if( err ) { lis_matrix_DLU_destroy(A); return err; } lis_free(D->value); D->value = diag; A->D = D; A->L->nnd = lnnd; A->L->index = lindex; A->L->value = lvalue; A->U->nnd = unnd; A->U->index = uindex; A->U->value = uvalue; A->is_copy = LIS_FALSE; A->status = -LIS_MATRIX_DIA; A->is_splited = LIS_TRUE; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
/*
 * Allocate an ILU matrix object with n rows and block size bs.
 *
 * On success *A points to a zero-filled structure whose per-row nnz[] is 0
 * and whose per-row index[] pointers are NULL; value, values, nnz_ma and bsz
 * are NULL.
 *
 * On failure every partial allocation is released and *A is set to NULL, so
 * the caller never receives a half-built object.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_matrix_ilu_create(LIS_INT n, LIS_INT bs, LIS_MATRIX_ILU *A)
{
    LIS_INT i;
    LIS_INT *nnz;
    LIS_INT **index;

    LIS_DEBUG_FUNC_IN;

    *A    = NULL;
    nnz   = NULL;
    index = NULL;

    *A = (LIS_MATRIX_ILU)lis_malloc( sizeof(struct LIS_MATRIX_ILU_STRUCT),"lis_matrix_ilu_create::A" );
    if( NULL==*A )
    {
        LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_ILU_STRUCT));
        return LIS_OUT_OF_MEMORY;
    }
    memset(*A,0,sizeof(struct LIS_MATRIX_ILU_STRUCT));

    nnz = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_ilu_create::nnz" );
    if( nnz==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        /* release the structure instead of leaking it */
        lis_free(*A);
        *A = NULL;
        return LIS_OUT_OF_MEMORY;
    }
    index = (LIS_INT **)lis_malloc( n*sizeof(LIS_INT *),"lis_matrix_ilu_create::index" );
    if( index==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT *));
        /* release everything obtained so far */
        lis_free(nnz);
        lis_free(*A);
        *A = NULL;
        return LIS_OUT_OF_MEMORY;
    }

#ifdef _OPENMP
#pragma omp parallel for private(i)
#endif
    for(i=0;i<n;i++)
    {
        nnz[i]   = 0;
        index[i] = NULL;
    }

    (*A)->n      = n;
    (*A)->bs     = bs;
    (*A)->nnz    = nnz;
    (*A)->index  = index;
    (*A)->nnz_ma = NULL;
    (*A)->value  = NULL;
    (*A)->values = NULL;
    (*A)->bsz    = NULL;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Give a vector whose status is LIS_VECTOR_NULL fresh value storage and mark
 * it assembled.  A vector in any other status only has its status updated.
 *
 * With a non-zero precision field the value buffer holds 2*np + np%2 scalars,
 * value_lo points np + np%2 scalars into it, and a 32-scalar work buffer is
 * allocated as well.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error from
 * lis_vector_check().  If the work buffer cannot be allocated the vector is
 * destroyed and *vec is set to NULL.
 */
LIS_INT lis_vector_reuse(LIS_VECTOR *vec)
{
    LIS_INT     status,np,nvalues;
    LIS_INT     extended;
    LIS_VECTOR  v;

    LIS_DEBUG_FUNC_IN;

    status = lis_vector_check(*vec,LIS_VECTOR_CHECK_NULL);
    if( status ) return status;

    v  = *vec;
    np = v->np;

    if( v->status==LIS_VECTOR_NULL )
    {
        extended = v->precision;
        nvalues  = extended ? (2*np+np%2) : np;

        v->value = (LIS_SCALAR *)lis_malloc( nvalues*sizeof(LIS_SCALAR),"lis_vector_reuse::vec->value" );
        if( NULL==v->value )
        {
            LIS_SETERR_MEM(nvalues*sizeof(LIS_SCALAR));
            return LIS_OUT_OF_MEMORY;
        }
        v->is_copy = LIS_TRUE;

        if( extended )
        {
            /* low-order words live in the second half of the same buffer */
            v->value_lo = v->value + np + np%2;
            v->work     = (LIS_SCALAR *)lis_malloc( 32*sizeof(LIS_SCALAR),"lis_vector_reuse::vec->work" );
            if( NULL==v->work )
            {
                LIS_SETERR_MEM(32*sizeof(LIS_SCALAR));
                lis_vector_destroy(v);
                *vec = NULL;
                return LIS_OUT_OF_MEMORY;
            }
        }
    }

    v->status = LIS_VECTOR_ASSEMBLED;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Store argument number *no in the global f_argv_tmp table as a
 * NUL-terminated C string with trailing blanks removed.
 *
 * no   : slot in f_argv_tmp to fill.
 * argv : source characters; at most len of them are copied.
 *        NOTE(review): presumably a blank-padded Fortran character
 *        argument with len as its length - confirm against the caller.
 * ierr : receives LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 *
 * An argument made only of blanks (or len == 0) yields the empty string.
 * The blank scan is bounded by the buffer start, so it never reads in front
 * of the allocation.
 */
void lis_set_argv_f(LIS_INT *no, char *argv, LIS_INT *ierr, LIS_INT len)
{
    LIS_INT slot;
    LIS_INT end;
    char    *buf;

    LIS_DEBUG_FUNC_IN;

    slot = *no;
    buf  = (char *)lis_malloc((len+1)*sizeof(char),"lis_set_argv_f::f_argv_tmp");
    f_argv_tmp[slot] = buf;
    if( buf==NULL )
    {
        LIS_SETERR_MEM((len+1)*sizeof(char));
        *ierr = LIS_OUT_OF_MEMORY;
        return;
    }

    /* Blank-fill first so a short source still looks blank-padded. */
    memset(buf,0x20,(len+1)*sizeof(char));
    strncpy(buf,argv,len);

    /* Cut right after the last non-blank character. */
    end = len;
    while( end>0 && buf[end-1]==' ' )
    {
        end--;
    }
    buf[end] = '\0';

    *ierr = LIS_SUCCESS;
    LIS_DEBUG_FUNC_OUT;
    return;
}
/*
 * Pre-allocate nnzrow index/value slots for every row of the ILU matrix A
 * and record that capacity in A->nnz_ma.
 *
 * A->index and A->value must already point to arrays of A->n row pointers.
 * Rows are obtained with plain malloc().
 * NOTE(review): they therefore need free(), not lis_free() - confirm
 * against the ILU destroy routine.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.  On failure the capacity table
 * is released; rows that were allocated stay attached to A so that the
 * owner of A can release them.
 */
LIS_INT lis_matrix_ilu_premalloc(LIS_INT nnzrow, LIS_MATRIX_ILU A)
{
    LIS_INT i,n;
    LIS_INT *nnz_ma;

    LIS_DEBUG_FUNC_IN;

    n = A->n;

    nnz_ma = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_ilu_premalloc::nnz_ma" );
    if( nnz_ma==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }

#ifdef _OPENMP
#pragma omp parallel for private(i)
#endif
    for(i=0;i<n;i++)
    {
        nnz_ma[i]   = nnzrow;
        A->index[i] = (LIS_INT *)malloc( nnzrow*sizeof(LIS_INT) );
        A->value[i] = (LIS_SCALAR *)malloc( nnzrow*sizeof(LIS_SCALAR) );
    }

    /* Check the results after the allocation loop has finished. */
    for(i=0;i<n;i++)
    {
        if( A->index[i]==NULL )
        {
            LIS_SETERR_MEM(nnzrow*sizeof(LIS_INT));
            lis_free(nnz_ma);   /* was leaked before */
            return LIS_OUT_OF_MEMORY;
        }
        if( A->value[i]==NULL )
        {
            LIS_SETERR_MEM(nnzrow*sizeof(LIS_SCALAR));
            lis_free(nnz_ma);   /* was leaked before */
            return LIS_OUT_OF_MEMORY;
        }
    }

    A->nnz_ma = nnz_ma;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Set up the preconditioner selected by solver->options[LIS_OPTIONS_PRECON]
 * together with two work vectors shaped like solver->A.
 *
 * The work vectors are duplicated in default precision when
 * solver->precision is LIS_PRECISION_DEFAULT and in quad precision
 * otherwise, then stored in precon->work / precon->worklen before the inner
 * preconditioner constructor runs.
 *
 * Ownership on failure: this function never destroys precon.
 * lis_precon_create() still writes to the handle and calls
 * lis_precon_destroy() on it after a failure here, so destroying it in this
 * function as well would hand that caller an already released object.
 * Work vectors that did not make it into precon are released here.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the first error reported by a
 * callee.
 */
LIS_INT lis_precon_create_adds(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT     i,j;
    LIS_INT     precon_type,worklen;
    LIS_INT     err;
    LIS_VECTOR  *work;

    LIS_DEBUG_FUNC_IN;

    precon_type = solver->options[LIS_OPTIONS_PRECON];
    worklen     = 2;
    err         = LIS_SUCCESS;

    work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_precon_create_adds::work" );
    if( work==NULL )
    {
        LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
        return LIS_OUT_OF_MEMORY;
    }

    if( solver->precision==LIS_PRECISION_DEFAULT )
    {
        for(i=0;i<worklen;i++)
        {
            err = lis_vector_duplicate(solver->A,&work[i]);
            if( err ) break;
        }
    }
    else
    {
        for(i=0;i<worklen;i++)
        {
            err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
            if( err ) break;
        }
    }
    if( i<worklen )
    {
        /* undo the vectors created before the failing one */
        for(j=0;j<i;j++) lis_vector_destroy(work[j]);
        lis_free(work);
        return err;
    }

    precon->worklen = worklen;
    precon->work    = work;

    err = lis_precon_create_xxx[precon_type](solver,precon);
    if( err )
    {
        /* precon, including the work vectors just attached, is left for
           the caller to destroy */
        return err;
    }

    precon->A       = solver->A;
    precon->is_copy = LIS_FALSE;

    LIS_DEBUG_FUNC_OUT;
    return err;
}
/*
 * Allocate the work vectors used by the GMRES solver.
 *
 * worklen = NWORK + (restart+1).  Slots 1..worklen-1 are duplicates of
 * solver->A in the solver's precision (the low-order part is cleared in
 * quad precision).  Slot 0 is a separate vector of local size restart+1.
 *
 * Every step is checked; on any failure all vectors created so far and the
 * table itself are released and nothing is stored in solver.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY, or the first error reported by
 * a callee.
 */
LIS_INT lis_gmres_malloc_work(LIS_SOLVER solver)
{
    LIS_VECTOR  *work;
    LIS_INT     i,j,restart,worklen,err;

    LIS_DEBUG_FUNC_IN;

    restart = solver->options[LIS_OPTIONS_RESTART];
    worklen = NWORK + (restart+1);
    err     = LIS_SUCCESS;

    work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_gmres_malloc_work::work" );
    if( work==NULL )
    {
        LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    if( solver->precision==LIS_PRECISION_DEFAULT )
    {
        for(i=1;i<worklen;i++)
        {
            err = lis_vector_duplicate(solver->A,&work[i]);
            if( err ) break;
        }
    }
    else
    {
        for(i=1;i<worklen;i++)
        {
            err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
            if( err ) break;
            memset(work[i]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
        }
    }
    if( i<worklen )
    {
        for(j=1;j<i;j++) lis_vector_destroy(work[j]);
        lis_free(work);
        return err;
    }

    /* Slot 0: a small vector of length restart+1, created separately. */
    if( solver->precision==LIS_PRECISION_DEFAULT )
    {
        err = lis_vector_create(solver->A->comm,&work[0]);
    }
    else
    {
        err = lis_vector_createex(LIS_PRECISION_QUAD,solver->A->comm,&work[0]);
    }
    if( !err )
    {
        err = lis_vector_set_size(work[0],restart+1,0);
        if( err ) lis_vector_destroy(work[0]);
    }
    if( err )
    {
        /* previously ignored: would leave a NULL or unsized work[0] */
        for(j=1;j<worklen;j++) lis_vector_destroy(work[j]);
        lis_free(work);
        return err;
    }

    solver->worklen = worklen;
    solver->work    = work;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate the IDR(s) work vectors: NWORK + 3*s duplicates of solver->A,
 * where s is solver->options[LIS_OPTIONS_IDRS_RESTART].
 *
 * In non-default precision each vector is duplicated in quad precision and
 * its low-order part is cleared.  If any duplication fails, the vectors made
 * so far and the table are released and that error is returned.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY, or the duplication error.
 */
LIS_INT lis_idrs_malloc_work(LIS_SOLVER solver)
{
    LIS_VECTOR  *vecs;
    LIS_INT     made,k,s,count,status;
    LIS_INT     quad;

    LIS_DEBUG_FUNC_IN;

    s     = solver->options[LIS_OPTIONS_IDRS_RESTART];
    count = NWORK + 3*s;

    vecs = (LIS_VECTOR *)lis_malloc( count*sizeof(LIS_VECTOR),"lis_idrs_malloc_work::work" );
    if( vecs==NULL )
    {
        LIS_SETERR_MEM(count*sizeof(LIS_VECTOR));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    quad = (solver->precision!=LIS_PRECISION_DEFAULT);
    for(made=0;made<count;made++)
    {
        if( quad )
        {
            status = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&vecs[made]);
            if( status ) break;
            memset(vecs[made]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
        }
        else
        {
            status = lis_vector_duplicate(solver->A,&vecs[made]);
            if( status ) break;
        }
    }

    if( made<count )
    {
        /* roll back the vectors created before the failure */
        for(k=0;k<made;k++) lis_vector_destroy(vecs[k]);
        lis_free(vecs);
        return status;
    }

    solver->worklen = count;
    solver->work    = vecs;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Resize the index and value storage of one row of the ILU matrix A so that
 * it can hold nnz entries.
 *
 * realloc() results go through temporaries: when a resize fails the row
 * keeps its previous, still valid buffer instead of having its only pointer
 * overwritten with NULL.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_matrix_ilu_realloc(LIS_INT row, LIS_INT nnz, LIS_MATRIX_ILU A)
{
    LIS_INT     *new_index;
    LIS_SCALAR  *new_value;

    LIS_DEBUG_FUNC_IN;

    new_index = (LIS_INT *)realloc(A->index[row],nnz*sizeof(LIS_INT));
    if( new_index==NULL )
    {
        LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    A->index[row] = new_index;

    new_value = (LIS_SCALAR *)realloc(A->value[row],nnz*sizeof(LIS_SCALAR));
    if( new_value==NULL )
    {
        LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    A->value[row] = new_value;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate the index and value arrays of a DIA matrix: n*nnd entries each.
 *
 * On success *index and *value point to the new arrays.  On failure both
 * are NULL: an array that was already obtained is released and its output
 * pointer cleared, so the caller never sees a dangling pointer.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_matrix_malloc_dia(LIS_INT n, LIS_INT nnd, LIS_INT **index, LIS_SCALAR **value)
{
    LIS_DEBUG_FUNC_IN;

    *index = NULL;
    *value = NULL;

    *index = (LIS_INT *)lis_malloc( n*nnd*sizeof(LIS_INT),"lis_matrix_malloc_dia::index" );
    if( *index==NULL )
    {
        LIS_SETERR_MEM(n*nnd*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    *value = (LIS_SCALAR *)lis_malloc( n*nnd*sizeof(LIS_SCALAR),"lis_matrix_malloc_dia::value" );
    if( *value==NULL )
    {
        LIS_SETERR_MEM(n*nnd*sizeof(LIS_SCALAR));
        lis_free(*index);
        *index = NULL;      /* do not leave a freed pointer in the output */
        return LIS_OUT_OF_MEMORY;
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Store or accumulate one entry of vector v.
 *
 * flag  : LIS_INS_VALUE overwrites the entry; any other value adds to it.
 * i     : global row index, 1-based when v->origin is set, else 0-based.
 * value : scalar to store or add.
 *
 * The row must lie in the locally owned range [v->is, v->ie); otherwise
 * LIS_ERR_ILL_ARG is returned and the message reports the index and range
 * in the caller's numbering.
 *
 * If v has no storage yet (status LIS_VECTOR_NULL) the value array is
 * allocated and zero-filled, so adding to a fresh vector starts from 0
 * rather than from indeterminate memory.
 *
 * Returns LIS_SUCCESS, LIS_ERR_ILL_ARG or LIS_OUT_OF_MEMORY.
 */
int lis_vector_set_value(int flag, int i, LIS_SCALAR value, LIS_VECTOR v)
{
    int np,is,ie,k;

    LIS_DEBUG_FUNC_IN;

    np = v->np;
    is = v->is;
    ie = v->ie;

    if( v->origin ) i--;
    if( i<is || i>=ie )
    {
        if( v->origin )
        {
            /* convert back to 1-based numbering for the message */
            is++;
            ie++;
            i++;
        }
        LIS_SETERR3(LIS_ERR_ILL_ARG, "i(=%d) is less than %d or larger than %d\n",i,is,ie);
        return LIS_ERR_ILL_ARG;
    }

    if(v->status==LIS_VECTOR_NULL)
    {
        v->value = (LIS_SCALAR *)lis_malloc( np*sizeof(LIS_SCALAR),"lis_vector_set_value::v->value" );
        if( NULL==v->value )
        {
            LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
            return LIS_OUT_OF_MEMORY;
        }
        for(k=0;k<np;k++)
        {
            v->value[k] = 0.0;
        }
        v->is_copy = LIS_TRUE;
        v->status  = LIS_VECTOR_ASSEMBLING;
    }

    if(flag==LIS_INS_VALUE)
    {
        v->value[i-is] = value;
    }
    else
    {
        v->value[i-is] += value;
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate a solver object and run lis_solver_init() on it.
 *
 * On success *solver refers to the new object; on allocation failure
 * *solver is NULL and LIS_OUT_OF_MEMORY is returned.
 */
LIS_INT lis_solver_create(LIS_SOLVER *solver)
{
    LIS_SOLVER  fresh;

    LIS_DEBUG_FUNC_IN;

    fresh   = (LIS_SOLVER)lis_malloc( sizeof(struct LIS_SOLVER_STRUCT),"lis_solver_create::solver" );
    *solver = fresh;

    if( fresh!=NULL )
    {
        lis_solver_init(fresh);
        LIS_DEBUG_FUNC_OUT;
        return LIS_SUCCESS;
    }

    LIS_SETERR_MEM(sizeof(struct LIS_SOLVER_STRUCT));
    return LIS_OUT_OF_MEMORY;
}
/*
 * Start collecting an argument list in the globals f_argc_tmp and
 * f_argv_tmp: the count becomes *argc + 1 and a pointer table of that
 * length is allocated.
 *
 * *ierr receives LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
void lis_set_argv_begin_f(LIS_INT *argc, LIS_INT *ierr)
{
    LIS_DEBUG_FUNC_IN;

    f_argc_tmp = *argc+1;
    f_argv_tmp = (char **)lis_malloc(f_argc_tmp*sizeof(char *),"lis_set_argv_begin_f::f_argv_tmp");

    if( f_argv_tmp!=NULL )
    {
        *ierr = LIS_SUCCESS;
        LIS_DEBUG_FUNC_OUT;
        return;
    }

    LIS_SETERR_MEM(f_argc_tmp*sizeof(char *));
    *ierr = LIS_OUT_OF_MEMORY;
    return;
}
/*
 * Read a matrix (and vectors b, x) from file via lis_input_hb_csr() and, if
 * A was requested in a storage format other than CSR or CSC, convert it to
 * that format in place.
 *
 * The conversion goes through a temporary matrix B: A is converted into B,
 * A's own storage is released, B's contents are moved into A with
 * lis_matrix_copy_struct(), and the B shell is freed.  For the JAD format a
 * work array of A->n scalars is allocated afterwards.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the first error reported by a
 * callee.  The temporary matrix is destroyed when the conversion fails.
 * NOTE(review): a failure of lis_matrix_copy_struct() still leaves B
 * allocated; how much of B has been moved into A by then is not visible
 * here, so that path is unchanged.
 */
LIS_INT lis_input_hb(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
    LIS_INT     err;
    LIS_INT     matrix_type;
    LIS_MATRIX  B;

    LIS_DEBUG_FUNC_IN;

    /* remember the requested format before the reader touches A */
    matrix_type = A->matrix_type;

    err = lis_input_hb_csr(A,b,x,file);
    if( err ) return err;

    if( matrix_type!=LIS_MATRIX_CSR && matrix_type!=LIS_MATRIX_CSC )
    {
        err = lis_matrix_duplicate(A,&B);
        if( err ) return err;
        lis_matrix_set_type(B,matrix_type);
        err = lis_matrix_convert(A,B);
        if( err )
        {
            lis_matrix_destroy(B);      /* was leaked before */
            return err;
        }

        /* drop everything A owns before taking over B's contents */
        lis_matrix_storage_destroy(A);
        lis_matrix_DLU_destroy(A);
        lis_matrix_diag_destroy(A->WD);
        if( A->l2g_map ) lis_free( A->l2g_map );
        if( A->commtable ) lis_commtable_destroy( A->commtable );
        if( A->ranges ) lis_free( A->ranges );

        err = lis_matrix_copy_struct(B,A);
        if( err ) return err;
        lis_free(B);

        if( A->matrix_type==LIS_MATRIX_JAD )
        {
            A->work = (LIS_SCALAR *)lis_malloc(A->n*sizeof(LIS_SCALAR),"lis_input_hb::A->work");
            if( A->work==NULL )
            {
                LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR));
                return LIS_OUT_OF_MEMORY;
            }
        }
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate the eigensolver work vectors: NWORK + ss duplicates of
 * esolver->A, where ss is esolver->options[LIS_EOPTIONS_SUBSPACE].
 *
 * Vectors are duplicated in default precision when esolver->eprecision is
 * LIS_PRECISION_DEFAULT and in quad precision otherwise.  If a duplication
 * fails, the vectors made so far and the table are released.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY, or the duplication error.
 */
LIS_INT lis_esi_malloc_work(LIS_ESOLVER esolver)
{
    LIS_VECTOR  *vecs;
    LIS_INT     made,k,count,status,ss;
    LIS_INT     quad;

    LIS_DEBUG_FUNC_IN;

    ss    = esolver->options[LIS_EOPTIONS_SUBSPACE];
    count = NWORK + ss;

    vecs = (LIS_VECTOR *)lis_malloc( count*sizeof(LIS_VECTOR),"lis_esi_malloc_work::work" );
    if( vecs==NULL )
    {
        LIS_SETERR_MEM(count*sizeof(LIS_VECTOR));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    quad = (esolver->eprecision!=LIS_PRECISION_DEFAULT);
    for(made=0;made<count;made++)
    {
        if( quad )
        {
            status = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&vecs[made]);
        }
        else
        {
            status = lis_vector_duplicate(esolver->A,&vecs[made]);
        }
        if( status ) break;
    }

    if( made<count )
    {
        /* roll back the vectors created before the failure */
        for(k=0;k<made;k++) lis_vector_destroy(vecs[k]);
        lis_free(vecs);
        return status;
    }

    esolver->worklen = count;
    esolver->work    = vecs;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate the CGS work vectors: NWORK duplicates of solver->A.
 *
 * In non-default precision each vector is duplicated in quad precision and
 * its low-order part is cleared.  If any duplication fails, the vectors made
 * so far and the table are released and that error is returned.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY, or the duplication error.
 */
LIS_INT lis_cgs_malloc_work(LIS_SOLVER solver)
{
    LIS_VECTOR  *vecs;
    LIS_INT     made,k,count,status;
    LIS_INT     quad;

    LIS_DEBUG_FUNC_IN;

    count = NWORK;

    vecs = (LIS_VECTOR *)lis_malloc( count*sizeof(LIS_VECTOR),"lis_cgs_malloc_work::work" );
    if( vecs==NULL )
    {
        LIS_SETERR_MEM(count*sizeof(LIS_VECTOR));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    quad = (solver->precision!=LIS_PRECISION_DEFAULT);
    for(made=0;made<count;made++)
    {
        if( quad )
        {
            status = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&vecs[made]);
            if( status ) break;
            memset(vecs[made]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
        }
        else
        {
            status = lis_vector_duplicate(solver->A,&vecs[made]);
            if( status ) break;
        }
    }

    if( made<count )
    {
        /* roll back the vectors created before the failure */
        for(k=0;k<made;k++) lis_vector_destroy(vecs[k]);
        lis_free(vecs);
        return status;
    }

    solver->worklen = count;
    solver->work    = vecs;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Give the ILU matrix A its table of per-row value pointers (A->value),
 * one slot per row.
 *
 * Every slot starts out as NULL, matching how lis_matrix_ilu_create()
 * initialises A->index, so rows that are never filled can be told apart
 * from allocated ones.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_matrix_ilu_setCR(LIS_MATRIX_ILU A)
{
    LIS_INT     i,n;
    LIS_SCALAR  **value;

    LIS_DEBUG_FUNC_IN;

    n = A->n;

    value = (LIS_SCALAR **)lis_malloc( n*sizeof(LIS_SCALAR *),"lis_matrix_ilu_setCR::value" );
    if( value==NULL )
    {
        /* report the size that was actually requested */
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR *));
        return LIS_OUT_OF_MEMORY;
    }
    for(i=0;i<n;i++)
    {
        value[i] = NULL;
    }

    A->value = value;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Create the preconditioner selected by solver->options[LIS_OPTIONS_PRECON].
 *
 * Dispatch:
 *   - type >= LIS_PRECON_TYPE_USERDEF : registered user constructor;
 *   - non-zero type with LIS_OPTIONS_ADDS set : lis_precon_create_adds(),
 *     after which the type is recorded as LIS_PRECON_TYPE_ADDS;
 *   - otherwise : the built-in constructor table lis_precon_create_xxx.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the constructor's error; on a
 * constructor error the object is passed to lis_precon_destroy().
 */
LIS_INT lis_precon_create(LIS_SOLVER solver, LIS_PRECON *precon)
{
    LIS_INT status;
    LIS_INT kind;

    LIS_DEBUG_FUNC_IN;

    *precon = NULL;
    kind    = solver->options[LIS_OPTIONS_PRECON];

    *precon = (LIS_PRECON)lis_malloc( sizeof(struct LIS_PRECON_STRUCT),"lis_precon_create::precon" );
    if( NULL==*precon )
    {
        LIS_SETERR_MEM(sizeof(struct LIS_PRECON_STRUCT));
        return LIS_OUT_OF_MEMORY;
    }
    lis_precon_init(*precon);
    (*precon)->precon_type = kind;

    if( kind<LIS_PRECON_TYPE_USERDEF )
    {
        if( kind && solver->options[LIS_OPTIONS_ADDS] )
        {
            status = lis_precon_create_adds(solver,*precon);
            (*precon)->precon_type = LIS_PRECON_TYPE_ADDS;
        }
        else
        {
            status = lis_precon_create_xxx[kind](solver,*precon);
        }
    }
    else
    {
        status = precon_register_top[kind-LIS_PRECON_TYPE_USERDEF].pcreate(solver,*precon);
    }

    if( status )
    {
        lis_precon_destroy(*precon);
        return status;
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Create an empty hash table: LIS_HASHTABLE_SIZE bucket pointers, all zero.
 *
 * On success *hashtable refers to the table; on allocation failure it is
 * NULL and LIS_ERR_OUT_OF_MEMORY is returned.  The table comes from the C
 * allocator directly, not from lis_malloc().
 */
LIS_INT lis_hashtable_create(LIS_HASHTABLE *hashtable)
{
    LIS_HASHTABLE buckets;

    LIS_DEBUG_FUNC_IN;

    *hashtable = NULL;

    /* calloc hands back zero-filled storage, i.e. every bucket empty */
    buckets = (LIS_HASHTABLE)calloc(LIS_HASHTABLE_SIZE,sizeof(struct LIS_HASH_STRUCT *));
    if( buckets==NULL )
    {
        LIS_SETERR_MEM(LIS_HASHTABLE_SIZE*sizeof(struct LIS_HASH_STRUCT *));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    *hashtable = buckets;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Allocate an empty vector object with the given precision and
 * communicator.  The object is initialised with lis_vector_init() and its
 * status is LIS_VECTOR_NULL.
 *
 * On allocation failure *vec is NULL and LIS_OUT_OF_MEMORY is returned.
 */
LIS_INT lis_vector_createex(LIS_INT precision, LIS_Comm comm, LIS_VECTOR *vec)
{
    LIS_DEBUG_FUNC_IN;

    *vec = (LIS_VECTOR)lis_malloc( sizeof(struct LIS_VECTOR_STRUCT),"lis_vector_createex::vec" );
    if( *vec==NULL )
    {
        LIS_SETERR_MEM(sizeof(struct LIS_VECTOR_STRUCT));
        return LIS_OUT_OF_MEMORY;
    }

    lis_vector_init(vec);

    (*vec)->comm      = comm;
    (*vec)->precision = precision;
    (*vec)->status    = LIS_VECTOR_NULL;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Insert the pair (index, value) at the head of its bucket chain.
 *
 * The bucket is index % LIS_HASHTABLE_SIZE.
 * NOTE(review): a negative index would give a negative bucket number -
 * index is presumably always non-negative; confirm against callers.
 * No check for an existing entry with the same index is made.
 *
 * Returns LIS_SUCCESS or LIS_ERR_OUT_OF_MEMORY.
 */
LIS_INT lis_hashtable_set_value(LIS_HASHTABLE hashtable, LIS_INT index, LIS_INT value)
{
    LIS_INT     hashval;
    LIS_HASH    p;

    LIS_DEBUG_FUNC_IN;

    p = (LIS_HASH)malloc(sizeof(struct LIS_HASH_STRUCT));
    if( p==NULL )
    {
        /* report the size of the node that was requested */
        LIS_SETERR_MEM(sizeof(struct LIS_HASH_STRUCT));
        return LIS_ERR_OUT_OF_MEMORY;
    }

    hashval = index%LIS_HASHTABLE_SIZE;

    p->next            = hashtable[hashval];
    p->index           = index;
    p->value           = value;
    hashtable[hashval] = p;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Create a zero-filled vector with the same layout as A.
 *
 * A may be a vector or a matrix; it is accepted when its label is
 * LIS_LABEL_VECTOR or LIS_LABEL_MATRIX and is accessed through the vector
 * view in both cases.
 *
 * precision == 0 : np+pad scalars are allocated.
 * otherwise      : 2*(np+pad) + (np+pad)%2 scalars are allocated, value_lo
 *                  points into the second half, and a 32-scalar work buffer
 *                  is added.
 * With USE_MPI the ranges table of A is copied; otherwise ranges is NULL.
 *
 * Returns LIS_SUCCESS, LIS_ERR_ILL_ARG or LIS_OUT_OF_MEMORY.  After an
 * allocation failure the partial vector is destroyed and *vout is NULL.
 */
LIS_INT lis_vector_duplicateex(LIS_INT precision, void *A, LIS_VECTOR *vout)
{
    LIS_INT     np,pad,len;
    LIS_INT     nprocs;
    LIS_INT     i;
    #ifdef USE_MPI
        LIS_INT *ranges;
    #endif
    LIS_SCALAR  *value;
    LIS_VECTOR  src;
    LIS_VECTOR  dup;

    LIS_DEBUG_FUNC_IN;

    src = (LIS_VECTOR)A;
    if( src->label!=LIS_LABEL_VECTOR && src->label!=LIS_LABEL_MATRIX )
    {
        LIS_SETERR(LIS_ERR_ILL_ARG, "Second argument is not LIS_VECTOR or LIS_MATRIX\n");
        return LIS_ERR_ILL_ARG;
    }
    nprocs = src->nprocs;
    np     = src->np;
    pad    = src->pad;
    len    = np + pad;

    *vout = NULL;
    *vout = (LIS_VECTOR)lis_malloc( sizeof(struct LIS_VECTOR_STRUCT),"lis_vector_duplicateex::vout" );
    if( NULL==*vout )
    {
        LIS_SETERR_MEM(sizeof(struct LIS_VECTOR_STRUCT));
        return LIS_OUT_OF_MEMORY;
    }
    lis_vector_init(vout);
    dup = *vout;

    if( precision )
    {
        /* high and low words share one buffer */
        value = (LIS_SCALAR *)lis_malloc( (2*len + len%2)*sizeof(LIS_SCALAR),"lis_vector_duplicateex::value" );
        if( NULL==value )
        {
            LIS_SETERR_MEM((2*len + len%2)*sizeof(LIS_SCALAR));
            lis_vector_destroy(dup);
            *vout = NULL;
            return LIS_OUT_OF_MEMORY;
        }
        dup->value    = value;
        dup->value_lo = value + len + len%2;
        dup->work     = (LIS_SCALAR *)lis_malloc( 32*sizeof(LIS_SCALAR),"lis_vector_duplicateex::vout->work" );
        if( NULL==dup->work )
        {
            LIS_SETERR_MEM(32*sizeof(LIS_SCALAR));
            lis_vector_destroy(dup);
            *vout = NULL;
            return LIS_OUT_OF_MEMORY;
        }
        #ifdef USE_VEC_COMP
        #pragma cdir nodep
        #endif
        #ifdef _OPENMP
        #pragma omp parallel for private(i)
        #endif
        for(i=0;i<len;i++)
        {
            dup->value[i]    = 0.0;
            dup->value_lo[i] = 0.0;
        }
    }
    else
    {
        value = (LIS_SCALAR *)lis_malloc( len*sizeof(LIS_SCALAR),"lis_vector_duplicateex::value" );
        if( NULL==value )
        {
            LIS_SETERR_MEM(len*sizeof(LIS_SCALAR));
            lis_vector_destroy(dup);
            *vout = NULL;
            return LIS_OUT_OF_MEMORY;
        }
        dup->value = value;
        #ifdef _OPENMP
        #pragma omp parallel for private(i)
        #endif
        for(i=0;i<len;i++)
        {
            dup->value[i] = 0.0;
        }
    }

    #ifdef USE_MPI
        ranges = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_vector_duplicateex::ranges" );
        if( ranges==NULL )
        {
            LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
            lis_vector_destroy(dup);
            *vout = NULL;
            return LIS_OUT_OF_MEMORY;
        }
        for(i=0;i<nprocs+1;i++) ranges[i] = src->ranges[i];
        dup->ranges = ranges;
    #else
        dup->ranges = NULL;
    #endif

    /* copy the layout description from the source object */
    dup->is_copy    = LIS_TRUE;
    dup->status     = LIS_VECTOR_ASSEMBLED;
    dup->precision  = precision;
    dup->n          = src->n;
    dup->gn         = src->gn;
    dup->np         = src->np;
    dup->pad        = src->pad;
    dup->comm       = src->comm;
    dup->my_rank    = src->my_rank;
    dup->nprocs     = src->nprocs;
    dup->is         = src->is;
    dup->ie         = src->ie;
    dup->origin     = src->origin;
    dup->is_destroy = src->is_destroy;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Work out the row distribution of a vector or matrix.
 *
 * Exactly one of the two sizes drives the result:
 *   - if the local sizes are all zero (their sum over comm with USE_MPI,
 *     *local_n itself without), the distribution is derived from *global_n
 *     and *local_n is filled in;
 *   - otherwise *global_n, *is and *ie are derived from the local sizes.
 *
 * Outputs: *nprocs, *my_rank, the owned range [*is, *ie) and *ranges.
 * With USE_MPI *ranges is a new array of *nprocs+1 offsets; without it
 * *ranges is NULL, *nprocs is 1 and *my_rank is 0.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_ranges_create(LIS_Comm comm, LIS_INT *local_n, LIS_INT *global_n, LIS_INT **ranges, LIS_INT *is, LIS_INT *ie, LIS_INT *nprocs, LIS_INT *my_rank)
{
#ifdef USE_MPI
    LIS_INT k;
    LIS_INT total;
    LIS_INT *offsets;
    int     comm_size,comm_rank;

    LIS_DEBUG_FUNC_IN;

    MPI_Comm_size(comm,&comm_size);
    MPI_Comm_rank(comm,&comm_rank);
    *nprocs  = comm_size;
    *my_rank = comm_rank;

    offsets = (LIS_INT *)lis_malloc( (*nprocs+1)*sizeof(LIS_INT),"lis_ranges_create::tranges" );
    if( offsets==NULL )
    {
        LIS_SETERR_MEM((*nprocs+1)*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }

    MPI_Allreduce(local_n,&total,1,LIS_MPI_INT,MPI_SUM,comm);
    if( total==0 )
    {
        /* no local sizes given: split the global size */
        LIS_GET_ISIE(*my_rank,*nprocs,*global_n,*is,*ie);
        *local_n = *ie-*is;
        MPI_Allgather(ie,1,LIS_MPI_INT,&offsets[1],1,LIS_MPI_INT,comm);
        offsets[0] = 0;
    }
    else
    {
        /* local sizes given: prefix-sum them into offsets */
        MPI_Allgather(local_n,1,LIS_MPI_INT,&offsets[1],1,LIS_MPI_INT,comm);
        offsets[0] = 0;
        for(k=0;k<*nprocs;k++)
        {
            offsets[k+1] += offsets[k];
        }
        *global_n = offsets[*nprocs];
        *is       = offsets[*my_rank];
        *ie       = offsets[*my_rank+1];
    }
    *ranges = offsets;
#else
    LIS_DEBUG_FUNC_IN;

    *nprocs  = 1;
    *my_rank = 0;

    if( *local_n==0 )
    {
        *local_n = *global_n;
        *is      = 0;
        *ie      = *global_n;
    }
    else
    {
        *global_n = *local_n;
        *is       = 0;
        *ie       = *local_n;
    }
    *ranges = NULL;
#endif

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Library start-up.
 *
 *  - With USE_MPI: initialise MPI unless it already is, and (quad
 *    precision builds) register the LIS_MPI_MSCALAR datatype and the
 *    LIS_MPI_MSUM reduction.
 *  - Parse the command line into cmd_args and scan it for the option whose
 *    action is LIS_INIT_OPTIONS_OMPNUMTHREADS; its value overrides the
 *    thread count.
 *  - With _OPENMP: apply the thread count and allocate lis_vec_tmp.
 *  - With USE_QUAD_PRECISION: allocate lis_quad_scalar_tmp and initialise
 *    the x87 FPU control word.
 *  - Finally look for arguments starting with "-help" or "-ver".
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY (lis_vec_tmp) or
 * LIS_OUT_OF_MEMORY (lis_quad_scalar_tmp).
 */
LIS_INT lis_initialize(LIS_INT* argc, char** argv[])
{
    LIS_ARGS    arg;
    LIS_INT     opt,k,nthreads;

    LIS_DEBUG_FUNC_IN;

#ifdef USE_MPI
    MPI_Initialized(&lis_mpi_initialized);
    if (!lis_mpi_initialized) MPI_Init(argc, argv);

    #ifdef USE_QUAD_PRECISION
        MPI_Type_contiguous( LIS_MPI_MSCALAR_LEN, MPI_DOUBLE, &LIS_MPI_MSCALAR );
        MPI_Type_commit( &LIS_MPI_MSCALAR );
        MPI_Op_create((MPI_User_function *)lis_mpi_msum, LIS_TRUE, &LIS_MPI_MSUM);
    #endif
#endif

#ifdef _OPENMP
    nthreads = omp_get_max_threads();
#endif

    lis_arg2args(*argc,*argv,&cmd_args);
    for( arg=cmd_args->next; arg!=cmd_args; arg=arg->next )
    {
        for(opt=0;opt<LIS_INIT_OPTIONS_LEN;opt++)
        {
            if( strcmp(arg->arg1, LIS_INIT_OPTNAME[opt])!=0 ) continue;

            if( LIS_INIT_OPTACT[opt]==LIS_INIT_OPTIONS_OMPNUMTHREADS )
            {
#ifdef _LONGLONG
                sscanf(arg->arg2, "%lld", &nthreads);
#else
                sscanf(arg->arg2, "%d", &nthreads);
#endif
            }
        }
    }

#ifdef _OPENMP
    omp_set_num_threads(nthreads);
    lis_vec_tmp = (LIS_SCALAR *)lis_malloc( nthreads*LIS_VEC_TMP_PADD*sizeof(LIS_QUAD),"lis_initialize::lis_vec_tmp" );
    if( lis_vec_tmp==NULL )
    {
        LIS_SETERR_MEM(nthreads*LIS_VEC_TMP_PADD*sizeof(LIS_QUAD));
        return LIS_ERR_OUT_OF_MEMORY;
    }
#endif

#ifdef USE_QUAD_PRECISION
    lis_quad_scalar_tmp = (LIS_SCALAR *)lis_malloc( LIS_QUAD_SCALAR_SIZE*sizeof(LIS_SCALAR),"lis_initialize::lis_quad_scalar_tmp" );
    if( lis_quad_scalar_tmp==NULL )
    {
        LIS_SETERR_MEM(LIS_QUAD_SCALAR_SIZE*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    lis_quad_x87_fpu_init(&lis_x87_fpu_cw);
#endif

    /* argument 0 is skipped */
    for(k=1;k<*argc;k++)
    {
        if( strncmp((*argv)[k], "-help", 5)==0 )
        {
            CHKERR(1);
        }
        else if( strncmp((*argv)[k], "-ver", 4)==0 )
        {
            lis_version();
            CHKERR(1);
        }
    }

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Build a row/column matching for the CSR matrix A and store it in iperm.
 * NOTE(review): this looks like a 0-based port of the MC21 maximum
 * transversal routine - confirm against the reference algorithm.
 *
 * iperm : caller-provided array of A->n entries.  On return iperm[i] is the
 *         row j matched to column i (set via iperm[i] = j below).
 *
 * Work arrays (one allocation of 4*n integers):
 *   pr  - predecessor row on the current search path, -1 at the path root
 *   cv  - cv[i] == jord marks column i as visited in the current pass
 *   arp - per row, entries still unexamined by the cheap scan, -1 when
 *         exhausted
 *   out - per row, entries still unexamined by the depth-first search
 *
 * If the matching found is incomplete (numnz != n) the remaining columns
 * are paired with the remaining rows in increasing order, so iperm is
 * always a full permutation.
 *
 * Returns LIS_SUCCESS or LIS_OUT_OF_MEMORY.
 */
LIS_INT lis_matrix_ordering_mc21(LIS_MATRIX A, LIS_INT *iperm)
{
    LIS_INT n,numnz,jord,i,j,k,ii,kk,in1,in2,j1;
    LIS_INT *pr,*cv,*arp,*out;

    n = A->n;

    pr = (LIS_INT *)lis_malloc(4*n*sizeof(LIS_INT),"lis_matrix_ordering_mc21:pr");
    if( pr==NULL )
    {
        LIS_SETERR_MEM(4*n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    cv  = pr + n;
    arp = pr + 2*n;
    out = pr + 3*n;

    for(i=0;i<n;i++)
    {
        arp[i]   = A->ptr[i+1] - A->ptr[i] - 1;
        cv[i]    = -1;
        iperm[i] = -1;      /* -1 means: column not matched yet */
    }
    numnz = 0;

    for(jord=0;jord<n;)
    {
        j     = jord;
        pr[j] = -1;
        for(k=0;k<=jord;k++)
        {
            /* cheap assignment: look for an unmatched column in row j */
            in1 = arp[j];
            if( in1>=0 )
            {
                in2 = A->ptr[j+1] - 1;
                in1 = in2 - in1;
                for(ii=in1;ii<=in2;ii++)
                {
                    i = A->index[ii];
                    if( iperm[i]==-1 ) goto mc21_80;
                }
                arp[j] = -1;
            }
            /* no cheap assignment: extend the search path or backtrack */
            out[j] = A->ptr[j+1] - A->ptr[j] - 1;
            for(kk=0;kk<jord;kk++)
            {
                in1 = out[j];
                if( in1>=0 )
                {
                    in2 = A->ptr[j+1] - 1;
                    in1 = in2 - in1;
                    for(ii=in1;ii<=in2;ii++)
                    {
                        i = A->index[ii];
                        if( cv[i]!=jord ) break;
                    }
                    if( cv[i]!=jord )
                    {
                        j1      = j;
                        j       = iperm[i];
                        cv[i]   = jord;
                        pr[j]   = j1;
                        out[j1] = in2 - ii - 1;
                        break;
                    }
                }
                j = pr[j];
                if( j==-1 ) goto mc21_100;
            }
        }

mc21_80:
        /* new assignment found: record it and re-assign along the path */
        iperm[i] = j;
        arp[j]   = in2 - ii - 1;
        numnz    = numnz + 1;
        for(k=0;k<jord;k++)
        {
            j = pr[j];
            if( j==-1 ) break;
            ii       = A->ptr[j+1] - out[j] - 2;
            i        = A->index[ii];
            iperm[i] = j;
        }

mc21_100:
        jord++;
    }

    if( numnz!=n )
    {
        /*
         * Incomplete matching: complete iperm to a permutation.
         * arp[] is reused as "row already used" flags and out[] as the
         * list of unmatched columns.
         */
        for(i=0;i<n;i++)
        {
            arp[i] = 0;
        }
        k = 0;
        for(i=0;i<n;i++)
        {
            if( iperm[i]!=-1 )
            {
                arp[iperm[i]] = 1;
            }
            else
            {
                out[k] = i;
                k = k + 1;
            }
        }
        k = 0;
        for(i=0;i<n;i++)
        {
            if( arp[i]!=0 ) continue;
            iperm[out[k]] = i;
            k = k + 1;
        }
    }

    lis_free(pr);
    return LIS_SUCCESS;
}
/*
 * Build the factors W, Z and the diagonal D of a sparse approximate inverse
 * preconditioner for the CSR matrix solver->A, one row at a time, and store
 * them in precon->L, precon->U and precon->D.
 *
 * For each row i:
 *   - l and u start as the unit vector e_i;
 *   - r holds row i of A, c holds column i of A (taken from the CSC copy B);
 *   - for every earlier row j, W_i and Z_i are corrected by W_j and Z_j
 *     scaled with d[j]; contributions whose magnitude does not exceed
 *     tol_dd = tol * max|A(i,:)| are dropped;
 *   - the surviving entries become row i of W and Z;
 *   - d[i] = 1 / (W_i^T A Z_i).
 *
 * The dense scratch arrays r and c are zero-filled before the first row,
 * because rows j < i read them at arbitrary column positions and each
 * iteration only clears the positions it wrote.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the first error reported by a
 * callee.
 * NOTE(review): the early error returns leave previously acquired objects
 * (W, Z, D, B and the scratch arrays) allocated.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT     err;
    LIS_INT     i,j,k,ii,jj;
    LIS_INT     n,cl,cu;
    LIS_INT     *wu,*wl,*il,*iu;
    LIS_SCALAR  t,v;
    LIS_REAL    tol,tol_dd,nrm;
    LIS_SCALAR  *d,*r,*c,*l,*u;
    LIS_MATRIX  A,B;
    LIS_MATRIX_ILU  W,Z;
    LIS_VECTOR  D;

    LIS_DEBUG_FUNC_IN;

    A   = solver->A;
    n   = A->n;
    tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];

    W  = NULL;
    Z  = NULL;
    wu = NULL;
    wl = NULL;
    d  = NULL;
    l  = NULL;
    u  = NULL;
    il = NULL;
    iu = NULL;

    err = lis_matrix_ilu_create(n,1,&W);
    if( err ) return err;
    err = lis_matrix_ilu_create(n,1,&Z);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(W);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(Z);
    if( err ) return err;
    err = lis_vector_duplicate(A,&D);
    if( err ) return err;
    d = D->value;

    r = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( r==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    c = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( c==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( l==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( u==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
    if( il==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
    if( iu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    wu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( wu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    wl = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( wl==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }

    lis_matrix_sort_csr(A);
    err = lis_matrix_duplicate(A,&B);
    if( err ) return err;
    err = lis_matrix_convert_csr2csc(A,B);
    if( err ) return err;

    for(i=0;i<n;i++)
    {
        wu[i] = 0;
        wl[i] = 0;
        /* r and c are read at positions no iteration has written yet */
        r[i]  = 0.0;
        c[i]  = 0.0;
    }

    for(i=0; i<n; i++)
    {
        /* nrm_inf(A[i,:]) */
        nrm = 0.0;
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            nrm = _max(nrm,fabs(A->value[j]));
        }
        tol_dd = nrm * tol;

        /* l = e_i */
        /* u = e_i */
        l[i]  = 1.0;
        u[i]  = 1.0;
        il[0] = i;
        iu[0] = i;
        cl    = 1;
        cu    = 1;
        wu[i] = 1;
        wl[i] = 1;

        /* r = e_i^T*A */
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            jj    = A->index[j];
            r[jj] = A->value[j];
        }
        /* c = A_i = A*e_i */
        for(j=B->ptr[i];j<B->ptr[i+1];j++)
        {
            jj    = B->index[j];
            c[jj] = B->value[j];
        }

        /* W_i = W_i - (r*Z_j/D_jj)*W_j */
        for(j=0;j<i;j++)
        {
            t = 0.0;
            for(k=0;k<Z->nnz[j];k++)
            {
                t += r[Z->index[j][k]]*Z->value[j][k];
            }
            t = t * d[j];
            if( fabs(t) > tol_dd )
            {
                for(k=0;k<W->nnz[j];k++)
                {
                    v = t * W->value[j][k];
                    if( fabs(v) > tol_dd )
                    {
                        jj = W->index[j][k];
                        if( wl[jj]==1 )
                        {
                            l[jj] -= v;
                        }
                        else
                        {
                            l[jj]    = -v;
                            il[cl++] = jj;
                            wl[jj]   = 1;
                        }
                    }
                }
            }
        }

        /* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j */
        for(j=0;j<i;j++)
        {
            t = 0.0;
            for(k=0;k<W->nnz[j];k++)
            {
                t += c[W->index[j][k]]*W->value[j][k];
            }
            t = t * d[j];
            if( fabs(t) > tol_dd )
            {
                for(k=0;k<Z->nnz[j];k++)
                {
                    v = t * Z->value[j][k];
                    if( fabs(v) > tol_dd )
                    {
                        jj = Z->index[j][k];
                        if( wu[jj]==1 )
                        {
                            u[jj] -= v;
                        }
                        else
                        {
                            u[jj]    = -v;
                            iu[cu++] = jj;
                            wu[jj]   = 1;
                        }
                    }
                }
            }
        }

        /* store row i of W */
        W->nnz[i] = cl;
        if( cl > 0 )
        {
            W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT));
            if( W->index[i]==NULL )
            {
                LIS_SETERR_MEM(cl*sizeof(LIS_INT));
                return LIS_OUT_OF_MEMORY;
            }
            W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR));
            if( W->value[i]==NULL )
            {
                LIS_SETERR_MEM(cl*sizeof(LIS_SCALAR));
                return LIS_OUT_OF_MEMORY;
            }
            memcpy(W->index[i],il,cl*sizeof(LIS_INT));
            for(j=0;j<cl;j++)
            {
                W->value[i][j] = l[il[j]];
            }
        }
        /* store row i of Z */
        Z->nnz[i] = cu;
        if( cu > 0 )
        {
            Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT));
            if( Z->index[i]==NULL )
            {
                LIS_SETERR_MEM(cu*sizeof(LIS_INT));
                return LIS_OUT_OF_MEMORY;
            }
            Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR));
            if( Z->value[i]==NULL )
            {
                LIS_SETERR_MEM(cu*sizeof(LIS_SCALAR));
                return LIS_OUT_OF_MEMORY;
            }
            memcpy(Z->index[i],iu,cu*sizeof(LIS_INT));
            for(j=0;j<cu;j++)
            {
                Z->value[i][j] = u[iu[j]];
            }
        }

        /* clear only the scratch positions this row touched */
        for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0;
        for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0;
        for(j=0;j<cl;j++)
        {
            wl[il[j]] = 0;
            l[il[j]]  = 0.0;
        }
        for(j=0;j<cu;j++)
        {
            wu[iu[j]] = 0;
        }

        /* D_ii = W_i^T * A * Z_i */
        cl = 0;
        for(k=0;k<Z->nnz[i];k++)
        {
            ii = Z->index[i][k];
            for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
            {
                jj = B->index[j];
                if( wl[jj]==0 )
                {
                    l[jj]    = B->value[j]*Z->value[i][k];
                    wl[jj]   = 1;
                    il[cl++] = jj;
                }
                else
                {
                    l[jj] += B->value[j]*Z->value[i][k];
                }
            }
        }
        t = 0.0;
        for(j=0;j<W->nnz[i];j++)
        {
            k  = W->index[i][j];
            t += W->value[i][j] * l[k];
        }
        d[i] = 1.0 / t;
        for(j=0;j<cl;j++) wl[il[j]] = 0;
    }

    lis_matrix_destroy(B);
    lis_free2(8,r,c,il,l,wl,iu,u,wu);

    precon->L = W;
    precon->U = Z;
    precon->D = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Build the factors W, Z and the diagonal D of a sparse approximate inverse
 * preconditioner for the CSR matrix solver->A and store them in precon->L,
 * precon->U and precon->D.
 *
 * W and Z start as identity rows.  For each row i:
 *   - l = A*Z_i (entries below row i), u = W_i^T*A;
 *   - d[i] = 1 / (u*Z_i);
 *   - every later row j with l_j != 0 gets W_j -= (l_j*d[i]) * W_i, and
 *     every later row j with u_j != 0 gets Z_j -= (u_j*d[i]) * Z_i;
 *     a contribution t is kept only if |t| / max|A(i,:)| > tol.
 * ww is a marker array: while row j is being updated it maps a column to
 * (its position in row j) + 1, and 0 means "not present".
 *
 * Row storage is pre-allocated with annz slots per row and grown by annz
 * whenever a row is full.  annz is n/10 but never less than 1, so each row
 * can hold its initial diagonal entry and can actually grow.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the first error reported by a
 * callee.
 * NOTE(review): the early error returns leave previously acquired objects
 * allocated.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
    LIS_INT     err;
    LIS_INT     i,j,k,ii,jj,ik,jk;
    LIS_INT     n,annz,cl,cu;
    LIS_INT     *ww,*il,*iu;
    LIS_SCALAR  t,dd;
    LIS_REAL    tol,nrm;
    LIS_SCALAR  *d,*l,*u;
    LIS_MATRIX  A,B;
    LIS_MATRIX_ILU  W,Z;
    LIS_VECTOR  D;

    LIS_DEBUG_FUNC_IN;

    A    = solver->A;
    n    = A->n;
    tol  = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
    annz = A->n / 10;
    if( annz<1 )
    {
        /* small systems: still need room for the diagonal entry */
        annz = 1;
    }

    W  = NULL;
    Z  = NULL;
    ww = NULL;
    d  = NULL;
    l  = NULL;
    u  = NULL;
    il = NULL;
    iu = NULL;

    err = lis_matrix_ilu_create(n,1,&W);
    if( err ) return err;
    err = lis_matrix_ilu_create(n,1,&Z);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(W);
    if( err ) return err;
    err = lis_matrix_ilu_setCR(Z);
    if( err ) return err;
    err = lis_vector_duplicate(A,&D);
    if( err ) return err;
    d = D->value;
    err = lis_matrix_ilu_premalloc(annz,W);
    if( err ) return err;
    err = lis_matrix_ilu_premalloc(annz,Z);
    if( err ) return err;

    l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
    if( l==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
    if( u==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
    }
    il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
    if( il==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
    if( iu==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }
    ww = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
    if( ww==NULL )
    {
        LIS_SETERR_MEM(n*sizeof(LIS_INT));
        return LIS_OUT_OF_MEMORY;
    }

    err = lis_matrix_duplicate(A,&B);
    if( err ) return err;
    err = lis_matrix_convert_csr2csc(A,B);
    if( err )
    {
        return err;
    }

    for(i=0;i<n;i++) ww[i] = 0;
    /* W = Z = I */
    for(i=0;i<n;i++)
    {
        W->value[i][0] = 1.0;
        W->index[i][0] = i;
        W->nnz[i]      = 1;
        Z->value[i][0] = 1.0;
        Z->index[i][0] = i;
        Z->nnz[i]      = 1;
    }

    for(i=0; i<n; i++)
    {
        /* nrm_inf(A[i,:]) */
        nrm = 0.0;
        for(j=A->ptr[i];j<A->ptr[i+1];j++)
        {
            nrm = _max(nrm,fabs(A->value[j]));
        }
        nrm = 1.0/nrm;

        /* l = AZ_i */
        cl = 0;
        memset(l,0,n*sizeof(LIS_SCALAR));
        for(k=0;k<Z->nnz[i];k++)
        {
            ii = Z->index[i][k];
            for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
            {
                jj = B->index[j];
                if( jj>i )
                {
                    l[jj] += B->value[j]*Z->value[i][k];
                    if( ww[jj]==0 )
                    {
                        ww[jj]   = 1;
                        il[cl++] = jj;
                    }
                }
            }
        }
        for(k=0;k<cl;k++) ww[il[k]] = 0;

        /* u = W_i'A */
        cu = 0;
        memset(u,0,n*sizeof(LIS_SCALAR));
        for(k=0;k<W->nnz[i];k++)
        {
            ii = W->index[i][k];
            for(j=A->ptr[ii];j<A->ptr[ii+1];j++)
            {
                jj = A->index[j];
#ifdef USE_MPI
                if( jj>n-1 ) continue;
#endif
                u[jj] += A->value[j]*W->value[i][k];
                if( jj>i && ww[jj]==0 )
                {
                    ww[jj]   = 1;
                    iu[cu++] = jj;
                }
            }
        }
        for(k=0;k<cu;k++) ww[iu[k]] = 0;

        /* d_ii = uZ_i or W_i'l */
        t = 0.0;
        for(k=0;k<Z->nnz[i];k++)
        {
            t += u[Z->index[i][k]]*Z->value[i][k];
        }
        d[i] = 1.0/t;

        /* for j>i, l_j!=0 */
        /* w_j = w_j - (l_j/d_ii)*w_i */
        for(jj=0;jj<cl;jj++)
        {
            j  = il[jj];
            dd = l[j]*d[i];
            for(k=0;k<W->nnz[j];k++)
            {
                ww[W->index[j][k]] = k+1;
            }
            for(ik=0;ik<W->nnz[i];ik++)
            {
                jk = ww[W->index[i][ik]];
                if( jk!=0 )
                {
                    /* column already present in row j: update in place */
                    t = dd*W->value[i][ik];
                    if( fabs(t)*nrm > tol )
                    {
                        W->value[j][jk-1] -= t;
                    }
                }
                else
                {
                    /* new column: append it, growing the row if full */
                    t = dd*W->value[i][ik];
                    if( fabs(t)*nrm > tol )
                    {
                        if( W->nnz[j] == W->nnz_ma[j] )
                        {
                            W->nnz_ma[j] += annz;
                            err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W);
                            if( err ) return err;
                        }
                        jk = W->nnz[j];
                        W->index[j][jk] = W->index[i][ik];
                        W->value[j][jk] = -t;
                        W->nnz[j]++;
                    }
                }
            }
            for(k=0;k<W->nnz[j];k++)
            {
                ww[W->index[j][k]] = 0;
            }
        }

        /* for j>i, u_j!=0 */
        /* z_j = z_j - (u_j/d_ii)*z_i */
        for(jj=0;jj<cu;jj++)
        {
            j  = iu[jj];
            dd = u[j]*d[i];
            for(k=0;k<Z->nnz[j];k++)
            {
                ww[Z->index[j][k]] = k+1;
            }
            for(ik=0;ik<Z->nnz[i];ik++)
            {
                jk = ww[Z->index[i][ik]];
                if( jk!=0 )
                {
                    t = dd*Z->value[i][ik];
                    if( fabs(t)*nrm > tol )
                    {
                        Z->value[j][jk-1] -= t;
                    }
                }
                else
                {
                    t = dd*Z->value[i][ik];
                    if( fabs(t)*nrm > tol )
                    {
                        if( Z->nnz[j] == Z->nnz_ma[j] )
                        {
                            Z->nnz_ma[j] += annz;
                            err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z);
                            if( err ) return err;
                        }
                        jk = Z->nnz[j];
                        Z->index[j][jk] = Z->index[i][ik];
                        Z->value[j][jk] = -t;
                        Z->nnz[j]++;
                    }
                }
            }
            for(k=0;k<Z->nnz[j];k++)
            {
                ww[Z->index[j][k]] = 0;
            }
        }
    }

    lis_matrix_destroy(B);
    lis_free2(5,l,u,ww,il,iu);

    precon->L = W;
    precon->U = Z;
    precon->D = D;

    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
}
/*
 * Insert or accumulate a contiguous run of entries of a vector.
 *
 * flag   LIS_INS_VALUE overwrites the destination entries; any other value
 *        adds value[i] to the entry already stored.
 * start  index of the first destination entry.  When v->origin is non-zero
 *        the index is decremented by one before use.
 * count  number of consecutive entries to process.
 * value  source values, value[0..count-1].
 * v      destination vector.  If v->status is LIS_VECTOR_NULL, storage for
 *        v->np entries is allocated (and zero-filled) here.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when the storage cannot be
 * allocated, or LIS_ERR_ILL_ARG as soon as a destination index falls outside
 * [v->is, v->ie).  Entries written before the offending index stay modified.
 */
LIS_INT lis_vector_set_values2(LIS_INT flag, LIS_INT start, LIS_INT count, LIS_SCALAR value[], LIS_VECTOR v)
{
	LIS_INT np,i,is,ie;
	LIS_INT idx;

	LIS_DEBUG_FUNC_IN;

	np = v->np;
	is = v->is;
	ie = v->ie;
	if(v->status==LIS_VECTOR_NULL)
	{
		v->value = (LIS_SCALAR *)lis_malloc( np*sizeof(LIS_SCALAR),"lis_vector_set_values::v->value" );
		if( NULL==v->value )
		{
			LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
			return LIS_OUT_OF_MEMORY;
		}
		/* The accumulate path reads the old entry, so fresh storage must not hold garbage. */
		for(i=0;i<np;i++)
		{
			v->value[i] = 0.0;
		}
		v->is_copy = LIS_TRUE;
		v->status  = LIS_VECTOR_ASSEMBLING;
	}

	/*
	 * Both modes share one index computation so they cannot drift apart:
	 * the destination is start+i, shifted down by one when v->origin is set.
	 */
	for(i=0;i<count;i++)
	{
		idx = start + i;
		if( v->origin ) idx--;
		if( idx<is || idx>=ie )
		{
			/* Report the index and the valid range in the caller's own numbering. */
			if( v->origin )
			{
				is++;
				ie++;
				idx++;
			}
			LIS_SETERR3(LIS_ERR_ILL_ARG, "%d is less than %d or not less than %d\n",idx,is,ie);
			return LIS_ERR_ILL_ARG;
		}
		if(flag==LIS_INS_VALUE)
		{
			v->value[idx-is] = value[i];
		}
		else
		{
			v->value[idx-is] += value[i];
		}
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Split the CSR storage of A into two CSR parts according to column index.
 *
 * Entries whose column index is < n (n = A->n) are copied, in row order, into
 * A->L; all remaining entries are copied into A->U.  Note that the split is by
 * column range, not by position relative to the diagonal.
 * NOTE(review): columns >= n presumably refer to off-process columns -- confirm
 * against the callers.
 *
 * On success A->L / A->U receive nnz, ptr, index and value, and
 * A->is_splited is set to LIS_TRUE.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY if the OpenMP scratch arrays cannot
 * be allocated, or the error code reported by lis_matrix_LU_create /
 * lis_matrix_malloc_csr.
 */
LIS_INT lis_matrix_split2_csr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT nnzl,nnzu;
	LIS_INT err;
	LIS_INT *lptr,*lindex,*uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif

	LIS_DEBUG_FUNC_IN;

	n      = A->n;
	nnzl   = 0;
	nnzu   = 0;
	lptr   = NULL;
	lindex = NULL;
	lvalue = NULL;
	uptr   = NULL;
	uindex = NULL;
	uvalue = NULL;

	/* Pass 1: count how many entries go to each part. */
	#ifdef _OPENMP
		/* Per-row counts are kept in liw/uiw so the copy pass can run in parallel. */
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					liw[i+1]++;
				}
				else
				{
					uiw[i+1]++;
				}
			}
		}
		/* Prefix sum turns the per-row counts into row pointers. */
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		nnzl = liw[n];
		nnzu = uiw[n];
	#else
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					nnzl++;
				}
				else
				{
					nnzu++;
				}
			}
		}
	#endif

	/* Every error return below must release the OpenMP scratch arrays. */
	err = lis_matrix_LU_create(A);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
	if( err )
	{
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}

	/* Pass 2: copy the entries into the two parts. */
	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lptr[i] = liw[i];
			uptr[i] = uiw[i];
		}
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			/* Each row writes only into its own slice, so rows are independent. */
			kl = lptr[i];
			ku = uptr[i];
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[kl] = A->index[j];
					lvalue[kl] = A->value[j];
					kl++;
				}
				else
				{
					uindex[ku] = A->index[j];
					uvalue[ku] = A->value[j];
					ku++;
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		/* The counters are reused as running write positions. */
		nnzl    = 0;
		nnzu    = 0;
		lptr[0] = 0;
		uptr[0] = 0;
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[nnzl] = A->index[j];
					lvalue[nnzl] = A->value[j];
					nnzl++;
				}
				else
				{
					uindex[nnzu] = A->index[j];
					uvalue[nnzu] = A->value[j];
					nnzu++;
				}
			}
			lptr[i+1] = nnzl;
			uptr[i+1] = nnzu;
		}
	#endif

	A->L->nnz     = nnzl;
	A->L->ptr     = lptr;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = nnzu;
	A->U->ptr     = uptr;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->is_splited = LIS_TRUE;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Fix the local and global sizes of a vector and allocate its zeroed storage.
 *
 * vec       vector to size; vec->comm and vec->precision are read.
 * local_n   requested number of local entries (must be >= 0).
 * global_n  requested number of global entries (must be >= 0, and not
 *           smaller than local_n when positive).
 *
 * Both sizes are passed by address to lis_ranges_create, which may update
 * them; the values it leaves behind are the ones stored in the vector.
 *
 * Returns LIS_SUCCESS, LIS_ERR_ILL_ARG for inconsistent sizes,
 * LIS_OUT_OF_MEMORY when an allocation fails, or the error code reported by
 * lis_ranges_create.
 */
LIS_INT lis_vector_set_size(LIS_VECTOR vec, LIS_INT local_n, LIS_INT global_n)
{
	LIS_INT num_procs,rank;
	LIS_INT first,last;
	LIS_INT k,status;
	LIS_INT *range_tab;

	LIS_DEBUG_FUNC_IN;

	if( global_n>0 && local_n>global_n )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"local n(=%d) is larger than global n(=%d)\n",local_n,global_n);
		return LIS_ERR_ILL_ARG;
	}
	if( local_n<0 || global_n<0 )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"local n(=%d) or global n(=%d) are less than 0\n",local_n,global_n);
		return LIS_ERR_ILL_ARG;
	}
	/*
	 * local_n==0 together with global_n==0 is deliberately accepted: it means
	 * local sizes were specified and this process owns no entries.
	 */

	status = lis_ranges_create(vec->comm,&local_n,&global_n,&range_tab,&first,&last,&num_procs,&rank);
	if( status )
	{
		return status;
	}
	vec->ranges = range_tab;

	if( vec->precision )
	{
		/*
		 * One buffer holds two parts: value[0..local_n) followed by value_lo,
		 * which starts at local_n rounded up to an even offset.
		 */
		vec->value = (LIS_SCALAR *)lis_malloc( (2*local_n + local_n%2)*sizeof(LIS_SCALAR),"lis_vector_set_size::vec->value" );
		if( NULL==vec->value )
		{
			LIS_SETERR_MEM((2*local_n+local_n%2)*sizeof(LIS_SCALAR));
			return LIS_OUT_OF_MEMORY;
		}
		vec->value_lo = vec->value + local_n + local_n%2;

		vec->work = (LIS_SCALAR *)lis_malloc( 32*sizeof(LIS_SCALAR),"lis_vector_set_size::vec->work" );
		if( NULL==vec->work )
		{
			LIS_SETERR_MEM(32*sizeof(LIS_SCALAR));
			return LIS_OUT_OF_MEMORY;
		}

		#ifdef USE_VEC_COMP
		#pragma cdir nodep
		#endif
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<local_n;k++)
		{
			vec->value[k]    = 0.0;
			vec->value_lo[k] = 0.0;
		}
	}
	else
	{
		/* Default precision: a single array of local_n entries. */
		vec->value = (LIS_SCALAR *)lis_malloc( local_n*sizeof(LIS_SCALAR),"lis_vector_set_size::vec->value" );
		if( NULL==vec->value )
		{
			LIS_SETERR_MEM(local_n*sizeof(LIS_SCALAR));
			return LIS_OUT_OF_MEMORY;
		}

		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<local_n;k++)
		{
			vec->value[k] = 0.0;
		}
	}

	/* Record sizes and ownership range only after all allocations succeeded. */
	vec->n       = local_n;
	vec->np      = local_n;
	vec->gn      = global_n;
	vec->is      = first;
	vec->ie      = last;
	vec->nprocs  = num_procs;
	vec->my_rank = rank;
	vec->is_copy = LIS_TRUE;
	vec->status  = LIS_VECTOR_ASSEMBLED;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}