LIS_INT lis_input_hb(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { LIS_INT err; LIS_INT matrix_type; LIS_MATRIX B; LIS_DEBUG_FUNC_IN; matrix_type = A->matrix_type; err = lis_input_hb_csr(A,b,x,file); if( err ) return err; if( matrix_type!=LIS_MATRIX_CSR && matrix_type!=LIS_MATRIX_CSC ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,matrix_type); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); if( A->matrix_type==LIS_MATRIX_JAD ) { A->work = (LIS_SCALAR *)lis_malloc(A->n*sizeof(LIS_SCALAR),"lis_input_hb::A->work"); if( A->work==NULL ) { LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } } } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_copy_csr(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT err; LIS_INT i,n,nnz,lnnz,unnz; LIS_INT *ptr,*index; LIS_INT *lptr,*lindex; LIS_INT *uptr,*uindex; LIS_SCALAR *value,*lvalue,*uvalue,*diag; LIS_DEBUG_FUNC_IN; n = Ain->n; if( Ain->is_splited ) { lnnz = Ain->L->nnz; unnz = Ain->U->nnz; lptr = NULL; lindex = NULL; uptr = NULL; uindex = NULL; diag = NULL; err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue); if( err ) { return err; } err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue); if( err ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_csr::diag"); if( diag==NULL ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n;i++) { diag[i] = Ain->D->value[i]; } lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue); lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue); err = lis_matrix_setDLU_csr(lnnz,unnz,diag,lptr,lindex,lvalue,uptr,uindex,uvalue,Aout); if( err ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } } if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) ) { ptr = NULL; index = NULL; value = NULL; nnz = Ain->nnz; err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value); if( err ) { return err; } lis_matrix_elements_copy_csr(n,Ain->ptr,Ain->index,Ain->value,ptr,index,value); err = lis_matrix_set_csr(nnz,ptr,index,value,Aout); if( err ) { lis_free2(3,ptr,index,value); return err; } } if( Ain->matrix_type==LIS_MATRIX_CSC ) { Aout->matrix_type = LIS_MATRIX_CSC; Aout->status = -LIS_MATRIX_CSC; err = lis_matrix_assemble(Aout); } else { err = lis_matrix_assemble(Aout); } if( err ) { lis_matrix_storage_destroy(Aout); return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT nsolver, precon_type, maxiter; LIS_INT err; LIS_SCALAR *residual; LIS_VECTOR xx; LIS_INT output; LIS_INT scale; LIS_INT conv_cond; LIS_INT precision,is_use_at,storage,block; LIS_INT i,n,np; double p_c_times, p_i_times,itimes; LIS_SCALAR nrm2,tol,tol_w; LIS_VECTOR t; LIS_VECTOR bb; LIS_MATRIX AA,B; LIS_MATRIX At; char buf[64]; LIS_DEBUG_FUNC_IN; nsolver = solver->options[LIS_OPTIONS_SOLVER]; precon_type = solver->options[LIS_OPTIONS_PRECON]; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; scale = solver->options[LIS_OPTIONS_SCALE]; precision = solver->options[LIS_OPTIONS_PRECISION]; is_use_at = solver->options[LIS_OPTIONS_USE_AT]; storage = solver->options[LIS_OPTIONS_STORAGE]; block = solver->options[LIS_OPTIONS_STORAGE_BLOCK]; conv_cond = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol_w = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN]; solver->precision = precision; if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN); return LIS_ERR_ILL_ARG; } if( precon_type < 0 || precon_type > precon_register_type ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1); return LIS_ERR_ILL_ARG; } if( maxiter<0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter); return LIS_ERR_ILL_ARG; } #ifdef USE_MPI if( precon_type == LIS_PRECON_TYPE_SAAMG && solver->A->nprocs < 2) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs); return LIS_ERR_ILL_ARG; } #endif #ifdef USE_QUAD_PRECISION if( precision==LIS_PRECISION_QUAD && lis_solver_execute_quad[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 ) { solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter; } #endif err = lis_solver_check_params[nsolver](solver); if( err ) { solver->retcode = err; return err; } /* end parameter check */ solver->A = A; solver->b = b; /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { solver->retcode = err; return err; } if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] ) { if( output ) lis_printf(A->comm,"initial vector x = 0\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(0.0,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(0.0,xx); } else { lis_vector_set_allex_nm(0.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x = user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* create residual history vector */ if( solver->residual ) lis_free(solver->residual); residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual"); if( residual==NULL ) { LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); solver->retcode = err; return err; } residual[0] = 1.0; n = A->n; np = A->np; t = NULL; At = NULL; p_c_times = lis_wtime(); if( precon_type==LIS_PRECON_TYPE_IS ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR ) { solver->options[LIS_OPTIONS_ISLEVEL] = 0; } } else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } } else if( scale ) { if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI ) { if( A->matrix_type!=LIS_MATRIX_BSR ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,block,block,NULL,NULL); lis_matrix_set_type(B,storage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } err = lis_matrix_split(A); if( err ) return err; err = lis_matrix_diag_duplicate(A->D,&solver->WD); if( err ) return err; lis_matrix_diag_copy(A->D,solver->WD); lis_matrix_diag_inverse(solver->WD); lis_matrix_bscaling_bsr(A,solver->WD); lis_vector_duplicate(A,&t); lis_matrix_diag_matvec(solver->WD,b,t); lis_vector_copy(t,b); lis_vector_destroy(t); t = NULL; } else { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG ) { scale = LIS_SCALE_SYMM_DIAG; } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,scale); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } } } /* precon_type = precon->precon_type;*/ if( precon_type==LIS_PRECON_TYPE_IS ) { if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR ) { AA = solver->A; bb = solver->b; } else { AA = precon->A; bb = precon->Pb; } } else { AA = A; bb = b; } p_c_times = lis_wtime() - p_c_times; itimes = lis_wtime(); /* Matrix Convert */ solver->A = AA; solver->b = bb; err = lis_matrix_convert_self(solver); if( err ) { lis_vector_destroy(xx); lis_solver_work_destroy(solver); lis_free(residual); solver->retcode = err; return err; } block = solver->A->bnr; if( A->my_rank==0 ) { if( output ) printf("precision : %s\n", lis_precisionname[precision]); if( output ) printf("solver : %s %d\n", lis_solvername[nsolver],nsolver); switch( precon_type ) { case LIS_PRECON_TYPE_ILU: i = solver->options[LIS_OPTIONS_FILL]; if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR ) { if( output ) sprintf(buf,"Block %s(%d)",lis_preconname[precon_type],i); } else { if( output ) sprintf(buf,"%s(%d)",lis_preconname[precon_type],i); } break; default: if( output ) sprintf(buf,"%s",lis_preconname[precon_type]); break; } if( solver->options[LIS_OPTIONS_ADDS] && precon_type ) { if( output ) printf("precon : %s + additive schwarz\n", buf); } else { if( output ) printf("precon : %s\n", buf); } } switch(conv_cond) { case LIS_CONV_COND_NRM2_R: case LIS_CONV_COND_NRM2_B: if( A->my_rank==0 ) { if( output ) ("CONV_COND : ||r||_2 <= %6.1e*||r_0||_2\n", tol); } break; case LIS_CONV_COND_NRM1_B: lis_vector_nrm1(b,&nrm2); nrm2 = nrm2*tol_w + tol; if( A->my_rank==0 ) { if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2); } break; } if( A->my_rank==0 ) { if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC ) { if( output ) printf("storage : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block); } else { if( output ) printf("storage : %s\n", lis_storagename[AA->matrix_type-1]); } } /* create work vector */ err = lis_solver_malloc_work[nsolver](solver); if( err ) { lis_vector_destroy(xx); lis_precon_destroy(precon); solver->retcode = err; return err; } if( nsolver==LIS_SOLVER_BICG && is_use_at ) { if( output ) lis_printf(A->comm,"Use At\n"); lis_matrix_duplicate(AA,&At); lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]); lis_matrix_convert(AA,At); solver->At = At; } solver->x = xx; solver->xx = x; solver->precon = precon; solver->residual = residual; /* execute solver */ #ifndef USE_QUAD_PRECISION err = lis_solver_execute[nsolver](solver); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_solver_execute[nsolver](solver); } else if( precision==LIS_PRECISION_QUAD ) { err = lis_solver_execute_quad[nsolver](solver); } else if( precision==LIS_PRECISION_SWITCH ) { err = lis_solver_execute_switch[nsolver](solver); } #endif solver->retcode = err; if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { x->value[i] = xx->value[i]*solver->d->value[i]; } } else { #ifndef USE_QUAD_PRECISION lis_vector_copy(xx,x); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(xx,x); } else { lis_vector_copyex_mn(xx,x); } #endif } itimes = lis_wtime() - itimes - solver->ptimes; p_i_times = solver->ptimes; solver->ptimes = p_c_times + p_i_times; solver->p_c_times = p_c_times; solver->p_i_times = p_i_times; solver->times = solver->ptimes + itimes; solver->itimes = itimes; lis_solver_work_destroy(solver); lis_vector_duplicate(A,&t); xx->precision = LIS_PRECISION_DEFAULT; lis_matvec(A,xx,t); lis_vector_xpay(b,-1.0,t); if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { t->value[i] = t->value[i]/solver->d->value[i]; } } lis_vector_nrm2(t,&nrm2); /* solver->resid = nrm2; */ if( A->my_rank==0 ) { if( err ) { if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err); } else { if( output ) printf("lis_solve : normal end\n\n"); } } if( precision==LIS_PRECISION_DOUBLE ) { solver->iter2 = solver->iter; } else if( precision==LIS_PRECISION_QUAD ) { solver->iter2 = 0; } lis_vector_destroy(t); /* lis_vector_destroy(d);*/ lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_copy_dia(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT err; LIS_INT i,n,nnd,lnnd,unnd; LIS_INT *index; LIS_INT *lindex; LIS_INT *uindex; LIS_SCALAR *value,*lvalue,*uvalue,*diag; LIS_DEBUG_FUNC_IN; n = Ain->n; if( Ain->is_splited ) { lnnd = Ain->L->nnd; unnd = Ain->U->nnd; lindex = NULL; uindex = NULL; diag = NULL; err = lis_matrix_malloc_dia(n,lnnd,&lindex,&lvalue); if( err ) { return err; } err = lis_matrix_malloc_dia(n,unnd,&uindex,&uvalue); if( err ) { lis_free2(5,diag,uindex,lindex,uvalue,lvalue); return err; } diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_dia::diag"); if( diag==NULL ) { lis_free2(5,diag,uindex,lindex,uvalue,lvalue); return err; } #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n;i++) { diag[i] = Ain->D->value[i]; } lis_matrix_elements_copy_dia(n,lnnd,Ain->L->index,Ain->L->value,lindex,lvalue); lis_matrix_elements_copy_dia(n,unnd,Ain->U->index,Ain->U->value,uindex,uvalue); err = lis_matrix_setDLU_dia(lnnd,unnd,diag,lindex,lvalue,uindex,uvalue,Aout); if( err ) { lis_free2(5,diag,uindex,lindex,uvalue,lvalue); return err; } } if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) ) { index = NULL; value = NULL; nnd = Ain->nnd; err = lis_matrix_malloc_dia(n,nnd,&index,&value); if( err ) { return err; } lis_matrix_elements_copy_dia(n,nnd,Ain->index,Ain->value,index,value); err = lis_matrix_set_dia(nnd,index,value,Aout); if( err ) { lis_free2(2,index,value); return err; } } err = lis_matrix_assemble(Aout); if( err ) { lis_matrix_storage_destroy(Aout); return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_input_hb_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { char buf[BUFSIZE]; char title[128], key[128], mtx[64], dat[128]; char *p; char MXTYPE_F,MXTYPE_S,MXTYPE_T; char RHSTYP_F,RHSTYP_S,RHSTYP_T; LIS_INT TOTCRD,PTRCRD,INDCRD,VALCRD,RHSCRD; LIS_INT NROW,NCOL,NNZERO,NELTVL; LIS_INT NRHS,NRHSIX; LIS_INT iptr,iind,ival,irhs; LIS_INT wptr,wind,wval,wrhs; LIS_INT i,k,j,my_rank; LIS_INT err; LIS_INT n,is,ie; LIS_INT *ptr, *index; LIS_INT matrix_type; LIS_SCALAR *value; LIS_MATRIX B; #ifdef USE_MPI MPI_Comm_rank(A->comm,&my_rank); #else my_rank = 0; #endif matrix_type = A->matrix_type; /* Line 1 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } strncpy(title, buf ,72); title[72] = '\0'; strncpy(key ,&buf[72], 8); key[8] = '\0'; printf("title: %s\n",title); printf("key : %s\n",key); /* Line 2 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%14lld%14lld%14lld%14lld%14lld", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #else if( sscanf(buf, "%14d%14d%14d%14d%14d", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%14lld%14lld%14lld%14lld%14lld\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #else printf("%14d%14d%14d%14d%14d\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #endif /* Line 3 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld %lld %lld", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #else if( sscanf(buf, "%s %d %d %d %d", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); MXTYPE_F = mtx[0]; MXTYPE_S = mtx[1]; MXTYPE_T = mtx[2]; if( mtx[0]!='r' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n"); return LIS_ERR_FILE_IO; } /* if( mtx[1]!='u' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not unsymmetric\n"); return LIS_ERR_FILE_IO; } */ if( mtx[2]!='a' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not assembled\n"); return LIS_ERR_FILE_IO; } if( NROW!=NCOL ) { LIS_SETERR(LIS_ERR_FILE_IO,"matrix is not square\n"); return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%c%c%c %lld %lld %lld %lld\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #else printf("%c%c%c %d %d %d %d\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #endif /* Line 4 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } lis_input_hb_get_fmt( buf ,16,&iptr,&wptr); lis_input_hb_get_fmt(&buf[16],16,&iind,&wind); lis_input_hb_get_fmt(&buf[32],20,&ival,&wval); lis_input_hb_get_fmt(&buf[52],20,&irhs,&wrhs); #ifdef _LONGLONG printf("%lld %lld %lld %lld\n",iptr,iind,ival,irhs); printf("%lld %lld %lld %lld\n",wptr,wind,wval,wrhs); #else printf("%d %d %d %d\n",iptr,iind,ival,irhs); printf("%d %d %d %d\n",wptr,wind,wval,wrhs); #endif /* Line 5 */ if( RHSCRD!=0 ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX); #else sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX); #endif /* #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX) != 3 ) #else if( sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX) != 3 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } */ for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); RHSTYP_F = mtx[0]; RHSTYP_S = mtx[1]; RHSTYP_T = mtx[2]; #ifdef _LONGLONG printf("%c%c%c %lld %lld\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #else printf("%c%c%c %d %d\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #endif } err = lis_matrix_set_size(A,0,NROW); if( err ) { return err; } n = A->n; lis_matrix_get_range(A,&is,&ie); err = lis_matrix_malloc_csr(n,NNZERO,&ptr,&index,&value); if( err ) { return err; } /* read data */ k = 0; for( i=0; i<PTRCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iptr&&k<n+1;j++) { strncpy(dat, p, wptr); dat[wptr] = '\0'; ptr[k] = atoi(dat) - 1; p += wptr; k++; } } k = 0; for( i=0; i<INDCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iind&&k<NNZERO;j++) { strncpy(dat, p, wind); dat[wind] = '\0'; index[k] = atoi(dat) - 1; p += wind; k++; } } k = 0; for( i=0; i<VALCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; k++; } } if( RHSCRD>0 ) { /* k = 0; for( i=0; i<RHSCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; printf("%e ",value[k]); k++; } printf("\n"); } */ } err = lis_matrix_set_csc(NNZERO,ptr,index,value,A); if( err ) { return err; } err = lis_matrix_assemble(A); if( err ) return err; if( matrix_type!=LIS_MATRIX_CSC ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert_csc2csr(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } return LIS_SUCCESS; }
LIS_INT lis_input_mm_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { char buf[BUFSIZE]; LIS_INT nr,nc,nnz; LIS_INT i,j,my_rank; LIS_INT err; LIS_INT mmtype,mode; LIS_INT n,is,ie; LIS_INT ridx,cidx; LIS_INT *ptr, *index; LIS_INT *work; LIS_INT isb,isx,isbin; LIS_SCALAR val; LIS_SCALAR *value; LIS_MM_MATFMT matfmt; LIS_DEBUG_FUNC_IN; #ifdef USE_MPI my_rank = A->my_rank; #else my_rank = 0; #endif /* check banner */ err = lis_input_mm_banner(file,&mmtype); if( err ) return err; /* check size */ err = lis_input_mm_size(file,&nr,&nc,&nnz,&isb,&isx,&isbin); if( err ) return err; err = lis_matrix_set_size(A,0,nr); if( err ) return err; #ifdef _LONGLONG if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nr,nc,nnz); #else if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nr,nc,nnz); #endif n = A->n; ptr = NULL; index = NULL; value = NULL; work = NULL; lis_matrix_get_range(A,&is,&ie); ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::ptr" ); if( ptr==NULL ) { LIS_SETERR_MEM((n+1)*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,work); return LIS_OUT_OF_MEMORY; } work = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::work" ); if( work==NULL ) { LIS_SETERR_MEM((n+1)*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,work); return LIS_OUT_OF_MEMORY; } #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n+1;i++) { ptr[i] = 0; work[i] = 0; } /* read data */ mode = 1; mode = *(char *)&mode; if( mode!=(isbin-1) ) { mode = LIS_TRUE; } else { mode = LIS_FALSE; } for( i=0; i<nnz; i++ ) { if( isbin ) { if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } ridx = matfmt.i; cidx = matfmt.j; if( mode ) { lis_bswap_int(1,&ridx); lis_bswap_int(1,&cidx); } } else { if( fgets(buf, BUFSIZE, file)==NULL ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } #ifdef _LONGLONG #ifdef _LONG__DOUBLE if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 ) #else if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 ) #endif #else #ifdef _LONG__DOUBLE if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 ) #else if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 ) #endif #endif { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } } /* if( val!=0.0 )*/ { if( mmtype==MM_SYMM && ridx!=cidx ) { if( cidx>is && cidx<=ie ) work[cidx-is-1]++; } if( ridx>is && ridx<=ie ) { ptr[ridx-is]++; } } } ptr[0] = 0; for( i=0; i<n; i++ ) { if( mmtype==MM_SYMM ) { ptr[i+1] += ptr[i] + work[i]; } else { ptr[i+1] += ptr[i]; } work[i] = 0; } index = (LIS_INT *)lis_malloc( ptr[n]*sizeof(LIS_INT),"lis_input_mm_csr::index" ); if( index==NULL ) { LIS_SETERR_MEM(ptr[n]*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,work); return LIS_OUT_OF_MEMORY; } value = (LIS_SCALAR *)lis_malloc( ptr[n]*sizeof(LIS_SCALAR),"lis_input_mm_csr::value" ); if( value==NULL ) { LIS_SETERR_MEM(ptr[n]*sizeof(LIS_SCALAR)); lis_free2(4,ptr,index,value,work); return LIS_OUT_OF_MEMORY; } #ifdef _OPENMP #pragma omp parallel for private(i,j) #endif for(i=0;i<n;i++) { for(j=ptr[i];j<ptr[i+1];j++) { index[j] = 0; value[j] = 0.0; } } rewind(file); if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } do { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } }while( buf[0]=='%' ); for( i=0; i<nnz; i++ ) { if( isbin ) { if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } ridx = matfmt.i; cidx = matfmt.j; val = matfmt.value; if( mode ) { lis_bswap_int(1,&ridx); lis_bswap_int(1,&cidx); lis_bswap_scalar(1,&val); } } else { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } #ifdef _LONGLONG #ifdef _LONG__DOUBLE if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 ) #else if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 ) #endif #else #ifdef _LONG__DOUBLE if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 ) #else if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 ) #endif #endif { LIS_SETERR_FIO; lis_free2(4,ptr,index,value,work); return LIS_ERR_FILE_IO; } } ridx--; cidx--; if( ridx==cidx && val==0.0 ) { #ifdef _LONGLONG printf("diagonal element is zero (i=%lld)\n",ridx); #else printf("diagonal element is zero (i=%d)\n",ridx); #endif } /* if( val!=0.0 )*/ { if( mmtype==MM_SYMM && ridx!=cidx ) { if( cidx>=is && cidx<ie ) { value[ptr[cidx-is]+work[cidx-is]] = val; index[ptr[cidx-is]+work[cidx-is]] = ridx; work[cidx-is]++; } } if( ridx>=is && ridx<ie ) { value[ptr[ridx-is]+work[ridx-is]] = val; index[ptr[ridx-is]+work[ridx-is]] = cidx; work[ridx-is]++; } } } #ifdef USE_MPI MPI_Barrier(A->comm); #endif err = lis_matrix_set_csr(ptr[n],ptr,index,value,A); if( err ) { lis_free2(4,ptr,index,value,work); return err; } err = lis_matrix_assemble(A); if( err ) { lis_matrix_storage_destroy(A); lis_free(work); return err; } if( b!=NULL && x!=NULL ) { err = lis_input_mm_vec(A,b,x,file,isb,isx,isbin); if( err ) { lis_matrix_storage_destroy(A); lis_free(work); } } lis_free(work); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_esolve(LIS_MATRIX A, LIS_VECTOR x, LIS_SCALAR *evalue0, LIS_ESOLVER esolver) { LIS_INT nesolver,niesolver,emaxiter; LIS_SCALAR *evalue; LIS_VECTOR *evector; LIS_SCALAR *resid; LIS_SCALAR *rhistory; LIS_INT *iter,*iter2; LIS_INT err; LIS_INT output; LIS_INT ss, mode; double time; double gshift; LIS_INT estorage,eblock; LIS_MATRIX B; LIS_INT eprecision; LIS_VECTOR xx; LIS_DEBUG_FUNC_IN; /* begin parameter check */ err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL); if( err ) return err; if( x==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector x is undefined\n"); return LIS_ERR_ILL_ARG; } if( A->n!=x->n ) { return LIS_ERR_ILL_ARG; } if( A->gn<=0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A is less than 0\n",A->gn); return LIS_ERR_ILL_ARG; } nesolver = esolver->options[LIS_EOPTIONS_ESOLVER]; niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER]; ss = esolver->options[LIS_EOPTIONS_SUBSPACE]; mode = esolver->options[LIS_EOPTIONS_MODE]; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; gshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; estorage = esolver->options[LIS_EOPTIONS_STORAGE]; eblock = esolver->options[LIS_EOPTIONS_STORAGE_BLOCK]; eprecision = esolver->options[LIS_EOPTIONS_PRECISION]; esolver->eprecision = eprecision; if( nesolver < 1 || nesolver > LIS_ESOLVER_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_ESOLVER is %d (Set between 1 to %d)\n",nesolver, LIS_ESOLVER_LEN); return LIS_ERR_ILL_ARG; } if( niesolver < 1 || niesolver > 7 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 7)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && niesolver > 4 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 4 for Subspace)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI && niesolver == LIS_ESOLVER_PI ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 7 for Lanczos)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI && (( niesolver == LIS_ESOLVER_PI ) || ( niesolver == LIS_ESOLVER_CG) || ( niesolver == LIS_ESOLVER_JD)) ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 4 or 6 for Arnoldi)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Subspace)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if (( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI ) && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Lanczos and Arnoldi)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Subspace)\n", mode, ss); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == ( LIS_ESOLVER_LI || LIS_ESOLVER_AI ) && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Lanczos or Arnoldi)\n", mode, ss); return LIS_ERR_ILL_ARG; } #ifdef USE_QUAD_PRECISION if( eprecision==LIS_PRECISION_QUAD && lis_esolver_execute_quad[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision eigensolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( eprecision==LIS_PRECISION_SWITCH && lis_esolver_execute_switch[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch esolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( esolver->options[LIS_EOPTIONS_SWITCH_MAXITER]==-1 ) { esolver->options[LIS_EOPTIONS_SWITCH_MAXITER] = emaxiter; } #endif /* create eigenvalue array */ if( esolver->evalue ) lis_free(esolver->evalue); evalue = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::evalue"); if( evalue==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } evalue[0] = 1.0; evalue[ss-1] = 1.0; /* create residual norm array */ if( esolver->resid ) lis_free(esolver->resid); resid = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::resid"); if( resid==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create number of iterations array */ if( esolver->iter ) lis_free(esolver->iter); iter = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter"); if( iter==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create quad precision number of iterations array */ if( esolver->iter2 ) lis_free(esolver->iter2); iter2 = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter2"); if( iter2==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { esolver->retcode = err; return err; } if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) { if( output ) lis_printf(A->comm,"initial vector x : 1\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(1.0,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(1.0,xx); } else { lis_vector_set_allex_nm(1.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x : user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* global shift */ if ( output ) if( A->my_rank==0 ) printf("shift : %e\n", gshift); /* create eigenvector array */ if( esolver->evector ) lis_free(esolver->evector); evector = (LIS_VECTOR *)lis_malloc((ss+2)*sizeof(LIS_VECTOR),"lis_esolve::evector"); if( evector==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_VECTOR)); esolver->retcode = err; return err; } /* create residual history array */ if( esolver->rhistory ) lis_free(esolver->rhistory); rhistory = (LIS_SCALAR *)lis_malloc((emaxiter+2)*sizeof(LIS_SCALAR),"lis_esolve::rhistory"); if( rhistory==NULL ) { LIS_SETERR_MEM((emaxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); esolver->retcode = err; return err; } /* convert matrix */ if( estorage>0 && A->matrix_type!=estorage ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,eblock,eblock,NULL,NULL); lis_matrix_set_type(B,estorage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } esolver->A = A; esolver->evalue = evalue; esolver->x = x; esolver->evector = evector; rhistory[0] = 1.0; esolver->rhistory = rhistory; esolver->resid = resid; esolver->iter = iter; esolver->iter2 = iter2; if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("precision : long double\n"); #else if ( output ) printf("precision : %s\n", lis_eprecisionname[eprecision]); #endif #ifdef _LONG__LONG if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #else if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #endif } if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1Le * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #else if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1e * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #endif } if( A->my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { #ifdef _LONG__LONG if ( output ) printf("matrix storage format : %s(%lld x %lld)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #else if ( output ) printf("matrix storage format : %s(%d x %d)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #endif } else { if ( output ) printf("matrix storage format : %s\n", lis_estoragename[A->matrix_type-1]); } } time = lis_wtime(); esolver->ptime = 0; esolver->itime = 0; esolver->p_c_time = 0; esolver->p_i_time = 0; if (gshift != 0.0) lis_matrix_shift_diagonal(A, gshift); /* create work vector */ err = lis_esolver_malloc_work[nesolver](esolver); if( err ) { lis_vector_destroy(xx); esolver->retcode = err; return err; } esolver->x = xx; esolver->xx = x; /* execute esolver */ #ifndef USE_QUAD_PRECISION err = lis_esolver_execute[nesolver](esolver); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_esolver_execute[nesolver](esolver); } else if( eprecision==LIS_PRECISION_QUAD ) { err = lis_esolver_execute_quad[nesolver](esolver); } else if( eprecision==LIS_PRECISION_SWITCH ) { err = lis_esolver_execute_switch[nesolver](esolver); } #endif esolver->retcode = err; *evalue0 = esolver->evalue[0]; lis_vector_copy(esolver->x, x); esolver->time = lis_wtime() - time; lis_matrix_shift_diagonal(A, -gshift); if( A->my_rank==0 ) { if( err ) { #ifdef _LONG__LONG if ( output ) printf("eigensolver status : %s(code=%lld)\n\n",lis_ereturncode[err],err); #else if ( output ) printf("eigensolver status : %s(code=%d)\n\n",lis_ereturncode[err],err); #endif } else { if ( output ) printf("eigensolver status : normal end\n\n"); } } if( eprecision==LIS_PRECISION_DOUBLE ) { esolver->iter2[mode] = esolver->iter[mode]; } else if( eprecision==LIS_PRECISION_QUAD ) { esolver->iter2[mode] = 0; } lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_convert_csr2msr(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT i,j,k,jj; LIS_INT err; LIS_INT n,nnz,ndz; LIS_INT count; LIS_INT *iw; LIS_INT *index; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; n = Ain->n; nnz = Ain->nnz; iw = NULL; index = NULL; value = NULL; iw = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2msr::iw" ); if( iw==NULL ) { LIS_SETERR_MEM((n+1)*sizeof(LIS_INT)); return LIS_ERR_OUT_OF_MEMORY; } /* check ndz */ for(i=0;i<n+1;i++) iw[i] = 0; count = 0; #ifdef _OPENMP #pragma omp parallel private(i,j) #endif { #ifdef _OPENMP #pragma omp for #endif for(i=0;i<n;i++) { iw[i+1] = 0; for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++) { if( i==Ain->index[j] ) { iw[i+1] = 1; } } } #ifdef _OPENMP #pragma omp for reduction(+:count) #endif for(i=0;i<n;i++) { count += iw[i+1]; } #ifdef _OPENMP #pragma omp for #endif for(i=0;i<n;i++) { iw[i+1] = Ain->ptr[i+1]-Ain->ptr[i]-iw[i+1]; } } ndz = n - count; err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value); if( err ) { lis_free2(3,index,value,iw); return err; } /* convert msr */ iw[0] = n+1; for(i=0;i<n;i++) { iw[i+1] = iw[i+1] + iw[i]; } #ifdef _OPENMP #pragma omp parallel private(i,j,k) #endif { #ifdef _OPENMP #pragma omp for #endif for(i=0;i<n+1;i++) { index[i] = iw[i]; } #ifdef _OPENMP #pragma omp for #endif for(i=0;i<n;i++) { k = index[i]; for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++) { jj = Ain->index[j]; if( jj==i ) { value[i] = Ain->value[j]; } else { value[k] = Ain->value[j]; index[k] = Ain->index[j]; k++; } } } } err = lis_matrix_set_msr(nnz,ndz,index,value,Aout); if( err ) { lis_free2(3,index,value,iw); return err; } err = lis_matrix_assemble(Aout); if( err ) { lis_free(iw); lis_matrix_storage_destroy(Aout); return err; } lis_free(iw); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_convert_msr2csr(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT i,j,k; LIS_INT err; LIS_INT n,nnz,is; LIS_INT *ptr,*index; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; n = Ain->n; nnz = Ain->nnz; is = Ain->is; ptr = NULL; index = NULL; value = NULL; err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value); if( err ) { return err; } /* convert csr */ #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n;i++) { ptr[i+1] = Ain->index[i+1] - Ain->index[i]; if( Ain->value[i]!=0.0 ) { ptr[i+1]++; } } ptr[0] = 0; for(i=0;i<n;i++) { ptr[i+1] += ptr[i]; } #ifdef _OPENMP #pragma omp parallel for private(i,j,k) #endif for(i=0;i<n;i++) { k = ptr[i]; if( Ain->value[i]!=(LIS_SCALAR)0.0 ) { value[k] = Ain->value[i]; index[k] = i; k++; } for(j=Ain->index[i];j<Ain->index[i+1];j++) { value[k] = Ain->value[j]; index[k] = Ain->index[j]; k++; } } err = lis_matrix_set_csr(nnz,ptr,index,value,Aout); if( err ) { lis_free2(3,ptr,index,value); return err; } err = lis_matrix_assemble(Aout); if( err ) { lis_matrix_storage_destroy(Aout); return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_convert_rco2csc(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT i,j,k,l,n,nnz,err; LIS_INT *ptr,*index,*iw; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; ptr = NULL; index = NULL; value = NULL; iw = NULL; n = Ain->n; iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::iw"); if( iw==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,iw); return LIS_OUT_OF_MEMORY; } ptr = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::ptr"); if( ptr==NULL ) { LIS_SETERR_MEM((n+1)*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,iw); return LIS_OUT_OF_MEMORY; } for(i=0;i<n;i++) iw[i] = 0; for(i=0;i<n;i++) { for(j=0;j<Ain->w_row[i];j++) { iw[Ain->w_index[i][j]]++; } } ptr[0] = 0; for(i=0;i<n;i++) { ptr[i+1] = ptr[i] + iw[i]; iw[i] = ptr[i]; } nnz = ptr[n]; index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::index" ); if( index==NULL ) { LIS_SETERR_MEM(nnz*sizeof(LIS_INT)); lis_free2(4,ptr,index,value,iw); return LIS_OUT_OF_MEMORY; } value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2csc::value" ); if( value==NULL ) { LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR)); lis_free2(4,ptr,index,value,iw); return LIS_OUT_OF_MEMORY; } for(i=0;i<n;i++) { for(j=0;j<Ain->w_row[i];j++) { k = Ain->w_index[i][j]; l = iw[k]; value[l] = Ain->w_value[i][j]; index[l] = i; iw[k]++; } } err = lis_matrix_set_csc(nnz,ptr,index,value,Aout); if( err ) { lis_free2(4,ptr,index,value,iw); return err; } err = lis_matrix_assemble(Aout); if( err ) { lis_matrix_storage_destroy(Aout); return err; } lis_free(iw); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_convert_rco2bsr(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT i,j,k,n,gn,nnz,bnnz,nr,nc,bnr,bnc,err; LIS_INT ii,jj,kk,bj,jpos,ij,kv,bi; LIS_INT *iw,*iw2; LIS_INT *bptr,*bindex; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; bnr = Ain->conv_bnr; bnc = Ain->conv_bnc; n = Ain->n; gn = Ain->gn; nr = 1 + (n-1)/bnr; nc = 1 + (gn-1)/bnc; bptr = NULL; bindex = NULL; value = NULL; iw = NULL; iw2 = NULL; bptr = (LIS_INT *)lis_malloc( (nr+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bptr" ); if( bptr==NULL ) { LIS_SETERR_MEM((nr+1)*sizeof(LIS_INT)); lis_free2(5,bptr,bindex,value,iw,iw2); return LIS_OUT_OF_MEMORY; } #ifdef _OPENMP #pragma omp parallel private(i,k,ii,j,bj,kk,ij,jj,iw,iw2,kv,jpos) #endif { iw = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" ); iw2 = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw2" ); memset(iw,0,nc*sizeof(LIS_INT)); #ifdef _OPENMP #pragma omp for #endif for(i=0;i<nr;i++) { k = 0; kk = bnr*i; jj = 0; for(ii=0;ii+kk<n&&ii<bnr;ii++) { for(j=0;j<Ain->w_row[kk+ii];j++) { bj = Ain->w_index[kk+ii][j]/bnc; jpos = iw[bj]; if( jpos==0 ) { iw[bj] = 1; iw2[jj] = bj; jj++; } } } for(bj=0;bj<jj;bj++) { k++; ii = iw2[bj]; iw[ii]=0; } bptr[i+1] = k; } lis_free(iw); lis_free(iw2); } bptr[0] = 0; for(i=0;i<nr;i++) { bptr[i+1] += bptr[i]; } bnnz = bptr[nr]; nnz = bnnz*bnr*bnc; bindex = (LIS_INT *)lis_malloc( bnnz*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bindex" ); if( bindex==NULL ) { LIS_SETERR_MEM((nr+1)*sizeof(LIS_INT)); lis_free2(3,bptr,bindex,value); return LIS_OUT_OF_MEMORY; } value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2bsr::value" ); if( value==NULL ) { LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR)); lis_free2(3,bptr,bindex,value); return LIS_OUT_OF_MEMORY; } /* convert bsr */ #ifdef _OPENMP #pragma omp parallel private(bi,i,ii,k,j,bj,jpos,kv,kk,ij,jj,iw) #endif { iw = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" ); memset(iw,0,nc*sizeof(LIS_INT)); #ifdef _OPENMP #pragma omp for #endif for(bi=0;bi<nr;bi++) { i = bi*bnr; ii = 0; kk = bptr[bi]; while( i+ii<n && ii<=bnr-1 ) { for( k=0;k<Ain->w_row[i+ii];k++) { j = Ain->w_index[i+ii][k]; bj = j/bnc; j = j%bnc; jpos = iw[bj]; if( jpos==0 ) { kv = kk * bnr * bnc; iw[bj] = kv+1; bindex[kk] = bj; for(jj=0;jj<bnr*bnc;jj++) value[kv+jj] = 0.0; ij = j*bnr + ii; value[kv+ij] = Ain->w_value[i+ii][k]; kk = kk+1; } else { ij = j*bnr + ii; value[jpos+ij-1] = Ain->w_value[i+ii][k]; } } ii = ii+1; } for(j=bptr[bi];j<bptr[bi+1];j++) { iw[bindex[j]] = 0; } } lis_free(iw); } err = lis_matrix_set_bsr(bnr,bnc,bnnz,bptr,bindex,value,Aout); if( err ) { lis_free2(3,bptr,bindex,value); return err; } err = lis_matrix_assemble(Aout); if( err ) { lis_matrix_storage_destroy(Aout); return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_matrix_convert_rco2csr(LIS_MATRIX Ain, LIS_MATRIX Aout) { LIS_INT i,j,k,n,nnz,err; LIS_INT *ptr,*index; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; ptr = NULL; index = NULL; value = NULL; n = Ain->n; nnz = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+:nnz) private(i) #endif for(i=0;i<n;i++) { nnz += Ain->w_row[i]; } err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value); if( err ) { return err; } #ifdef _NUMA #pragma omp parallel for private(i) for(i=0;i<n+1;i++) ptr[i] = 0; #else ptr[0] = 0; #endif for(i=0;i<n;i++) { ptr[i+1] = ptr[i] + Ain->w_row[i]; } #ifdef _OPENMP #pragma omp parallel for private(i,j,k) #endif for(i=0;i<n;i++) { k = ptr[i]; for(j=0;j<Ain->w_row[i];j++) { index[k] = Ain->w_index[i][j]; value[k] = Ain->w_value[i][j]; k++; } } err = lis_matrix_set_csr(nnz,ptr,index,value,Aout); if( err ) { lis_free2(3,ptr,index,value); return err; } err = lis_matrix_assemble(Aout); if( err ) { lis_matrix_storage_destroy(Aout); return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }