LIS_INT lis_output(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_INT format, char *path) { LIS_INT err; LIS_MATRIX B; LIS_DEBUG_FUNC_IN; #ifdef USE_MPI MPI_Barrier(A->comm); #endif err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL); if( err ) return err; if( format==LIS_FMT_MM || format==LIS_FMT_MMB ) { switch( A->matrix_type ) { case LIS_MATRIX_CSR: err = lis_output_mm_csr(A,b,x,format,path); break; default: err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert(A,B); if( err ) return err; err = lis_output_mm_csr(B,b,x,format,path); lis_matrix_destroy(B); break; } } LIS_DEBUG_FUNC_OUT; return err; }
LIS_INT lis_precon_create_ilut(LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT err; LIS_MATRIX A,B; LIS_DEBUG_FUNC_IN; switch( solver->A->matrix_type ) { case LIS_MATRIX_CSR: err = lis_precon_create_ilut_csr(solver,precon); lis_psolve_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolve_ilut_csr; lis_psolvet_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolvet_ilut_csr; break; default: A = solver->A; err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert(A,B); if( err ) return err; solver->A = B; err = lis_precon_create_ilut_csr(solver,precon); lis_psolve_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolve_ilut_csr; lis_psolvet_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolvet_ilut_csr; lis_matrix_destroy(B); solver->A = A; break; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_precon_create_ilut(LIS_SOLVER solver, LIS_PRECON precon) { #ifdef ENABLE_BSR LIS_INT storage,block; #endif LIS_INT err; LIS_MATRIX A,B; LIS_DEBUG_FUNC_IN; #ifdef ENABLE_BSR storage = solver->options[LIS_OPTIONS_STORAGE]; block = solver->options[LIS_OPTIONS_STORAGE_BLOCK]; if( solver->A->matrix_type!=LIS_MATRIX_BSR && storage==LIS_MATRIX_BSR ) { err = lis_matrix_convert_self(solver); if( err ) return err; } #endif switch( solver->A->matrix_type ) { case LIS_MATRIX_CSR: err = lis_precon_create_ilut_csr(solver,precon); lis_psolve_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolve_ilut_csr; lis_psolvet_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolvet_ilut_csr; break; #ifdef ENABLE_BSR case LIS_MATRIX_BSR: err = lis_precon_create_ilut_bsr(solver,precon); lis_psolve_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolve_ilut_bsr; lis_psolvet_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolvet_ilut_bsr; break; #endif default: A = solver->A; err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert(A,B); if( err ) return err; solver->A = B; err = lis_precon_create_ilut_csr(solver,precon); lis_psolve_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolve_ilut_csr; lis_psolvet_xxx[LIS_PRECON_TYPE_ILUT] = lis_psolvet_ilut_csr; lis_matrix_destroy(B); solver->A = A; break; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
void lis_matrix_duplicate_f(LIS_MATRIX_F *Ain, LIS_MATRIX_F *Aout, LIS_INT *ierr) { LIS_MATRIX A; LIS_DEBUG_FUNC_IN; *ierr = lis_matrix_duplicate((LIS_MATRIX)LIS_V2P(Ain),&A); *Aout = LIS_P2V(A); LIS_DEBUG_FUNC_OUT; return; }
LIS_INT lis_precon_create_sainv(LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT err; LIS_MATRIX A,B; LIS_DEBUG_FUNC_IN; switch( solver->A->matrix_type ) { case LIS_MATRIX_CSR: err = lis_precon_create_sainv_csr(solver,precon); break; default: A = solver->A; err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert(A,B); if( err ) return err; solver->A = B; err = lis_precon_create_sainv_csr(solver,precon); lis_matrix_destroy(B); solver->A = A; break; } #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(solver->A,&precon->temp); #else if( solver->precision==LIS_PRECISION_DEFAULT ) { err = lis_vector_duplicate(solver->A,&precon->temp); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&precon->temp); } #endif if( err ) return err; precon->A = solver->A; precon->is_copy = LIS_FALSE; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_input_hb(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { LIS_INT err; LIS_INT matrix_type; LIS_MATRIX B; LIS_DEBUG_FUNC_IN; matrix_type = A->matrix_type; err = lis_input_hb_csr(A,b,x,file); if( err ) return err; if( matrix_type!=LIS_MATRIX_CSR && matrix_type!=LIS_MATRIX_CSC ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,matrix_type); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); if( A->matrix_type==LIS_MATRIX_JAD ) { A->work = (LIS_SCALAR *)lis_malloc(A->n*sizeof(LIS_SCALAR),"lis_input_hb::A->work"); if( A->work==NULL ) { LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } } } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A0,A; LIS_VECTOR x,b,u; LIS_SOLVER solver; LIS_INT m,n,nn,nnz; LIS_INT i,j,ii,jj,ctr; LIS_INT is,ie; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nsol; LIS_INT err,iter,mtype,iter_double,iter_quad; double time,itime,ptime,p_c_time,p_i_time; LIS_REAL resid; char solvername[128]; LIS_INT *ptr,*index; LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 6 ) { if( my_rank==0 ) { printf("Usage: %s m n matrix_type solution_filename rhistory_filename [options]\n", argv[0]); } CHKERR(1); } m = atoi(argv[1]); n = atoi(argv[2]); mtype = atoi(argv[3]); if( m<=0 || n<=0 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("m=%lld <=0 or n=%lld <=0\n",m,n); #else if( my_rank==0 ) printf("m=%d <=0 or n=%d <=0\n",m,n); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONGLONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP if( my_rank==0 ) { #ifdef _LONGLONG printf("max number of threads = %lld\n",omp_get_num_procs()); printf("number of threads = %lld\n",omp_get_max_threads()); #else printf("max number of threads = %d\n",omp_get_num_procs()); printf("number of threads = %d\n",omp_get_max_threads()); #endif } #endif /* create matrix and vectors */ nn = m*n; err = lis_matrix_create(LIS_COMM_WORLD,&A); err = lis_matrix_set_size(A,0,nn); CHKERR(err); ptr = (LIS_INT *)malloc((A->n+1)*sizeof(LIS_INT)); if( ptr==NULL ) CHKERR(1); index = (LIS_INT *)malloc(5*A->n*sizeof(LIS_INT)); if( index==NULL ) CHKERR(1); value = (LIS_SCALAR *)malloc(5*A->n*sizeof(LIS_SCALAR)); if( value==NULL ) CHKERR(1); lis_matrix_get_range(A,&is,&ie); ctr = 0; for(ii=is;ii<ie;ii++) { i = ii/m; j = ii - i*m; if( i>0 ) { jj = ii - m; index[ctr] = jj; value[ctr++] = -1.0;} if( i<n-1 ) { jj = ii + m; index[ctr] = jj; value[ctr++] = -1.0;} if( j>0 ) { jj = ii - 1; index[ctr] = jj; value[ctr++] = -1.0;} if( j<m-1 ) { jj = ii + 1; index[ctr] = jj; value[ctr++] = -1.0;} index[ctr] = ii; value[ctr++] = 4.0; ptr[ii-is+1] = ctr; } ptr[0] = 0; err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A); CHKERR(err); err = lis_matrix_assemble(A); CHKERR(err); nnz = A->nnz; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A->comm); nnz = i; #endif #ifdef _LONGLONG if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nn,nn,nnz); #else if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nn,nn,nnz); #endif err = lis_matrix_duplicate(A,&A0); CHKERR(err); lis_matrix_set_type(A0,mtype); err = lis_matrix_convert(A,A0); CHKERR(err); lis_matrix_destroy(A); A = A0; err = lis_vector_duplicate(A,&u); CHKERR(err); err = lis_vector_duplicate(A,&b); CHKERR(err); err = lis_vector_duplicate(A,&x); CHKERR(err); err = lis_vector_set_all(1.0,u); lis_matvec(A,u,b); err = lis_solver_create(&solver); CHKERR(err); lis_solver_set_option("-print mem",solver); lis_solver_set_optionC(solver); err = lis_solve(A,b,x,solver); CHKERR(err); lis_solver_get_iterex(solver,&iter,&iter_double,&iter_quad); lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time); lis_solver_get_residualnorm(solver,&resid); lis_solver_get_solver(solver,&nsol); lis_solver_get_solvername(nsol,solvername); if( my_rank==0 ) { #ifdef _LONGLONG #ifdef _LONG__DOUBLE printf("%s: number of iterations = %lld \n",solvername, iter); #else printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad); #endif #else #ifdef _LONG__DOUBLE printf("%s: number of iterations = %d \n",solvername, iter); #else printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad); #endif #endif printf("%s: elapsed time = %e sec.\n",solvername,time); printf("%s: preconditioner = %e sec.\n",solvername, ptime); printf("%s: matrix creation = %e sec.\n",solvername, p_c_time); printf("%s: linear solver = %e sec.\n",solvername, itime); #ifdef _LONG__DOUBLE printf("%s: relative residual = %Le\n\n",solvername,resid); #else printf("%s: relative residual = %e\n\n",solvername,resid); #endif } /* write solution */ lis_output_vector(x,LIS_FMT_MM,argv[4]); /* write residual history */ lis_solver_output_rhistory(solver, argv[5]); lis_solver_destroy(solver); lis_matrix_destroy(A); lis_vector_destroy(b); lis_vector_destroy(x); lis_vector_destroy(u); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U) { LIS_INT err; LIS_INT i,n,np,lnnz,unnz; LIS_INT *lptr,*lindex; LIS_INT *uptr,*uindex; LIS_SCALAR *lvalue,*uvalue,*diag; LIS_DEBUG_FUNC_IN; *D = NULL; *L = NULL; *U = NULL; err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL); if( err ) return err; n = Ain->n; np = Ain->np; err = lis_matrix_duplicate(Ain,L); if( err ) { return err; } err = lis_matrix_duplicate(Ain,U); if( err ) { lis_matrix_destroy(*L); return err; } err = lis_matrix_diag_duplicateM(Ain,D); if( err ) { lis_matrix_destroy(*L); lis_matrix_destroy(*U); return err; } lis_free((*D)->value); if( Ain->is_splited ) { } lnnz = Ain->L->nnz; unnz = Ain->U->nnz; lptr = NULL; lindex = NULL; uptr = NULL; uindex = NULL; diag = NULL; err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue); if( err ) { return err; } err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue); if( err ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag"); if( diag==NULL ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<n;i++) { diag[i] = Ain->D->value[i]; } lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue); lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue); (*D)->value = diag; err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L); if( err ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U); if( err ) { lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue); return err; } err = lis_matrix_assemble(*L); if( err ) { return err; } err = lis_matrix_assemble(*U); if( err ) { return err; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT nsolver, precon_type, maxiter; LIS_INT err; LIS_SCALAR *residual; LIS_VECTOR xx; LIS_INT output; LIS_INT scale; LIS_INT conv_cond; LIS_INT precision,is_use_at,storage,block; LIS_INT i,n,np; double p_c_times, p_i_times,itimes; LIS_SCALAR nrm2,tol,tol_w; LIS_VECTOR t; LIS_VECTOR bb; LIS_MATRIX AA,B; LIS_MATRIX At; char buf[64]; LIS_DEBUG_FUNC_IN; nsolver = solver->options[LIS_OPTIONS_SOLVER]; precon_type = solver->options[LIS_OPTIONS_PRECON]; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; scale = solver->options[LIS_OPTIONS_SCALE]; precision = solver->options[LIS_OPTIONS_PRECISION]; is_use_at = solver->options[LIS_OPTIONS_USE_AT]; storage = solver->options[LIS_OPTIONS_STORAGE]; block = solver->options[LIS_OPTIONS_STORAGE_BLOCK]; conv_cond = solver->options[LIS_OPTIONS_CONV_COND]; tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN]; tol_w = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN]; solver->precision = precision; if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN); return LIS_ERR_ILL_ARG; } if( precon_type < 0 || precon_type > precon_register_type ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1); return LIS_ERR_ILL_ARG; } if( maxiter<0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter); return LIS_ERR_ILL_ARG; } #ifdef USE_MPI if( precon_type == LIS_PRECON_TYPE_SAAMG && solver->A->nprocs < 2) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs); return LIS_ERR_ILL_ARG; } #endif #ifdef USE_QUAD_PRECISION if( precision==LIS_PRECISION_QUAD && lis_solver_execute_quad[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 ) { solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter; } #endif err = lis_solver_check_params[nsolver](solver); if( err ) { solver->retcode = err; return err; } /* end parameter check */ solver->A = A; solver->b = b; /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { solver->retcode = err; return err; } if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] ) { if( output ) lis_printf(A->comm,"initial vector x = 0\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(0.0,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(0.0,xx); } else { lis_vector_set_allex_nm(0.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x = user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* create residual history vector */ if( solver->residual ) lis_free(solver->residual); residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual"); if( residual==NULL ) { LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); solver->retcode = err; return err; } residual[0] = 1.0; n = A->n; np = A->np; t = NULL; At = NULL; p_c_times = lis_wtime(); if( precon_type==LIS_PRECON_TYPE_IS ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR ) { solver->options[LIS_OPTIONS_ISLEVEL] = 0; } } else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE ) { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI); } } else if( scale ) { if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI ) { if( A->matrix_type!=LIS_MATRIX_BSR ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,block,block,NULL,NULL); lis_matrix_set_type(B,storage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } err = lis_matrix_split(A); if( err ) return err; err = lis_matrix_diag_duplicate(A->D,&solver->WD); if( err ) return err; lis_matrix_diag_copy(A->D,solver->WD); lis_matrix_diag_inverse(solver->WD); lis_matrix_bscaling_bsr(A,solver->WD); lis_vector_duplicate(A,&t); lis_matrix_diag_matvec(solver->WD,b,t); lis_vector_copy(t,b); lis_vector_destroy(t); t = NULL; } else { if( solver->d==NULL ) { err = lis_vector_duplicate(A,&solver->d); if( err ) { return err; } } if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG ) { scale = LIS_SCALE_SYMM_DIAG; } if( !A->is_scaled ) { lis_matrix_scaling(A,b,solver->d,scale); } else if( !b->is_scaled ) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { b->value[i] = b->value[i]*solver->d->value[i]; } } } } /* precon_type = precon->precon_type;*/ if( precon_type==LIS_PRECON_TYPE_IS ) { if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR ) { AA = solver->A; bb = solver->b; } else { AA = precon->A; bb = precon->Pb; } } else { AA = A; bb = b; } p_c_times = lis_wtime() - p_c_times; itimes = lis_wtime(); /* Matrix Convert */ solver->A = AA; solver->b = bb; err = lis_matrix_convert_self(solver); if( err ) { lis_vector_destroy(xx); lis_solver_work_destroy(solver); lis_free(residual); solver->retcode = err; return err; } block = solver->A->bnr; if( A->my_rank==0 ) { if( output ) printf("precision : %s\n", lis_precisionname[precision]); if( output ) printf("solver : %s %d\n", lis_solvername[nsolver],nsolver); switch( precon_type ) { case LIS_PRECON_TYPE_ILU: i = solver->options[LIS_OPTIONS_FILL]; if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR ) { if( output ) sprintf(buf,"Block %s(%d)",lis_preconname[precon_type],i); } else { if( output ) sprintf(buf,"%s(%d)",lis_preconname[precon_type],i); } break; default: if( output ) sprintf(buf,"%s",lis_preconname[precon_type]); break; } if( solver->options[LIS_OPTIONS_ADDS] && precon_type ) { if( output ) printf("precon : %s + additive schwarz\n", buf); } else { if( output ) printf("precon : %s\n", buf); } } switch(conv_cond) { case LIS_CONV_COND_NRM2_R: case LIS_CONV_COND_NRM2_B: if( A->my_rank==0 ) { if( output ) ("CONV_COND : ||r||_2 <= %6.1e*||r_0||_2\n", tol); } break; case LIS_CONV_COND_NRM1_B: lis_vector_nrm1(b,&nrm2); nrm2 = nrm2*tol_w + tol; if( A->my_rank==0 ) { if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2); } break; } if( A->my_rank==0 ) { if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC ) { if( output ) printf("storage : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block); } else { if( output ) printf("storage : %s\n", lis_storagename[AA->matrix_type-1]); } } /* create work vector */ err = lis_solver_malloc_work[nsolver](solver); if( err ) { lis_vector_destroy(xx); lis_precon_destroy(precon); solver->retcode = err; return err; } if( nsolver==LIS_SOLVER_BICG && is_use_at ) { if( output ) lis_printf(A->comm,"Use At\n"); lis_matrix_duplicate(AA,&At); lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]); lis_matrix_convert(AA,At); solver->At = At; } solver->x = xx; solver->xx = x; solver->precon = precon; solver->residual = residual; /* execute solver */ #ifndef USE_QUAD_PRECISION err = lis_solver_execute[nsolver](solver); #else if( precision==LIS_PRECISION_DOUBLE ) { err = lis_solver_execute[nsolver](solver); } else if( precision==LIS_PRECISION_QUAD ) { err = lis_solver_execute_quad[nsolver](solver); } else if( precision==LIS_PRECISION_SWITCH ) { err = lis_solver_execute_switch[nsolver](solver); } #endif solver->retcode = err; if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { x->value[i] = xx->value[i]*solver->d->value[i]; } } else { #ifndef USE_QUAD_PRECISION lis_vector_copy(xx,x); #else if( precision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(xx,x); } else { lis_vector_copyex_mn(xx,x); } #endif } itimes = lis_wtime() - itimes - solver->ptimes; p_i_times = solver->ptimes; solver->ptimes = p_c_times + p_i_times; solver->p_c_times = p_c_times; solver->p_i_times = p_i_times; solver->times = solver->ptimes + itimes; solver->itimes = itimes; lis_solver_work_destroy(solver); lis_vector_duplicate(A,&t); xx->precision = LIS_PRECISION_DEFAULT; lis_matvec(A,xx,t); lis_vector_xpay(b,-1.0,t); if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS) { #ifdef _OPENMP #pragma omp parallel for #endif for(i=0;i<n;i++) { t->value[i] = t->value[i]/solver->d->value[i]; } } lis_vector_nrm2(t,&nrm2); /* solver->resid = nrm2; */ if( A->my_rank==0 ) { if( err ) { if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err); } else { if( output ) printf("lis_solve : normal end\n\n"); } } if( precision==LIS_PRECISION_DOUBLE ) { solver->iter2 = solver->iter; } else if( precision==LIS_PRECISION_QUAD ) { solver->iter2 = 0; } lis_vector_destroy(t); /* lis_vector_destroy(d);*/ lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads,maxthreads; LIS_INT gn,nnz,np; LIS_INT i,j,k,si,sj,sk,ii,jj,ctr; LIS_INT l,m,n,nn; LIS_INT is,ie; LIS_INT err,iter,matrix_type,storage,ss,se; LIS_INT *ptr,*index; double time,time2,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 5 ) { if( my_rank==0 ) { printf("Usage: %s l m n iter [matrix_type]\n", argv[0]); } CHKERR(1); } l = atoi(argv[1]); m = atoi(argv[2]); n = atoi(argv[3]); iter = atoi(argv[4]); if (argv[5] == NULL) { storage = 0; } else { storage = atoi(argv[5]); } if( iter<=0 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("iter=%lld <= 0\n",iter); #else if( my_rank==0 ) printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( l<=0 || m<=0 || n<=0 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("l=%lld <=0, m=%lld <=0 or n=%lld <=0\n",l,m,n); #else if( my_rank==0 ) printf("l=%d <=0, m=%d <=0 or n=%d <=0\n",l,m,n); #endif CHKERR(1); } if( storage<0 || storage>11 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",storage,storage); #else if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",storage,storage); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONG__LONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); if( my_rank==0 ) { #ifdef _LONG__LONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ nn = l*m*n; err = lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_matrix_set_size(A0,0,nn); CHKERR(err); ptr = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT)); if( ptr==NULL ) CHKERR(1); index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT)); if( index==NULL ) CHKERR(1); value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR)); if( value==NULL ) CHKERR(1); lis_matrix_get_range(A0,&is,&ie); ctr = 0; for(ii=is;ii<ie;ii++) { i = ii/(m*n); j = (ii - i*m*n)/n; k = ii - i*m*n - j*n; for(si=-1;si<=1;si++) { if( i+si>-1 && i+si<l ) { for(sj=-1;sj<=1;sj++) { if( j+sj>-1 && j+sj<m ) { for(sk=-1;sk<=1;sk++) { if( k+sk>-1 && k+sk<n ) { jj = ii + si*m*n + sj*n + sk; index[ctr] = jj; if( jj==ii ) { value[ctr++] = 26.0;} else { value[ctr++] = -1.0;} } } } } } } ptr[ii-is+1] = ctr; } ptr[0] = 0; err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0); CHKERR(err); err = lis_matrix_assemble(A0); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONG__LONG printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz); printf("number of iterations = %lld\n\n",iter); #else printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz); printf("number of iterations = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } for(i=0;i<n;i++) { lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ if (storage==0) { ss = 1; se = 11; } else { ss = storage; se = storage+1; } for (matrix_type=ss;matrix_type<se;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x,v; LIS_SCALAR ntimes,nmflops,nnrm2; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT gn,nnz,mode; LIS_INT i,j,jj,j0,j1,l,k,n,np,h,ih; LIS_INT m,nn,ii; LIS_INT block; LIS_INT rn,rmin,rmax,rb; LIS_INT is,ie,clsize,ci,*iw; LIS_INT err,iter,matrix_type; LIS_INT *ptr,*index; double mem,val,ra,rs,ri,ria,ca,time,time2,convtime,val2,nnzs,nnzap,nnzt; double commtime,comptime,flops; FILE *file; char path[1024]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 4 ) { if( my_rank==0 ) printf("Usage: spmvtest5 matrix_filename matrix_type iter [block] \n"); lis_finalize(); exit(0); } file = fopen(argv[1], "r"); if( file==NULL ) CHKERR(1); matrix_type = atoi(argv[2]); iter = atoi(argv[3]); if (argv[4] == NULL) { block = 2; } else { block = atoi(argv[4]); } if( matrix_type<1 || matrix_type>11 ) { if( my_rank==0 ) printf("matrix_type=%d <1 or matrix_type=%d >11\n",matrix_type,matrix_type); CHKERR(1); } if( iter<=0 ) { if( my_rank==0 ) printf("iter=%d <= 0\n",iter); CHKERR(1); } if( my_rank==0 ) { printf("\n"); printf("number of processes = %d\n",nprocs); } #ifdef _OPENMP if( my_rank==0 ) { nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_input(A0,NULL,NULL,argv[1]); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { printf("block size of BSR and BSC = %d x %d\n",block,block); printf("iteration count = %d\n\n",iter); } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { A->bnr = block; A->bnc = block; } comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); } lis_matrix_destroy(A); lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT nnz; LIS_INT i,n,np; LIS_INT block; LIS_INT is,ie; LIS_INT err,iter,matrix_type; double time,time2,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; char path[1024]; FILE *file; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 3 ) { if( my_rank==0 ) { printf("Usage: %s matrix_filename_list iter [block] \n", argv[0]); } lis_finalize(); exit(0); } file = fopen(argv[1], "r"); if( file==NULL ) CHKERR(1); iter = atoi(argv[2]); if (argv[3] == NULL) { block = 2; } else { block = atoi(argv[3]); } if( iter<=0 ) { #ifdef _LONG__LONG printf("iter=%lld <= 0\n",iter); #else printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONG__LONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP if( my_rank==0 ) { nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); #ifdef _LONG__LONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ while( fscanf(file, "%s\n", path)==1 ) { if( my_rank==0 ) { printf("matrix_filename = %s\n", path); } lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_input(A0,NULL,NULL,path); if( err ) CHKERR(err); n = A0->n; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONG__LONG printf("block size of BSR and BSC = %lld x %lld\n",block,block); printf("number of iterations = %lld\n\n",iter); #else printf("block size of BSR and BSC = %d x %d\n",block,block); printf("number of iterations = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ for (matrix_type=1;matrix_type<11;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); if( my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { A->bnr = block; A->bnc = block; } } comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); } fclose(file); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT lis_input_hb_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { char buf[BUFSIZE]; char title[128], key[128], mtx[64], dat[128]; char *p; char MXTYPE_F,MXTYPE_S,MXTYPE_T; char RHSTYP_F,RHSTYP_S,RHSTYP_T; LIS_INT TOTCRD,PTRCRD,INDCRD,VALCRD,RHSCRD; LIS_INT NROW,NCOL,NNZERO,NELTVL; LIS_INT NRHS,NRHSIX; LIS_INT iptr,iind,ival,irhs; LIS_INT wptr,wind,wval,wrhs; LIS_INT i,k,j,my_rank; LIS_INT err; LIS_INT n,is,ie; LIS_INT *ptr, *index; LIS_INT matrix_type; LIS_SCALAR *value; LIS_MATRIX B; #ifdef USE_MPI MPI_Comm_rank(A->comm,&my_rank); #else my_rank = 0; #endif matrix_type = A->matrix_type; /* Line 1 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } strncpy(title, buf ,72); title[72] = '\0'; strncpy(key ,&buf[72], 8); key[8] = '\0'; printf("title: %s\n",title); printf("key : %s\n",key); /* Line 2 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%14lld%14lld%14lld%14lld%14lld", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #else if( sscanf(buf, "%14d%14d%14d%14d%14d", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%14lld%14lld%14lld%14lld%14lld\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #else printf("%14d%14d%14d%14d%14d\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #endif /* Line 3 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld %lld %lld", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #else if( sscanf(buf, "%s %d %d %d %d", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); MXTYPE_F = mtx[0]; MXTYPE_S = mtx[1]; MXTYPE_T = mtx[2]; if( mtx[0]!='r' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n"); return LIS_ERR_FILE_IO; } /* if( mtx[1]!='u' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not unsymmetric\n"); return LIS_ERR_FILE_IO; } */ if( mtx[2]!='a' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not assembled\n"); return LIS_ERR_FILE_IO; } if( NROW!=NCOL ) { LIS_SETERR(LIS_ERR_FILE_IO,"matrix is not square\n"); return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%c%c%c %lld %lld %lld %lld\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #else printf("%c%c%c %d %d %d %d\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #endif /* Line 4 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } lis_input_hb_get_fmt( buf ,16,&iptr,&wptr); lis_input_hb_get_fmt(&buf[16],16,&iind,&wind); lis_input_hb_get_fmt(&buf[32],20,&ival,&wval); lis_input_hb_get_fmt(&buf[52],20,&irhs,&wrhs); #ifdef _LONGLONG printf("%lld %lld %lld %lld\n",iptr,iind,ival,irhs); printf("%lld %lld %lld %lld\n",wptr,wind,wval,wrhs); #else printf("%d %d %d %d\n",iptr,iind,ival,irhs); printf("%d %d %d %d\n",wptr,wind,wval,wrhs); #endif /* Line 5 */ if( RHSCRD!=0 ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX); #else sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX); #endif /* #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX) != 3 ) #else if( sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX) != 3 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } */ for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); RHSTYP_F = mtx[0]; RHSTYP_S = mtx[1]; RHSTYP_T = mtx[2]; #ifdef _LONGLONG printf("%c%c%c %lld %lld\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #else printf("%c%c%c %d %d\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #endif } err = lis_matrix_set_size(A,0,NROW); if( err ) { return err; } n = A->n; lis_matrix_get_range(A,&is,&ie); err = lis_matrix_malloc_csr(n,NNZERO,&ptr,&index,&value); if( err ) { return err; } /* read data */ k = 0; for( i=0; i<PTRCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iptr&&k<n+1;j++) { strncpy(dat, p, wptr); dat[wptr] = '\0'; ptr[k] = atoi(dat) - 1; p += wptr; k++; } } k = 0; for( i=0; i<INDCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iind&&k<NNZERO;j++) { strncpy(dat, p, wind); dat[wind] = '\0'; index[k] = atoi(dat) - 1; p += wind; k++; } } k = 0; for( i=0; i<VALCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; k++; } } if( RHSCRD>0 ) { /* k = 0; for( i=0; i<RHSCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; printf("%e ",value[k]); k++; } printf("\n"); } */ } err = lis_matrix_set_csc(NNZERO,ptr,index,value,A); if( err ) { return err; } err = lis_matrix_assemble(A); if( err ) return err; if( matrix_type!=LIS_MATRIX_CSC ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert_csc2csr(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } return LIS_SUCCESS; }
LIS_INT lis_esolve(LIS_MATRIX A, LIS_VECTOR x, LIS_SCALAR *evalue0, LIS_ESOLVER esolver) { LIS_INT nesolver,niesolver,emaxiter; LIS_SCALAR *evalue; LIS_VECTOR *evector; LIS_SCALAR *resid; LIS_SCALAR *rhistory; LIS_INT *iter,*iter2; LIS_INT err; LIS_INT output; LIS_INT ss, mode; double time; double gshift; LIS_INT estorage,eblock; LIS_MATRIX B; LIS_INT eprecision; LIS_VECTOR xx; LIS_DEBUG_FUNC_IN; /* begin parameter check */ err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL); if( err ) return err; if( x==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector x is undefined\n"); return LIS_ERR_ILL_ARG; } if( A->n!=x->n ) { return LIS_ERR_ILL_ARG; } if( A->gn<=0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A is less than 0\n",A->gn); return LIS_ERR_ILL_ARG; } nesolver = esolver->options[LIS_EOPTIONS_ESOLVER]; niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER]; ss = esolver->options[LIS_EOPTIONS_SUBSPACE]; mode = esolver->options[LIS_EOPTIONS_MODE]; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; gshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; estorage = esolver->options[LIS_EOPTIONS_STORAGE]; eblock = esolver->options[LIS_EOPTIONS_STORAGE_BLOCK]; eprecision = esolver->options[LIS_EOPTIONS_PRECISION]; esolver->eprecision = eprecision; if( nesolver < 1 || nesolver > LIS_ESOLVER_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_ESOLVER is %d (Set between 1 to %d)\n",nesolver, LIS_ESOLVER_LEN); return LIS_ERR_ILL_ARG; } if( niesolver < 1 || niesolver > 7 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 7)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && niesolver > 4 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 4 for Subspace)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI && niesolver == LIS_ESOLVER_PI ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 7 for Lanczos)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI && (( niesolver == LIS_ESOLVER_PI ) || ( niesolver == LIS_ESOLVER_CG) || ( niesolver == LIS_ESOLVER_JD)) ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 4 or 6 for Arnoldi)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Subspace)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if (( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI ) && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Lanczos and Arnoldi)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Subspace)\n", mode, ss); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == ( LIS_ESOLVER_LI || LIS_ESOLVER_AI ) && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Lanczos or Arnoldi)\n", mode, ss); return LIS_ERR_ILL_ARG; } #ifdef USE_QUAD_PRECISION if( eprecision==LIS_PRECISION_QUAD && lis_esolver_execute_quad[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision eigensolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( eprecision==LIS_PRECISION_SWITCH && lis_esolver_execute_switch[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch esolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( esolver->options[LIS_EOPTIONS_SWITCH_MAXITER]==-1 ) { esolver->options[LIS_EOPTIONS_SWITCH_MAXITER] = emaxiter; } #endif /* create eigenvalue array */ if( esolver->evalue ) lis_free(esolver->evalue); evalue = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::evalue"); if( evalue==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } evalue[0] = 1.0; evalue[ss-1] = 1.0; /* create residual norm array */ if( esolver->resid ) lis_free(esolver->resid); resid = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::resid"); if( resid==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create number of iterations array */ if( esolver->iter ) lis_free(esolver->iter); iter = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter"); if( iter==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create quad precision number of iterations array */ if( esolver->iter2 ) lis_free(esolver->iter2); iter2 = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter2"); if( iter2==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { esolver->retcode = err; return err; } if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) { if( output ) lis_printf(A->comm,"initial vector x : 1\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(1.0,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(1.0,xx); } else { lis_vector_set_allex_nm(1.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x : user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* global shift */ if ( output ) if( A->my_rank==0 ) printf("shift : %e\n", gshift); /* create eigenvector array */ if( esolver->evector ) lis_free(esolver->evector); evector = (LIS_VECTOR *)lis_malloc((ss+2)*sizeof(LIS_VECTOR),"lis_esolve::evector"); if( evector==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_VECTOR)); esolver->retcode = err; return err; } /* create residual history array */ if( esolver->rhistory ) lis_free(esolver->rhistory); rhistory = (LIS_SCALAR *)lis_malloc((emaxiter+2)*sizeof(LIS_SCALAR),"lis_esolve::rhistory"); if( rhistory==NULL ) { LIS_SETERR_MEM((emaxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); esolver->retcode = err; return err; } /* convert matrix */ if( estorage>0 && A->matrix_type!=estorage ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,eblock,eblock,NULL,NULL); lis_matrix_set_type(B,estorage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } esolver->A = A; esolver->evalue = evalue; esolver->x = x; esolver->evector = evector; rhistory[0] = 1.0; esolver->rhistory = rhistory; esolver->resid = resid; esolver->iter = iter; esolver->iter2 = iter2; if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("precision : long double\n"); #else if ( output ) printf("precision : %s\n", lis_eprecisionname[eprecision]); #endif #ifdef _LONG__LONG if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #else if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #endif } if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1Le * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #else if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1e * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #endif } if( A->my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { #ifdef _LONG__LONG if ( output ) printf("matrix storage format : %s(%lld x %lld)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #else if ( output ) printf("matrix storage format : %s(%d x %d)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #endif } else { if ( output ) printf("matrix storage format : %s\n", lis_estoragename[A->matrix_type-1]); } } time = lis_wtime(); esolver->ptime = 0; esolver->itime = 0; esolver->p_c_time = 0; esolver->p_i_time = 0; if (gshift != 0.0) lis_matrix_shift_diagonal(A, gshift); /* create work vector */ err = lis_esolver_malloc_work[nesolver](esolver); if( err ) { lis_vector_destroy(xx); esolver->retcode = err; return err; } esolver->x = xx; esolver->xx = x; /* execute esolver */ #ifndef USE_QUAD_PRECISION err = lis_esolver_execute[nesolver](esolver); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_esolver_execute[nesolver](esolver); } else if( eprecision==LIS_PRECISION_QUAD ) { err = lis_esolver_execute_quad[nesolver](esolver); } else if( eprecision==LIS_PRECISION_SWITCH ) { err = lis_esolver_execute_switch[nesolver](esolver); } #endif esolver->retcode = err; *evalue0 = esolver->evalue[0]; lis_vector_copy(esolver->x, x); esolver->time = lis_wtime() - time; lis_matrix_shift_diagonal(A, -gshift); if( A->my_rank==0 ) { if( err ) { #ifdef _LONG__LONG if ( output ) printf("eigensolver status : %s(code=%lld)\n\n",lis_ereturncode[err],err); #else if ( output ) printf("eigensolver status : %s(code=%d)\n\n",lis_ereturncode[err],err); #endif } else { if ( output ) printf("eigensolver status : normal end\n\n"); } } if( eprecision==LIS_PRECISION_DOUBLE ) { esolver->iter2[mode] = esolver->iter[mode]; } else if( eprecision==LIS_PRECISION_QUAD ) { esolver->iter2[mode] = 0; } lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT err; LIS_INT i,j,k,ii,jj,ik,jk; LIS_INT n,annz,cl,cu; LIS_INT *ww,*il,*iu; LIS_SCALAR t,dd; LIS_REAL tol,nrm; LIS_SCALAR *d,*l,*u; LIS_MATRIX A,B; LIS_MATRIX_ILU W,Z; LIS_VECTOR D; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; annz = A->n / 10; W = NULL; ww = NULL; d = NULL; l = NULL; u = NULL; il = NULL; iu = NULL; err = lis_matrix_ilu_create(n,1,&W); if( err ) return err; err = lis_matrix_ilu_create(n,1,&Z); if( err ) return err; err = lis_matrix_ilu_setCR(W); if( err ) return err; err = lis_matrix_ilu_setCR(Z); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) return err; d = D->value; err = lis_matrix_ilu_premalloc(annz,W); if( err ) return err; err = lis_matrix_ilu_premalloc(annz,Z); if( err ) return err; l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l"); if( l==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u"); if( u==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il"); if( il==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu"); if( iu==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } ww = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww"); if( ww==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } err = lis_matrix_duplicate(A,&B); if( err ) return err; err = lis_matrix_convert_csr2csc(A,B); if( err ) { return err; } for(i=0;i<n;i++) ww[i] = 0; for(i=0;i<n;i++) { W->value[i][0] = 1.0; W->index[i][0] = i; W->nnz[i] = 1; Z->value[i][0] = 1.0; Z->index[i][0] = i; Z->nnz[i] = 1; } for(i=0; i<n; i++) { /* nrm_inf(A[i,:]) */ nrm = 0.0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { nrm = _max(nrm,fabs(A->value[j])); } nrm = 1.0/nrm; /* l = AZ_i */ cl = 0; memset(l,0,n*sizeof(LIS_SCALAR)); for(k=0;k<Z->nnz[i];k++) { ii = Z->index[i][k]; for(j=B->ptr[ii];j<B->ptr[ii+1];j++) { jj = B->index[j]; if( jj>i ) { l[jj] += B->value[j]*Z->value[i][k]; if( ww[jj]==0 ) { ww[jj] = 1; il[cl++] = jj; } } } } for(k=0;k<cl;k++) ww[il[k]] = 0; /* u = W_i'A */ cu = 0; memset(u,0,n*sizeof(LIS_SCALAR)); for(k=0;k<W->nnz[i];k++) { ii = W->index[i][k]; for(j=A->ptr[ii];j<A->ptr[ii+1];j++) { jj = A->index[j]; #ifdef USE_MPI if( jj>n-1 ) continue; #endif u[jj] += A->value[j]*W->value[i][k]; if( jj>i && ww[jj]==0 ) { ww[jj] = 1; iu[cu++] = jj; } } } for(k=0;k<cu;k++) ww[iu[k]] = 0; /* d_ii = uZ_i or W_i'l */ t = 0.0; for(k=0;k<Z->nnz[i];k++) { t += u[Z->index[i][k]]*Z->value[i][k]; } d[i] = 1.0/t; /* for j>i, l_j!=0 */ /* w_j = w_j - (l_j/d_ii)*w_i */ for(jj=0;jj<cl;jj++) { j = il[jj]; dd = l[j]*d[i]; for(k=0;k<W->nnz[j];k++) { ww[W->index[j][k]] = k+1; } for(ik=0;ik<W->nnz[i];ik++) { jk = ww[W->index[i][ik]]; if( jk!=0 ) { t = dd*W->value[i][ik]; if( fabs(t)*nrm > tol ) { W->value[j][jk-1] -= t; } } else { t = dd*W->value[i][ik]; if( fabs(t)*nrm > tol ) { if( W->nnz[j] == W->nnz_ma[j] ) { W->nnz_ma[j] += annz; err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W); if( err ) return err; } jk = W->nnz[j]; W->index[j][jk] = W->index[i][ik]; W->value[j][jk] = -t; W->nnz[j]++; } } } for(k=0;k<W->nnz[j];k++) { ww[W->index[j][k]] = 0; } } /* for j>i, u_j!=0 */ /* z_j = z_j - (u_j/d_ii)*z_i */ for(jj=0;jj<cu;jj++) { j = iu[jj]; dd = u[j]*d[i]; for(k=0;k<Z->nnz[j];k++) { ww[Z->index[j][k]] = k+1; } for(ik=0;ik<Z->nnz[i];ik++) { jk = ww[Z->index[i][ik]]; if( jk!=0 ) { t = dd*Z->value[i][ik]; if( fabs(t)*nrm > tol ) { Z->value[j][jk-1] -= t; } } else { t = dd*Z->value[i][ik]; if( fabs(t)*nrm > tol ) { if( Z->nnz[j] == Z->nnz_ma[j] ) { Z->nnz_ma[j] += annz; err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z); if( err ) return err; } jk = Z->nnz[j]; Z->index[j][jk] = Z->index[i][ik]; Z->value[j][jk] = -t; Z->nnz[j]++; } } } for(k=0;k<Z->nnz[j];k++) { ww[Z->index[j][k]] = 0; } } } lis_matrix_destroy(B); lis_free2(5,l,u,ww,il,iu); precon->L = W; precon->U = Z; precon->D = D; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon) { LIS_INT err; LIS_INT i,j,k,ii,jj,len,lfil; LIS_INT n,nnz,annz,cl,cu,cc,m; LIS_INT *wu,*wl,*il,*iu,*ic,*pc; LIS_SCALAR t,v; LIS_REAL tol,tol_dd,nrm; LIS_SCALAR *d,*r,*c,*l,*u,*tmp; LIS_MATRIX A,B; LIS_MATRIX_ILU W,Z; LIS_VECTOR D; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; nnz = A->nnz; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; annz = 10+A->nnz / A->n; lfil = (LIS_INT)((double)A->nnz/(2.0*n))*m; W = NULL; Z = NULL; wu = NULL; wl = NULL; d = NULL; l = NULL; u = NULL; il = NULL; iu = NULL; err = lis_matrix_ilu_create(n,1,&W); if( err ) return err; err = lis_matrix_ilu_create(n,1,&Z); if( err ) return err; err = lis_matrix_ilu_setCR(W); if( err ) return err; err = lis_matrix_ilu_setCR(Z); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) return err; d = D->value; tmp = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l"); if( tmp==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } r = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l"); if( r==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } c = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u"); if( c==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } l = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l"); if( l==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } u = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u"); if( u==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } il = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il"); if( il==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu"); if( iu==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } ic = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu"); if( ic==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } wu = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww"); if( wu==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } wl = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww"); if( wl==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } pc = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu"); if( pc==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } lis_matrix_sort_csr(A); err = lis_matrix_duplicate(A,&B); if( err ) return err; err = lis_matrix_convert_csr2csc(A,B); if( err ) return err; for(i=0;i<n;i++) { wu[i] = 0; wl[i] = 0; pc[i] = A->ptr[i]; } for(i=0; i<n; i++) { /* nrm_inf(A[i,:]) */ nrm = 0.0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { nrm = _max(nrm,fabs(A->value[j])); } tol_dd = nrm * tol; /* l = e_i */ /* u = e_i */ l[i] = 1.0; u[i] = 1.0; il[0] = i; iu[0] = i; cl = 1; cu = 1; wu[i] = 1; wl[i] = 1; cc = 0; /* r = e_i^T*A */ for(j=A->ptr[i];j<A->ptr[i+1];j++) { jj = A->index[j]; r[jj] = A->value[j]; } /* c = A_i = A*e_i */ for(j=B->ptr[i];j<B->ptr[i+1];j++) { jj = B->index[j]; c[jj] = B->value[j]; } /* W_i = W_i - (r*Z_j/D_jj)*W_j */ for(j=0;j<i;j++) { t = 0.0; for(k=0;k<Z->nnz[j];k++) { t += r[Z->index[j][k]]*Z->value[j][k]; } t = t * d[j]; if( fabs(t) > tol_dd ) { for(k=0;k<W->nnz[j];k++) { v = t * W->value[j][k]; if( fabs(v) > tol_dd ) { jj = W->index[j][k]; if( wl[jj]==1 ) { l[jj] -= v; } else { l[jj] = -v; il[cl++] = jj; wl[jj] = 1; } } } } } /* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j */ for(j=0;j<i;j++) { t = 0.0; for(k=0;k<W->nnz[j];k++) { t += c[W->index[j][k]]*W->value[j][k]; } t = t * d[j]; if( fabs(t) > tol_dd ) { for(k=0;k<Z->nnz[j];k++) { v = t * Z->value[j][k]; if( fabs(v) > tol_dd ) { jj = Z->index[j][k]; if( wu[jj]==1 ) { u[jj] -= v; } else { u[jj] = -v; iu[cu++] = jj; wu[jj] = 1; } } } } } /* len = _min(lfil,cl); for(j=0;j<cl;j++) tmp[j] = fabs(l[il[j]]); lis_sort_di(0,cl-1,tmp,il); lis_sort_i(0,len-1,il); cl = len; */ /* k = cl; for(j=0;j<cl;j++) { if( fabs(l[il[j]])<= tol_dd ) { wl[il[j]] = 0; il[j] = n; k--; } } lis_sort_i(0,cl-1,il); cl = k; k = cu; for(j=0;j<cu;j++) { if( fabs(u[iu[j]])<= tol_dd ) { wu[iu[j]] = 0; iu[j] = n; k--; } } lis_sort_i(0,cu-1,iu); cu = k; */ W->nnz[i] = cl; if( cl > 0 ) { W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT)); W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR)); memcpy(W->index[i],il,cl*sizeof(LIS_INT)); for(j=0;j<cl;j++) { W->value[i][j] = l[il[j]]; } } Z->nnz[i] = cu; if( cu > 0 ) { Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT)); Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR)); memcpy(Z->index[i],iu,cu*sizeof(LIS_INT)); for(j=0;j<cu;j++) { Z->value[i][j] = u[iu[j]]; } } for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0; for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0; for(j=0;j<cl;j++) { wl[il[j]] = 0; l[il[j]] = 0.0; } for(j=0;j<cu;j++) { wu[iu[j]] = 0; } /* D_ii = W_i^T * A * Z_i */ cl = 0; for(k=0;k<Z->nnz[i];k++) { ii = Z->index[i][k]; for(j=B->ptr[ii];j<B->ptr[ii+1];j++) { jj = B->index[j]; if( wl[jj]==0 ) { l[jj] = B->value[j]*Z->value[i][k]; wl[jj] = 1; il[cl++] = jj; } else { l[jj] += B->value[j]*Z->value[i][k]; } } } t = 0.0; for(j=0;j<W->nnz[i];j++) { k = W->index[i][j]; t += W->value[i][j] * l[k]; } d[i] = 1.0 / t; for(j=0;j<cl;j++) wl[il[j]] = 0; } lis_matrix_destroy(B); lis_free2(11,r,c,il,l,wl,iu,u,wu,ic,pc,tmp); precon->L = W; precon->U = Z; precon->D = D; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_precon_create_saamg(LIS_SOLVER solver, LIS_PRECON precon) { #if defined(USE_SAAMG) LIS_MATRIX A,B; LIS_COMMTABLE table; LIS_INT i; LIS_INT unsym,sol; LIS_INT err; LIS_REAL theta; #ifdef USE_MPI LIS_MPI_Fint comm; #endif LIS_DEBUG_FUNC_IN; if( solver->A->matrix_type!=LIS_MATRIX_CRS ) { A = solver->A; err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CRS); err = lis_matrix_convert(A,B); if( err ) return err; solver->A = B; lis_matrix_destroy(B); solver->A = A; } precon->A = solver->A; precon->is_copy = LIS_FALSE; A = precon->A; sol = solver->options[LIS_OPTIONS_SOLVER]; unsym = solver->options[LIS_OPTIONS_SAAMG_UNSYM]; theta = solver->params[LIS_PARAMS_SAAMG_THETA - LIS_OPTIONS_LEN]; #if 0 if( sol!=LIS_SOLVER_CG && !unsym ) { unsym = LIS_TRUE; } #endif err = lis_vector_duplicate(A,&precon->temp); if( err ) { return err; } F77_FUNC_(finit_data_creation,FINIT_DATA_CREATION)(c_data_creation_ptr_bar); F77_FUNC_(finit_data_creation_unsym,FINIT_DATA_CREATION_UNSYM)(c_data_creation_unsym_ptr_bar); F77_FUNC_(finit_v_cycle,FINIT_V_CYCLE)(c_v_cycle_ptr_bar); F77_FUNC_(finit_clear_matrix,FINIT_CLEAR_MATRIX)(c_clear_matrix_ptr_bar); lis_matrix_split(A); #ifdef USE_MPI comm = MPI_Comm_c2f(A->comm); lis_send_recv(A->commtable,A->D->value); table = A->commtable; if( !unsym ) { (*(void (*)())f_data_creation_ptr)(&A->n,&A->np,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &table->neibpetot, table->neibpe, table->import_ptr, table->import_index, table->export_ptr, table->export_index, &table->imnnz,&table->exnnz, &comm, &precon->level_num,&precon->wsize, &theta); } else { (*(void (*)())f_data_creation_unsym_ptr)(&A->n,&A->np,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &table->neibpetot, table->neibpe, table->import_ptr, table->import_index, table->export_ptr, table->export_index, &table->imnnz,&table->exnnz, &comm, &precon->level_num,&precon->wsize, &theta); } #else if( !unsym ) { (*(void (*)())f_data_creation_ptr)(&A->n,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &precon->level_num, &theta); } else { (*(void (*)())f_data_creation_unsym_ptr)(&A->n,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &precon->level_num, &theta); } #endif LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #else LIS_DEBUG_FUNC_IN; precon->A = solver->A; precon->is_copy = LIS_FALSE; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #endif }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A0,A,B; LIS_VECTOR x,b,u; LIS_SOLVER solver; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nsol,rhs,len,i,j,k,jj,kk,p,nrow_p,l,n; LIS_INT err,iter,iter_double,iter_quad; LIS_INT *iw,*nrow,*index,*ptr; LIS_SCALAR *s,t,*value; double times,itimes,ptimes,p_c_times,p_i_times; LIS_REAL resid; char solvername[128]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 5 ) { if( my_rank==0 ) { printf("Usage: %s matrix_filename rhs_setting solution_filename residual_filename [options]\n", argv[0]); } CHKERR(1); } len = (LIS_INT)strlen(argv[2]); if( len==1 ) { if( argv[2][0]=='0' || argv[2][0]=='1' || argv[2][0]=='2' ) { rhs = atoi(argv[2]); } else { rhs = -1; } } else { rhs = -1; } if( my_rank==0 ) { printf("\n"); #ifdef _LONGLONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP if( my_rank==0 ) { #ifdef _LONGLONG printf("max number of threads = %lld\n",omp_get_num_procs()); printf("number of threads = %lld\n",omp_get_max_threads()); #else printf("max number of threads = %d\n",omp_get_num_procs()); printf("number of threads = %d\n",omp_get_max_threads()); #endif } #endif /* read matrix and vectors from file */ err = lis_matrix_create(LIS_COMM_WORLD,&A); CHKERR(err); err = lis_vector_create(LIS_COMM_WORLD,&b); CHKERR(err); err = lis_vector_create(LIS_COMM_WORLD,&x); CHKERR(err); err = lis_input(A,b,x,argv[1]); CHKERR(err); err = lis_matrix_duplicate(A,&A0); CHKERR(err); lis_matrix_set_type(A0,LIS_MATRIX_CSR); err = lis_matrix_convert(A,A0); CHKERR(err); lis_matrix_destroy(A); A = A0; err = lis_vector_duplicate(A,&u); CHKERR(err); if( lis_vector_is_null(b) ) { lis_vector_destroy(b); lis_vector_duplicate(A,&b); CHKERR(err); if( rhs==0 ) { CHKERR(1); } else if( rhs==1 ) { err = lis_vector_set_all(1.0,b); } else { err = lis_vector_set_all(1.0,u); lis_matvec(A,u,b); } } if( rhs==-1 ) { lis_input_vector(b,argv[2]); } if( lis_vector_is_null(x) ) { lis_vector_destroy(x); err = lis_vector_duplicate(A,&x); CHKERR(err); } err = lis_solver_create(&solver); CHKERR(err); lis_solver_set_option("-print mem",solver); lis_solver_set_optionC(solver); err = lis_solve(A,b,x,solver); CHKERR(err); lis_solver_get_itersex(solver,&iter,&iter_double,&iter_quad); lis_solver_get_timeex(solver,×,&itimes,&ptimes,&p_c_times,&p_i_times); lis_solver_get_residualnorm(solver,&resid); lis_solver_get_solver(solver,&nsol); lis_solver_get_solvername(nsol,solvername); /* write results */ if( my_rank==0 ) { #ifdef _LONGLONG #ifdef _LONG__DOUBLE printf("%s: number of iterations = %lld \n",solvername, iter); #else printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad); #endif #else #ifdef _LONG__DOUBLE printf("%s: number of iterations = %d \n",solvername, iter); #else printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad); #endif #endif printf("%s: elapsed time = %e sec.\n",solvername,times); printf("%s: preconditioner = %e sec.\n",solvername, ptimes); printf("%s: matrix creation = %e sec.\n",solvername, p_c_times); printf("%s: linear solver = %e sec.\n",solvername, itimes); #ifdef _LONG__DOUBLE printf("%s: relative residual 2-norm = %Le\n\n",solvername,resid); #else printf("%s: relative residual 2-norm = %e\n\n",solvername,resid); #endif } /* write solution */ lis_output_vector(x,LIS_FMT_MM,argv[3]); /* write residual */ lis_solver_output_rhistory(solver, argv[4]); lis_solver_destroy(solver); lis_vector_destroy(x); lis_vector_destroy(u); lis_vector_destroy(b); lis_matrix_destroy(A); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x,v; LIS_SCALAR ntimes,nmflops,nnrm2; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT gn,nnz,mode; LIS_INT i,ii,j,jj,j0,j1,l,k,n,np,h,ih; LIS_INT rn,rmin,rmax,rb; LIS_INT is,ie,clsize,ci,*iw; LIS_INT err,iter,matrix_type,s,ss,se; LIS_INT *ptr,*index; double mem,ra,rs,ri,ria,ca,time,time2,convtime,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; FILE *file; char path[1024]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 3 ) { if( my_rank==0 ) { printf("Usage: %s n iter [matrix_type]\n", argv[0]); } CHKERR(1); } n = atoi(argv[1]); iter = atoi(argv[2]); if (argv[3] == NULL) { s = 0; } else { s = atoi(argv[3]); } if( n<=0 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("n=%lld <=0\n",n); #else if( my_rank==0 ) printf("n=%d <=0\n",n); #endif CHKERR(1); } if( iter<=0 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("iter=%lld <= 0\n",iter); #else if( my_rank==0 ) printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( s<0 || s>11 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",s,s); #else if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",s,s); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONGLONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); if( my_rank==0 ) { #ifdef _LONGLONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ lis_matrix_create(LIS_COMM_WORLD,&A0); lis_matrix_set_size(A0,0,n); lis_matrix_get_size(A0,&n,&gn); lis_matrix_get_range(A0,&is,&ie); k = 0; for(i=is;i<ie;i++) { if( i>0 ) lis_matrix_set_value(LIS_INS_VALUE,i,i-1,-1.0,A0); if( i<gn-1 ) lis_matrix_set_value(LIS_INS_VALUE,i,i+1,-1.0,A0); lis_matrix_set_value(LIS_INS_VALUE,i,i,2.0,A0); } err = lis_matrix_assemble(A0); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONGLONG printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz); printf("iteration count = %lld\n\n",iter); #else printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz); printf("iteration count = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ if (s==0) { ss = 1; se = 11; } else { ss = s; se = s+1; } for (matrix_type=ss;matrix_type<se;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; MPI_Barrier(A->comm); #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }