LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads,maxthreads; LIS_INT gn,nnz,np; LIS_INT i,j,k,si,sj,sk,ii,jj,ctr; LIS_INT l,m,n,nn; LIS_INT is,ie; LIS_INT err,iter,matrix_type,storage,ss,se; LIS_INT *ptr,*index; double time,time2,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 5 ) { if( my_rank==0 ) { printf("Usage: %s l m n iter [matrix_type]\n", argv[0]); } CHKERR(1); } l = atoi(argv[1]); m = atoi(argv[2]); n = atoi(argv[3]); iter = atoi(argv[4]); if (argv[5] == NULL) { storage = 0; } else { storage = atoi(argv[5]); } if( iter<=0 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("iter=%lld <= 0\n",iter); #else if( my_rank==0 ) printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( l<=0 || m<=0 || n<=0 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("l=%lld <=0, m=%lld <=0 or n=%lld <=0\n",l,m,n); #else if( my_rank==0 ) printf("l=%d <=0, m=%d <=0 or n=%d <=0\n",l,m,n); #endif CHKERR(1); } if( storage<0 || storage>11 ) { #ifdef _LONG__LONG if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",storage,storage); #else if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",storage,storage); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONG__LONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); if( my_rank==0 ) { #ifdef _LONG__LONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ nn = l*m*n; err = lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_matrix_set_size(A0,0,nn); CHKERR(err); ptr = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT)); if( ptr==NULL ) CHKERR(1); index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT)); if( index==NULL ) CHKERR(1); value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR)); if( value==NULL ) CHKERR(1); lis_matrix_get_range(A0,&is,&ie); ctr = 0; for(ii=is;ii<ie;ii++) { i = ii/(m*n); j = (ii - i*m*n)/n; k = ii - i*m*n - j*n; for(si=-1;si<=1;si++) { if( i+si>-1 && i+si<l ) { for(sj=-1;sj<=1;sj++) { if( j+sj>-1 && j+sj<m ) { for(sk=-1;sk<=1;sk++) { if( k+sk>-1 && k+sk<n ) { jj = ii + si*m*n + sj*n + sk; index[ctr] = jj; if( jj==ii ) { value[ctr++] = 26.0;} else { value[ctr++] = -1.0;} } } } } } } ptr[ii-is+1] = ctr; } ptr[0] = 0; err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0); CHKERR(err); err = lis_matrix_assemble(A0); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONG__LONG printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz); printf("number of iterations = %lld\n\n",iter); #else printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz); printf("number of iterations = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } for(i=0;i<n;i++) { lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ if (storage==0) { ss = 1; se = 11; } else { ss = storage; se = storage+1; } for (matrix_type=ss;matrix_type<se;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT lis_precon_create_saamg(LIS_SOLVER solver, LIS_PRECON precon) { #if defined(USE_SAAMG) LIS_MATRIX A,B; LIS_COMMTABLE table; LIS_INT i; LIS_INT unsym,sol; LIS_INT err; LIS_REAL theta; #ifdef USE_MPI LIS_MPI_Fint comm; #endif LIS_DEBUG_FUNC_IN; if( solver->A->matrix_type!=LIS_MATRIX_CRS ) { A = solver->A; err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CRS); err = lis_matrix_convert(A,B); if( err ) return err; solver->A = B; lis_matrix_destroy(B); solver->A = A; } precon->A = solver->A; precon->is_copy = LIS_FALSE; A = precon->A; sol = solver->options[LIS_OPTIONS_SOLVER]; unsym = solver->options[LIS_OPTIONS_SAAMG_UNSYM]; theta = solver->params[LIS_PARAMS_SAAMG_THETA - LIS_OPTIONS_LEN]; #if 0 if( sol!=LIS_SOLVER_CG && !unsym ) { unsym = LIS_TRUE; } #endif err = lis_vector_duplicate(A,&precon->temp); if( err ) { return err; } F77_FUNC_(finit_data_creation,FINIT_DATA_CREATION)(c_data_creation_ptr_bar); F77_FUNC_(finit_data_creation_unsym,FINIT_DATA_CREATION_UNSYM)(c_data_creation_unsym_ptr_bar); F77_FUNC_(finit_v_cycle,FINIT_V_CYCLE)(c_v_cycle_ptr_bar); F77_FUNC_(finit_clear_matrix,FINIT_CLEAR_MATRIX)(c_clear_matrix_ptr_bar); lis_matrix_split(A); #ifdef USE_MPI comm = MPI_Comm_c2f(A->comm); lis_send_recv(A->commtable,A->D->value); table = A->commtable; if( !unsym ) { (*(void (*)())f_data_creation_ptr)(&A->n,&A->np,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &table->neibpetot, table->neibpe, table->import_ptr, table->import_index, table->export_ptr, table->export_index, &table->imnnz,&table->exnnz, &comm, &precon->level_num,&precon->wsize, &theta); } else { (*(void (*)())f_data_creation_unsym_ptr)(&A->n,&A->np,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &table->neibpetot, table->neibpe, table->import_ptr, table->import_index, table->export_ptr, table->export_index, &table->imnnz,&table->exnnz, &comm, &precon->level_num,&precon->wsize, &theta); } #else if( !unsym ) { (*(void (*)())f_data_creation_ptr)(&A->n,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &precon->level_num, &theta); } else { (*(void (*)())f_data_creation_unsym_ptr)(&A->n,&A->L->nnz,&A->U->nnz, A->D->value,A->L->value,A->L->ptr,A->L->index, A->U->value, A->U->ptr, A->U->index, &precon->level_num, &theta); } #endif LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #else LIS_DEBUG_FUNC_IN; precon->A = solver->A; precon->is_copy = LIS_FALSE; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #endif }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x,v; LIS_SCALAR ntimes,nmflops,nnrm2; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT gn,nnz,mode; LIS_INT i,j,jj,j0,j1,l,k,n,np,h,ih; LIS_INT m,nn,ii; LIS_INT block; LIS_INT rn,rmin,rmax,rb; LIS_INT is,ie,clsize,ci,*iw; LIS_INT err,iter,matrix_type; LIS_INT *ptr,*index; double mem,val,ra,rs,ri,ria,ca,time,time2,convtime,val2,nnzs,nnzap,nnzt; double commtime,comptime,flops; FILE *file; char path[1024]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 4 ) { if( my_rank==0 ) printf("Usage: spmvtest5 matrix_filename matrix_type iter [block] \n"); lis_finalize(); exit(0); } file = fopen(argv[1], "r"); if( file==NULL ) CHKERR(1); matrix_type = atoi(argv[2]); iter = atoi(argv[3]); if (argv[4] == NULL) { block = 2; } else { block = atoi(argv[4]); } if( matrix_type<1 || matrix_type>11 ) { if( my_rank==0 ) printf("matrix_type=%d <1 or matrix_type=%d >11\n",matrix_type,matrix_type); CHKERR(1); } if( iter<=0 ) { if( my_rank==0 ) printf("iter=%d <= 0\n",iter); CHKERR(1); } if( my_rank==0 ) { printf("\n"); printf("number of processes = %d\n",nprocs); } #ifdef _OPENMP if( my_rank==0 ) { nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_input(A0,NULL,NULL,argv[1]); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { printf("block size of BSR and BSC = %d x %d\n",block,block); printf("iteration count = %d\n\n",iter); } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { A->bnr = block; A->bnc = block; } comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); } lis_matrix_destroy(A); lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT nnz; LIS_INT i,n,np; LIS_INT block; LIS_INT is,ie; LIS_INT err,iter,matrix_type; double time,time2,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; char path[1024]; FILE *file; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 3 ) { if( my_rank==0 ) { printf("Usage: %s matrix_filename_list iter [block] \n", argv[0]); } lis_finalize(); exit(0); } file = fopen(argv[1], "r"); if( file==NULL ) CHKERR(1); iter = atoi(argv[2]); if (argv[3] == NULL) { block = 2; } else { block = atoi(argv[3]); } if( iter<=0 ) { #ifdef _LONG__LONG printf("iter=%lld <= 0\n",iter); #else printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONG__LONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP if( my_rank==0 ) { nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); #ifdef _LONG__LONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ while( fscanf(file, "%s\n", path)==1 ) { if( my_rank==0 ) { printf("matrix_filename = %s\n", path); } lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_input(A0,NULL,NULL,path); if( err ) CHKERR(err); n = A0->n; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONG__LONG printf("block size of BSR and BSC = %lld x %lld\n",block,block); printf("number of iterations = %lld\n\n",iter); #else printf("block size of BSR and BSC = %d x %d\n",block,block); printf("number of iterations = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ for (matrix_type=1;matrix_type<11;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); if( my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { A->bnr = block; A->bnc = block; } } comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONG__LONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); } fclose(file); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
LIS_INT main(LIS_INT argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x,v; LIS_SCALAR ntimes,nmflops,nnrm2; LIS_SCALAR *value; LIS_INT nprocs,my_rank; int int_nprocs,int_my_rank; LIS_INT nthreads, maxthreads; LIS_INT gn,nnz,mode; LIS_INT i,ii,j,jj,j0,j1,l,k,n,np,h,ih; LIS_INT rn,rmin,rmax,rb; LIS_INT is,ie,clsize,ci,*iw; LIS_INT err,iter,matrix_type,s,ss,se; LIS_INT *ptr,*index; double mem,ra,rs,ri,ria,ca,time,time2,convtime,nnzs,nnzap,nnzt; LIS_SCALAR val; double commtime,comptime,flops; FILE *file; char path[1024]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank); nprocs = int_nprocs; my_rank = int_my_rank; #else nprocs = 1; my_rank = 0; #endif if( argc < 3 ) { if( my_rank==0 ) { printf("Usage: %s n iter [matrix_type]\n", argv[0]); } CHKERR(1); } n = atoi(argv[1]); iter = atoi(argv[2]); if (argv[3] == NULL) { s = 0; } else { s = atoi(argv[3]); } if( n<=0 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("n=%lld <=0\n",n); #else if( my_rank==0 ) printf("n=%d <=0\n",n); #endif CHKERR(1); } if( iter<=0 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("iter=%lld <= 0\n",iter); #else if( my_rank==0 ) printf("iter=%d <= 0\n",iter); #endif CHKERR(1); } if( s<0 || s>11 ) { #ifdef _LONGLONG if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",s,s); #else if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",s,s); #endif CHKERR(1); } if( my_rank==0 ) { printf("\n"); #ifdef _LONGLONG printf("number of processes = %lld\n",nprocs); #else printf("number of processes = %d\n",nprocs); #endif } #ifdef _OPENMP nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); if( my_rank==0 ) { #ifdef _LONGLONG printf("max number of threads = %lld\n", nthreads); printf("number of threads = %lld\n", maxthreads); #else printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); #endif } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ lis_matrix_create(LIS_COMM_WORLD,&A0); lis_matrix_set_size(A0,0,n); lis_matrix_get_size(A0,&n,&gn); lis_matrix_get_range(A0,&is,&ie); k = 0; for(i=is;i<ie;i++) { if( i>0 ) lis_matrix_set_value(LIS_INS_VALUE,i,i-1,-1.0,A0); if( i<gn-1 ) lis_matrix_set_value(LIS_INS_VALUE,i,i+1,-1.0,A0); lis_matrix_set_value(LIS_INS_VALUE,i,i,2.0,A0); } err = lis_matrix_assemble(A0); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm); np = i; #endif if( my_rank==0 ) { #ifdef _LONGLONG printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz); printf("iteration count = %lld\n\n",iter); #else printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz); printf("iteration count = %d\n\n",iter); #endif } err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } /* MPI version of VBR is not implemented. DNS is also excluded to reduce memory usage. */ if (s==0) { ss = 1; se = 11; } else { ss = s; se = s+1; } for (matrix_type=ss;matrix_type<se;matrix_type++) { if ( nprocs>1 && matrix_type==9 ) continue; lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,matrix_type); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; MPI_Barrier(A->comm); #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; #ifdef USE_MPI #ifdef _LONG__DOUBLE #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #else #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val); #endif #endif #else #ifdef _LONG__DOUBLE #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #else #ifdef _LONGLONG printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #else printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val); #endif #endif #endif } lis_matrix_destroy(A); } lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }