int main( int argc, char *argv[] )
{
    int errs = 0;
    int *ranks;
    int *ranksout;
    MPI_Group gworld, grev, gself;
    MPI_Comm comm;
    MPI_Comm commrev;
    int rank, size, i;
    double start, end, time1, time2;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    ranks    = malloc(size*sizeof(int));
    ranksout = malloc(size*sizeof(int));
    if (!ranks || !ranksout) {
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* generate a comm with the rank order reversed */
    MPI_Comm_split(comm, 0, (size-rank-1), &commrev);
    MPI_Comm_group(commrev, &grev);
    MPI_Comm_group(MPI_COMM_SELF, &gself);
    MPI_Comm_group(comm, &gworld);

    /* sanity check correctness first */
    for (i=0; i < size; i++) {
        ranks[i] = i;
        ranksout[i] = -1;
    }
    MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
    for (i=0; i < size; i++) {
        if (ranksout[i] != (size-i-1)) {
            if (rank == 0)
                printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n",
                       rank, i, (size-i-1), ranksout[i]);
            ++errs;
        }
    }
    MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
    for (i=0; i < size; i++) {
        int expected = (i == (size-rank-1) ? 0 : MPI_UNDEFINED);
        if (ranksout[i] != expected) {
            if (rank == 0)
                printf("%d: (gself) expected ranksout[%d]=%d, got %d\n",
                       rank, i, expected, ranksout[i]);
            ++errs;
        }
    }

    /* now compare relative performance */

    /* we need lots of procs to get a group large enough to have meaningful
     * numbers.  On most testing machines this means that we're oversubscribing
     * cores in a big way, which might perturb the timing results.  So we make
     * sure everyone started up and then everyone but rank 0 goes to sleep to
     * let rank 0 do all the timings. */
    MPI_Barrier(comm);

    if (rank != 0) {
        sleep(10);
    }
    else /* rank==0 */ {
        sleep(1); /* try to avoid timing while everyone else is making syscalls */

        MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /* throwaway iter */
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
        }
        end = MPI_Wtime();
        time1 = end - start;

        MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /* throwaway iter */
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
        }
        end = MPI_Wtime();
        time2 = end - start;

        /* complain if the difference between the two times exceeds twice the
         * "gself" time */
        if (fabs(time1 - time2) > (2.00 * time2)) {
            printf("too much difference in MPI_Group_translate_ranks performance:\n");
            printf("time1=%f time2=%f\n", time1, time2);
            printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1-time2)/time2));
            if (time1 < time2) {
                printf("also, (time1<time2) is surprising...\n");
            }
            ++errs;
        }
    }

    free(ranks);
    free(ranksout);

    MPI_Group_free(&grev);
    MPI_Group_free(&gself);
    MPI_Group_free(&gworld);
    MPI_Comm_free(&commrev);

    MTest_Finalize(errs);
    MPI_Finalize();

    return 0;
}
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[4], ni[4], no[4]; ptrdiff_t alloc_local_forw, alloc_local_back, alloc_local, howmany; ptrdiff_t local_ni[4], local_i_start[4]; ptrdiff_t local_n[4], local_start[4]; ptrdiff_t local_no[4], local_o_start[4]; double err, *in; pfft_complex *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ ni[0] = ni[1] = ni[2] = ni[3] = 8; n[0] = 13; n[1] = 14; n[2] = 15; n[3] = 17; for(int t=0; t<4; t++) no[t] = ni[t]; np[0] = 2; np[1] = 2; howmany = 1; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ) { pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local_forw = pfft_local_size_many_dft_r2c(4, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_OUT, local_ni, local_i_start, local_n, local_start); alloc_local_back = pfft_local_size_many_dft_c2r(4, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_IN, local_n, local_start, local_no, local_o_start); /* Allocate enough memory for both trafos */ alloc_local = (alloc_local_forw > alloc_local_back) ? alloc_local_forw : alloc_local_back; in = pfft_alloc_real(2 * alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_many_dft_r2c( 4, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_many_dft_c2r( 4, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real(4, ni, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++) in[l] /= (n[0]*n[1]*n[2]*n[3]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real(4, ni, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main (int argc, char *argv[])
{
    int numtasks, namelen, rank, dest = 1, tag = 111, source = 0, size, i, j;
    double start_time = 0, elapsed_time = 0, acum;
    double *outmsg, *inmsg;
    char hostname[256];
    MPI_Status status, status2;
    MPI_Request send_request, recv_request;

    if (argc < 2) {
        printf("Usage: %s size [where size is the number of elements (double) to send]\n", argv[0]);
        return 0;
    }
    size = atoi(argv[1]);

    outmsg = (double*) malloc(sizeof(double)*size);
    if (outmsg == NULL) {
        printf("Unable to allocate memory\n");
        return 1;
    }
    inmsg = (double*) malloc(sizeof(double)*size);
    if (inmsg == NULL) {
        printf("Unable to allocate memory\n");
        return 1;
    }

    MPI_Init (&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);    // get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);        // get current process id
    MPI_Get_processor_name(hostname, &namelen);  // get CPU name

    // Initialize the msg buffer to the rank id.
    for (i = 0; i < size; i++)
        outmsg[i] = rank;

    // Define as source the left neighbour
    if (rank == 0) source = numtasks-1;
    else           source = rank-1;
    // Define as destination the right neighbour
    if (rank == numtasks-1) dest = 0;
    else                    dest = rank+1;

    start_time = MPI_Wtime();
    acum = 0;
    for (i = 0; i < numtasks; i++) {
        if (rank == 0)
            printf("it: %2d - Rank %d (%s) sending data (%g) to rank %d\n",
                   i, rank, hostname, outmsg[0], dest);
        MPI_Isend(outmsg, size, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD, &send_request);
        MPI_Recv (inmsg, size, MPI_DOUBLE, source, tag, MPI_COMM_WORLD, &status);
        acum = acum + inmsg[0];
        if (rank == 0)
            printf("it: %2d - Rank %d received data (%g) from rank %d (acum=%g)\n",
                   i, rank, inmsg[0], source, acum);
        MPI_Wait(&send_request, &status2);
        // Copy the inmsg to outmsg for the next iteration.
        for (j = 0; j < size; j++)
            outmsg[j] = inmsg[j];
    }
    MPI_Barrier(MPI_COMM_WORLD);
    elapsed_time = MPI_Wtime() - start_time;

    printf(" Rank %d: Elapsed time to send %6d double(s) across a ring made up by %2d (acum=%g) in %g ms\n",
           rank, size, numtasks, acum, elapsed_time*1e03);

    free(outmsg);
    free(inmsg);
    MPI_Finalize ();
    return 0;
}
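/* Note on the communication pattern above: posting the MPI_Isend before the
 * blocking MPI_Recv is what keeps the ring from deadlocking when every rank
 * sends at once.  A minimal alternative sketch (not part of the original
 * program) is the combined send/receive call, which exchanges with both
 * neighbours in one step and removes the request/wait pair:
 *
 *   MPI_Sendrecv(outmsg, size, MPI_DOUBLE, dest,   tag,
 *                inmsg,  size, MPI_DOUBLE, source, tag,
 *                MPI_COMM_WORLD, &status);
 */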
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    /* Alloc_mem is not required for the origin buffers for RMA operations -
       just for the Win_create memory */
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Put Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided put operations */
    for (i = 0; i < ITERATIONS; i++) {
        MPI_Aint idx_loc[SUB_YDIM];
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        void *base_ptr = dst_buf;
        MPI_Aint base_int;

        MPI_Get_address(base_ptr, &base_int);

        if (rank == 0)
            if (verbose) printf(" + iteration %d\n", i);

        for (j = 0; j < SUB_YDIM; j++) {
            MPI_Get_address(&src_buf[j*XDIM], &idx_loc[j]);
            idx_loc[j] = idx_loc[j] - base_int;
            idx_rem[j] = j*XDIM*sizeof(double);
            blk_len[j] = SUB_XDIM*sizeof(double);
        }

        MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
        MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM*sizeof(double), idx_rem,
                                      MPI_BYTE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Put(base_ptr, 1, src_type, peer, 0, 1, dst_type, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual   = *(win_buf + i + j*XDIM);
            const double expected = (1.0 + ((rank+nranks-1)%nranks));
            /* compare against a small tolerance in both directions */
            if (fabs(actual - expected) > 1e-10) {
                SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                                rank, j, i, expected, actual); );
                errors++;
                fflush(stdout);
            }
        }
void FieldStatic::finalize() { MPI_Barrier(MPI_COMM_WORLD); }
double timeStepper::computeDt(int &numReads, int &numWrites)
{
  // Time step control
  array minSpeedTemp, maxSpeedTemp;
  array minSpeed, maxSpeed;

  elemOld->computeMinMaxCharSpeeds(directions::X1,
                                   minSpeedTemp, maxSpeedTemp,
                                   numReads, numWrites);
  minSpeedTemp = minSpeedTemp/XCoords->dX1;
  maxSpeedTemp = maxSpeedTemp/XCoords->dX1;
  maxSpeed = af::max(maxSpeedTemp, af::abs(minSpeedTemp));

  if (params::dim > 1)
  {
    elemOld->computeMinMaxCharSpeeds(directions::X2,
                                     minSpeedTemp, maxSpeedTemp,
                                     numReads, numWrites);
    minSpeedTemp = minSpeedTemp/XCoords->dX2;
    maxSpeedTemp = maxSpeedTemp/XCoords->dX2;
    maxSpeed += af::max(maxSpeedTemp, af::abs(minSpeedTemp));
  }

  if (params::dim > 2)
  {
    elemOld->computeMinMaxCharSpeeds(directions::X3,
                                     minSpeedTemp, maxSpeedTemp,
                                     numReads, numWrites);
    minSpeedTemp = minSpeedTemp/XCoords->dX3;
    maxSpeedTemp = maxSpeedTemp/XCoords->dX3;
    maxSpeed += af::max(maxSpeedTemp, af::abs(minSpeedTemp));
  }

  array maxInvDt_af = af::max(af::max(af::max(maxSpeed, 2), 1), 0);
  double maxInvDt = maxInvDt_af.host<double>()[0];

  /* Use MPI to find the largest inverse time step (i.e. the smallest dt)
   * over all processors */
  if (world_rank == 0)
  {
    double temp;
    for (int i = 1; i < world_size; i++)
    {
      MPI_Recv(&temp, 1, MPI_DOUBLE, i, i, PETSC_COMM_WORLD, MPI_STATUS_IGNORE);
      if (maxInvDt < temp)
      {
        maxInvDt = temp;
      }
    }
  }
  else
  {
    MPI_Send(&maxInvDt, 1, MPI_DOUBLE, 0, world_rank, PETSC_COMM_WORLD);
  }
  MPI_Barrier(PETSC_COMM_WORLD);
  MPI_Bcast(&maxInvDt, 1, MPI_DOUBLE, 0, PETSC_COMM_WORLD);
  MPI_Barrier(PETSC_COMM_WORLD);

  double newDt = params::CourantFactor/maxInvDt;
  if (newDt > params::maxDtIncrement*dt)
  {
    newDt = params::maxDtIncrement*dt;
  }
  dt = newDt;

  return dt;  /* the function is declared to return double */
}
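/* The rank-0 gather-and-broadcast above could equivalently be expressed with a
 * single collective.  A minimal sketch (assuming every rank in PETSC_COMM_WORLD
 * reaches this point with its local maxInvDt):
 *
 *   double globalMaxInvDt;
 *   MPI_Allreduce(&maxInvDt, &globalMaxInvDt, 1, MPI_DOUBLE, MPI_MAX,
 *                 PETSC_COMM_WORLD);
 *   dt = std::min(params::CourantFactor/globalMaxInvDt,
 *                 params::maxDtIncrement*dt);
 */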
int main (int argc, char **argv) { #ifdef VSG_HAVE_MPI VsgPRTreeParallelConfig pconfig = {{NULL,}}; #endif VsgVector3d lbound = {-TR, -TR, -TR}; VsgVector3d ubound = {TR, TR, TR}; VsgPRTree3d *prtree; AranSolver3d *solver; int ret = 0; GTimer *timer = NULL; #ifdef VSG_HAVE_MPI MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &sz); MPI_Comm_rank (MPI_COMM_WORLD, &rk); #endif aran_init(); parse_args (argc, argv); #ifdef VSG_HAVE_MPI pconfig.communicator = MPI_COMM_WORLD; pconfig.point = point_accum_vtable; aran_development3d_vtable_init (&pconfig.node_data, 0, order); #endif /* points = g_ptr_array_new (); */ if (check) { _cp_size = MAX (np, 128); check_points = g_malloc0 (_cp_size * sizeof (PointAccum)); } prtree = vsg_prtree3d_new_full (&lbound, &ubound, (VsgPoint3dLocFunc) vsg_vector3d_vector3d_locfunc, (VsgPoint3dDistFunc) vsg_vector3d_dist, (VsgRegion3dLocFunc) NULL, maxbox); solver = aran_solver3d_new (prtree, ARAN_TYPE_DEVELOPMENT3D, aran_development3d_new (0, order), (AranZeroFunc) aran_development3d_set_zero); #ifdef VSG_HAVE_MPI aran_solver3d_set_parallel (solver, &pconfig); #endif if (virtual_maxbox != 0) aran_solver3d_set_nf_isleaf (solver, _nf_isleaf_virtual_maxbox, &virtual_maxbox); aran_solver3d_set_functions (solver, (AranParticle2ParticleFunc3d) p2p, (AranParticle2MultipoleFunc3d) p2m, m2m, m2l, l2l, (AranLocal2ParticleFunc3d) l2p); if (semifar_threshold < G_MAXUINT) { aran_solver3d_set_functions_full (solver, (AranParticle2ParticleFunc3d) p2p, (AranParticle2MultipoleFunc3d) p2m, m2m, m2l, l2l, (AranLocal2ParticleFunc3d) l2p, (AranParticle2LocalFunc3d) p2l, (AranMultipole2ParticleFunc3d) m2p, semifar_threshold); if (semifar_threshold == 0) { PointAccum p1 = {{0.1, 0.1, 0.1}, 0.1, {0., 0., 0.}, 0}; PointAccum p2 = {{-0.1, -0.1, -0.1}, 0.1, {0., 0., 0.}, 1}; /* compute operators timings to be able to compute optimal solver parameters */ aran_solver3d_profile_operators (solver, (AranParticleInitFunc3d) point_accum_clear_accum, &p1, &p2); /* alternatively, we could get timings from profile databases */ /* aran_profile_db_read_file ("./profiledb-newtonfield3.ini", NULL); */ /* aran_solver3d_db_profile_operators (solver, (gdouble) order); */ } } if (_hilbert) { /* configure for hilbert curve order traversal */ aran_solver3d_set_children_order_hilbert (solver); } if (_verbose) { g_printerr ("%d : fill begin\n", rk); g_printerr ("%d : memory peak1 count = %u\n", rk, getpeak(0)); #ifdef VSG_HAVE_MPI MPI_Barrier (MPI_COMM_WORLD); #endif timer = g_timer_new (); } _fill (solver); if (_verbose) { g_printerr ("%d : fill elapsed=%f seconds\n", rk, g_timer_elapsed (timer, NULL)); g_printerr ("%d : tree depth count = %d\n", rk, aran_solver3d_depth (solver)); g_printerr ("%d : particle count=%d\n", rk, aran_solver3d_point_count (solver)); g_timer_destroy (timer); /* g_mem_profile(); */ } if (_verbose) { g_printerr ("%d : solve begin\n", rk); g_printerr ("%d : memory peak2 count = %u\n", rk, getpeak(0)); #ifdef VSG_HAVE_MPI MPI_Barrier (MPI_COMM_WORLD); #endif timer = g_timer_new (); } aran_solver3d_solve (solver); if (_verbose) { #ifdef VSG_HAVE_MPI MPI_Barrier (MPI_COMM_WORLD); #endif g_printerr ("%d : solve ok elapsed=%f seconds\n", rk, g_timer_elapsed (timer, NULL)); g_printerr ("%d : memory peak3 count = %u\n", rk, getpeak(0)); g_timer_destroy (timer); { glong zero_count, p2p_count, p2m_count, m2m_count; glong m2l_count, l2l_count, l2p_count, p2l_count, m2p_count; glong p2p_remote_count, m2l_remote_count; aran_solver3d_get_stats (solver, &zero_count, &p2p_count, &p2m_count, 
&m2m_count, &m2l_count, &l2l_count, &l2p_count, &p2l_count, &m2p_count, &p2p_remote_count, &m2l_remote_count); g_printerr ("%d : zero count=%ld\n", rk, zero_count); g_printerr ("%d : p2p count=%ld\n", rk, p2p_count); g_printerr ("%d : p2p remote count=%ld\n", rk, p2p_remote_count); g_printerr ("%d : p2m count=%ld\n", rk, p2m_count); g_printerr ("%d : m2m count=%ld\n", rk, m2m_count); g_printerr ("%d : m2l count=%ld\n", rk, m2l_count); g_printerr ("%d : m2l remote count=%ld\n", rk, m2l_remote_count); g_printerr ("%d : l2l count=%ld\n", rk, l2l_count); g_printerr ("%d : l2p count=%ld\n", rk, l2p_count); g_printerr ("%d : p2l count=%ld\n", rk, p2l_count); g_printerr ("%d : m2p count=%ld\n", rk, m2p_count); } } if (_write) { gchar fn[1024]; FILE *f; g_sprintf (fn, "tree%03d.txt", rk); f = fopen (fn, "w"); vsg_prtree3d_write (prtree, f); fclose (f); _tree_write (prtree, "solv"); _vtp_tree_write (solver, "solv"); } if (_save_fma_filename != NULL) { FILE *file = fopen (_save_fma_filename, "w"); aran_solver3d_write_fma (solver, file); fclose (file); } if (check) { guint64 i, j; if (sz == 1) { for (i=0; i<np; i ++) { PointAccum *pi = &check_points[i]; for (j=0; j<np; j ++) { if (j != i) { PointAccum *pj = &check_points[j]; p2p_one_way (pi, pj); } } } } else check_parallel_points (solver); aran_solver3d_foreach_point (solver, (GFunc) check_point_field, &ret); if (_verbose) g_printerr ("%d : max err = %e\n", rk, maxerr); g_free (check_points); } aran_solver3d_free (solver); #ifdef VSG_HAVE_MPI aran_development3d_vtable_clear (&pconfig.node_data); #endif /* g_ptr_array_free (points, TRUE); */ if (_load_file != NULL) g_free (_load_file); #ifdef VSG_HAVE_MPI MPI_Finalize (); #endif return ret; }
static int test_mpio_special_collective(char *filename)
{
    int mpi_size, mpi_rank;
    MPI_File fh;
    MPI_Datatype etype, buftype, filetype;
    char mpi_err_str[MPI_MAX_ERROR_STRING];
    int mpi_err_strlen;
    int mpi_err;
    char writedata[2*DIMSIZE];
    char *buf;
    int i;
    int count, bufcount;
    int blocklens[2];
    MPI_Aint offsets[2];
    MPI_Offset mpi_off;
    MPI_Status mpi_stat;
    int retcode;

    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    retcode = 0;

    /* create MPI data type */
    etype = MPI_BYTE;
    if (mpi_rank == 0 || mpi_rank == 1) {
        count = DIMSIZE;
        bufcount = 1;
    }
    else {
        count = 0;
        bufcount = 0;
    }

    blocklens[0] = count;
    offsets[0] = mpi_rank*count;
    blocklens[1] = count;
    offsets[1] = (mpi_size+mpi_rank)*count;

    if (count != 0) {
        if ((mpi_err = MPI_Type_hindexed(2, blocklens, offsets, etype, &filetype))
                != MPI_SUCCESS) {
            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
            return 1;
        }
        if ((mpi_err = MPI_Type_commit(&filetype)) != MPI_SUCCESS) {
            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
            return 1;
        }
        if ((mpi_err = MPI_Type_hindexed(2, blocklens, offsets, etype, &buftype))
                != MPI_SUCCESS) {
            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
            return 1;
        }
        if ((mpi_err = MPI_Type_commit(&buftype)) != MPI_SUCCESS) {
            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
            return 1;
        }
    }
    else {
        filetype = MPI_BYTE;
        buftype = MPI_BYTE;
    }

    /* Open a file */
    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
                                 MPI_MODE_RDWR | MPI_MODE_CREATE,
                                 MPI_INFO_NULL, &fh)) != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        printf("MPI_File_open failed (%s)\n", mpi_err_str);
        return 1;
    }

    /* each process writes some data */
    for (i = 0; i < 2*DIMSIZE; i++)
        writedata[i] = mpi_rank*DIMSIZE + i;

    mpi_off = 0;
    if ((mpi_err = MPI_File_set_view(fh, mpi_off, MPI_BYTE, filetype,
                                     "native", MPI_INFO_NULL)) != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
        return 1;
    }

    buf = writedata;
    if ((mpi_err = MPI_File_write_at_all(fh, mpi_off, buf, bufcount, buftype,
                                         &mpi_stat)) != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        printf("MPI_File_write_at_all offset(%ld), bytes (%d), failed (%s)\n",
               (long) mpi_off, bufcount, mpi_err_str);
        return 1;
    }

    if ((mpi_err = MPI_File_close(&fh)) != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        printf("MPI_File_close failed (%s)\n", mpi_err_str);
        return 1;
    }

    mpi_err = MPI_Barrier(MPI_COMM_WORLD);

#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
    if (retcode != 0) {
        if (mpi_rank == 0) {
            printf("special collective IO is NOT working at this platform\n");
            printf("Go back to hdf5/config and find the corresponding\n");
            printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n");
            printf("hdf5_cv_mpi_special_collective_io_works=${hdf5_cv_mpi_special_collective_io_works='no'}\n");
            printf(" at the end of the file.\n");
            printf(" Please report to [email protected] about this problem.\n");
        }
        retcode = 1;
    }
#else
    if (retcode == 0) {
        if (mpi_rank == 0) {
            printf(" This is NOT an error, What it really says is\n");
            printf("special collective IO is WORKING at this platform\n");
            printf(" Go back to hdf5/config and find the corresponding \n");
            printf(" configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n");
            printf("hdf5_cv_mpi_special_collective_io_works=${hdf5_cv_mpi_special_collective_io_works='no'}\n");
            printf(" at the end of the file.\n");
            printf("Please report to [email protected] about this problem.\n");
        }
        retcode = 1;
    }
#endif
    return retcode;
}
void system::set_problem(const bool init) { if (myproc == 0) fprintf(stderr, " ********* Setting up MHD Turbulence ************* \n"); const int reserve_n = (int)(1.25*local_n); U.reserve(reserve_n); dU.reserve(reserve_n); Wgrad.reserve(reserve_n); U.resize(local_n); dU.resize(local_n); Wgrad.resize(local_n); gamma_gas = 1.0; courant_no = 0.4; for (int i = 0; i < local_n; i++) { assert(U[i][Fluid::DENS] > 0.0); U[i][Fluid::PSI ] = 0.0; for (int k = 0 ; k < Fluid::NSCALARS; k++) U[i].scal(k) = 1.0; dU[i] = Fluid(0.0); Wgrad[i] = 0.0; for (int k = 0; k < Fluid::NFLUID; k++) Wgrad[i].m[k] = U[i][k]; U[i] = U[i].to_conservative(cells[i].Volume); ptcl[i].Volume = cells[i].Volume; } entropy_scalar = -1; isoeos_flag = true; MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " pvel ... \n"); get_active_ptcl(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " primitives ... \n"); exchange_primitive_and_wdot(); MPI_Barrier(MPI_COMM_WORLD); compute_pvel(); exchange_pvel(); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " tgradients ... \n"); compute_tgradient(); if (myproc == 0) fprintf(stderr , " timestep... \n"); compute_timesteps(true); for (int i = 0; i < local_n; i++) ptcl[i].rung[0] += 3; all_active = true; scheduler.flush_list(); for (int i = 0; i < local_n; i++) scheduler.push_particle(i, (int)ptcl[i].rung[0]); MPI_Barrier(MPI_COMM_WORLD); if (!eulerian) clear_mesh(); if (myproc == 0) fprintf(stderr, " proc= %d: complete problem setup \n", myproc); MPI_Barrier(MPI_COMM_WORLD); }
static int test_mpio_1wMr(char *filename, int special_request) { char hostname[128]; int mpi_size, mpi_rank; MPI_File fh; char mpi_err_str[MPI_MAX_ERROR_STRING]; int mpi_err_strlen; int mpi_err; unsigned char writedata[DIMSIZE], readdata[DIMSIZE]; unsigned char expect_val; int i, irank; int nerrs = 0; /* number of errors */ int atomicity; MPI_Offset mpi_off; MPI_Status mpi_stat; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); if (MAINPROCESS && VERBOSE_MED){ printf("Testing one process writes, all processes read.\n"); printf("Using %d processes accessing file %s\n", mpi_size, filename); printf(" (Filename can be specified via program argument)\n"); } /* show the hostname so that we can tell where the processes are running */ if (VERBOSE_DEF){ if (gethostname(hostname, 128) < 0){ PRINTID; printf("gethostname failed\n"); return 1; } PRINTID; printf("hostname=%s\n", hostname); } /* Delete any old file in order to start anew. */ /* Must delete because MPI_File_open does not have a Truncate mode. */ /* Don't care if it has error. */ MPI_File_delete(filename, MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); /* prevent racing condition */ if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDWR | MPI_MODE_CREATE , MPI_INFO_NULL, &fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_open failed (%s)\n", mpi_err_str); return 1; } if (special_request & USEATOM){ /* ================================================== * Set atomcity to true (1). A POSIX compliant filesystem * should not need this. * ==================================================*/ if ((mpi_err = MPI_File_get_atomicity(fh, &atomicity)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_get_atomicity failed (%s)\n", mpi_err_str); } if (VERBOSE_HI) printf("Initial atomicity = %d\n", atomicity); if ((mpi_err = MPI_File_set_atomicity(fh, 1)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_set_atomicity failed (%s)\n", mpi_err_str); } if ((mpi_err = MPI_File_get_atomicity(fh, &atomicity)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_get_atomicity failed (%s)\n", mpi_err_str); } if (VERBOSE_HI) printf("After set_atomicity atomicity = %d\n", atomicity); } /* This barrier is not necessary but do it anyway. */ MPI_Barrier(MPI_COMM_WORLD); if (VERBOSE_HI){ PRINTID; printf("between MPI_Barrier and MPI_File_write_at\n"); } /* ================================================== * Each process calculates what to write but * only process irank(0) writes. * ==================================================*/ irank=0; for (i=0; i < DIMSIZE; i++) writedata[i] = irank*DIMSIZE + i; mpi_off = irank*DIMSIZE; /* Only one process writes */ if (mpi_rank==irank){ if (VERBOSE_HI){ PRINTID; printf("wrote %d bytes at %ld\n", DIMSIZE, (long)mpi_off); } if ((mpi_err = MPI_File_write_at(fh, mpi_off, writedata, DIMSIZE, MPI_BYTE, &mpi_stat)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n", (long) mpi_off, DIMSIZE, mpi_err_str); return 1; }; }; /* Bcast the return code and */ /* make sure all writing are done before reading. 
*/ MPI_Bcast(&mpi_err, 1, MPI_INT, irank, MPI_COMM_WORLD); if (VERBOSE_HI){ PRINTID; printf("MPI_Bcast: mpi_err = %d\n", mpi_err); } if (special_request & USEFSYNC){ /* ================================================== * Do a file sync. A POSIX compliant filesystem * should not need this. * ==================================================*/ if (VERBOSE_HI) printf("Apply MPI_File_sync\n"); /* call file_sync to force the write out */ if ((mpi_err = MPI_File_sync(fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_sync failed (%s)\n", mpi_err_str); } MPI_Barrier(MPI_COMM_WORLD); /* call file_sync to force the write out */ if ((mpi_err = MPI_File_sync(fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_sync failed (%s)\n", mpi_err_str); } } /* This barrier is not necessary because the Bcase or File_sync above */ /* should take care of it. Do it anyway. */ MPI_Barrier(MPI_COMM_WORLD); if (VERBOSE_HI){ PRINTID; printf("after MPI_Barrier\n"); } /* ================================================== * Each process reads what process 0 wrote and verify. * ==================================================*/ irank=0; mpi_off = irank*DIMSIZE; if ((mpi_err = MPI_File_read_at(fh, mpi_off, readdata, DIMSIZE, MPI_BYTE, &mpi_stat)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); PRINTID; printf("MPI_File_read_at offset(%ld), bytes (%d), failed (%s)\n", (long) mpi_off, DIMSIZE, mpi_err_str); return 1; }; for (i=0; i < DIMSIZE; i++){ expect_val = irank*DIMSIZE + i; if (readdata[i] != expect_val){ PRINTID; printf("read data[%d:%d] got %02x, expect %02x\n", irank, i, readdata[i], expect_val); nerrs++; } } MPI_File_close(&fh); if (VERBOSE_HI){ PRINTID; printf("%d data errors detected\n", nerrs); } mpi_err = MPI_Barrier(MPI_COMM_WORLD); return nerrs; }
static int test_mpio_derived_dtype(char *filename) { MPI_File fh; char mpi_err_str[MPI_MAX_ERROR_STRING]; int mpi_err_strlen; int mpi_err; int i; int nerrors = 0; /* number of errors */ MPI_Datatype etype,filetype; MPI_Datatype adv_filetype,bas_filetype[2]; MPI_Datatype etypenew, filetypenew; MPI_Offset disp; MPI_Status Status; MPI_Aint adv_disp[2]; MPI_Aint offsets[1]; int blocklens[1],adv_blocklens[2]; int count,outcount; int retcode; int mpi_rank,mpi_size; char buf[3],outbuf[3] = {0}; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); retcode = 0; for(i=0;i<3;i++) buf[i] = i+1; if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_open failed (%s)\n", mpi_err_str); return 1; } disp = 0; etype = MPI_BYTE; count = 1; blocklens[0] = 1; offsets[0] = 0; if((mpi_err= MPI_Type_hindexed(count,blocklens,offsets,MPI_BYTE,&filetype)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str); return 1; } if((mpi_err=MPI_Type_commit(&filetype))!=MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_commit failed (%s)\n", mpi_err_str); return 1; } count = 1; blocklens[0]=1; offsets[0] = 1; if((mpi_err= MPI_Type_hindexed(count,blocklens,offsets,MPI_BYTE,&filetypenew)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str); return 1; } if((mpi_err=MPI_Type_commit(&filetypenew))!=MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_commit failed (%s)\n", mpi_err_str); return 1; } outcount = 2; adv_blocklens[0] = 1; adv_blocklens[1] = 1; adv_disp[0] = 0; adv_disp[1] = 1; bas_filetype[0] = filetype; bas_filetype[1] = filetypenew; if((mpi_err= MPI_Type_struct(outcount,adv_blocklens,adv_disp,bas_filetype,&adv_filetype)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_struct failed (%s)\n", mpi_err_str); return 1; } if((mpi_err=MPI_Type_commit(&adv_filetype))!=MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_Type_commit failed (%s)\n", mpi_err_str); return 1; } if((mpi_err = MPI_File_set_view(fh,disp,etype,adv_filetype,"native",MPI_INFO_NULL))!= MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_set_view failed (%s)\n", mpi_err_str); return 1; } if((mpi_err = MPI_File_write(fh,buf,3,MPI_BYTE,&Status))!= MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_write failed (%s)\n", mpi_err_str); return 1; ; } if((mpi_err = MPI_File_close(&fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_close failed (%s)\n", mpi_err_str); return 1; } if((mpi_err = MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_open failed (%s)\n", mpi_err_str); return 1; } if((mpi_err = MPI_File_set_view(fh,0,MPI_BYTE,MPI_BYTE,"native",MPI_INFO_NULL))!= MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_set_view failed (%s)\n", mpi_err_str); return 1; } if((mpi_err = MPI_File_read(fh,outbuf,3,MPI_BYTE,&Status))!=MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_read failed 
(%s)\n", mpi_err_str); return 1; } if(outbuf[2]==2) { retcode = 0; } else { /* if(mpi_rank == 0) { printf("complicated derived datatype is NOT working at this platform\n"); printf("go back to hdf5/config and find the corresponding\n"); printf("configure-specific file and change ?????\n"); } */ retcode = -1; } if((mpi_err = MPI_File_close(&fh)) != MPI_SUCCESS){ MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); printf("MPI_File_close failed (%s)\n", mpi_err_str); return 1; } mpi_err = MPI_Barrier(MPI_COMM_WORLD); #ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS if(retcode == -1) { if(mpi_rank == 0) { printf("Complicated derived datatype is NOT working at this platform\n"); printf("Go back to hdf5/config and find the corresponding\n"); printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n"); printf("hdf5_cv_mpi_complex_derived_datatype_works=${hdf5_cv_mpi_complex_derived_datatype-works='no'}\n"); printf(" at the end of the file.\n"); printf(" Please report to [email protected] about this problem.\n"); } retcode = 1; } #else if(retcode == 0) { if(mpi_rank == 0) { printf(" This is NOT an error, What it really says is\n"); printf("Complicated derived datatype is WORKING at this platform\n"); printf(" Go back to hdf5/config and find the corresponding \n"); printf(" configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n"); printf("hdf5_cv_mpi_complex_derived_datatype_works=${hdf5_cv_mpi_complex_derived_datatype-works='no'}\n"); printf(" at the end of the file.\n"); printf("Please report to [email protected] about this problem.\n"); } retcode = 1; } if(retcode == -1) retcode = 0; #endif return retcode; }
static int test_mpio_overlap_writes(char *filename) { int mpi_size, mpi_rank; MPI_Comm comm; MPI_Info info = MPI_INFO_NULL; int color, mrc; MPI_File fh; int i; int vrfyerrs, nerrs; unsigned char buf[4093]; /* use some prime number for size */ int bufsize = sizeof(buf); MPI_Offset stride; MPI_Offset mpi_off; MPI_Status mpi_stat; if (VERBOSE_MED) printf("MPIO independent overlapping writes test on file %s\n", filename); nerrs = 0; /* set up MPI parameters */ MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); /* Need at least 2 processes */ if (mpi_size < 2) { if (MAINPROCESS) printf("Need at least 2 processes to run MPIO test.\n"); printf(" -SKIP- \n"); return 0; } /* splits processes 0 to n-2 into one comm. and the last one into another */ color = ((mpi_rank < (mpi_size - 1)) ? 0 : 1); mrc = MPI_Comm_split (MPI_COMM_WORLD, color, mpi_rank, &comm); VRFY((mrc==MPI_SUCCESS), "Comm_split succeeded"); if (color==0){ /* First n-1 processes (color==0) open a file and write it */ mrc = MPI_File_open(comm, filename, MPI_MODE_CREATE|MPI_MODE_RDWR, info, &fh); VRFY((mrc==MPI_SUCCESS), ""); stride = 1; mpi_off = mpi_rank*stride; while (mpi_off < MPIO_TEST_WRITE_SIZE){ /* make sure the write does not exceed the TEST_WRITE_SIZE */ if (mpi_off+stride > MPIO_TEST_WRITE_SIZE) stride = MPIO_TEST_WRITE_SIZE - mpi_off; /* set data to some trivial pattern for easy verification */ for (i=0; i<stride; i++) buf[i] = (unsigned char)(mpi_off+i); mrc = MPI_File_write_at(fh, mpi_off, buf, (int)stride, MPI_BYTE, &mpi_stat); VRFY((mrc==MPI_SUCCESS), ""); /* move the offset pointer to last byte written by all processes */ mpi_off += (mpi_size - 1 - mpi_rank) * stride; /* Increase chunk size without exceeding buffer size. */ /* Then move the starting offset for next write. */ stride *= 2; if (stride > bufsize) stride = bufsize; mpi_off += mpi_rank*stride; } /* close file and free the communicator */ mrc = MPI_File_close(&fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE"); mrc = MPI_Comm_free(&comm); VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free"); /* sync with the other waiting processes */ mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync after writes"); }else{ /* last process waits till writes are done, * then opens file to verify data. */ mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync after writes"); mrc = MPI_File_open(comm, filename, MPI_MODE_RDONLY, info, &fh); VRFY((mrc==MPI_SUCCESS), ""); stride = bufsize; for (mpi_off=0; mpi_off < MPIO_TEST_WRITE_SIZE; mpi_off += bufsize){ /* make sure it does not read beyond end of data */ if (mpi_off+stride > MPIO_TEST_WRITE_SIZE) stride = MPIO_TEST_WRITE_SIZE - mpi_off; mrc = MPI_File_read_at(fh, mpi_off, buf, (int)stride, MPI_BYTE, &mpi_stat); VRFY((mrc==MPI_SUCCESS), ""); vrfyerrs=0; for (i=0; i<stride; i++){ unsigned char expected; expected = (unsigned char)(mpi_off+i); if ((expected != buf[i]) && (vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED)) { printf("proc %d: found data error at [%ld], expect %u, got %u\n", mpi_rank, (long)(mpi_off+i), expected, buf[i]); } } if (vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED) printf("proc %d: [more errors ...]\n", mpi_rank); nerrs += vrfyerrs; } /* close file and free the communicator */ mrc = MPI_File_close(&fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE"); mrc = MPI_Comm_free(&comm); VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free"); } /* * one more sync to ensure all processes have done reading * before ending this test. 
*/ mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync before leaving test"); return (nerrs); }
/* * Verify that MPI_Offset exceeding 2**31 can be computed correctly. * Print any failure as information only, not as an error so that this * won't abort the remaining test or other separated tests. * * Test if MPIO can write file from under 2GB to over 2GB and then * from under 4GB to over 4GB. * Each process writes 1MB in round robin fashion. * Then reads the file back in by reverse order, that is process 0 * reads the data of process n-1 and vice versa. */ static int test_mpio_gb_file(char *filename) { int mpi_size, mpi_rank; MPI_Info info = MPI_INFO_NULL; int mrc; MPI_File fh; int i, j, n; int vrfyerrs; int writerrs; /* write errors */ int nerrs; int ntimes; /* how many times */ char *buf = NULL; char expected; MPI_Offset size; MPI_Offset mpi_off; MPI_Offset mpi_off_old; MPI_Status mpi_stat; struct stat stat_buf; int is_signed, sizeof_mpi_offset; nerrs = 0; /* set up MPI parameters */ MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); if (VERBOSE_MED) printf("MPI_Offset range test\n"); /* figure out the signness and sizeof MPI_Offset */ mpi_off = 0; is_signed = ((MPI_Offset)(mpi_off - 1)) < 0; sizeof_mpi_offset = (int)(sizeof(MPI_Offset)); /* * Verify the sizeof MPI_Offset and correctness of handling multiple GB * sizes. */ if (MAINPROCESS){ /* only process 0 needs to check it*/ printf("MPI_Offset is %s %d bytes integeral type\n", is_signed ? "signed" : "unsigned", (int)sizeof(MPI_Offset)); if (sizeof_mpi_offset <= 4 && is_signed){ printf("Skipped 2GB range test " "because MPI_Offset cannot support it\n"); }else { /* verify correctness of assigning 2GB sizes */ mpi_off = 2 * 1024 * (MPI_Offset)MB; INFO((mpi_off>0), "2GB OFFSET assignment no overflow"); INFO((mpi_off-1)==TWO_GB_LESS1, "2GB OFFSET assignment succeed"); /* verify correctness of increasing from below 2 GB to above 2GB */ mpi_off = TWO_GB_LESS1; for (i=0; i < 3; i++){ mpi_off_old = mpi_off; mpi_off = mpi_off + 1; /* no overflow */ INFO((mpi_off>0), "2GB OFFSET increment no overflow"); /* correct inc. */ INFO((mpi_off-1)==mpi_off_old, "2GB OFFSET increment succeed"); } } if (sizeof_mpi_offset <= 4){ printf("Skipped 4GB range test " "because MPI_Offset cannot support it\n"); }else { /* verify correctness of assigning 4GB sizes */ mpi_off = 4 * 1024 * (MPI_Offset)MB; INFO((mpi_off>0), "4GB OFFSET assignment no overflow"); INFO((mpi_off-1)==FOUR_GB_LESS1, "4GB OFFSET assignment succeed"); /* verify correctness of increasing from below 4 GB to above 4 GB */ mpi_off = FOUR_GB_LESS1; for (i=0; i < 3; i++){ mpi_off_old = mpi_off; mpi_off = mpi_off + 1; /* no overflow */ INFO((mpi_off>0), "4GB OFFSET increment no overflow"); /* correct inc. */ INFO((mpi_off-1)==mpi_off_old, "4GB OFFSET increment succeed"); } } } /* * Verify if we can write to a file of multiple GB sizes. */ if (VERBOSE_MED) printf("MPIO GB file test %s\n", filename); if (sizeof_mpi_offset <= 4){ printf("Skipped GB file range test " "because MPI_Offset cannot support it\n"); }else{ buf = malloc(MB); VRFY((buf!=NULL), "malloc succeed"); /* open a new file. Remove it first in case it exists. */ /* Must delete because MPI_File_open does not have a Truncate mode. */ /* Don't care if it has error. 
*/ MPI_File_delete(filename, MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); /* prevent racing condition */ mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE|MPI_MODE_RDWR, info, &fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_OPEN"); printf("MPIO GB file write test %s\n", filename); /* instead of writing every bytes of the file, we will just write * some data around the 2 and 4 GB boundaries. That should cover * potential integer overflow and filesystem size limits. */ writerrs = 0; for (n=2; n <= 4; n+=2){ ntimes = GB/MB*n/mpi_size + 1; for (i=ntimes-2; i <= ntimes; i++){ mpi_off = (i*mpi_size + mpi_rank)*(MPI_Offset)MB; if (VERBOSE_MED) HDfprintf(stdout,"proc %d: write to mpi_off=%016llx, %lld\n", mpi_rank, mpi_off, mpi_off); /* set data to some trivial pattern for easy verification */ for (j=0; j<MB; j++) *(buf+j) = i*mpi_size + mpi_rank; if (VERBOSE_MED) HDfprintf(stdout,"proc %d: writing %d bytes at offset %lld\n", mpi_rank, MB, mpi_off); mrc = MPI_File_write_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat); INFO((mrc==MPI_SUCCESS), "GB size file write"); if (mrc!=MPI_SUCCESS) writerrs++; } } /* close file and free the communicator */ mrc = MPI_File_close(&fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE"); mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync after writes"); /* * Verify if we can read the multiple GB file just created. */ /* open it again to verify the data written */ /* but only if there was no write errors */ printf("MPIO GB file read test %s\n", filename); if (errors_sum(writerrs)>0){ printf("proc %d: Skip read test due to previous write errors\n", mpi_rank); goto finish; } mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, info, &fh); VRFY((mrc==MPI_SUCCESS), ""); /* Only read back parts of the file that have been written. */ for (n=2; n <= 4; n+=2){ ntimes = GB/MB*n/mpi_size + 1; for (i=ntimes-2; i <= ntimes; i++){ mpi_off = (i*mpi_size + (mpi_size - mpi_rank - 1))*(MPI_Offset)MB; if (VERBOSE_MED) HDfprintf(stdout,"proc %d: read from mpi_off=%016llx, %lld\n", mpi_rank, mpi_off, mpi_off); mrc = MPI_File_read_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat); INFO((mrc==MPI_SUCCESS), "GB size file read"); expected = i*mpi_size + (mpi_size - mpi_rank - 1); vrfyerrs=0; for (j=0; j<MB; j++){ if ((*(buf+j) != expected) && (vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED)){ printf("proc %d: found data error at [%ld+%d], expect %d, got %d\n", mpi_rank, (long)mpi_off, j, expected, *(buf+j)); } } if (vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED) printf("proc %d: [more errors ...]\n", mpi_rank); nerrs += vrfyerrs; } } /* close file and free the communicator */ mrc = MPI_File_close(&fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE"); /* * one more sync to ensure all processes have done reading * before ending this test. */ mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync before leaving test"); /* * Check if MPI_File_get_size works correctly. Some systems (only SGI Altix * Propack 4 so far) return wrong file size. It can be avoided by reconfiguring * with "--disable-mpi-size". 
*/ #ifdef H5_HAVE_MPI_GET_SIZE printf("Test if MPI_File_get_size works correctly with %s\n", filename); mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, info, &fh); VRFY((mrc==MPI_SUCCESS), ""); if (MAINPROCESS){ /* only process 0 needs to check it*/ mrc = MPI_File_get_size(fh, &size); VRFY((mrc==MPI_SUCCESS), ""); mrc=stat(filename, &stat_buf); VRFY((mrc==0), ""); /* Hopefully this casting is safe */ if(size != (MPI_Offset)(stat_buf.st_size)) { printf("Warning: MPI_File_get_size doesn't return correct file size. To avoid using it in the library, reconfigure and rebuild the library with --disable-mpi-size.\n"); } } /* close file and free the communicator */ mrc = MPI_File_close(&fh); VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE"); /* * one more sync to ensure all processes have done reading * before ending this test. */ mrc = MPI_Barrier(MPI_COMM_WORLD); VRFY((mrc==MPI_SUCCESS), "Sync before leaving test"); #else printf("Skipped testing MPI_File_get_size because it's disabled\n"); #endif } finish: if (buf) HDfree(buf); return (nerrs); }
int main(int argc, char **argv) { int mpi_size, mpi_rank; /* mpi variables */ int ret_code; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); /* Attempt to turn off atexit post processing so that in case errors * happen during the test and the process is aborted, it will not get * hang in the atexit post processing in which it may try to make MPI * calls. By then, MPI calls may not work. */ if (H5dont_atexit() < 0){ printf("Failed to turn off atexit processing. Continue.\n", mpi_rank); }; H5open(); if (parse_options(argc, argv) != 0){ if (MAINPROCESS) usage(); goto finish; } if (MAINPROCESS){ printf("===================================\n"); printf("MPI functionality tests\n"); printf("===================================\n"); } if (VERBOSE_MED) h5_show_hostname(); fapl = H5Pcreate (H5P_FILE_ACCESS); H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL); /* set alarm. */ ALARM_ON; /*======================================= * MPIO 1 write Many read test *=======================================*/ MPI_BANNER("MPIO 1 write Many read test..."); ret_code = test_mpio_1wMr(filenames[0], USENONE); ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } /* test atomicity and file sync in high verbose mode only */ /* since they often hang when broken and PHDF5 does not use them. */ if (VERBOSE_HI){ MPI_BANNER("MPIO 1 write Many read test with atomicity..."); ret_code = test_mpio_1wMr(filenames[0], USEATOM); ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } MPI_BANNER("MPIO 1 write Many read test with file sync..."); ret_code = test_mpio_1wMr(filenames[0], USEFSYNC); ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } } /*======================================= * MPIO MPIO File size range test *=======================================*/ MPI_BANNER("MPIO File size range test..."); ret_code = test_mpio_gb_file(filenames[0]); ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } /*======================================= * MPIO independent overlapping writes *=======================================*/ MPI_BANNER("MPIO independent overlapping writes..."); ret_code = test_mpio_overlap_writes(filenames[0]); ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } /*======================================= * MPIO complicated derived datatype test *=======================================*/ /* test_mpio_derived_dtype often hangs when fails. * Do not run it if it is known NOT working unless ask to * run explicitly by high verbose mode. 
*/ #ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS MPI_BANNER("MPIO complicated derived datatype test..."); ret_code = test_mpio_derived_dtype(filenames[0]); #else if (VERBOSE_HI){ MPI_BANNER("MPIO complicated derived datatype test..."); ret_code = test_mpio_derived_dtype(filenames[0]); }else{ MPI_BANNER("MPIO complicated derived datatype test SKIPPED."); ret_code = 0; /* fake ret_code */ } #endif ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } /*======================================= * MPIO special collective IO test *=======================================*/ /* test_special_collective_io often hangs when fails. * Do not run it if it is known NOT working unless ask to * run explicitly by high verbose mode. */ if(mpi_size !=4){ MPI_BANNER("MPIO special collective io test SKIPPED."); if(mpi_rank == 0){ printf("Use FOUR processes to run this test\n"); printf("If you still see the <test SKIPPED>, use <-vh> option to verify the test\n"); } ret_code = 0; goto sc_finish; } #ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS MPI_BANNER("MPIO special collective io test..."); ret_code = test_mpio_special_collective(filenames[0]); #else if (VERBOSE_HI){ MPI_BANNER("MPIO special collective io test..."); ret_code = test_mpio_special_collective(filenames[0]); }else{ MPI_BANNER("MPIO special collective io test SKIPPED."); ret_code = 0; /* fake ret_code */ } #endif sc_finish: ret_code = errors_sum(ret_code); if (mpi_rank==0 && ret_code > 0){ printf("***FAILED with %d total errors\n", ret_code); nerrors += ret_code; } finish: /* make sure all processes are finished before final report, cleanup * and exit. */ MPI_Barrier(MPI_COMM_WORLD); if (MAINPROCESS){ /* only process 0 reports */ printf("===================================\n"); if (nerrors){ printf("***MPI tests detected %d errors***\n", nerrors); } else{ printf("MPI tests finished with no errors\n"); } printf("===================================\n"); } /* turn off alarm */ ALARM_OFF; h5_cleanup(FILENAME, fapl); H5close(); /* MPI_Finalize must be called AFTER H5close which may use MPI calls */ MPI_Finalize(); /* cannot just return (nerrors) because exit code is limited to 1byte */ return(nerrors!=0); }
ForecastData* Init_ForecastData(char* fcst_filename,unsigned int string_size) { FILE* inputfile = NULL; ForecastData* Forecaster; int errorcode,valsread; char end_char; unsigned int buff_size = string_size + 20; char* linebuffer = (char*) malloc(buff_size*sizeof(char)); MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0) { //Open file inputfile = fopen(fcst_filename,"r"); errorcode = 0; if(!inputfile) { printf("[%i]: Error opening file %s.\n",my_rank,fcst_filename); errorcode = 1; } } //Check if forecast file was openned MPI_Bcast(&errorcode,1,MPI_INT,0,MPI_COMM_WORLD); if(errorcode) return NULL; //Reserve space Forecaster = (ForecastData*) malloc(sizeof(ForecastData)); Forecaster->model_name = (char*) malloc(string_size*sizeof(char)); //Read table name //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%s",Forecaster->model_name); if(ReadLineError(valsread,1,"forecaster model name")) return NULL; //length = strlen(Forecaster->model_name); } //MPI_Bcast(&length,1,MPI_UNSIGNED,0,MPI_COMM_WORLD); //MPI_Bcast(Forecaster->model_name,length+1,MPI_CHAR,0,MPI_COMM_WORLD); //Read if data is displayed on ifis //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%hi",&(Forecaster->ifis_display)); if(ReadLineError(valsread,1,"flag if displaying on ifis")) return NULL; } //MPI_Bcast(&(Forecaster->ifis_display),1,MPI_SHORT,0,MPI_COMM_WORLD); //Read which forcing index is used for forecasting //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%u",&(Forecaster->forecasting_forcing)); if(ReadLineError(valsread,1,"index of forecastin forcing")) return NULL; } //MPI_Bcast(&(Forecaster->forecasting_forcing),1,MPI_UNSIGNED,0,MPI_COMM_WORLD); //Read number of rainfall steps to use per forecast //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%u",&(Forecaster->num_rainsteps)); if(ReadLineError(valsread,1,"number of precipitation values")) return NULL; } //MPI_Bcast(&(Forecaster->num_rainsteps),1,MPI_UNSIGNED,0,MPI_COMM_WORLD); //Read forecast window ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%lf",&(Forecaster->forecast_window)); if(ReadLineError(valsread,1,"forecast window")) return NULL; //Read and create a database connection for the rain maps Forecaster->rainmaps_filename = NULL; Forecaster->rainmaps_db = NULL; //if(my_rank == 0) { Forecaster->rainmaps_filename = (char*) malloc(string_size*sizeof(char)); ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%s",Forecaster->rainmaps_filename); if(ReadLineError(valsread,1,"rain map filename")) return NULL; Forecaster->rainmaps_db = ReadDBC(Forecaster->rainmaps_filename,string_size); if(!Forecaster->rainmaps_db) return NULL; } //Read halt filename Forecaster->halt_filename = (char*) malloc(string_size*sizeof(char)); //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = sscanf(linebuffer,"%s",Forecaster->halt_filename); if(ReadLineError(valsread,1,"halt filename")) return NULL; //length = strlen(Forecaster->halt_filename); } //MPI_Bcast(&length,1,MPI_UNSIGNED,0,MPI_COMM_WORLD); //MPI_Bcast(Forecaster->halt_filename,length+1,MPI_CHAR,0,MPI_COMM_WORLD); //Read ending mark //if(my_rank == 0) { ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size); valsread = 
sscanf(linebuffer,"%c",&end_char); if(ReadLineError(valsread,1,"ending mark")) return NULL; } //MPI_Bcast(&end_char,1,MPI_CHAR,0,MPI_COMM_WORLD); //Clean up free(linebuffer); if(my_rank == 0) fclose(inputfile); MPI_Barrier(MPI_COMM_WORLD); if(end_char != '#') { if(my_rank == 0) printf("[%i]: Error: Ending mark not seen in %s.\n",my_rank,fcst_filename); return NULL; } return Forecaster; }
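/* For reference, a hypothetical forecast file matching the reads performed by
 * Init_ForecastData above (names and values are illustrative only, not part of
 * the original code or its documentation):
 *
 *   model_example    <- forecaster model name                 (%s)
 *   1                <- display-on-IFIS flag                  (%hi)
 *   0                <- index of the forecasting forcing      (%u)
 *   6                <- number of rainfall steps per forecast (%u)
 *   120.0            <- forecast window                       (%lf)
 *   rainmaps.dbc     <- rain maps database connection file    (%s)
 *   halt_forecast    <- halt filename                          (%s)
 *   #                <- ending mark
 */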
void system::set_geometry(const bool init) { const double dt_max = 1.0/512; scheduler = Scheduler(dt_max); int np; float lx, ly, lz; FILE *fin = NULL; if (myproc == 0) { float wp; fin = fopen(fin_data, "r"); int ival; size_t nread; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 2*sizeof(int)); nread = fread(&np, sizeof(int), 1, fin); nread = fread(&wp, sizeof(float), 1, fin); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 2*sizeof(int)); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 3*sizeof(float)); nread = fread(&lx, sizeof(float), 1, fin); nread = fread(&ly, sizeof(float), 1, fin); nread = fread(&lz, sizeof(float), 1, fin); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 3*sizeof(float)); fprintf(stderr, " np= %d wp= %g \n",np, wp); fprintf(stderr, " lx= %g ly= %g lz= %g \n", lx, ly, lz); } MPI_Bcast(&lx, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&ly, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&lz, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); t_end = 0.2; n_restart = 2; dt_restart = dt_max; dt_dump = 0.01; di_log = 100; global_n = local_n = 0; // eulerian = true; const vec3 rmin(0.0); const vec3 rmax(lx, ly, lz); global_domain = boundary(rmin, rmax); global_domain_size = global_domain.hsize() * 2.0; const vec3 Len3 = global_domain.hsize() * 2.0; pfloat<0>::set_scale(Len3.x); pfloat<1>::set_scale(Len3.y); pfloat<2>::set_scale(Len3.z); if (myproc == 0) { ptcl.resize(np); const int nx = (int)std::pow(np, 1.0/3.0); const dvec3 dr = dvec3(Len3.x/nx, Len3.y/nx, Len3.z/nx); const real rmax = dr.abs() * 1.0; fprintf(stderr, "dr= %g %g %g \n", dr.x, dr.y, dr.z); local_n = ptcl.size(); global_n = local_n; { std::vector<float> x(local_n), y(local_n), z(local_n); size_t nread; int ival; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&x[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&y[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&z[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); for (int i = 0; i < local_n; i++) { const dvec3 vel(0.0, 0.0, 0.0); ptcl[i] = Particle(x[i], y[i], z[i], vel.x, vel.y, vel.z, i); ptcl[i].rmax = rmax; ptcl[i].unset_derefine(); } } U.resize(local_n); const int var_list[7] = { Fluid::VELX, Fluid::VELY, Fluid::VELZ, Fluid::DENS, Fluid::BX, Fluid::BY, Fluid::BZ}; std::vector<float> data(local_n); for (int var = 0; var < 7; var++) { fprintf(stderr, " reading vat %d out of %d \n", var+1, 7); int ival; size_t nread; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&data[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); for (int i = 0; i < local_n; i++) U[i][var_list[var]] = data[i]; } for (int i = 0; i < local_n; i++) { assert(U[i][Fluid::DENS] > 0.0); U[i][Fluid::ETHM] = cs2 * U[i][Fluid::DENS]; } fclose(fin); fprintf(stderr, " *** proc= %d : local_n= %d global_n= %d \n", myproc, local_n, global_n); } // myproc == 0 
MPI_Bcast(&global_n, 1, MPI_INT, 0, MPI_COMM_WORLD); fprintf(stderr, " proc= %d distrubite \n", myproc); MPI_Barrier(MPI_COMM_WORLD); Distribute::int3 nt(1, 1, 1); switch(nproc) { case 1: break; case 2: nt.x = 2; nt.y = 1; nt.z = 1; break; case 4: nt.x = 2; nt.y = 2; nt.z = 1; break; case 6: nt.x = 3; nt.y = 2; nt.z = 1; break; case 8: nt.x = 2; nt.y = 2; nt.z = 2; break; case 16: nt.x = 4; nt.y = 2; nt.z = 2; break; case 32: nt.x = 4; nt.y = 4; nt.z = 2; break; case 64: nt.x = 4; nt.y = 4; nt.z = 4; break; case 128: nt.x = 8; nt.y = 4; nt.z = 4; break; case 256: nt.x = 8; nt.y = 8; nt.z = 4; break; case 512: nt.x = 8; nt.y = 8; nt.z = 8; break; default: assert(false); } const Distribute::int3 nt_glb(nt); const pBoundary pglobal_domain(pfloat3(0.0), pfloat3(Len3)); distribute_glb.set(nproc, nt, pglobal_domain); for (int k = 0; k < 5; k++) distribute_data(true, false); const int nloc_reserve = (int)(2.0*global_n/nproc); fit_reserve_vec(ptcl, nloc_reserve); fit_reserve_vec(ptcl_ppos, nloc_reserve); fit_reserve_vec(U, nloc_reserve); fit_reserve_vec(dU, nloc_reserve); fit_reserve_vec(Wgrad, nloc_reserve); fit_reserve_vec(gradPsi, nloc_reserve); fit_reserve_vec(cells, nloc_reserve); MPI_Barrier(MPI_COMM_WORLD); fprintf(stderr, " *** proc= %d : local_n= %d global_n= %d \n", myproc, local_n, global_n); fprintf(stderr, " proc= %d building_mesh \n", myproc); MPI_Barrier(MPI_COMM_WORLD); const double t10 = mytimer::get_wtime(); clear_mesh(); int nattempt = build_mesh(true); double dt10 = mytimer::get_wtime() - t10; double volume_loc = 0.0; { std::vector<TREAL> v(local_n); for (int i = 0; i < local_n; i++) v[i] = cells[i].Volume; std::sort(v.begin(), v.end()); // sort volumes from low to high, to avoid roundoff errors for (int i = 0; i < local_n; i++) volume_loc += v[i]; } double dt10max; MPI_Allreduce(&dt10, &dt10max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); double volume_glob = 0.0; int nattempt_max, nattempt_min; MPI_Allreduce(&volume_loc, &volume_glob, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_min, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); const double volume_exact = global_domain_size.x*global_domain_size.y*global_domain_size.z; if (myproc == 0) { fprintf(stderr, "first call build_mesh:[ %g sec :: %g cells/s/proc/thread ]\n", dt10max, global_n/nproc/dt10max); fprintf(stderr, " computed_volume= %g exact_volume= %g diff= %g [ %g ] nattempt= %d %d \n", volume_glob, volume_exact, volume_glob - volume_exact, (volume_glob - volume_exact)/volume_exact, nattempt_min, nattempt_max); } exchange_ptcl(); }
/*@C PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones. Collective on MPI_Comm Input Parameters: . comm - MPI_Communicator that may share working directory Output Parameters: . shared - PETSC_TRUE or PETSC_FALSE Options Database Keys: + -shared_working_directory . -not_shared_working_directory Environmental Variables: + PETSC_SHARED_WORKING_DIRECTORY . PETSC_NOT_SHARED_WORKING_DIRECTORY Level: developer Notes: Stores the status as a MPI attribute so it does not have to be redetermined each time. Assumes that all processors in a communicator either 1) have a common working directory or 2) each has a separate working directory eventually we can write a fancier one that determines which processors share a common working directory. This will be very slow on runs with a large number of processors since it requires O(p*p) file opens. @*/ PetscErrorCode PETSC_DLLEXPORT PetscSharedWorkingDirectory(MPI_Comm comm,PetscTruth *shared) { PetscErrorCode ierr; PetscMPIInt size,rank,*tagvalp,sum,cnt,i; PetscTruth flg,iflg; FILE *fd; static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID; int err; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size == 1) { *shared = PETSC_TRUE; PetscFunctionReturn(0); } ierr = PetscOptionsGetenv(comm,"PETSC_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr); if (flg) { *shared = PETSC_TRUE; PetscFunctionReturn(0); } ierr = PetscOptionsGetenv(comm,"PETSC_NOT_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr); if (flg) { *shared = PETSC_FALSE; PetscFunctionReturn(0); } if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) { ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelTmpShared,&Petsc_WD_keyval,0);CHKERRQ(ierr); } ierr = MPI_Attr_get(comm,Petsc_WD_keyval,(void**)&tagvalp,(int*)&iflg);CHKERRQ(ierr); if (!iflg) { char filename[PETSC_MAX_PATH_LEN]; /* This communicator does not yet have a shared attribute */ ierr = PetscMalloc(sizeof(PetscMPIInt),&tagvalp);CHKERRQ(ierr); ierr = MPI_Attr_put(comm,Petsc_WD_keyval,tagvalp);CHKERRQ(ierr); ierr = PetscGetWorkingDirectory(filename,240);CHKERRQ(ierr); ierr = PetscStrcat(filename,"/petsctestshared");CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* each processor creates a file and all the later ones check */ /* this makes sure no subset of processors is shared */ *shared = PETSC_FALSE; for (i=0; i<size-1; i++) { if (rank == i) { fd = fopen(filename,"w"); if (!fd) SETERRQ1(PETSC_ERR_FILE_OPEN,"Unable to open test file %s",filename); err = fclose(fd); if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file"); } ierr = MPI_Barrier(comm);CHKERRQ(ierr); if (rank >= i) { fd = fopen(filename,"r"); if (fd) cnt = 1; else cnt = 0; if (fd) { err = fclose(fd); if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file"); } } else { cnt = 0; } ierr = MPI_Allreduce(&cnt,&sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); if (rank == i) { unlink(filename); } if (sum == size) { *shared = PETSC_TRUE; break; } else if (sum != 1) { SETERRQ(PETSC_ERR_SUP_SYS,"Subset of processes share working directory"); } } *tagvalp = (int)*shared; } else { *shared = (PetscTruth) *tagvalp; } ierr = PetscInfo1(0,"processors %s working directory\n",(*shared) ? "shared" : "do NOT share");CHKERRQ(ierr); PetscFunctionReturn(0); }
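/* Usage sketch (added; assumes the usual PETSc headers and that PetscInitialize()
 * has already been called by the caller): query whether the ranks of a communicator
 * see the same working directory before choosing a per-rank vs. shared file layout. */
#include <petscsys.h>
PetscErrorCode report_shared_wd(MPI_Comm comm)
{
  PetscTruth     shared = PETSC_FALSE;
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscSharedWorkingDirectory(comm,&shared);CHKERRQ(ierr);
  ierr = PetscPrintf(comm,"working directory is %s\n",shared ? "shared" : "NOT shared");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}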
static int reallocate_buffers(int numNewVertices, int numNewPins) { int status = 0; ZOLTAN_ID_TYPE *idbuf=NULL; int *ibuf=NULL; float *fbuf=NULL; if (verbose) MPI_Barrier(MPI_COMM_WORLD); if (numNewVertices > numMyVertices){ /* avoid realloc bug */ idbuf = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * numNewVertices); if (!idbuf) return 1; memcpy(idbuf, vtxGID, sizeof(ZOLTAN_ID_TYPE) * numMyVertices); free(vtxGID); vtxGID = idbuf; if (verbose){ printf("(%d) vtxGID allocated for %d vertices\n",myRank,numNewVertices); } ibuf = (int *)malloc(sizeof(int) * (numNewVertices+1)); if (!ibuf) return 1; memcpy(ibuf, nborIndex, sizeof(int) * (1 +numMyVertices)); free(nborIndex); nborIndex = ibuf; if (verbose){ printf("(%d) nborIndex allocated for %d indices into nbor array\n",myRank,numNewVertices+1); } } if (numNewPins > numMyPins){ idbuf = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * numNewPins); if (!idbuf) return 1; memcpy(idbuf, nborGID, sizeof(ZOLTAN_ID_TYPE) * numMyPins); free(nborGID); nborGID = idbuf; if (verbose){ printf("(%d) nborGID allocated for %d neighbor IDs\n",myRank,numNewPins); } ibuf = (int *)malloc(sizeof(int) * numNewPins); if (!ibuf) return 1; memcpy(ibuf, nborProc, sizeof(int) * numMyPins); free(nborProc); nborProc = ibuf; if (verbose){ printf("(%d) nborProc allocated for %d process IDs\n",myRank,numNewPins); } fbuf = (float *)malloc(sizeof(float) * numNewPins); if (!fbuf) return 1; memcpy(fbuf, edgeWgt, sizeof(float) * numMyPins); free(edgeWgt); edgeWgt = fbuf; if (verbose){ printf("(%d) edgeWgt allocated for %d edge weights\n",myRank,numNewPins); } } if (verbose) { fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); } return status; }
int main (int argc, char ** argv) { int rank, size, i, j, k, token; MPI_Comm comm = MPI_COMM_WORLD; MPI_Status status; enum ADIOS_READ_METHOD method = ADIOS_READ_METHOD_BP; ADIOS_SELECTION * sel; void * data = NULL; uint64_t start[3], count[3], step = 0; MPI_Init (&argc, &argv); MPI_Comm_rank (comm, &rank); MPI_Comm_size (comm, &size); adios_read_init_method (method, comm, "verbose=3"); adios_logger_open ("log_read_as_file_C", rank); /* adios_read_open_file() allows for seeing all timesteps in the file */ ADIOS_FILE * f = adios_read_open_file ("global_array_time_C.bp", method, comm); if (f == NULL) { log_error ("%s\n", adios_errmsg()); return -1; } ADIOS_VARINFO * v = adios_inq_var (f, "temperature"); // read in two timesteps data = malloc (2 * v->dims[0] * v->dims[1] * sizeof (double)); if (data == NULL) { log_error ("malloc failed.\n"); return -1; } // read in timestep 'rank' (up to 12) step = rank % 13; start[0] = 0; count[0] = v->dims[0]; start[1] = 0; count[1] = v->dims[1]; /* Read a subset of the temperature array */ sel = adios_selection_boundingbox (v->ndim, start, count); /* 2 steps from 'step' */ adios_schedule_read (f, sel, "temperature", step, 2, data); adios_perform_reads (f, 1); if (rank == 0) log_test ("Array size of temperature [0:%lld,0:%lld]\n", v->dims[0], v->dims[1]); if (rank > 0) { MPI_Recv (&token, 1, MPI_INT, rank-1, 0, comm, &status); } log_test("------------------------------------------------\n"); log_test("rank=%d: \n", rank); for (i = 0; i < 2; i++) { log_test ("step %lld = [\n", step+i); for (j = 0; j < v->dims[0]; j++) { log_test (" ["); for (k = 0; k < v->dims[1]; k++) { log_test ("%g ", ((double *)data) [ i * v->dims[0] * v->dims[1] + j * v->dims[1] + k]); } log_test ("]\n"); } log_test ("]\n"); } log_test ("\n"); if (rank < size-1) { MPI_Send (&token, 1, MPI_INT, rank+1, 0, comm); } free (data); adios_free_varinfo (v); adios_read_close (f); MPI_Barrier (comm); adios_read_finalize_method (method); adios_logger_close(); MPI_Finalize (); return 0; }
int main(int argc, char *argv[]) { int rc, do_hier, status; float ver; struct Zoltan_Struct *zz; int changes, numGidEntries, numLidEntries, numImport, numExport; int generate_files = 0; char *platform=NULL, *topology=NULL; char *graph_package=NULL; ZOLTAN_ID_PTR importGlobalGids, importLocalGids, exportGlobalGids, exportLocalGids; int *importProcs, *importToPart, *exportProcs, *exportToPart; struct option opts[10]; double comm_time[10]; float cut_weight[3] = {0., 0., 0.}; long nvert=0; char *debug_level=NULL; status = 0; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); MPI_Comm_size(MPI_COMM_WORLD, &numProcs); Zoltan_Initialize(argc, argv, &ver); zz = Zoltan_Create(MPI_COMM_WORLD); /****************************************************************** ** Check that this test makes sense. ******************************************************************/ if (sizeof(long) < sizeof(ZOLTAN_ID_TYPE)){ if (myRank == 0){ printf("ERROR: This code assumes that a long is at least %d bytes\n",(int)sizeof(ZOLTAN_ID_TYPE)); } status = 1; } check_error_status(status, "configuration error"); /****************************************************************** ** Initialize zoltan ******************************************************************/ /* options */ opts[0].name = "platform"; opts[0].has_arg = 1; opts[0].flag = NULL; opts[0].val = 1; opts[1].name = "topology"; opts[1].has_arg = 1; opts[1].flag = NULL; opts[1].val = 2; opts[2].name = "size"; opts[2].has_arg = 1; opts[2].flag = NULL; opts[2].val = 4; opts[3].name = "verbose"; opts[3].has_arg = 0; opts[3].flag = NULL; opts[3].val = 5; opts[4].name = "help"; opts[4].has_arg = 0; opts[4].flag = NULL; opts[4].val = 6; opts[5].name = "graph_package"; opts[5].has_arg = 1; opts[5].flag = NULL; opts[5].val = 7; opts[6].name = "generate_files"; opts[6].has_arg = 0; opts[6].flag = NULL; opts[6].val = 8; opts[7].name = "debug_level"; opts[7].has_arg = 1; opts[7].flag = NULL; opts[7].val = 9; opts[8].name = 0; opts[8].has_arg = 0; opts[8].flag = NULL; opts[8].val = 0; status = 0; while (1){ rc = getopt_long_only(argc, argv, "", opts, NULL); if (rc == '?'){ MPI_Barrier(MPI_COMM_WORLD); if (myRank == 0) usage(); MPI_Finalize(); exit(0); } else if (rc == 1){ platform = optarg; if (myRank == 0) printf( "For platform %s\n",optarg ); } else if (rc == 2){ topology = optarg; if (myRank == 0) printf( "For topology %s\n",optarg); } else if (rc == 7){ graph_package = optarg; if (myRank == 0) printf( "Zoltan parameter GRAPH_PACKAGE = %s\n",graph_package); } else if (rc == 8){ generate_files = 1; if (myRank == 0) printf( "Zoltan_Generate_Files will be called for each level.\n"); } else if (rc == 4){ nvert = atol(optarg); if (nvert < 1) status = 1; check_error_status(status, "--size={approximate number of vertices}"); if (myRank == 0){ printf( "Graph will have approximately %ld vertices.\n",nvert); } } else if (rc == 5){ verbose = 1; } else if (rc == 6){ if (myRank == 0) usage(); MPI_Finalize(); exit(0); } else if (rc == 9){ debug_level = optarg; } else if (rc <= 0){ break; } } if ((platform==NULL) && (topology==NULL)){ if (myRank == 0) fprintf(stdout,"No platform or topology, so we'll skip hierarchical partitioning\n"); do_hier = 0; } else if (graph_package == NULL){ if (myRank == 0) fprintf(stdout,"No graph package, so we'll skip hierarchical partitioning\n"); do_hier = 0; } else{ do_hier = 1; } /* start */ Zoltan_Memory_Debug(0); if (nvert > 0) numGlobalVertices = nvert; else numGlobalVertices = NUM_GLOBAL_VERTICES; status = create_a_graph(); 
check_error_status(status, "creating the graph"); Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0"); Zoltan_Set_Param(zz, "REMAP", "0"); Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1"); Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1"); Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL"); /* export AND import lists */ Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1"); /* number of weights per vertex */ Zoltan_Set_Param(zz, "EDGE_WEIGHT_DIM", "1");/* number of weights per hyperedge */ Zoltan_Set_Num_Obj_Fn(zz, get_number_of_vertices, NULL); Zoltan_Set_Obj_List_Fn(zz, get_vertex_list, NULL); Zoltan_Set_Num_Edges_Multi_Fn(zz, get_num_edges_list, NULL); Zoltan_Set_Edge_List_Multi_Fn(zz, get_edge_list, NULL); /* GRAPH PARTITION */ Zoltan_Set_Param(zz, "LB_METHOD", "GRAPH"); Zoltan_Set_Param(zz, "LB_APPROACH", "PARTITION"); if (graph_package) Zoltan_Set_Param(zz, "GRAPH_PACKAGE", graph_package); if (verbose){ debug(zz, "Initial graph", 0); } if (generate_files){ rc = Zoltan_Generate_Files(zz, "flat", myRank, 0, 1, 0); if (rc != ZOLTAN_OK) status = 1; check_error_status(status, "Zoltan_Generate_Files"); } /* Performance before partitioning */ time_communication(comm_time+0); cut_weight[0] = get_edge_cut_weight(zz); if (cut_weight[0] < 0.0) status = 1; check_error_status(status, "First call to get_edge_cut_weight"); rc = Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */ &changes, /* 1 if partitioning was changed, 0 otherwise */ &numGidEntries, /* Number of integers used for a global ID */ &numLidEntries, /* Number of integers used for a local ID */ &numImport, /* Number of vertices to be sent to me */ &importGlobalGids, /* Global IDs of vertices to be sent to me */ &importLocalGids, /* Local IDs of vertices to be sent to me */ &importProcs, /* Process rank for source of each incoming vertex */ &importToPart, /* New partition for each incoming vertex */ &numExport, /* Number of vertices I must send to other processes*/ &exportGlobalGids, /* Global IDs of the vertices I must send */ &exportLocalGids, /* Local IDs of the vertices I must send */ &exportProcs, /* Process to which I send each of the vertices */ &exportToPart); /* Partition to which each vertex will belong */ if (rc != ZOLTAN_OK) status = 1; check_error_status(status, "First call to LB_Partition"); status = migrate_graph(numExport, numImport, exportLocalGids, importGlobalGids); check_error_status(status, "migration"); if (verbose){ debug(zz, "After flat partitioning and migration", 0); } time_communication(comm_time+1); /* With graph partitioning */ cut_weight[1] = get_edge_cut_weight(zz); if (cut_weight[1] < 0.0) status = 1; check_error_status(status, "Second call to get_edge_cut_weight"); Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, &importProcs, &importToPart); Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart); if (do_hier){ /* HIERARCHICAL PARTITION */ free_graph(); status = create_a_graph(); check_error_status(status, "create graph for hierarchical partitioning"); Zoltan_Set_Param(zz, "LB_METHOD", "HIER"); Zoltan_Set_Param(zz, "HIER_ASSIST", "1"); if (generate_files){ Zoltan_Set_Param(zz, "HIER_GENERATE_FILES", "1"); } if (debug_level) /* 1, 2 or 3 */ Zoltan_Set_Param(zz, "HIER_DEBUG_LEVEL", debug_level); else Zoltan_Set_Param(zz, "HIER_DEBUG_LEVEL", "0"); /* TODO: Suppose graph is not symmetric, and we request SYMMETRIZE. Do we still get * a "good" answer when each sub-graph in the hierarchy is symmetrized? 
*/ if (topology) Zoltan_Set_Param(zz, "TOPOLOGY", topology); else if (platform) Zoltan_Set_Param(zz, "PLATFORM", platform); rc = Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */ &changes, /* 1 if partitioning was changed, 0 otherwise */ &numGidEntries, /* Number of integers used for a global ID */ &numLidEntries, /* Number of integers used for a local ID */ &numImport, /* Number of vertices to be sent to me */ &importGlobalGids, /* Global IDs of vertices to be sent to me */ &importLocalGids, /* Local IDs of vertices to be sent to me */ &importProcs, /* Process rank for source of each incoming vertex */ &importToPart, /* New partition for each incoming vertex */ &numExport, /* Number of vertices I must send to other processes*/ &exportGlobalGids, /* Global IDs of the vertices I must send */ &exportLocalGids, /* Local IDs of the vertices I must send */ &exportProcs, /* Process to which I send each of the vertices */ &exportToPart); /* Partition to which each vertex will belong */ if (rc != ZOLTAN_OK) status = 1; check_error_status(status, "Second call to LB_Partition"); status = migrate_graph(numExport, numImport, exportLocalGids, importGlobalGids); check_error_status(status, "second migration"); if (verbose){ debug(zz, "After hierarchical partitioning and migration", 0); } time_communication(comm_time+2); /* With hierarchical graph partitioning */ cut_weight[2] = get_edge_cut_weight(zz); if (cut_weight[2] < 0.0) status = 1; check_error_status(status, "Third call to get_edge_cut_weight"); Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, &importProcs, &importToPart); Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart); } Zoltan_Destroy(&zz); free_graph(); if (myRank == 0){ fprintf(stdout,"Graph cut weight before partitioning: %f\n",cut_weight[0]); fprintf(stdout," after flat partitioning: %f\n",cut_weight[1]); if (do_hier) fprintf(stdout," after hierarchical partitioning: %f\n",cut_weight[2]); fflush(stdout); } if (cut_weight[1] >= cut_weight[0]){ status = 1; if (zz->Proc == 0){ fprintf(stderr,"FAILED: No improvement shown in flat partitioning"); } } if (do_hier && (cut_weight[2] > cut_weight[0])){ status = 1; if (zz->Proc == 0){ fprintf(stderr,"FAILED: No improvement shown in hierarchical partitioning"); } } MPI_Finalize(); return status; }
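/* check_error_status() is called throughout the Zoltan test above but is not defined
 * in this excerpt (in the actual source it would be declared before main). A minimal
 * sketch consistent with how it is used (an assumption, not the test's implementation):
 * agree on the worst status across all ranks and abort the run if any rank failed. */
static void check_error_status(int status, const char *msg)
{
  int global_status = 0;
  MPI_Allreduce(&status, &global_status, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
  if (global_status){
    if (myRank == 0) fprintf(stderr, "Error: %s\n", msg);   /* myRank is the file-global rank */
    MPI_Finalize();
    exit(1);
  }
}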
/// Recreate the shared nodes. An alternate incorrect version can be enabled by undefining CORRECT_COORD_COMPARISON void ParFUM_recreateSharedNodes(int meshid, int dim, MPI_Comm newComm) { #define CORRECT_COORD_COMPARISON MPI_Comm comm = newComm; int rank, nParts; int send_count=0; // sanity check int recv_count=0; // sanity check MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &nParts); #if SUPER_FAST_SPECIFIC_TORUS #define TORUSY 15 #define TORUSZ 15 CkPrintf("rank %d is manually configuring the IDXL lists to make the shared node generation fast\n", rank); FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes"); IDXL_Side &shared = mesh->node.shared; int low = (rank-1+nParts) % nParts; int high = (rank+1) % nParts; IDXL_List &list1 = shared.addList(low); IDXL_List &list2 = shared.addList(high); int nodesInPlane = TORUSY * TORUSZ; int numNodes = FEM_Mesh_get_length(meshid,FEM_NODE); // vp - 1 for(int j=0;j<nodesInPlane;j++){ list1.push_back(j); } // vp + 1 for(int j=0;j<nodesInPlane;j++){ list2.push_back(numNodes - nodesInPlane +j); } return; #else // Shared data will be temporarily stored in the following structure int *sharedNodeCounts; // sharedCounts[i] = number of nodes shared with rank i int **sharedNodeLists; // sharedNodes[i] is the list of nodes shared with rank i // Initialize shared data sharedNodeCounts = (int *)malloc(nParts*sizeof(int)); sharedNodeLists = (int **)malloc(nParts*sizeof(int *)); for (int i=0; i<nParts; i++) { sharedNodeLists[i] = NULL; sharedNodeCounts[i] = 0; } // Get local node count and coordinates int numNodes; int coord_msg_tag=42, sharedlist_msg_tag=43; double *nodeCoords; numNodes = FEM_Mesh_get_length(meshid,FEM_NODE); nodeCoords = (double *)malloc(dim*numNodes*sizeof(double)); FEM_Mesh_become_get(meshid); FEM_Mesh_data(meshid,FEM_NODE,FEM_COORD, nodeCoords, 0, numNodes,FEM_DOUBLE, dim); //MPI_Barrier(MPI_COMM_WORLD); if (rank==0) CkPrintf("Extracted node data...\n"); // Begin exchange of node coordinates to determine shared nodes // FIX ME: compute bounding box, only exchange when bounding boxes collide /// The highest partition # to which I send my coordinates (wraps around) int sendUpperBound; if(nParts %2==0){ sendUpperBound = rank + (nParts/2) - (rank%2); } else { sendUpperBound = rank + (nParts/2) ; } /// The lowest partition # to which I send my coordinates (wraps around) int sendLowerBound; if(nParts %2==0){ sendLowerBound = rank - (nParts/2) + ((rank+1)%2); } else { sendLowerBound = rank - (nParts/2); } // Special case optimization for when the mesh is generated in such a way that only neighboring partitions share nodes // look for command line argument #ifdef SHARED_NODES_ONLY_NEIGHBOR //#warning "ParFUM_recreateSharedNodes only allows adjacent partitions(rank +/- 1) to have shared nodes" sendUpperBound = rank + 1; sendLowerBound = rank - 1; #endif for (int i=rank+1; i<=sendUpperBound; i++) { //send nodeCoords to rank i MPI_Send(nodeCoords, dim*numNodes, MPI_DOUBLE, i%nParts, coord_msg_tag, comm); send_count ++; // printf("[%d] Sending %d doubles to rank %d \n",rank,dim*numNodes,i%nParts); } // Receive coordinates from the appropriate number of other partitions // These can be received in any order for (int i=sendLowerBound; i<rank; i++) { std::vector<int> remoteSharedNodes, localSharedNodes; double *recvNodeCoords; MPI_Status status; int source, length; // Probe for a coordinate message from any source; extract source and msg length MPI_Probe(MPI_ANY_SOURCE, coord_msg_tag, comm, &status); source =
status.MPI_SOURCE; length = status.MPI_LENGTH/sizeof(double); // printf("[%d] Receiving %d doubles from rank %d \n",rank,length,source); recv_count ++; // Receive whatever data was available according to probe recvNodeCoords = (double *)malloc(length*sizeof(double)); MPI_Recv((void*)recvNodeCoords, length, MPI_DOUBLE, source, coord_msg_tag, comm, &status); // Match coords between local nodes and received coords int recvNodeCount = length/dim; // PERFORM THE NODE COMPARISONS #ifdef SHARED_NODES_ONLY_NEIGHBOR int borderNodes = BORDERNODES; //#warning "Only the first and last BORDERNODES nodes on each partition are candidates for being shared nodes" // indices are inclusive int myBottomLow = 0; int myBottomHigh = borderNodes; int myTopLow = numNodes - borderNodes; int myTopHigh = numNodes-1; int recvBottomLow = 0; int recvBottomHigh = borderNodes; int recvTopLow = recvNodeCount - borderNodes; int recvTopHigh = recvNodeCount-1; CkPrintf("[%d] rank=%d myBottomLow=%d myBottomHigh=%d myTopLow=%d myTopHigh=%d recvBottomLow=%d recvBottomHigh=%d recvTopLow=%d recvTopHigh=%d\n", CkMyPe(), rank, myBottomLow, myBottomHigh, myTopLow, myTopHigh, recvBottomLow, recvBottomHigh, recvTopLow, recvTopHigh); // make sure the top region is non-negative if(myTopLow < 0) myTopLow = 0; if(recvTopLow < 0) recvTopLow = 0; // make the two regions be non-overlapping if(myBottomHigh >= myTopLow) myTopLow = myTopLow-1; if(recvBottomHigh >= recvTopLow) recvTopLow = recvTopLow-1; for (int j=myBottomLow; j<=myBottomHigh; j++) { for (int k=recvBottomLow; k<=recvBottomHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myTopLow; j<=myBottomHigh; j++) { for (int k=recvTopLow; k<=recvTopHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myTopLow; j<=myTopHigh; j++) { for (int k=recvBottomLow; k<=recvBottomHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myBottomLow; j<=myTopHigh; j++) { for (int k=recvTopLow; k<=recvTopHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } #else // CkPrintf("Comparing %d nodes with %d received nodes\n", numNodes, recvNodeCount); for (int j=0; j<numNodes; j++) { for (int k=0; k<recvNodeCount; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); //printf("[%d] found local node %d to match with remote node %d \n",rank,j,k); break; } } } #endif // Copy local nodes that were shared with source into the data structure int *localSharedNodeList = (int *)malloc(localSharedNodes.size()*sizeof(int)); for (int m=0; m<localSharedNodes.size(); m++) { localSharedNodeList[m] = localSharedNodes[m]; } sharedNodeCounts[source] = localSharedNodes.size(); sharedNodeLists[source] = localSharedNodeList; // do not delete localSharedNodeList as a pointer to it is stored // Send remote nodes that were shared with this partition to remote partition MPI_Send((int *)&remoteSharedNodes[0], remoteSharedNodes.size(), MPI_INT, source, sharedlist_msg_tag, comm); free(recvNodeCoords); } for (int i=rank+1; i<=sendUpperBound; i++) { // recv shared node lists (from the partitions in any order) int 
*sharedNodes; MPI_Status status; int source, length; // Probe for a shared node list from any source; extract source and msg length MPI_Probe(MPI_ANY_SOURCE, sharedlist_msg_tag, comm, &status); source = status.MPI_SOURCE; length = status.MPI_LENGTH/sizeof(int); // Recv the shared node list the probe revealed was available sharedNodes = (int *)malloc(length*sizeof(int)); MPI_Recv((void*)sharedNodes, length, MPI_INT, source, sharedlist_msg_tag, comm, &status); // Store the shared node list in the data structure sharedNodeCounts[source] = length; sharedNodeLists[source] = sharedNodes; // don't delete sharedNodes! we kept a pointer to it! } if (rank==0) CkPrintf("Received new shared node lists...\n"); // IMPLEMENT ME: use sharedNodeLists and sharedNodeCounts to move shared node data // to IDXL FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes"); IDXL_Side &shared = mesh->node.shared; for(int i=0;i<nParts;i++){ if(i == rank) continue; if(sharedNodeCounts[i] != 0){ IDXL_List &list = shared.addList(i); for(int j=0;j<sharedNodeCounts[i];j++){ list.push_back(sharedNodeLists[i][j]); } } } MPI_Barrier(MPI_COMM_WORLD); if (rank==0) CkPrintf("Recreation of shared nodes complete...\n"); //printf("After recreating shared nodes %d \n",rank); //shared.print(); #ifdef SHARED_NODES_ONLY_NEIGHBOR CkAssert(send_count + recv_count == 2); #else CkAssert(send_count + recv_count == nParts-1); #endif // Clean up free(nodeCoords); free(sharedNodeCounts); for (int i=0; i<nParts; i++) { if (sharedNodeLists[i]) free(sharedNodeLists[i]); } free(sharedNodeLists); #endif // normal mode, not super fast mesh specific one }
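/* coordEqual() is used above to match node coordinates between partitions but is not
 * defined in this excerpt (it would be declared before ParFUM_recreateSharedNodes in
 * the actual source). A minimal sketch of a tolerance-based comparison consistent with
 * its call sites; the tolerance value is an assumption (needs <math.h>): */
static int coordEqual(const double *a, const double *b, int dim)
{
  const double tol = 1e-12;                 /* assumed matching tolerance */
  for (int d = 0; d < dim; d++)
    if (fabs(a[d] - b[d]) > tol) return 0;  /* differ in some component */
  return 1;                                  /* all components match within tol */
}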
int main(int argc, char* argv[]) { LIS_MATRIX A,A0; LIS_VECTOR b,x,v; LIS_SCALAR ntimes,nmflops,nnrm2; LIS_SCALAR *value; int nprocs,my_rank; int nthreads, maxthreads; int gn,nnz,mode; int i,j,jj,j0,j1,l,k,n,np,h,ih; int m,nn,ii; int block; int rn,rmin,rmax,rb; int is,ie,clsize,ci,*iw; int err,iter,storage; int *ptr,*index; double mem,val,ra,rs,ri,ria,ca,time,time2,convtime,val2,nnzs,nnzap,nnzt; double commtime,comptime,flops; FILE *file; char path[1024]; LIS_DEBUG_FUNC_IN; lis_initialize(&argc, &argv); #ifdef USE_MPI MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); #else nprocs = 1; my_rank = 0; #endif if( argc < 4 ) { if( my_rank==0 ) printf("Usage: spmvtest4 matrix_filename matrix_type iter [block] \n"); lis_finalize(); exit(0); } file = fopen(argv[1], "r"); if( file==NULL ) CHKERR(1); storage = atoi(argv[2]); iter = atoi(argv[3]); if (argv[4] == NULL) { block = 2; } else { block = atoi(argv[4]); } if( storage<1 || storage>11 ) { if( my_rank==0 ) printf("storage=%d <1 or storage=%d >11\n",storage,storage); CHKERR(1); } if( iter<=0 ) { if( my_rank==0 ) printf("iter=%d <= 0\n",iter); CHKERR(1); } if( my_rank==0 ) { printf("\n"); printf("number of processes = %d\n",nprocs); } #ifdef _OPENMP if( my_rank==0 ) { nthreads = omp_get_num_procs(); maxthreads = omp_get_max_threads(); printf("max number of threads = %d\n", nthreads); printf("number of threads = %d\n", maxthreads); } #else nthreads = 1; maxthreads = 1; #endif /* create matrix and vectors */ lis_matrix_create(LIS_COMM_WORLD,&A0); err = lis_input(A0,NULL,NULL,argv[1]); CHKERR(err); n = A0->n; gn = A0->gn; nnz = A0->nnz; np = A0->np-n; #ifdef USE_MPI MPI_Allreduce(&nnz,&i,1,MPI_INT,MPI_SUM,A0->comm); nnzap = (double)i / (double)nprocs; nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap); nnz = i; MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm); nnzs = (nnzs / (double)nprocs)/nnzap; MPI_Allreduce(&np,&i,1,MPI_INT,MPI_SUM,A0->comm); np = i; #endif err = lis_vector_duplicate(A0,&x); if( err ) CHKERR(err); err = lis_vector_duplicate(A0,&b); if( err ) CHKERR(err); lis_matrix_get_range(A0,&is,&ie); for(i=0;i<n;i++) { err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x); } lis_matrix_duplicate(A0,&A); lis_matrix_set_type(A,storage); err = lis_matrix_convert(A0,A); if( err ) CHKERR(err); comptime = 0.0; commtime = 0.0; for(i=0;i<iter;i++) { #ifdef USE_MPI MPI_Barrier(A->comm); time = lis_wtime(); lis_send_recv(A->commtable,x->value); commtime += lis_wtime() - time; #endif time2 = lis_wtime(); lis_matvec(A,x,b); comptime += lis_wtime() - time2; } lis_vector_nrm2(b,&val); if( my_rank==0 ) { flops = 2.0*nnz*iter*1.0e-6 / comptime; if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { A->bnr = block; A->bnc = block; printf("format = %s(%dx%d) (%2d), iteration = %d, computation = %e sec., %8.3f MFLOPS, communication = %e sec., communication/computation = %3.3f %%, 2-norm = %e\n",lis_storagename2[storage-1],block,block,storage,iter,comptime,flops,commtime,commtime/comptime*100,val); } else { printf("format = %s (%2d), iteration = %d, computation = %e sec., %8.3f MFLOPS, communication = %e sec., communication/computation = %3.3f %%, 2-norm = %e\n",lis_storagename2[storage-1],storage,iter,comptime,flops,commtime,commtime/comptime*100,val); } } lis_matrix_destroy(A); lis_matrix_destroy(A0); lis_vector_destroy(b); lis_vector_destroy(x); lis_finalize(); LIS_DEBUG_FUNC_OUT; return 0; }
void forward_modeling(sf_file Fdat, sf_mpi *mpipar, sf_sou soupar, sf_acqui acpar, sf_vec_s array, bool verb) /*< acoustic forward modeling >*/ { int ix, iz, is, ir, it; int sx, rx, sz, rz, frectx, frectz; int nz, nx, padnz, padnx, padnzx, nt, nr, nb; float dx2, dz2, dt2, dt; float **vv, **dd; float **p0, **p1, **p2, **term, **tmparray, *rr; FILE *swap; MPI_Comm comm=MPI_COMM_WORLD; swap=fopen("temswap.bin", "wb+"); padnz=acpar->padnz; padnx=acpar->padnx; padnzx=padnz*padnx; nz=acpar->nz; nx=acpar->nx; nt=acpar->nt; nr=acpar->nr; nb=acpar->nb; sz=acpar->sz; rz=acpar->rz; frectx=soupar->frectx; frectz=soupar->frectz; dx2=acpar->dx*acpar->dx; dz2=acpar->dz*acpar->dz; dt2=acpar->dt*acpar->dt; dt=acpar->dt; vv = sf_floatalloc2(padnz, padnx); dd=sf_floatalloc2(nt, nr); p0=sf_floatalloc2(padnz, padnx); p1=sf_floatalloc2(padnz, padnx); p2=sf_floatalloc2(padnz, padnx); term=sf_floatalloc2(padnz, padnx); rr=sf_floatalloc(padnzx); /* padding and convert vector to 2-d array */ pad2d(array->vv, vv, nz, nx, nb); for(is=mpipar->cpuid; is<acpar->ns; is+=mpipar->numprocs){ sf_warning("###### is=%d ######", is+1); memset(dd[0], 0., nr*nt*sizeof(float)); memset(p0[0], 0., padnzx*sizeof(float)); memset(p1[0], 0., padnzx*sizeof(float)); memset(p2[0], 0., padnzx*sizeof(float)); sx=acpar->s0_v+is*acpar->ds_v; source_map(sx, sz, frectx, frectz, padnx, padnz, padnzx, rr); for(it=0; it<nt; it++){ if(verb) sf_warning("Modeling is=%d; it=%d;", is+1, it); /* output data */ for(ir=0; ir<acpar->nr2[is]; ir++){ rx=acpar->r0_v[is]+ir*acpar->dr_v; dd[acpar->r02[is]+ir][it]=p1[rx][rz]; } /* laplacian operator */ laplace(p1, term, padnx, padnz, dx2, dz2); /* load source */ for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ term[ix][iz] += rr[ix*padnz+iz]*array->ww[it]; } } /* update */ for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ p2[ix][iz]=2*p1[ix][iz]-p0[ix][iz]+vv[ix][iz]*vv[ix][iz]*dt2*term[ix][iz]; } } /* swap wavefield pointer of different time steps */ tmparray=p0; p0=p1; p1=p2; p2=tmparray; /* boundary condition */ apply_sponge(p0, acpar->bc, padnx, padnz, nb); apply_sponge(p1, acpar->bc, padnx, padnz, nb); } // end of time loop fseeko(swap, is*nr*nt*sizeof(float), SEEK_SET); fwrite(dd[0], sizeof(float), nr*nt, swap); }// end of shot loop fclose(swap); MPI_Barrier(comm); /* transfer data to Fdat */ if(mpipar->cpuid==0){ swap=fopen("temswap.bin", "rb"); for(is=0; is<acpar->ns; is++){ fseeko(swap, is*nr*nt*sizeof(float), SEEK_SET); fread(dd[0], sizeof(float), nr*nt, swap); sf_floatwrite(dd[0], nr*nt, Fdat); } fclose(swap); remove("temswap.bin"); } MPI_Barrier(comm); /* release allocated memory */ free(*p0); free(p0); free(*p1); free(p1); free(*p2); free(p2); free(*vv); free(vv); free(*dd); free(dd); free(rr); free(*term); free(term); }
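/* laplace() is called in the time loop above but not defined in this excerpt. A minimal
 * sketch of a second-order five-point stencil matching the call
 * laplace(p1, term, padnx, padnz, dx2, dz2); this is an assumption, the original operator
 * may well use a higher-order stencil, and the boundary cells are simply skipped here. */
static void laplace(float **p, float **term, int padnx, int padnz,
                    float dx2, float dz2)
{
    int ix, iz;
    for (ix = 1; ix < padnx-1; ix++)
        for (iz = 1; iz < padnz-1; iz++)
            term[ix][iz] =
                (p[ix+1][iz] - 2.0f*p[ix][iz] + p[ix-1][iz]) / dx2 +   /* d2p/dx2 */
                (p[ix][iz+1] - 2.0f*p[ix][iz] + p[ix][iz-1]) / dz2;    /* d2p/dz2 */
}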
int main(int argc, char** argv) { //Number of CPUs int numProcs; //Processor ID int rank; //The status of our receiver MPI_Status status; //Initialize MPI and start the parallel environment. MPI_Init(&argc, &argv); //Find out how many processes are in our communicator MPI_Comm_size(MPI_COMM_WORLD, &numProcs); //Determines the rank of a process MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Height and width of image will be passed in. int height = atoi(argv[1]); int width = atoi(argv[2]); Complex num; struct timeval start; double time = 0.0; //The Mandelbrot set will lie in this plane. //X range float realMax = 2.0; float realMin = -2.0; //Y range float imagMax = 2.0; float imagMin = -2.0; //Scale the image so that it can be seen at the given resolution. float scaleX = (realMax - realMin) / width; float scaleY = (imagMax - imagMin) / height; //Number of slaves int numGroups = numProcs - 1; //Number of remaining rows after even partitions for slave. int remainder = height % numGroups; //How high those partitions are. int grpHeight = (height - remainder) / numGroups; //The area of our partition int partArea = grpHeight * width; //Image array unsigned int* image = (unsigned int *) malloc(sizeof(unsigned int) * height * width); unsigned int* buffer = (unsigned int *) malloc(sizeof(unsigned int) * (width + 10)); int DATA_TAG = 0; int TERMINATE = 1; MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { int count = 0; int row = 0; //Starting the clock gettimeofday(&start, NULL); for (int proc = 1; proc < numProcs; proc++) { MPI_Send(&row, 1, MPI_INT, proc, DATA_TAG, MPI_COMM_WORLD); count++; row++; } do { MPI_Recv(buffer, width, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); count--; if (row < height) { MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, DATA_TAG, MPI_COMM_WORLD); count++; row++; } else { MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, TERMINATE, MPI_COMM_WORLD); } for (int x = 0; x < width; x++) { image[status.MPI_TAG * width + x] = buffer[x]; } } while (count > 0); //Stop the clock time = getElapsed(&start); //Output result printf("%d cores %dx%d: %fs\n", numProcs, height, width, time); //Calculate I/O time //gettimeofday(&start, NULL); //Display the set //writeImage("Static.ppm", image, height, width); //Stop the clock // time = getElapsed(&start); //Output result //printf("Runtime for file I/O: %fs\n", time); } else { int row; MPI_Recv(&row, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status); //printf("Slave: %d Receive Init", rank); while (status.MPI_TAG != TERMINATE) { num.imag = imagMin + ((float) row * scaleY); for (int x = 0; x < width; x++) { //Initialize Complex based on position. num.real = realMin + ((float) x * scaleX); //Calculates the color of the current pixel. buffer[x] = calPixel(num); } MPI_Send(buffer, width, MPI_UNSIGNED, 0, row, MPI_COMM_WORLD); //printf("Slave: %d Send row %d\n", rank, row); //Send only partition worked on MPI_Recv(&row, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); //printf("Slave: %d Recv row %d\n", rank, row); } } free(buffer); free(image); MPI_Finalize(); return 0; }
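/* calPixel() and getElapsed() are used by the master/worker loops above but are not
 * shown in this excerpt (they would be declared before main in the actual source).
 * A minimal escape-time sketch for calPixel(), consistent with the Complex struct and
 * the unsigned int pixel values used above; the iteration cap is an assumption. */
unsigned int calPixel(Complex c)
{
    const unsigned int maxIter = 256;   /* assumed iteration cap */
    float zr = 0.0f, zi = 0.0f;
    unsigned int n = 0;
    while (zr*zr + zi*zi <= 4.0f && n < maxIter) {
        float t = zr*zr - zi*zi + c.real;   /* z = z*z + c, real part */
        zi = 2.0f*zr*zi + c.imag;           /* imaginary part */
        zr = t;
        n++;
    }
    return n;                               /* iterations until escape (or maxIter) */
}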
int main(int argc, char *argv[]) { int num_errors = 0, total_num_errors = 0; int rank, size; char port1[MPI_MAX_PORT_NAME]; char port2[MPI_MAX_PORT_NAME]; MPI_Status status; MPI_Comm comm1, comm2; int verbose = 0; int data = 0; MTEST_VG_MEM_INIT(port1, MPI_MAX_PORT_NAME * sizeof(char)); MTEST_VG_MEM_INIT(port2, MPI_MAX_PORT_NAME * sizeof(char)); if (getenv("MPITEST_VERBOSE")) { verbose = 1; } MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (size < 3) { printf("Three processes needed to run this test.\n"); MPI_Finalize(); return 0; } if (rank == 0) { IF_VERBOSE(("0: opening ports.\n")); MPI_Open_port(MPI_INFO_NULL, port1); MPI_Open_port(MPI_INFO_NULL, port2); IF_VERBOSE(("0: opened port1: <%s>\n", port1)); IF_VERBOSE(("0: opened port2: <%s>\n", port2)); IF_VERBOSE(("0: sending ports.\n")); MPI_Send(port1, MPI_MAX_PORT_NAME, MPI_CHAR, 1, 0, MPI_COMM_WORLD); MPI_Send(port2, MPI_MAX_PORT_NAME, MPI_CHAR, 2, 0, MPI_COMM_WORLD); IF_VERBOSE(("0: accepting port2.\n")); MPI_Comm_accept(port2, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm2); IF_VERBOSE(("0: accepting port1.\n")); MPI_Comm_accept(port1, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm1); IF_VERBOSE(("0: closing ports.\n")); MPI_Close_port(port1); MPI_Close_port(port2); IF_VERBOSE(("0: sending 1 to process 1.\n")); data = 1; MPI_Send(&data, 1, MPI_INT, 0, 0, comm1); IF_VERBOSE(("0: sending 2 to process 2.\n")); data = 2; MPI_Send(&data, 1, MPI_INT, 0, 0, comm2); IF_VERBOSE(("0: disconnecting.\n")); MPI_Comm_disconnect(&comm1); MPI_Comm_disconnect(&comm2); } else if (rank == 1) { IF_VERBOSE(("1: receiving port.\n")); MPI_Recv(port1, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status); IF_VERBOSE(("1: received port1: <%s>\n", port1)); IF_VERBOSE(("1: connecting.\n")); MPI_Comm_connect(port1, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm1); MPI_Recv(&data, 1, MPI_INT, 0, 0, comm1, &status); if (data != 1) { printf("Received %d from root when expecting 1\n", data); fflush(stdout); num_errors++; } IF_VERBOSE(("1: disconnecting.\n")); MPI_Comm_disconnect(&comm1); } else if (rank == 2) { IF_VERBOSE(("2: receiving port.\n")); MPI_Recv(port2, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status); IF_VERBOSE(("2: received port2: <%s>\n", port2)); /* make sure process 1 has time to do the connect before this process * attempts to connect */ MTestSleep(3); IF_VERBOSE(("2: connecting.\n")); MPI_Comm_connect(port2, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm2); MPI_Recv(&data, 1, MPI_INT, 0, 0, comm2, &status); if (data != 2) { printf("Received %d from root when expecting 2\n", data); fflush(stdout); num_errors++; } IF_VERBOSE(("2: disconnecting.\n")); MPI_Comm_disconnect(&comm2); } MPI_Barrier(MPI_COMM_WORLD); MPI_Reduce(&num_errors, &total_num_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { if (total_num_errors) { printf(" Found %d errors\n", total_num_errors); } else { printf(" No Errors\n"); } fflush(stdout); } MPI_Finalize(); return total_num_errors; }
int main(int argc, char **argv) { char myhost[256]; real_t dt = 0; int nvtk = 0; char outnum[80]; int time_output = 0; long flops = 0; // real_t output_time = 0.0; real_t next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; struct timespec start, end; double cellPerCycle = 0; double avgCellPerCycle = 0; long nbCycle = 0; // array of timers to profile the code memset(functim, 0, TIM_END * sizeof(functim[0])); #ifdef MPI MPI_Init(&argc, &argv); #endif process_args(argc, argv, &H); hydro_init(&H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single")); gethostname(myhost, 255); if (H.mype == 0) { fprintf(stdout, "Hydro: Main process running on %s\n", myhost); } #ifdef _OPENMP if (H.mype == 0) { fprintf(stdout, "Hydro: OpenMP mode ON\n"); fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads()); fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads()); fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs()); } #endif #ifdef MPI if (H.mype == 0) { fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc); } #else fprintf(stdout, "Hydro: standard build\n"); #endif // PRINTUOLD(H, &Hv); #ifdef MPI if (H.nproc > 1) #if FTI>0 MPI_Barrier(FTI_COMM_WORLD); #endif #if FTI==0 MPI_Barrier(MPI_COMM_WORLD); #endif #endif if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); //pre-allocate memory before entering in loop //For godunov scheme start = cclock(); start = cclock(); allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat); end = cclock(); #ifdef MPI #if FTI==1 FTI_Protect(0,functim, TIM_END,FTI_DBLE); FTI_Protect(1,&nvtk,1,FTI_INTG); FTI_Protect(2,&next_output_time,1,FTI_DBLE); FTI_Protect(3,&dt,1,FTI_DBLE); FTI_Protect(4,&MflopsSUM,1,FTI_DBLE); FTI_Protect(5,&nbFLOPS,1,FTI_LONG); FTI_Protect(6,&(H.nstep),1,FTI_INTG); FTI_Protect(7,&(H.t),1,FTI_DBLE); FTI_Protect(8,Hv.uold,H.nvar * H.nxt * H.nyt,FTI_DBLE); #endif #endif if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end)); // we start timings here to avoid the cost of initial memory allocation start_time = dcclock(); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { //system("top -b -n1"); // reset perf counter for this iteration flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = dcclock(); outnum[0] = 0; if ((H.nstep % 2) == 0) { dt = 0; // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n"); start = cclock(); compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat); end = cclock(); functim[TIM_COMPDT] += ccelaps(start, end); if (H.nstep == 0) { dt = dt / 2.0; if (H.mype == 0) fprintf(stdout, "Hydro computes initial deltat: %le\n", dt); } #ifdef MPI if (H.nproc > 1) { real_t dtmin; // printf("pe=%4d\tdt=%lg\n",H.mype, dt); #if FTI==0 if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); } #endif #if FTI>0 if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, FTI_COMM_WORLD); } #endif dt = dtmin; } #endif } // dt = 1.e-3; // 
if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n"); if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = dcclock(); cellPerCycle = (double) (H.globnx * H.globny) / (end_iter - start_iter) / 1000000.0L; avgCellPerCycle += cellPerCycle; nbCycle++; H.nstep++; H.t += dt; { real_t iter_time = (real_t) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; start = cclock(); #if FTI==0 MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); #endif #if FTI>0 MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); #endif // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; end = cclock(); functim[TIM_ALLRED] += ccelaps(start, end); #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04d]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04d]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %.3lf MC/s%s\n", H.nstep, H.t, dt, cellPerCycle, outnum); fflush(stdout); } #ifdef MPI #if FTI==1 FTI_Snapshot(); #endif #endif } // while end_time = dcclock(); // Deallocate work spaces deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat); hydro_finish(H, &Hv); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) { fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); fprintf(stdout, " "); } if (H.nproc == 1) { int sizeFmt = sizeLabel(functim, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); if (sizeof(real_t) == sizeof(double)) { fprintf(stdout, "PE0_DP "); } else { fprintf(stdout, "PE0_SP "); } printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "%% "); percentTimings(functim, TIM_END); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); } #ifdef MPI if (H.nproc > 1) { double timMAX[TIM_END]; double timMIN[TIM_END]; double timSUM[TIM_END]; #if FTI==0 MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(functim, 
timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif #if FTI>0 MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD); MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, FTI_COMM_WORLD); #endif if (H.mype == 0) { int sizeFmt = sizeLabel(timMAX, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MIN "); printTimings(timMIN, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MAX "); printTimings(timMAX, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "AVG "); avgTimings(timSUM, TIM_END, H.nproc); printTimings(timSUM, TIM_END, sizeFmt); fprintf(stdout, "\n"); } } #endif if (H.mype == 0) { fprintf(stdout, "Average MC/s: %.3lf\n", (double)(avgCellPerCycle / nbCycle)); } #ifdef MPI #if FTI>0 FTI_Finalize(); #endif MPI_Finalize(); #endif return 0; }
int main (int argc, char ** argv) { fastbit_init(0); fastbit_set_verbose_level(0); ADIOS_FILE * f; //MPI_Comm comm_dummy = 0; // MPI_Comm is defined through adios_read.h MPI_Comm comm_dummy = MPI_COMM_WORLD; int rank, size; MPI_Init (&argc, &argv); MPI_Comm_rank (comm_dummy, &rank); MPI_Comm_size (comm_dummy, &size); adios_init_noxml (comm_dummy); if (argc < 2) { printf("Usage: index_fastbit fileName (attrName)\n"); return 0; } f = adios_read_open_file (argv[1], ADIOS_READ_METHOD_BP, comm_dummy); if (f == NULL) { printf ("::%s\n", adios_errmsg()); return -1; } /* adios_allocate_buffer (ADIOS_BUFFER_ALLOC_NOW, (f->file_size)*2/1048576 + 5); // +5MB for extra room in buffer adios_declare_group (&gAdios_group, gGroupNameFastbitIdx, "", adios_flag_yes); adios_select_method (gAdios_group, "MPI", "", ""); */ gIdxFileName = fastbit_adios_util_getFastbitIndexFileName(argv[1]); unlink(gIdxFileName); adios_allocate_buffer (ADIOS_BUFFER_ALLOC_NOW, 500); // +5MB for extra room in buffer adios_declare_group (&gAdios_group, gGroupNameFastbitIdx, "", adios_flag_yes); adios_select_method (gAdios_group, "MPI", "", ""); adios_open (&gAdios_write_file, gGroupNameFastbitIdx, gIdxFileName, "w", MPI_COMM_WORLD); #ifdef MULTI_BLOCK int testid = adios_define_var (gAdios_group, "pack", "", adios_integer , 0, 0, 0); #endif #ifdef BOX int testid = adios_define_var (gAdios_group, "elements", "", adios_integer , 0, 0, 0); #endif //uint64_t estimatedbytes = (nb+nk+no)*adios_type_size(adios_double, NULL); int jobCounter = getJobCounter(f); uint64_t estimatedbytes = getByteEstimationOnFile(f, rank); if (size > 1) { int maxJobsPP = jobCounter/size + 1; estimatedbytes = estimatedbytes * maxJobsPP /jobCounter +1048576; } estimatedbytes += 1048576; uint64_t adios_totalsize; // adios_group_size needs to be called before any write_byid; otherwise write_byid does nothing adios_group_size (gAdios_write_file, estimatedbytes , &adios_totalsize); printf("=> .. adios open output file: %s, rank %d allocated %" PRIu64 " bytes... 
\n", gIdxFileName, rank, adios_totalsize); // IMPORTANT: // can only call open/close once in a process // otherwise data is tangled or only the data in the last open/close call is recorded #ifdef MULTI_BLOCK adios_write_byid(gAdios_write_file, testid, &pack); #endif #ifdef BOX adios_write_byid(gAdios_write_file, testid, &recommended_index_ele); #endif sumLogTime(-1); sumLogTimeMillis(-1); if (argc >= 3) { int i=2; while (i<argc) { const char* varName = argv[i]; if(strstr(varName, "<binning prec") != NULL) { if (gBinningOption == NULL) { gBinningOption = argv[i]; } if (argc == 3) { buildIndexOnAllVar(f, rank, size); break; } i++; continue; } else { ADIOS_VARINFO * v = adios_inq_var(f, varName); if (v == NULL) { printf("No such variable: %s\n", varName); return 0; } printf("building fastbit index on variable: %s\n", varName); buildIndex_mpi(f, v, rank, size); adios_free_varinfo(v); i++; } } } else { buildIndexOnAllVar(f, rank, size); } sumLogTime(0); sumLogTimeMillis(0); adios_close(gAdios_write_file); adios_read_close(f); // // writing file clean up // // read back: f = adios_read_open_file (gIdxFileName, ADIOS_READ_METHOD_BP, comm_dummy); if (f == NULL) { printf("No such file: %s \n", gIdxFileName); return 0; } int numVars = f->nvars; int i=0; int k=0; int j=0; for (i=0; i<numVars; i++) { char* varName = f->var_namelist[i]; ADIOS_VARINFO* v = adios_inq_var(f, varName); adios_inq_var_blockinfo(f,v); int timestep = 0; for (k=0; k<v->sum_nblocks; k++) { verifyData(f, v, k, timestep); } adios_free_varinfo(v); } adios_read_close(f); if (rank == 0) { printf(" ==> index file is at: %s\n", gIdxFileName); } // clean up MPI_Barrier (comm_dummy); adios_finalize (rank); MPI_Finalize (); free (gIdxFileName); fastbit_cleanup(); return 0; }
void PrintAllCompilationOptions() { #if defined MPI PRINT0("Perform communications every %d iterations (default %d)\n", count_to_communication, CBLOCK_DEFAULT ) ; #if (defined COMM_COST)||(defined ITER_COST)||(defined COMM_CONFIG) PRINT0("Prob communication = %d\n", proba_communication) ; #endif #endif /* MPI */ PRINT0("Compilation options:\n") ; PRINT0("- Backtrack when reset: ") ; #if defined BACKTRACK PRINT0("ON\n") ; #else PRINT0("OFF\n") ; #endif #if defined MPI PRINT0("- MPI (so count is forced to 1 (-b 1))\n") ; #if defined DEBUG_MPI_ENDING PRINT0("- DEBUG_MPI_ENDING\n") ; #endif #if defined LOG_FILE PRINT0("- LOG_FILE\n") ; #endif #if defined NO_SCREEN_OUTPUT PRINT0("- NO_SCREEN_OUTPUT\n") ; #endif #if defined DISPLAY_0 PRINT0("- DISPLAY_0\n") ; #endif #if defined DISPLAY_ALL PRINT0("- DISPLAY_ALL\n") ; #endif #if defined DEBUG PRINT0("- DEBUG\n") ; #endif #if defined DEBUG_QUEUE PRINT0("- DEBUG_QUEUE\n") ; #endif #if defined DEBUG_PRINT_QUEUE PRINT0("- DEBUG_PRINT_QUEUE\n") ; #endif #if defined MPI_ABORT PRINT0("- MPI_ABORT\n") ; #endif #if defined MPI_BEGIN_BARRIER PRINT0("- MPI_BEGIN_BARRIER\n") ; #endif /* Heuristic for communications */ #if defined COMM_COST PRINT0("- With COMM_COST\n") ; #elif defined ITER_COST PRINT0("- With ITER_COST\n") ; #elif defined COMM_CONFIG PRINT0("- With COMM_CONFIG\n") ; #else PRINT0("- Without comm except for termination\n") ; #endif #endif /* MPI */ #if defined MPI_BEGIN_BARRIER PRINT0("===========================================\n\n") ; PRINT0("MPI Barrier called to synchronize processes before solve()\n"); MPI_Barrier(MPI_COMM_WORLD); #endif /* MPI_BEGIN_BARRIER */ }
int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size!=4) { if (rank==0) printf("Use 4 processes\n"); MPI_Finalize(); return size; } { if (rank==0) printf("MPI_Reduce_scatter(sendbuf, recvbuf...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int sendbuf[4] = {junk, junk*2, junk*3, junk*4}; int recvbuf[1] = {0}; int recvcounts[4] = {1,1,1,1}; MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD); printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n", rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]); } fflush(stdout); usleep(1000); MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("===================\n"); { if (rank==0) printf("MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int recvbuf[4] = {junk, junk*2, junk*3, junk*4}; int recvcounts[4] = {1,1,1,1}; MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD); printf("%d: recvbuf = {%d,%d,%d,%d} \n", rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]); } fflush(stdout); usleep(1000); MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("===================\n"); { if (rank==0) printf("MPI_Reduce_scatter_block(sendbuf, recvbuf...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int sendbuf[4] = {junk, junk*2, junk*3, junk*4}; int recvbuf[1] = {0}; int recvcount = 1; MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD); printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n", rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]); } fflush(stdout); usleep(1000); MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("===================\n"); { if (rank==0) printf("MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int recvbuf[4] = {junk, junk*2, junk*3, junk*4}; int recvcount = 1; MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD); printf("%d: recvbuf = {%d,%d,%d,%d} \n", rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]); } fflush(stdout); usleep(1000); MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("===================\n"); { if (rank==0) printf("MPI_Reduce(sendbuf, tempbuf... + MPI_Scatter(tempbuf, recvcount...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int sendbuf[4] = {junk, junk*2, junk*3, junk*4}; int tempbuf[4] = {0,0,0,0}; int recvbuf[1] = {0}; int recvcount = 1; MPI_Reduce(sendbuf, tempbuf, 4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD); MPI_Scatter(tempbuf, recvcount, MPI_INT, recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD); printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n", rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]); } fflush(stdout); usleep(1000); MPI_Barrier(MPI_COMM_WORLD); if (rank==0) printf("===================\n"); { if (rank==0) printf("MPI_Reduce(MPI_IN_PLACE, recvbuf... + MPI_Scatter(MPI_IN_PLACE, recvcount...\n"); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); int junk = rank+1; int recvbuf[4] = {junk, junk*2, junk*3, junk*4}; int recvcount = 1; MPI_Reduce(rank==0 ? MPI_IN_PLACE : recvbuf, rank==0 ? recvbuf : NULL, 4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD); MPI_Scatter(recvbuf, recvcount, MPI_INT, rank==0 ? 
MPI_IN_PLACE : recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD); printf("%d: recvbuf = {%d,%d,%d,%d} \n", rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]); } MPI_Finalize(); return 0; }
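/* Worked example (added) for the five variants above, with the required 4 ranks:
 * rank r contributes sendbuf = {r+1, 2*(r+1), 3*(r+1), 4*(r+1)}, so element i of the
 * element-wise sum is (i+1)*(1+2+3+4) = 10*(i+1). Each variant then hands element r
 * to rank r, so the reduced value seen by ranks 0..3 is 10, 20, 30, 40 respectively
 * (for the MPI_IN_PLACE variants only that leading element of recvbuf is specified).
 * This is a quick check that MPI_Reduce_scatter, MPI_Reduce_scatter_block, and the
 * MPI_Reduce + MPI_Scatter combination all agree. */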
int main(int argc, char * argv[]) { /* Constant Declarations */ //long const SET_SIZE = 7920; /* Variable Declarations */ int count = 0; // local count double elapsed_time = 0.00; // time elapsed int first; // index of first multiple int global_count = 1; // global count int high_value; // highest value on processor char hostname[MPI_MAX_PROCESSOR_NAME]; // host process is running on int i; // counter variable int id; // process id number int index; int init_status; // initialization error status flag int initialized = 0; // mpi initialized flag (MPI_Initialized expects an int flag) int len; // hostname length int low_value; // lowest value on the processor char* marked; // portion of 2 to n that is marked int n; // number of elements to sieve int n_sqrt; // square root of n int p; // number of processes int prime; int proc0_size; // size of process 0's subarray int size; // elements in marked int* sqrt_primes; // primes up to the square root int sqrt_primes_index; // index in the square root primes array char* sqrt_primes_marked; // numbers up to sqrt marked prime or not int sqrt_primes_size; // size of square root primes array /* Function Declarations */ //int is_prime( int ); /* Initialization */ MPI_Initialized( &initialized ); // set initialized flag if( !initialized ) // if MPI is not initialized init_status = MPI_Init( &argc, &argv ); // Initialize MPI else init_status = MPI_SUCCESS; // otherwise set init_status to success if( init_status != MPI_SUCCESS ) { // if not successfully initialized printf ("Error starting MPI program. Terminating.\n"); // print error message fflush(stdout); MPI_Abort(MPI_COMM_WORLD, init_status); // abort } MPI_Get_processor_name( hostname, &len ); // set hostname MPI_Comm_rank( MPI_COMM_WORLD, &id ); // set process rank MPI_Comm_size( MPI_COMM_WORLD, &p ); // set size of comm group //printf("Process rank %d started on %s.\n", id, hostname); // print start message //fflush(stdout); //MPI_Barrier(MPI_COMM_WORLD ); /* Start Timer */ MPI_Barrier( MPI_COMM_WORLD ); // synchronize elapsed_time = - MPI_Wtime(); // start time /* Check that a set size was passed into the program */ if(argc != 2) { if(id==0) { printf("Command line: %s <m>\n", argv[0]); fflush(stdout); } MPI_Finalize(); exit(1); } n = atoi(argv[1]); n_sqrt = ceil(sqrt((double)n)); //if(id==0) // printf("square root: %i\n", n_sqrt); // debug //if(id==0) { //printf("n sqrt: %i\n", n_sqrt); //fflush(stdout); //} sqrt_primes_marked = (char *) malloc(n_sqrt + 1); sqrt_primes_marked[0] = 1; sqrt_primes_marked[1] = 1; for(i = 2; i <= n_sqrt; ++i) { sqrt_primes_marked[i] = 0; } prime = 2; sqrt_primes_size = n_sqrt; //printf("sqrt primes size: %i\n", sqrt_primes_size); do { for(i = prime * prime; i <= n_sqrt; i+=prime) { sqrt_primes_marked[i] = 1; //sqrt_primes_size--; } while(sqrt_primes_marked[++prime]); } while (prime * prime <= n_sqrt); //printf("sqrt primes size: %i\n", sqrt_primes_size); sqrt_primes = (int *) malloc(sqrt_primes_size * sizeof(int)); sqrt_primes_index = 0; //sqrt_primes_size = 0; for(i = 3; i <= n_sqrt; ++i) { if(!sqrt_primes_marked[i]) { sqrt_primes[sqrt_primes_index] = i; // printf("%i, ", sqrt_primes[sqrt_primes_index]); sqrt_primes_index++; } } sqrt_primes_size = sqrt_primes_index; //printf("sqrt primes size: %i\n", sqrt_primes_size); //fflush(stdout); /* Set process's array share and first and last elements */ low_value = 2 + BLOCK_LOW(id,p,n-1); high_value = 2 + BLOCK_HIGH(id,p,n-1); size = BLOCK_SIZE(id,p,n-1); //printf("Process %i block low: %i\n", id, low_value); //fflush(stdout); //printf("Process %i block high: %i\n", id, 
high_value); //fflush(stdout); //printf("Block size: %i\n", size); //fflush(stdout); if(low_value % 2 == 0) { if(high_value % 2 == 0) { size = (int)floor((double)size / 2.0); high_value--; } else { size = size / 2; } low_value++; } else { if(high_value % 2 == 0) { size = size / 2; high_value--; } else { size = (int)ceil((double)size / 2.0); } } //printf("Process %i block low: %i\n", id, low_value); //fflush(stdout); //printf("Process %i block high: %i\n", id, high_value); //fflush(stdout); //printf("Block size: %i\n", size); //fflush(stdout); //proc0_size = (n-1)/p; /* if process 0 doesn't have all the primes for sieving, then bail*/ /*if((2+proc0_size) < (int)sqrt((double)n)) { if(id==0) { printf("Too many processes\n"); fflush(stdout); } MPI_Finalize(); exit(1); } */ /* Allocate share of array */ marked = (char *) malloc(size); if(marked == NULL) { printf("Cannot allocate enough memory\n"); fflush(stdout); MPI_Finalize(); exit(1); } /* Run Sieve */ //printf("made it to sieve\n"); //fflush(stdout); for(i = 0; i < size; i++) marked[i] = 0; if(id==0) first = 0; sqrt_primes_index = 0; prime = sqrt_primes[sqrt_primes_index]; //printf("first prime: %i\n", prime); //fflush(stdout); //for(i = 0; i < sqrt_primes_size; i++) { // printf("%i,", sqrt_primes[i]); // fflush(stdout); //} do { if(prime >= low_value) first = ((prime - low_value) / 2) + prime; else if(prime * prime > low_value) { first = (prime * prime - low_value) / 2; } else { if(low_value % prime == 0) first = 0; else { first = 1; while ((low_value + (2 * first)) % prime != 0) ++first; } } //printf("first: %i\n", first); //fflush(stdout); for(i = first; i < size; i += (prime)) marked[i] = 1; //printf("made it to prime assignment\n"); prime = sqrt_primes[++sqrt_primes_index]; //printf("prime: %i\n", prime); //fflush(stdout); } while(prime * prime <= n && sqrt_primes_index < sqrt_primes_size); count = 0; for(i = 0; i < size; i++) { if(!marked[i]) count++; } //printf("size: %i\ncount: %i\n", size, count); // for( i=id; i<SET_SIZE; i+=p ) // interleaved allocation // count += is_prime( i ); // check if prime w/ sieve of eratosthenes /* Reduce Sum */ MPI_Reduce( &count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD ); // reduce the primes count, root: proces 0 /* Stop Timer */ elapsed_time += MPI_Wtime(); // end time //printf("Process %i found %i primes.\n", id, count); //fflush(stdout); //printf("Process %d is done in %d, running on %s.\n", id, elapsed_time, hostname); // print process done message if( id == 0 ) { // rank 0 prints global count printf("There are %d primes in the first %i integers.\nExecution took %10.6f.\n", global_count, n, elapsed_time); fflush(stdout); // printf("Debug:\n"); // fflush(stdout); // printf("sqrt primes size: %i\n", sqrt_primes_size); // fflush(stdout); for(i = 0; i < sqrt_primes_size; i++) { if(!sqrt_primes[i]){ printf("%i,", sqrt_primes[i]); fflush(stdout); } } } MPI_Barrier(MPI_COMM_WORLD); // printf("rank: %i\nlow value: %i\nhigh value: %i\ncount: %i\n", id, low_value, high_value, count); //fflush(stdout); MPI_Finalize(); // finalize return 0; }
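/* BLOCK_LOW, BLOCK_HIGH, and BLOCK_SIZE are used by the sieve above but are not defined
 * in this excerpt. A common block-decomposition definition consistent with how they are
 * called here (an assumption, not taken from the original source): rank id of p ranks
 * owns indices BLOCK_LOW..BLOCK_HIGH of an n-element range. */
#define BLOCK_LOW(id,p,n)   ((id)*(n)/(p))
#define BLOCK_HIGH(id,p,n)  (BLOCK_LOW((id)+1,(p),(n))-1)
#define BLOCK_SIZE(id,p,n)  (BLOCK_HIGH((id),(p),(n))-BLOCK_LOW((id),(p),(n))+1)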