int main(int argc, char ** argv) { int args_used = 1; // keeps track of # consumed arguments uint64_t L; // dimension of grid in cells uint64_t iterations; // total number of simulation steps uint64_t n; // total number of particles in the simulation char *init_mode; // particle initialization mode (char) uint64_t particle_mode; // particle initialization mode (int) double rho; // attenuation factor for geometric particle distribution int64_t k, m; // determine initial horizontal and vertical velocity of // particles-- (2*k)+1 cells per time step double alpha, beta; // slope and offset values for linear particle distribution bbox_t grid_patch, // whole grid init_patch; // subset of grid used for localized initialization int correctness = 1; // determines whether simulation was correct double *Qgrid; // field of fixed charges particle_t *particles, *p; // the particles array uint64_t iter, i; // dummies double fx, fy, ax, ay; // forces and accelerations #if UNUSED int particles_per_cell;// number of particles per cell to be injected int error=0; // used for graceful exit after error #endif double avg_time, pic_time;// timing parameters int nthread_input, // thread parameters nthread; int num_error=0; // flag that signals that requested and obtained // numbers of threads are the same random_draw_t dice; printf("Parallel Research Kernels Version %s\n", PRKVERSION); printf("OpenMP Particle-in-Cell execution on 2D grid\n"); /******************************************************************************* ** process and test input parameters ********************************************************************************/ if (argc<7) { printf("Usage: %s <#threads> <#simulation steps> <grid size> <#particles> <k (particle charge semi-increment)> ", argv[0]); printf("<m (vertical particle velocity)>\n"); printf(" <init mode> <init parameters>]\n"); printf(" init mode \"GEOMETRIC\" parameters: <attenuation factor>\n"); printf(" \"SINUSOIDAL\" parameters: none\n"); printf(" \"LINEAR\" parameters: <negative slope> <constant offset>\n"); printf(" \"PATCH\" parameters: <xleft> <xright> <ybottom> <ytop>\n"); exit(SUCCESS); } /* Take number of threads to request from command line */ nthread_input = atoi(*++argv); if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) { printf("ERROR: Invalid number of threads: %d\n", nthread_input); exit(EXIT_FAILURE); } omp_set_num_threads(nthread_input); iterations = atol(*++argv); args_used++; if (iterations<1) { printf("ERROR: Number of time steps must be positive: %" PRIu64 "\n", iterations); exit(FAILURE); } L = atol(*++argv); args_used++; if (L<1 || L%2) { printf("ERROR: Number of grid cells must be positive and even: %" PRIu64 "\n", L); exit(FAILURE); } grid_patch = (bbox_t){0, L+1, 0, L+1}; n = atol(*++argv); args_used++; if (n<1) { printf("ERROR: Number of particles must be positive: %" PRIu64 "\n", n); exit(FAILURE); } particle_mode = UNDEFINED; k = atoi(*++argv); args_used++; if (k<0) { printf("ERROR: Particle semi-charge must be non-negative: %" PRIu64 "\n", k); exit(FAILURE); } m = atoi(*++argv); args_used++; init_mode = *++argv; args_used++; /* Initialize particles with geometric distribution */ if (strcmp(init_mode, "GEOMETRIC") == 0) { if (argc<args_used+1) { printf("ERROR: Not enough arguments for GEOMETRIC\n"); exit(FAILURE); } particle_mode = GEOMETRIC; rho = atof(*++argv); args_used++; } /* Initialize with a sinusoidal particle distribution (single period) */ if (strcmp(init_mode, "SINUSOIDAL") == 0) { particle_mode = SINUSOIDAL; } /* Initialize particles with linear distribution */ /* The linear function is f(x) = -alpha * x + beta , x in [0,1]*/ if (strcmp(init_mode, "LINEAR") == 0) { if (argc<args_used+2) { printf("ERROR: Not enough arguments for LINEAR initialization\n"); exit(EXIT_FAILURE); } particle_mode = LINEAR; alpha = atof(*++argv); args_used++; beta = atof(*++argv); args_used++; if (beta <0 || beta<alpha) { printf("ERROR: linear profile gives negative particle density\n"); exit(EXIT_FAILURE); } } /* Initialize particles uniformly within a "patch" */ if (strcmp(init_mode, "PATCH") == 0) { if (argc<args_used+4) { printf("ERROR: Not enough arguments for PATCH initialization\n"); exit(FAILURE); } particle_mode = PATCH; init_patch.left = atoi(*++argv); args_used++; init_patch.right = atoi(*++argv); args_used++; init_patch.bottom = atoi(*++argv); args_used++; init_patch.top = atoi(*++argv); args_used++; if (bad_patch(&init_patch, &grid_patch)) { printf("ERROR: inconsistent initial patch\n"); exit(FAILURE); } } #pragma omp parallel { #pragma omp master { nthread = omp_get_num_threads(); if (nthread != nthread_input) { num_error = 1; printf("ERROR: number of requested threads %d does not equal ", nthread_input); printf("number of spawned threads %d\n", nthread); } else { printf("Number of threads = %d\n",nthread_input); printf("Grid size = %lld\n", L); printf("Number of particles requested = %lld\n", n); printf("Number of time steps = %lld\n", iterations); printf("Initialization mode = %s\n", init_mode); switch(particle_mode) { case GEOMETRIC: printf(" Attenuation factor = %lf\n", rho); break; case SINUSOIDAL: break; case LINEAR: printf(" Negative slope = %lf\n", alpha); printf(" Offset = %lf\n", beta); break; case PATCH: printf(" Bounding box = %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", init_patch.left, init_patch.right, init_patch.bottom, init_patch.top); break; default: printf("ERROR: Unsupported particle initializating mode\n"); exit(FAILURE); } printf("Particle charge semi-increment = %"PRIu64"\n", k); printf("Vertical velocity = %"PRIu64"\n", m); /* Initialize grid of charges and particles */ Qgrid = initializeGrid(L); LCG_init(&dice); switch(particle_mode) { case GEOMETRIC: particles = initializeGeometric(n, L, rho, k, m, &n, &dice); break; case SINUSOIDAL: particles = initializeSinusoidal(n, L, k, m, &n, &dice); break; case LINEAR: particles = initializeLinear(n, L, alpha, beta, k, m, &n, &dice); break; case PATCH: particles = initializePatch(n, L, init_patch, k, m, &n, &dice); break; default: printf("ERROR: Unsupported particle distribution\n"); exit(FAILURE); } printf("Number of particles placed = %lld\n", n); } } bail_out(num_error); } for (iter=0; iter<=iterations; iter++) { /* start the timer after one warm-up time step */ if (iter==1) { pic_time = wtime(); } /* Calculate forces on particles and update positions */ #pragma omp parallel for private(i, p, fx, fy, ax, ay) for (i=0; i<n; i++) { p = particles; fx = 0.0; fy = 0.0; computeTotalForce(p[i], L, Qgrid, &fx, &fy); ax = fx * MASS_INV; ay = fy * MASS_INV; /* Update particle positions, taking into account periodic boundaries */ p[i].x = fmod(p[i].x + p[i].v_x*DT + 0.5*ax*DT*DT + L, L); p[i].y = fmod(p[i].y + p[i].v_y*DT + 0.5*ay*DT*DT + L, L); /* Update velocities */ p[i].v_x += ax * DT; p[i].v_y += ay * DT; } } pic_time = wtime() - pic_time; /* Run the verification test */ for (i=0; i<n; i++) { correctness *= verifyParticle(particles[i], iterations, Qgrid, L); } if (correctness) { printf("Solution validates\n"); #ifdef VERBOSE printf("Simulation time is %lf seconds\n", pic_time); #endif avg_time = n*iterations/pic_time; printf("Rate (Mparticles_moved/s): %lf\n", 1.0e-6*avg_time); } else { printf("Solution does not validate\n"); } return(EXIT_SUCCESS); }
int main(int argc, char ** argv) { int Num_procs; // number of ranks int Num_procsx, Num_procsy; // number of ranks in each coord direction int args_used = 1; // keeps track of # consumed arguments int my_ID; // MPI rank int my_IDx, my_IDy; // coordinates of rank in rank grid int root = 0; // master rank uint64_t L; // dimension of grid in cells uint64_t iterations ; // total number of simulation steps uint64_t n; // total number of particles requested in the simulation uint64_t actual_particles, // actual number of particles owned by my rank total_particles; // total number of generated particles char *init_mode; // particle initialization mode (char) double rho ; // attenuation factor for geometric particle distribution uint64_t k, m; // determine initial horizontal and vertical velocity of // particles-- (2*k)+1 cells per time step double *grid; // the grid is represented as an array of charges uint64_t iter, i; // dummies double fx, fy, ax, ay; // particle forces and accelerations int error=0; // used for graceful exit after error uint64_t correctness=0; // boolean indicating correct particle displacements uint64_t istart, jstart, iend, jend, particles_size, particles_count; bbox_t grid_patch, // whole grid init_patch, // subset of grid used for localized initialization my_tile; // subset of grid owner by my rank particle_t *particles, *p; // array of particles owned by my rank uint64_t *cur_counts; // uint64_t ptr_my; // uint64_t owner; // owner (rank) of a particular particle double pic_time, local_pic_time, avg_time; uint64_t my_checksum = 0, tot_checksum = 0, correctness_checksum = 0; uint64_t width, height; // minimum dimensions of grid tile owned by my rank int particle_mode; // type of initialization double alpha, beta; // negative slope and offset for linear initialization int nbr[8]; // topological neighbor ranks int icrit, jcrit; // global grid indices where tile size drops to minimum find_owner_type find_owner; int ileftover, jleftover;// excess grid points divided among "fat" tiles uint64_t to_send[8], to_recv[8];// int procsize; // number of ranks per OS process MPI_Status status[16]; MPI_Request requests[16]; /* Initialize the MPI environment */ MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); /* FIXME: This can be further improved */ /* Create MPI data type for particle_t */ MPI_Datatype PARTICLE; MPI_Type_contiguous(sizeof(particle_t)/sizeof(double), MPI_DOUBLE, &PARTICLE); MPI_Type_commit( &PARTICLE ); if (my_ID==root) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("FG_MPI Particle-in-Cell execution on 2D grid\n"); if (argc<6) { printf("Usage: %s <#simulation steps> <grid size> <#particles> <k (particle charge semi-increment)> ", argv[0]); printf("<m (vertical particle velocity)>\n"); printf(" <init mode> <init parameters>]\n"); printf(" init mode \"GEOMETRIC\" parameters: <attenuation factor>\n"); printf(" \"SINUSOIDAL\" parameters: none\n"); printf(" \"LINEAR\" parameters: <negative slope> <constant offset>\n"); printf(" \"PATCH\" parameters: <xleft> <xright> <ybottom> <ytop>\n"); error = 1; goto ENDOFTESTS; } iterations = atol(*++argv); args_used++; if (iterations<1) { printf("ERROR: Number of time steps must be positive: %llu\n", iterations); error = 1; goto ENDOFTESTS; } L = atol(*++argv); args_used++; if (L<1 || L%2) { printf("ERROR: Number of grid cells must be positive and even: %llu\n", L); error = 1; goto ENDOFTESTS; } n = atol(*++argv); args_used++; if (n<1) { printf("ERROR: Number of particles must be positive: %llu\n", n); error = 1; goto ENDOFTESTS; } particle_mode = UNDEFINED; k = atoi(*++argv); args_used++; if (k<0) { printf("ERROR: Particle semi-charge must be non-negative: %llu\n", k); error = 1; goto ENDOFTESTS; } m = atoi(*++argv); args_used++; init_mode = *++argv; args_used++; ENDOFTESTS:; } // done with standard initialization parameters bail_out(error); MPI_Bcast(&iterations, 1, MPI_UINT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&L, 1, MPI_UINT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&n, 1, MPI_UINT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&k, 1, MPI_UINT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&m, 1, MPI_UINT64_T, root, MPI_COMM_WORLD); grid_patch = (bbox_t){0, L+1, 0, L+1}; if (my_ID==root) { // process initialization parameters /* Initialize particles with geometric distribution */ if (strcmp(init_mode, "GEOMETRIC") == 0) { if (argc<args_used+1) { printf("ERROR: Not enough arguments for GEOMETRIC\n"); error = 1; goto ENDOFTESTS2; } particle_mode = GEOMETRIC; rho = atof(*++argv); args_used++; } /* Initialize with a sinusoidal particle distribution (single period) */ if (strcmp(init_mode, "SINUSOIDAL") == 0) { particle_mode = SINUSOIDAL; } /* Initialize particles with linear distribution */ /* The linear function is f(x) = -alpha * x + beta , x in [0,1]*/ if (strcmp(init_mode, "LINEAR") == 0) { if (argc<args_used+2) { printf("ERROR: Not enough arguments for LINEAR initialization\n"); error = 1; goto ENDOFTESTS2; exit(EXIT_FAILURE); } particle_mode = LINEAR; alpha = atof(*++argv); args_used++; beta = atof(*++argv); args_used++; if (beta <0 || beta<alpha) { printf("ERROR: linear profile gives negative particle density\n"); error = 1; goto ENDOFTESTS2; } } /* Initialize particles uniformly within a "patch" */ if (strcmp(init_mode, "PATCH") == 0) { if (argc<args_used+4) { printf("ERROR: Not enough arguments for PATCH initialization\n"); error = 1; goto ENDOFTESTS2; } particle_mode = PATCH; init_patch.left = atoi(*++argv); args_used++; init_patch.right = atoi(*++argv); args_used++; init_patch.bottom = atoi(*++argv); args_used++; init_patch.top = atoi(*++argv); args_used++; if (bad_patch(&init_patch, &grid_patch)) { printf("ERROR: inconsistent initial patch\n"); error = 1; goto ENDOFTESTS2; } } ENDOFTESTS2:; } //done with processing initializaton parameters, now broadcast bail_out(error); MPI_Bcast(&particle_mode, 1, MPI_INT, root, MPI_COMM_WORLD); switch (particle_mode) { case GEOMETRIC: MPI_Bcast(&rho, 1, MPI_DOUBLE, root, MPI_COMM_WORLD); break; case SINUSOIDAL: break; case LINEAR: MPI_Bcast(&alpha, 1, MPI_DOUBLE, root, MPI_COMM_WORLD); MPI_Bcast(&beta, 1, MPI_DOUBLE, root, MPI_COMM_WORLD); break; case PATCH: MPI_Bcast(&init_patch.left, 1, MPI_INT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&init_patch.right, 1, MPI_INT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&init_patch.bottom, 1, MPI_INT64_T, root, MPI_COMM_WORLD); MPI_Bcast(&init_patch.top, 1, MPI_INT64_T, root, MPI_COMM_WORLD); break; } /* determine best way to create a 2D grid of ranks (closest to square, for best surface/volume ratio); we do this brute force for now */ for (Num_procsx=(int) (sqrt(Num_procs+1)); Num_procsx>0; Num_procsx--) { if (!(Num_procs%Num_procsx)) { Num_procsy = Num_procs/Num_procsx; break; } } my_IDx = my_ID%Num_procsx; my_IDy = my_ID/Num_procsx; if (my_ID == root) { MPIX_Get_collocated_size(&procsize); printf("Number of ranks = %llu\n", Num_procs); printf("Number of ranks/process = %d\n", procsize); printf("Load balancing = None\n"); printf("Grid size = %llu\n", L); printf("Tiles in x/y-direction = %d/%d\n", Num_procsx, Num_procsy); printf("Number of particles requested = %llu\n", n); printf("Number of time steps = %llu\n", iterations); printf("Initialization mode = %s\n", init_mode); switch(particle_mode) { case GEOMETRIC: printf(" Attenuation factor = %lf\n", rho); break; case SINUSOIDAL: break; case LINEAR: printf(" Negative slope = %lf\n", alpha); printf(" Offset = %lf\n", beta); break; case PATCH: printf(" Bounding box = %llu, %llu, %llu, %llu\n", init_patch.left, init_patch.right, init_patch.bottom, init_patch.top); break; default: printf("ERROR: Unsupported particle initializating mode\n"); error = 1; } printf("Particle charge semi-increment (k) = %llu\n", k); printf("Vertical velocity (m) = %llu\n", m); } bail_out(error); /* The processes collectively create the underlying grid following a 2D block decomposition; unlike in the stencil code, successive blocks share an overlap vertex */ width = L/Num_procsx; if (width < 2*k) { if (my_ID==0) printf("k-value too large: %llu, must be no greater than %llu\n", k, width/2); bail_out(1); } ileftover = L%Num_procsx; if (my_IDx<ileftover) { istart = (width+1) * my_IDx; iend = istart + width + 1; } else { istart = (width+1) * ileftover + width * (my_IDx-ileftover); iend = istart + width; } icrit = (width+1) * ileftover; height = L/Num_procsy; if (height < m) { if (my_ID==0) printf("m-value too large: %llu, must be no greater than %llu\n", m, height); bail_out(1); } jleftover = L%Num_procsy; if (my_IDy<jleftover) { jstart = (height+1) * my_IDy; jend = jstart + height + 1; } else { jstart = (height+1) * jleftover + height * (my_IDy-jleftover); jend = jstart + height; } jcrit = (height+1) * jleftover; /* if the problem can be divided evenly among ranks, use the simple owner finding function */ if (icrit==0 && jcrit==0) { find_owner = find_owner_simple; if (my_ID==root) printf("Rank search mode used = simple\n"); } else { find_owner = find_owner_general; if (my_ID==root) printf("Rank search mode used = general\n"); } /* define bounding box for tile owned by my rank for convenience */ my_tile = (bbox_t){istart,iend,jstart,jend}; /* Find neighbors. Indexing: left=0, right=1, bottom=2, top=3, bottom-left=4, bottom-right=5, top-left=6, top-right=7 */ /* These are IDs in the global communicator */ nbr[0] = (my_IDx == 0 ) ? my_ID + Num_procsx - 1 : my_ID - 1; nbr[1] = (my_IDx == Num_procsx-1) ? my_ID - Num_procsx + 1 : my_ID + 1; nbr[2] = (my_IDy == Num_procsy-1) ? my_ID + Num_procsx - Num_procs : my_ID + Num_procsx; nbr[3] = (my_IDy == 0 ) ? my_ID - Num_procsx + Num_procs : my_ID - Num_procsx; nbr[4] = (my_IDy == Num_procsy-1) ? nbr[0] + Num_procsx - Num_procs : nbr[0] + Num_procsx; nbr[5] = (my_IDy == Num_procsy-1) ? nbr[1] + Num_procsx - Num_procs : nbr[1] + Num_procsx; nbr[6] = (my_IDy == 0 ) ? nbr[0] - Num_procsx + Num_procs : nbr[0] - Num_procsx; nbr[7] = (my_IDy == 0 ) ? nbr[1] - Num_procsx + Num_procs : nbr[1] - Num_procsx; grid = initializeGrid(my_tile); switch(particle_mode){ case GEOMETRIC: particles = initializeGeometric(n, L, rho, my_tile, k, m, &particles_count, &particles_size); break; case LINEAR: particles = initializeLinear(n, L, alpha, beta, my_tile, k, m, &particles_count, &particles_size); break; case SINUSOIDAL: particles = initializeSinusoidal(n, L, my_tile, k, m, &particles_count, &particles_size); break; case PATCH: particles = initializePatch(n, L, init_patch, my_tile, k, m, &particles_count, &particles_size); } if (!particles) { printf("ERROR: Rank %d could not allocate space for %llu particles\n", my_ID, particles_size); error=1; } bail_out(error); #if VERBOSE for (i=0; i<Num_procs; i++) { MPI_Barrier(MPI_COMM_WORLD); if (i == my_ID) printf("Rank %d has %llu particles\n", my_ID, particles_count); } #endif if (my_ID==root) { MPI_Reduce(&particles_count, &total_particles, 1, MPI_UINT64_T, MPI_SUM, root, MPI_COMM_WORLD); printf("Number of particles placed = %llu\n", total_particles); } else { MPI_Reduce(&particles_count, &total_particles, 1, MPI_UINT64_T, MPI_SUM, root, MPI_COMM_WORLD); } /* Allocate space for communication buffers. Adjust appropriately as the simulation proceeds */ uint64_t sendbuf_size[8], recvbuf_size[8]; particle_t *sendbuf[8], *recvbuf[8]; error=0; for (i=0; i<8; i++) { sendbuf_size[i] = MAX(1,n/(MEMORYSLACK*Num_procs)); recvbuf_size[i] = MAX(1,n/(MEMORYSLACK*Num_procs)); sendbuf[i] = (particle_t*) prk_malloc(sendbuf_size[i] * sizeof(particle_t)); recvbuf[i] = (particle_t*) prk_malloc(recvbuf_size[i] * sizeof(particle_t)); if (!sendbuf[i] || !recvbuf[i]) error++; } if (error) printf("Rank %d could not allocate communication buffers\n", my_ID); bail_out(error); /* Run the simulation */ for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_pic_time = wtime(); } ptr_my = 0; for (i=0; i<8; i++) to_send[i]=0; /* Process own particles */ p = particles; for (i=0; i < particles_count; i++) { fx = 0.0; fy = 0.0; computeTotalForce(p[i], my_tile, grid, &fx, &fy); ax = fx * MASS_INV; ay = fy * MASS_INV; /* Update particle positions, taking into account periodic boundaries */ p[i].x = fmod(p[i].x + p[i].v_x*DT + 0.5*ax*DT*DT + L, L); p[i].y = fmod(p[i].y + p[i].v_y*DT + 0.5*ay*DT*DT + L, L); /* Update velocities */ p[i].v_x += ax * DT; p[i].v_y += ay * DT; /* Check if particle stayed in same subdomain or moved to another */ owner = find_owner(p[i], width, height, Num_procsx, icrit, jcrit, ileftover, jleftover); if (owner==my_ID) { add_particle_to_buffer(p[i], &p, &ptr_my, &particles_size); /* Add particle to the appropriate communication buffer */ } else if (owner == nbr[0]) { add_particle_to_buffer(p[i], &sendbuf[0], &to_send[0], &sendbuf_size[0]); } else if (owner == nbr[1]) { add_particle_to_buffer(p[i], &sendbuf[1], &to_send[1], &sendbuf_size[1]); } else if (owner == nbr[2]) { add_particle_to_buffer(p[i], &sendbuf[2], &to_send[2], &sendbuf_size[2]); } else if (owner == nbr[3]) { add_particle_to_buffer(p[i], &sendbuf[3], &to_send[3], &sendbuf_size[3]); } else if (owner == nbr[4]) { add_particle_to_buffer(p[i], &sendbuf[4], &to_send[4], &sendbuf_size[4]); } else if (owner == nbr[5]) { add_particle_to_buffer(p[i], &sendbuf[5], &to_send[5], &sendbuf_size[5]); } else if (owner == nbr[6]) { add_particle_to_buffer(p[i], &sendbuf[6], &to_send[6], &sendbuf_size[6]); } else if (owner == nbr[7]) { add_particle_to_buffer(p[i], &sendbuf[7], &to_send[7], &sendbuf_size[7]); } else { printf("Could not find neighbor owner of particle %llu in tile %llu\n", i, owner); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } } /* Communicate the number of particles to be sent/received */ for (i=0; i<8; i++) { MPI_Isend(&to_send[i], 1, MPI_UINT64_T, nbr[i], 0, MPI_COMM_WORLD, &requests[i]); MPI_Irecv(&to_recv[i], 1, MPI_UINT64_T, nbr[i], 0, MPI_COMM_WORLD, &requests[8+i]); } MPI_Waitall(16, requests, status); /* Resize receive buffers if need be */ for (i=0; i<8; i++) { resize_buffer(&recvbuf[i], &recvbuf_size[i], to_recv[i]); } /* Communicate the particles */ for (i=0; i<8; i++) { MPI_Isend(sendbuf[i], to_send[i], PARTICLE, nbr[i], 0, MPI_COMM_WORLD, &requests[i]); MPI_Irecv(recvbuf[i], to_recv[i], PARTICLE, nbr[i], 0, MPI_COMM_WORLD, &requests[8+i]); } MPI_Waitall(16, requests, status); /* Attach received particles to particles buffer */ for (i=0; i<4; i++) { attach_received_particles(&particles, &ptr_my, &particles_size, recvbuf[2*i], to_recv[2*i], recvbuf[2*i+1], to_recv[2*i+1]); } particles_count = ptr_my; } local_pic_time = MPI_Wtime() - local_pic_time; MPI_Reduce(&local_pic_time, &pic_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); /* Run the verification test */ /* First verify own particles */ for (i=0; i < particles_count; i++) { correctness += verifyParticle(particles[i], (double)L, iterations); my_checksum += (uint64_t)particles[i].ID; } /* Gather total checksum of particles */ MPI_Reduce(&my_checksum, &tot_checksum, 1, MPI_UINT64_T, MPI_SUM, root, MPI_COMM_WORLD); /* Gather total checksum of correctness flags */ MPI_Reduce(&correctness, &correctness_checksum, 1, MPI_UINT64_T, MPI_SUM, root, MPI_COMM_WORLD); if ( my_ID == root) { if (correctness_checksum != total_particles ) { printf("ERROR: there are %llu miscalculated locations\n", total_particles-correctness_checksum); } else { if (tot_checksum != (total_particles*(total_particles+1))/2) { printf("ERROR: Particle checksum incorrect\n"); } else { avg_time = total_particles*iterations/pic_time; printf("Solution validates\n"); printf("Rate (Mparticles_moved/s): %lf\n", 1.0e-6*avg_time); } } } #if VERBOSE for (i=0; i<Num_procs; i++) { MPI_Barrier(MPI_COMM_WORLD); if (i == my_ID) printf("Rank %d has %llu particles\n", my_ID, particles_count); } #endif MPI_Finalize(); return 0; }