int main(int argc, char ** argv)
{
    int rc;

    /* These are the desired and available levels of thread support.  A hybrid
     * code where all MPI calls are made from the main thread can use FUNNELED.
     * If threads are making MPI calls, MULTIPLE is appropriate. */
    int requested = MPI_THREAD_FUNNELED, provided;

    /* MPICH2 will be substantially more efficient than OpenMPI for
     * MPI_THREAD_{FUNNELED,SERIALIZED}, but this is unlikely to be a serious
     * bottleneck. */
    rc = MPI_Init_thread(&argc, &argv, requested, &provided);
    CHECK_MPI(rc);
    if (provided<requested) {
        printf("MPI_Init_thread provided %s when %s was requested. Exiting. \n",
               MPI_THREAD_STRING(provided), MPI_THREAD_STRING(requested));
        exit(1);
    }

    int world_size, world_rank;
    rc = MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    CHECK_MPI(rc);
    rc = MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    CHECK_MPI(rc);

    int root = 0, count = 1;

    /* the ternary is often branchless... */
    long i, n = (argc>1 ? atol(argv[1]) : 100000);
    rc = MPI_Bcast(&n, count, MPI_LONG, root, MPI_COMM_WORLD);
    CHECK_MPI(rc);
    if (world_rank==0)
        printf("%d: using %ld samples.\n", world_rank, world_size*n);

    /* seed the RNG with something unique to a rank */
    srand(world_rank);

    long in = 0, total = 0;
    for (i=0; i<n; i++) {
        register double x = (double)rand()/(double)RAND_MAX;
        register double y = (double)rand()/(double)RAND_MAX;
        register double z = x*x + y*y;
        if (z<1.0) in++;
    }

    rc = MPI_Reduce(&in, &total, count, MPI_LONG, MPI_SUM, root, MPI_COMM_WORLD);
    CHECK_MPI(rc);

    double pi = 4.0*(double)total/(world_size*n);
    if (world_rank==0)
        printf("%d: pi = %12.8lf.\n", world_rank, pi);

    MPI_Finalize();
    return 0;
}
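The listing above uses two helpers, CHECK_MPI and MPI_THREAD_STRING, that are defined elsewhere in the source tree. A minimal sketch of what they might look like, assuming CHECK_MPI aborts on any non-MPI_SUCCESS return code and MPI_THREAD_STRING falls back to "WTF" for an unrecognized level (the convention the init_mpi routine below tests for):

/* Hypothetical helper macros assumed by the listing above; the real
 * definitions in the source tree may differ. */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define CHECK_MPI(rc)                                                   \
    do {                                                                \
        if ((rc) != MPI_SUCCESS) {                                      \
            fprintf(stderr, "MPI call failed at %s:%d with code %d\n",  \
                    __FILE__, __LINE__, (rc));                          \
            MPI_Abort(MPI_COMM_WORLD, (rc));                            \
        }                                                               \
    } while (0)

#define MPI_THREAD_STRING(level)                                        \
    ((level) == MPI_THREAD_MULTIPLE   ? "MPI_THREAD_MULTIPLE"   :       \
     (level) == MPI_THREAD_SERIALIZED ? "MPI_THREAD_SERIALIZED" :       \
     (level) == MPI_THREAD_FUNNELED   ? "MPI_THREAD_FUNNELED"   :       \
     (level) == MPI_THREAD_SINGLE     ? "MPI_THREAD_SINGLE"     : "WTF")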
void init_mpi (struct pe_vars * v)
{
    int mpi_provided;

    MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &mpi_provided);

    /* abort if the provided thread level does not map to a known name */
    MPI_Query_thread(&mpi_provided);
    if (strcmp((const char *)MPI_THREAD_STRING(mpi_provided), "WTF") == 0)
        MPI_Abort(MPI_COMM_WORLD, 5);

    MPI_Comm_rank(MPI_COMM_WORLD, &(v->me));
    MPI_Comm_size(MPI_COMM_WORLD, &(v->npes));

    /* pair rank i with rank i +/- npes/2 for the pairwise exchange */
    v->pairs = v->npes / 2;
    v->nxtpe = (v->me < v->pairs) ? (v->me + v->pairs) : (v->me - v->pairs);

    return;
}
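init_mpi fills a struct pe_vars that is not defined in this excerpt. A minimal definition consistent with the four fields touched here (the real struct may carry more members), followed by a trivial usage example:

#include <stdio.h>
#include <mpi.h>

/* Hypothetical struct pe_vars matching the fields init_mpi uses. */
struct pe_vars {
    int me;     /* this rank's index in MPI_COMM_WORLD    */
    int npes;   /* total number of ranks                  */
    int pairs;  /* npes/2: number of communicating pairs  */
    int nxtpe;  /* partner rank in the pairwise exchange  */
};

/* Example: each rank reports its exchange partner. */
int main(int argc, char **argv)
{
    struct pe_vars v;
    init_mpi(&v);
    printf("rank %d of %d exchanges with rank %d\n", v.me, v.npes, v.nxtpe);
    MPI_Finalize();
    return 0;
}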
/// main for SCF
int main (int argc, char **argv)
{
    // init MPI
    int myrank;
    int nprocs;
    int provided;
#if defined (USE_ELEMENTAL)
    ElInitialize(&argc, &argv);
    ElMPICommRank(MPI_COMM_WORLD, &myrank);
    ElMPICommSize(MPI_COMM_WORLD, &nprocs);
    MPI_Query_thread(&provided);
#else
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
#endif
    if (myrank == 0) {
        printf("MPI thread support: %s\n", MPI_THREAD_STRING(provided));
    }
#if 0
    char hostname[1024];
    gethostname(hostname, 1024);
    printf("Rank %d of %d running on node %s\n", myrank, nprocs, hostname);
#endif

    // create basis set
    BasisSet_t basis;
    CInt_createBasisSet(&basis);

    // input parameters and load basis set
    int nprow_fock;
    int npcol_fock;
    int nblks_fock;
    int nprow_purif;
    int nshells;
    int natoms;
    int nfunctions;
    int niters;
    if (myrank == 0) {
        if (argc != 8) {
            usage(argv[0]);
            MPI_Finalize();
            exit(0);
        }
        // init parameters
        nprow_fock = atoi(argv[3]);
        npcol_fock = atoi(argv[4]);
        nprow_purif = atoi(argv[5]);
        nblks_fock = atoi(argv[6]);
        niters = atoi(argv[7]);
        assert(nprow_fock * npcol_fock == nprocs);
        assert(nprow_purif * nprow_purif * nprow_purif <= nprocs);
        assert(niters > 0);

        CInt_loadBasisSet(basis, argv[1], argv[2]);
        nshells = CInt_getNumShells(basis);
        natoms = CInt_getNumAtoms(basis);
        nfunctions = CInt_getNumFuncs(basis);
        assert(nprow_fock <= nshells && npcol_fock <= nshells);
        assert(nprow_purif <= nfunctions);

        printf("Job information:\n");
        char *fname;
        fname = basename(argv[2]);
        printf("  molecule:  %s\n", fname);
        fname = basename(argv[1]);
        printf("  basisset:  %s\n", fname);
        printf("  charge     = %d\n", CInt_getTotalCharge(basis));
        printf("  #atoms     = %d\n", natoms);
        printf("  #shells    = %d\n", nshells);
        printf("  #functions = %d\n", nfunctions);
        printf("  fock build uses   %d (%dx%d) nodes\n",
               nprow_fock * npcol_fock, nprow_fock, npcol_fock);
        printf("  purification uses %d (%dx%dx%d) nodes\n",
               nprow_purif * nprow_purif * nprow_purif,
               nprow_purif, nprow_purif, nprow_purif);
        printf("  #tasks = %d (%dx%d)\n",
               nblks_fock * nblks_fock * nprow_fock * nprow_fock,
               nblks_fock * nprow_fock, nblks_fock * nprow_fock);
        int nthreads = omp_get_max_threads();
        printf("  #nthreads_cpu = %d\n", nthreads);
    }

    // broadcast the scalar parameters
    int btmp[8];
    btmp[0] = nprow_fock;
    btmp[1] = npcol_fock;
    btmp[2] = nprow_purif;
    btmp[3] = nblks_fock;
    btmp[4] = niters;
    btmp[5] = natoms;
    btmp[6] = nshells;
    btmp[7] = nfunctions;
    MPI_Bcast(btmp, 8, MPI_INT, 0, MPI_COMM_WORLD);
    nprow_fock = btmp[0];
    npcol_fock = btmp[1];
    nprow_purif = btmp[2];
    nblks_fock = btmp[3];
    niters = btmp[4];
    natoms = btmp[5];
    nshells = btmp[6];
    nfunctions = btmp[7];

    // broadcast basis set
    void *bsbuf;
    int bsbufsize;
    if (myrank == 0) {
        CInt_packBasisSet(basis, &bsbuf, &bsbufsize);
        MPI_Bcast(&bsbufsize, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(bsbuf, bsbufsize, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&bsbufsize, 1, MPI_INT, 0, MPI_COMM_WORLD);
        bsbuf = (void *)malloc(bsbufsize);
        assert(bsbuf != NULL);
        MPI_Bcast(bsbuf, bsbufsize, MPI_CHAR, 0, MPI_COMM_WORLD);
        CInt_unpackBasisSet(basis, bsbuf);
        free(bsbuf);
    }

    // init PFock
    if (myrank == 0) {
        printf("Initializing pfock ...\n");
    }
    PFock_t pfock;
    PFock_create(basis, nprow_fock, npcol_fock, nblks_fock,
                 1e-11, MAX_NUM_D, IS_SYMM, &pfock);
    if (myrank == 0) {
        double mem_cpu;
        PFock_getMemorySize(pfock, &mem_cpu);
        printf("  CPU uses %.3f MB\n", mem_cpu / 1024.0 / 1024.0);
        printf("  Done\n");
    }

    // init purif
    purif_t *purif = create_purif(basis, nprow_purif, nprow_purif, nprow_purif);
    init_oedmat(basis, pfock, purif, nprow_fock, npcol_fock);

    // compute SCF
    if (myrank == 0) {
        printf("Computing SCF ...\n");
    }
    int rowstart = purif->srow_purif;
    int rowend = purif->nrows_purif + rowstart - 1;
    int colstart = purif->scol_purif;
    int colend = purif->ncols_purif + colstart - 1;
    double energy0 = -1.0;
    double totaltime = 0.0;
    double purif_flops = 2.0 * nfunctions * nfunctions * nfunctions;
    double diis_flops;

    // set initial guess
    if (myrank == 0) {
        printf("  initializing D ...\n");
    }
    PFock_setNumDenMat(NUM_D, pfock);
    initial_guess(pfock, basis, purif->runpurif,
                  rowstart, rowend, colstart, colend,
                  purif->D_block, purif->ldx);
    MPI_Barrier(MPI_COMM_WORLD);

    // compute nuc energy
    double ene_nuc = CInt_getNucEnergy(basis);
    if (myrank == 0) {
        printf("  nuc energy = %.10f\n", ene_nuc);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // main loop
    double t1, t2, t3, t4;
    for (int iter = 0; iter < niters; iter++) {
        if (myrank == 0) {
            printf("  iter %d\n", iter);
        }
        t3 = MPI_Wtime();

        // fock matrix construction
        t1 = MPI_Wtime();
        fock_build(pfock, basis, purif->runpurif,
                   rowstart, rowend, colstart, colend,
                   purif->ldx, purif->D_block, purif->F_block);
        if (myrank == 0) {
            printf("After fock build \n");
        }

        // compute energy
        double energy = compute_energy(purif, purif->F_block, purif->D_block);
        t2 = MPI_Wtime();
        if (myrank == 0) {
            printf("  fock build takes %.3f secs\n", t2 - t1);
            if (iter > 0) {
                printf("  energy %.10f (%.10f), %le\n",
                       energy + ene_nuc, energy, fabs(energy - energy0));
            } else {
                printf("  energy %.10f (%.10f)\n", energy + ene_nuc, energy);
            }
        }
        if (iter > 0 && fabs(energy - energy0) < 1e-11) {
            niters = iter + 1;
            break;
        }
        energy0 = energy;

        // compute DIIS
        t1 = MPI_Wtime();
        compute_diis(pfock, purif, purif->D_block, purif->F_block, iter);
        t2 = MPI_Wtime();
        if (myrank == 0) {
            if (iter > 1) {
                diis_flops = purif_flops * 6.0;
            } else {
                diis_flops = purif_flops * 2.0;
            }
            printf("  diis takes %.3f secs, %.3lf Gflops\n",
                   t2 - t1, diis_flops / (t2 - t1) / 1e9);
        }

#ifdef __SCF_OUT__
        if (myrank == 0) {
            double outbuf[nfunctions];
            char fname[1024];
            sprintf(fname, "XFX_%d_%d.dat", nfunctions, iter);
            FILE *fp = fopen(fname, "w+");
            assert(fp != NULL);
            for (int i = 0; i < nfunctions; i++) {
                PFock_getMat(pfock, PFOCK_MAT_TYPE_F, USE_D_ID,
                             i, i, USE_D_ID, nfunctions - 1,
                             outbuf, nfunctions);
                for (int j = 0; j < nfunctions; j++) {
                    fprintf(fp, "%.10e\n", outbuf[j]);
                }
            }
            fclose(fp);
        }
#endif

        // purification
        MPI_Barrier(MPI_COMM_WORLD);
        t1 = MPI_Wtime();
        int it = compute_purification(purif, purif->F_block, purif->D_block);
        t2 = MPI_Wtime();
        MPI_Barrier(MPI_COMM_WORLD);
        if (myrank == 0) {
            printf("  purification takes %.3f secs,"
                   " %d iterations, %.3f Gflops\n",
                   t2 - t1, it,
                   (it * 2.0 + 4.0) * purif_flops / (t2 - t1) / 1e9);
        }

        /*
#if defined(USE_ELEMENTAL)
        ElGlobalArraysPrint_d( eldga, pfock->ga_D[USE_D_ID] );
#else
        GA_Print (pfock->ga_D[USE_D_ID]);
#endif
        */

        t4 = MPI_Wtime();
        totaltime += t4 - t3;

#ifdef __SCF_TIMING__
        PFock_getStatistics(pfock);
        double purif_timedgemm;
        double purif_timepdgemm;
        double purif_timepass;
        double purif_timetr;
        MPI_Reduce(&purif->timedgemm, &purif_timedgemm, 1,
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(&purif->timepdgemm, &purif_timepdgemm, 1,
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(&purif->timepass, &purif_timepass, 1,
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(&purif->timetr, &purif_timetr, 1,
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        if (myrank == 0) {
            printf("    Purification Statistics:\n");
            printf("      average totaltime  = %.3f\n"
                   "      average timetr     = %.3f\n"
                   "      average timedgemm  = %.3f, %.3f Gflops\n"
                   "      average timepdgemm = %.3f, %.3f Gflops\n",
                   purif_timepass / purif->np_purif,
                   purif_timetr / purif->np_purif,
                   purif_timedgemm / purif->np_purif,
                   (it * 2.0 + 4.0) * purif_flops /
                       (purif_timedgemm / purif->np_purif) / 1e9,
                   purif_timepdgemm / purif->np_purif,
                   (it * 2.0 + 4.0) * purif_flops /
                       (purif_timepdgemm / purif->np_purif) / 1e9);
        }
#endif
    } /* for (iter = 0; iter < niters; iter++) */

    if (myrank == 0) {
        printf("  total time %.3f secs: %.3f secs/iter\n",
               totaltime, totaltime / niters);
        printf("  Done\n");
    }

    destroy_purif(purif);
    PFock_destroy(pfock);
    CInt_destroyBasisSet(basis);

    MPI_Finalize();

    return 0;
}
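The SCF driver calls usage(argv[0]) when the argument count is wrong, but the routine itself is not shown. A plausible sketch matching the seven arguments parsed above (basis-set file, molecule file, nprow_fock, npcol_fock, nprow_purif, nblks_fock, niters); the actual message in the source tree may differ:

#include <stdio.h>

/* Hypothetical usage() consistent with the argv[] indices read in main(). */
static void usage(const char *call)
{
    printf("Usage: %s <basis set file> <xyz file> "
           "<nprow_fock> <npcol_fock> <nprow_purif> <nblks_fock> <niters>\n",
           call);
}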
int main(int argc, char *argv[])
{
    /*********************************************************************
     * INITIALIZE MPI
     *********************************************************************/

    int world_size = 0, world_rank = -1;
    int provided = -1;

#if defined(USE_MPI_INIT)

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Init");

#else

    int requested = -1;
#  if defined(USE_MPI_INIT_THREAD_MULTIPLE)
    requested = MPI_THREAD_MULTIPLE;
#  elif defined(USE_MPI_INIT_THREAD_SERIALIZED)
    requested = MPI_THREAD_SERIALIZED;
#  elif defined(USE_MPI_INIT_THREAD_FUNNELED)
    requested = MPI_THREAD_FUNNELED;
#  else
    requested = MPI_THREAD_SINGLE;
#  endif

    MPI_Init_thread(&argc, &argv, requested, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Init_thread");

    if (provided>requested) {
        if (world_rank==0)
            printf("MPI_Init_thread returned %s instead of %s, but this is okay. \n",
                   MPI_THREAD_STRING(provided), MPI_THREAD_STRING(requested));
    }
    if (provided<requested) {
        if (world_rank==0)
            printf("MPI_Init_thread returned %s instead of %s so the test will exit. \n",
                   MPI_THREAD_STRING(provided), MPI_THREAD_STRING(requested));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

#endif

    double t0 = MPI_Wtime();

    int is_init = 0;
    MPI_Initialized(&is_init);
    if (world_rank==0)
        printf("MPI %s initialized. \n", (is_init==1 ? "was" : "was not"));

    MPI_Query_thread(&provided);
    if (world_rank==0)
        printf("MPI thread support is %s. \n", MPI_THREAD_STRING(provided));

    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    if (world_rank==0)
        printf("MPI test program running on %d ranks. \n", world_size);

    char procname[MPI_MAX_PROCESSOR_NAME];
    int pnlen;
    MPI_Get_processor_name(procname, &pnlen);
    printf("%d: processor name = %s\n", world_rank, procname);

    /*********************************************************************
     * SETUP MPI COMMUNICATORS
     *********************************************************************/

    if (world_rank==0) printf("MPI_Barrier on MPI_COMM_WORLD 1 \n");
    MPI_Barrier(MPI_COMM_WORLD);

    if (world_rank==0) printf("MPI_Comm_dup of MPI_COMM_WORLD \n");
    MPI_Comm comm_world_dup;
    MPI_Comm_dup(MPI_COMM_WORLD, &comm_world_dup);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_dup");

    if (world_rank==0) printf("MPI_Barrier on comm_world_dup \n");
    MPI_Barrier(comm_world_dup);

    if (world_rank==0) printf("MPI_Comm_split of MPI_COMM_WORLD into world_reordered \n");
    MPI_Comm comm_world_reordered;
    MPI_Comm_split(MPI_COMM_WORLD, 0, world_size-world_rank, &comm_world_reordered);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_split");

    if (world_rank==0) printf("MPI_Comm_split of MPI_COMM_WORLD into left-right \n");
    MPI_Comm comm_world_leftright;
    int leftright = (world_rank<(world_size/2));
    MPI_Comm_split(MPI_COMM_WORLD, leftright, world_rank, &comm_world_leftright);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_split");

    if (world_rank==0) printf("MPI_Barrier on comm_world_leftright \n");
    MPI_Barrier(comm_world_leftright);

    if (world_rank==0) printf("MPI_Comm_split of MPI_COMM_WORLD into odd-even \n");
    MPI_Comm comm_world_oddeven;
    int oddeven = (world_rank%2);
    MPI_Comm_split(MPI_COMM_WORLD, oddeven, world_rank, &comm_world_oddeven);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_split");

    if (world_rank==0) printf("MPI_Barrier on comm_world_oddeven \n");
    MPI_Barrier(comm_world_oddeven);

    if (world_rank==0) printf("MPI_Comm_split MPI_COMM_WORLD into (world-1) \n");
    MPI_Comm comm_world_minus_one;
    int left_out = world_rank==(world_size/2);
    MPI_Comm_split(MPI_COMM_WORLD, left_out, world_rank, &comm_world_minus_one);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_split");

    if (world_rank==0) printf("MPI_Barrier on comm_world_minus_one \n");
    MPI_Barrier(comm_world_minus_one);

    if (world_rank==0) printf("MPI_Comm_group of group_world from MPI_COMM_WORLD \n");
    MPI_Group group_world;
    MPI_Comm_group(MPI_COMM_WORLD, &group_world);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_group");

    int geomprog_size = (world_size==1) ? 1 : ceil(log2(world_size));
    int * geomprog_list = (int *) safemalloc(geomprog_size * sizeof(int));
    for (int i=0; i<geomprog_size; i++)
        geomprog_list[i] = pow(2,i)-1;
    if (world_rank==0)
        for (int i=0; i<geomprog_size; i++)
            printf("geomprog_list[%d] = %d \n", i, geomprog_list[i]);

    if (world_rank==0) printf("MPI_Group_incl of group_geomprog (geometric progression) from group_world \n");
    MPI_Group group_geomprog;
    MPI_Group_incl(group_world, geomprog_size, geomprog_list, &group_geomprog);
    MPI_Group_free(&group_world);

    if (world_rank==0) printf("MPI_Comm_create of comm_geomprog from group_geomprog on MPI_COMM_WORLD \n");
    MPI_Comm comm_geomprog;
    MPI_Comm_create(MPI_COMM_WORLD, group_geomprog, &comm_geomprog);
    MPI_Group_free(&group_geomprog);
    if (world_rank==0) print_meminfo(stdout, "after MPI_Comm_create");

    if (world_rank==0) printf("MPI_Barrier on comm_geomprog \n");
    for (int i=0; i<geomprog_size; i++)
        if (geomprog_list[i]==world_rank)
            MPI_Barrier(comm_geomprog);

    if (world_rank==0) printf("MPI_Barrier on MPI_COMM_WORLD 2 \n");
    MPI_Barrier(MPI_COMM_WORLD);

    if (world_rank==0) print_meminfo(stdout, "after MPI communicator creation");

    /*********************************************************************
     * COLLECTIVES
     *********************************************************************/

    int max_mem = (argc>1 ? atoi(argv[1]) : 32*1024*1024);

    MPI_Comm test_comm;

#if defined(DO_COMM_WORLD)
    test_comm = MPI_COMM_WORLD;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "MPI_COMM_WORLD - pass 1");
    {
        MPI_Barrier(test_comm);
        bcast_only(stdout, test_comm, max_mem);
        gather_only(stdout, test_comm, max_mem);
        allgather_only(stdout, test_comm, max_mem);
        scatter_only(stdout, test_comm, max_mem);
        alltoall_only(stdout, test_comm, max_mem);
        reduce_only(stdout, test_comm, max_mem);
        allreduce_only(stdout, test_comm, max_mem);
        reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (world_rank==0) printf("############## %s ##############\n", "MPI_COMM_WORLD - pass 2");
    {
        MPI_Barrier(test_comm);
        bcast_only(stdout, test_comm, max_mem);
        gather_only(stdout, test_comm, max_mem);
        allgather_only(stdout, test_comm, max_mem);
        scatter_only(stdout, test_comm, max_mem);
        alltoall_only(stdout, test_comm, max_mem);
        reduce_only(stdout, test_comm, max_mem);
        allreduce_only(stdout, test_comm, max_mem);
        reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DO_COMM_WORLD_JITTER
    test_comm = MPI_COMM_WORLD;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "COMM_WORLD_JITTER");
    {
        int jitter = 0;
        if ((world_rank%10)==0)     jitter++;
        if ((world_rank%100)==0)    jitter++;
        if ((world_rank%1000)==0)   jitter++;
        if ((world_rank%10000)==0)  jitter++;
        if ((world_rank%100000)==0) jitter++;

        MPI_Barrier(test_comm); sleep(jitter); bcast_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); gather_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); allgather_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); scatter_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); alltoall_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); reduce_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); allreduce_only(stdout, test_comm, max_mem);
        MPI_Barrier(test_comm); sleep(jitter); reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DO_COMM_WORLD_DUP
    test_comm = comm_world_dup;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "COMM_WORLD_DUP");
    {
        MPI_Barrier(test_comm);
        bcast_only(stdout, test_comm, max_mem);
        gather_only(stdout, test_comm, max_mem);
        allgather_only(stdout, test_comm, max_mem);
        scatter_only(stdout, test_comm, max_mem);
        alltoall_only(stdout, test_comm, max_mem);
        reduce_only(stdout, test_comm, max_mem);
        allreduce_only(stdout, test_comm, max_mem);
        reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DO_WORLD_REORDERED
    test_comm = comm_world_reordered;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "WORLD_REORDERED");
    {
        MPI_Barrier(test_comm);
        bcast_only(stdout, test_comm, max_mem);
        gather_only(stdout, test_comm, max_mem);
        allgather_only(stdout, test_comm, max_mem);
        scatter_only(stdout, test_comm, max_mem);
        alltoall_only(stdout, test_comm, max_mem);
        reduce_only(stdout, test_comm, max_mem);
        allreduce_only(stdout, test_comm, max_mem);
        reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DO_WORLD_MINUS_ONE
    test_comm = comm_world_minus_one;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "WORLD_MINUS_ONE");
    if (left_out==0) {
        MPI_Barrier(test_comm);
        bcast_only(stdout, test_comm, max_mem);
        gather_only(stdout, test_comm, max_mem);
        allgather_only(stdout, test_comm, max_mem);
        scatter_only(stdout, test_comm, max_mem);
        alltoall_only(stdout, test_comm, max_mem);
        reduce_only(stdout, test_comm, max_mem);
        allreduce_only(stdout, test_comm, max_mem);
        reducescatterblock_only(stdout, test_comm, max_mem);
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#if DO_LEFT_RIGHT
    test_comm = comm_world_leftright;
    for (int i=0; i<2; i++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if (world_rank==i) printf("############## %s ##############\n", (i==0 ? "LEFT" : "RIGHT"));
        if (leftright==i) {
            MPI_Barrier(test_comm);
            bcast_only(stdout, test_comm, max_mem);
            gather_only(stdout, test_comm, max_mem);
            allgather_only(stdout, test_comm, max_mem);
            scatter_only(stdout, test_comm, max_mem);
            alltoall_only(stdout, test_comm, max_mem);
            reduce_only(stdout, test_comm, max_mem);
            allreduce_only(stdout, test_comm, max_mem);
            reducescatterblock_only(stdout, test_comm, max_mem);
        }
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#if DO_ODD_EVEN
    test_comm = comm_world_oddeven;
    for (int i=0; i<2; i++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if (world_rank==i) printf("############## %s ##############\n", (i==0 ? "EVEN" : "ODD"));
        if (oddeven==i) {
            MPI_Barrier(test_comm);
            bcast_only(stdout, test_comm, max_mem);
            gather_only(stdout, test_comm, max_mem);
            allgather_only(stdout, test_comm, max_mem);
            scatter_only(stdout, test_comm, max_mem);
            alltoall_only(stdout, test_comm, max_mem);
            reduce_only(stdout, test_comm, max_mem);
            allreduce_only(stdout, test_comm, max_mem);
            reducescatterblock_only(stdout, test_comm, max_mem);
        }
    }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DO_GEOM_PROG
    test_comm = comm_geomprog;
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank==0) printf("############## %s ##############\n", "GEOM_PROG");
    for (int i=0; i<geomprog_size; i++)
        if (geomprog_list[i]==world_rank) {
            MPI_Barrier(test_comm);
            bcast_only(stdout, test_comm, max_mem);
            gather_only(stdout, test_comm, max_mem);
            allgather_only(stdout, test_comm, max_mem);
            scatter_only(stdout, test_comm, max_mem);
            alltoall_only(stdout, test_comm, max_mem);
            reduce_only(stdout, test_comm, max_mem);
            allreduce_only(stdout, test_comm, max_mem);
            reducescatterblock_only(stdout, test_comm, max_mem);
        }
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

    if (world_rank==0) print_meminfo(stdout, "after MPI collective tests");

    /*********************************************************************
     * CLEAN UP AND FINALIZE
     *********************************************************************/

    for (int i=0; i<geomprog_size; i++)
        if (geomprog_list[i]==world_rank)
            MPI_Comm_free(&comm_geomprog);
    free(geomprog_list);

    MPI_Comm_free(&comm_world_minus_one);
    MPI_Comm_free(&comm_world_oddeven);
    MPI_Comm_free(&comm_world_leftright);
    MPI_Comm_free(&comm_world_reordered);
    MPI_Comm_free(&comm_world_dup);

    MPI_Barrier(MPI_COMM_WORLD);

    double t1 = MPI_Wtime();
    double dt = t1-t0;
    if (world_rank==0)
        printf("TEST FINISHED SUCCESSFULLY IN %lf SECONDS \n", dt);
    fflush(stdout);

    if (world_rank==0) print_meminfo(stdout, "before MPI_Finalize");

    MPI_Finalize();

    return 0;
}
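The test above leans on several helpers defined elsewhere: safemalloc, print_meminfo, and the *_only collective drivers. As an illustration of the two simplest, here is a minimal sketch assuming safemalloc is a checked malloc and print_meminfo reports resident-set size from /proc/self/status (Linux-specific); the real implementations may gather and print more information.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/* Checked malloc: abort the job rather than continue with a NULL pointer. */
void * safemalloc(size_t size)
{
    void * ptr = malloc(size);
    if (ptr == NULL) {
        fprintf(stderr, "malloc of %zu bytes failed\n", size);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    return ptr;
}

/* Report VmRSS from /proc/self/status with a caller-supplied label. */
void print_meminfo(FILE * f, const char * label)
{
    FILE * status = fopen("/proc/self/status", "r");
    if (status == NULL) return;   /* silently skip on non-Linux systems */
    char line[256];
    while (fgets(line, sizeof(line), status) != NULL) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            fprintf(f, "%s: %s", label, line);
            break;
        }
    }
    fclose(status);
}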