int main( int argc, char *argv[] ) { int errs = 0; int wrank, wsize, mrank, msize, inter_rank; int np = 2; int errcodes[2]; int rrank = -1; MPI_Comm parentcomm, intercomm, intercomm2, even_odd_comm, merged_world; int can_spawn; MTest_Init( &argc, &argv ); errs += MTestSpawnPossible(&can_spawn); if (can_spawn) { MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); MPI_Comm_size( MPI_COMM_WORLD, &wsize ); if (wsize != 2) { printf( "world size != 2, this test will not work correctly\n" ); errs++; } MPI_Comm_get_parent( &parentcomm ); if (parentcomm == MPI_COMM_NULL) { MPI_Comm_spawn( (char*)"./spaiccreate2", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm, errcodes ); } else { intercomm = parentcomm; } MPI_Intercomm_merge( intercomm, (parentcomm == MPI_COMM_NULL ? 0 : 1), &merged_world ); MPI_Comm_rank( merged_world, &mrank ); MPI_Comm_size( merged_world, &msize ); MPI_Comm_split( merged_world, mrank % 2, wrank, &even_odd_comm ); MPI_Intercomm_create( even_odd_comm, 0, merged_world, (mrank + 1) % 2, 123, &intercomm2 ); MPI_Comm_rank( intercomm2, &inter_rank ); /* odds receive from evens */ MPI_Sendrecv( &inter_rank, 1, MPI_INT, inter_rank, 456, &rrank, 1, MPI_INT, inter_rank, 456, intercomm2, MPI_STATUS_IGNORE ); if (rrank != inter_rank) { printf( "Received %d from %d; expected %d\n", rrank, inter_rank, inter_rank ); errs++; } MPI_Barrier( intercomm2 ); MPI_Comm_free( &intercomm ); MPI_Comm_free( &intercomm2 ); MPI_Comm_free( &merged_world ); MPI_Comm_free( &even_odd_comm ); /* Note that the MTest_Finalize get errs only over COMM_WORLD */ /* Note also that both the parent and child will generate "No Errors" if both call MTest_Finalize */ if (parentcomm == MPI_COMM_NULL) { MTest_Finalize( errs ); } } else { MTest_Finalize( errs ); } MPI_Finalize(); return 0; }
/*@C
   PetscHMPISpawn - Initialize additional processes to be used as "worker" processes. This is not generally called by users.
   One should use -hmpi_spawn_size <n> to indicate that you wish to have n-1 new MPI processes spawned for each current process.

   Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

   Input Parameter:
.  nodesize - size of each compute node that will share processors

   Options Database:
.  -hmpi_spawn_size nodesize

   Notes: This is only supported on systems with an MPI 2 implementation that includes the MPI_Comm_spawn() routine.

$    Comparison of two approaches for HMPI usage (MPI started with N processes)
$
$    -hmpi_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
$                         and n-1 worker processes (used by PETSc) for each application node.
$                         You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$                         (used by PETSc)
$                         You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p cpus), the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCHMPI for a PETSc preconditioner that can use this functionality
$

   For both PetscHMPISpawn() and PetscHMPIMerge() PETSC_COMM_WORLD consists of one process per "node", PETSC_COMM_LOCAL_WORLD consists of all the processes in a "node."

   In both cases the user's code is running ONLY on PETSC_COMM_WORLD (that was newly generated by running this command).
Level: developer Concepts: HMPI .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPIMerge(), PetscHMPIRun() @*/ PetscErrorCode PetscHMPISpawn(PetscMPIInt nodesize) { PetscErrorCode ierr; PetscMPIInt size; MPI_Comm parent,children; PetscFunctionBegin; ierr = MPI_Comm_get_parent(&parent);CHKERRQ(ierr); if (parent == MPI_COMM_NULL) { /* the original processes started by user */ char programname[PETSC_MAX_PATH_LEN]; char **argv; ierr = PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); ierr = PetscGetArguments(&argv);CHKERRQ(ierr); ierr = MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);CHKERRQ(ierr); ierr = PetscFreeArguments(argv);CHKERRQ(ierr); ierr = MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = PetscInfo2(0,"PETSc HMPI successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);CHKERRQ(ierr); saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD; } else { /* worker nodes that get spawned */ ierr = MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);CHKERRQ(ierr); ierr = PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);CHKERRQ(ierr); PetscHMPIWorker = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */ PetscEnd(); /* cannot continue into user code */ } PetscFunctionReturn(0); }
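/*
 * A minimal stand-alone sketch (not PETSc source) of the spawn-then-merge pattern
 * that the PetscHMPISpawn() manual page above describes: each original process
 * spawns NODE_SIZE-1 copies of itself over MPI_COMM_SELF, and both sides merge the
 * resulting intercommunicator into a per-node intracommunicator (the role played by
 * PETSC_COMM_LOCAL_WORLD above). NODE_SIZE and the re-exec of argv[0] are
 * illustrative assumptions, not part of the PETSc API.
 */
#include <mpi.h>
#include <stdio.h>

#define NODE_SIZE 3   /* hypothetical node size, i.e. the effect of -hmpi_spawn_size 3 */

int main(int argc, char *argv[])
{
    MPI_Comm parent, children, node_comm;
    int nrank, nsize;

    MPI_Init(&argc, &argv);
    MPI_Comm_get_parent(&parent);
    if (parent == MPI_COMM_NULL) {
        /* original process: spawn the workers and join the merge as the "low" group */
        MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, NODE_SIZE - 1, MPI_INFO_NULL,
                       0, MPI_COMM_SELF, &children, MPI_ERRCODES_IGNORE);
        MPI_Intercomm_merge(children, 0, &node_comm);
    } else {
        /* spawned worker: join the merge as the "high" group */
        MPI_Intercomm_merge(parent, 1, &node_comm);
    }
    MPI_Comm_rank(node_comm, &nrank);
    MPI_Comm_size(node_comm, &nsize);
    printf("per-node comm: rank %d of %d\n", nrank, nsize);
    MPI_Comm_free(&node_comm);
    MPI_Finalize();
    return 0;
}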
static void do_target(char* argv[], MPI_Comm parent)
{
    int rank, first = 0, err;
    MPI_Comm intra, inter, merge1;

    if( 0 == strcmp(argv[1], cmd_argv1) ) first = 1;

    /*MPI_Comm_set_errhandler(parent, MPI_ERRORS_RETURN);*/
    err = MPI_Intercomm_merge( parent, 1, &intra );
    MPI_Comm_rank(intra, &rank);

    if( first ) {
        printf( "%s: MPI_Intercomm_create( intra, 0, MPI_COMM_WORLD, 0, %d, &inter) [rank %d]\n", whoami, tag, rank );
        err = MPI_Intercomm_create( intra, 0, MPI_COMM_WORLD, 0, tag, &inter);
        printf( "%s: intercomm_create (%d)\n", whoami, err );
    } else {
        printf( "%s: MPI_Intercomm_create( MPI_COMM_WORLD, 0, intra, 0, %d, &inter) [rank %d]\n", whoami, tag, rank );
        err = MPI_Intercomm_create( MPI_COMM_WORLD, 0, intra, 0, tag, &inter);
        printf( "%s: intercomm_create (%d)\n", whoami, err );
    }
    err = MPI_Intercomm_merge( inter, 0, &merge1 );
    MPI_Comm_rank(merge1, &rank);
    printf( "%s: intercomm_merge(%d) (%d) [rank %d]\n", whoami, first, err, rank );
    sleep(20);
    err = MPI_Barrier(merge1);
    printf( "%s: barrier (%d)\n", whoami, err );
    MPI_Comm_free(&merge1);
    MPI_Comm_free(&inter);
    MPI_Comm_free(&intra);
    MPI_Comm_disconnect(&parent);
}
/* Create a processor group containing the processes in pid_list. * * NOTE: pid_list list must be identical and sorted on all processes */ void pgroup_create(int grp_size, int *pid_list, MPI_Comm *group_out) { int i, grp_me, me, nproc, merge_size; MPI_Comm pgroup, inter_pgroup; MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nproc); /* CASE: Group size 0 */ if (grp_size == 0) { *group_out = MPI_COMM_NULL; return; } /* CASE: Group size 1 */ else if (grp_size == 1 && pid_list[0] == me) { *group_out = MPI_COMM_SELF; return; } /* CHECK: If I'm not a member, return COMM_NULL */ grp_me = -1; for (i = 0; i < grp_size; i++) { if (pid_list[i] == me) { grp_me = i; break; } } if (grp_me < 0) { *group_out = MPI_COMM_NULL; return; } pgroup = MPI_COMM_SELF; for (merge_size = 1; merge_size < grp_size; merge_size *= 2) { int gid = grp_me / merge_size; MPI_Comm pgroup_old = pgroup; if (gid % 2 == 0) { /* Check if right partner doesn't exist */ if ((gid+1)*merge_size >= grp_size) continue; MPI_Intercomm_create(pgroup, 0, MPI_COMM_WORLD, pid_list[(gid+1)*merge_size], INTERCOMM_TAG, &inter_pgroup); MPI_Intercomm_merge(inter_pgroup, 0 /* LOW */, &pgroup); } else { MPI_Intercomm_create(pgroup, 0, MPI_COMM_WORLD, pid_list[(gid-1)*merge_size], INTERCOMM_TAG, &inter_pgroup); MPI_Intercomm_merge(inter_pgroup, 1 /* HIGH */, &pgroup); } MPI_Comm_free(&inter_pgroup); if (pgroup_old != MPI_COMM_SELF) MPI_Comm_free(&pgroup_old); } *group_out = pgroup; }
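/*
 * Hypothetical caller for pgroup_create() above (an illustration, not part of the
 * original file): build a "pgroup" communicator over the even ranks of
 * MPI_COMM_WORLD.  Every process must pass the same sorted pid_list; non-members
 * simply receive MPI_COMM_NULL.
 */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

void pgroup_create(int grp_size, int *pid_list, MPI_Comm *group_out); /* defined above */

int main(int argc, char **argv)
{
    int i, rank, nproc, grp_size;
    int *pid_list;
    MPI_Comm evens;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    grp_size = (nproc + 1) / 2;
    pid_list = malloc(grp_size * sizeof(int));
    for (i = 0; i < grp_size; i++)
        pid_list[i] = 2 * i;               /* identical, sorted list on every rank */

    pgroup_create(grp_size, pid_list, &evens);

    if (evens != MPI_COMM_NULL) {
        int grank;
        MPI_Comm_rank(evens, &grank);
        printf("world rank %d -> pgroup rank %d of %d\n", rank, grank, grp_size);
        if (evens != MPI_COMM_SELF)        /* the size-1 case returns MPI_COMM_SELF */
            MPI_Comm_free(&evens);
    }
    free(pid_list);
    MPI_Finalize();
    return 0;
}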
/** Create an ARMCI group that contains a subset of the nodes in the parent * group. Collective across output group. * * @param[in] grp_size Number of entries in pid_list. * @param[in] pid_list Sorted list of process ids that will be in the new group. * @param[out] armci_grp_out The new ARMCI group, only valid on group members. * @param[in] armci_grp_parent The parent of the new ARMCI group. */ static inline void ARMCI_Group_create_comm_noncollective(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out, ARMCI_Group *armci_grp_parent) { const int INTERCOMM_TAG = 42; int i, grp_me, me, merge_size; MPI_Comm pgroup, inter_pgroup; me = armci_grp_parent->rank; /* CHECK: If I'm not a member, return COMM_NULL */ grp_me = -1; for (i = 0; i < grp_size; i++) { if (pid_list[i] == me) { grp_me = i; break; } } if (grp_me < 0) { armci_grp_out->comm = MPI_COMM_NULL; return; } /* CASE: Group size 1 */ else if (grp_size == 1 && pid_list[0] == me) { MPI_Comm_dup(MPI_COMM_SELF, &armci_grp_out->comm); return; } pgroup = MPI_COMM_SELF; /* Recursively merge adjacent groups until only one group remains. */ for (merge_size = 1; merge_size < grp_size; merge_size *= 2) { int gid = grp_me / merge_size; MPI_Comm pgroup_old = pgroup; if (gid % 2 == 0) { /* Check if right partner doesn't exist */ if ((gid+1)*merge_size >= grp_size) continue; MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid+1)*merge_size], INTERCOMM_TAG, &inter_pgroup); MPI_Intercomm_merge(inter_pgroup, 0 /* LOW */, &pgroup); } else { MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid-1)*merge_size], INTERCOMM_TAG, &inter_pgroup); MPI_Intercomm_merge(inter_pgroup, 1 /* HIGH */, &pgroup); } MPI_Comm_free(&inter_pgroup); if (pgroup_old != MPI_COMM_SELF) MPI_Comm_free(&pgroup_old); } armci_grp_out->comm = pgroup; }
int main(int argc, char *argv[])
{
    int rank, size;
    int lsize, rsize;
    int grank, gsize;
    MPI_Comm parent, global;

    MPI_Init(&argc, &argv);

    // Local info
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Global info
    MPI_Comm_get_parent(&parent);
    if (parent == MPI_COMM_NULL) error("No parent!");
    MPI_Comm_remote_size(parent, &size);
    MPI_Comm_size(parent, &lsize);
    MPI_Comm_remote_size(parent, &rsize);

    MPI_Intercomm_merge(parent, 1, &global);
    MPI_Comm_rank(global, &grank);
    MPI_Comm_size(global, &gsize);
    printf("child %d: lsize=%d, rsize=%d, grank=%d, gsize=%d\n", rank, lsize, rsize, grank, gsize);

    MPI_Barrier(global);
    printf("%d: after Barrier\n", grank);

    MPI_Comm_free(&global);
    MPI_Finalize();
    return 0;
}
static int spawn_and_merge( char* argv[], char* arg, int count,
                            MPI_Comm* inter, MPI_Comm* intra )
{
    int *errcode, err, i;
    char *spawn_argv[2];

    errcode = malloc(sizeof(int) * count);
    if (errcode == NULL)
        ompitest_error(__FILE__, __LINE__, "Doh! Rank %d was not able to allocate enough memory. MPI test aborted!\n", 0);
    /* fill the whole int array, not just the first 'count' bytes */
    memset(errcode, -1, sizeof(int) * count);
    /*MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);*/
    spawn_argv[0] = arg;
    spawn_argv[1] = NULL;
    err = MPI_Comm_spawn(argv[0], spawn_argv, count, MPI_INFO_NULL, 0,
                         MPI_COMM_WORLD, inter, errcode);
    for (i = 0; i < count; i++)
        if (errcode[i] != MPI_SUCCESS)
            ompitest_error(__FILE__, __LINE__, "ERROR: MPI_Comm_spawn returned errcode[%d] = %d\n", i, errcode[i]);
    if (err != MPI_SUCCESS)
        ompitest_error(__FILE__, __LINE__, "ERROR: MPI_Comm_spawn returned errcode = %d\n", err);
    err = MPI_Intercomm_merge( *inter, 0, intra );
    free(errcode);
    return err;
}
int main(int argc, char **argv)
{
    int iter, err, rank, size;
    MPI_Comm comm, merged;

    /* MPI environment */
    printf("parent*******************************\n");
    printf("parent: Launching MPI*\n");
    MPI_Init( &argc, &argv);

    for (iter = 0; iter < 1000; ++iter) {
        /* err receives the single spawned process's error code */
        MPI_Comm_spawn(EXE_TEST, NULL, 1, MPI_INFO_NULL,
                       0, MPI_COMM_WORLD, &comm, &err);
        printf("parent: MPI_Comm_spawn #%d return : %d\n", iter, err);

        MPI_Intercomm_merge(comm, 0, &merged);
        MPI_Comm_rank(merged, &rank);
        MPI_Comm_size(merged, &size);
        printf("parent: MPI_Comm_spawn #%d rank %d, size %d\n", iter, rank, size);
        // sleep(2);
        MPI_Comm_free(&merged);
    }

    MPI_Finalize();
    printf("parent: End.\n" );
    return 0;
}
int main(int argc, char *argv[]) { char str[10]; MPI_Comm intercomm1, intracomm, intercomm2; int err, errcodes[256], rank; MPI_Init(&argc, &argv); /* printf("Child out of MPI_Init\n"); fflush(stdout); */ MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_get_parent(&intercomm1); MPI_Intercomm_merge(intercomm1, 1, &intracomm); err = MPI_Comm_spawn("spawn_merge_child2", MPI_ARGV_NULL, 2, MPI_INFO_NULL, 2, intracomm, &intercomm2, errcodes); if (err) printf("Error in MPI_Comm_spawn\n"); MPI_Comm_rank(intercomm2, &rank); if (rank == 3) { err = MPI_Recv(str, 3, MPI_CHAR, 1, 0, intercomm2, MPI_STATUS_IGNORE); printf("Parent (first child) received from child 2: %s\n", str); fflush(stdout); err = MPI_Send("bye", 4, MPI_CHAR, 1, 0, intercomm2); } MPI_Finalize(); return 0; }
static void do_parent(char *argv[], int rank, int count) { MPI_Comm ab_inter, ab_intra, ac_inter, ac_intra, ab_c_inter, abc_intra; int err; err = spawn_and_merge( argv, cmd_argv1, count, &ab_inter, &ab_intra ); err = spawn_and_merge( argv, cmd_argv2, count, &ac_inter, &ac_intra ); printf( "%s: MPI_Intercomm_create( ab_intra, 0, ac_intra, %d, %d, &inter) (%d)\n", whoami, count, tag, err ); err = MPI_Intercomm_create( ab_intra, 0, ac_intra, count, tag, &ab_c_inter ); printf( "%s: intercomm_create (%d)\n", whoami, err ); printf( "%s: barrier on inter-comm - before\n", whoami ); err = MPI_Barrier(ab_c_inter); printf( "%s: barrier on inter-comm - after\n", whoami ); err = MPI_Intercomm_merge(ab_c_inter, 0, &abc_intra); printf( "%s: intercomm_merge(%d) (%d) [rank %d]\n", whoami, 0, err, rank ); err = MPI_Barrier(abc_intra); printf( "%s: barrier (%d)\n", whoami, err ); MPI_Comm_free(&abc_intra); MPI_Comm_free(&ab_c_inter); MPI_Comm_free(&ab_intra); MPI_Comm_free(&ac_intra); MPI_Comm_disconnect(&ab_inter); MPI_Comm_disconnect(&ac_inter); }
EXPORT_MPI_API void FORTRAN_API mpi_intercomm_merge_ ( MPI_Fint *comm, MPI_Fint *high, MPI_Fint *comm_out, MPI_Fint *__ierr ) { MPI_Comm l_comm_out; *__ierr = MPI_Intercomm_merge( MPI_Comm_f2c(*comm), (int)*high, &l_comm_out); *comm_out = MPI_Comm_c2f(l_comm_out); }
void mpif_intercomm_merge_(MPI_Fint *intercomm, int *high, MPI_Fint *newintracomm, int *error) { MPI_Comm intercomm_c = MPI_Comm_f2c(*intercomm); MPI_Comm newintracomm_c; *error = MPI_Intercomm_merge(intercomm_c, *high, &newintracomm_c); *newintracomm = MPI_Comm_c2f(newintracomm_c); }
void mpi_intercomm_merge_f(MPI_Fint *intercomm, MPI_Flogical *high, MPI_Fint *newintracomm, MPI_Fint *ierr) { MPI_Comm c_newcomm; MPI_Comm c_intercomm = MPI_Comm_f2c(*intercomm); *ierr = MPI_Intercomm_merge (c_intercomm, OMPI_LOGICAL_2_INT(*high), &c_newcomm); if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { *newintracomm = MPI_Comm_c2f (c_newcomm); } }
int main(int argc, char *argv[]) { int rank, size; int universe_size, *universe_sizep, flag; int lsize, rsize; int grank, gsize; MPI_Comm everyone, global; /* intercommunicator */ char worker_program[100]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE, &universe_sizep, &flag); if (!flag) { universe_size = 8; } else universe_size = *universe_sizep; if( rank == 0 ) { printf("univ size = %d\n", universe_size); } sprintf(worker_program, "./slave"); MPI_Comm_spawn(worker_program, MPI_ARGV_NULL, 6, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &everyone, MPI_ERRCODES_IGNORE); MPI_Comm_size(everyone, &lsize); MPI_Comm_remote_size(everyone, &rsize); MPI_Intercomm_merge(everyone, 1, &global); MPI_Comm_rank(global, &grank); MPI_Comm_size(global, &gsize); printf("parent %d: lsize=%d, rsize=%d, grank=%d, gsize=%d\n", rank, lsize, rsize, grank, gsize); MPI_Barrier(global); printf("%d: after Barrier\n", grank); MPI_Comm_free(&global); MPI_Finalize(); return 0; }
void ompi_intercomm_merge_f(MPI_Fint *intercomm, ompi_fortran_logical_t *high, MPI_Fint *newintracomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; MPI_Comm c_intercomm = MPI_Comm_f2c(*intercomm); c_ierr = MPI_Intercomm_merge (c_intercomm, OMPI_LOGICAL_2_INT(*high), &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { *newintracomm = MPI_Comm_c2f (c_newcomm); } }
int main( int argc, char *argv[] ) { int np = NUM_SPAWNS; int my_rank, size; int errcodes[NUM_SPAWNS]; MPI_Comm allcomm; MPI_Comm intercomm; MPI_Init( &argc, &argv ); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_spawn( (char*)"./spawntest_child", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm, errcodes ); if ( intercomm == MPI_COMM_NULL ) { fprintf(stdout, "intercomm is null\n"); } MPI_Intercomm_merge(intercomm, 0, &allcomm); MPI_Comm_rank(allcomm, &my_rank); MPI_Comm_size(allcomm, &size); /* Without the Free of allcomm, the children *must not exit* until the master calls MPI_Finalize. */ MPI_Barrier( allcomm ); /* According to 10.5.4, case 1b in MPI2.2, the children and master are still connected unless MPI_Comm_disconnect is used with allcomm. MPI_Comm_free is not sufficient */ MPI_Comm_free( &allcomm ); MPI_Comm_disconnect( &intercomm ); fprintf(stdout, "%s:%d: Sleep starting; children should exit\n", __FILE__, __LINE__ );fflush(stdout); sleep(30); fprintf(stdout, "%s:%d: Sleep done; all children should have already exited\n", __FILE__, __LINE__ );fflush(stdout); MPI_Finalize(); return 0; }
static void do_parent(char *argv[], int rank, int count)
{
    MPI_Comm ab_inter, ab_intra, ac_inter, ac_intra, ab_c_inter, abc_intra;
    int err;

    err = spawn_and_merge( argv, cmd_argv1, count, &ab_inter, &ab_intra );
    err = spawn_and_merge( argv, cmd_argv2, count, &ac_inter, &ac_intra );

    printf( "%s: MPI_Intercomm_create( ab_intra, 0, ac_intra, 1, %d, &inter) (%d)\n", whoami, tag, err );
    err = MPI_Intercomm_create( ab_intra, 0, ac_intra, 1, tag, &ab_c_inter );
    printf( "%s: intercomm_create (%d)\n", whoami, err );

    err = MPI_Intercomm_merge(ab_c_inter, 0, &abc_intra);
    printf( "%s: intercomm_merge(%d) (%d) [rank %d]\n", whoami, 0, err, rank );
    sleep(20);
    err = MPI_Barrier(abc_intra);
    printf( "%s: barrier (%d)\n", whoami, err );
}
/* **** PA_SendData **** * This function sends the parameters to child 0 and then distributes the * data to all of the child processes. The data is distributed in the * 2D block/cyclic pattern required by the ScaLAPACK library. */ int PA_SendData(int ipDims[], double dpA[], double dpB[]) { int iFunction; MPI_Comm intercomm; iFunction = ipDims[8]; /* Broadcast the Data Dimension to the child Processes */ PA_ErrorHandler(MPI_Intercomm_merge(childComm, 0, &intercomm)); PA_ErrorHandler(MPI_Bcast(ipDims,10,MPI_INT, 0, intercomm)); /* If the function was sla.gridInit or sla.gridExit, then there is * no data to distribute. */ if (iFunction == 0) { D_Rprintf(("PA: iFunction = 0, Just before returning\n")); return 0; } else { /* Otherwise, distribute data as usual. */ /* Check if ready to run */ if ( PA_CheckFaultPriorRun () != 0){ printf(" Memory related problems in one/all of Spawned Processes \n"); printf(" Report the bug to: [email protected] \n"); return -1; } /* Distribute the first matrix*/ PAdistData (dpA,ipDims, ipDims[0], ipDims[1]); if (ipDims[2] != 0 && ipDims[8] != 2){ /* Distribute the Second matrix*/ PAdistData (dpB,ipDims, ipDims[2], ipDims[3]); } return 0; } }
/* Receive - I/P Data dimensions and Process Grid Specifications from the parent
 *  1. No. of rows in matrix A
 *  2. No. of cols in matrix A
 *  3. No. of rows in matrix B
 *  4. No. of cols in matrix B
 *  5. MB - Row Block size for matrix A
 *  6. NB - Col Block size for matrix A
 *  7. NPROW - Number of Process rows in the Process Grid - Row Block Size
 *  8. NPCOL - Number of Process cols in the Process Grid - Col Block Size
 *  9. Function id
 * 10. Release Flag
 */
int CR_GetInputParams(MPI_Comm mcParent, int *ipGridAndDims)
{
    MPI_Comm parent;
    if (MPI_Comm_get_parent(&parent) != MPI_SUCCESS) {
        Rprintf("ERROR[2]: Getting Parent Comm ... FAILED .. EXITING !!\n");
        return AsInt(2);
    }
    if(MPI_Intercomm_merge(parent, 1, &intercomm)!= MPI_SUCCESS)
        return -1;
    if(MPI_Bcast(ipGridAndDims,10, MPI_INT, 0, intercomm) != MPI_SUCCESS) {
        D_Rprintf(("Child: Broadcast error\n"));
        return -2;
    } else {
        return 0;
    }
}
int main(int argc, char **argv) { int errs = 0; int i; int rank, size; int *excl; int ranges[1][3]; int isLeft, rleader; MPI_Group world_group, high_group, even_group; MPI_Comm local_comm, inter_comm, test_comm, outcomm; MPI_Comm idupcomms[NUM_IDUPS]; MPI_Request reqs[NUM_IDUPS]; MTest_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_group(MPI_COMM_WORLD, &world_group); if (size < 2) { printf("this test requires at least 2 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* Idup MPI_COMM_WORLD multiple times */ for (i = 0; i < NUM_IDUPS; i++) { MPI_Comm_idup(MPI_COMM_WORLD, &idupcomms[i], &reqs[i]); } /* Overlap pending idups with various comm generation functions */ /* Comm_dup */ MPI_Comm_dup(MPI_COMM_WORLD, &outcomm); errs += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_split */ MPI_Comm_split(MPI_COMM_WORLD, rank % 2, size - rank, &outcomm); errs += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_create, high half of MPI_COMM_WORLD */ ranges[0][0] = size / 2; ranges[0][1] = size - 1; ranges[0][2] = 1; MPI_Group_range_incl(world_group, 1, ranges, &high_group); MPI_Comm_create(MPI_COMM_WORLD, high_group, &outcomm); MPI_Group_free(&high_group); errs += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_create_group, even ranks of MPI_COMM_WORLD */ /* exclude the odd ranks */ excl = malloc((size / 2) * sizeof(int)); for (i = 0; i < size / 2; i++) excl[i] = (2 * i) + 1; MPI_Group_excl(world_group, size / 2, excl, &even_group); free(excl); if (rank % 2 == 0) { MPI_Comm_create_group(MPI_COMM_WORLD, even_group, 0, &outcomm); } else { outcomm = MPI_COMM_NULL; } MPI_Group_free(&even_group); errs += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Intercomm_create & Intercomm_merge */ MPI_Comm_split(MPI_COMM_WORLD, (rank < size / 2), rank, &local_comm); if (rank == 0) { rleader = size / 2; } else if (rank == size / 2) { rleader = 0; } else { rleader = -1; } isLeft = rank < size / 2; MPI_Intercomm_create(local_comm, 0, MPI_COMM_WORLD, rleader, 99, &inter_comm); MPI_Intercomm_merge(inter_comm, isLeft, &outcomm); MPI_Comm_free(&local_comm); errs += MTestTestComm(inter_comm); MTestFreeComm(&inter_comm); errs += MTestTestComm(outcomm); MTestFreeComm(&outcomm); MPI_Waitall(NUM_IDUPS, reqs, MPI_STATUSES_IGNORE); for (i = 0; i < NUM_IDUPS; i++) { errs += MTestTestComm(idupcomms[i]); MPI_Comm_free(&idupcomms[i]); } MPI_Group_free(&world_group); MTest_Finalize(errs); return MTestReturnValue(errs); }
/** * Create a child group for to the given group. * * @param[in] n #procs in this group (<= that in group_parent) * @param[in] pid_list The list of proc ids (w.r.t. group_parent) * @param[out] id_child Handle to store the created group * @param[in] id_parent Parent group */ int comex_group_create( int n, int *pid_list, comex_group_t id_parent, comex_group_t *id_child) { int status; int grp_me; comex_igroup_t *igroup_child = NULL; MPI_Group *group_child = NULL; MPI_Comm *comm_child = NULL; comex_igroup_t *igroup_parent = NULL; MPI_Group *group_parent = NULL; MPI_Comm *comm_parent = NULL; /* create the node in the linked list of groups and */ /* get the child's MPI_Group and MPI_Comm, to be populated shortly */ comex_create_group_and_igroup(id_child, &igroup_child); group_child = &(igroup_child->group); comm_child = &(igroup_child->comm); /* get the parent's MPI_Group and MPI_Comm */ igroup_parent = comex_get_igroup_from_group(id_parent); group_parent = &(igroup_parent->group); comm_parent = &(igroup_parent->comm); status = MPI_Group_incl(*group_parent, n, pid_list, group_child); if (status != MPI_SUCCESS) { comex_error("MPI_Group_incl: Failed ", status); } { MPI_Comm comm, comm1, comm2; int lvl=1, local_ldr_pos; MPI_Group_rank(*group_child, &grp_me); if (grp_me == MPI_UNDEFINED) { *comm_child = MPI_COMM_NULL; /* FIXME: keeping the group around for now */ return COMEX_SUCCESS; } /* SK: sanity check for the following bitwise operations */ assert(grp_me>=0); MPI_Comm_dup(MPI_COMM_SELF, &comm); /* FIXME: can be optimized away */ local_ldr_pos = grp_me; while(n>lvl) { int tag=0; int remote_ldr_pos = local_ldr_pos^lvl; if (remote_ldr_pos < n) { int remote_leader = pid_list[remote_ldr_pos]; MPI_Comm peer_comm = *comm_parent; int high = (local_ldr_pos<remote_ldr_pos)?0:1; MPI_Intercomm_create( comm, 0, peer_comm, remote_leader, tag, &comm1); MPI_Comm_free(&comm); MPI_Intercomm_merge(comm1, high, &comm2); MPI_Comm_free(&comm1); comm = comm2; } local_ldr_pos &= ((~0)^lvl); lvl<<=1; } *comm_child = comm; /* cleanup temporary group (from MPI_Group_incl above) */ MPI_Group_free(group_child); /* get the actual group associated with comm */ MPI_Comm_group(*comm_child, group_child); } return COMEX_SUCCESS; }
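/*
 * Stand-alone trace (no MPI; an illustration, not part of the library above) of the
 * bitwise merge schedule used by comex_group_create().  For each member position it
 * prints, per level, the partner leader position and whether that member's side is
 * merged LOW or HIGH.  The group size n is an arbitrary assumption.
 */
#include <stdio.h>

int main(void)
{
    const int n = 5;                        /* illustrative group size */
    int grp_me;

    for (grp_me = 0; grp_me < n; grp_me++) {
        int lvl = 1, local_ldr_pos = grp_me;
        printf("member %d:", grp_me);
        while (n > lvl) {
            int remote_ldr_pos = local_ldr_pos ^ lvl;
            if (remote_ldr_pos < n) {
                int high = (local_ldr_pos < remote_ldr_pos) ? 0 : 1;
                printf("  lvl %d -> partner leader %d (%s)",
                       lvl, remote_ldr_pos, high ? "HIGH" : "LOW");
            } else {
                printf("  lvl %d -> no partner", lvl);
            }
            /* leader of the merged pair: the lower of the two, a multiple of 2*lvl */
            local_ldr_pos &= ((~0) ^ lvl);
            lvl <<= 1;
        }
        printf("\n");
    }
    return 0;
}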
int main( int argc, char *argv[] ) { int errs = 0, err; int rank, size, rsize, i; int np = 2; int errcodes[2]; MPI_Comm parentcomm, intercomm, intracomm, intracomm2, intracomm3; int isChild = 0; MPI_Status status; MTest_Init( &argc, &argv ); MPI_Comm_get_parent( &parentcomm ); if (parentcomm == MPI_COMM_NULL) { /* Create 2 more processes */ MPI_Comm_spawn( (char*)"./spawnintra", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm, errcodes ); } else intercomm = parentcomm; /* We now have a valid intercomm */ MPI_Comm_remote_size( intercomm, &rsize ); MPI_Comm_size( intercomm, &size ); MPI_Comm_rank( intercomm, &rank ); if (parentcomm == MPI_COMM_NULL) { /* Master */ if (rsize != np) { errs++; printf( "Did not create %d processes (got %d)\n", np, rsize ); } if (rank == 0) { for (i=0; i<rsize; i++) { MPI_Send( &i, 1, MPI_INT, i, 0, intercomm ); } } } else { /* Child */ isChild = 1; if (size != np) { errs++; printf( "(Child) Did not create %d processes (got %d)\n", np, size ); } MPI_Recv( &i, 1, MPI_INT, 0, 0, intercomm, &status ); if (i != rank) { errs++; printf( "Unexpected rank on child %d (%d)\n", rank, i ); } } /* At this point, try to form the intracommunicator */ MPI_Intercomm_merge( intercomm, isChild, &intracomm ); /* Check on the intra comm */ { int icsize, icrank, wrank; MPI_Comm_size( intracomm, &icsize ); MPI_Comm_rank( intracomm, &icrank ); MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); if (icsize != rsize + size) { errs++; printf( "Intracomm rank %d thinks size is %d, not %d\n", icrank, icsize, rsize + size ); } /* Make sure that the processes are ordered correctly */ if (isChild) { int psize; MPI_Comm_remote_size( parentcomm, &psize ); if (icrank != psize + wrank ) { errs++; printf( "Intracomm rank %d (from child) should have rank %d\n", icrank, psize + wrank ); } } else { if (icrank != wrank) { errs++; printf( "Intracomm rank %d (from parent) should have rank %d\n", icrank, wrank ); } } } /* At this point, try to form the intracommunicator, with the other processes first */ MPI_Intercomm_merge( intercomm, !isChild, &intracomm2 ); /* Check on the intra comm */ { int icsize, icrank, wrank; MPI_Comm_size( intracomm2, &icsize ); MPI_Comm_rank( intracomm2, &icrank ); MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); if (icsize != rsize + size) { errs++; printf( "(2)Intracomm rank %d thinks size is %d, not %d\n", icrank, icsize, rsize + size ); } /* Make sure that the processes are ordered correctly */ if (isChild) { if (icrank != wrank ) { errs++; printf( "(2)Intracomm rank %d (from child) should have rank %d\n", icrank, wrank ); } } else { int csize; MPI_Comm_remote_size( intercomm, &csize ); if (icrank != wrank + csize) { errs++; printf( "(2)Intracomm rank %d (from parent) should have rank %d\n", icrank, wrank + csize ); } } } /* At this point, try to form the intracommunicator, with an arbitrary choice for the first group of processes */ MPI_Intercomm_merge( intercomm, 0, &intracomm3 ); /* Check on the intra comm */ { int icsize, icrank, wrank; MPI_Comm_size( intracomm3, &icsize ); MPI_Comm_rank( intracomm3, &icrank ); MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); if (icsize != rsize + size) { errs++; printf( "(3)Intracomm rank %d thinks size is %d, not %d\n", icrank, icsize, rsize + size ); } /* Eventually, we should test that the processes are ordered correctly, by groups (must be one of the two cases above) */ } /* Update error count */ if (isChild) { /* Send the errs back to the master process */ MPI_Ssend( &errs, 1, MPI_INT, 0, 1, intercomm ); } else { if (rank == 0) { /* We could 
use intercomm reduce to get the errors from the children, but we'll use a simpler loop to make sure that we get valid data */ for (i=0; i<rsize; i++) { MPI_Recv( &err, 1, MPI_INT, i, 1, intercomm, MPI_STATUS_IGNORE ); errs += err; } } } /* It isn't necessary to free the intracomms, but it should not hurt */ MPI_Comm_free( &intracomm ); MPI_Comm_free( &intracomm2 ); MPI_Comm_free( &intracomm3 ); /* It isn't necessary to free the intercomm, but it should not hurt */ MPI_Comm_free( &intercomm ); /* Note that the MTest_Finalize get errs only over COMM_WORLD */ /* Note also that both the parent and child will generate "No Errors" if both call MTest_Finalize */ if (parentcomm == MPI_COMM_NULL) { MTest_Finalize( errs ); } MPI_Finalize(); return 0; }
MTEST_THREAD_RETURN_TYPE test_idup(void *arg) { int i; int size, rank; int ranges[1][3]; int rleader, isLeft; int *excl = NULL; int tid = *(int *) arg; MPI_Group ingroup, high_group, even_group; MPI_Comm local_comm, inter_comm; MPI_Comm idupcomms[NUM_IDUPS]; MPI_Request reqs[NUM_IDUPS]; MPI_Comm outcomm; MPI_Comm incomm = comms[tid]; MPI_Comm_size(incomm, &size); MPI_Comm_rank(incomm, &rank); MPI_Comm_group(incomm, &ingroup); /* Idup incomm multiple times */ for (i = 0; i < NUM_IDUPS; i++) { MPI_Comm_idup(incomm, &idupcomms[i], &reqs[i]); } /* Overlap pending idups with various comm generation functions */ /* Comm_dup */ MPI_Comm_dup(incomm, &outcomm); errs[tid] += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_split */ MPI_Comm_split(incomm, rank % 2, size - rank, &outcomm); errs[tid] += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_create, high half of incomm */ ranges[0][0] = size / 2; ranges[0][1] = size - 1; ranges[0][2] = 1; MPI_Group_range_incl(ingroup, 1, ranges, &high_group); MPI_Comm_create(incomm, high_group, &outcomm); MPI_Group_free(&high_group); errs[tid] += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Comm_create_group, even ranks of incomm */ /* exclude the odd ranks */ excl = malloc((size / 2) * sizeof(int)); for (i = 0; i < size / 2; i++) excl[i] = (2 * i) + 1; MPI_Group_excl(ingroup, size / 2, excl, &even_group); free(excl); if (rank % 2 == 0) { MPI_Comm_create_group(incomm, even_group, 0, &outcomm); } else { outcomm = MPI_COMM_NULL; } MPI_Group_free(&even_group); errs[tid] += MTestTestComm(outcomm); MTestFreeComm(&outcomm); /* Intercomm_create & Intercomm_merge */ MPI_Comm_split(incomm, (rank < size / 2), rank, &local_comm); if (rank == 0) { rleader = size / 2; } else if (rank == size / 2) { rleader = 0; } else { rleader = -1; } isLeft = rank < size / 2; MPI_Intercomm_create(local_comm, 0, incomm, rleader, 99, &inter_comm); MPI_Intercomm_merge(inter_comm, isLeft, &outcomm); MPI_Comm_free(&local_comm); errs[tid] += MTestTestComm(inter_comm); MTestFreeComm(&inter_comm); errs[tid] += MTestTestComm(outcomm); MTestFreeComm(&outcomm); MPI_Waitall(NUM_IDUPS, reqs, MPI_STATUSES_IGNORE); for (i = 0; i < NUM_IDUPS; i++) { errs[tid] += MTestTestComm(idupcomms[i]); MPI_Comm_free(&idupcomms[i]); } MPI_Group_free(&ingroup); return NULL; }
int main( int argc, char **argv ) { int size, rank, key, his_key, lrank, result; MPI_Comm myComm; MPI_Comm myFirstComm; MPI_Comm mySecondComm; int errors = 0, sum_errors; MPI_Status status; /* Initialization */ MPI_Init ( &argc, &argv ); MPI_Comm_rank ( MPI_COMM_WORLD, &rank); if (verbose) printf("[%d] MPI_Init complete!\n",rank);fflush(stdout); MPI_Comm_size ( MPI_COMM_WORLD, &size); /* Only works for 2 or more processes */ if (size >= 2) { MPI_Comm merge1, merge2, merge3, merge4; /* Generate membership key in the range [0,1] */ key = rank % 2; MPI_Comm_split ( MPI_COMM_WORLD, key, rank, &myComm ); /* This creates an intercomm that is the size of comm world but has processes grouped by even and odd */ MPI_Intercomm_create (myComm, 0, MPI_COMM_WORLD, (key+1)%2, 1, &myFirstComm ); /* Dup an intercomm */ MPI_Comm_dup ( myFirstComm, &mySecondComm ); MPI_Comm_rank( mySecondComm, &lrank ); his_key = -1; if (verbose) printf("[%d] Communicators created!\n",rank);fflush(stdout); /* Leaders communicate with each other */ if (lrank == 0) { MPI_Sendrecv (&key, 1, MPI_INT, 0, 0, &his_key, 1, MPI_INT, 0, 0, mySecondComm, &status); if (key != (his_key+1)%2) { printf( "Received %d but expected %d\n", his_key, (his_key+1)%2 ); errors++; } } if (verbose) printf("[%d] MPI_Sendrecv completed!\n",rank);fflush(stdout); if (errors) printf("[%d] Failed!\n",rank); if (verbose) printf( "About to merge intercommunicators\n" );fflush(stdout); MPI_Intercomm_merge ( mySecondComm, key, &merge1 ); if (verbose) printf( "merge1 done\n" );fflush(stdout); MPI_Intercomm_merge ( mySecondComm, (key+1)%2, &merge2 ); if (verbose) printf( "merge2 done\n" );fflush(stdout); MPI_Intercomm_merge ( mySecondComm, 0, &merge3 ); if (verbose) printf( "merge3 done\n" );fflush(stdout); MPI_Intercomm_merge ( mySecondComm, 1, &merge4 ); if (verbose) printf( "merge4 done\n" );fflush(stdout); if (verbose) printf("[%d] MPI_Intercomm_merge completed!\n",rank);fflush(stdout); /* We should check that these are correct! 
An easy test is that the merged comms are all MPI_SIMILAR (unless 2 processes used, in which case MPI_CONGRUENT is ok */ MPI_Comm_compare( merge1, MPI_COMM_WORLD, &result ); if ((size > 2 && result != MPI_SIMILAR) || (size == 2 && result != MPI_CONGRUENT)) { errors ++; printf( "merge1 is not the same size as comm world\n" ); } /* merge 2 isn't ordered the same way as the others, even for 2 processes */ MPI_Comm_compare( merge2, MPI_COMM_WORLD, &result ); if (result != MPI_SIMILAR) { errors ++; printf( "merge2 is not the same size as comm world\n" ); } MPI_Comm_compare( merge3, MPI_COMM_WORLD, &result ); if ((size > 2 && result != MPI_SIMILAR) || (size == 2 && result != MPI_CONGRUENT)) { errors ++; printf( "merge3 is not the same size as comm world\n" ); } MPI_Comm_compare( merge4, MPI_COMM_WORLD, &result ); if ((size > 2 && result != MPI_SIMILAR) || (size == 2 && result != MPI_CONGRUENT)) { errors ++; printf( "merge4 is not the same size as comm world\n" ); } if (verbose) printf("[%d] MPI_Comm_compare completed!\n",rank);fflush(stdout); /* Free communicators */ if (verbose) printf( "About to free communicators\n" ); MPI_Comm_free( &myComm ); MPI_Comm_free( &myFirstComm ); MPI_Comm_free( &mySecondComm ); MPI_Comm_free( &merge1 ); MPI_Comm_free( &merge2 ); MPI_Comm_free( &merge3 ); MPI_Comm_free( &merge4 ); if (verbose) printf("[%d] MPI_Comm_free completed!\n",rank);fflush(stdout); } else { errors ++; printf("[%d] Failed - at least 2 nodes must be used\n",rank); } MPI_Barrier( MPI_COMM_WORLD ); MPI_Allreduce( &errors, &sum_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (sum_errors > 0) { printf( "%d errors on process %d\n", errors, rank ); } else if (rank == 0) { printf( " No Errors\n" ); } /* Finalize and end! */ MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { char hostname[255] ; char buff[255] ; int role ; int num_clients ; int size, rank ; FILE *fp ; char server_port_name[MPI_MAX_PORT_NAME] ; MPI_Comm intercomm, intracomm ; MPI_Status status ; int msg_count ; int i ; /* sanity check the args */ if(argc != 3) { fprintf(stderr, "usage %s <num clients> <1:server | 0:client>\n", argv[0]) ; exit(1) ; } num_clients = atoi(argv[1]) ; role = atoi(argv[2]) ; if (num_clients <= 0 || (role != 0 && role != 1)) { fprintf(stderr, "usage %s <num clients> <1:server | 0:client>\n", argv[0]) ; exit(1) ; } /* initialize MPI */ CHK(MPI_Init(&argc, &argv)) ; /* get the node name */ { int retval = gethostname(hostname, 255) ; if(retval == -1) { fprintf(stderr, "gethostname failed: %s\n", strerror(errno)) ; exit(1) ; } } /* server */ if(role == 1) { printf("SERVER: on node '%s'\n", hostname) ; /* open port to establish connections */ CHK(MPI_Open_port(MPI_INFO_NULL, server_port_name)) ; printf("SERVER: opened port=%s\n", server_port_name) ; /* store the port name */ fp = fopen("server_port_name.txt", "w") ; if(fp == NULL) { fprintf(stderr, "fopen failed: %s\n", strerror(errno)) ; exit(1) ; } fprintf(fp, "%s", server_port_name) ; fclose(fp) ; /* the server accepts connections from all the clients */ for(i = 0 ; i < num_clients ; i++ ) { /* accept connections at this port */ CHK(MPI_Comm_accept(server_port_name, MPI_INFO_NULL, 0, i == 0 ? MPI_COMM_WORLD : intracomm, &intercomm)) ; printf("SERVER: accepted connection from client %d\n", i+1) ; /* merge, to form one intra communicator */ CHK(MPI_Intercomm_merge(intercomm, 0, &intracomm)) ; printf("SERVER: merged with client %d\n", i+1) ; CHK(MPI_Comm_size(intracomm, &size)) ; CHK(MPI_Comm_rank(intracomm, &rank)) ; printf("SERVER: after merging with client %d: size=%d rank=%d\n", i+1, size, rank) ; } } /* end server */ /* client */ if(role == 0) { printf("CLIENT: on node '%s'\n", hostname) ; fp = fopen("server_port_name.txt", "r") ; if(fp == NULL) { fprintf(stderr, "fopen failed: %s\n", strerror(errno)) ; exit(1) ; } fscanf(fp, "%s", server_port_name) ; fclose(fp) ; printf("CLIENT: attempting to connect to server on port=%s\n", server_port_name) ; /* connect to the server */ CHK(MPI_Comm_connect (server_port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm)) ; printf("CLIENT: connected to server on port\n") ; /* merge the server and client to one intra communicator */ CHK(MPI_Intercomm_merge(intercomm, 1, &intracomm)) ; printf("CLIENT: merged with existing intracomm\n") ; CHK(MPI_Comm_size(intracomm, &size)) ; CHK(MPI_Comm_rank(intracomm, &rank)) ; printf("CLIENT: after merging, new comm: size=%d rank=%d\n", size, rank) ; for (i = rank ; i < num_clients ; i++) { /* client performs a collective accept */ CHK(MPI_Comm_accept(server_port_name, MPI_INFO_NULL, 0, intracomm, &intercomm)) ; printf("CLIENT: connected to server on port\n") ; /* merge the two intra comms back to one communicator */ CHK(MPI_Intercomm_merge(intercomm, 0, &intracomm)) ; printf("CLIENT: merged with existing members\n") ; CHK(MPI_Comm_size(intracomm, &size)) ; CHK(MPI_Comm_rank(intracomm, &rank)) ; printf("CLIENT: new size after merging with existing members: size=%d rank=%d\n", size, rank) ; } } /* end client */ CHK(MPI_Comm_size(intracomm, &size)) ; CHK(MPI_Comm_rank(intracomm, &rank)) ; printf("After fusion: size=%d rank=%d\n", size, rank) ; if(rank == 0) { msg_count = num_clients ; while(msg_count) { CHK(MPI_Recv(buff, 255, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, intracomm, &status)) ; printf("Received hello msg 
from '%s'\n", buff) ; msg_count-- ; } } else { /* all ranks > 0 */ CHK(MPI_Send(hostname, strlen(hostname) + 1, MPI_CHAR, 0, TAG, intracomm)) ; } CHK(MPI_Finalize()) ; fprintf(stderr, "Rank %d is exiting\n", rank); return 0 ; }
int main( int argc, char ** argv ) { MPI_Comm tmp, comm, startComm; char * fname; char * actualFname = NULL; char * globalFname = NULL; int totalSize, expectedRank, size, cachedRank; char portName[MPI_MAX_PORT_NAME]; int rankToAccept = 1; /* Debug - print out where we picked up the MPICH build from */ #ifdef MPICHLIBSTR msg( "MPICH library taken from: %s\n", MPICHLIBSTR ); #endif if( argc != 4 ) { printf( "Usage: %s <fname> <totalSize> <idx-1-based>\n", argv[0] ); exit( 1 ); } /* This is the base name of the file into which we write the port */ fname = argv[1]; /* This is the total number of processes launched */ totalSize = atoi( argv[2] ); /* Each process knows its expected rank */ expectedRank = atoi( argv[3] )-1; /* Start a watchdog thread which will abort after 120 seconds, and will * print stack traces using GDB every 5 seconds if you don't call * strokeWatchdog() */ startWatchdog( 120 ); /* Print a debug header */ msg( "Waiting for: %d - my rank is %d\n", totalSize, expectedRank ); /* Singleton init */ MPI_Init( 0, 0 ); /* Duplicate from MPI_COMM_SELF the starting point */ MPI_Comm_dup( MPI_COMM_SELF, &startComm ); if( expectedRank == 0 ) { /* This process opens the port, and writes the information to the file */ MPI_Open_port( MPI_INFO_NULL, portName ); /* Write the port to fname.<rank> so that the connecting processes can * wait their turn by checking for the correct file to show up */ actualFname = writePortToFile( portName, "%s.%d", fname, rankToAccept++ ); /* The wrapper script I'm using checks for the existance of "fname", so * create that - even though it isn't used */ globalFname = writePortToFile( portName, fname ); installExitHandler( globalFname ); comm = startComm; } else { char * readPort; readPort = getPortFromFile( "%s.%d", fname, expectedRank ); strncpy( portName, readPort, MPI_MAX_PORT_NAME ); free( readPort ); msg( "Read port <%s>\n", portName ); MPI_Comm_connect( portName, MPI_INFO_NULL, 0, startComm, &comm ); MPI_Intercomm_merge( comm, 1, &tmp ); comm = tmp; MPI_Comm_size( comm, &size ); msg( "After my first merge, size is now: %d\n", size ); } while( size < totalSize ) { /* Make sure we don't print a stack until we stall */ strokeWatchdog(); /* Accept the connection */ MPI_Comm_accept( portName, MPI_INFO_NULL, 0, comm, &tmp ); /* Merge into intracomm */ MPI_Intercomm_merge( tmp, 0, &comm ); /* Free the intercomm */ MPI_Comm_free( &tmp ); /* See where we're up to */ MPI_Comm_rank( comm, &cachedRank ); MPI_Comm_size( comm, &size ); if( expectedRank == 0 ) { msg( "Up to size: %d\n", size ); /* Delete the old file, create the new one */ unlink( actualFname ); free( actualFname ); /* Allow the next rank to connect */ actualFname = writePortToFile( portName, "%s.%d", fname, rankToAccept++ ); } } MPI_Comm_rank( comm, &cachedRank ); msg( "All done - I got rank: %d.\n", cachedRank ); MPI_Barrier( comm ); if( expectedRank == 0 ) { /* Cleanup on rank zero - delete some files */ sleep( 4 ); unlink( actualFname ); free( actualFname ); unlink( globalFname ); free( globalFname ); /* This lets my wrapper script know that we did everything correctly */ indicateConnectSucceeded(); } MPI_Finalize(); return 0; }
int MpiCommunicator::init( int minId, long thecomm_ ) { VT_FUNC_I( "MpiCommunicator::init" ); assert( sizeof(thecomm_) >= sizeof(MPI_Comm) ); MPI_Comm thecomm = (MPI_Comm)thecomm_; // turn wait mode on for intel mpi if possible // this should greatly improve performance for intel mpi PAL_SetEnvVar( "I_MPI_WAIT_MODE", "enable", 0); int flag; MPI_Initialized( &flag ); if ( ! flag ) { int p; //!! FIXME passing NULL ptr breaks mvapich1 mpi implementation MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p ); if( p != MPI_THREAD_MULTIPLE ) { // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC] Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl; } } else if( thecomm == 0 ) { CNC_ABORT( "Process has already been initialized" ); } MPI_Comm myComm = MPI_COMM_WORLD; int rank; MPI_Comm parentComm; if( thecomm == 0 ) { MPI_Comm_get_parent( &parentComm ); } else { m_customComm = true; m_exit0CallOk = false; myComm = thecomm; } MPI_Comm_rank( myComm, &rank ); // father of all checks if he's requested to spawn processes: if ( rank == 0 && parentComm == MPI_COMM_NULL ) { // Ok, let's spawn the clients. // I need some information for the startup. // 1. Name of the executable (default is the current exe) const char * _tmp = getenv( "CNC_MPI_SPAWN" ); if ( _tmp ) { int nClientsToSpawn = atol( _tmp ); _tmp = getenv( "CNC_MPI_EXECUTABLE" ); std::string clientExe( _tmp ? _tmp : "" ); if( clientExe.empty() ) clientExe = PAL_GetProgname(); CNC_ASSERT( ! clientExe.empty() ); // 3. Special setting for MPI_Info: hosts const char * clientHost = getenv( "CNC_MPI_HOSTS" ); // Prepare MPI_Info object: MPI_Info clientInfo = MPI_INFO_NULL; if ( clientHost ) { MPI_Info_create( &clientInfo ); if ( clientHost ) { MPI_Info_set( clientInfo, const_cast< char * >( "host" ), const_cast< char * >( clientHost ) ); // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Set MPI_Info_set( \"host\", \"" << clientHost << "\" )\n"; } } // Now spawn the client processes: // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Spawning " << nClientsToSpawn << " MPI processes" << std::endl; int* errCodes = new int[nClientsToSpawn]; MPI_Comm interComm; int err = MPI_Comm_spawn( const_cast< char * >( clientExe.c_str() ), MPI_ARGV_NULL, nClientsToSpawn, clientInfo, 0, MPI_COMM_WORLD, &interComm, errCodes ); delete [] errCodes; if ( err ) { // can't use Speaker yet, need Channels to be inited std::cerr << "[CnC " << rank << "] Error in MPI_Comm_spawn. Skipping process spawning"; } else { MPI_Intercomm_merge( interComm, 0, &myComm ); } } // else { // No process spawning // MPI-1 situation: all clients to be started by mpiexec // myComm = MPI_COMM_WORLD; //} } if ( thecomm == 0 && parentComm != MPI_COMM_NULL ) { // I am a child. Build intra-comm to the parent. MPI_Intercomm_merge( parentComm, 1, &myComm ); } MPI_Comm_rank( myComm, &rank ); CNC_ASSERT( m_channel == NULL ); MpiChannelInterface* myChannel = new MpiChannelInterface( use_crc(), myComm ); m_channel = myChannel; int size; MPI_Comm_size( myComm, &size ); // Are we on the host or on the remote side? if ( rank == 0 ) { if( size <= 1 ) { Speaker oss( std::cerr ); oss << "Warning: no clients avabilable. Forgot to set CNC_MPI_SPAWN?"; } // ==> HOST startup: // This initializes the mpi environment in myChannel. 
MpiHostInitializer hostInitializer( *myChannel ); hostInitializer.init_mpi_comm( myComm ); } else { // ==> CLIENT startup: // This initializes the mpi environment in myChannel. MpiClientInitializer clientInitializer( *myChannel ); clientInitializer.init_mpi_comm( myComm ); } { Speaker oss( std::cerr ); oss << "MPI initialization complete (rank " << rank << ")."; } // MPI_Barrier( myComm ); // Now the mpi specific setup is finished. // Do the generic initialization stuff. GenericCommunicator::init( minId ); return 0; }
int main( int argc, char *argv[] ) { int i, j; int high; int leader; int buffer[3]; int errcodes[2]; int world_rank; int world_size; int merge_rank; int merge_size; int inter_rank; int inter_rem_size; int inter_loc_size; int univ_rank; int univ_size; MPI_Comm parent_comm = MPI_COMM_NULL; MPI_Comm spawn_comm = MPI_COMM_NULL; MPI_Comm merge_comm = MPI_COMM_NULL; MPI_Comm peer_comm = MPI_COMM_NULL; MPI_Comm inter_comm = MPI_COMM_NULL; MPI_Comm univ_comm = MPI_COMM_NULL; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); if(world_size != 2) { printf("This program needs exactly np = 2 processes! Calling MPI_Abort()...\n"); MPI_Abort(MPI_COMM_WORLD, -1); } MPI_Comm_get_parent( &parent_comm ); if(parent_comm == MPI_COMM_NULL) { MPI_Comm_spawn((char*)"./spawn_univ_comm", MPI_ARGV_NULL, 2, MPI_INFO_NULL, 0, MPI_COMM_SELF, &spawn_comm, errcodes); } else { spawn_comm = parent_comm; } if(parent_comm == MPI_COMM_NULL) { high = 1; } else { high = 0; } /* Merge each intercomm between the spawned groups into an intracomm: */ MPI_Intercomm_merge(spawn_comm, high, &merge_comm); MPI_Comm_rank(merge_comm, &merge_rank); MPI_Comm_size(merge_comm, &merge_size); /* Determine the leader (rank 0 & 1 of the origin world): */ if(parent_comm == MPI_COMM_NULL) leader = merge_rank; else leader = -1; MPI_Allgather(&leader, 1, MPI_INT, buffer, 1, MPI_INT, merge_comm); for(i=0; i<merge_size; i++) { if(buffer[i] != -1) { leader = i; break; } } /* Create an intercomm between the two merged intracomms (and use the origin world as bridge/peer communicator): */ peer_comm = MPI_COMM_WORLD; MPI_Intercomm_create(merge_comm, leader, peer_comm, (world_rank+1)%2, 123, &inter_comm); MPI_Comm_rank(inter_comm, &inter_rank); MPI_Comm_size(inter_comm, &inter_loc_size); MPI_Comm_remote_size(inter_comm, &inter_rem_size); /* Merge the new intercomm into one single univeser: */ MPI_Intercomm_merge(inter_comm, 0, &univ_comm); MPI_Comm_rank(univ_comm, &univ_rank); MPI_Comm_size(univ_comm, &univ_size); /* The following disconnects() will only decrement the VCR reference counters: */ /* (and could thus also be replaced by MPI_Comm_free()...) */ MPI_Comm_disconnect(&inter_comm); MPI_Comm_disconnect(&merge_comm); MPI_Comm_disconnect(&spawn_comm); /* Now, the MPI universe is almost flat: just three worlds forming one universe! */ /* Loop over all ranks for acting as root: */ for(j=0; j<univ_size; j++) { /* Test, if simple communication works in this new and flat universe: */ if(univ_rank == j) { int remote_ranks[univ_size]; MPI_Request send_req; MPI_Request recv_reqs[univ_size]; MPI_Status status_array[univ_size]; for(i=0; i<univ_size; i++) { MPI_Irecv(&remote_ranks[i], 1, MPI_INT, i, j, univ_comm, &recv_reqs[i]); } MPI_Isend(&univ_rank, 1, MPI_INT, j, j, univ_comm, &send_req); MPI_Waitall(univ_size, recv_reqs, status_array); for(i=0; i<univ_size; i++) { if(remote_ranks[i] != i) { printf("ERROR: Wrong sender in universe! (got %d /& expected %d)\n", i, remote_ranks[i]); } } MPI_Wait(&send_req, MPI_STATUS_IGNORE); } else { MPI_Send(&univ_rank, 1, MPI_INT, j, j, univ_comm); } } /* The following disconnect() might already shutdown certain pscom connections */ /* (depending on the setting of ENABLE_LAZY_DISCONNECT in mpid_vc.c ...*/ MPI_Comm_disconnect(&univ_comm); if(univ_rank == 0) { printf(" No errors\n"); } MPI_Finalize(); return 0; }
/* * Get an intracommunicator with at least min_size members. If "allowSmaller" * is true, allow the communicator to be smaller than MPI_COMM_WORLD and * for this routine to return MPI_COMM_NULL for some values. Returns 0 if * no more communicators are available. */ int MTestGetIntracommGeneral(MPI_Comm * comm, int min_size, int allowSmaller) { int size, rank, merr; int done = 0; int isBasic = 0; /* The while loop allows us to skip communicators that are too small. * MPI_COMM_NULL is always considered large enough */ while (!done) { isBasic = 0; intraCommName = ""; switch (intraCommIdx) { case 0: *comm = MPI_COMM_WORLD; isBasic = 1; intraCommName = "MPI_COMM_WORLD"; break; case 1: /* dup of world */ merr = MPI_Comm_dup(MPI_COMM_WORLD, comm); if (merr) MTestPrintError(merr); intraCommName = "Dup of MPI_COMM_WORLD"; break; case 2: /* reverse ranks */ merr = MPI_Comm_size(MPI_COMM_WORLD, &size); if (merr) MTestPrintError(merr); merr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (merr) MTestPrintError(merr); merr = MPI_Comm_split(MPI_COMM_WORLD, 0, size - rank, comm); if (merr) MTestPrintError(merr); intraCommName = "Rank reverse of MPI_COMM_WORLD"; break; case 3: /* subset of world, with reversed ranks */ merr = MPI_Comm_size(MPI_COMM_WORLD, &size); if (merr) MTestPrintError(merr); merr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (merr) MTestPrintError(merr); merr = MPI_Comm_split(MPI_COMM_WORLD, ((rank < size / 2) ? 1 : MPI_UNDEFINED), size - rank, comm); if (merr) MTestPrintError(merr); intraCommName = "Rank reverse of half of MPI_COMM_WORLD"; break; case 4: *comm = MPI_COMM_SELF; isBasic = 1; intraCommName = "MPI_COMM_SELF"; break; case 5: { #if MTEST_HAVE_MIN_MPI_VERSION(3,0) /* Dup of the world using MPI_Intercomm_merge */ int rleader, isLeft; MPI_Comm local_comm, inter_comm; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (size > 1) { merr = MPI_Comm_split(MPI_COMM_WORLD, (rank < size / 2), rank, &local_comm); if (merr) MTestPrintError(merr); if (rank == 0) { rleader = size / 2; } else if (rank == size / 2) { rleader = 0; } else { rleader = -1; } isLeft = rank < size / 2; merr = MPI_Intercomm_create(local_comm, 0, MPI_COMM_WORLD, rleader, 99, &inter_comm); if (merr) MTestPrintError(merr); merr = MPI_Intercomm_merge(inter_comm, isLeft, comm); if (merr) MTestPrintError(merr); MPI_Comm_free(&inter_comm); MPI_Comm_free(&local_comm); intraCommName = "Dup of WORLD created by MPI_Intercomm_merge"; } else { *comm = MPI_COMM_NULL; } } break; case 6: { /* Even of the world using MPI_Comm_create_group */ int i; MPI_Group world_group, even_group; int *excl = NULL; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (allowSmaller && (size + 1) / 2 >= min_size) { /* exclude the odd ranks */ excl = malloc((size / 2) * sizeof(int)); for (i = 0; i < size / 2; i++) excl[i] = (2 * i) + 1; MPI_Comm_group(MPI_COMM_WORLD, &world_group); MPI_Group_excl(world_group, size / 2, excl, &even_group); MPI_Group_free(&world_group); free(excl); if (rank % 2 == 0) { /* Even processes create a comm. 
for themselves */ MPI_Comm_create_group(MPI_COMM_WORLD, even_group, 0, comm); intraCommName = "Even of WORLD created by MPI_Comm_create_group"; } else { *comm = MPI_COMM_NULL; } MPI_Group_free(&even_group); } else { *comm = MPI_COMM_NULL; } #else *comm = MPI_COMM_NULL; #endif } break; case 7: { /* High half of the world using MPI_Comm_create */ int ranges[1][3]; MPI_Group world_group, high_group; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); ranges[0][0] = size / 2; ranges[0][1] = size - 1; ranges[0][2] = 1; if (allowSmaller && (size + 1) / 2 >= min_size) { MPI_Comm_group(MPI_COMM_WORLD, &world_group); merr = MPI_Group_range_incl(world_group, 1, ranges, &high_group); if (merr) MTestPrintError(merr); merr = MPI_Comm_create(MPI_COMM_WORLD, high_group, comm); if (merr) MTestPrintError(merr); MPI_Group_free(&world_group); MPI_Group_free(&high_group); intraCommName = "High half of WORLD created by MPI_Comm_create"; } else { *comm = MPI_COMM_NULL; } } break; /* These next cases are communicators that include some * but not all of the processes */ case 8: case 9: case 10: case 11: { int newsize; merr = MPI_Comm_size(MPI_COMM_WORLD, &size); if (merr) MTestPrintError(merr); newsize = size - (intraCommIdx - 7); if (allowSmaller && newsize >= min_size) { merr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (merr) MTestPrintError(merr); merr = MPI_Comm_split(MPI_COMM_WORLD, rank < newsize, rank, comm); if (merr) MTestPrintError(merr); if (rank >= newsize) { merr = MPI_Comm_free(comm); if (merr) MTestPrintError(merr); *comm = MPI_COMM_NULL; } else { intraCommName = "Split of WORLD"; } } else { /* Act like default */ *comm = MPI_COMM_NULL; intraCommIdx = -1; } } break; /* Other ideas: dup of self, cart comm, graph comm */ default: *comm = MPI_COMM_NULL; intraCommIdx = -1; break; } if (*comm != MPI_COMM_NULL) { merr = MPI_Comm_size(*comm, &size); if (merr) MTestPrintError(merr); if (size >= min_size) done = 1; } else { intraCommName = "MPI_COMM_NULL"; isBasic = 1; done = 1; } /* we are only done if all processes are done */ MPI_Allreduce(MPI_IN_PLACE, &done, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); /* Advance the comm index whether we are done or not, otherwise we could * spin forever trying to allocate a too-small communicator over and * over again. */ intraCommIdx++; if (!done && !isBasic && *comm != MPI_COMM_NULL) { /* avoid leaking communicators */ merr = MPI_Comm_free(comm); if (merr) MTestPrintError(merr); } } return intraCommIdx; }
SEXP Rhpc_gethandle(SEXP procs)
{
  int num_procs;
  MPI_Comm *ptr;
  SEXP com;
  int num;
  MPI_Comm pcomm;

  if (RHPC_Comm == MPI_COMM_NULL){
    error("Rhpc_initialize is not called.");
    return(R_NilValue);
  }
  if(finalize){
    warning("Rhpc was already finalized.");
    return(R_NilValue);
  }
  if(!initialize){
    warning("Rhpc not initialized.");
    return(R_NilValue);
  }

  num_procs = INTEGER (procs)[0];

  ptr = Calloc(1,MPI_Comm);
  PROTECT(com = R_MakeExternalPtr(ptr, R_NilValue, R_NilValue));
  R_RegisterCFinalizer(com, comm_free);
  SXP2COMM(com) = RHPC_Comm;

  if (num_procs == NA_INTEGER){/* use mpirun */
    _M(MPI_Comm_size(SXP2COMM(com), &num));
    Rprintf("Detected communication size %d\n", num);
    if( num > 1 ){
      if ( num_procs > 0){
        warning("ignoring procs argument, returning MPI_COMM_WORLD");
      }
    }else{
      if ( num == 1){
        warning("only the current master process; no worker processes found.");
      }
      SXP2COMM(com)=MPI_COMM_NULL;
      warning("please specify the number of processes in mpirun or mpiexec, or provide a number of processes to spawn");
    }
    UNPROTECT(1);
    return(com);
  }else{ /* spawn */
    if(num_procs < 1){
      warning("you need a positive number for the procs argument");
      UNPROTECT(1);
      return(com);
    }
    _M(MPI_Comm_size(SXP2COMM(com), &num));
    if(num > 1){
      warning("ignoring procs argument, returning the last communicator");
      UNPROTECT(1);
      return(com);
    }
  }

  _M(MPI_Comm_spawn(RHPC_WORKER_CMD, MPI_ARGV_NULL, num_procs, MPI_INFO_NULL,
                    0, MPI_COMM_SELF, &pcomm, MPI_ERRCODES_IGNORE));
  _M(MPI_Intercomm_merge( pcomm, 0, SXP2COMMP(com)));
  _M(MPI_Comm_free( &pcomm ));
  _M(MPI_Comm_size(SXP2COMM(com), &num));
  RHPC_Comm = SXP2COMM(com); /* rewrite RHPC_Comm */
  _M(MPI_Comm_set_errhandler(RHPC_Comm, MPI_ERRORS_RETURN));
  _M(MPI_Comm_rank(RHPC_Comm, &MPI_rank));
  _M(MPI_Comm_size(RHPC_Comm, &MPI_procs));
  DPRINT("Rhpc_getHandle(MPI_Comm_spawn : rank:%d size:%d\n", MPI_rank, MPI_procs);
  Rhpc_set_options( MPI_rank, MPI_procs,RHPC_Comm);
  UNPROTECT(1);
  return(com);
}