int MPIDI_Comm_spawn_multiple(int count, char **commands, char ***argvs, const int *maxprocs, MPID_Info **info_ptrs, int root, MPID_Comm *comm_ptr, MPID_Comm **intercomm, int *errcodes) { char port_name[MPI_MAX_PORT_NAME]; int *info_keyval_sizes=0, i, mpi_errno=MPI_SUCCESS; PMI_keyval_t **info_keyval_vectors=0, preput_keyval_vector; int *pmi_errcodes = 0, pmi_errno; int total_num_processes, should_accept = 1; MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE); if (comm_ptr->rank == root) { /* create an array for the pmi error codes */ total_num_processes = 0; for (i=0; i<count; i++) { total_num_processes += maxprocs[i]; } pmi_errcodes = (int*)MPIU_Malloc(sizeof(int) * total_num_processes); if (pmi_errcodes == NULL) { MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem"); } /* initialize them to 0 */ for (i=0; i<total_num_processes; i++) pmi_errcodes[i] = 0; /* Open a port for the spawned processes to connect to */ /* FIXME: info may be needed for port name */ mpi_errno = MPID_Open_port(NULL, port_name); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) MPIU_ERR_POP(mpi_errno); /* --END ERROR HANDLING-- */ /* Spawn the processes */ #ifdef USE_PMI2_API MPIU_Assert(count > 0); { int *argcs = MPIU_Malloc(count*sizeof(int)); struct MPID_Info preput; struct MPID_Info *preput_p[1] = { &preput }; MPIU_Assert(argcs); /* info_keyval_sizes = MPIU_Malloc(count * sizeof(int)); */ /* FIXME cheating on constness */ preput.key = (char *)PARENT_PORT_KVSKEY; preput.value = port_name; preput.next = NULL; /* compute argcs array */ for (i = 0; i < count; ++i) { argcs[i] = 0; if (argvs != NULL && argvs[i] != NULL) { while (argvs[i][argcs[i]]) { ++argcs[i]; } } /* a fib for now */ /* info_keyval_sizes[i] = 0; */ } /* XXX DJG don't need this, PMI API is thread-safe? 
*/ /*MPIU_THREAD_CS_ENTER(PMI,);*/ /* release the global CS for spawn PMI calls */ MPIU_THREAD_CS_EXIT(ALLFUNC,); pmi_errno = PMI2_Job_Spawn(count, (const char **)commands, argcs, (const char ***)argvs, maxprocs, info_keyval_sizes, (const MPID_Info **)info_ptrs, 1, (const struct MPID_Info **)preput_p, NULL, 0, /*jobId, jobIdSize,*/ /* XXX DJG job stuff? */ pmi_errcodes); MPIU_THREAD_CS_ENTER(ALLFUNC,); /*MPIU_THREAD_CS_EXIT(PMI,);*/ MPIU_Free(argcs); if (pmi_errno != PMI2_SUCCESS) { MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno); } } #else /* FIXME: This is *really* awkward. We should either Fix on MPI-style info data structures for PMI (avoid unnecessary duplication) or add an MPIU_Info_getall(...) that creates the necessary arrays of key/value pairs */ /* convert the infos into PMI keyvals */ info_keyval_sizes = (int *) MPIU_Malloc(count * sizeof(int)); info_keyval_vectors = (PMI_keyval_t**) MPIU_Malloc(count * sizeof(PMI_keyval_t*)); if (!info_keyval_sizes || !info_keyval_vectors) { MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem"); } if (!info_ptrs) { for (i=0; i<count; i++) { info_keyval_vectors[i] = 0; info_keyval_sizes[i] = 0; } } else { for (i=0; i<count; i++) { mpi_errno = mpi_to_pmi_keyvals( info_ptrs[i], &info_keyval_vectors[i], &info_keyval_sizes[i] ); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } } } preput_keyval_vector.key = PARENT_PORT_KVSKEY; preput_keyval_vector.val = port_name; MPIU_THREAD_CS_ENTER(PMI,); pmi_errno = PMI_Spawn_multiple(count, (const char **) commands, (const char ***) argvs, maxprocs, info_keyval_sizes, (const PMI_keyval_t **) info_keyval_vectors, 1, &preput_keyval_vector, pmi_errcodes); MPIU_THREAD_CS_EXIT(PMI,); if (pmi_errno != PMI_SUCCESS) { MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno); } #endif if (errcodes != MPI_ERRCODES_IGNORE) { for (i=0; i<total_num_processes; i++) { /* FIXME: translate the 
pmi error codes here */ errcodes[i] = pmi_errcodes[i]; /* We want to accept if any of the spawns succeeded. Alternatively, this is the same as we want to NOT accept if all of them failed. should_accept = NAND(e_0, ..., e_n) Remember, success equals false (0). */ should_accept = should_accept && errcodes[i]; } should_accept = !should_accept; /* the `N' in NAND */ } }
/*
 * MPID_Comm_spawn_multiple - spawn processes through PMI and, if appropriate,
 * accept a connection from them to form an intercommunicator.
 *
 * Only the rank equal to `root` talks to PMI: it opens a port, converts the
 * MPI info objects into PMI key/value vectors, and calls PMI_Spawn_multiple
 * with the port name pre-put under MPIDI_PARENT_PORT_KVSKEY.  When the caller
 * supplied an errcodes array, the accept decision, the PMI return code, the
 * process total and the error codes are broadcast from root over comm_ptr.
 *
 * Parameters:
 *   count      - number of entries in commands, argvs, maxprocs and info_ptrs
 *   commands   - passed through to PMI_Spawn_multiple
 *   argvs      - passed through to PMI_Spawn_multiple
 *   maxprocs   - per-command process counts; their sum sizes the error arrays
 *   info_ptrs  - per-command info objects, or NULL for "no info"
 *   root       - rank in comm_ptr that performs the spawn
 *   comm_ptr   - communicator over which the broadcasts and accept are done
 *   intercomm  - output; set by MPID_Comm_accept or MPIR_Comm_create
 *   errcodes   - output array of total-process length, or MPI_ERRCODES_IGNORE
 *
 * Returns MPI_SUCCESS or the MPI error code of the first failing step.
 */
int MPID_Comm_spawn_multiple(int count, char *commands[], char **argvs[], const int maxprocs[],
                             MPIR_Info * info_ptrs[], int root, MPIR_Comm * comm_ptr,
                             MPIR_Comm ** intercomm, int errcodes[])
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes = 0, i, mpi_errno = MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors = 0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno = 0;
    /* Initialized so that non-root ranks never hold an indeterminate value. */
    int total_num_processes = 0, should_accept = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);

    memset(port_name, 0, sizeof(port_name));

    if (comm_ptr->rank == root) {
        /* One PMI error slot per process requested across all commands. */
        total_num_processes = 0;
        for (i = 0; i < count; i++)
            total_num_processes += maxprocs[i];

        pmi_errcodes = (int *) MPL_malloc(sizeof(int) * total_num_processes, MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!pmi_errcodes, mpi_errno, MPI_ERR_OTHER, "**nomem");

        for (i = 0; i < total_num_processes; i++)
            pmi_errcodes[i] = 0;

        /* Port the spawned processes will be told about via the preput keyval.
         * NOTE(review): error paths taken after this point jump to fn_fail
         * without calling MPID_Close_port - confirm whether that is intended. */
        mpi_errno = MPID_Open_port(NULL, port_name);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        info_keyval_sizes = (int *) MPL_malloc(count * sizeof(int), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_sizes, mpi_errno, MPI_ERR_OTHER, "**nomem");
        info_keyval_vectors =
            (PMI_keyval_t **) MPL_malloc(count * sizeof(PMI_keyval_t *), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_vectors, mpi_errno, MPI_ERR_OTHER, "**nomem");

        /* Zero every entry before any conversion.  fn_exit hands all `count`
         * entries to free_pmi_keyvals, so if a conversion fails part-way the
         * untouched tail must be in the same empty state (NULL vector, size 0)
         * that the no-info case already passes to it, not malloc garbage. */
        for (i = 0; i < count; i++) {
            info_keyval_vectors[i] = 0;
            info_keyval_sizes[i] = 0;
        }

        if (info_ptrs) {
            for (i = 0; i < count; i++) {
                mpi_errno = mpi_to_pmi_keyvals(info_ptrs[i], &info_keyval_vectors[i],
                                               &info_keyval_sizes[i]);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
            }
        }

        preput_keyval_vector.key = MPIDI_PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;

        pmi_errno = PMI_Spawn_multiple(count, (const char **) commands, (const char ***) argvs,
                                       maxprocs, info_keyval_sizes,
                                       (const PMI_keyval_t **) info_keyval_vectors,
                                       1, &preput_keyval_vector, pmi_errcodes);
        if (pmi_errno != PMI_SUCCESS)
            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple",
                                 "**pmi_spawn_multiple %d", pmi_errno);

        if (errcodes != MPI_ERRCODES_IGNORE) {
            /* Accept unless every reported code is non-zero (0 means success):
             * should_accept ends up as NAND over the copied codes. */
            for (i = 0; i < total_num_processes; i++) {
                /* NOTE(review): every slot receives pmi_errcodes[0], not
                 * pmi_errcodes[i].  Left as-is because the accept decision
                 * below depends on it - confirm whether PMI fills per-process
                 * codes before changing. */
                errcodes[i] = pmi_errcodes[0];
                should_accept = should_accept && errcodes[i];
            }

            should_accept = !should_accept;
        }
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        /* Share root's decision and results so all ranks take the same branch
         * below.  When errcodes is ignored, should_accept keeps its initial 1
         * on every rank and nothing is broadcast. */
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;

        mpi_errno = MPIR_Bcast(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&pmi_errno, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        /* Reached only when errcodes was supplied (otherwise should_accept
         * is still 1), so reading errcodes[0] here is not reading the
         * MPI_ERRCODES_IGNORE sentinel. */
        if ((pmi_errno == PMI_SUCCESS) && (errcodes[0] != 0)) {
            mpi_errno = MPIR_Comm_create(intercomm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

    if (comm_ptr->rank == root) {
        mpi_errno = MPID_Close_port(port_name);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    /* Shared cleanup for success and failure; all three pointers are still
     * NULL on non-root ranks and on early failures. */
    if (info_keyval_vectors) {
        free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
        MPL_free(info_keyval_vectors);
    }

    MPL_free(info_keyval_sizes);
    MPL_free(pmi_errcodes);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}