/*
 * MPIDI_Comm_spawn_multiple - spawn processes through PMI and accept a
 * connection from them.
 *
 * Parameters:
 *   count     - number of commands (length of commands/argvs/maxprocs)
 *   commands  - command names, passed to the PMI spawn call
 *   argvs     - per-command argument vectors (may be NULL, entries may be NULL)
 *   maxprocs  - per-command process counts; their sum sizes the error arrays
 *   info_ptrs - per-command info objects, or NULL for none
 *   root      - rank in comm_ptr that performs the spawn
 *   comm_ptr  - communicator over which the decision is broadcast
 *   intercomm - receives the communicator produced by MPID_Comm_accept
 *   errcodes  - per-process error codes, or MPI_ERRCODES_IGNORE
 *
 * On root: opens a port, calls PMI2_Job_Spawn (when USE_PMI2_API is defined)
 * or PMI_Spawn_multiple with the port name pre-put under PARENT_PORT_KVSKEY,
 * and copies the PMI error codes into errcodes.  On every rank: if errcodes
 * is not MPI_ERRCODES_IGNORE, should_accept, the process total and errcodes
 * are broadcast from root.  MPID_Comm_accept is then called when
 * should_accept is non-zero; otherwise "**pmi_spawn_multiple" is raised.
 * Root finally closes the port.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_Comm_spawn_multiple(int count, char **commands,
                              char ***argvs, const int *maxprocs,
                              MPIR_Info **info_ptrs, int root,
                              MPIR_Comm *comm_ptr, MPIR_Comm **intercomm,
                              int *errcodes)
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes=0, i, mpi_errno=MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors=0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno;
    int total_num_processes, should_accept = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    if (comm_ptr->rank == root) {
        /* create an array for the pmi error codes */
        total_num_processes = 0;
        for (i=0; i<count; i++) {
            total_num_processes += maxprocs[i];
        }
        pmi_errcodes = (int*)MPL_malloc(sizeof(int) * total_num_processes);
        if (pmi_errcodes == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }

        /* initialize them to 0 */
        for (i=0; i<total_num_processes; i++)
            pmi_errcodes[i] = 0;

        /* Open a port for the spawned processes to connect to */
        /* FIXME: info may be needed for port name */
        mpi_errno = MPID_Open_port(NULL, port_name);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */

        /* Spawn the processes */
        /* NOTE(review): the error jumps in this root-only section skip the
           broadcasts below on root while the other ranks still enter them
           when errcodes is not MPI_ERRCODES_IGNORE - confirm this is
           handled by the caller. */
#ifdef USE_PMI2_API
        MPIR_Assert(count > 0);
        {
            int *argcs = MPL_malloc(count*sizeof(int));
            struct MPIR_Info preput;
            struct MPIR_Info *preput_p[1] = { &preput };

            MPIR_Assert(argcs);

            /* FIXME cheating on constness */
            preput.key = (char *)PARENT_PORT_KVSKEY;
            preput.value = port_name;
            preput.next = NULL;

            /* compute argcs array: count entries up to the NULL terminator */
            for (i = 0; i < count; ++i) {
                argcs[i] = 0;
                if (argvs != NULL && argvs[i] != NULL) {
                    while (argvs[i][argcs[i]]) {
                        ++argcs[i];
                    }
                }
            }

            /* XXX DJG don't need a PMI mutex here, PMI API is thread-safe? */
            /* release the global CS for spawn PMI calls */
            MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            /* info_keyval_sizes is still NULL on this path */
            pmi_errno = PMI2_Job_Spawn(count, (const char **)commands,
                                       argcs, (const char ***)argvs,
                                       maxprocs,
                                       info_keyval_sizes, (const MPIR_Info **)info_ptrs,
                                       1, (const struct MPIR_Info **)preput_p,
                                       NULL, 0,
                                       /*jobId, jobIdSize,*/ /* XXX DJG job stuff? */
                                       pmi_errcodes);
            MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPL_free(argcs);
            if (pmi_errno != PMI2_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                     "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
            }
        }
#else
        /* FIXME: This is *really* awkward.  We should either
           Fix on MPI-style info data structures for PMI (avoid unnecessary
           duplication) or add an MPIU_Info_getall(...) that creates
           the necessary arrays of key/value pairs */

        /* convert the infos into PMI keyvals */
        /* Check each allocation on its own so that a non-NULL
           info_keyval_vectors at fn_exit always has a valid sizes array
           to go with it. */
        info_keyval_sizes = (int *) MPL_malloc(count * sizeof(int));
        if (!info_keyval_sizes) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }
        info_keyval_vectors =
            (PMI_keyval_t**) MPL_malloc(count * sizeof(PMI_keyval_t*));
        if (!info_keyval_vectors) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }

        /* Start from empty entries so the cleanup at fn_exit never sees an
           uninitialized slot if a conversion below fails partway. */
        for (i=0; i<count; i++) {
            info_keyval_vectors[i] = 0;
            info_keyval_sizes[i] = 0;
        }

        if (info_ptrs) {
            for (i=0; i<count; i++) {
                mpi_errno = mpi_to_pmi_keyvals( info_ptrs[i],
                                                &info_keyval_vectors[i],
                                                &info_keyval_sizes[i] );
                if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
            }
        }

        preput_keyval_vector.key = PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;

        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
        pmi_errno = PMI_Spawn_multiple(count, (const char **)
                                       commands,
                                       (const char ***) argvs,
                                       maxprocs, info_keyval_sizes,
                                       (const PMI_keyval_t **)
                                       info_keyval_vectors, 1,
                                       &preput_keyval_vector,
                                       pmi_errcodes);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
        if (pmi_errno != PMI_SUCCESS) {
            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
        }
#endif

        if (errcodes != MPI_ERRCODES_IGNORE) {
            for (i=0; i<total_num_processes; i++) {
                /* FIXME: translate the pmi error codes here */
                errcodes[i] = pmi_errcodes[i];
                /* We want to accept if any of the spawns succeeded.
                   Alternatively, this is the same as we want to NOT accept if
                   all of them failed.  should_accept = NAND(e_0, ..., e_n)
                   Remember, success equals false (0). */
                should_accept = should_accept && errcodes[i];
            }
            should_accept = !should_accept; /* the `N' in NAND */
        }
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;

        /* Share root's decision, the process total, and the error codes */
        mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
    else {
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple");
    }

    if (comm_ptr->rank == root) {
        /* Close the port opened for the spawned processes to connect to */
        mpi_errno = MPID_Close_port(port_name);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS)
        {
            MPIR_ERR_POP(mpi_errno);
        }
        /* --END ERROR HANDLING-- */
    }

 fn_exit:
    if (info_keyval_vectors) {
        free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
        MPL_free(info_keyval_vectors);
    }
    /* Released independently of the vectors so it cannot leak when only
       the sizes allocation succeeded. */
    if (info_keyval_sizes) {
        MPL_free(info_keyval_sizes);
    }
    if (pmi_errcodes) {
        MPL_free(pmi_errcodes);
    }
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_Comm_spawn_multiple - spawn processes through PMI and accept a
 * connection from them.
 *
 * Parameters:
 *   count     - number of commands (length of commands/argvs/maxprocs)
 *   commands  - command names, passed to PMI_Spawn_multiple
 *   argvs     - per-command argument vectors, passed to PMI_Spawn_multiple
 *   maxprocs  - per-command process counts; their sum sizes the error arrays
 *   info_ptrs - per-command info objects, or NULL for none
 *   root      - rank in comm_ptr that performs the spawn
 *   comm_ptr  - communicator over which the results are broadcast
 *   intercomm - receives the communicator from MPID_Comm_accept (or from
 *               MPIR_Comm_create when nothing is accepted)
 *   errcodes  - per-process error codes, or MPI_ERRCODES_IGNORE
 *
 * On root: opens a port, converts the infos to PMI keyvals, calls
 * PMI_Spawn_multiple with the port name pre-put under
 * MPIDI_PARENT_PORT_KVSKEY, and copies each PMI error code into the matching
 * errcodes slot.  On every rank: if errcodes is not MPI_ERRCODES_IGNORE,
 * should_accept, pmi_errno, the process total and errcodes are broadcast
 * from root.  Root finally closes the port.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_Comm_spawn_multiple(int count, char *commands[], char **argvs[], const int maxprocs[],
                             MPIR_Info * info_ptrs[], int root, MPIR_Comm * comm_ptr,
                             MPIR_Comm ** intercomm, int errcodes[])
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes = 0, i, mpi_errno = MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors = 0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno = 0;
    int total_num_processes, should_accept = 1;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);

    /* Non-root ranks never open a port; give them a defined (empty) name. */
    memset(port_name, 0, sizeof(port_name));

    if (comm_ptr->rank == root) {
        /* One PMI error code per requested process, all starting at 0. */
        total_num_processes = 0;

        for (i = 0; i < count; i++)
            total_num_processes += maxprocs[i];

        pmi_errcodes = (int *) MPL_malloc(sizeof(int) * total_num_processes, MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!pmi_errcodes, mpi_errno, MPI_ERR_OTHER, "**nomem");

        for (i = 0; i < total_num_processes; i++)
            pmi_errcodes[i] = 0;

        mpi_errno = MPID_Open_port(NULL, port_name);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        info_keyval_sizes = (int *) MPL_malloc(count * sizeof(int), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_sizes, mpi_errno, MPI_ERR_OTHER, "**nomem");
        info_keyval_vectors =
            (PMI_keyval_t **) MPL_malloc(count * sizeof(PMI_keyval_t *), MPL_MEM_BUFFER);
        MPIR_ERR_CHKANDJUMP(!info_keyval_vectors, mpi_errno, MPI_ERR_OTHER, "**nomem");

        /* Start from empty entries so the cleanup at fn_exit never sees an
         * uninitialized slot if a conversion below fails partway. */
        for (i = 0; i < count; i++) {
            info_keyval_vectors[i] = 0;
            info_keyval_sizes[i] = 0;
        }

        if (info_ptrs) {
            for (i = 0; i < count; i++) {
                mpi_errno = mpi_to_pmi_keyvals(info_ptrs[i],
                                               &info_keyval_vectors[i], &info_keyval_sizes[i]);

                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
            }
        }

        preput_keyval_vector.key = MPIDI_PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;
        pmi_errno = PMI_Spawn_multiple(count, (const char **)
                                       commands,
                                       (const char ***) argvs,
                                       maxprocs, info_keyval_sizes,
                                       (const PMI_keyval_t **)
                                       info_keyval_vectors, 1, &preput_keyval_vector, pmi_errcodes);

        /* NOTE(review): this jump skips the broadcasts below on root while
         * the other ranks still enter them when errcodes is not
         * MPI_ERRCODES_IGNORE - confirm this is handled by the caller. */
        if (pmi_errno != PMI_SUCCESS)
            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);

        if (errcodes != MPI_ERRCODES_IGNORE) {
            for (i = 0; i < total_num_processes; i++) {
                /* Each process reports its own code (was pmi_errcodes[0],
                 * which copied the first process's code to every slot). */
                errcodes[i] = pmi_errcodes[i];
                /* Stays non-zero only while every code so far is a failure
                 * (success is 0). */
                should_accept = should_accept && errcodes[i];
            }

            /* Accept unless every spawn failed. */
            should_accept = !should_accept;
        }
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;

        /* Share root's decision, PMI status, process total and codes. */
        mpi_errno = MPIR_Bcast(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&pmi_errno, 1, MPI_INT, root, comm_ptr, &errflag);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Bcast(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        /* should_accept is only cleared inside the errcodes != IGNORE
         * paths above, so errcodes is a real array when this is reached. */
        if ((pmi_errno == PMI_SUCCESS) && (errcodes[0] != 0)) {
            mpi_errno = MPIR_Comm_create(intercomm);

            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

    if (comm_ptr->rank == root) {
        mpi_errno = MPID_Close_port(port_name);

        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:

    if (info_keyval_vectors) {
        free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
        MPL_free(info_keyval_vectors);
    }

    MPL_free(info_keyval_sizes);
    MPL_free(pmi_errcodes);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * MPIR_Comm_accept_impl - forward an accept request to the device layer.
 *
 * Hands every argument unchanged to MPID_Comm_accept and returns that
 * call's result.
 */
int MPIR_Comm_accept_impl(const char * port_name, MPIR_Info * info_ptr, int root,
                          MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno;

    mpi_errno = MPID_Comm_accept(port_name, info_ptr, root, comm_ptr, newcomm_ptr);
    return mpi_errno;
}