int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIR_Context_id_t * context_id,
                                 MPIR_Context_id_t * recvcontext_id)
{
    MPIR_Context_id_t mycontext_id, remote_context_id;
    int mpi_errno = MPI_SUCCESS;
    int tag = 31567;            /* FIXME - we need an internal tag or
                                 * communication channel.  Can we use a different
                                 * context instead?  Or can we use the tag
                                 * provided in the intercomm routine?  (not on a dup,
                                 * but in that case it can use the collective context) */
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    if (!comm_ptr->local_comm) {
        /* Manufacture the local communicator */
        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = MPIR_Get_contextid_sparse(comm_ptr->local_comm, &mycontext_id, FALSE);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(mycontext_id != 0);

    /* The MPIC routine uses an internal context id.  The local leaders
     * (process 0 on each side) exchange data. */
    remote_context_id = -1;
    if (comm_ptr->rank == 0) {
        mpi_errno = MPIC_Sendrecv(&mycontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
                                  &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
                                  comm_ptr, MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    /* Make sure that all of the local processes now have this id */
    mpi_errno = MPID_Bcast(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
                           0, comm_ptr->local_comm, &errflag);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

    /* The recvcontext_id must be the one that was allocated out of the local
     * group, not the remote group.  Otherwise we could end up posting two
     * MPI_ANY_SOURCE,MPI_ANY_TAG recvs on the same context IDs even though we
     * are attempting to post them for two separate communicators. */
    *context_id = remote_context_id;
    *recvcontext_id = mycontext_id;

  fn_fail:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
    return mpi_errno;
}
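/*
 * Hedged illustration (not part of MPICH): the exchange above follows a common
 * intercommunicator idiom -- the two local leaders (rank 0 on each side) swap a
 * value point-to-point, then each side broadcasts the received value over its
 * local intracommunicator.  A minimal sketch of that pattern at the public MPI
 * level, assuming `intercomm` is a valid intercommunicator and `localcomm` is
 * the matching local intracommunicator; the helper name and the reuse of tag
 * 31567 are illustrative choices, not MPICH API.
 */
#include <mpi.h>

static int exchange_with_remote_leader(MPI_Comm intercomm, MPI_Comm localcomm, int myval)
{
    int remoteval = -1;
    int lrank;

    MPI_Comm_rank(localcomm, &lrank);
    if (lrank == 0) {
        /* On an intercommunicator, destination/source 0 names the remote
         * group's leader. */
        MPI_Sendrecv(&myval, 1, MPI_INT, 0, 31567,
                     &remoteval, 1, MPI_INT, 0, 31567,
                     intercomm, MPI_STATUS_IGNORE);
    }
    /* Propagate the leader's result to the rest of the local group. */
    MPI_Bcast(&remoteval, 1, MPI_INT, 0, localcomm);
    return remoteval;
}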
int MPIR_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm * comm_ptr,
               MPIR_Errflag_t * errflag)
{
    int mpi_errno = MPI_SUCCESS;

    if (MPIR_CVAR_BCAST_DEVICE_COLLECTIVE && MPIR_CVAR_DEVICE_COLLECTIVES) {
        mpi_errno = MPID_Bcast(buffer, count, datatype, root, comm_ptr, errflag);
    } else {
        mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, errflag);
    }

    return mpi_errno;
}
int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
                               MPIR_Comm *peer_comm_ptr, int remote_leader, int tag,
                               MPIR_Comm **new_intercomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t final_context_id, recvcontext_id;
    int remote_size = 0, *remote_lpids = NULL;
    int comm_info[3];
    int is_low_group = 0;
    int cts_tag;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);

    /* Shift the tag into the tagged collective space (the tag provided by the
     * user is ignored as of MPI 3.0) */
    cts_tag = MPIR_COMM_KIND__INTERCOMM_CREATE_TAG | MPIR_Process.tagged_coll_mask;

    mpi_errno = MPID_Intercomm_exchange_map(local_comm_ptr, local_leader,
                                            peer_comm_ptr, remote_leader,
                                            &remote_size, &remote_lpids, &is_low_group);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /*
     * Create the contexts.  Each group will have a context for sending to the
     * other group.  All processes must be involved.  Because we know that the
     * local and remote groups are disjoint, this step will complete.
     */
    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE,
                    (MPL_DBG_FDEST, "About to get contextid (local_size=%d) on rank %d",
                     local_comm_ptr->local_size, local_comm_ptr->rank));
    /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
     * calling routine already holds the single critical section */
    /* TODO: Make sure this is tag-safe */
    mpi_errno = MPIR_Get_contextid_sparse(local_comm_ptr, &recvcontext_id, FALSE);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(recvcontext_id != 0);
    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST, "Got contextid=%d", recvcontext_id));

    /* Leaders can now swap context ids and then broadcast the value to the
     * local group of processes */
    if (local_comm_ptr->rank == local_leader) {
        MPIR_Context_id_t remote_context_id;

        mpi_errno = MPIC_Sendrecv(&recvcontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
                                  remote_leader, cts_tag,
                                  &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
                                  remote_leader, cts_tag,
                                  peer_comm_ptr, MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        final_context_id = remote_context_id;

        /* Now, send all of our local processes the remote_lpids, along with
         * the final context id */
        comm_info[0] = final_context_id;
        MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "About to bcast on local_comm");
        mpi_errno = MPID_Bcast(comm_info, 1, MPI_INT, local_leader, local_comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
        MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "end of bcast on local_comm of size %d",
                      local_comm_ptr->local_size);
    } else {
        /* We're one of the other (non-leader) processes */
        MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "About to receive bcast on local_comm");
        mpi_errno = MPID_Bcast(comm_info, 1, MPI_INT, local_leader, local_comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* Extract the context id and group sign information */
        final_context_id = comm_info[0];
    }

    /* At last, we now have the information that we need to build the
     * intercommunicator */

    /* All processes in the local_comm now build the communicator */

    mpi_errno = MPIR_Comm_create(new_intercomm_ptr);
    if (mpi_errno)
        goto fn_fail;

    (*new_intercomm_ptr)->context_id = final_context_id;
    (*new_intercomm_ptr)->recvcontext_id = recvcontext_id;
    (*new_intercomm_ptr)->remote_size = remote_size;
    (*new_intercomm_ptr)->local_size = local_comm_ptr->local_size;
    (*new_intercomm_ptr)->pof2 = local_comm_ptr->pof2;
    (*new_intercomm_ptr)->rank = local_comm_ptr->rank;
    (*new_intercomm_ptr)->comm_kind = MPIR_COMM_KIND__INTERCOMM;
    (*new_intercomm_ptr)->local_comm = 0;
    (*new_intercomm_ptr)->is_low_group = is_low_group;

    mpi_errno = MPID_Create_intercomm_from_lpids(*new_intercomm_ptr, remote_size, remote_lpids);
    if (mpi_errno)
        goto fn_fail;

    MPIR_Comm_map_dup(*new_intercomm_ptr, local_comm_ptr, MPIR_COMM_MAP_DIR__L2L);

    /* Inherit the error handler (if any) */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(local_comm_ptr));
    (*new_intercomm_ptr)->errhandler = local_comm_ptr->errhandler;
    if (local_comm_ptr->errhandler) {
        MPIR_Errhandler_add_ref(local_comm_ptr->errhandler);
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(local_comm_ptr));

    mpi_errno = MPIR_Comm_commit(*new_intercomm_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    if (remote_lpids) {
        MPL_free(remote_lpids);
        remote_lpids = NULL;
    }
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
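/*
 * Hedged illustration (not part of MPICH): MPIR_Intercomm_create_impl backs the
 * public MPI_Intercomm_create routine.  A minimal user-level sketch, assuming at
 * least two processes: the world communicator is split into two halves, which
 * then bridge each other using MPI_COMM_WORLD as the peer communicator.  The
 * leader ranks and the tag 1234 are arbitrary choices for the example.
 */
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Comm half, inter;
    int wrank, wsize;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    /* Split the world into a "low" and a "high" intracommunicator. */
    int color = (wrank < wsize / 2) ? 0 : 1;
    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &half);

    /* The local leader is rank 0 of each half; the remote leader is named by
     * its rank in the peer communicator (MPI_COMM_WORLD here). */
    int remote_leader = (color == 0) ? wsize / 2 : 0;
    MPI_Intercomm_create(half, 0, MPI_COMM_WORLD, remote_leader, 1234, &inter);

    MPI_Comm_free(&inter);
    MPI_Comm_free(&half);
    MPI_Finalize();
    return 0;
}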
/* comm create impl for intercommunicators; assumes that the standard error
 * checking has already taken place in the calling function */
PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
                                      MPIR_Comm **newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id;
    int *mapping = NULL;
    int *remote_mapping = NULL;
    MPIR_Comm *mapping_comm = NULL;
    int remote_size = -1;
    int rinfo[2];
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_CHKLMEM_DECL(1);
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTER);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTER);

    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);

    /* Create a new communicator from the specified group members */

    /* If there is a context id cache in oldcomm, use it here.  Otherwise, use
     * the appropriate algorithm to get a new context id.  Creating the
     * context id is collective over the *input* communicator, so it must be
     * created before we decide if this process is a member of the group */
    /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
     * calling routine already holds the single critical section */
    if (!comm_ptr->local_comm) {
        MPII_Setup_intercomm_localcomm(comm_ptr);
    }
    mpi_errno = MPIR_Get_contextid_sparse(comm_ptr->local_comm, &new_context_id, FALSE);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(new_context_id != 0);
    MPIR_Assert(new_context_id != comm_ptr->recvcontext_id);

    mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr, &mapping, &mapping_comm);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    *newcomm_ptr = NULL;

    if (group_ptr->rank != MPI_UNDEFINED) {
        /* Get the new communicator structure and context id */
        mpi_errno = MPIR_Comm_create(newcomm_ptr);
        if (mpi_errno)
            goto fn_fail;

        (*newcomm_ptr)->recvcontext_id = new_context_id;
        (*newcomm_ptr)->rank = group_ptr->rank;
        (*newcomm_ptr)->comm_kind = comm_ptr->comm_kind;
        /* Since the group has been provided, let the new communicator know
         * about the group */
        (*newcomm_ptr)->local_comm = 0;
        (*newcomm_ptr)->local_group = group_ptr;
        MPIR_Group_add_ref(group_ptr);

        (*newcomm_ptr)->local_size = group_ptr->size;
        (*newcomm_ptr)->pof2 = MPL_pof2((*newcomm_ptr)->local_size);
        (*newcomm_ptr)->remote_group = 0;

        (*newcomm_ptr)->is_low_group = comm_ptr->is_low_group;
    }

    /* There is an additional step.  We must communicate the information on the
     * local context id and the group members, given by their ranks, so that
     * the remote process can construct the appropriate network address
     * mapping.  First we exchange group sizes and context ids.  Then we
     * exchange the ranks in the remote group, from which the remote network
     * address mapping can be constructed.  We need to use the "collective"
     * context in the original intercommunicator. */
    if (comm_ptr->rank == 0) {
        int info[2];

        info[0] = new_context_id;
        info[1] = group_ptr->size;

        mpi_errno = MPIC_Sendrecv(info, 2, MPI_INT, 0, 0,
                                  rinfo, 2, MPI_INT, 0, 0,
                                  comm_ptr, MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        if (*newcomm_ptr != NULL) {
            (*newcomm_ptr)->context_id = rinfo[0];
        }
        remote_size = rinfo[1];
        MPIR_CHKLMEM_MALLOC(remote_mapping, int *, remote_size * sizeof(int),
                            mpi_errno, "remote_mapping", MPL_MEM_ADDRESS);

        /* Populate and exchange the ranks */
        mpi_errno = MPIC_Sendrecv(mapping, group_ptr->size, MPI_INT, 0, 0,
                                  remote_mapping, remote_size, MPI_INT, 0, 0,
                                  comm_ptr, MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }

        /* Broadcast to the other members of the local group */
        mpi_errno = MPID_Bcast(rinfo, 2, MPI_INT, 0, comm_ptr->local_comm, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        mpi_errno = MPID_Bcast(remote_mapping, remote_size, MPI_INT, 0,
                               comm_ptr->local_comm, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    }
int MPIDI_Comm_spawn_multiple(int count, char **commands, char ***argvs, const int *maxprocs,
                              MPIR_Info **info_ptrs, int root, MPIR_Comm *comm_ptr,
                              MPIR_Comm **intercomm, int *errcodes)
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes = 0, i, mpi_errno = MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors = 0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno;
    int total_num_processes, should_accept = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    if (comm_ptr->rank == root) {
        /* Create an array for the pmi error codes */
        total_num_processes = 0;
        for (i = 0; i < count; i++) {
            total_num_processes += maxprocs[i];
        }
        pmi_errcodes = (int *) MPL_malloc(sizeof(int) * total_num_processes, MPL_MEM_DYNAMIC);
        if (pmi_errcodes == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }

        /* Initialize them to 0 */
        for (i = 0; i < total_num_processes; i++)
            pmi_errcodes[i] = 0;

        /* Open a port for the spawned processes to connect to */
        /* FIXME: info may be needed for port name */
        mpi_errno = MPID_Open_port(NULL, port_name);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */

        /* Spawn the processes */
#ifdef USE_PMI2_API
        MPIR_Assert(count > 0);
        {
            int *argcs = MPL_malloc(count * sizeof(int), MPL_MEM_DYNAMIC);
            struct MPIR_Info preput;
            struct MPIR_Info *preput_p[1] = { &preput };

            MPIR_Assert(argcs);
            /* info_keyval_sizes = MPL_malloc(count * sizeof(int), MPL_MEM_DYNAMIC); */

            /* FIXME: cheating on constness */
            preput.key = (char *) PARENT_PORT_KVSKEY;
            preput.value = port_name;
            preput.next = NULL;

            /* Compute the argcs array */
            for (i = 0; i < count; ++i) {
                argcs[i] = 0;
                if (argvs != NULL && argvs[i] != NULL) {
                    while (argvs[i][argcs[i]]) {
                        ++argcs[i];
                    }
                }

                /* a fib for now */
                /* info_keyval_sizes[i] = 0; */
            }

            /* XXX DJG don't need this, PMI API is thread-safe? */
            /* MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); */
            /* Release the global CS for spawn PMI calls */
            MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            pmi_errno = PMI2_Job_Spawn(count, (const char **) commands,
                                       argcs, (const char ***) argvs,
                                       maxprocs,
                                       info_keyval_sizes, (const MPIR_Info **) info_ptrs,
                                       1, (const struct MPIR_Info **) preput_p,
                                       NULL, 0, /* jobId, jobIdSize, */
                                       /* XXX DJG job stuff? */
                                       pmi_errcodes);
            MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            /* MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); */
            MPL_free(argcs);
            if (pmi_errno != PMI2_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                     "**pmi_spawn_multiple", "**pmi_spawn_multiple %d",
                                     pmi_errno);
            }
        }
#else
        /* FIXME: This is *really* awkward.  We should either fix on MPI-style
         * info data structures for PMI (avoid unnecessary duplication) or add
         * an MPIU_Info_getall(...) that creates the necessary arrays of
         * key/value pairs */

        /* Convert the infos into PMI keyvals */
        info_keyval_sizes = (int *) MPL_malloc(count * sizeof(int), MPL_MEM_DYNAMIC);
        info_keyval_vectors =
            (PMI_keyval_t **) MPL_malloc(count * sizeof(PMI_keyval_t *), MPL_MEM_DYNAMIC);
        if (!info_keyval_sizes || !info_keyval_vectors) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }

        if (!info_ptrs) {
            for (i = 0; i < count; i++) {
                info_keyval_vectors[i] = 0;
                info_keyval_sizes[i] = 0;
            }
        } else {
            for (i = 0; i < count; i++) {
                mpi_errno = mpi_to_pmi_keyvals(info_ptrs[i], &info_keyval_vectors[i],
                                               &info_keyval_sizes[i]);
                if (mpi_errno) {
                    MPIR_ERR_POP(mpi_errno);
                }
            }
        }

        preput_keyval_vector.key = PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;

        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
        pmi_errno = PMI_Spawn_multiple(count, (const char **) commands,
                                       (const char ***) argvs, maxprocs,
                                       info_keyval_sizes,
                                       (const PMI_keyval_t **) info_keyval_vectors,
                                       1, &preput_keyval_vector, pmi_errcodes);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);

        if (pmi_errno != PMI_SUCCESS) {
            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
        }
#endif

        if (errcodes != MPI_ERRCODES_IGNORE) {
            for (i = 0; i < total_num_processes; i++) {
                /* FIXME: translate the pmi error codes here */
                errcodes[i] = pmi_errcodes[i];
                /* We want to accept if any of the spawns succeeded.
                 * Alternatively, this is the same as we want to NOT accept if
                 * all of them failed.  should_accept = NAND(e_0, ..., e_n)
                 * Remember, success equals false (0). */
                should_accept = should_accept && errcodes[i];
            }
            should_accept = !should_accept;     /* the `N' in NAND */
        }
    }

    if (errcodes != MPI_ERRCODES_IGNORE) {
        MPIR_Errflag_t errflag = MPIR_ERR_NONE;
        mpi_errno = MPID_Bcast(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPID_Bcast(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPID_Bcast(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    }

    if (should_accept) {
        mpi_errno = MPID_Comm_accept(port_name, NULL, root, comm_ptr, intercomm);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmi_spawn_multiple");
    }

    if (comm_ptr->rank == root) {
        /* Close the port opened for the spawned processes to connect to */
        mpi_errno = MPID_Close_port(port_name);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        /* --END ERROR HANDLING-- */
    }

  fn_exit:
    if (info_keyval_vectors) {
        free_pmi_keyvals(info_keyval_vectors, count, info_keyval_sizes);
        MPL_free(info_keyval_sizes);
        MPL_free(info_keyval_vectors);
    }
    if (pmi_errcodes) {
        MPL_free(pmi_errcodes);
    }
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
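/*
 * Hedged illustration (not part of MPICH): MPIDI_Comm_spawn_multiple sits
 * underneath the public MPI_Comm_spawn_multiple routine.  A minimal parent-side
 * sketch that launches two different executables; "./worker_a" and "./worker_b"
 * are hypothetical program names, and the per-process error codes are collected
 * so the caller can see individual spawn failures.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Comm children;
    char *commands[2] = { "./worker_a", "./worker_b" };
    int maxprocs[2] = { 2, 1 };
    MPI_Info infos[2] = { MPI_INFO_NULL, MPI_INFO_NULL };
    int errcodes[3];    /* one slot per requested child process */

    MPI_Init(&argc, &argv);

    /* Collective over MPI_COMM_WORLD; root 0 drives the actual spawn.
     * MPI_ARGVS_NULL means no command-line arguments for any command. */
    MPI_Comm_spawn_multiple(2, commands, MPI_ARGVS_NULL, maxprocs, infos,
                            0, MPI_COMM_WORLD, &children, errcodes);

    for (int i = 0; i < 3; i++) {
        if (errcodes[i] != MPI_SUCCESS)
            fprintf(stderr, "spawn of child %d failed\n", i);
    }

    MPI_Comm_disconnect(&children);
    MPI_Finalize();
    return 0;
}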