/*@
    MPI_Send - Performs a basic send

Input Parameters:
+ buf - initial address of send buffer (choice)
. count - number of elements in send buffer (nonnegative integer)
. datatype - datatype of each send buffer element (handle)
. dest - rank of destination (integer)
. tag - message tag (integer)
- comm - communicator (handle)

Notes:
This routine may block until the message is received.

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_TAG
.N MPI_ERR_RANK

.seealso: MPI_Isend, MPI_Bsend
@*/
EXPORT_MPI_API int MPI_Send( void *buf, int count, MPI_Datatype datatype,
                             int dest, int tag, MPI_Comm comm )
{
    int mpi_errno = MPI_SUCCESS;
    struct MPIR_COMMUNICATOR *comm_ptr;
    struct MPIR_DATATYPE     *dtype_ptr;
    static char myname[] = "MPI_SEND";

    if (dest == MPI_PROC_NULL)
        return MPI_SUCCESS;

    comm_ptr = MPIR_GET_COMM_PTR(comm);
    MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname);

    dtype_ptr = MPIR_GET_DTYPE_PTR(datatype);
    MPIR_TEST_DTYPE(datatype,dtype_ptr,comm_ptr,myname);

#ifndef MPIR_NO_ERROR_CHECKING
    MPIR_TEST_COUNT(count);
    MPIR_TEST_SEND_TAG(tag);
    MPIR_TEST_SEND_RANK(comm_ptr,dest);
    if (mpi_errno)
        return MPIR_ERROR(comm_ptr, mpi_errno, myname );
#endif

    /* This COULD test for the contiguous homogeneous case first .... */
    MPID_SendDatatype( comm_ptr, buf, count, dtype_ptr,
                       comm_ptr->local_rank, tag,
                       comm_ptr->send_context,
                       comm_ptr->lrank_to_grank[dest],
                       &mpi_errno );
    MPIR_RETURN(comm_ptr, mpi_errno, myname );
}
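/*
 * Usage sketch (editor's addition, not part of the implementation above):
 * a minimal, hypothetical two-rank program showing how MPI_Send pairs with
 * a matching MPI_Recv.  The tag (99) and buffer contents are illustrative
 * assumptions.  Disabled with #if 0 so it does not affect this file.
 */
#if 0
#include <mpi.h>
#include <stdio.h>

int main( int argc, char *argv[] )
{
    int rank, value = 42;
    MPI_Status status;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    if (rank == 0)
        MPI_Send( &value, 1, MPI_INT, 1, 99, MPI_COMM_WORLD );
    else if (rank == 1) {
        MPI_Recv( &value, 1, MPI_INT, 0, 99, MPI_COMM_WORLD, &status );
        printf( "received %d\n", value );
    }
    MPI_Finalize();
    return 0;
}
#endif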
/*@
    MPI_Issend - Starts a nonblocking synchronous send

Input Parameters:
+ buf - initial address of send buffer (choice)
. count - number of elements in send buffer (integer)
. datatype - datatype of each send buffer element (handle)
. dest - rank of destination (integer)
. tag - message tag (integer)
- comm - communicator (handle)

Output Parameter:
. request - communication request (handle)

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_TAG
.N MPI_ERR_RANK
.N MPI_ERR_EXHAUSTED
@*/
int MPI_Issend( void *buf, int count, MPI_Datatype datatype, int dest,
                int tag, MPI_Comm comm, MPI_Request *request )
{
    struct MPIR_COMMUNICATOR *comm_ptr;
    struct MPIR_DATATYPE     *dtype_ptr;
    MPIR_SHANDLE *shandle;
    static char myname[] = "MPI_ISSEND";
    int mpi_errno = MPI_SUCCESS;

    disableSignal();
    TR_PUSH(myname);

    comm_ptr = MPIR_GET_COMM_PTR(comm);
    MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname);

    dtype_ptr = MPIR_GET_DTYPE_PTR(datatype);
    MPIR_TEST_DTYPE(datatype,dtype_ptr,comm_ptr,myname);

#ifndef MPIR_NO_ERROR_CHECKING
    MPIR_TEST_COUNT(count);
    MPIR_TEST_SEND_TAG(tag);
    MPIR_TEST_SEND_RANK(comm_ptr,dest);
    if (mpi_errno) {
        revertSignal();
        return MPIR_ERROR(comm_ptr, mpi_errno, myname );
    }
#endif

    MPIR_ALLOCFN(shandle,MPID_SendAlloc,
                 comm_ptr,MPI_ERR_EXHAUSTED,myname );
    *request = (MPI_Request)shandle;
    MPID_Request_init( shandle, MPIR_SEND );
    MPIR_REMEMBER_SEND( shandle, buf, count, datatype, dest, tag, comm_ptr);

    if (dest == MPI_PROC_NULL) {
        shandle->is_complete = 1;
        revertSignal();
        return MPI_SUCCESS;
    }

    /* This COULD test for the contiguous homogeneous case first .... */
    MPID_IssendDatatype( comm_ptr, buf, count, dtype_ptr,
                         comm_ptr->local_rank, tag,
                         comm_ptr->send_context,
                         comm_ptr->lrank_to_grank[dest],
                         *request, &mpi_errno );
    if (mpi_errno) {
        revertSignal();
        return MPIR_ERROR( comm_ptr, mpi_errno, myname );
    }
    TR_POP;
    revertSignal();
    return MPI_SUCCESS;
}
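/*
 * Usage sketch (editor's addition): a hypothetical caller that starts a
 * synchronous send with MPI_Issend, overlaps local work, and completes it
 * with MPI_Wait.  Because the send is synchronous, MPI_Wait cannot return
 * until the matching receive has started.  Tag and values are illustrative.
 */
#if 0
#include <mpi.h>

static void exchange( int rank )
{
    int value = 7;
    MPI_Request request;
    MPI_Status  status;

    if (rank == 0) {
        MPI_Issend( &value, 1, MPI_INT, 1, 5, MPI_COMM_WORLD, &request );
        /* ... unrelated local work may proceed here; "value" must not be
           modified until the send completes ... */
        MPI_Wait( &request, &status );
    }
    else if (rank == 1)
        MPI_Recv( &value, 1, MPI_INT, 0, 5, MPI_COMM_WORLD, &status );
}
#endif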
/*@
    MPI_Ssend_init - Builds a handle for a synchronous send

Input Parameters:
+ buf - initial address of send buffer (choice)
. count - number of elements sent (integer)
. datatype - type of each element (handle)
. dest - rank of destination (integer)
. tag - message tag (integer)
- comm - communicator (handle)

Output Parameter:
. request - communication request (handle)

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_TAG
.N MPI_ERR_RANK
@*/
int MPI_Ssend_init( void *buf, int count, MPI_Datatype datatype, int dest,
                    int tag, MPI_Comm comm, MPI_Request *request )
{
    int mpi_errno = MPI_SUCCESS;
    struct MPIR_DATATYPE *dtype_ptr;
    struct MPIR_COMMUNICATOR *comm_ptr;
    static char myname[] = "MPI_SSEND_INIT";
    MPIR_PSHANDLE *shandle;

    disableSignal();
    TR_PUSH(myname);

    comm_ptr = MPIR_GET_COMM_PTR(comm);
    MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname);

#ifndef MPIR_NO_ERROR_CHECKING
    MPIR_TEST_COUNT(count);
    MPIR_TEST_SEND_TAG(tag);
    MPIR_TEST_SEND_RANK(comm_ptr,dest);
    if (mpi_errno) {
        revertSignal();
        return MPIR_ERROR(comm_ptr, mpi_errno, myname );
    }
#endif

    /* This is IDENTICAL to the create_send code except for the send
       function */
    MPIR_ALLOCFN(shandle,MPID_PSendAlloc,
                 comm_ptr,MPI_ERR_EXHAUSTED,myname );
    *request = (MPI_Request)shandle;
    MPID_Request_init( &(shandle->shandle), MPIR_PERSISTENT_SEND );

    /* Save the information about the operation, being careful with
       ref-counted items */
    dtype_ptr = MPIR_GET_DTYPE_PTR(datatype);
    MPIR_TEST_DTYPE(datatype,dtype_ptr,comm_ptr,myname);
    MPIR_REF_INCR(dtype_ptr);
    shandle->perm_datatype = dtype_ptr;
    shandle->perm_tag      = tag;
    shandle->perm_dest     = dest;
    shandle->perm_count    = count;
    shandle->perm_buf      = buf;
    MPIR_REF_INCR(comm_ptr);
    shandle->perm_comm     = comm_ptr;
    shandle->active        = 0;
    shandle->send          = MPID_IssendDatatype;
    /* dest of MPI_PROC_NULL handled in start */

    TR_POP;
    revertSignal();
    return MPI_SUCCESS;
}
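/*
 * Usage sketch (editor's addition): the persistent-request pattern this
 * routine supports.  The handle is built once; each iteration is MPI_Start
 * followed by MPI_Wait, and MPI_Request_free releases the handle.  The
 * iteration count, tag, and message parameters are illustrative assumptions.
 */
#if 0
#include <mpi.h>

static void repeated_ssend( double *buf, int n, int dest )
{
    int i;
    MPI_Request request;
    MPI_Status  status;

    MPI_Ssend_init( buf, n, MPI_DOUBLE, dest, 0, MPI_COMM_WORLD, &request );
    for (i = 0; i < 10; i++) {
        /* ... fill buf for this iteration ... */
        MPI_Start( &request );
        MPI_Wait( &request, &status );
    }
    MPI_Request_free( &request );
}
#endif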
/*@
    MPI_Irsend - Starts a nonblocking ready send

Input Parameters:
+ buf - initial address of send buffer (choice)
. count - number of elements in send buffer (integer)
. datatype - datatype of each send buffer element (handle)
. dest - rank of destination (integer)
. tag - message tag (integer)
- comm - communicator (handle)

Output Parameter:
. request - communication request (handle)

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_TAG
.N MPI_ERR_RANK
.N MPI_ERR_EXHAUSTED
@*/
EXPORT_MPI_API int MPI_Irsend( void *buf, int count, MPI_Datatype datatype,
                               int dest, int tag, MPI_Comm comm,
                               MPI_Request *request )
{
    struct MPIR_COMMUNICATOR *comm_ptr;
    struct MPIR_DATATYPE     *dtype_ptr;
    MPIR_SHANDLE *shandle;
    static char myname[] = "MPI_IRSEND";
    int mpi_errno = MPI_SUCCESS;

    TR_PUSH(myname);

    comm_ptr = MPIR_GET_COMM_PTR(comm);
    MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname);

    dtype_ptr = MPIR_GET_DTYPE_PTR(datatype);
    MPIR_TEST_DTYPE(datatype,dtype_ptr,comm_ptr,myname);

#ifndef MPIR_NO_ERROR_CHECKING
    MPIR_TEST_COUNT(count);
    MPIR_TEST_SEND_TAG(tag);
    MPIR_TEST_SEND_RANK(comm_ptr,dest);
    if (mpi_errno)
        return MPIR_ERROR(comm_ptr, mpi_errno, myname );
#endif

    MPIR_ALLOCFN(shandle,MPID_Send_alloc,
                 comm_ptr,MPI_ERR_EXHAUSTED,myname );
    *request = (MPI_Request)shandle;
    MPID_Request_init( (MPI_Request)shandle, MPIR_SEND );

    /* we need the rank of dest in MPI_COMM_ALL in
       MPID_Gateway_SendCancelPacket(), so we save it here */
    shandle->partner_grank = comm_ptr->lrank_to_grank[dest];

    MPIR_REMEMBER_SEND(shandle, buf, count, datatype, dest, tag, comm_ptr);

    if (dest == MPI_PROC_NULL) {
        shandle->is_complete = 1;
        return MPI_SUCCESS;
    }

    /* This COULD test for the contiguous homogeneous case first .... */
    MPID_IrsendDatatype( comm_ptr, buf, count, dtype_ptr,
                         comm_ptr->local_rank, tag,
                         comm_ptr->send_context,
                         comm_ptr->lrank_to_grank[dest],
                         *request, &mpi_errno, 1 );
    if (mpi_errno)
        return MPIR_ERROR( comm_ptr, mpi_errno, myname );
    TR_POP;
    return MPI_SUCCESS;
}
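/*
 * Usage sketch (editor's addition): a ready send is erroneous unless the
 * matching receive is already posted, so this hypothetical pattern posts
 * the receive first and uses MPI_Barrier to order the two sides before
 * MPI_Irsend starts.  The tag (17) and counts are illustrative.
 */
#if 0
#include <mpi.h>

static void ready_exchange( int rank )
{
    int value = 3;
    MPI_Request request;
    MPI_Status  status;

    if (rank == 1)
        MPI_Irecv( &value, 1, MPI_INT, 0, 17, MPI_COMM_WORLD, &request );
    /* After the barrier, rank 0 knows the receive has been posted */
    MPI_Barrier( MPI_COMM_WORLD );
    if (rank == 0) {
        MPI_Irsend( &value, 1, MPI_INT, 1, 17, MPI_COMM_WORLD, &request );
        MPI_Wait( &request, &status );
    }
    else if (rank == 1)
        MPI_Wait( &request, &status );
}
#endif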
/*@
MPI_Intercomm_create - Creates an intercommunicator from two intracommunicators

Input Parameters:
+ local_comm - Local (intra)communicator
. local_leader - Rank in local_comm of leader (often 0)
. peer_comm - Remote communicator
. remote_leader - Rank in peer_comm of remote leader (often 0)
- tag - Message tag to use in constructing intercommunicator; if multiple
  'MPI_Intercomm_creates' are being made, they should use different tags
  (more precisely, ensure that the local and remote leaders are using
  different tags for each 'MPI_Intercomm_create').

Output Parameter:
. comm_out - Created intercommunicator

Notes:
The MPI 1.1 Standard contains two mutually exclusive comments on the
input intracommunicators.  One says that their respective groups must be
disjoint; the other that the leaders can be the same process.  After
some discussion by the MPI Forum, it has been decided that the groups must
be disjoint.  Note that the `reason` given for this in the standard is
`not` the reason for this choice; rather, the `other` operations on
intercommunicators (like 'MPI_Intercomm_merge') do not make sense if the
groups are not disjoint.

.N fortran

Algorithm:
+ 1) Allocate a send context, an inter-coll context, and an
   intra-coll context
. 2) Send "send_context" and lrank_to_grank list from local comm group
   if I''m the local_leader.
. 3) If I''m the local leader, then wait on the posted sends and
   receives to complete.  Post the receive for the remote group
   information and wait for it to complete.
. 4) Broadcast information received from the remote leader.
. 5) Create the inter_communicator from the information we now have.
-  An inter-communicator ends up with three levels of communicators.
   The inter-communicator returned to the user, a "collective"
   inter-communicator that can be used for safe communications between
   local & remote groups, and a collective intra-communicator that can
   be used to allocate new contexts during the merge and dup operations.

   For the resulting inter-communicator, 'comm_out'

.vb
       comm_out                       = inter-communicator
       comm_out->comm_coll            = "collective" inter-communicator
       comm_out->comm_coll->comm_coll = safe collective intra-communicator
.ve

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_TAG
.N MPI_ERR_EXHAUSTED
.N MPI_ERR_RANK

.seealso: MPI_Intercomm_merge, MPI_Comm_free, MPI_Comm_remote_group,
          MPI_Comm_remote_size
@*/
EXPORT_MPI_API int MPI_Intercomm_create ( MPI_Comm local_comm,
                                          int local_leader,
                                          MPI_Comm peer_comm,
                                          int remote_leader, int tag,
                                          MPI_Comm *comm_out )
{
    int local_size, local_rank, peer_size, peer_rank;
    int remote_size;
    int mpi_errno = MPI_SUCCESS;
    MPIR_CONTEXT context, send_context;
    struct MPIR_GROUP *remote_group_ptr;
    struct MPIR_COMMUNICATOR *new_comm, *local_comm_ptr, *peer_comm_ptr;
    MPI_Request req[6];
    MPI_Status  status[6];
    MPIR_ERROR_DECL;
    static char myname[]="MPI_INTERCOMM_CREATE";

    TR_PUSH(myname);
    local_comm_ptr = MPIR_GET_COMM_PTR(local_comm);

#ifndef MPIR_NO_ERROR_CHECKING
    /* Check for valid arguments to function */
    MPIR_TEST_MPI_COMM(local_comm,local_comm_ptr,local_comm_ptr,myname);
    MPIR_TEST_SEND_TAG(tag);
    if (mpi_errno)
        return MPIR_ERROR(local_comm_ptr, mpi_errno, myname );
#endif

    if (local_comm == MPI_COMM_NULL) {
        mpi_errno = MPIR_Err_setmsg( MPI_ERR_COMM, MPIR_ERR_LOCAL_COMM,
                                     myname,
                     "Local communicator must not be MPI_COMM_NULL",
                                     (char *)0 );
        return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
    }

    (void) MPIR_Comm_size ( local_comm_ptr, &local_size );
    (void) MPIR_Comm_rank ( local_comm_ptr, &local_rank );

    if ( local_leader == local_rank ) {
        /* Peer_comm need be valid only at local_leader */
        peer_comm_ptr = MPIR_GET_COMM_PTR(peer_comm);
        if ((MPIR_TEST_COMM_NOTOK(peer_comm,peer_comm_ptr) ||
             (peer_comm == MPI_COMM_NULL))) {
            mpi_errno = MPIR_Err_setmsg( MPI_ERR_COMM, MPIR_ERR_PEER_COMM,
                                         myname,
                                         "Peer communicator is not valid",
                                         (char *)0 );
            return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
        }

        (void) MPIR_Comm_size ( peer_comm_ptr, &peer_size );
        (void) MPIR_Comm_rank ( peer_comm_ptr, &peer_rank );

        if (((peer_rank == MPI_UNDEFINED) && (mpi_errno = MPI_ERR_RANK)))
            return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );

        if (((remote_leader >= peer_size) && (mpi_errno = MPI_ERR_RANK)) ||
            ((remote_leader <  0)         && (mpi_errno = MPI_ERR_RANK))) {
            mpi_errno = MPIR_Err_setmsg( MPI_ERR_RANK, MPIR_ERR_REMOTE_RANK,
                                         myname,
                                         "Error specifying remote_leader",
  "Error specifying remote_leader; value %d not between 0 and %d",
                                         remote_leader, peer_size );
            return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
        }
    }

    if (((local_leader >= local_size) && (mpi_errno = MPI_ERR_RANK)) ||
        ((local_leader <  0)          && (mpi_errno = MPI_ERR_RANK))) {
        mpi_errno = MPIR_Err_setmsg( MPI_ERR_RANK, MPIR_ERR_LOCAL_RANK,
                                     myname,
                                     "Error specifying local_leader",
  "Error specifying local_leader; value %d not between 0 and %d",
                                     local_leader, local_size );
        return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
    }

    /* Allocate send context, inter-coll context and intra-coll context */
    MPIR_Context_alloc ( local_comm_ptr, 3, &context );

    /* If I'm the local leader, then exchange information */
    if (local_rank == local_leader) {
        MPIR_ERROR_PUSH(peer_comm_ptr);

        /* Post the receives for the information from the remote_leader */
        /* We don't post a receive for the remote group yet, because we */
        /* don't know how big it is yet. */
        MPIR_CALL_POP(MPI_Irecv (&remote_size, 1, MPI_INT, remote_leader,
                                 tag, peer_comm, &(req[2])),
                      peer_comm_ptr,myname);
        MPIR_CALL_POP(MPI_Irecv (&send_context, 1, MPIR_CONTEXT_TYPE,
                                 remote_leader,tag, peer_comm, &(req[3])),
                      peer_comm_ptr,myname);

        /* Send the lrank_to_grank table of the local_comm and an        */
        /* allocated context. Currently I use multiple messages to send  */
        /* this info.  Eventually, this will change(?) */
        MPIR_CALL_POP(MPI_Isend (&local_size, 1, MPI_INT, remote_leader,
                                 tag, peer_comm, &(req[0])),
                      peer_comm_ptr,myname);
        MPIR_CALL_POP(MPI_Isend (&context, 1, MPIR_CONTEXT_TYPE,
                                 remote_leader, tag, peer_comm, &(req[1])),
                      peer_comm_ptr,myname);

        /* Wait on the communication requests to finish */
        MPIR_CALL_POP(MPI_Waitall ( 4, req, status ),peer_comm_ptr,myname);

        /* We now know how big the remote group is, so create it */
        remote_group_ptr = MPIR_CreateGroup ( remote_size );
        remote_group_ptr->self =
            (MPI_Group) MPIR_FromPointer( remote_group_ptr );

        /* Post the receive for the group information */
        MPIR_CALL_POP(MPI_Irecv (remote_group_ptr->lrank_to_grank,
                                 remote_size, MPI_INT, remote_leader, tag,
                                 peer_comm, &(req[5])),peer_comm_ptr,myname);

        /* Send the local group info to the remote group */
        MPIR_CALL_POP(MPI_Isend (local_comm_ptr->group->lrank_to_grank,
                                 local_size, MPI_INT, remote_leader, tag,
                                 peer_comm, &(req[4])),peer_comm_ptr,myname);

        /* wait on the send and the receive for the group information */
        MPIR_CALL_POP(MPI_Waitall ( 2, &(req[4]), &(status[4]) ),
                      peer_comm_ptr, myname);
        MPIR_ERROR_POP(peer_comm_ptr);

        /* Now we can broadcast the group information to the other local */
        /* comm members. */
        MPIR_ERROR_PUSH(local_comm_ptr);
        MPIR_CALL_POP(MPI_Bcast(&remote_size,1,MPI_INT,local_rank,
                                local_comm),local_comm_ptr,myname);
        MPIR_CALL_POP(MPI_Bcast(remote_group_ptr->lrank_to_grank,
                                remote_size, MPI_INT, local_rank,
                                local_comm),local_comm_ptr,myname);
        MPIR_ERROR_POP(local_comm_ptr);
    }
    /* Else I'm just an ordinary comm member, so receive the bcast'd */
    /* info about the remote group */
    else {
        MPIR_ERROR_PUSH(local_comm_ptr);
        MPIR_CALL_POP(MPI_Bcast(&remote_size, 1, MPI_INT, local_leader,
                                local_comm),local_comm_ptr,myname);

        /* We now know how big the remote group is, so create it */
        remote_group_ptr = MPIR_CreateGroup ( remote_size );
        remote_group_ptr->self =
            (MPI_Group) MPIR_FromPointer( remote_group_ptr );

        /* Receive the group info */
        MPIR_CALL_POP(MPI_Bcast(remote_group_ptr->lrank_to_grank,
                                remote_size, MPI_INT, local_leader,
                                local_comm), local_comm_ptr,myname );
        MPIR_ERROR_POP(local_comm_ptr);
    }

    MPIR_ERROR_PUSH(local_comm_ptr);
    /* Broadcast the send context */
    MPIR_CALL_POP(MPI_Bcast(&send_context, 1, MPIR_CONTEXT_TYPE,
                            local_leader, local_comm),local_comm_ptr,myname);
    MPIR_ERROR_POP(local_comm_ptr);

    /* We all now have all the information necessary, start building the */
    /* inter-communicator */
    MPIR_ALLOC(new_comm,NEW(struct MPIR_COMMUNICATOR),local_comm_ptr,
               MPI_ERR_EXHAUSTED,myname );
    MPIR_Comm_init( new_comm, local_comm_ptr, MPIR_INTER );
    *comm_out = new_comm->self;
    new_comm->group = remote_group_ptr;
    MPIR_Group_dup( local_comm_ptr->group, &(new_comm->local_group) );
    new_comm->local_rank     = new_comm->local_group->local_rank;
    new_comm->lrank_to_grank = new_comm->group->lrank_to_grank;
    new_comm->np             = new_comm->group->np;
    new_comm->send_context   = send_context;
    new_comm->recv_context   = context;
    new_comm->comm_name      = 0;
    if ((mpi_errno = MPID_CommInit( local_comm_ptr, new_comm )))
        return mpi_errno;
    (void) MPIR_Attr_create_tree ( new_comm );

    /* Build the collective inter-communicator */
    MPIR_Comm_make_coll( new_comm, MPIR_INTER );
    MPIR_Comm_make_onesided( new_comm, MPIR_INTER );

    /* Build the collective intra-communicator.  Note that we require
       an intra-communicator for the "coll_comm" so that MPI_COMM_DUP
       can use it for some collective operations (do we need this
       for MPI-2 with intercommunicator collective?)

       Note that this really isn't the right thing to do; we need to
       replace *all* of the Mississippi state collective code. */
    MPIR_Comm_make_coll( new_comm->comm_coll, MPIR_INTRA );
#if 0
    MPIR_Comm_make_coll( new_comm->comm_onesided, MPIR_INTRA );
#endif

    /* Remember it for the debugger */
    MPIR_Comm_remember ( new_comm );

    TR_POP;
    return (mpi_errno);
}
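/*
 * Usage sketch (editor's addition): a hypothetical construction of an
 * intercommunicator between two disjoint halves of MPI_COMM_WORLD.  With
 * key = world rank, MPI_Comm_split makes each half's rank 0 its lowest
 * world rank, so the leaders are world ranks 0 and 1 and each names the
 * other as remote_leader.  The tag (12345) is an arbitrary illustrative
 * choice.
 */
#if 0
#include <mpi.h>

static void build_intercomm( MPI_Comm *inter )
{
    int world_rank, color;
    MPI_Comm intra;

    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
    color = world_rank % 2;   /* two disjoint groups: even and odd ranks */
    MPI_Comm_split( MPI_COMM_WORLD, color, world_rank, &intra );
    MPI_Intercomm_create( intra, 0, MPI_COMM_WORLD,
                          (color == 0) ? 1 : 0, 12345, inter );
}
#endif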
/*@
    MPI_Bsend - Basic send with user-specified buffering

Input Parameters:
+ buf - initial address of send buffer (choice)
. count - number of elements in send buffer (nonnegative integer)
. datatype - datatype of each send buffer element (handle)
. dest - rank of destination (integer)
. tag - message tag (integer)
- comm - communicator (handle)

Notes:
This send is provided as a convenience function; it allows the user to
send messages without worrying about where they are buffered (because the
user `must` have provided buffer space with 'MPI_Buffer_attach').

In deciding how much buffer space to allocate, remember that the buffer
space is not available for reuse by subsequent 'MPI_Bsend's unless you are
certain that the message has been received (not just that it should have
been received).  For example, this code does not allocate enough buffer
space
.vb
    MPI_Buffer_attach( b, n*sizeof(double) + MPI_BSEND_OVERHEAD );
    for (i=0; i<m; i++) {
        MPI_Bsend( buf, n, MPI_DOUBLE, ... );
    }
.ve
because only enough buffer space is provided for a single send, and the
loop may start a second 'MPI_Bsend' before the first is done making use
of the buffer.

In C, you can force the messages to be delivered by
.vb
    MPI_Buffer_detach( &b, &n );
    MPI_Buffer_attach( b, n );
.ve
(The 'MPI_Buffer_detach' will not complete until all buffered messages
are delivered.)

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_RANK
.N MPI_ERR_TAG

.seealso: MPI_Buffer_attach, MPI_Ibsend, MPI_Bsend_init
@*/
int MPI_Bsend( void *buf, int count, MPI_Datatype datatype, int dest,
               int tag, MPI_Comm comm )
{
    MPI_Request handle;
    MPI_Status  status;
    int         mpi_errno = MPI_SUCCESS;
    struct MPIR_COMMUNICATOR *comm_ptr;
    MPIR_ERROR_DECL;
    static char myname[] = "MPI_BSEND";

    disableSignal();
    TR_PUSH(myname);

    if (dest != MPI_PROC_NULL) {
        /* We should let Ibsend find the errors, but we will soon add
           a special case for faster Bsend and we'll need these tests
           then */
        comm_ptr = MPIR_GET_COMM_PTR(comm);

#ifndef MPIR_NO_ERROR_CHECKING
        MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname);
        MPIR_TEST_COUNT(count);
        MPIR_TEST_SEND_TAG(tag);
        MPIR_TEST_SEND_RANK(comm_ptr,dest);
        if (mpi_errno) {
            revertSignal();
            return MPIR_ERROR(comm_ptr, mpi_errno, myname );
        }
#endif

        /* ? BsendDatatype?
           MPID_BsendContig( comm, buf, len, src_lrank, tag, context_id,
                             dest_grank, msgrep, &mpi_errno );
           if (!mpi_errno) return MPI_SUCCESS;
           if (mpi_errno != MPIR_ERR_MAY_BLOCK)
               return MPIR_ERROR( comm, mpi_errno, myname );
        */
        MPIR_ERROR_PUSH(comm_ptr);
        /* We don't use MPIR_CALL_POP so that we can free the handle */
        handle = MPI_REQUEST_NULL;
        if ((mpi_errno = MPI_Ibsend( buf, count, datatype, dest, tag, comm,
                                     &handle ))) {
            MPIR_ERROR_POP(comm_ptr);
            if (handle != MPI_REQUEST_NULL)
                MPID_SendFree( handle );
            revertSignal();
            return MPIR_ERROR(comm_ptr,mpi_errno,myname);
        }
        /* This Wait only completes the transfer of data into the
           buffer area.  The test/wait in util/bsendutil.c completes
           the actual transfer */
        MPIR_CALL_POP(MPI_Wait( &handle, &status ),comm_ptr,myname);
        MPIR_ERROR_POP(comm_ptr);
    }
    TR_POP;
    revertSignal();
    return mpi_errno;
}
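/*
 * Usage sketch (editor's addition): attach a user buffer large enough for
 * the message plus MPI_BSEND_OVERHEAD, send, then detach (which waits for
 * delivery of all buffered messages).  The sizes and tag are illustrative.
 */
#if 0
#include <mpi.h>
#include <stdlib.h>

static void buffered_send( double *data, int n, int dest )
{
    void *attached;
    int   attached_size, size;

    size = n * sizeof(double) + MPI_BSEND_OVERHEAD;
    MPI_Buffer_attach( malloc( size ), size );
    MPI_Bsend( data, n, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD );
    /* Detach blocks until the buffered message has been delivered */
    MPI_Buffer_detach( &attached, &attached_size );
    free( attached );
}
#endif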