/* Build an inactive persistent send request and return it via *request.
 * The request pins the communicator (and a non-builtin datatype) with a
 * reference for its lifetime and starts out in the completed state.
 * Always returns MPI_SUCCESS. */
static inline int MPID_PSendRequest(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request)
{
    MPID_Request *preq;

    preq = MPIDI_Request_create2();
    *request = preq;

    preq->kind = MPID_PREQUEST_SEND;

    /* The request keeps the communicator alive while it exists. */
    preq->comm = comm;
    MPIR_Comm_add_ref(comm);
    MPIDI_Request_setMatch(preq, tag, rank, comm->context_id + context_offset);

    /* Stash the user (buf, count, datatype) triple for later starts. */
    preq->mpid.userbuf      = (void *) buf;
    preq->mpid.userbufcount = count;
    preq->mpid.datatype     = datatype;
    preq->partner_request   = NULL;

    /* An inactive persistent request counts as complete. */
    MPIDI_Request_complete(preq);

    /* Non-builtin datatypes are reference-counted; hold one reference
     * so the type stays valid while this request exists. */
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype_get_ptr(datatype, preq->mpid.datatype_ptr);
        MPID_Datatype_add_ref(preq->mpid.datatype_ptr);
    }

    return MPI_SUCCESS;
}
/* MPI_Recv_init: build an inactive persistent receive request.
 * The request pins the communicator (and a non-builtin datatype) with a
 * reference, records the match triple against the receive context, and
 * starts out completed.  Always returns MPI_SUCCESS. */
int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request)
{
    MPID_Request *preq;

    preq = MPIDI_Request_create2();
    *request = preq;

    preq->kind = MPID_PREQUEST_RECV;

    /* The request keeps the communicator alive while it exists. */
    preq->comm = comm;
    MPIR_Comm_add_ref(comm);
    /* Receives match against the receive context id. */
    MPIDI_Request_setMatch(preq, tag, rank, comm->recvcontext_id + context_offset);

    /* Stash the user (buf, count, datatype) triple for later starts. */
    preq->mpid.userbuf      = buf;
    preq->mpid.userbufcount = count;
    preq->mpid.datatype     = datatype;
    preq->partner_request   = NULL;

    /* An inactive persistent request counts as complete. */
    MPIDI_Request_complete(preq);
    MPIDI_Request_setPType(preq, MPIDI_REQUEST_PTYPE_RECV);

    /* Non-builtin datatypes are reference-counted; hold one reference
     * so the type stays valid while this request exists. */
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype_get_ptr(datatype, preq->mpid.datatype_ptr);
        MPID_Datatype_add_ref(preq->mpid.datatype_ptr);
    }

    return MPI_SUCCESS;
}
/* Common helper used to build an inactive persistent request (send or
 * receive).  'call' is the point-to-point routine to invoke each time the
 * request is started; 'type' selects the request kind.  The request holds a
 * reference on 'comm' and on 'datatype' for its lifetime and starts out in
 * the completed state.
 * Returns MPI_SUCCESS, or MPI_ERR_NO_MEM if the request cannot be created. */
static int MPID_PSP_persistent_init(const void *buf, MPI_Aint count, MPI_Datatype datatype,
                                    int rank, int tag, MPIR_Comm *comm, int context_offset,
                                    MPIR_Request **request,
                                    int (*call)(const void * buf, MPI_Aint count, MPI_Datatype datatype,
                                                int rank, int tag, struct MPIR_Comm * comm,
                                                int context_offset, MPIR_Request ** request),
                                    MPIR_Request_kind_t type)
{
	MPIR_Request *req;
	struct MPID_DEV_Request_persistent *preq;
	/*
	printf("#%d ps--- %s() called\n", MPIDI_Process.my_pg_rank, __func__);
	printf("#%d buf %p, count %d, datatype 0x%0x, rank %d, tag %d, comm %p, off %d\n",
	       MPIDI_Process.my_pg_rank, buf, count, datatype, rank, tag, comm, context_offset);
	printf("#%d ctx.id %d ctx.rank %d, ctx.name %s\n",
	       MPIDI_Process.my_pg_rank, comm->context_id, comm->rank, comm->name);
	*/
	req = MPIR_Request_create(type);
	if (unlikely(!req)) goto err_request_recv_create;

	/* The request keeps the communicator alive while it exists. */
	req->comm = comm;
	MPIR_Comm_add_ref(comm);

	req->u.persist.real_request = NULL;
	MPIDI_PSP_Request_set_completed(req); /* an inactive persistent request is a completed request. */

	/* Record everything needed to (re)issue the operation on MPI_Start(). */
	preq = &req->dev.kind.persistent;
	preq->buf = (void *)buf;
	preq->count = count;
	preq->datatype = datatype;
	/* Hold a reference so the datatype survives a user MPI_Type_free(). */
	MPID_PSP_Datatype_add_ref(preq->datatype);
	preq->rank = rank;
	preq->tag = tag;
	preq->comm = comm;
	/* req->comm above already holds the communicator reference. */
	// MPIR_Comm_add_ref(comm);
	preq->context_offset = context_offset;
	preq->call = call;

	*request = req;
	return MPI_SUCCESS;
	/* --- */
err_request_recv_create:
	return MPI_ERR_NO_MEM;
}
/* MPI_Recv_init (CH3 device): create an inactive persistent receive request.
 * The request records the (rank, tag, recv context) match information and the
 * user buffer triple, holds a reference on the communicator (and on a
 * non-builtin datatype), and starts with its completion counter at 0 -- an
 * inactive persistent request counts as complete.
 * Returns MPI_SUCCESS, or an error code if the request cannot be created. */
int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request)
{
    MPID_Request * rreq;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_RECV_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RECV_INIT);

    rreq = MPID_Request_create();
    if (rreq == NULL)
    {
	/* --BEGIN ERROR HANDLING-- */
	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomemreq", 0);
	/* --END ERROR HANDLING-- */
	goto fn_exit;
    }

    /* Single reference: the user owns this request until MPI_Request_free. */
    MPIU_Object_set_ref(rreq, 1);
    rreq->kind = MPID_PREQUEST_RECV;
    rreq->comm = comm;
    /* Completion counter 0: inactive persistent request is "complete". */
    MPID_cc_set(&rreq->cc, 0);
    MPIR_Comm_add_ref(comm);

    /* Match information: receives match against the receive context id. */
    rreq->dev.match.parts.rank = rank;
    rreq->dev.match.parts.tag = tag;
    rreq->dev.match.parts.context_id = comm->recvcontext_id + context_offset;

    /* User buffer triple, re-used each time the request is started. */
    rreq->dev.user_buf = (void *) buf;
    rreq->dev.user_count = count;
    rreq->dev.datatype = datatype;
    rreq->partner_request = NULL;
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_RECV);

    /* Keep a non-builtin datatype alive while the request exists. */
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN)
    {
	MPID_Datatype_get_ptr(datatype, rreq->dev.datatype_ptr);
	MPID_Datatype_add_ref(rreq->dev.datatype_ptr);
    }

    *request = rreq;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RECV_INIT);
    return mpi_errno;
}
/* Handle a receive posted against MPI_PROC_NULL.
 * Blocking path: just fill in the proc-null status; no request is created.
 * Nonblocking path: hand back an already-completed receive request whose
 * status is set to the proc-null values; the request references 'comm'. */
void MPIDI_RecvMsg_procnull(MPID_Comm * comm, unsigned is_blocking, MPI_Status * status, MPID_Request ** request)
{
    MPID_Request *req;

    if (is_blocking) {
        MPIR_Status_set_procnull(status);
        *request = NULL;
        return;
    }

    /* Nonblocking: the caller gets a completed dummy request back. */
    req = MPIDI_Request_create2();
    MPIR_Status_set_procnull(&req->status);
    req->kind = MPID_REQUEST_RECV;
    req->comm = comm;
    MPIR_Comm_add_ref(comm);
    MPIDI_Request_complete(req);
    *request = req;
}
/* Probe for a matching message via the OFI netmod.
 * If rreq_ptr is non-NULL the caller wants an mprobe-style request handed
 * back: a real request is allocated, populated with the match information,
 * and pins the communicator with a reference.  Otherwise a stack-local
 * request (rreq_s) is used purely to drive the search.
 * NOTE: this function continues beyond the visible fragment. */
int MPID_nem_ofi_iprobe_impl(struct MPIDI_VC *vc, int source, int tag, MPID_Comm * comm, int context_offset, int *flag, MPI_Status * status, MPID_Request ** rreq_ptr)
{
    int ret, mpi_errno = MPI_SUCCESS;
    fi_addr_t remote_proc = 0;
    uint64_t match_bits, mask_bits;
    size_t len;
    /* rreq_s is the stack request used when no handle is returned. */
    MPID_Request rreq_s, *rreq;

    BEGIN_FUNC(FCNAME);

    if (rreq_ptr) {
        /* mprobe path: allocate a real request and record the match triple. */
        MPIDI_Request_create_rreq(rreq, mpi_errno, goto fn_exit);
        *rreq_ptr = rreq;
        rreq->comm = comm;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.context_id = comm->context_id;
        /* The request holds a reference on the communicator. */
        MPIR_Comm_add_ref(comm);
    }
    else {
/*
 * This function does all of the work of either revoking the communicator for
 * the first time or keeping track of an ongoing revocation.
 *
 * comm_ptr - The communicator being revoked
 * is_remote - If we received the revocation from a remote process, this should
 *             be set to true. This way we'll know to decrement the counter twice
 *             (once for our local revocation and once for the remote).
 *
 * Always returns MPI_SUCCESS; failures to send revoke packets are absorbed by
 * decrementing the outstanding-revoke counter.
 */
int MPID_Comm_revoke(MPIR_Comm *comm_ptr, int is_remote)
{
    MPIDI_VC_t *vc;
    MPL_IOV iov[MPL_IOV_LIMIT];
    int mpi_errno = MPI_SUCCESS;
    int i, size, my_rank;
    MPIR_Request *request;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_revoke_t *revoke_pkt = &upkt.revoke;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_REVOKE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_REVOKE);

    if (0 == comm_ptr->revoked) {
        /* First revocation (local or remote): mark the communicator as
         * revoked locally, including the node-level subcommunicators. */
        comm_ptr->revoked = 1;
        if (comm_ptr->node_comm) comm_ptr->node_comm->revoked = 1;
        if (comm_ptr->node_roots_comm) comm_ptr->node_roots_comm->revoked = 1;

        /* Start a counter to track how many revoke messages we've received from
         * other ranks */
        comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote; /* Subtract the processes who already know about the revoke */
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE,
                        (MPL_DBG_FDEST, "Comm %08x waiting_for_revoke: %d",
                         comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));

        /* Keep a reference to this comm so it doesn't get destroyed while
         * it's being revoked */
        MPIR_Comm_add_ref(comm_ptr);

        /* Send out the revoke message */
        MPIDI_Pkt_init(revoke_pkt, MPIDI_CH3_PKT_REVOKE);
        revoke_pkt->revoked_comm = comm_ptr->context_id;

        size = comm_ptr->remote_size;
        my_rank = comm_ptr->rank;
        for (i = 0; i < size; i++) {
            if (i == my_rank) continue;  /* don't send the revoke to ourselves */
            request = NULL;
            MPIDI_Comm_get_vc_set_active(comm_ptr, i, &vc);

            iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) revoke_pkt;
            iov[0].MPL_IOV_LEN = sizeof(*revoke_pkt);

            /* iStartMsgv must be issued under the VC's per-object lock. */
            MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
            mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, 1, &request);
            MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);

            /* If the send failed, that peer will never echo a revoke back,
             * so stop waiting for it. */
            if (mpi_errno) comm_ptr->dev.waiting_for_revoke--;

            if (NULL != request)
                /* We don't need to keep a reference to this request. The
                 * progress engine will keep a reference until it completes
                 * later */
                MPIR_Request_free(request);
        }

        /* Check to see if we are done revoking */
        if (comm_ptr->dev.waiting_for_revoke == 0) {
            /* Drop the reference taken above once every peer has answered. */
            MPIR_Comm_release(comm_ptr);
        }

        /* Go clean up all of the existing operations involving this
         * communicator. This includes completing existing MPI requests, MPID
         * requests, and cleaning up the unexpected queue to make sure there
         * aren't any unexpected messages hanging around. */

        /* Clean up the receive and unexpected queues */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        MPIDI_CH3U_Clean_recvq(comm_ptr);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    } else if (is_remote) { /* If this is local, we've already revoked and don't need to do it again. */
        /* Decrement the revoke counter */
        comm_ptr->dev.waiting_for_revoke--;
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE,
                        (MPL_DBG_FDEST, "Comm %08x waiting_for_revoke: %d",
                         comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));

        /* Check to see if we are done revoking */
        if (comm_ptr->dev.waiting_for_revoke == 0) {
            MPIR_Comm_release(comm_ptr);
        }
    }

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_REVOKE);
    return MPI_SUCCESS;
}
/* Find-Dequeue-Unexpected or Allocate-Enqueue-Posted:
 * Search the unexpected-message queue for a message matching
 * (source, tag, context_id).  If found, unlink it, attach the user buffer
 * triple and communicator to it, and return it with *foundp = TRUE.
 * Otherwise allocate a fresh receive request, fill in its match/mask fields,
 * append it to the posted-receive queue, and return it with *foundp = FALSE.
 * Caller must hold the message-queue lock (asserted below).
 * Queue-traversal time and match statistics are recorded via the MPIR_T
 * instrumentation macros. */
MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
                                           int context_id, MPID_Comm *comm,
                                           void *user_buf, int user_count, MPI_Datatype datatype,
                                           int * foundp)
{
    MPID_Time_t timer_start;
    int found;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Store how much time is spent traversing the queue */
    MPIR_T_START_TIMER(RECVQ_STATISTICS, timer_start);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
        prev_rreq = NULL;

        match.parts.context_id = context_id;
        match.parts.tag = tag;
        match.parts.rank = source;

        if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
            /* Fully-specified match: compare without a mask. */
            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
                    /* Unlink the matched request from the unexpected queue,
                     * fixing up head/tail as needed. */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    /* Only eager messages hold buffered (tmpbuf) payload. */
                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    /* Attach the receive's buffer triple and communicator. */
                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
        else {
            /* Wildcard match: zero the wildcard fields in both the match
             * value and the mask so they compare equal to anything. */
            mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
            if (tag == MPI_ANY_TAG)
                match.parts.tag = mask.parts.tag = 0;
            if (source == MPI_ANY_SOURCE)
                match.parts.rank = mask.parts.rank = 0;
            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
                    /* Unlink the matched request (same as the no-mask case). */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
    }
    MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    /* A matching request was not found in the unexpected queue, so we
       need to allocate a new request and add it to the posted queue */
    {
        int mpi_errno = MPI_SUCCESS;
        found = FALSE;

        MPIDI_Request_create_rreq( rreq, mpi_errno, goto lock_exit );
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.context_id = context_id;

        /* Added a mask for faster search on 64-bit capable
         * platforms */
        rreq->dev.mask.parts.context_id = ~0;
        if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
            rreq->dev.mask.parts.rank = 0;
        else
            rreq->dev.mask.parts.rank = ~0;
        if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
            rreq->dev.mask.parts.tag = 0;
        else
            rreq->dev.mask.parts.tag = ~0;

        rreq->comm = comm;
        MPIR_Comm_add_ref(comm);
        rreq->dev.user_buf = user_buf;
        rreq->dev.user_count = user_count;
        rreq->dev.datatype = datatype;

        /* check whether VC has failed, or this is an ANY_SOURCE in a
           failed communicator */
        if (source != MPI_ANY_SOURCE) {
            MPIDI_VC_t *vc;
            MPIDI_Comm_get_vc(comm, source, &vc);
            if (vc->state == MPIDI_VC_STATE_MORIBUND) {
                /* Peer is dead: complete the receive with a failure status
                 * instead of enqueueing it. */
                MPIU_ERR_SET1(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                rreq->status.MPI_ERROR = mpi_errno;
                MPIDI_CH3U_Request_complete(rreq);
                goto lock_exit;
            }
        } else if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
            /* ANY_SOURCE receive on a communicator where anysource matching
             * has been disabled due to failures: fail immediately. */
            MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail");
            rreq->status.MPI_ERROR = mpi_errno;
            MPIDI_CH3U_Request_complete(rreq);
            goto lock_exit;
        }

        /* Append the new request to the tail of the posted queue. */
        rreq->dev.next = NULL;
        if (recvq_posted_tail != NULL) {
            recvq_posted_tail->dev.next = rreq;
        }
        else {
            recvq_posted_head = rreq;
        }
        recvq_posted_tail = rreq;
        MPIR_T_INC(RECVQ_STATISTICS, posted_qlen);
        MPIDI_POSTED_RECV_ENQUEUE_HOOK(rreq);
    }

  lock_exit:
    *foundp = found;

    /* If a match was not found, the timer was stopped after the traversal */
    if (found)
        MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
    return rreq;
}
/* Find-Dequeue-Unexpected, match only:
 * Search the unexpected queue for a message matching (source, tag,
 * context_id) and, if found, unlink it and return it with *foundp = TRUE.
 * Unlike the FDU_or_AEP variant, nothing is enqueued on a miss: the function
 * returns with *foundp = FALSE and the last-visited request pointer (NULL
 * when the whole queue was traversed).  The buffer triple is NOT attached --
 * callers (mprobe-style paths) don't have it yet.
 * Caller must hold the message-queue lock (asserted below). */
MPID_Request * MPIDI_CH3U_Recvq_FDU_matchonly(int source, int tag, int context_id, MPID_Comm *comm, int *foundp)
{
    MPID_Time_t timer_start;
    int found = FALSE;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Store how much time is spent traversing the queue */
    MPIR_T_START_TIMER(RECVQ_STATISTICS, timer_start);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
        prev_rreq = NULL;

        match.parts.context_id = context_id;
        match.parts.tag = tag;
        match.parts.rank = source;

        if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
            /* Fully-specified match: compare without a mask. */
            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
                    /* Unlink the matched request from the unexpected queue. */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);
                    /* NOTE(review): the FDU_or_AEP variant only subtracts
                     * tmpbuf_sz for EAGER messages; here it is unconditional
                     * -- confirm this is intended for the matchonly path. */
                    MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    /* don't have the (buf,count,type) info right now, can't add
                     * it to the request */
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
        else {
            /* Wildcard match: zero the wildcard fields in both the match
             * value and the mask so they compare equal to anything. */
            mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
            if (tag == MPI_ANY_TAG)
                match.parts.tag = mask.parts.tag = 0;
            if (source == MPI_ANY_SOURCE)
                match.parts.rank = mask.parts.rank = 0;
            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
                    /* Unlink the matched request (same as the no-mask case). */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);
                    MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    /* don't have the (buf,count,type) info right now, can't add
                     * it to the request */
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
    }

  lock_exit:
    MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    *foundp = found;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
    return rreq;
}
/* Probe (or claim-probe) for a matching message via libfabric FI_PEEK.
 * If rreq_ptr is non-NULL, a real request is allocated and returned on a
 * successful match (mprobe-style, with a reference on comm); otherwise a
 * stack request drives the search.  On entry *flag may be CLAIM_PEEK to
 * request FI_CLAIM semantics; on exit *flag is 1 on a match, 0 otherwise.
 * Compiled twice via ADD_SUFFIX/API_SET for the two OFI tag layouts. */
int ADD_SUFFIX(MPID_nem_ofi_iprobe_impl)(struct MPIDI_VC *vc,
                                         int source,
                                         int tag,
                                         MPIR_Comm * comm,
                                         int context_offset,
                                         int *flag, MPI_Status * status, MPIR_Request ** rreq_ptr)
{
    int ret, mpi_errno = MPI_SUCCESS;
    fi_addr_t remote_proc = 0;
    uint64_t match_bits, mask_bits;
    size_t len;
    /* rreq_s is the stack request used when no handle is returned. */
    MPIR_Request rreq_s, *rreq;

    BEGIN_FUNC(FCNAME);

    if (rreq_ptr) {
        /* mprobe path: allocate a real request and record the match triple. */
        MPIDI_CH3I_NM_OFI_RC(MPID_nem_ofi_create_req(&rreq, 1));
        rreq->kind = MPIR_REQUEST_KIND__RECV;

        *rreq_ptr = rreq;
        rreq->comm = comm;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.context_id = comm->context_id;
        /* The request holds a reference on the communicator. */
        MPIR_Comm_add_ref(comm);
    }
    else {
        rreq = &rreq_s;
        rreq->dev.OnDataAvail = NULL;
    }
    REQ_OFI(rreq)->pack_buffer = NULL;
    /* peek_callback flips match_state when the FI_PEEK completes. */
    REQ_OFI(rreq)->event_callback = ADD_SUFFIX(peek_callback);
    REQ_OFI(rreq)->match_state = PEEK_INIT;
    OFI_ADDR_INIT(source, vc, remote_proc);
#if API_SET == API_SET_1
    match_bits = init_recvtag(&mask_bits, comm->context_id + context_offset, source, tag);
#elif API_SET == API_SET_2
    match_bits = init_recvtag_2(&mask_bits, comm->context_id + context_offset, tag);
#endif

    /* ------------------------------------------------------------------------- */
    /* fi_recvmsg with FI_PEEK:                                                   */
    /* Initiate a search for a match in the hardware or software queue.          */
    /* The search can complete immediately with -ENOMSG.                         */
    /* If successful, libfabric will enqueue a context entry into the completion */
    /* queue to make the search nonblocking.  This code will poll until the      */
    /* entry is enqueued.                                                        */
    /* ------------------------------------------------------------------------- */
    msg_tagged_t msg;
    uint64_t    msgflags = FI_PEEK;
    msg.msg_iov   = NULL;
    msg.desc      = NULL;
    msg.iov_count = 0;
    msg.addr      = remote_proc;
    msg.tag       = match_bits;
    msg.ignore    = mask_bits;
    msg.context   = (void *) &(REQ_OFI(rreq)->ofi_context);
    msg.data      = 0;

    /* CLAIM_PEEK (improbe path) also claims the message for later receipt. */
    if(*flag == CLAIM_PEEK)
        msgflags|=FI_CLAIM;

    ret = fi_trecvmsg(gl_data.endpoint,&msg,msgflags);
    if(ret == -ENOMSG) {
        /* Immediate miss: release the request (if any) and report no match. */
        if (rreq_ptr) {
            MPIR_Request_free(rreq);
            *rreq_ptr = NULL;
            *flag = 0;
        }
        MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
        goto fn_exit;
    }
    MPIR_ERR_CHKANDJUMP4((ret < 0), mpi_errno, MPI_ERR_OTHER,
                         "**ofi_peek", "**ofi_peek %s %d %s %s",
                         __SHORT_FILE__, __LINE__, FCNAME, fi_strerror(-ret));

    /* Spin the progress engine until the peek completion arrives. */
    while (PEEK_INIT == REQ_OFI(rreq)->match_state)
        MPID_nem_ofi_poll(MPID_BLOCKING_POLL);

    if (PEEK_NOT_FOUND == REQ_OFI(rreq)->match_state) {
        /* Search completed without a match. */
        if (rreq_ptr) {
            MPIR_Request_free(rreq);
            *rreq_ptr = NULL;
            *flag = 0;
        }
        MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
        goto fn_exit;
    }

    if (status != MPI_STATUS_IGNORE)
        *status = rreq->status;
    /* NOTE(review): this add_ref also runs when rreq == &rreq_s (stack
     * request, rreq_ptr == NULL) -- confirm that is intended. */
    MPIR_Request_add_ref(rreq);
    *flag = 1;
    END_FUNC_RC(FCNAME);
}
/* Destroy a communicator whose reference count has reached zero.
 * Runs the user attribute delete callbacks first (the communicator must not
 * be freed if any of them fail), then notifies the device, releases all
 * subsidiary objects (groups, sub-communicators, error handler), frees the
 * context id, and finally frees the handle object itself.
 * Returns MPI_SUCCESS or the error produced by attribute deletion or the
 * device destroy hook. */
int MPIR_Comm_delete_internal(MPID_Comm * comm_ptr)
{
    int in_use;
    int mpi_errno = MPI_SUCCESS;
    MPID_MPI_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);

    MPID_MPI_FUNC_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);

    MPIU_Assert(MPIU_Object_get_ref(comm_ptr) == 0);    /* sanity check */

    /* Remove the attributes, executing the attribute delete routine.
     * Do this only if the attribute functions are defined.
     * This must be done first, because if freeing the attributes
     * returns an error, the communicator is not freed */
    if (MPIR_Process.attr_free && comm_ptr->attributes) {
        /* Temporarily add a reference to this communicator because
         * the attr_free code requires a valid communicator */
        MPIU_Object_add_ref(comm_ptr);
        mpi_errno = MPIR_Process.attr_free(comm_ptr->handle, &comm_ptr->attributes);
        /* Release the temporary reference added before the call to
         * attr_free */
        MPIU_Object_release_ref(comm_ptr, &in_use);
    }

    /* If the attribute delete functions return failure, the
     * communicator must not be freed. That is the reason for the
     * test on mpi_errno here. */
    if (mpi_errno == MPI_SUCCESS) {
        /* If this communicator is our parent, and we're disconnecting
         * from the parent, mark that fact */
        if (MPIR_Process.comm_parent == comm_ptr)
            MPIR_Process.comm_parent = NULL;

        /* Notify the device that the communicator is about to be
         * destroyed */
        mpi_errno = MPID_Dev_comm_destroy_hook(comm_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        /* Free info hints */
        if (comm_ptr->info != NULL) {
            MPIU_Info_free(comm_ptr->info);
        }

        /* release our reference to the collops structure, comes after the
         * destroy_hook to allow the device to manage these vtables in a custom
         * fashion */
        if (comm_ptr->coll_fns && --comm_ptr->coll_fns->ref_count == 0) {
            MPIU_Free(comm_ptr->coll_fns);
            comm_ptr->coll_fns = NULL;
        }

        /* An intercommunicator owns its hidden local intracommunicator. */
        if (comm_ptr->comm_kind == MPID_INTERCOMM && comm_ptr->local_comm)
            MPIR_Comm_release(comm_ptr->local_comm);

        /* Free the local and remote groups, if they exist */
        if (comm_ptr->local_group)
            MPIR_Group_release(comm_ptr->local_group);
        if (comm_ptr->remote_group)
            MPIR_Group_release(comm_ptr->remote_group);

        /* free the intra/inter-node communicators, if they exist */
        if (comm_ptr->node_comm)
            MPIR_Comm_release(comm_ptr->node_comm);
        if (comm_ptr->node_roots_comm)
            MPIR_Comm_release(comm_ptr->node_roots_comm);
        if (comm_ptr->intranode_table != NULL)
            MPIU_Free(comm_ptr->intranode_table);
        if (comm_ptr->internode_table != NULL)
            MPIU_Free(comm_ptr->internode_table);

        /* Free the context value. This should come after freeing the
         * intra/inter-node communicators since those free calls won't
         * release this context ID and releasing this before then could lead
         * to races once we make threading finer grained. */
        /* This must be the recvcontext_id (i.e. not the (send)context_id)
         * because in the case of intercommunicators the send context ID is
         * allocated out of the remote group's bit vector, not ours. */
        MPIR_Free_contextid(comm_ptr->recvcontext_id);

        /* We need to release the error handler */
        if (comm_ptr->errhandler &&
            !(HANDLE_GET_KIND(comm_ptr->errhandler->handle) == HANDLE_KIND_BUILTIN)) {
            int errhInuse;
            MPIR_Errhandler_release_ref(comm_ptr->errhandler, &errhInuse);
            if (!errhInuse) {
                MPIU_Handle_obj_free(&MPID_Errhandler_mem, comm_ptr->errhandler);
            }
        }

        /* Remove from the list of active communicators if
         * we are supporting message-queue debugging. We make this
         * conditional on having debugger support since the
         * operation is not constant-time */
        MPIR_COMML_FORGET(comm_ptr);

        /* Check for predefined communicators - these should not
         * be freed */
        if (!(HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN))
            MPIU_Handle_obj_free(&MPID_Comm_mem, comm_ptr);
    }
    else {
        /* If the user attribute free function returns an error,
         * then do not free the communicator */
        /* Restore the reference dropped by the caller so the communicator
         * remains valid. */
        MPIR_Comm_add_ref(comm_ptr);
    }

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* MPI_Improbe via the Portals4 netmod.
 * Submits a PtlMESearch with PTL_SEARCH_DELETE against a dummy match entry
 * describing (source, tag, context), then blocks in the progress engine
 * until the search request completes.  On a match, the message request is
 * returned via *message with its status filled in and a communicator
 * reference taken; otherwise *flag is 0 and the request is released.
 * Returns MPI_SUCCESS or an MPICH error code. */
int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, int context_offset, int *flag, MPID_Request **message, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    ptl_process_t id_any;
    ptl_me_t me;
    MPID_Request *req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    /* Wildcard Portals process id, used for MPI_ANY_SOURCE. */
    id_any.phys.nid = PTL_NID_ANY;
    id_any.phys.pid = PTL_PID_ANY;

    /* create a request */
    req = MPID_Request_create();
    MPID_nem_ptl_init_req(req);
    MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
    REQ_PTL(req)->event_handler = handle_mprobe;
    req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source);
    if (source == MPI_ANY_SOURCE)
        me.match_id = id_any;
    else {
        /* Lazily resolve the peer's Portals id on first use. */
        if (!vc_ptl->id_initialized) {
            mpi_errno = MPID_nem_ptl_init_id(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        me.match_id = vc_ptl->id;
    }
    if (tag == MPI_ANY_TAG)
        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
    else
        me.ignore_bits = NPTL_MATCH_IGNORE;

    /* submit a search request: DELETE removes the matched entry from the
     * unexpected list (improbe semantics). */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(req));

    *flag = REQ_PTL(req)->found;
    if (*flag) {
        /* Matched: hand the message request back to the caller. */
        req->comm = comm;
        MPIR_Comm_add_ref(comm);
        MPIR_Request_extract_status(req, status);
        *message = req;
    }
    else {
        /* No match: drop our reference to the search request. */
        MPID_Request_release(req);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/* Common implementation of MPI_Put / MPI_Rput over pscom.
 * When 'request' is non-NULL this is the request-based (Rput) path: a send
 * request is created up front, referencing the window's communicator, and is
 * completed either immediately (local/PROC_NULL paths) or from rma_put_done.
 * Local targets (same rank, or shared-memory windows) are served by
 * MPIR_Localcopy; remote targets pack the origin data and send it with an
 * RMA-put xheader carrying the encoded target datatype.
 * Returns MPI_SUCCESS, or an RMA synchronization/copy error. */
int MPID_Put_generic(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
		     int target_rank, MPI_Aint target_disp, int target_count,
		     MPI_Datatype target_datatype, MPIR_Win *win_ptr, MPIR_Request **request)
{
	int mpi_error = MPI_SUCCESS;
	MPID_PSP_Datatype_info dt_info;
	MPID_PSP_packed_msg_t msg;
	MPID_Win_rank_info *ri = win_ptr->rank_info + target_rank;
	char *target_buf;
#if 0
	fprintf(stderr, "int MPID_Put(origin_addr: %p, origin_count: %d, origin_datatype: %08x,"
		" target_rank: %d, target_disp: %d, target_count: %d, target_datatype: %08x,"
		" *win_ptr: %p)\n",
		origin_addr, origin_count, origin_datatype, target_rank, target_disp,
		target_count, target_datatype, win_ptr);
#endif
	/* Datatype */
	MPID_PSP_Datatype_get_info(target_datatype, &dt_info);

	if(request) {
		/* Request-based (Rput) path: the request references the
		 * window's communicator for its lifetime. */
		*request = MPIR_Request_create(MPIR_REQUEST_KIND__SEND);
		(*request)->comm = win_ptr->comm_ptr;
		MPIR_Comm_add_ref(win_ptr->comm_ptr);
	}

	if (unlikely(target_rank == MPI_PROC_NULL)) {
		/* Put to PROC_NULL is a no-op that completes immediately. */
		goto fn_completed;
	}

	/* Request-based RMA operations are only valid within a passive target epoch! */
	if(request && win_ptr->epoch_state != MPID_PSP_EPOCH_LOCK && win_ptr->epoch_state != MPID_PSP_EPOCH_LOCK_ALL) {
		mpi_error = MPI_ERR_RMA_SYNC;
		goto err_sync_rma;
	}

	/* Check that we are within an access/exposure epoch: */
	if (win_ptr->epoch_state == MPID_PSP_EPOCH_NONE) {
		mpi_error = MPI_ERR_RMA_SYNC;
		goto err_sync_rma;
	}

	/* Track access epoch state: */
	if (win_ptr->epoch_state == MPID_PSP_EPOCH_FENCE_ISSUED) {
		win_ptr->epoch_state = MPID_PSP_EPOCH_FENCE;
	}

	/* If the put is a local operation, do it here */
	if (target_rank == win_ptr->rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
		void *base;
		int disp_unit;

		/* Shared windows may have a different base/disp per rank. */
		if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
			MPID_PSP_shm_rma_get_base(win_ptr, target_rank, &disp_unit, &base);
		}
		else {
			base = win_ptr->base;
			disp_unit = win_ptr->disp_unit;
		}

		mpi_error = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
					   (char *) base + disp_unit * target_disp,
					   target_count, target_datatype);
		if (mpi_error) {
			goto err_local_copy;
		}
		goto fn_completed;
	}

	/* Data: pack the origin data into a contiguous send buffer. */
	mpi_error = MPID_PSP_packed_msg_prepare(origin_addr, origin_count, origin_datatype, &msg);
	if (unlikely(mpi_error != MPI_SUCCESS)) goto err_create_packed_msg;

	MPID_PSP_packed_msg_pack(origin_addr, origin_count, origin_datatype, &msg);

	target_buf = (char *) ri->base_addr + ri->disp_unit * target_disp;

	/* Dead branch kept on purpose (see ToDo): pscom builtin rma_write. */
	if (0 && MPID_PSP_Datatype_is_contig(&dt_info)) { /* ToDo: reenable pscom buildin rma_write */
		/* Contig message. Use pscom buildin rma */
		pscom_request_t *req = pscom_request_create(0, 0);

		req->data_len = msg.msg_sz;
		req->data = msg.msg;
		req->connection = ri->con;

		/* ToDo: need a new io_done. inside io_done, call MPID_PSP_packed_msg_cleanup(msg)!!! */
		req->ops.io_done = pscom_request_free;
		req->xheader.rma_write.dest = target_buf;

		pscom_post_rma_write(req);

		/* win_ptr->rma_puts_accs[target_rank]++; / ToDo: Howto receive this? */
	} else {
		/* Generic path: ship the encoded target datatype in the xheader. */
		unsigned int encode_dt_size = MPID_PSP_Datatype_get_size(&dt_info);
		unsigned int xheader_len = sizeof(MPID_PSCOM_XHeader_Rma_put_t) + encode_dt_size;
		pscom_request_t *req = pscom_request_create(xheader_len, sizeof(pscom_request_put_send_t));
		MPID_PSCOM_XHeader_Rma_put_t *xheader = &req->xheader.user.put;

		/* encoded datatype too large for xheader? */
		assert(xheader_len < (1<<(8*sizeof(((struct PSCOM_header_net*)0)->xheader_len))));

		req->user->type.put_send.msg = msg;
		req->user->type.put_send.win_ptr = win_ptr;

		MPID_PSP_Datatype_encode(&dt_info, &xheader->encoded_type);

		xheader->common.tag = 0;
		xheader->common.context_id = 0;
		xheader->common.type = MPID_PSP_MSGTYPE_RMA_PUT;
		xheader->common._reserved_ = 0;
		xheader->common.src_rank = win_ptr->rank;

		/* xheader->target_disp = target_disp; */
		xheader->target_count = target_count;
		xheader->target_buf = target_buf;
		/* xheader->epoch = ri->epoch_origin; */
		xheader->win_ptr = ri->win_ptr; /* remote win_ptr */

		req->xheader_len = xheader_len;
		req->data = msg.msg;
		req->data_len = msg.msg_sz;
		/* rma_put_done completes the operation (and any Rput request). */
		req->ops.io_done = rma_put_done;
		req->user->type.put_send.target_rank = target_rank;
		req->connection = ri->con;

		/* Bookkeeping for epoch completion / flush. */
		win_ptr->rma_local_pending_cnt++;
		win_ptr->rma_local_pending_rank[target_rank]++;
		win_ptr->rma_puts_accs[target_rank]++;

		if(request) {
			MPIR_Request *mpid_req = *request;
			/* TODO: Use a new and 'put_send'-dedicated MPID_DEV_Request_create() */
			/*       instead of allocating and overloading a common send request. */
			/* Swap the request's pscom request for the RMA one; the extra
			 * reference is dropped when the put completes. */
			pscom_request_free(mpid_req->dev.kind.common.pscom_req);
			mpid_req->dev.kind.common.pscom_req = req;
			MPIR_Request_add_ref(mpid_req);
			req->user->type.put_send.mpid_req = mpid_req;
		} else {
			req->user->type.put_send.mpid_req = NULL;
		}

		pscom_post_send(req);
	}

fn_exit:
	return MPI_SUCCESS;
fn_completed:
	/* Immediately-completed paths (PROC_NULL, local copy). */
	if(request) {
		MPIDI_PSP_Request_set_completed(*request);
	}
	return MPI_SUCCESS;
	/* --- */
err_exit:
	/* On error the Rput request (if any) is completed and released. */
	if(request) {
		MPIDI_PSP_Request_set_completed(*request);
		MPIR_Request_free(*request);
	}
	return mpi_error;
	/* --- */
err_create_packed_msg:
	goto err_exit;
err_local_copy:
	goto err_exit;
err_sync_rma:
	goto err_exit;
}
/* Find-Dequeue-Unexpected or Allocate-Enqueue-Posted (variant without
 * MPIR_T queue statistics):
 * Search the unexpected-message queue for a message matching
 * (source, tag, context_id).  If found, unlink it, attach the user buffer
 * triple and communicator to it, and return it with *foundp = TRUE.
 * Otherwise allocate a new receive request, fill in its match/mask fields,
 * append it to the posted queue, and return it with *foundp = FALSE.
 * Caller must hold the message-queue lock (asserted below). */
MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
                                           int context_id, MPID_Comm *comm,
                                           void *user_buf, int user_count, MPI_Datatype datatype,
                                           int * foundp)
{
    int found;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
        prev_rreq = NULL;

        match.parts.context_id = context_id;
        match.parts.tag = tag;
        match.parts.rank = source;

        if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
            /* Fully-specified match: compare without a mask. */
            do {
                if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
                    /* Unlink the matched request from the unexpected queue,
                     * fixing up head/tail as needed. */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }

                    /* Attach the receive's buffer triple and communicator. */
                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
        else {
            /* Wildcard match: zero the wildcard fields in both the match
             * value and the mask so they compare equal to anything. */
            mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
            if (tag == MPI_ANY_TAG)
                match.parts.tag = mask.parts.tag = 0;
            if (source == MPI_ANY_SOURCE)
                match.parts.rank = mask.parts.rank = 0;
            do {
                if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
                    /* Unlink the matched request (same as the no-mask case). */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }

                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
    }

    /* A matching request was not found in the unexpected queue, so we
       need to allocate a new request and add it to the posted queue */
    {
        int mpi_errno = MPI_SUCCESS;
        found = FALSE;

        MPIDI_Request_create_rreq( rreq, mpi_errno, goto lock_exit );
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.context_id = context_id;

        /* Added a mask for faster search on 64-bit capable
         * platforms */
        rreq->dev.mask.parts.context_id = ~0;
        if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
            rreq->dev.mask.parts.rank = 0;
        else
            rreq->dev.mask.parts.rank = ~0;
        if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
            rreq->dev.mask.parts.tag = 0;
        else
            rreq->dev.mask.parts.tag = ~0;

        rreq->comm = comm;
        MPIR_Comm_add_ref(comm);
        rreq->dev.user_buf = user_buf;
        rreq->dev.user_count = user_count;
        rreq->dev.datatype = datatype;

        /* check whether VC has failed, or this is an ANY_SOURCE in a
           failed communicator */
        if (source != MPI_ANY_SOURCE) {
            MPIDI_VC_t *vc;
            MPIDI_Comm_get_vc(comm, source, &vc);
            if (vc->state == MPIDI_VC_STATE_MORIBUND) {
                /* Peer is dead: complete the receive with a failure status
                 * instead of enqueueing it. */
                MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                rreq->status.MPI_ERROR = mpi_errno;
                MPIDI_CH3U_Request_complete(rreq);
                goto lock_exit;
            }
        } else if (MPID_VCRT_Contains_failed_vc(comm->vcrt)) {
            /* ANY_SOURCE receive on a communicator with a failed VC:
             * fail immediately rather than risk matching a dead peer. */
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**comm_fail");
            rreq->status.MPI_ERROR = mpi_errno;
            MPIDI_CH3U_Request_complete(rreq);
            goto lock_exit;
        }

        /* Append the new request to the tail of the posted queue. */
        rreq->dev.next = NULL;
        if (recvq_posted_tail != NULL) {
            recvq_posted_tail->dev.next = rreq;
        }
        else {
            recvq_posted_head = rreq;
        }
        recvq_posted_tail = rreq;
        MPIDI_POSTED_RECV_ENQUEUE_HOOK(rreq);
    }

  lock_exit:
    *foundp = found;

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
    return rreq;
}
/* Allocate and initialize the device-independent parts of an MPID_Win.
 * Allocates the window object and the per-rank MPIDI_Win_info array, takes
 * a reference on comm_ptr for the window's lifetime, applies any info hints,
 * and fills in the default hint flags.  The created window is returned via
 * *win_ptr.
 * Returns MPI_SUCCESS, or MPI_ERR_NO_MEM if the window object cannot be
 * allocated. */
int MPIDI_Win_init( MPI_Aint length, int disp_unit, MPID_Win **win_ptr, MPID_Info *info, MPID_Comm *comm_ptr, int create_flavor, int model)
{
  int mpi_errno=MPI_SUCCESS;
  size_t rank, size;
  MPIDI_Win_info *winfo;
  static char FCNAME[] = "MPIDI_Win_init";  /* used by the error macros */

  /* ----------------------------------------- */
  /*  Setup the common sections of the window  */
  /* ----------------------------------------- */
  MPID_Win *win = (MPID_Win*)MPIU_Handle_obj_alloc(&MPID_Win_mem);
  MPIU_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM,
                      return mpi_errno, "**nomem");
  *win_ptr = win;
  memset(&win->mpid, 0, sizeof(struct MPIDI_Win));

  /* The window holds a reference to the communicator for its lifetime. */
  win->comm_ptr = comm_ptr;
  MPIR_Comm_add_ref(comm_ptr);

  size = comm_ptr->local_size;
  rank = comm_ptr->rank;

  /* Per-rank window info table, zero-initialized. */
  win->mpid.info = MPIU_Malloc(size * sizeof(struct MPIDI_Win_info));
  MPID_assert(win->mpid.info != NULL);
  memset((void *) win->mpid.info,0,(size * sizeof(struct MPIDI_Win_info)));
  winfo = &win->mpid.info[rank];

  win->errhandler          = NULL;
  win->base                = NULL;
  win->size                = length;
  win->disp_unit           = disp_unit;
  win->create_flavor       = create_flavor;
  win->model               = model;
  win->copyCreateFlavor    = 0;
  win->copyModel           = 0;
  win->attributes          = NULL;
  /* (comm_ptr was already stored above; the original code redundantly
   * assigned win->comm_ptr a second time here.) */

  if ((info != NULL) && ((int *)info != (int *) MPI_INFO_NULL))
  {
      mpi_errno= MPIDI_Win_set_info(win, info);
      MPID_assert(mpi_errno == 0);
  }
  MPID_assert(mpi_errno == 0);

  /* Initialize the info (hint) flags per window */
  win->mpid.info_args.no_locks            = 0;
  win->mpid.info_args.accumulate_ordering =
      (MPIDI_ACCU_ORDER_RAR | MPIDI_ACCU_ORDER_RAW | MPIDI_ACCU_ORDER_WAR | MPIDI_ACCU_ORDER_WAW);
  win->mpid.info_args.accumulate_ops      = MPIDI_ACCU_SAME_OP_NO_OP; /*default */
  win->mpid.info_args.same_size           = 0;
  win->mpid.info_args.alloc_shared_noncontig = 0;

  win->copyDispUnit=0;
  win->copySize=0;
  winfo->memregion_used = 0;
  winfo->disp_unit = disp_unit;

  return mpi_errno;
}