/*
 * Signal that a message has completed.
 * @param msg  (IN) Message that has completed.
 * @param peer (IN) The peer the message was exchanged with.
 * @retval ORTE_SUCCESS or error code on failure.
 */
int mca_oob_tcp_msg_complete(mca_oob_tcp_msg_t* msg, orte_process_name_t* peer)
{
    OPAL_THREAD_LOCK(&msg->msg_lock);
    msg->msg_complete = true;
    if (NULL != msg->msg_cbfunc) {
        OPAL_THREAD_UNLOCK(&msg->msg_lock);

        /* post to a global list of completed messages */
        if ((msg->msg_flags & ORTE_RML_FLAG_RECURSIVE_CALLBACK) == 0) {
            int size;
            OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
            opal_list_append(&mca_oob_tcp_component.tcp_msg_completed,
                             (opal_list_item_t*)msg);
            size = opal_list_get_size(&mca_oob_tcp_component.tcp_msg_completed);
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
            /* another thread is already draining the completed list and
             * will dispatch this callback for us - don't recurse */
            if (size > 1) {
                return ORTE_SUCCESS;
            }
        }

        /* invoke message callback */
        msg->msg_cbfunc(msg->msg_rc, peer, msg->msg_uiov, msg->msg_ucnt,
                        msg->msg_hdr.msg_tag, msg->msg_cbdata);

        /* dispatch any completed events */
        if ((msg->msg_flags & ORTE_RML_FLAG_RECURSIVE_CALLBACK) == 0) {
            opal_list_item_t* item;

            OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
            opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_completed,
                                  (opal_list_item_t*)msg);
            MCA_OOB_TCP_MSG_RETURN(msg);
            while (NULL != (item = opal_list_remove_first(
                                &mca_oob_tcp_component.tcp_msg_completed))) {
                msg = (mca_oob_tcp_msg_t*)item;
                OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
                msg->msg_cbfunc(msg->msg_rc, &msg->msg_peer, msg->msg_uiov,
                                msg->msg_ucnt, msg->msg_hdr.msg_tag,
                                msg->msg_cbdata);
                OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
                MCA_OOB_TCP_MSG_RETURN(msg);
            }
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        } else {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
    } else {
        opal_condition_broadcast(&msg->msg_condition);
        OPAL_THREAD_UNLOCK(&msg->msg_lock);
    }
    return ORTE_SUCCESS;
}
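/*
 * For illustration only - a minimal sketch of a completion callback as
 * this file invokes it: cbfunc(status, peer, uiov, ucnt, tag, cbdata),
 * where status is the byte count or a negative error code.  The
 * example_request_t type and example_send_complete function are
 * hypothetical names, not part of this component, and the callback
 * signature is assumed from the invocations above.  Note that unless a
 * message is posted with ORTE_RML_FLAG_RECURSIVE_CALLBACK, callbacks
 * are serialized through tcp_msg_completed, so a callback should not
 * expect to recursively re-enter the dispatch path.
 */
typedef struct {
    struct iovec iov;     /* must remain valid until the callback fires */
    volatile bool done;   /* polled by the poster */
    int status;           /* bytes transferred or negative error code */
} example_request_t;

static void example_send_complete(int status, orte_process_name_t* peer,
                                  struct iovec* iov, int count,
                                  orte_rml_tag_t tag, void* cbdata)
{
    example_request_t* req = (example_request_t*)cbdata;
    (void)peer; (void)iov; (void)count; (void)tag;  /* unused in this sketch */
    req->status = status;
    req->done = true;
}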
static void mca_oob_tcp_msg_matched(mca_oob_tcp_msg_t* msg, mca_oob_tcp_msg_t* match)
{
    int i, rc;

    if (match->msg_rc < 0) {
        rc = match->msg_rc;
    } else {
        if (msg->msg_flags & ORTE_RML_ALLOC) {
            match->msg_flags |= ORTE_RML_ALLOC;
        }
        /* if we are just doing a peek, return bytes without dequeuing the message */
        rc = mca_oob_tcp_msg_copy(match, msg->msg_uiov, msg->msg_ucnt);
        if (rc >= 0 && (ORTE_RML_TRUNC & msg->msg_flags)) {
            /* caller asked for the full message size: sum the read iovecs,
             * skipping iov[0], which holds the header */
            rc = 0;
            for (i = 1; i < match->msg_rwcnt + 1; i++) {
                rc += match->msg_rwiov[i].iov_len;
            }
        }
        if (ORTE_RML_PEEK & msg->msg_flags) {
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
            msg->msg_cbfunc(rc, &match->msg_peer, msg->msg_uiov, msg->msg_ucnt,
                            match->msg_hdr.msg_tag, msg->msg_cbdata);
            OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
            return;
        }
    }

    /* otherwise remove the match */
    opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_recv,
                          (opal_list_item_t*)match);

    /* invoke callback */
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    msg->msg_cbfunc(rc, &match->msg_peer, msg->msg_uiov, msg->msg_ucnt,
                    match->msg_hdr.msg_tag, msg->msg_cbdata);
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* return match to free list */
    MCA_OOB_TCP_MSG_RETURN(match);
}
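/*
 * For illustration only - the ORTE_RML_TRUNC paths above (and in
 * mca_oob_tcp_msg_data below) report the full payload size by summing
 * the read iovecs while skipping msg_rwiov[0], which holds the message
 * header.  A hypothetical helper capturing that computation:
 */
static size_t example_payload_size(const struct iovec* rwiov, int rwcnt)
{
    size_t size = 0;
    int i;
    for (i = 1; i < rwcnt + 1; i++) {   /* iov[0] is the header */
        size += rwiov[i].iov_len;
    }
    return size;
}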
void mca_oob_tcp_msg_recv_complete(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    switch (msg->msg_hdr.msg_type) {
    case MCA_OOB_TCP_IDENT:
        mca_oob_tcp_msg_ident(msg, peer);
        break;
    case MCA_OOB_TCP_PING:
        mca_oob_tcp_msg_ping(msg, peer);
        break;
    case MCA_OOB_TCP_DATA:
        mca_oob_tcp_msg_data(msg, peer);
        break;
    default:
        opal_output(0, "%s mca_oob_tcp_msg_recv_complete: invalid message type: %d from peer %s\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->msg_hdr.msg_type,
                    ORTE_NAME_PRINT(&peer->peer_name));
        MCA_OOB_TCP_MSG_RETURN(msg);
        break;
    }
}
/*
 * Cancel a posted receive.
 * @param name (IN) Peer the receive was posted against.
 * @param tag  (IN) Tag the receive was posted with.
 * @retval ORTE_SUCCESS if at least one posted receive was removed,
 *         ORTE_ERR_NOT_FOUND otherwise.
 */
int mca_oob_tcp_recv_cancel(orte_process_name_t* name, int tag)
{
    int matched = 0;
    opal_list_item_t *item, *next;

    /* wait for any previously matched messages to be processed */
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
#if OMPI_ENABLE_PROGRESS_THREADS
    if (opal_event_progress_thread() == false) {
        while (mca_oob_tcp_component.tcp_match_count) {
            opal_condition_wait(&mca_oob_tcp_component.tcp_match_cond,
                                &mca_oob_tcp_component.tcp_match_lock);
        }
    }
#endif

    /* remove any matching posted receives */
    for (item = opal_list_get_first(&mca_oob_tcp_component.tcp_msg_post);
         item != opal_list_get_end(&mca_oob_tcp_component.tcp_msg_post);
         item = next) {
        mca_oob_tcp_msg_t* msg = (mca_oob_tcp_msg_t*)item;
        next = opal_list_get_next(item);
        if (OPAL_EQUAL == opal_dss.compare(name, &msg->msg_peer, ORTE_NAME)) {
            if (msg->msg_hdr.msg_tag == tag) {
                opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_post,
                                      &msg->super.super);
                MCA_OOB_TCP_MSG_RETURN(msg);
                matched++;
            }
        }
    }
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    return (matched > 0) ? ORTE_SUCCESS : ORTE_ERR_NOT_FOUND;
}
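/*
 * Usage sketch (hypothetical caller, illustrative tag value): cancel
 * any receives previously posted for a peer/tag pair.  A return of
 * ORTE_ERR_NOT_FOUND simply means nothing matching was pending.
 */
static void example_cancel_recv(orte_process_name_t* peer)
{
    int rc = mca_oob_tcp_recv_cancel(peer, 100 /* hypothetical tag */);
    if (ORTE_SUCCESS != rc && ORTE_ERR_NOT_FOUND != rc) {
        opal_output(0, "example_cancel_recv: unexpected error %d", rc);
    }
}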
/*
 * Non-blocking version of mca_oob_send().
 *
 * @param target (IN) Opaque name of the destination process.
 * @param origin (IN) Opaque name of the originating process.
 * @param iov    (IN) Array of iovecs describing user buffers and lengths.
 * @param count  (IN) Number of elements in the iovec array.
 * @param tag    (IN) User supplied tag for matching send/recv.
 * @param flags  (IN) Send flags (e.g. ORTE_RML_FLAG_RECURSIVE_CALLBACK).
 * @param cbfunc (IN) Callback function invoked on send completion.
 * @param cbdata (IN) User data passed to the callback function.
 * @return ORTE error code (<0) on error, ORTE_SUCCESS otherwise.
 */
int mca_oob_tcp_send_nb(orte_process_name_t* target,
                        orte_process_name_t* origin,
                        struct iovec* iov,
                        int count,
                        int tag,
                        int flags,
                        orte_rml_callback_fn_t cbfunc,
                        void* cbdata)
{
    mca_oob_tcp_peer_t* peer = mca_oob_tcp_peer_lookup(target);
    mca_oob_tcp_msg_t* msg;
    int size;
    int rc;

    if (NULL == peer) {
        return ORTE_ERR_UNREACH;
    }

    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if (NULL == msg) {
        return rc;
    }

    /* calculate the size of the message */
    size = 0;
    for (rc = 0; rc < count; rc++) {
        size += iov[rc].iov_len;
    }

    if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
        opal_output(0, "%s-%s mca_oob_tcp_send_nb: tag %d size %lu\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->peer_name)),
                    tag, (unsigned long)size);
    }

    /* fill in the header - kept in host byte order here and converted
     * by MCA_OOB_TCP_HDR_HTON just before transmission */
    msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    msg->msg_hdr.msg_origin = *origin;
    msg->msg_hdr.msg_src = *ORTE_PROC_MY_NAME;
    msg->msg_hdr.msg_dst = *target;

    /* create one additional iovec that will hold the header */
    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, count + 1);
    msg->msg_rwiov[0].iov_base = (ompi_iov_base_ptr_t)(&msg->msg_hdr);
    msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr);
    msg->msg_rwptr = msg->msg_rwiov;
    msg->msg_rwcnt = msg->msg_rwnum = count + 1;
    memcpy(msg->msg_rwiov + 1, msg->msg_uiov, sizeof(struct iovec) * msg->msg_ucnt);
    msg->msg_rwbuf = NULL;
    msg->msg_cbfunc = cbfunc;
    msg->msg_cbdata = cbdata;
    msg->msg_complete = false;
    msg->msg_peer = peer->peer_name;

    if (OPAL_EQUAL == mca_oob_tcp_process_name_compare(target, ORTE_PROC_MY_NAME)) {
        /* local delivery */
        rc = mca_oob_tcp_send_self(peer, msg, iov, count);
        if (rc < 0) {
            return rc;
        } else if (size == rc) {
            return ORTE_SUCCESS;
        } else {
            return ORTE_ERROR;
        }
    }

    MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
    rc = mca_oob_tcp_peer_send(peer, msg);
    if (rc != ORTE_SUCCESS) {
        if (rc != ORTE_ERR_ADDRESSEE_UNKNOWN) {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        return rc;
    }
    return ORTE_SUCCESS;
}
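/*
 * Usage sketch (hypothetical caller): post a non-blocking send of a
 * single buffer, reusing the hypothetical example_request_t and
 * example_send_complete defined above.  The iovec lives inside the
 * request so it remains valid until the callback fires; the tag value
 * is illustrative.
 */
static int example_post_send(orte_process_name_t* target,
                             example_request_t* req,
                             void* buf, size_t len)
{
    req->iov.iov_base = (ompi_iov_base_ptr_t)buf;
    req->iov.iov_len = len;
    req->done = false;
    return mca_oob_tcp_send_nb(target, ORTE_PROC_MY_NAME, &req->iov, 1,
                               100 /* hypothetical tag */, 0,
                               example_send_complete, req);
}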
/*
 * Blocking version of mca_oob_send().
 *
 * @param name  (IN) Opaque name of peer process.
 * @param iov   (IN) Array of iovecs describing user buffers and lengths.
 * @param count (IN) Number of elements in the iovec array.
 * @param tag   (IN) User supplied tag for matching send/recv.
 * @param flags (IN) Send flags.
 * @return ORTE error code (<0) on error, or the number of user bytes sent.
 */
int mca_oob_tcp_send(orte_process_name_t* name,
                     struct iovec* iov,
                     int count,
                     int tag,
                     int flags)
{
    mca_oob_tcp_peer_t* peer = mca_oob_tcp_peer_lookup(name);
    mca_oob_tcp_msg_t* msg;
    int size;
    int rc;

    if (NULL == peer) {
        return ORTE_ERR_UNREACH;
    }

    if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send: tag %d\n",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(&(peer->peer_name)), tag);
    }

    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if (NULL == msg) {
        return rc;
    }

    /* calculate the size of the message */
    size = 0;
    for (rc = 0; rc < count; rc++) {
        size += iov[rc].iov_len;
    }

    /* fill in the header - kept in host byte order here and converted
     * by MCA_OOB_TCP_HDR_HTON just before transmission */
    msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    if (NULL == orte_process_info.my_name) {
        msg->msg_hdr.msg_src = *ORTE_NAME_INVALID;
    } else {
        msg->msg_hdr.msg_src = *orte_process_info.my_name;
    }
    msg->msg_hdr.msg_dst = *name;

    /* create one additional iovec that will hold the header */
    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, count + 1);
    msg->msg_rwiov[0].iov_base = (ompi_iov_base_ptr_t)(&msg->msg_hdr);
    msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr);
    msg->msg_rwptr = msg->msg_rwiov;
    msg->msg_rwcnt = msg->msg_rwnum = count + 1;
    memcpy(msg->msg_rwiov + 1, msg->msg_uiov, sizeof(struct iovec) * msg->msg_ucnt);
    msg->msg_rwbuf = NULL;
    msg->msg_cbfunc = NULL;
    msg->msg_cbdata = NULL;
    msg->msg_complete = false;
    msg->msg_peer = peer->peer_name;

    if (NULL != name && NULL != orte_process_info.my_name &&
        OPAL_EQUAL == mca_oob_tcp_process_name_compare(name, orte_process_info.my_name)) {
        /* local delivery */
        return mca_oob_tcp_send_self(peer, msg, iov, count);
    }

    MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
    rc = mca_oob_tcp_peer_send(peer, msg);
    if (rc != ORTE_SUCCESS) {
        MCA_OOB_TCP_MSG_RETURN(msg);
        return rc;
    }

    /* wait for the send to complete */
    rc = mca_oob_tcp_msg_wait(msg, &size);
    MCA_OOB_TCP_MSG_RETURN(msg);
    if (rc != ORTE_SUCCESS) {
        return rc;
    }
    /* don't count the header bytes in the returned total */
    size -= sizeof(mca_oob_tcp_hdr_t);
    return size;
}
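/*
 * Usage sketch (hypothetical caller): a blocking send of one buffer.
 * Since mca_oob_tcp_send waits for completion, a stack iovec is safe
 * here.  On success the return value is the number of user bytes sent
 * (the header is subtracted above); negative values are error codes.
 * The tag value is illustrative.
 */
static int example_blocking_send(orte_process_name_t* peer,
                                 void* buf, size_t len)
{
    struct iovec iov;
    iov.iov_base = (ompi_iov_base_ptr_t)buf;
    iov.iov_len = len;
    return mca_oob_tcp_send(peer, &iov, 1, 100 /* hypothetical tag */, 0);
}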
/*
 * Non-blocking version of mca_oob_recv().
 *
 * @param peer   (IN) Opaque name of peer process, or ORTE_NAME_WILDCARD
 *                    for a wildcard receive.
 * @param iov    (IN) Array of iovecs describing user buffers and lengths.
 * @param count  (IN) Number of elements in the iovec array.
 * @param tag    (IN) User supplied tag for matching send/recv.
 * @param flags  (IN) May be ORTE_RML_PEEK to return up to size bytes of the
 *                    message without removing it from the queue.
 * @param cbfunc (IN) Callback function invoked on recv completion.
 * @param cbdata (IN) User data passed to the callback function.
 * @return ORTE error code (<0) on error, ORTE_SUCCESS otherwise.
 */
int mca_oob_tcp_recv_nb(orte_process_name_t* peer,
                        struct iovec* iov,
                        int count,
                        int tag,
                        int flags,
                        orte_rml_callback_fn_t cbfunc,
                        void* cbdata)
{
    mca_oob_tcp_msg_t* msg;
    mca_oob_tcp_msg_t* match;
    int i, rc, size = 0;

    /* validate params */
    if (NULL == iov || 0 == count) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* allocate/initialize the posted receive - note that the match lock
     * has not been acquired yet, so there is nothing to unlock here */
    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if (NULL == msg) {
        return rc;
    }

    /* determine overall size of user supplied buffer */
    for (i = 0; i < count; i++) {
        size += iov[i].iov_len;
    }

    /* fill in the header */
    msg->msg_hdr.msg_origin = *peer;
    msg->msg_hdr.msg_src = *ORTE_PROC_MY_NAME;
    msg->msg_hdr.msg_dst = *peer;
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    msg->msg_cbfunc = cbfunc;
    msg->msg_cbdata = cbdata;
    msg->msg_complete = false;
    msg->msg_peer = *peer;
    msg->msg_rwbuf = NULL;
    msg->msg_rwiov = NULL;

    /* acquire the match lock */
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
    if (flags & ORTE_RML_PERSISTENT) {
        /* a persistent posting stays on the list and consumes every match */
        opal_list_append(&mca_oob_tcp_component.tcp_msg_post,
                         (opal_list_item_t*)msg);
        while (NULL != (match = mca_oob_tcp_msg_match_recv(peer, tag))) {
            mca_oob_tcp_msg_matched(msg, match);
        }
    } else {
        /* check to see if a matching receive is on the list */
        match = mca_oob_tcp_msg_match_recv(peer, tag);
        if (NULL != match) {
            mca_oob_tcp_msg_matched(msg, match);
            MCA_OOB_TCP_MSG_RETURN(msg);
        } else {
            opal_list_append(&mca_oob_tcp_component.tcp_msg_post,
                             (opal_list_item_t*)msg);
        }
    }
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    return ORTE_SUCCESS;
}
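/*
 * Usage sketch (hypothetical caller): post a persistent wildcard
 * receive into a caller-owned buffer, reusing the hypothetical
 * example_request_t/example_send_complete from above.  With
 * ORTE_RML_PERSISTENT the posting stays on tcp_msg_post and the
 * callback may fire once per matching message, so the buffer must
 * remain valid until the receive is cancelled.  The tag value is
 * illustrative.
 */
static int example_post_recv(example_request_t* req, void* buf, size_t len)
{
    req->iov.iov_base = (ompi_iov_base_ptr_t)buf;
    req->iov.iov_len = len;
    req->done = false;
    return mca_oob_tcp_recv_nb(ORTE_NAME_WILDCARD, &req->iov, 1,
                               100 /* hypothetical tag */,
                               ORTE_RML_PERSISTENT,
                               example_send_complete, req);
}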
/*
 * Progress a completed recv:
 * (1) signal a posted recv as complete
 * (2) queue an unexpected message in the recv list
 */
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    /* attempt to match unexpected message to a posted recv */
    mca_oob_tcp_msg_t* post;
    int rc;

    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* if I'm not an MPI proc, check if this message came from
     * another job family - MPI procs don't need to do this because
     * they always route through their daemons anyway */
    if (!ORTE_PROC_IS_MPI) {
        if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
             ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
            (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
            /* this message came from a different job family that is not
             * a local slave, so we may not know how to route any reply
             * back to the originator. Update our route so we can
             * dynamically build the routing table */
            if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin),
                                                               &(msg->msg_hdr.msg_src)))) {
                /* Nothing we can do about errors here as we definitely want
                 * the receive to complete, but at least bark loudly */
                ORTE_ERROR_LOG(rc);
            }
        }
    }

    /* match msg against posted receives */
    post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag);
    if (NULL != post) {
        if (NULL == post->msg_uiov || 0 == post->msg_ucnt) {
            opal_output(0, "msg_data returning bad param");
            post->msg_rc = ORTE_ERR_BAD_PARAM;
        } else {
            /* copy msg data into posted recv */
            if (post->msg_flags & ORTE_RML_ALLOC) {
                msg->msg_flags |= ORTE_RML_ALLOC;
            }
            post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
            if (post->msg_flags & ORTE_RML_TRUNC) {
                /* report the full message size, skipping the header iovec */
                int i, size = 0;
                for (i = 1; i < msg->msg_rwcnt + 1; i++) {
                    size += msg->msg_rwiov[i].iov_len;
                }
                post->msg_rc = size;
            }
        }

        if (post->msg_flags & ORTE_RML_PEEK) {
            /* will need message for the actual receive */
            opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
        } else {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        mca_oob_tcp_component.tcp_match_count++;
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

        if (post->msg_flags & ORTE_RML_PERSISTENT) {
            post->msg_cbfunc(post->msg_rc, &peer->peer_name, post->msg_uiov,
                             post->msg_ucnt, post->msg_hdr.msg_tag,
                             post->msg_cbdata);
        } else {
            mca_oob_tcp_msg_complete(post, &msg->msg_hdr.msg_origin);
        }

        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
        if (--mca_oob_tcp_component.tcp_match_count == 0) {
            opal_condition_signal(&mca_oob_tcp_component.tcp_match_cond);
        }
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    } else {
        opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t*)msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    }
}
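/*
 * For illustration only - the essence of the matching above, reduced to
 * a self-contained sketch with hypothetical names.  Two lists are kept:
 * posted receives (tcp_msg_post) and unexpected arrivals (tcp_msg_recv).
 * An arriving message first searches the posted list (as msg_data does
 * via mca_oob_tcp_msg_match_post); a new posting first searches the
 * unexpected list (as recv_nb does via mca_oob_tcp_msg_match_recv).
 * Whichever side arrives second performs the match and unlinks it.
 */
typedef struct example_pending {
    struct example_pending* next;
    int tag;
} example_pending_t;

/* unlink and return the first entry matching tag, or NULL if none */
static example_pending_t* example_match(example_pending_t** list, int tag)
{
    example_pending_t** prev = list;
    example_pending_t* cur;
    for (cur = *list; NULL != cur; prev = &cur->next, cur = cur->next) {
        if (cur->tag == tag) {
            *prev = cur->next;
            return cur;
        }
    }
    return NULL;
}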