/**
 * RML exception callback for the HNP IOF component.
 *
 * The per-peer cleanup logic below is currently compiled out (#if 0), so
 * the only thing this handler does at run time is emit a verbose trace
 * message.  Neither argument is used by the live code.
 *
 * @param peer    Name of the peer the exception was reported for.
 * @param reason  Exception reason supplied by the RML.
 */
static void orte_iof_hnp_exception_handler(const orte_process_name_t* peer,
                                           orte_rml_exception_t reason)
{
#if 0
    orte_iof_base_endpoint_t *endpoint;

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "iof svc exception handler! %s\n",
                        ORTE_NAME_PRINT((orte_process_name_t*)peer));

    /* If we detect an exception on the RML connection to a peer,
       delete all of its subscriptions and publications.  Note that
       exceptions can be detected during a normal RML shutdown; they
       are recoverable events (no need to abort). */
    orte_iof_hnp_sub_delete_all(peer);
    orte_iof_hnp_pub_delete_all(peer);
    opal_output_verbose(1, orte_iof_base.iof_output, "deleted all pubs and subs\n");

    /* Find any streams on any endpoints for this peer and close them */
    while (NULL !=
           (endpoint = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL,
                                                    ORTE_IOF_ANY))) {
        orte_iof_base_endpoint_closed(endpoint);

        /* Delete the endpoint that we just matched */
        orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
    }
#endif

    /* Only live statement: trace that the handler ran. */
    opal_output_verbose(1, orte_iof_base.iof_output, "done with exception handler\n");
}
/**
 * Forward a message to a sink endpoint.
 *
 * Every callback registered on the endpoint is invoked with the payload.
 * If the endpoint has a valid file descriptor and nothing is already
 * queued, the payload is written directly to the descriptor; any bytes
 * that could not be written (or the whole payload, when earlier fragments
 * are still pending) are copied into a fragment and appended to
 * ep_sink_frags to be drained once the descriptor becomes writeable.
 * A zero-length message is treated as a request to close the endpoint.
 *
 * @param endpoint  Endpoint to forward to; must be in ORTE_IOF_SINK mode.
 * @param origin    Process name stored as the fragment's source.
 * @param hdr       Message header; hdr->msg_len is the payload length.
 * @param data      Payload bytes (hdr->msg_len of them).
 *
 * @return ORTE_ERR_BAD_PARAM if the endpoint is not a sink, or if the
 *         bytes that must be buffered do not fit in a fragment;
 *         ORTE_ERR_OUT_OF_RESOURCE if no fragment could be allocated;
 *         ORTE_SUCCESS otherwise (including when a hard write error
 *         caused the endpoint to be closed).
 */
int orte_iof_base_endpoint_forward(
    orte_iof_base_endpoint_t* endpoint,
    const orte_process_name_t* origin,
    orte_iof_base_msg_header_t* hdr,
    const unsigned char* data)
{
    opal_list_item_t* item;
    orte_iof_base_frag_t* frag;
    size_t len = hdr->msg_len;
    size_t written = 0;   /* bytes of data already pushed to the fd */
    int rc = 0;

    if(endpoint->ep_mode != ORTE_IOF_SINK) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* allocate and initialize a fragment */
    ORTE_IOF_BASE_FRAG_ALLOC(frag, rc);
    if(NULL == frag) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    frag->frag_owner = endpoint;
    frag->frag_src = *origin;
    frag->frag_hdr.hdr_msg = *hdr;
    frag->frag_len = len;

    /* call any registered callbacks */
    for(item =  opal_list_get_first(&endpoint->ep_callbacks);
        item != opal_list_get_end(&endpoint->ep_callbacks);
        item =  opal_list_get_next(item)) {
        orte_iof_base_callback_t* cb = (orte_iof_base_callback_t*)item;
        cb->cb_func(
            &hdr->msg_origin,
            hdr->msg_tag,
            cb->cb_data,
            data,
            hdr->msg_len);
    }

    if(endpoint->ep_fd >= 0) {

        /* try to write w/out copying data */
        if(opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
            if(len == 0) {
                /* No ACK required because the frag is of 0 length
                   (ACKs are based on fragment length; an ACK of 0
                   bytes would do nothing) */
                ORTE_IOF_BASE_FRAG_RETURN(frag);
                orte_iof_base_endpoint_closed(endpoint);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                return ORTE_SUCCESS;
            }

            rc = write(endpoint->ep_fd, data, len);
            if(rc < 0) {
                /* capture errno before anything else can overwrite it */
                int errno_save = errno;

                if (EAGAIN != errno_save && EINTR != errno_save) {
                    orte_iof_base_endpoint_closed(endpoint);

                    /* Send a ACK-AND-CLOSE back to the service so
                       that it knows not to wait for any further
                       ACKs */
                    orte_iof_base_frag_ack(frag, true);

                    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                    return ORTE_SUCCESS;
                }
                rc = 0; /* don't affect the remaining length of the data */
            }
            written = (size_t)rc;
            frag->frag_len -= rc;
        }

        /* Two ways to get here:
           1. ep_sink_frags was not empty, so no direct write was
              attempted and the whole payload is still outstanding
              (written == 0).
           2. ep_sink_frags was empty and the direct write above may
              have consumed some or all of the payload. */
        if(frag->frag_len > 0 || 0 == len) {
            /* The remainder is buffered in the fragment's fixed-size
               data array; refuse anything that would overrun it
               instead of corrupting memory.  Callbacks (and possibly a
               partial write) have already happened at this point. */
            if((size_t)frag->frag_len > sizeof(frag->frag_data)) {
                ORTE_IOF_BASE_FRAG_RETURN(frag);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                return ORTE_ERR_BAD_PARAM;
            }

            /* handle incomplete write - also queue up 0 byte message
             * and recognize this as a request to close the descriptor
             * when all pending operations complete.
             * The copy starts after the bytes already written; the
             * offset is tracked separately from rc so that it never
             * depends on the status left behind by the allocator. */
            frag->frag_ptr = frag->frag_data;
            memcpy(frag->frag_ptr, data + written, frag->frag_len);
            opal_list_append(&endpoint->ep_sink_frags, &frag->super.super);

            /* If we're the first frag to be put on the sink_frags
               list, then enable the event that will tell us when the
               fd becomes writeable.
               NOTE(review): the log text below says "reading" even
               though this arms the writeable notification. */
            if(opal_list_get_size(&endpoint->ep_sink_frags) == 1) {
                opal_output(orte_iof_base.iof_output, "iof_base_endpoint forwarding frag; re-enabled reading for endpoint");
                opal_event_add(&endpoint->ep_event,0);
            }
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        } else {
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            /* everything was written: acknowledge fragment */
            orte_iof_base_frag_ack(frag, false);
        }
    } else {
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        /* no descriptor to write to: acknowledge fragment */
        orte_iof_base_frag_ack(frag, false);
    }
    return ORTE_SUCCESS;
}
static void orte_iof_base_endpoint_write_handler(int sd, short flags, void *user) { int errno_save; orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)user; /* * step through the list of queued fragments and attempt to write * until the output descriptor would block */ OPAL_THREAD_LOCK(&orte_iof_base.iof_lock); while(opal_list_get_size(&endpoint->ep_sink_frags)) { orte_iof_base_frag_t* frag = (orte_iof_base_frag_t*)opal_list_get_first(&endpoint->ep_sink_frags); int rc; /* close connection on zero byte message */ if(frag->frag_len == 0) { orte_iof_base_endpoint_closed(endpoint); OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); return; } /* progress pending messages */ rc = write(endpoint->ep_fd, frag->frag_ptr, frag->frag_len); errno_save = errno; if (rc < 0) { if (EAGAIN == errno_save) { break; } if (EINTR == errno_save) { continue; } /* All other errors -- to include sigpipe -- mean that Something Bad happened and we should abort in despair. */ orte_iof_base_endpoint_closed(endpoint); /* Send a ACK-AND-CLOSE back to the service so that it knows not to wait for any further ACKs */ orte_iof_base_frag_ack(frag, true); OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); return; } frag->frag_len -= rc; frag->frag_ptr += rc; if(frag->frag_len > 0) { break; } opal_list_remove_item(&endpoint->ep_sink_frags, &frag->super.super); OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); orte_iof_base_frag_ack(frag, false); OPAL_THREAD_LOCK(&orte_iof_base.iof_lock); } /* is there anything left to write? */ if(opal_list_get_size(&endpoint->ep_sink_frags) == 0) { opal_event_del(&endpoint->ep_event); if(orte_iof_base.iof_waiting) { opal_condition_signal(&orte_iof_base.iof_condition); } } OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); }
static void orte_iof_base_endpoint_read_handler(int fd, short flags, void *cbdata) { orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)cbdata; orte_iof_base_frag_t* frag; orte_iof_base_header_t* hdr; int rc; /* allocate a fragment */ ORTE_IOF_BASE_FRAG_ALLOC(frag,rc); if(NULL == frag) { /* JMS shouldn't we do something here? */ return; } OPAL_THREAD_LOCK(&orte_iof_base.iof_lock); /* read up to the fragment size */ #if !defined(__WINDOWS__) rc = read(fd, frag->frag_data, sizeof(frag->frag_data)); #else { DWORD readed; HANDLE handle = (HANDLE)_get_osfhandle(fd); ReadFile(handle, frag->frag_data, sizeof(frag->frag_data), &readed, NULL); rc = (int)readed; } #endif /* !defined(__WINDOWS__) */ if (rc < 0) { /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { ORTE_IOF_BASE_FRAG_RETURN(frag); OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); return; } /* Error on the connection */ orte_iof_base_endpoint_closed(endpoint); /* Fall through to send 0 byte message to other side indicating that the endpoint is now closed. 
*/ rc = 0; } else if (rc == 0) { /* peer has closed connection (will fall through to send a 0 byte message, therefore telling the RML side that the fd side has closed its connection) */ orte_iof_base_endpoint_closed(endpoint); } /* Do not append the fragment before we know that we have some data (even a 0 byte mesage is OK -- that indicates that the file descriptor has closed) */ frag->frag_owner = endpoint; opal_list_append(&endpoint->ep_source_frags, &frag->super.super); opal_output(orte_iof_base.iof_output, "iof_base_endpoint: read handler, source_frags list len: %d", (int) opal_list_get_size(&endpoint->ep_source_frags)); frag->frag_iov[1].iov_len = frag->frag_len = rc; /* fill in the header */ hdr = &frag->frag_hdr; hdr->hdr_common.hdr_type = ORTE_IOF_BASE_HDR_MSG; hdr->hdr_msg.msg_origin = endpoint->ep_origin; hdr->hdr_msg.msg_proxy = *ORTE_PROC_MY_NAME; hdr->hdr_msg.msg_tag = endpoint->ep_tag; hdr->hdr_msg.msg_seq = endpoint->ep_seq; hdr->hdr_msg.msg_len = frag->frag_len; ORTE_IOF_BASE_HDR_MSG_HTON(hdr->hdr_msg); /* if window size has been exceeded - disable forwarding */ endpoint->ep_seq += frag->frag_len; if(ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) > orte_iof_base.iof_window_size) { opal_output(orte_iof_base.iof_output, "iof_base_endpoint read handler: window exceeded -- reading disabled"); opal_event_del(&endpoint->ep_event); } OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock); /* Increment the refcount on the endpoint so that it doesn't get deleted before the frag */ OBJ_RETAIN(endpoint); /* start non-blocking RML call to forward received data */ rc = orte_rml.send_nb( orte_iof_base.iof_service, frag->frag_iov, 2, ORTE_RML_TAG_IOF_SVC, 0, orte_iof_base_endpoint_send_cb, frag); }