static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); /* lock us up to protect global operations */ OPAL_THREAD_LOCK(&mca_iof_orted_component.lock); wev->pending = false; while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { /* this indicates we are to close the fd - there is * nothing to write */ OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:orted closing fd %d on write event due to zero bytes output", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; goto DEPART; } num_written = write(wev->fd, output->data, output->numbytes); OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler wrote %d bytes", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); if (num_written < 0) { if (EAGAIN == errno || EINTR == errno) { /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. 
*/ wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; } /* otherwise, something bad happened so all we can do is declare an * error and abort */ OBJ_RELEASE(output); OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof:orted closing fd %d on write event due to negative bytes written", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); OBJ_RELEASE(wev); sink->wev = NULL; /* tell the HNP to stop sending us stuff */ if (!mca_iof_orted_component.xoff) { mca_iof_orted_component.xoff = true; orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF); } goto DEPART; } else if (num_written < output->numbytes) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s orted:stdin:write:handler incomplete write %d - adjusting data", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written)); /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again * when the fd is ready. */ wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; } OBJ_RELEASE(output); } CHECK: if (mca_iof_orted_component.xoff) { /* if we have told the HNP to stop reading stdin, see if * the proc has absorbed enough to justify restart * * RHC: Note that when multiple procs want stdin, we * can get into a fight between a proc turnin stdin * back "on" and other procs turning it "off". There * is no clear way to resolve this as different procs * may take input at different rates. */ if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) { /* restart the read */ mca_iof_orted_component.xoff = false; orte_iof_orted_send_xonxoff(ORTE_IOF_XON); } } DEPART: /* unlock and go */ OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock); }
/*
 * The only messages coming to an orted are either:
 *
 * (a) stdin, which is to be copied to whichever local
 *     procs "pull'd" a copy
 *
 * (b) flow control messages
 *
 * This receive callback handles (a) only: a message whose stream tag is
 * not ORTE_IOF_STDIN is logged as ORTE_ERR_COMM_FAILURE and dropped.
 *
 * status, sender, tag, cbdata - unused here
 * buffer - packed message holding, in order: the stream tag, the target
 *          process name, and up to ORTE_IOF_BASE_MSG_MAX data bytes
 *
 * The data is queued on every open local sink whose jobid matches the
 * target and whose vpid matches it (or the target vpid is
 * ORTE_VPID_WILDCARD). If queueing pushes a sink past
 * ORTE_IOF_MAX_INPUT_BUFFERS, XOFF is sent (once).
 */
void orte_iof_orted_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t* buffer, orte_rml_tag_t tag,
                         void* cbdata)
{
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_process_name_t target;
    opal_list_item_t *item;
    int rc;

    /* see what stream generated this data */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* if this isn't stdin, then we have an error */
    if (ORTE_IOF_STDIN != stream) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        goto CLEAN_RETURN;
    }

    /* unpack the intended target */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &target, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* unpack the data - numbytes goes in as the buffer capacity */
    numbytes = ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes for local proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&target)));

    /* cycle through our list of sinks */
    for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
         item != opal_list_get_end(&mca_iof_orted_component.sinks);
         item = opal_list_get_next(item)) {
        orte_iof_sink_t* sink = (orte_iof_sink_t*)item;

        /* is this intended for this jobid? */
        if (target.jobid != sink->name.jobid) {
            continue;
        }
        /* is this intended for all vpids or this vpid? */
        if (ORTE_VPID_WILDCARD != target.vpid &&
            sink->name.vpid != target.vpid) {
            continue;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s writing data to local proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&sink->name)));

        if (NULL == sink->wev || sink->wev->fd < 0) {
            /* this sink was already closed - ignore this data for it,
             * but keep going: with a wildcard target the remaining
             * sinks must still receive the data (and in particular the
             * zero-byte event that closes their stdin)
             */
            continue;
        }

        /* send the bytes down the pipe - we even send 0 byte events
         * down the pipe so it forces out any preceding data before
         * closing the output stream
         */
        if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&target, stream, data, numbytes, sink->wev)) {
            /* getting too backed up - tell the HNP to hold off any more input if we
             * haven't already told it
             */
            if (!mca_iof_orted_component.xoff) {
                mca_iof_orted_component.xoff = true;
                orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF);
            }
        }
    }

CLEAN_RETURN:
    return;
}