/*
 * Echo a message we received on behalf of a remote peer onto our own
 * stdout/stderr channels.
 *
 * peer       - the process the message originated from (passed through to
 *              the write machinery for labeling)
 * source_tag - IOF stream tag; stdout-tagged data (or any data when XML
 *              output is enabled) goes to our stdout channel, everything
 *              else to stderr
 * msg        - NUL-terminated text to emit
 *
 * Always returns ORTE_SUCCESS.
 */
static int tool_output(const orte_process_name_t* peer, orte_iof_tag_t source_tag, const char *msg)
{
    const unsigned char *bytes = (const unsigned char*)msg;
    size_t len = strlen(msg);
    /* XML mode routes everything through stdout so the markup stays well-formed */
    bool use_stdout = (ORTE_IOF_STDOUT & source_tag) || orte_xml_output;

    if (use_stdout) {
        orte_iof_base_write_output(peer, source_tag, bytes, len,
                                   orte_iof_base.iof_write_stdout->wev);
    } else {
        orte_iof_base_write_output(peer, source_tag, bytes, len,
                                   orte_iof_base.iof_write_stderr->wev);
    }
    return ORTE_SUCCESS;
}
static void mrhnp_complete(const orte_job_t *jdata) { orte_job_t *jptr; orte_job_map_t *map; orte_proc_t *daemon; orte_iof_proc_t *proct; unsigned char data[1]; opal_list_item_t *item; int i; orte_node_t *node; orte_jobid_t stdout_target, *jbptr; stdout_target = ORTE_JOBID_INVALID; jbptr = &stdout_target; if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) { /* nothing to do */ return; } /* the job is complete - close out the stdin * of any procs it was feeding */ jptr = orte_get_job_data_object(stdout_target); map = jptr->map; /* cycle thru the map to find any node that has at least * one proc from this job */ for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } daemon = node->daemon; if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) { for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs); item != opal_list_get_end(&mca_iof_mr_hnp_component.procs); item = opal_list_get_next(item)) { proct = (orte_iof_proc_t*)item; if (proct->name.jobid == jptr->jobid) { if (NULL != proct->sink) { /* need to write a 0-byte event to clear the stream and close it */ orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev); proct->sink = NULL; } } } } else { OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s sending close stdin to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name))); /* need to send a 0-byte message to clear the stream and close it */ send_data(&daemon->name, ORTE_IOF_STDIN, jptr->jobid, data, 0); } } }
/* this is the read handler for my own child procs and stdin
 *
 * Fired by the event library when fd becomes readable. Three cases:
 *   1) data read from our own stdin -> fan it out to every job registered
 *      in stdin_jobs (writing locally or relaying to remote daemons);
 *   2) stdout from a child proc in a map-reduce chain -> forward it as
 *      stdin to the next job in the chain (unless this job is the end of
 *      the chain, in which case we print it ourselves);
 *   3) 0 bytes read -> the stream closed; tear down the matching read
 *      event and, if all streams for that proc are done, declare its IOF
 *      complete.
 * cbdata is the orte_iof_read_event_t that fired.
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    bool write_out=false;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        /* NOTE(review): ReadFile's return value is not checked here, so a
         * Windows read error leaves 'readed' potentially uninitialized */
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(rev->ev, 0);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order to
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired
     */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
        }
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it
         */
        mca_iof_mr_hnp_component.stdinev->active = false;
        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs
         */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            map = jdata->map;
            /* walk every node hosting a proc of this job */
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;
                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                /* flow control: bail without re-adding the read event;
                                 * the write machinery will restart us when drained */
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* short read == EOF: need to write a 0-byte event to
                                 * clear the stream and close it */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));
                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }
        /* if num_bytes was zero, then we need to terminate the event */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;
            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }

 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event.
                   This deletes
                 * the read event and closes the file descriptor
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            /* only echo stdout locally when this job is the end of the
             * chain (write_out set above); otherwise it was forwarded */
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(rev->ev, 0);
    return;
}
/*
 * The only messages coming to an orted are either:
 *
 * (a) stdin, which is to be copied to whichever local
 *     procs "pull'd" a copy
 *
 * (b) flow control messages
 *
 * Unpacks stream tag, target jobid and payload from the buffer, then
 * writes the payload to the stdin sink of every local proc belonging to
 * that jobid. If a sink backs up past ORTE_IOF_MAX_INPUT_BUFFERS, an
 * XOFF is sent to the HNP (once) to pause further input.
 *
 * Fix vs. original: sink->sink was dereferenced (sink->sink->wev) without
 * a NULL check, though sibling code in this file explicitly guards
 * against a NULL ->sink before touching ->sink->wev. A NULL sink is now
 * treated the same as an already-closed one.
 */
void orte_iof_mrorted_recv(int status, orte_process_name_t* sender,
                           opal_buffer_t* buffer, orte_rml_tag_t tag,
                           void* cbdata)
{
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_jobid_t jobid;
    opal_list_item_t *item;
    int rc;

    /* see what stream generated this data */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* if this isn't stdin, then we have an error */
    if (ORTE_IOF_STDIN != stream) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        goto CLEAN_RETURN;
    }

    /* unpack the intended target */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &count, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes for local job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_JOBID_PRINT(jobid)));

    /* cycle through our list of procs */
    for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
         item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
         item = opal_list_get_next(item)) {
        orte_iof_proc_t* sink = (orte_iof_proc_t*)item;

        /* is this intended for this jobid? */
        if (jobid == sink->name.jobid) {
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s writing data to local proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->name)));
            if (NULL == sink->sink || NULL == sink->sink->wev || sink->sink->wev->fd < 0) {
                /* this sink was already closed - ignore this data.
                 * NOTE(review): this aborts delivery to any remaining procs
                 * of the jobid as well - confirm that is the intent */
                goto CLEAN_RETURN;
            }
            /* send the bytes down the pipe - we even send 0 byte events
             * down the pipe so it forces out any preceding data before
             * closing the output stream
             */
            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&sink->name, stream, data, numbytes, sink->sink->wev)) {
                /* getting too backed up - tell the HNP to hold off any more input if we
                 * haven't already told it
                 */
                if (!sink->sink->xoff) {
                    sink->sink->xoff = true;
                    orte_iof_mrorted_send_xonxoff(&sink->name, ORTE_IOF_XOFF);
                }
            }
        }
    }

 CLEAN_RETURN:
    return;
}
/*
 * Read handler for local proc output on an orted in a map-reduce chain.
 *
 * Reads up to ORTE_IOF_BASE_MSG_MAX bytes from fd. Stdout may be
 * redirected: if the source job carries a STDOUT_TARGET attribute, the
 * data is fed as stdin to the target job's procs (locally or via their
 * daemons); otherwise it is packed and relayed to the HNP. A read of 0
 * bytes (or a hard error) tears down the matching per-proc read event
 * and, when all three streams are closed, marks the proc's IOF complete.
 * cbdata is the orte_iof_read_event_t that fired.
 */
void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_job_map_t *map;
    int i;
    bool write_out=false;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_jobid_t stdout_target, *jbptr;

    /* read up to the fragment size */
    numbytes = read(fd, data, sizeof(data));

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:mrorted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == errno || EINTR == errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            }
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:mrorted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&rev->name), fd));
        }
        /* numbytes must have been zero, so go down and close the fd etc */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_mr_orted_component.sinks);
             item != opal_list_get_end(&mca_iof_mr_orted_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }
            mask = ORTE_NS_CMP_ALL;
            /* is this the desired proc? */
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
        /* note: fall through - file output does NOT suppress forwarding below */
    }

    if (ORTE_IOF_STDOUT & rev->tag) {
        /* see if we need to forward this output */
        stdout_target = ORTE_JOBID_INVALID;
        jbptr = &stdout_target;
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;
            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }

 PROCESS:
    if (write_out) {
        /* prep the buffer */
        buf = OBJ_NEW(opal_buffer_t);

        /* pack the stream first - we do this so that
           flow control messages can
         * consist solely of the tag
         */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        /* pack name of process that gave us this data */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        /* pack the data - only pack the #bytes we read! */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        /* start non-blocking RML call to forward received data */
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s iof:mrorted:read handler sending %d bytes to HNP",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

        /* RML takes ownership of buf on success */
        orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                                orte_rml_send_callback, NULL);
    }

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, find this proc
     * on our list and clean up
     */
    for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
         item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        mask = ORTE_NS_CMP_ALL;
        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
            /* found it - release corresponding event.
               This deletes
             * the read event and closes the file descriptor
             */
            if (rev->tag & ORTE_IOF_STDOUT) {
                if( NULL != proct->revstdout ) {
                    OBJ_RELEASE(proct->revstdout);
                }
            } else if (rev->tag & ORTE_IOF_STDERR) {
                if( NULL != proct->revstderr ) {
                    OBJ_RELEASE(proct->revstderr);
                }
            } else if (rev->tag & ORTE_IOF_STDDIAG) {
                if( NULL != proct->revstddiag ) {
                    OBJ_RELEASE(proct->revstddiag);
                }
            }
            /* check to see if they are all done */
            if (NULL == proct->revstdout &&
                NULL == proct->revstderr &&
                NULL == proct->revstddiag) {
                /* this proc's iof is complete */
                opal_list_remove_item(&mca_iof_mr_orted_component.procs, item);
                ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                OBJ_RELEASE(proct);
            }
            break;
        }
    }
    /* buf is only non-NULL here if a pack step failed before sending */
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
/*
 * Read handler for output from a local child proc on an orted.
 *
 * Reads a fragment from fd, optionally echoes it to a per-event file
 * sink (rev->sink), and - if the proc's output is flagged for copying
 * (proct->copy) - packs tag/name/data into a buffer and relays it to
 * the HNP over the management conduit. A 0-byte read or hard error
 * releases the matching stream event; when all three streams are closed
 * the proc is removed from the component list and its IOF marked
 * complete. cbdata is the orte_iof_read_event_t that fired.
 */
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    orte_iof_proc_t *proct = (orte_iof_proc_t*)rev->proc;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        /* NOTE(review): ReadFile's return value is not checked */
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    if (NULL == proct) {
        /* nothing we can do */
        ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&proct->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == errno || EINTR == errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            }
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:orted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&proct->name), fd));
        }
        /* numbytes must have been zero, so go down and close the fd etc */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != rev->sink) {
        /* output to the corresponding file */
        orte_iof_base_write_output(&proct->name, rev->tag, data, numbytes, rev->sink->wev);
    }

    if (!proct->copy) {
        /* not copying to the HNP - just rearm and wait for more */
        /* re-add the event */
        opal_event_add(rev->ev, 0);
        return;
    }

    /* prep the buffer */
    buf = OBJ_NEW(opal_buffer_t);

    /* pack the stream first - we do this so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack name of process that gave us this data */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &proct->name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack the data - only pack the #bytes we read! */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* start non-blocking RML call to forward received data */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler sending %d bytes to HNP",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

    /* RML takes ownership of buf on success */
    orte_rml.send_buffer_nb(orte_mgmt_conduit,
                            ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                            send_cb, NULL);

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, release the
     * corresponding event. This deletes the read event and closes
     * the file descriptor
     */
    if (rev->tag & ORTE_IOF_STDOUT) {
        if( NULL != proct->revstdout ) {
            /* flush any statically-buffered output before tearing down */
            orte_iof_base_static_dump_output(proct->revstdout);
            OBJ_RELEASE(proct->revstdout);
        }
    } else if (rev->tag & ORTE_IOF_STDERR) {
        if( NULL != proct->revstderr ) {
            orte_iof_base_static_dump_output(proct->revstderr);
            OBJ_RELEASE(proct->revstderr);
        }
    } else if (rev->tag & ORTE_IOF_STDDIAG) {
        if( NULL != proct->revstddiag ) {
            orte_iof_base_static_dump_output(proct->revstddiag);
            OBJ_RELEASE(proct->revstddiag);
        }
    }
    /* check to see if they are all done */
    if (NULL == proct->revstdout &&
        NULL == proct->revstderr &&
        NULL == proct->revstddiag) {
        /* this proc's iof is complete */
        opal_list_remove_item(&mca_iof_orted_component.procs, &proct->super);
        ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
        OBJ_RELEASE(proct);
    }
    /* buf is only non-NULL here if a pack step failed before sending */
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
/* this is the read handler for my own child procs. In this case,
 * the data is going nowhere - I just output it myself
 *
 * Two cases, all under the component lock:
 *   1) our own stdin fired -> distribute the bytes to every registered
 *      stdin sink (writing locally, or sending to the hosting daemon),
 *      honoring flow control and tty-vs-pipe restart rules;
 *   2) output from a local child -> forward to any tool sinks that
 *      requested a copy, then either write it to the user-requested
 *      output file or echo it on our own stdout/stderr.
 * A 0-byte read closes the stream: the matching read event is released
 * and, when all streams of a proc are closed, an IOF_COMPLETE daemon
 * command is queued. cbdata is the orte_iof_read_event_t that fired.
 */
void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int rc;

    OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock);

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        /* NOTE(review): ReadFile's return value is not checked */
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(&rev->ev, 0);
            OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:hnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order to
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* is this read from our stdin? */
    if (ORTE_IOF_STDIN & rev->tag) {
        /* if job termination has been ordered, just ignore the
         * data and delete the read event
         */
        if (orte_job_term_ordered) {
            OBJ_RELEASE(mca_iof_hnp_component.stdinev);
            OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
            return;
        }
        /* cycle through our list of sinks */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t* sink = (orte_iof_sink_t*)item;

            /* only look at stdin sinks */
            if (!(ORTE_IOF_STDIN & sink->tag)) {
                continue;
            }

            /* if the daemon is me, then this is a local sink */
            if (ORTE_PROC_MY_NAME->jobid == sink->daemon.jobid &&
                ORTE_PROC_MY_NAME->vpid == sink->daemon.vpid) {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s read %d bytes from stdin - writing to %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name)));
                /* send the bytes down the pipe - we even send 0 byte events
                 * down the pipe so it forces out any preceding data before
                 * closing the output stream
                 */
                if (NULL != sink->wev) {
                    if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev)) {
                        /* getting too backed up - stop the read event for now if it is still active */
                        if (mca_iof_hnp_component.stdinev->active) {
                            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                 "buffer backed up - holding"));
                            mca_iof_hnp_component.stdinev->active = false;
                        }
                        /* flow control: return without rearming; the writer
                         * restarts us when the backlog drains */
                        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
                        return;
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdin to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&sink->daemon)));
                /* send the data to the daemon so it can
                 * write it to the proc's fd - in this case,
                 * we pass sink->name to indicate who is to
                 * receive the data.
                 * If the connection closed,
                 * numbytes will be zero so zero bytes will be
                 * sent - this will tell the daemon to close
                 * the fd for stdin to that proc
                 */
                orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes);
            }
        }
        /* if num_bytes was zero, then we need to terminate the event */
        if (0 == numbytes) {
            /* this will also close our stdin file descriptor */
            OBJ_RELEASE(mca_iof_hnp_component.stdinev);
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_hnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin);
            }
        }
        /* nothing more to do */
        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
        return;
    }

    /* this must be output from one of my local procs - see
     * if anyone else has requested a copy of this info
     */
    for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
         item != opal_list_get_end(&mca_iof_hnp_component.sinks);
         item = opal_list_get_next(item)) {
        orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
        /* if the target isn't set, then this sink is for another purpose - ignore it */
        if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
            continue;
        }
        if ((sink->tag & rev->tag) &&
            sink->name.jobid == rev->name.jobid &&
            (ORTE_VPID_WILDCARD == sink->name.vpid || sink->name.vpid == rev->name.vpid)) {
            /* need to send the data to the remote endpoint - if
             * the connection closed, numbytes will be zero, so
             * the remote endpoint will know to close its local fd.
             * In this case, we pass rev->name to indicate who the
             * data came from.
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s sending data to tool %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->daemon)));
            orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &rev->name, rev->tag, data, numbytes);
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, there is
         * nothing to output - find this proc on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            if (proct->name.jobid == rev->name.jobid &&
                proct->name.vpid == rev->name.vpid) {
                /* found it - release corresponding event.
                   This deletes
                 * the read event and closes the file descriptor
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    opal_buffer_t cmdbuf;
                    orte_daemon_cmd_flag_t command;
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_hnp_component.procs, item);
                    /* setup a cmd to notify that the iof is complete */
                    OBJ_CONSTRUCT(&cmdbuf, opal_buffer_t);
                    command = ORTE_DAEMON_IOF_COMPLETE;
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&cmdbuf, &command, 1, ORTE_DAEMON_CMD))) {
                        ORTE_ERROR_LOG(rc);
                        goto CLEANUP;
                    }
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&cmdbuf, &proct->name, 1, ORTE_NAME))) {
                        ORTE_ERROR_LOG(rc);
                        goto CLEANUP;
                    }
                    ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmdbuf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
                CLEANUP:
                    OBJ_DESTRUCT(&cmdbuf);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
        return;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }
            /* is this the desired proc? */
            if (sink->name.jobid == rev->name.jobid &&
                sink->name.vpid == rev->name.vpid) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag || orte_xml_output) {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(&rev->ev, 0);

    OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
    return;
}
/*
 * Event callback fired when a pipe fd carrying a local child proc's
 * stdout/stderr/stddiag output becomes readable on an orted.
 *
 * Visible behavior:
 *   - reads up to ORTE_IOF_BASE_MSG_MAX bytes (ReadFile on Windows,
 *     read(2) elsewhere);
 *   - on EAGAIN/EINTR re-arms the read event and returns;
 *   - if orte_output_filename is set, writes the bytes to the matching
 *     local file sink instead of forwarding them;
 *   - otherwise packs (tag, origin name, data) into an opal buffer and
 *     sends it non-blocking to the HNP on ORTE_RML_TAG_IOF_HNP;
 *   - on EOF or a hard read error, releases the read event for this
 *     stream and, once all three streams are closed, removes the proc
 *     from the component list and fires ORTE_PROC_STATE_IOF_COMPLETE.
 */
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_ns_cmp_bitmask_t mask;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == errno || EINTR == errno) {
                /* non-blocking, retry: re-arm the event and bail out */
                opal_event_add(rev->ev, 0);
                return;
            }
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:orted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&rev->name), fd));
        }
        /* numbytes must have been zero (EOF) or a hard error,
         * so go down and close the fd etc */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
             item != opal_list_get_end(&mca_iof_orted_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another
             * purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }
            mask = ORTE_NS_CMP_ALL;
            /* is this the desired proc? */
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done - only one sink can match this proc */
                break;
            }
        }
        /* data went to a file, not the HNP - just re-arm and return */
        goto RESTART;
    }

    /* prep the buffer */
    buf = OBJ_NEW(opal_buffer_t);

    /* pack the stream first - we do this so that flow control messages can
     * consist solely of the tag */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack name of process that gave us this data */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack the data - only pack the #bytes we read! */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* start non-blocking RML call to forward received data;
     * send_cb releases the buffer when the send completes */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler sending %d bytes to HNP",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

    orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                            send_cb, NULL);

 RESTART:
    /* re-add the event so we keep reading this stream */
    opal_event_add(rev->ev, 0);
    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, find this proc
     * on our list and clean up */
    for (item = opal_list_get_first(&mca_iof_orted_component.procs);
         item != opal_list_get_end(&mca_iof_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        mask = ORTE_NS_CMP_ALL;
        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
            /* found it - release corresponding event. This deletes
             * the read event and closes the file descriptor.
             * NOTE(review): the subsequent NULL tests rely on
             * OBJ_RELEASE NULLing the field when the refcount hits
             * zero - confirm against the opal_object.h macro in use */
            if (rev->tag & ORTE_IOF_STDOUT) {
                if( NULL != proct->revstdout ) {
                    OBJ_RELEASE(proct->revstdout);
                }
            } else if (rev->tag & ORTE_IOF_STDERR) {
                if( NULL != proct->revstderr ) {
                    OBJ_RELEASE(proct->revstderr);
                }
            } else if (rev->tag & ORTE_IOF_STDDIAG) {
                if( NULL != proct->revstddiag ) {
                    OBJ_RELEASE(proct->revstddiag);
                }
            }
            /* check to see if they are all done */
            if (NULL == proct->revstdout &&
                NULL == proct->revstderr &&
                NULL == proct->revstddiag) {
                /* this proc's iof is complete - report it upward */
                opal_list_remove_item(&mca_iof_orted_component.procs, item);
                ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                OBJ_RELEASE(proct);
            }
            break;
        }
    }
    /* an unsent buffer would leak - release it here */
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
/*
 * HNP-side RML receive handler for IOF traffic.
 *
 * Handles three message classes:
 *   - flow control: XON re-arms the HNP's stdin read event, XOFF stops it;
 *   - tool commands: PULL registers forwarding sinks for the requested
 *     stream(s) of a proc, CLOSE acks and removes matching sinks;
 *   - output data forwarded from a daemon: copied to every matching tool
 *     sink and, unless some matching sink is marked exclusive, also
 *     written to the HNP's local stdout/stderr write events.
 *
 * Fixes vs. previous revision:
 *   - the verbose output after unpacking 'origin' printed 'requestor'
 *     before it was ever unpacked (uninitialized read); it now prints
 *     the RML 'sender';
 *   - the CLOSE sink-removal loop spun forever on sinks with an unset
 *     daemon jobid because 'continue' skipped the item advance;
 *   - the XOFF branch tested '!active', so it never deleted an armed
 *     stdin event; it now throttles only when the event is active
 *     (mirroring the XON branch, which arms only when inactive).
 */
void orte_iof_hnp_recv(int status, orte_process_name_t* sender,
                       opal_buffer_t* buffer, orte_rml_tag_t tag,
                       void* cbdata)
{
    orte_process_name_t origin, requestor;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_iof_sink_t *sink;
    opal_list_item_t *item, *next;
    int rc;
    bool exclusive;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s received IOF from proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* unpack the stream first as this may be flow control info */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_XON & stream) {
        /* re-start the stdin read event - only if it exists, the job
         * isn't terminating, and it isn't already armed */
        if (NULL != mca_iof_hnp_component.stdinev &&
            !orte_job_term_ordered &&
            !mca_iof_hnp_component.stdinev->active) {
            mca_iof_hnp_component.stdinev->active = true;
            opal_event_add(mca_iof_hnp_component.stdinev->ev, 0);
        }
        goto CLEAN_RETURN;
    } else if (ORTE_IOF_XOFF & stream) {
        /* stop the stdin read event - only meaningful when it is
         * currently armed (previous code tested !active, making this
         * branch a no-op that never throttled) */
        if (NULL != mca_iof_hnp_component.stdinev &&
            mca_iof_hnp_component.stdinev->active) {
            opal_event_del(mca_iof_hnp_component.stdinev->ev);
            mca_iof_hnp_component.stdinev->active = false;
        }
        goto CLEAN_RETURN;
    }

    /* get name of the process whose io we are discussing */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* print the RML sender here - 'requestor' has not been unpacked yet
     * at this point (the old code read uninitialized memory) */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s received IOF cmd from sender %s for source %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender),
                         ORTE_NAME_PRINT(&origin)));

    /* check to see if a tool has requested something */
    if (ORTE_IOF_PULL & stream) {
        /* get name of the process wishing to be the sink */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &requestor, &count, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s received pull cmd from remote tool %s for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&requestor),
                             ORTE_NAME_PRINT(&origin)));

        if (ORTE_IOF_EXCLUSIVE & stream) {
            exclusive = true;
        } else {
            exclusive = false;
        }
        /* a tool is requesting that we send it a copy of the specified
         * stream(s) from the specified process(es), so create a sink
         * for each requested stream; the sink's 'daemon' field holds
         * the tool's name so forwarded data can be routed to it */
        if (ORTE_IOF_STDOUT & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDOUT,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        if (ORTE_IOF_STDERR & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDERR,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        if (ORTE_IOF_STDDIAG & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDDIAG,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_CLOSE & stream) {
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s received close cmd from remote tool %s for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(sender),
                             ORTE_NAME_PRINT(&origin)));
        /* a tool is requesting that we no longer forward a copy of the
         * specified stream(s) from the specified process(es) - remove the sink */
        item = opal_list_get_first(&mca_iof_hnp_component.sinks);
        while (item != opal_list_get_end(&mca_iof_hnp_component.sinks)) {
            next = opal_list_get_next(item);
            sink = (orte_iof_sink_t*)item;
            /* if the target isn't set, then this sink is for another
             * purpose - skip it.  Must advance before continuing: the
             * old bare 'continue' bypassed the item advance at the
             * bottom of the loop and spun forever on such a sink */
            if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
                item = next;
                continue;
            }
            /* if this sink is the designated one, then remove it from list */
            if ((stream & sink->tag) &&
                sink->name.jobid == origin.jobid &&
                (ORTE_VPID_WILDCARD == sink->name.vpid ||
                 ORTE_VPID_WILDCARD == origin.vpid ||
                 sink->name.vpid == origin.vpid)) {
                /* send an ack message to the requestor - this ensures that the RML has
                 * completed sending anything to that requestor before it exits */
                orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin,
                                                   ORTE_IOF_CLOSE, NULL, 0);
                opal_list_remove_item(&mca_iof_hnp_component.sinks, item);
                OBJ_RELEASE(item);
            }
            item = next;
        }
        goto CLEAN_RETURN;
    }

    /* this must have come from a daemon forwarding output - unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes from remote proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&origin)));

    /* cycle through the endpoints to see if someone else wants a copy */
    exclusive = false;
    for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
         item != opal_list_get_end(&mca_iof_hnp_component.sinks);
         item = opal_list_get_next(item)) {
        sink = (orte_iof_sink_t*)item;
        /* if the target isn't set, then this sink is for another
         * purpose - ignore it */
        if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
            continue;
        }
        if ((stream & sink->tag) &&
            sink->name.jobid == origin.jobid &&
            (ORTE_VPID_WILDCARD == sink->name.vpid ||
             ORTE_VPID_WILDCARD == origin.vpid ||
             sink->name.vpid == origin.vpid)) {
            /* send the data to the tool */
            orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin,
                                               stream, data, numbytes);
            if (sink->exclusive) {
                exclusive = true;
            }
        }
    }

    /* output this to our local output unless one of the sinks was exclusive */
    if (!exclusive) {
        if (ORTE_IOF_STDOUT & stream || orte_xml_output) {
            orte_iof_base_write_output(&origin, stream, data, numbytes,
                                       orte_iof_base.iof_write_stdout->wev);
        } else {
            orte_iof_base_write_output(&origin, stream, data, numbytes,
                                       orte_iof_base.iof_write_stderr->wev);
        }
    }

 CLEAN_RETURN:
    return;
}
/*
 * MR-HNP-side RML receive handler for IOF traffic.
 *
 * Simplified relative to orte_iof_hnp_recv: handles stdin flow control
 * (XON/XOFF) and output data forwarded from daemons, which is written
 * directly to the local stdout/stderr write events - no tool sinks.
 *
 * Fix vs. previous revision: the XOFF branch tested '!active', so it
 * never deleted an armed stdin event; it now throttles only when the
 * event is active (mirroring the XON branch, which arms on !active).
 */
void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t* buffer, orte_rml_tag_t tag,
                         void* cbdata)
{
    orte_process_name_t origin;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    int rc;

    /* unpack the stream first as this may be flow control info */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_XON & stream) {
        /* re-start the stdin read event - only if it exists, the job
         * isn't terminating, and it isn't already armed */
        if (NULL != mca_iof_mr_hnp_component.stdinev &&
            !orte_job_term_ordered &&
            !mca_iof_mr_hnp_component.stdinev->active) {
            mca_iof_mr_hnp_component.stdinev->active = true;
            opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
        }
        goto CLEAN_RETURN;
    } else if (ORTE_IOF_XOFF & stream) {
        /* stop the stdin read event - only meaningful when it is
         * currently armed (previous code tested !active, making this
         * branch a no-op that never throttled) */
        if (NULL != mca_iof_mr_hnp_component.stdinev &&
            mca_iof_mr_hnp_component.stdinev->active) {
            opal_event_del(mca_iof_mr_hnp_component.stdinev->ev);
            mca_iof_mr_hnp_component.stdinev->active = false;
        }
        goto CLEAN_RETURN;
    }

    /* get name of the process whose io we are discussing */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* this must have come from a daemon forwarding output - unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes from remote proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&origin)));

    /* output this to our local output; stdout also catches the case
     * where XML output folds everything onto one stream */
    if (ORTE_IOF_STDOUT & stream || orte_xml_output) {
        orte_iof_base_write_output(&origin, stream, data, numbytes,
                                   orte_iof_base.iof_write_stdout->wev);
    } else {
        orte_iof_base_write_output(&origin, stream, data, numbytes,
                                   orte_iof_base.iof_write_stderr->wev);
    }

 CLEAN_RETURN:
    return;
}