示例#1
0
文件: iof_tool.c 项目: anandhis/ompi
/*
 * Queue a NUL-terminated message from `peer` on this tool's local output.
 *
 * Messages carrying the stdout tag - and every message when XML output
 * is enabled - go to the stdout write event; everything else goes to the
 * stderr write event.  Always returns ORTE_SUCCESS.
 */
static int tool_output(const orte_process_name_t* peer,
                       orte_iof_tag_t source_tag,
                       const char *msg)
{
    const unsigned char *bytes = (const unsigned char*)msg;
    const size_t nbytes = strlen(msg);

    /* neither a stdout message nor XML mode: this belongs on stderr */
    if (!(ORTE_IOF_STDOUT & source_tag) && !orte_xml_output) {
        orte_iof_base_write_output(peer, source_tag, bytes, nbytes, orte_iof_base.iof_write_stderr->wev);
        return ORTE_SUCCESS;
    }

    orte_iof_base_write_output(peer, source_tag, bytes, nbytes, orte_iof_base.iof_write_stdout->wev);
    return ORTE_SUCCESS;
}
示例#2
0
文件: iof_mrhnp.c 项目: orcmuser/orcm
static void mrhnp_complete(const orte_job_t *jdata)
{
    orte_job_t *jptr;
    orte_job_map_t *map;
    orte_proc_t *daemon;
    orte_iof_proc_t *proct;
    unsigned char data[1];
    opal_list_item_t *item;
    int i;
    orte_node_t *node;
    orte_jobid_t stdout_target, *jbptr;

    stdout_target = ORTE_JOBID_INVALID;
    jbptr = &stdout_target;
    if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
        /* nothing to do */
        return;
    }

    /* the job is complete - close out the stdin
     * of any procs it was feeding
     */
    jptr = orte_get_job_data_object(stdout_target);
    map = jptr->map;
    /* cycle thru the map to find any node that has at least
     * one proc from this job
     */
    for (i=0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        daemon = node->daemon;
        if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
            for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                 item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                 item = opal_list_get_next(item)) {
                proct = (orte_iof_proc_t*)item;
                if (proct->name.jobid == jptr->jobid) {
                    if (NULL != proct->sink) {
                        /* need to write a 0-byte event to clear the stream and close it */
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                        proct->sink = NULL;
                    }
                }
            }
        } else {
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s sending close stdin to daemon %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&daemon->name)));
                
            /* need to send a 0-byte message to clear the stream and close it */
            send_data(&daemon->name, ORTE_IOF_STDIN, jptr->jobid, data, 0);
        }
    }
}
/* This is the read handler for my own child procs and for my stdin.
 *
 * Reads up to ORTE_IOF_BASE_MSG_MAX bytes from fd and then dispatches on
 * rev->tag:
 *
 *  - STDIN: the bytes are passed to every job on the component's
 *    stdin_jobs array.  For each node in a job's map, they are either
 *    written to the sink of each matching local proc (when the node's
 *    daemon has my vpid) or sent to that node's daemon.  A read that is
 *    shorter than the buffer is treated as end of input: a 0-byte
 *    write/message follows it and the stdin event is released.
 *    Otherwise the stdin read is restarted.
 *
 *  - STDOUT with data: if the source job has a stdout target job, the
 *    bytes are forwarded as stdin to that job's procs/daemons;
 *    otherwise they are written to my own stdout.
 *
 *  - STDERR/STDDIAG with data: written to my own stderr.
 *
 *  - 0 bytes on an output channel: the matching read event on the proc
 *    is released, and once all three are gone the proc is removed from
 *    the list and its IOF_COMPLETE state is activated.
 *
 * @param fd      descriptor to read from
 * @param event   unused
 * @param cbdata  the orte_iof_read_event_t that fired
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    int saved_errno;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    bool write_out=false;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        /* start at zero so a failed ReadFile is seen as "no data" rather
         * than reading an indeterminate count
         */
        DWORD readed = 0;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    /* capture errno right away - the verbose output below may call
     * library routines that overwrite it before we test for EAGAIN/EINTR
     */
    saved_errno = errno;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == saved_errno || EINTR == saved_errno) {
            opal_event_add(rev->ev, 0);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired
     */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            /* guard the release the same way the end-of-input path does */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        }
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it
         */
        mca_iof_mr_hnp_component.stdinev->active = false;
        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs
         */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            map = jdata->map;
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;

                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                /* return without re-arming the stdin read */
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* need to write a 0-byte event to clear the stream and close it */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));

                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }
        /* if numbytes was zero, or the read came up short of a full
         * buffer, then we need to terminate the event
         */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;

            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));

                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }

 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor. Each
                 * release is guarded so an already-cleared channel is
                 * skipped instead of dereferenced.
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    if (NULL != proct->revstdout) {
                        OBJ_RELEASE(proct->revstdout);
                    }
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    if (NULL != proct->revstderr) {
                        OBJ_RELEASE(proct->revstderr);
                    }
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    if (NULL != proct->revstddiag) {
                        OBJ_RELEASE(proct->revstddiag);
                    }
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            /* stdout that was forwarded down the chain is not echoed here */
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
}
示例#4
0
/*
 * The only messages coming to an orted are either:
 *
 * (a) stdin, which is to be copied to whichever local
 *     procs "pull'd" a copy
 *
 * (b) flow control messages
 *
 * This receive callback unpacks, in order, the stream tag, the target
 * jobid and the data bytes from the buffer.  Any unpack failure, or a
 * stream tag other than ORTE_IOF_STDIN, is logged and the message is
 * dropped.  The bytes are then written to the stdin sink of every proc
 * on this component's list whose jobid matches the target.  If a write
 * reports more than ORTE_IOF_MAX_INPUT_BUFFERS pending, an XOFF is sent
 * once for that sink.
 *
 * The status, sender, tag and cbdata arguments are not used here.
 */
void orte_iof_mrorted_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t* buffer, orte_rml_tag_t tag,
                         void* cbdata)
{
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_jobid_t jobid;
    opal_list_item_t *item;
    int rc;

    /* see what stream generated this data */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* if this isn't stdin, then we have an error */
    if (ORTE_IOF_STDIN != stream) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        goto CLEAN_RETURN;
    }

    /* unpack the intended target */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &count, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* unpack the data - numbytes goes in as the capacity of our buffer */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes for local job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_JOBID_PRINT(jobid)));

    /* cycle through our list of procs */
    for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
         item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
         item = opal_list_get_next(item)) {
        orte_iof_proc_t* sink = (orte_iof_proc_t*)item;

        /* is this intended for this jobid? */
        if (jobid == sink->name.jobid) {
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s writing data to local proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->name)));
            if (NULL == sink->sink) {
                /* this proc has no stdin sink at all - skip it rather
                 * than dereference a NULL pointer below
                 */
                continue;
            }
            if (NULL == sink->sink->wev || sink->sink->wev->fd < 0) {
                /* this sink was already closed - ignore this data.
                 * NOTE(review): this stops delivery to any remaining
                 * procs of the job as well - confirm that is intended.
                 */
                goto CLEAN_RETURN;
            }
            /* send the bytes down the pipe - we even send 0 byte events
             * down the pipe so it forces out any preceding data before
             * closing the output stream
             */
            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&sink->name, stream, data, numbytes, sink->sink->wev)) {
                /* getting too backed up - tell the HNP to hold off any more input if we
                 * haven't already told it
                 */
                if (!sink->sink->xoff) {
                    sink->sink->xoff = true;
                    orte_iof_mrorted_send_xonxoff(&sink->name, ORTE_IOF_XOFF);
                }
            }
        }
    }

CLEAN_RETURN:
    return;
}
示例#5
0
void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_job_map_t *map;
    int i;
    bool write_out=false;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_jobid_t stdout_target, *jbptr;

    /* read up to the fragment size */
    numbytes = read(fd, data, sizeof(data));
    
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:mrorted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&rev->name), fd));
    
    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == errno || EINTR == errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            } 

            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:mrorted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&rev->name), fd));
        }
        /* numbytes must have been zero, so go down and close the fd etc */
        goto CLEAN_RETURN;
    }
    
    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_mr_orted_component.sinks);
             item != opal_list_get_end(&mca_iof_mr_orted_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }

            mask = ORTE_NS_CMP_ALL;

            /* is this the desired proc? */
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
    }
    
    if (ORTE_IOF_STDOUT & rev->tag) {
        /* see if we need to forward this output */
        stdout_target = ORTE_JOBID_INVALID;
        jbptr = &stdout_target;
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;
            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                
                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }
    
 PROCESS:
    if (write_out) {
        /* prep the buffer */
        buf = OBJ_NEW(opal_buffer_t);
    
        /* pack the stream first - we do this so that flow control messages can
         * consist solely of the tag
         */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }
    
        /* pack name of process that gave us this data */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }
    
        /* pack the data - only pack the #bytes we read! */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        /* start non-blocking RML call to forward received data */
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s iof:mrorted:read handler sending %d bytes to HNP",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
    
        orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                                orte_rml_send_callback, NULL);
    }
    
    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
   
 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, find this proc
     * on our list and clean up
     */
    for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
         item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        mask = ORTE_NS_CMP_ALL;
        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
            /* found it - release corresponding event. This deletes
             * the read event and closes the file descriptor
             */
            if (rev->tag & ORTE_IOF_STDOUT) {
                if( NULL != proct->revstdout ) {
                    OBJ_RELEASE(proct->revstdout);
                }
            } else if (rev->tag & ORTE_IOF_STDERR) {
                if( NULL != proct->revstderr ) {
                    OBJ_RELEASE(proct->revstderr);
                }
            } else if (rev->tag & ORTE_IOF_STDDIAG) {
                if( NULL != proct->revstddiag ) {
                    OBJ_RELEASE(proct->revstddiag);
                }
            }
            /* check to see if they are all done */
            if (NULL == proct->revstdout &&
                NULL == proct->revstderr &&
                NULL == proct->revstddiag) {
                /* this proc's iof is complete */
                opal_list_remove_item(&mca_iof_mr_orted_component.procs, item);
                ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                OBJ_RELEASE(proct);
            }
            break;
        }
    }
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
示例#6
0
/*
 * Read handler for the output channels of this daemon's local procs.
 *
 * Reads up to ORTE_IOF_BASE_MSG_MAX bytes from fd.  If the read event
 * carries no proc, the error is logged and the handler returns without
 * re-adding the event.  On EAGAIN/EINTR the event is re-added.  On any
 * other error, or on a 0-byte read, the proc's matching read event is
 * dumped and released; once all three channels are gone the proc is
 * removed from the component's list and its IOF_COMPLETE state is
 * activated.
 *
 * When data was read it is written to rev->sink if one is attached.
 * If proct->copy is set, the data is also packed (tag, proc name, bytes)
 * and sent to the HNP.  The event is then re-added.
 *
 * @param fd      descriptor to read from
 * @param event   unused
 * @param cbdata  the orte_iof_read_event_t that fired
 */
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    int saved_errno;
    orte_iof_proc_t *proct = (orte_iof_proc_t*)rev->proc;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        /* start at zero so a failed ReadFile is seen as "no data" rather
         * than reading an indeterminate count
         */
        DWORD readed = 0;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    /* capture errno right away - the logging calls below may call
     * library routines that overwrite it before we test for EAGAIN/EINTR
     */
    saved_errno = errno;

    if (NULL == proct) {
        /* nothing we can do */
        ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&proct->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == saved_errno || EINTR == saved_errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            }

            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:orted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&proct->name), fd));
        }
        /* either zero bytes were read or the error is unrecoverable,
         * so go down and close the fd etc
         */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != rev->sink) {
        /* output to the corresponding file */
        orte_iof_base_write_output(&proct->name, rev->tag, data, numbytes, rev->sink->wev);
    }
    if (!proct->copy) {
        /* no copy goes to the HNP - just re-add the event */
        opal_event_add(rev->ev, 0);
        return;
    }

    /* prep the buffer */
    buf = OBJ_NEW(opal_buffer_t);

    /* pack the stream first - we do this so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack name of process that gave us this data */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &proct->name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack the data - only pack the #bytes we read! */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* start non-blocking RML call to forward received data */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler sending %d bytes to HNP",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

    /* buf is handed to the send here and is not released by this
     * function on the success path
     */
    orte_rml.send_buffer_nb(orte_mgmt_conduit,
                                    ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                                    send_cb, NULL);

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, release the
     * corresponding event. This deletes the read event and closes
     * the file descriptor */
    if (rev->tag & ORTE_IOF_STDOUT) {
        if( NULL != proct->revstdout ) {
            orte_iof_base_static_dump_output(proct->revstdout);
            OBJ_RELEASE(proct->revstdout);
        }
    } else if (rev->tag & ORTE_IOF_STDERR) {
        if( NULL != proct->revstderr ) {
            orte_iof_base_static_dump_output(proct->revstderr);
            OBJ_RELEASE(proct->revstderr);
        }
    } else if (rev->tag & ORTE_IOF_STDDIAG) {
        if( NULL != proct->revstddiag ) {
            orte_iof_base_static_dump_output(proct->revstddiag);
            OBJ_RELEASE(proct->revstddiag);
        }
    }
    /* check to see if they are all done */
    if (NULL == proct->revstdout &&
        NULL == proct->revstderr &&
        NULL == proct->revstddiag) {
        /* this proc's iof is complete */
        opal_list_remove_item(&mca_iof_orted_component.procs, &proct->super);
        ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
        OBJ_RELEASE(proct);
    }
    /* only non-NULL when a pack step failed after the buffer was created */
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
示例#7
0
/* Read handler for descriptors owned by the HNP itself: either the HNP's
 * own stdin or the stdout/stderr/stddiag of one of its local child procs.
 *
 * fd     - descriptor that became readable
 * event  - event flags passed by the event library (not used here)
 * cbdata - the orte_iof_read_event_t tracking this descriptor
 *
 * Stdin data is pushed to every stdin sink (written locally, or sent to
 * the daemon named in the sink). Child output is copied to every sink
 * with a target set that matches the tag/name, and is then written either
 * to the per-rank file sink or to our own stdout/stderr. A zero-byte read
 * - or an unrecoverable read error, which is mapped to zero bytes - closes
 * out the channel.
 *
 * The component lock is taken on entry and released on every exit path.
 */
void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int rc;

    OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock);

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(&rev->ev, 0);
            OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:hnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Unrecoverable error. Let the code flow as usual so that the
         * zero-byte message is sent up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* is this read from our stdin? */
    if (ORTE_IOF_STDIN & rev->tag) {
        /* if job termination has been ordered, just ignore the
         * data and delete the read event
         */
        if (orte_job_term_ordered) {
            OBJ_RELEASE(mca_iof_hnp_component.stdinev);
            OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
            return;
        }
        /* cycle through our list of sinks */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t* sink = (orte_iof_sink_t*)item;

            /* only look at stdin sinks */
            if (!(ORTE_IOF_STDIN & sink->tag)) {
                continue;
            }

            /* if the daemon is me, then this is a local sink */
            if (ORTE_PROC_MY_NAME->jobid == sink->daemon.jobid &&
                ORTE_PROC_MY_NAME->vpid == sink->daemon.vpid) {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s read %d bytes from stdin - writing to %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name)));
                /* send the bytes down the pipe - we even send 0 byte events
                 * down the pipe so it forces out any preceding data before
                 * closing the output stream
                 */
                if (NULL != sink->wev) {
                    if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev)) {
                        /* getting too backed up - stop the read event for now if it is still active.
                         * The event is simply not re-added; only the flag is cleared here.
                         */
                        if (mca_iof_hnp_component.stdinev->active) {
                            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                 "buffer backed up - holding"));
                            mca_iof_hnp_component.stdinev->active = false;
                        }
                        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
                        return;
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdin to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&sink->daemon)));

                /* send the data to the daemon so it can
                 * write it to the proc's fd - in this case,
                 * we pass sink->name to indicate who is to
                 * receive the data. If the connection closed,
                 * numbytes will be zero so zero bytes will be
                 * sent - this will tell the daemon to close
                 * the fd for stdin to that proc
                 */
                orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes);
            }
        }
        /* if numbytes was zero, then we need to terminate the event */
        if (0 == numbytes) {
            /* this will also close our stdin file descriptor */
            OBJ_RELEASE(mca_iof_hnp_component.stdinev);
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_hnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin);
            }
        }
        /* nothing more to do */
        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
        return;
    }

    /* this must be output from one of my local procs - see
     * if anyone else has requested a copy of this info
     */
    for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
         item != opal_list_get_end(&mca_iof_hnp_component.sinks);
         item = opal_list_get_next(item)) {
        orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
        /* if the target isn't set, then this sink is for another purpose - ignore it */
        if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
            continue;
        }
        if ((sink->tag & rev->tag) &&
            sink->name.jobid == rev->name.jobid &&
            (ORTE_VPID_WILDCARD == sink->name.vpid || sink->name.vpid == rev->name.vpid)) {
            /* need to send the data to the remote endpoint - if
             * the connection closed, numbytes will be zero, so
             * the remote endpoint will know to close its local fd.
             * In this case, we pass rev->name to indicate who the
             * data came from.
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s sending data to tool %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->daemon)));
            orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &rev->name, rev->tag, data, numbytes);
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, there is
         * nothing to output - find this proc on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            if (proct->name.jobid == rev->name.jobid &&
                proct->name.vpid == rev->name.vpid) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor. Guard
                 * against an already-released channel, as the daemon-side
                 * handlers do, so a NULL pointer is never handed to
                 * OBJ_RELEASE.
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    if (NULL != proct->revstdout) {
                        OBJ_RELEASE(proct->revstdout);
                    }
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    if (NULL != proct->revstderr) {
                        OBJ_RELEASE(proct->revstderr);
                    }
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    if (NULL != proct->revstddiag) {
                        OBJ_RELEASE(proct->revstddiag);
                    }
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    opal_buffer_t cmdbuf;
                    orte_daemon_cmd_flag_t command;
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_hnp_component.procs, item);
                    /* setup a cmd to notify that the iof is complete */
                    OBJ_CONSTRUCT(&cmdbuf, opal_buffer_t);
                    command = ORTE_DAEMON_IOF_COMPLETE;
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&cmdbuf, &command, 1, ORTE_DAEMON_CMD))) {
                        ORTE_ERROR_LOG(rc);
                        goto CLEANUP;
                    }
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&cmdbuf, &proct->name, 1, ORTE_NAME))) {
                        ORTE_ERROR_LOG(rc);
                        goto CLEANUP;
                    }
                    ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmdbuf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
                CLEANUP:
                    /* reached on both the success and the pack-failure path */
                    OBJ_DESTRUCT(&cmdbuf);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
        return;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }
            /* is this the desired proc? */
            if (sink->name.jobid == rev->name.jobid &&
                sink->name.vpid == rev->name.vpid) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag || orte_xml_output) {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(&rev->ev, 0);

    OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
    return;
}
示例#8
0
/* Read handler for the stdout/stderr/stddiag of a daemon's local procs.
 *
 * fd     - descriptor that became readable
 * event  - event flags passed by the event library (not used here)
 * cbdata - the orte_iof_read_event_t tracking this descriptor
 *
 * Data that was read is either written to the matching per-rank file sink
 * (when orte_output_filename is set) or packed as tag + source name +
 * bytes and sent to the HNP, after which the read event is re-armed.
 * A zero-byte read, an unrecoverable read error, or a pack failure drops
 * to CLEAN_RETURN, which releases this channel's read event and, once all
 * three channels of the proc are gone, marks the proc's IOF complete.
 */
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int read_errno;
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_ns_cmp_bitmask_t mask;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    /* capture errno before anything else runs - the verbose output below
     * may call library routines that overwrite it, which would make the
     * EAGAIN/EINTR retry test unreliable
     */
    read_errno = errno;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == read_errno || EINTR == read_errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            }

            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:orted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&rev->name), fd));
        }
        /* zero bytes or a hard error - go down and close the fd etc */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
             item != opal_list_get_end(&mca_iof_orted_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }

            mask = ORTE_NS_CMP_ALL;

            /* is this the desired proc? */
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
        /* nothing is forwarded to the HNP in this mode */
        goto RESTART;
    }

    /* prep the buffer */
    buf = OBJ_NEW(opal_buffer_t);

    /* pack the stream first - we do this so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack name of process that gave us this data */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack the data - only pack the #bytes we read! */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* start non-blocking RML call to forward received data */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler sending %d bytes to HNP",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

    /* NOTE(review): buf is handed to the RML here and not released in this
     * function on this path - presumably send_cb releases it; confirm.
     */
    orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                            send_cb, NULL);

 RESTART:
    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, find this proc
     * on our list and clean up
     */
    for (item = opal_list_get_first(&mca_iof_orted_component.procs);
         item != opal_list_get_end(&mca_iof_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        mask = ORTE_NS_CMP_ALL;
        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
            /* found it - release corresponding event. This deletes
             * the read event and closes the file descriptor
             */
            if (rev->tag & ORTE_IOF_STDOUT) {
                if( NULL != proct->revstdout ) {
                    OBJ_RELEASE(proct->revstdout);
                }
            } else if (rev->tag & ORTE_IOF_STDERR) {
                if( NULL != proct->revstderr ) {
                    OBJ_RELEASE(proct->revstderr);
                }
            } else if (rev->tag & ORTE_IOF_STDDIAG) {
                if( NULL != proct->revstddiag ) {
                    OBJ_RELEASE(proct->revstddiag);
                }
            }
            /* check to see if they are all done */
            if (NULL == proct->revstdout &&
                NULL == proct->revstderr &&
                NULL == proct->revstddiag) {
                /* this proc's iof is complete */
                opal_list_remove_item(&mca_iof_orted_component.procs, item);
                ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                OBJ_RELEASE(proct);
            }
            break;
        }
    }
    /* buf is only non-NULL here when a pack step failed before the send */
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
示例#9
0
/* RML receive callback for IOF messages arriving at the HNP.
 *
 * status - receive status from the RML (not used here)
 * sender - process that sent the message
 * buffer - packed message; unpacked in this order:
 *            1. stream tag (orte_iof_tag_t)
 *            2. name of the proc whose I/O is being discussed
 *            3. for PULL: name of the requesting sink;
 *               otherwise (forwarded output): the data bytes
 * tag    - RML tag the message arrived on (not used here)
 * cbdata - callback data (not used here)
 *
 * Handles, in order: XON/XOFF flow control for our stdin read event,
 * PULL requests (create sinks for the requestor), CLOSE requests (ack and
 * remove matching sinks), and finally forwarded output, which is copied
 * to every matching sink and written to our own stdout/stderr unless one
 * of those sinks was marked exclusive.
 */
void orte_iof_hnp_recv(int status, orte_process_name_t* sender,
                       opal_buffer_t* buffer, orte_rml_tag_t tag,
                       void* cbdata)
{
    orte_process_name_t origin, requestor;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_iof_sink_t *sink;
    opal_list_item_t *item, *next;
    int rc;
    bool exclusive;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s received IOF from proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* unpack the stream first as this may be flow control info */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_XON & stream) {
        /* re-start the stdin read event */
        if (NULL != mca_iof_hnp_component.stdinev &&
            !orte_job_term_ordered &&
            !mca_iof_hnp_component.stdinev->active) {
            mca_iof_hnp_component.stdinev->active = true;
            opal_event_add(mca_iof_hnp_component.stdinev->ev, 0);
        }
        goto CLEAN_RETURN;
    } else if (ORTE_IOF_XOFF & stream) {
        /* stop the stdin read event - only meaningful while it is
         * currently active (the mirror image of the XON test above)
         */
        if (NULL != mca_iof_hnp_component.stdinev &&
            mca_iof_hnp_component.stdinev->active) {
            opal_event_del(mca_iof_hnp_component.stdinev->ev);
            mca_iof_hnp_component.stdinev->active = false;
        }
        goto CLEAN_RETURN;
    }

    /* get name of the process whose io we are discussing */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* report the actual sender - the requestor has not been unpacked yet
     * (and only exists at all for PULL messages)
     */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s received IOF cmd from sender %s for source %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender),
                         ORTE_NAME_PRINT(&origin)));

    /* check to see if a tool has requested something */
    if (ORTE_IOF_PULL & stream) {
        /* get name of the process wishing to be the sink */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &requestor, &count, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto CLEAN_RETURN;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s received pull cmd from remote tool %s for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&requestor),
                             ORTE_NAME_PRINT(&origin)));

        if (ORTE_IOF_EXCLUSIVE & stream) {
            exclusive = true;
        } else {
            exclusive = false;
        }
        /* a tool is requesting that we send it a copy of the specified stream(s)
         * from the specified process(es), so create a sink for it
         */
        if (ORTE_IOF_STDOUT & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDOUT,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        if (ORTE_IOF_STDERR & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDERR,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        if (ORTE_IOF_STDDIAG & stream) {
            ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, ORTE_IOF_STDDIAG,
                                 NULL, &mca_iof_hnp_component.sinks);
            sink->daemon.jobid = requestor.jobid;
            sink->daemon.vpid = requestor.vpid;
            sink->exclusive = exclusive;
        }
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_CLOSE & stream) {
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                             "%s received close cmd from remote tool %s for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(sender),
                             ORTE_NAME_PRINT(&origin)));
        /* a tool is requesting that we no longer forward a copy of the
         * specified stream(s) from the specified process(es) - remove the sink.
         * The successor is fetched before the body runs so that the current
         * item can be removed safely, and the advance lives in the for
         * header so that "continue" cannot skip it.
         */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = next) {
            next = opal_list_get_next(item);
            sink = (orte_iof_sink_t*)item;
            /* if the target isn't set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
                continue;
            }
            /* if this sink is the designated one, then remove it from list */
            if ((stream & sink->tag) &&
                sink->name.jobid == origin.jobid &&
                (ORTE_VPID_WILDCARD == sink->name.vpid ||
                 ORTE_VPID_WILDCARD == origin.vpid ||
                 sink->name.vpid == origin.vpid)) {
                /* send an ack message to the requestor - this ensures that the RML has
                 * completed sending anything to that requestor before it exits
                 */
                orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin, ORTE_IOF_CLOSE, NULL, 0);
                opal_list_remove_item(&mca_iof_hnp_component.sinks, item);
                OBJ_RELEASE(item);
            }
        }
        goto CLEAN_RETURN;
    }

    /* this must have come from a daemon forwarding output - unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes from remote proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&origin)));

    /* cycle through the endpoints to see if someone else wants a copy */
    exclusive = false;
    for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
         item != opal_list_get_end(&mca_iof_hnp_component.sinks);
         item = opal_list_get_next(item)) {
        sink = (orte_iof_sink_t*)item;
        /* if the target isn't set, then this sink is for another purpose - ignore it */
        if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
            continue;
        }
        if ((stream & sink->tag) &&
            sink->name.jobid == origin.jobid &&
            (ORTE_VPID_WILDCARD == sink->name.vpid ||
             ORTE_VPID_WILDCARD == origin.vpid ||
             sink->name.vpid == origin.vpid)) {
            /* send the data to the tool */
            orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin, stream, data, numbytes);
            if (sink->exclusive) {
                exclusive = true;
            }
        }
    }

    /* output this to our local output unless one of the sinks was exclusive */
    if (!exclusive) {
        if (ORTE_IOF_STDOUT & stream || orte_xml_output) {
            orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev);
        } else {
            orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

 CLEAN_RETURN:
    return;
}
示例#10
0
/* RML receive callback for IOF messages arriving at the map-reduce HNP.
 *
 * status - receive status from the RML (not used here)
 * sender - process that sent the message (not used here)
 * buffer - packed message; unpacked in this order: stream tag, name of
 *          the proc the data came from, then the data bytes
 * tag    - RML tag the message arrived on (not used here)
 * cbdata - callback data (not used here)
 *
 * XON/XOFF tags toggle our stdin read event and carry no further payload.
 * Anything else is treated as forwarded output and written to our own
 * stdout (for stdout data, or whenever XML output is on) or stderr.
 */
void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender,
                       opal_buffer_t* buffer, orte_rml_tag_t tag,
                       void* cbdata)
{
    orte_process_name_t origin;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    int rc;

    /* unpack the stream first as this may be flow control info */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    if (ORTE_IOF_XON & stream) {
        /* re-start the stdin read event */
        if (NULL != mca_iof_mr_hnp_component.stdinev &&
            !orte_job_term_ordered &&
            !mca_iof_mr_hnp_component.stdinev->active) {
            mca_iof_mr_hnp_component.stdinev->active = true;
            opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
        }
        goto CLEAN_RETURN;
    } else if (ORTE_IOF_XOFF & stream) {
        /* stop the stdin read event - only meaningful while it is
         * currently active (the mirror image of the XON test above)
         */
        if (NULL != mca_iof_mr_hnp_component.stdinev &&
            mca_iof_mr_hnp_component.stdinev->active) {
            opal_event_del(mca_iof_mr_hnp_component.stdinev->ev);
            mca_iof_mr_hnp_component.stdinev->active = false;
        }
        goto CLEAN_RETURN;
    }

    /* get name of the process whose io we are discussing */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* this must have come from a daemon forwarding output - unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s unpacked %d bytes from remote proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&origin)));

    /* output this to our local output */
    if (ORTE_IOF_STDOUT & stream || orte_xml_output) {
        orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev);
    } else {
        orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev);
    }

CLEAN_RETURN:
    return;
}