예제 #1
0
void orte_iof_mrhnp_stdin_cb(int fd, short event, void *cbdata)
{
    bool should_process = orte_iof_mrhnp_stdin_check(0);
    
    if (should_process) {
        mca_iof_mr_hnp_component.stdinev->active = true;
        opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
    } else {
        opal_event_del(mca_iof_mr_hnp_component.stdinev->ev);
        mca_iof_mr_hnp_component.stdinev->active = false;
    }
}
예제 #2
0
/* this is the read handler for my own child procs and stdin
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    bool write_out=false;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */
    
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */
        
        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(rev->ev, 0);
            return;
        } 

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order to
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }
    
    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired
     */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
        }
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it
         */
        mca_iof_mr_hnp_component.stdinev->active = false;    
        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs
         */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            map = jdata->map;
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;

                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* need to write a 0-byte event to clear the stream and close it */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));
                
                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }
        /* if num_bytes was zero, then we need to terminate the event */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;

            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                
                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }
    
 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));
    
    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }
    
    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
}
예제 #3
0
파일: iof_mrhnp.c 프로젝트: orcmuser/orcm
/* Setup to read from stdin. 
 */
static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
    orte_job_t *jdata;
    orte_iof_sink_t *sink;
    orte_iof_proc_t *proct;
    opal_list_item_t *item;
    int flags;
    char *outfile;
    int fdout;
    int np, numdigs;
    orte_ns_cmp_bitmask_t mask;
    orte_iof_job_t *jptr;
    int j;
    bool found;

    /* don't do this if the dst vpid is invalid or the fd is negative! */
    if (ORTE_VPID_INVALID == dst_name->vpid || fd < 0) {
        return ORTE_SUCCESS;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:mrhnp pushing fd %d for process %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         fd, ORTE_NAME_PRINT(dst_name)));
    
    /* we get a push for stdout, stderr, and stddiag on every LOCAL process, so
     * setup to read those streams and forward them to the next app_context
     */
    if (!(src_tag & ORTE_IOF_STDIN)) {
        /* set the file descriptor to non-blocking - do this before we setup
         * and activate the read event in case it fires right away
         */
        if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
            opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", 
                        __FILE__, __LINE__, errno);
        } else {
            flags |= O_NONBLOCK;
            fcntl(fd, F_SETFL, flags);
        }
        /* do we already have this process in our list? */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, dst_name)) {
                /* found it */
                goto SETUP;
            }
        }
        /* if we get here, then we don't yet have this proc in our list */
        proct = OBJ_NEW(orte_iof_proc_t);
        proct->name.jobid = dst_name->jobid;
        proct->name.vpid = dst_name->vpid;
        opal_list_append(&mca_iof_mr_hnp_component.procs, &proct->super);
        /* see if we are to output to a file */
        if (NULL != orte_output_filename) {
            /* get the jobdata for this proc */
            if (NULL == (jdata = orte_get_job_data_object(dst_name->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                return ORTE_ERR_NOT_FOUND;
            }
            np = jdata->num_procs / 10;
            /* determine the number of digits required for max vpid */
            numdigs = 1;
            while (np > 0) {
                numdigs++;
                np = np / 10;
            }
            /* construct the filename */
            asprintf(&outfile, "%s.%d.%0*lu", orte_output_filename,
                     (int)ORTE_LOCAL_JOBID(proct->name.jobid),
                     numdigs, (unsigned long)proct->name.vpid);
            /* create the file */
            fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
            free(outfile);
            if (fdout < 0) {
                /* couldn't be opened */
                ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
                return ORTE_ERR_FILE_OPEN_FAILURE;
            }
            /* define a sink to that file descriptor */
            ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
                                 orte_iof_base_write_handler,
                                 &mca_iof_mr_hnp_component.sinks);
        }
        
    SETUP:
        /* define a read event but don't activate it */
        if (src_tag & ORTE_IOF_STDOUT) {
            ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
                                orte_iof_mrhnp_read_local_handler, false);
        } else if (src_tag & ORTE_IOF_STDERR) {
            ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
                                orte_iof_mrhnp_read_local_handler, false);
        } else if (src_tag & ORTE_IOF_STDDIAG) {
            ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
                                orte_iof_mrhnp_read_local_handler, false);
        }
        /* if -all- of the readevents for this proc have been defined, then
         * activate them. Otherwise, we can think that the proc is complete
         * because one of the readevents fires -prior- to all of them having been defined!
         */
        if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
            /* now activate read events */
            proct->revstdout->active = true;
            opal_event_add(proct->revstdout->ev, 0);
            proct->revstderr->active = true;
            opal_event_add(proct->revstderr->ev, 0);
            proct->revstddiag->active = true;
            opal_event_add(proct->revstddiag->ev, 0);
        }
        return ORTE_SUCCESS;
    }

    /*** HANDLE STDIN PUSH ***/

    /* get the job object for this proc and check to see if it
     * is a mapper - if so, add it to the jobs that receive
     * our stdin
     */
    jdata = orte_get_job_data_object(dst_name->jobid);
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_MAPPER, NULL, OPAL_BOOL)) {
        /* see if we already have it */
        found = false;
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (jptr = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            if (jptr->jdata->jobid == jdata->jobid) {
                found = true;
                break;
            }
        }
        if (!found) {
            jptr = OBJ_NEW(orte_iof_job_t);
            OBJ_RETAIN(jdata);
            jptr->jdata = jdata;
            opal_bitmap_init(&jptr->xoff, jdata->num_procs);
            opal_pointer_array_add(&mca_iof_mr_hnp_component.stdin_jobs, jptr);
        }
    }

    /* now setup the read - but check to only do this once */
    if (NULL == mca_iof_mr_hnp_component.stdinev) {
        /* Since we are the HNP, we don't want to set nonblocking on our
         * stdio stream.  If we do so, we set the file descriptor to
         * non-blocking for everyone that has that file descriptor, which
         * includes everyone else in our shell pipeline chain.  (See
         * http://lists.freebsd.org/pipermail/freebsd-hackers/2005-January/009742.html).
         * This causes things like "mpirun -np 1 big_app | cat" to lose
         * output, because cat's stdout is then ALSO non-blocking and cat
         * isn't built to deal with that case (same with almost all other
         * unix text utils). 
         */
        if (0 != fd) {
            if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
                opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", 
                            __FILE__, __LINE__, errno);
            } else {
                flags |= O_NONBLOCK;
                fcntl(fd, F_SETFL, flags);
            }            
        }
        if (isatty(fd)) {
            /* We should avoid trying to read from stdin if we
             * have a terminal, but are backgrounded.  Catch the
             * signals that are commonly used when we switch
             * between being backgrounded and not.  If the
             * filedescriptor is not a tty, don't worry about it
             * and always stay connected.
             */
            opal_event_signal_set(orte_event_base, &mca_iof_mr_hnp_component.stdinsig,
                                  SIGCONT, orte_iof_mrhnp_stdin_cb,
                                  NULL);
            
            /* setup a read event to read stdin, but don't activate it yet. The
             * dst_name indicates who should receive the stdin. If that recipient
             * doesn't do a corresponding pull, however, then the stdin will
             * be dropped upon receipt at the local daemon
             */
            ORTE_IOF_READ_EVENT(&mca_iof_mr_hnp_component.stdinev,
                                dst_name, fd, ORTE_IOF_STDIN,
                                orte_iof_mrhnp_read_local_handler, false);
            
            /* check to see if we want the stdin read event to be
             * active - we will always at least define the event,
             * but may delay its activation
             */
            if (!(src_tag & ORTE_IOF_STDIN) || orte_iof_mrhnp_stdin_check(fd)) {
                mca_iof_mr_hnp_component.stdinev->active = true;
                opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
            }
        } else {
            /* if we are not looking at a tty, just setup a read event
             * and activate it
             */
            ORTE_IOF_READ_EVENT(&mca_iof_mr_hnp_component.stdinev,
                                dst_name, fd, ORTE_IOF_STDIN,
                                orte_iof_mrhnp_read_local_handler, true);
        }
    }
    return ORTE_SUCCESS;
}