Code example #1
int orte_iof_base_endpoint_ack(
    orte_iof_base_endpoint_t* endpoint,
    uint32_t seq)
{
    bool window_closed, window_open;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    window_closed =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) >= orte_iof_base.iof_window_size;
    endpoint->ep_ack = seq;
    window_open =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) < orte_iof_base.iof_window_size;

    /* someone is waiting on all output to be flushed */
    if(orte_iof_base.iof_waiting && endpoint->ep_seq == endpoint->ep_ack) {
        opal_condition_signal(&orte_iof_base.iof_condition);
    }

    /* check to see if we need to reenable forwarding */
    if(window_closed && window_open) {
        opal_output(orte_iof_base.iof_output, "iof_base_endpoint ack; re-enabled reading for endpoint");
        opal_event_add(&endpoint->ep_event, 0);
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_SUCCESS;
}
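The ack path above signals orte_iof_base.iof_condition when someone is waiting and all output has been acknowledged. The waiter itself is not shown on this page; the sketch below is an assumed flush helper built only from the fields used above (iof_lock, iof_waiting, iof_condition, ep_seq, ep_ack). The function name and the treatment of iof_waiting as a simple counter are illustrative assumptions.

/* Hypothetical flush helper: block until this endpoint's output has been
 * fully acknowledged, i.e. until the ack path signals iof_condition with
 * ep_seq == ep_ack while iof_waiting is set. */
static void orte_iof_base_endpoint_flush_sketch(orte_iof_base_endpoint_t* endpoint)
{
    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    orte_iof_base.iof_waiting++;    /* assumed counter/flag checked by the ack path */
    while (endpoint->ep_seq != endpoint->ep_ack) {
        opal_condition_wait(&orte_iof_base.iof_condition, &orte_iof_base.iof_lock);
    }
    orte_iof_base.iof_waiting--;
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}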
Code example #2
File: orted.c  Project: aosm/openmpi
static void exit_callback(int fd, short event, void *arg)
{
    /* Trigger the normal exit conditions */
    orted_globals.exit_condition = true;
    opal_condition_signal(&orted_globals.condition);
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);
}
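exit_callback only flips the flag and signals; it unlocks orted_globals.mutex without locking it here, so the lock is presumably held by whoever arms this event. The wait side is not shown on this page; a minimal sketch of a daemon main loop blocking on the same flag and condition could look like this (an assumption, not code from orted.c):

/* Assumed wait side: park on orted_globals.condition until some handler
 * (such as exit_callback above) sets exit_condition and signals. */
OPAL_THREAD_LOCK(&orted_globals.mutex);
while (false == orted_globals.exit_condition) {
    opal_condition_wait(&orted_globals.condition, &orted_globals.mutex);
}
OPAL_THREAD_UNLOCK(&orted_globals.mutex);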
Code example #3
static void orte_pls_rsh_launch_cb(int fd, short event, void* args)
{
    orte_pls_rsh_stack_t *stack = (orte_pls_rsh_stack_t*)args;
    OPAL_THREAD_LOCK(&stack->mutex);
    stack->rc = orte_pls_rsh_launch(stack->jobid);
    stack->complete = true;
    opal_condition_signal(&stack->cond);
    OPAL_THREAD_UNLOCK(&stack->mutex);
}
Code example #4
File: orte_wait.c  Project: bringhurst/ompi
/*********************************************************************
 *
 * Local Functions
 *
 * None of these functions should lock mutex.  All but blk_waitpid_cb
 * should only be called if the mutex is already locked.
 *
 ********************************************************************/
static void
blk_waitpid_cb(pid_t wpid, int status, void *data)
{
    blk_waitpid_data_t *wp_data = (blk_waitpid_data_t*) data;

    wp_data->status = status;
    wp_data->done = 1;
    opal_condition_signal(wp_data->cond);
    wp_data->free = 1;
}
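Per the comment above, blk_waitpid_cb is the one local function that may be invoked without the mutex already held; it completes a blocking caller that does hold it. That caller is not among the snippets here, so the sketch below is an assumed version of it (the function name and mutex parameter are illustrative), using only the wp_data fields the callback touches.

/* Assumed blocking caller completed by blk_waitpid_cb: with the mutex held,
 * wait until the callback marks the data done, then return the status. */
static int blk_waitpid_sketch(blk_waitpid_data_t *wp_data, opal_mutex_t *mutex)
{
    while (0 == wp_data->done) {
        opal_condition_wait(wp_data->cond, mutex);
    }
    return wp_data->status;
}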
Code example #5
File: oob_ud_req.c  Project: 00datman/ompi
int mca_oob_ud_msg_status_update (mca_oob_ud_msg_t *msg, mca_oob_ud_status_t status)
{
    int rc;

    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                         "%s oob:ud:msg_status_update setting status of msg %p to %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) msg, (int) status);

    OPAL_THREAD_LOCK(&msg->lock);

    if (status != msg->status) {
        if (MCA_OOB_UD_MSG_STATUS_COMPLETE == status) {
            opal_output_verbose(10, orte_oob_base_framework.framework_output,
                                 "%s oob:ud:msg_status_update setting peer %s as available",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&msg->peer->peer_name));

            msg->peer->peer_available = true;
        }

        switch (status) {
        case MCA_OOB_UD_MSG_STATUS_TIMEOUT:
            rc = ORTE_ERR_TIMEOUT;
            break;
        case MCA_OOB_UD_MSG_STATUS_COMPLETE:
            rc = ORTE_SUCCESS;
            break;
        case MCA_OOB_UD_MSG_STATUS_ERROR:
        default:
            rc = ORTE_ERROR;
        }

        if (msg->cbfunc) {
            msg->cbfunc (msg, rc);
        }

        /* signal status change */
        msg->status = status;
        opal_condition_signal (&msg->status_changed);

        OPAL_THREAD_UNLOCK(&msg->lock);

        if (false == msg->persist) {
            mca_oob_ud_msg_return (msg);
        }

        return ORTE_SUCCESS;
    }

    OPAL_THREAD_UNLOCK(&msg->lock);

    return ORTE_SUCCESS;
}
Code example #6
static int opal_crs_blcr_thread_callback(void *arg) {
    const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info();
    int ret;
    
    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: thread_callback()");

    OPAL_THREAD_LOCK(&blcr_lock);
    blcr_current_state = OPAL_CRS_CHECKPOINT;

    /*
     * Allow the checkpoint to be taken, if we requested it
     */
#if CRS_BLCR_HAVE_INFO_REQUESTER == 1
    if( ckpt_info->requester != my_pid ) {
        ret = cr_checkpoint(CR_CHECKPOINT_OMIT);
        blcr_current_state = OPAL_CRS_RUNNING;
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback(); WARNING: An external agent attempted to checkpoint this process "
                            "when it did not expect to be checkpointed. Skipping this checkpoint request."
                            " [%d != %d].", ckpt_info->requester, my_pid);
        return 0;
    }
    else
#endif
    {
        ret = cr_checkpoint(0);
    }
    
    /*
     * Restarting
     */
    if ( 0 < ret ) {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback: Restarting.");
        blcr_current_state = OPAL_CRS_RESTART;
    }
    /*
     * Continuing
     */
    else {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: thread_callback: Continue.");
        blcr_current_state = OPAL_CRS_CONTINUE;
    }

    OPAL_THREAD_UNLOCK(&blcr_lock);
    opal_condition_signal(&blcr_cond);

    return 0;
}
Code example #7
/*
 *  Request completed - free buffer and decrement pending count 
 */
int mca_pml_base_bsend_request_free(void* addr)
{
    /* remove from list of pending requests */
    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);

    /* free buffer */
    mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, addr);
    
    /* decrement count of buffered requests */
    if(--mca_pml_bsend_count == 0)
        opal_condition_signal(&mca_pml_bsend_condition);

    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
    return OMPI_SUCCESS;
}
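The signal above fires when the count of buffered requests drops to zero. A plausible consumer of that signal is a shutdown or detach path that must not release the buffer while sends are still queued; the following is a hedged sketch of such a waiter, using only the mutex, counter, and condition named above:

/* Assumed shutdown-side waiter: block until every buffered request has
 * been freed and mca_pml_base_bsend_request_free has signalled. */
OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);
while (mca_pml_bsend_count != 0) {
    opal_condition_wait(&mca_pml_bsend_condition, &mca_pml_bsend_mutex);
}
OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);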
Code example #8
File: opal_condition.c  Project: 00datman/ompi
static void* thr2_run(opal_object_t* obj)
{
    int i;
    clock_t c1, c2;
    opal_mutex_lock(&mutex);
    c1 = clock();
    for(i=0; i<TEST_COUNT; i++) {
        opal_condition_signal(&thr1_cond);
        opal_condition_wait(&thr2_cond, &mutex);
        thr2_count++;
    }
    c2 = clock();
    opal_mutex_unlock(&mutex);
    fprintf(stderr, "thr2: time per iteration: %ld usec\n", (long)((c2 - c1) / TEST_COUNT));
    return NULL;
}
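thr2_run is one half of a ping-pong benchmark: it wakes thr1, then sleeps on its own condition. The other half does not appear on this page; the sketch below is an assumed mirror-image thr1_run (thr1_count is likewise assumed) that waits on thr1_cond and then wakes thr2 each iteration.

static void* thr1_run(opal_object_t* obj)
{
    int i;
    opal_mutex_lock(&mutex);
    for (i = 0; i < TEST_COUNT; i++) {
        opal_condition_wait(&thr1_cond, &mutex);  /* wait for thr2's signal */
        opal_condition_signal(&thr2_cond);        /* let thr2 run its next iteration */
        thr1_count++;                             /* assumed counter mirroring thr2_count */
    }
    opal_mutex_unlock(&mutex);
    return NULL;
}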
Code example #9
File: pls_base_orted_cmds.c  Project: aosm/openmpi
static void orte_pls_base_orted_default_wakeup(int fd, short event, void *arg)
{
    /* protect for threads */
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);
    
    /* cancel the receive - we didn't get everyone's response in time */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK);
    
    /* set the completion status to reflect timeout error */
    completion_status = ORTE_ERR_TIMEOUT;

    /* declare us "done" so we can exit cleanly */
    opal_condition_signal(&orte_pls_base.orted_cmd_cond);
    
    /* unlock us */
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
}
Code example #10
void
mca_io_base_request_progress_fini(void)
{
#if OMPI_ENABLE_PROGRESS_THREADS
    void *ret;

    /* make the helper thread die */
    thread_done = true;
    if (thread_running) {
        opal_condition_signal(&progress_cond);
        opal_thread_join(&progress_thread, &ret);
    }

    /* clean up */
    OBJ_DESTRUCT(&progress_thread);
    OBJ_DESTRUCT(&progress_cond);
    OBJ_DESTRUCT(&progress_mutex);
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
}
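The fini routine above sets thread_done, signals so the helper thread can wake up and exit, then joins it. The helper thread's body is not shown anywhere on this page; the following is a hedged sketch of what such a loop could look like, with the function name assumed and only the flag, mutex, condition, and pending counter taken from examples #10 and #12.

/* Assumed progress-thread body: sleep on progress_cond while there is no
 * pending IO request and we have not been told to die; fini's signal plus
 * thread_done == true is what breaks it out of the wait. */
static void* progress_thread_run_sketch(opal_object_t *obj)
{
    OPAL_THREAD_LOCK(&progress_mutex);
    while (!thread_done) {
        while (0 == mca_io_base_request_num_pending && !thread_done) {
            opal_condition_wait(&progress_cond, &progress_mutex);
        }
        /* ... drive progress on outstanding IO requests here ... */
    }
    OPAL_THREAD_UNLOCK(&progress_mutex);
    return NULL;
}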
Code example #11
File: pls_base_orted_cmds.c  Project: aosm/openmpi
int orte_pls_base_orted_cancel_operation(void)
{
    /* protect for threads */
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);
    
    /* cancel any waiting receive - we don't want to hear it */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK);
    
    /* set the completion status to reflect cancellation -- no need to
       print anything */
    completion_status = ORTE_ERR_SILENT;
    
    /* declare us "done" so we can exit cleanly */
    opal_condition_signal(&orte_pls_base.orted_cmd_cond);
    
    /* unlock us */
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
    
    return ORTE_SUCCESS;
}
Code example #12
OMPI_DECLSPEC void
mca_io_base_request_progress_add(void)
{
#if OMPI_ENABLE_PROGRESS_THREADS
    /* if we don't have a progress thread, make us have a progress
       thread */
    if (! thread_running) {
        OPAL_THREAD_LOCK(&progress_mutex);
        if (! thread_running) {
            thread_running = true;
            opal_thread_start(&progress_thread);
        }
        OPAL_THREAD_UNLOCK(&progress_mutex);
    }
#endif /* OMPI_ENABLE_PROGRESS_THREADS */

    OPAL_THREAD_ADD32(&mca_io_base_request_num_pending, 1);

#if OMPI_ENABLE_PROGRESS_THREADS
    opal_condition_signal(&progress_cond);
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
}
Code example #13
/*
 *  Request completed - free buffer and decrement pending count 
 */
int mca_pml_base_bsend_request_fini(ompi_request_t* request)
{
    mca_pml_base_send_request_t* sendreq = (mca_pml_base_send_request_t*)request;
    if(sendreq->req_bytes_packed == 0 || 
       sendreq->req_addr == NULL || 
       sendreq->req_addr == sendreq->req_base.req_addr)
        return OMPI_SUCCESS;

    /* remove from list of pending requests */
    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);

    /* free buffer */
    mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, sendreq->req_addr);
    sendreq->req_addr = sendreq->req_base.req_addr;

    /* decrement count of buffered requests */
    if(--mca_pml_bsend_count == 0)
        opal_condition_signal(&mca_pml_bsend_condition);

    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
    return OMPI_SUCCESS;
}
Code example #14
File: pls_base_orted_cmds.c  Project: aosm/openmpi
static void orte_pls_base_cmd_ack(int status, orte_process_name_t* sender,
                                  orte_buffer_t* buffer, orte_rml_tag_t tag,
                                  void* cbdata)
{
    int ret;
    
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);
    
    orted_cmd_num_active--;
    if (orted_cmd_num_active == 0) {
        opal_condition_signal(&orte_pls_base.orted_cmd_cond);
    } else {
        ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK,
                                      ORTE_RML_NON_PERSISTENT, orte_pls_base_cmd_ack, NULL);
        if (ret != ORTE_SUCCESS) {
            ORTE_ERROR_LOG(ret);
            return;
        }
    }
    
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
    return;
}
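Examples #9, #11, and #14 all signal orte_pls_base.orted_cmd_cond, but the code that blocks on it is not among the snippets here. Below is a hedged sketch of that waiter, assuming orted_cmd_num_active counts the acks still outstanding when a command is sent to the daemons.

/* Assumed waiter: after sending a command to the daemons, block until
 * either every ack has arrived (example #14 signals when the counter hits
 * zero) or a timeout/cancel handler signals with completion_status set
 * (examples #9 and #11). */
OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);
if (orted_cmd_num_active > 0) {
    opal_condition_wait(&orte_pls_base.orted_cmd_cond,
                        &orte_pls_base.orted_cmd_lock);
}
OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);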
Code example #15
File: oob_tcp_msg.c  Project: bringhurst/ompi
/*
 * Progress a completed recv:
 * (1) signal a posted recv as complete
 * (2) queue an unexpected message in the recv list
 */
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    /* attempt to match unexpected message to a posted recv */
    mca_oob_tcp_msg_t* post;
    int rc;
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* if I'm not a proc, check if this message came from
     * another job family - procs don't need to do this because
     * they always route through their daemons anyway
     */
    if (!ORTE_PROC_IS_MPI) {
        if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
             ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
            (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
            /* this message came from a different job family that is not
             * a local slave, so we may
             * not know how to route any reply back to the originator. Update
             * our route so we can dynamically build the routing table
             */
            if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin),
                                                               &(msg->msg_hdr.msg_src)))) {
                /* Nothing we can do about errors here as we definitely want
                 * the receive to complete, but at least bark loudly
                 */
                ORTE_ERROR_LOG(rc);
            }
        }
    }
    
    /* match msg against posted receives */
    post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag);
    if(NULL != post) {

        if(NULL == post->msg_uiov || 0 == post->msg_ucnt) {
            opal_output(0, "msg_data returning bad param");
            post->msg_rc = ORTE_ERR_BAD_PARAM;
        } else {
            /* copy msg data into posted recv */
            if (post->msg_flags & ORTE_RML_ALLOC) msg->msg_flags |= ORTE_RML_ALLOC;
            post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
            if(post->msg_flags & ORTE_RML_TRUNC) {
                 int i, size = 0;
                 for(i=1; i<msg->msg_rwcnt+1; i++)
                     size += msg->msg_rwiov[i].iov_len;
                 post->msg_rc = size;
            }
        }

        if(post->msg_flags & ORTE_RML_PEEK) {
            /* will need message for actual receive */
            opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
        } else {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        mca_oob_tcp_component.tcp_match_count++;
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

        if(post->msg_flags & ORTE_RML_PERSISTENT) {
            post->msg_cbfunc(
                post->msg_rc, 
                &peer->peer_name, 
                post->msg_uiov, 
                post->msg_ucnt, 
                post->msg_hdr.msg_tag, 
                post->msg_cbdata);
        } else {
            mca_oob_tcp_msg_complete(post, &msg->msg_hdr.msg_origin);
        }

        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
        if(--mca_oob_tcp_component.tcp_match_count == 0)
            opal_condition_signal(&mca_oob_tcp_component.tcp_match_cond);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

    } else {
        opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t*)msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    }
}
Code example #16
File: orted.c  Project: aosm/openmpi
static void orte_daemon_recv(int status, orte_process_name_t* sender,
                             orte_buffer_t *buffer, orte_rml_tag_t tag,
                             void* cbdata)
{
    orte_buffer_t *answer;
    orte_daemon_cmd_flag_t command;
    int ret;
    orte_std_cntr_t n;
    char *contact_info;
    
    OPAL_TRACE(1);
    
    OPAL_THREAD_LOCK(&orted_globals.mutex);
    
    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] orted_recv: received message from [%ld,%ld,%ld]",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(sender));
    }
    
    n = 1;
    if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(ret);
        OPAL_THREAD_UNLOCK(&orted_globals.mutex);
        return;
    }
    
    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto DONE;
    }
    
    switch(command) {
        /****    EXIT COMMAND    ****/
        case ORTE_DAEMON_EXIT_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv: received exit",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            
            orted_globals.exit_condition = true;
            opal_condition_signal(&orted_globals.condition);
            break;

        /****    HALT VM COMMAND    ****/
        case ORTE_DAEMON_HALT_VM_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv: received halt vm",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            halt_vm();
            break;
            
        /****     CONTACT QUERY COMMAND    ****/
        case ORTE_DAEMON_CONTACT_QUERY_CMD:
            /* send back contact info */
            contact_info = orte_rml.get_uri();
            
            if (NULL == contact_info) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                goto CLEANUP;
            }
            
            if (ORTE_SUCCESS != (ret = orte_dss.pack(answer, &contact_info, 1, ORTE_STRING))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
            
            if (0 > orte_rml.send_buffer(sender, answer, tag, 0)) {
                ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
            }
            break;
        
        /****     HOSTFILE COMMAND    ****/
        case ORTE_DAEMON_HOSTFILE_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;
        
        /****     SCRIPTFILE COMMAND    ****/
        case ORTE_DAEMON_SCRIPTFILE_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;
        
        /****     HEARTBEAT COMMAND    ****/
        case ORTE_DAEMON_HEARTBEAT_CMD:
            ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
            break;
            
        default:
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
    }
    
CLEANUP:
    OBJ_RELEASE(answer);
    
DONE:
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);
    
    /* reissue the non-blocking receive */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
    }
    
    return;
}
Code example #17
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
    orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    opal_list_item_t *item;
    int rc;
    unsigned long deltat;
    struct timeval launchstop;

    /* if ssh exited abnormally, set the child processes to aborted
       and print something useful to the user.  The usual reasons for
       ssh to exit abnormally all are a pretty good indication that
       the child processes aren't going to start up properly.

       This should somehow be pushed up to the calling level, but we
       don't really have a way to do that just yet.
    */
    if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) {
        /* get the mapping for our node so we can cancel the right things */
        rc = orte_rmaps.get_node_map(&node, info->cell,
                                     info->nodename, info->active_job);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* set state of all processes associated with the daemon as
           terminated */
        for(item =  opal_list_get_first(&node->procs);
            item != opal_list_get_end(&node->procs);
            item =  opal_list_get_next(item)) {
            proc = (orte_mapped_proc_t*) item;

                /* Clean up the session directory as if we were the
                   process itself.  This covers the case where the
                   process died abnormally and didn't cleanup its own
                   session directory. */

                orte_session_dir_finalize(&(proc->name));

                rc = orte_smr.set_proc_state(&(proc->name),
                                           ORTE_PROC_STATE_ABORTED, status);
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
            }
        }
        OBJ_RELEASE(node);

 cleanup:
        /* tell the user something went wrong */
        opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
                    info->nodename);
        opal_output(0, "ERROR: There may be more information available from");
        opal_output(0, "ERROR: the remote shell (see above).");

        if (WIFEXITED(status)) {
            opal_output(0, "ERROR: The daemon exited unexpectedly with status %d.",
                   WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
#ifdef WCOREDUMP
            if (WCOREDUMP(status)) {
                opal_output(0, "The daemon received a signal %d (with core).",
                            WTERMSIG(status));
            } else {
                opal_output(0, "The daemon received a signal %d.", WTERMSIG(status));
            }
#else
            opal_output(0, "The daemon received a signal %d.", WTERMSIG(status));
#endif /* WCOREDUMP */
        } else {
            opal_output(0, "No extra status information is available: %d.", status);
        }
        OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
        /* tell the system that this daemon is gone */
        if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
            ORTE_ERROR_LOG(rc);
        }
        
        /* remove the daemon from our local list */
        opal_list_remove_item(&active_daemons, &info->super);
        OBJ_RELEASE(info);
        OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
    } /* if abnormal exit */

    /* release any waiting threads */
    OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
    /* first check timing request */
    if (mca_pls_rsh_component.timing) {
        if (0 != gettimeofday(&launchstop, NULL)) {
            opal_output(0, "pls_rsh: could not obtain stop time");
        } else {
            deltat = (launchstop.tv_sec - launchstart[info->name->vpid].tv_sec)*1000000 +
                     (launchstop.tv_usec - launchstart[info->name->vpid].tv_usec);
            avgtime = avgtime + deltat;
            if (deltat < mintime) {
                mintime = deltat;
                miniter = (unsigned long)info->name->vpid;
            }
            if (deltat > maxtime) {
                maxtime = deltat;
                maxiter = (unsigned long)info->name->vpid;
            }
        }
    }

    if (mca_pls_rsh_component.num_children-- >=
        mca_pls_rsh_component.num_concurrent ||
        mca_pls_rsh_component.num_children == 0) {
        opal_condition_signal(&mca_pls_rsh_component.cond);
    }

    if (mca_pls_rsh_component.timing && mca_pls_rsh_component.num_children == 0) {
        if (0 != gettimeofday(&joblaunchstop, NULL)) {
            opal_output(0, "pls_rsh: could not obtain job launch stop time");
        } else {
            deltat = (joblaunchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
                     (joblaunchstop.tv_usec - joblaunchstart.tv_usec);
            opal_output(0, "pls_rsh: total time to launch job is %lu usec", deltat);
            if (mintime < 999999999) {
                /* had at least one non-local node */
                avgtime = avgtime/opal_list_get_size(&active_daemons);
                opal_output(0, "pls_rsh: average time to launch one daemon %f usec", avgtime);
                opal_output(0, "pls_rsh: min time to launch a daemon was %lu usec for iter %lu", mintime, miniter);
                opal_output(0, "pls_rsh: max time to launch a daemon was %lu usec for iter %lu", maxtime, maxiter);
            } else {
                opal_output(0, "No nonlocal launches to report for timing info");
            }
        }
        free(launchstart);
    }
    
    OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);

}
Code example #18
File: odls_bproc.c  Project: aosm/openmpi
/**
 * Setup io for the current node, then tell orterun we are ready for the actual
 * processes.
 * @retval ORTE_SUCCESS
 * @retval error
 */
int
orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ)
{
    odls_bproc_child_t *child;
    opal_list_item_t* item;
    orte_gpr_value_t *value, **values;
    orte_gpr_keyval_t *kval;
    char *node_name;
    int rc;
    orte_std_cntr_t i, j, kv, kv2, *sptr;
    int src = 0;
    orte_buffer_t *ack;
    bool connect_stdin;
    orte_jobid_t jobid;
    int cycle = 0;

    /* first, retrieve the job number we are to launch from the
     * returned data - we can extract the jobid directly from the
     * subscription name we created
     */
    if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /**
     * hack for bproc4, change process group so that we do not receive signals
     * from the parent/front-end process, as bproc4 does not currently allow the
     * process to intercept the signal
    */
    setpgid(0,0);

    /* loop through the returned data to find the global info and
     * the info for processes going onto this node
     */
    values = (orte_gpr_value_t**)(data->values)->addr;
    for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) {  /* loop through all returned values */
        if (NULL != values[j]) {
            i++;
            value = values[j];
            /* this must have come from one of the process containers, so it must
            * contain data for a proc structure - see if it belongs to this node
            */
            for (kv=0; kv < value->cnt; kv++) {
                kval = value->keyvals[kv];
                if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) {
                    /* Most C-compilers will bark if we try to directly compare the string in the
                    * kval data area against a regular string, so we need to "get" the data
                    * so we can access it */
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) {
                        ORTE_ERROR_LOG(rc);
                        return rc;
                    }
                    /* if this is our node...must also protect against a zero-length string  */
                    if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) {
                        /* ...harvest the info into a new child structure */
                        child = OBJ_NEW(odls_bproc_child_t);
                        for (kv2 = 0; kv2 < value->cnt; kv2++) {
                            kval = value->keyvals[kv2];
                            if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) {
                                /* copy the name into the child object */
                                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) {
                                    ORTE_ERROR_LOG(rc);
                                    return rc;
                                }
                                continue;
                            }
                            if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) {
                                    ORTE_ERROR_LOG(rc);
                                    return rc;
                                }
                                child->app_idx = *sptr;  /* save the index into the app_context objects */
                                continue;
                            }
                        } /* kv2 */
                        /* protect operation on the global list of children */
                        OPAL_THREAD_LOCK(&mca_odls_bproc_component.mutex);
                        opal_list_append(&mca_odls_bproc_component.children, &child->super);
                        opal_condition_signal(&mca_odls_bproc_component.cond);
                        OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.mutex);

                    }
                }
            } /* for kv */
        } /* for j */
    }

    /* set up the io files for our children */
    for(item =  opal_list_get_first(&mca_odls_bproc_component.children);
        item != opal_list_get_end(&mca_odls_bproc_component.children);
        item =  opal_list_get_next(item)) {
        child = (odls_bproc_child_t *) item;
        if(0 < mca_odls_bproc_component.debug) {
            opal_output(0, "orte_odls_bproc_launch: setting up io for "
                            "[%lu,%lu,%lu] proc rank %lu\n",
                            ORTE_NAME_ARGS((child->name)),
                            child->name->vpid);
        }
        /* only setup to forward stdin if it is rank 0, otherwise connect
            * to /dev/null */
        if(0 == child->name->vpid) {
            connect_stdin = true;
        } else {
            connect_stdin = false;
        }

        rc = odls_bproc_setup_stdio(child->name, cycle,
                                    jobid, child->app_idx,
                                    connect_stdin);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        cycle++;
    }

    /* message to indicate that we are ready */
    ack = OBJ_NEW(orte_buffer_t);
    rc = orte_dss.pack(ack, &src, 1, ORTE_INT);
    if(ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    rc = mca_oob_send_packed_nb(ORTE_PROC_MY_HNP, ack, ORTE_RML_TAG_BPROC, 0,
        odls_bproc_send_cb, NULL);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = ORTE_SUCCESS;

cleanup:

    return rc;
}
Code example #19
File: orted.c  Project: aosm/openmpi
static void orte_daemon_recv_pls(int status, orte_process_name_t* sender,
                 orte_buffer_t *buffer, orte_rml_tag_t tag,
                 void* cbdata)
{
    orte_daemon_cmd_flag_t command;
    orte_buffer_t answer;
    int ret;
    orte_std_cntr_t n;
    int32_t signal;
    orte_gpr_notify_data_t *ndat;
    orte_jobid_t job;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received message from [%ld,%ld,%ld]",
                   ORTE_NAME_ARGS(orte_process_info.my_name),
                   ORTE_NAME_ARGS(sender));
    }

    /* unpack the command */
    n = 1;
    if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto CLEANUP;
    }
    
    switch(command) {

        /****    KILL_LOCAL_PROCS   ****/
        case ORTE_DAEMON_KILL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received kill_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the jobid - could be JOBID_WILDCARD, which would indicate
             * we should kill all local procs. Otherwise, only kill those within
             * the specified jobid
             */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &job, &n, ORTE_JOBID))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }

            if (ORTE_SUCCESS != (ret = orte_odls.kill_local_procs(job, true))) {
                ORTE_ERROR_LOG(ret);
            }
            break;
            
        /****    SIGNAL_LOCAL_PROCS   ****/
        case ORTE_DAEMON_SIGNAL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received signal_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* get the signal */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &signal, &n, ORTE_INT32))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
                
            /* see if they specified a process to signal, or if we
             * should just signal them all
             *
             * NOTE: FOR NOW, WE JUST SIGNAL ALL CHILDREN
             */

            if (ORTE_SUCCESS != (ret = orte_odls.signal_local_procs(NULL, signal))) {
                ORTE_ERROR_LOG(ret);
            }
            break;

            /****    ADD_LOCAL_PROCS   ****/
        case ORTE_DAEMON_ADD_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received add_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the notify data object */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &ndat, &n, ORTE_GPR_NOTIFY_DATA))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
            
            /* launch the processes */
            if (ORTE_SUCCESS != (ret = orte_odls.launch_local_procs(ndat, orted_globals.saved_environ))) {
                ORTE_ERROR_LOG(ret);
            }

            /* cleanup the memory */
            OBJ_RELEASE(ndat);
            break;
           
            /****    EXIT COMMAND    ****/
        case ORTE_DAEMON_EXIT_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received exit",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* no response to send here - we'll send it when nearly exit'd */
            orted_globals.exit_condition = true;
            opal_condition_signal(&orted_globals.condition);
            OPAL_THREAD_UNLOCK(&orted_globals.mutex);
            return;
            break;

        default:
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            break;
    }

 CLEANUP:
    /* send an ack that command is done */
    OBJ_CONSTRUCT(&answer, orte_buffer_t);
    if (0 > orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_PLS_ORTED_ACK, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
    }
    OBJ_DESTRUCT(&answer);
    
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    /* reissue the non-blocking receive */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED, ORTE_RML_NON_PERSISTENT, orte_daemon_recv_pls, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
    }

    return;
}
Code example #20
File: orted.c  Project: aosm/openmpi
static void signal_callback(int fd, short flags, void *arg)
{
    OPAL_TRACE(1);
    orted_globals.exit_condition = true;
    opal_condition_signal(&orted_globals.condition);
}
Code example #21
static int construct_child_list(opal_buffer_t *data, orte_jobid_t *job)
{
    int rc;
    orte_vpid_t j, host_daemon;
    orte_odls_child_t *child;
    orte_std_cntr_t cnt;
    orte_process_name_t proc;
    orte_odls_job_t *jobdat=NULL;
    opal_list_item_t *item;
    orte_app_idx_t *app_idx=NULL;
    orte_proc_state_t *states=NULL;
    orte_vpid_t *locations=NULL;
    int32_t *restarts=NULL;
    char **slot_str=NULL;
    bool add_child;
    
    orte_job_t *jptr, *daemons;
    orte_proc_t *pptr, *dptr;
    orte_node_t *nptr;
    int32_t ljob;

    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:constructing child list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* unpack the jobid we are to launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) {
        /* if the buffer was empty, then we know that all we are doing is
         * launching debugger daemons
         */
        if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
            goto done;
        }
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:construct_child_list unpacking data to launch job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
    
    /* even though we are unpacking an add_local_procs cmd, we cannot assume
     * that no job record for this jobid exists. A race condition exists that
     * could allow another daemon's procs to call us with a collective prior
     * to our unpacking add_local_procs. So lookup the job record for this jobid
     * and see if it already exists
     */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_next(item)) {
        orte_odls_job_t *jdat = (orte_odls_job_t*)item;
        
        /* is this the specified job? */
        if (jdat->jobid == *job) {
            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:construct_child_list found existing jobdat for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
            jobdat = jdat;
            break;
        }
    }
    if (NULL == jobdat) {
        /* setup jobdat object for this job */
        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                             "%s odls:construct_child_list adding new jobdat for job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
        jobdat = OBJ_NEW(orte_odls_job_t);
        jobdat->jobid = *job;
        opal_list_append(&orte_local_jobdata, &jobdat->super);
    }
    
    
    /* UNPACK JOB-SPECIFIC DATA */
    /* unpack the job instance */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->instance, &cnt, OPAL_STRING))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the job name */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->name, &cnt, OPAL_STRING))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the job state so we can know if this is a restart vs initial launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->state, &cnt, ORTE_JOB_STATE))) {
        *job = ORTE_JOBID_INVALID;
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    /* unpack the number of nodes involved in this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_nodes, &cnt, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }    
    /* unpack the number of procs in this launch */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_procs, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }    
    /* unpack the total slots allocated to us */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->total_slots_alloc, &cnt, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the mapping policy for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->policy, &cnt, ORTE_MAPPING_POLICY))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the cpus/rank for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->cpus_per_rank, &cnt, OPAL_INT16))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the stride for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->stride, &cnt, OPAL_INT16))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the control flags for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->controls, &cnt, ORTE_JOB_CONTROL))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the stdin target for the job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->stdin_target, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack whether or not process recovery is allowed for this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->enable_recovery, &cnt, OPAL_BOOL))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    /* unpack the number of app_contexts for this job */
    cnt=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_apps, &cnt, ORTE_APP_IDX))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                         "%s odls:construct_child_list unpacking %ld app_contexts",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)jobdat->num_apps));
    
    /* allocate space and unpack the app_contexts for this job - the HNP checked
     * that there must be at least one, so don't bother checking here again
     */
    if (NULL != jobdat->apps) {
        free(jobdat->apps);
    }
    jobdat->apps = (orte_app_context_t**)malloc(jobdat->num_apps * sizeof(orte_app_context_t*));
    if (NULL == jobdat->apps) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto REPORT_ERROR;
    }
    cnt = jobdat->num_apps;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, jobdat->apps, &cnt, ORTE_APP_CONTEXT))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    /* allocate memory for app_idx */
    app_idx = (orte_app_idx_t*)malloc(jobdat->num_procs * sizeof(orte_app_idx_t));
    /* unpack app_idx in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, app_idx, &cnt, ORTE_APP_IDX))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    /* allocate memory for states */
    states = (orte_proc_state_t*)malloc(jobdat->num_procs  * sizeof(orte_proc_state_t));
    /* unpack states in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, states, &cnt, ORTE_PROC_STATE))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    /* allocate memory for locations */
    locations = (orte_vpid_t*)malloc(jobdat->num_procs  * sizeof(orte_vpid_t));
    /* unpack locations in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, locations, &cnt, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }
    
    /* allocate memory for restarts */
    restarts = (int32_t*)malloc(jobdat->num_procs  * sizeof(int32_t));
    /* unpack restarts in one shot */
    cnt=jobdat->num_procs;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, restarts, &cnt, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto REPORT_ERROR;
    }

    /* cycle thru the procs and build/update the global arrays */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (NULL == (jptr = orte_get_job_data_object(jobdat->jobid))) {
        jptr = OBJ_NEW(orte_job_t);
        jptr->jobid = jobdat->jobid;
        /* store it on the global job data pool */
        ljob = ORTE_LOCAL_JOBID(jptr->jobid);
        opal_pointer_array_set_item(orte_job_data, ljob, jptr);
    }
    jptr->enable_recovery = jobdat->enable_recovery;
    for (j=0; j < jobdat->num_procs; j++) {
        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, j))) {
            pptr = OBJ_NEW(orte_proc_t);
            pptr->name.jobid = jobdat->jobid;
            pptr->name.vpid = j;
            opal_pointer_array_set_item(jptr->procs, j, pptr);
        }
        pptr->local_rank = 0;
        pptr->node_rank = 0;
        pptr->state = states[j];
        pptr->app_idx = app_idx[j];
        pptr->restarts = restarts[j];
        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, locations[j]))) {
            nptr = OBJ_NEW(orte_node_t);
            nptr->index = locations[j];
            opal_pointer_array_set_item(orte_node_pool, locations[j], nptr);
        }
        OBJ_RETAIN(nptr);  /* maintain accounting */
        pptr->node = nptr;
        if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, locations[j]))) {
            /* got BIG problem */
            opal_output(0, "%s CANNOT FIND REFERENCED DAEMON %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_VPID_PRINT(locations[j]));
            rc = ORTE_ERR_NOT_FOUND;
            goto REPORT_ERROR;
        }
        OBJ_RETAIN(dptr);
        nptr->daemon = dptr;
    }
    /* cycle through the procs and find mine */
    proc.jobid = jobdat->jobid;
    for (j=0; j < jobdat->num_procs; j++) {
        proc.vpid = j;
        if (ORTE_PROC_STATE_INIT != states[j]) {
            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:constructing child list - proc %s not at INIT",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc)));
            continue;
        }
        host_daemon = locations[j];
#if 0
        /* get the vpid of the daemon that is to host this proc */
        if (ORTE_VPID_INVALID == (host_daemon = orte_ess.proc_get_daemon(&proc))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto REPORT_ERROR;
        }
#endif
        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                             "%s odls:constructing child list - checking proc %s on daemon %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc),
                             ORTE_VPID_PRINT(host_daemon)));

        /* does this proc belong to us? */
        if (ORTE_PROC_MY_NAME->vpid == host_daemon) {
            
            OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                 "%s odls:constructing child list - found proc %s for me!",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc)));
            
            add_child = true;
            /* if this job is restarting procs, then we need to treat things
             * a little differently. We may be adding a proc to our local
             * children (if the proc moved here from somewhere else), or we
             * may simply be restarting someone already here.
             */
            if (ORTE_JOB_STATE_RESTART == jobdat->state) {
                /* look for this job on our current list of children */
                for (item = opal_list_get_first(&orte_local_children);
                     item != opal_list_get_end(&orte_local_children);
                     item = opal_list_get_next(item)) {
                    child = (orte_odls_child_t*)item;
                    if (child->name->jobid == proc.jobid &&
                        child->name->vpid == proc.vpid) {
                        /* do not duplicate this child on the list! */
                        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                             "proc %s is on list and is %s",
                                             ORTE_NAME_PRINT(&proc),
                                             (child->alive) ? "ALIVE" : "DEAD"));
                        add_child = false;
                        child->do_not_barrier = true;
                        child->restarts = restarts[j];
                        /* mark that this app_context is being used on this node */
                        jobdat->apps[app_idx[j]]->used_on_node = true;
                        break;
                    }
                }
            }
            
            /* if we need to add the child, do so */
            if (add_child) {
                OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                     "adding proc %s to my local list",
                                     ORTE_NAME_PRINT(&proc)));
                /* keep tabs of the number of local procs */
                jobdat->num_local_procs++;
                /* add this proc to our child list */
                child = OBJ_NEW(orte_odls_child_t);
                /* copy the name to preserve it */
                if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto REPORT_ERROR;
                }
                child->app_idx = app_idx[j];  /* save the index into the app_context objects */
                /* if the job is in restart mode, the child must not barrier when launched */
                if (ORTE_JOB_STATE_RESTART == jobdat->state) {
                    child->do_not_barrier = true;
                }
                child->restarts = restarts[j];
                if (NULL != slot_str && NULL != slot_str[j]) {
                    child->slot_list = strdup(slot_str[j]);
                }
                /* mark that this app_context is being used on this node */
                jobdat->apps[app_idx[j]]->used_on_node = true;
                /* protect operation on the global list of children */
                OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
                opal_list_append(&orte_local_children, &child->super);
                opal_condition_signal(&orte_odls_globals.cond);
                OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
            }
        }
    }
    
    /* flag that the launch msg has been processed so daemon collectives can proceed */
    OPAL_THREAD_LOCK(&jobdat->lock);
    jobdat->launch_msg_processed = true;
    opal_condition_broadcast(&jobdat->cond);
    OPAL_THREAD_UNLOCK(&jobdat->lock);
    
 done:
    if (NULL != app_idx) {
        free(app_idx);
        app_idx = NULL;
    }
    if (NULL != states) {
        free(states);
        states = NULL;
    }
    if (NULL != slot_str) {
        for (j=0; j < jobdat->num_procs; j++) {
            free(slot_str[j]);
        }
        free(slot_str);
        slot_str = NULL;
    }
    
    return ORTE_SUCCESS;

 REPORT_ERROR:
    /* we have to report an error back to the HNP so we don't just
     * hang. Although there shouldn't be any errors once this is
     * all debugged, it is still good practice to have a way
     * for it to happen - especially so developers don't have to
     * deal with the hang!
     */
    orte_errmgr.update_state(*job, ORTE_JOB_STATE_NEVER_LAUNCHED,
                             NULL, ORTE_PROC_STATE_UNDEF, 0, rc);
   
    if (NULL != app_idx) {
        free(app_idx);
        app_idx = NULL;
    }
    if (NULL != states) {
        free(states);
        states = NULL;
    }
    if (NULL != slot_str && NULL != jobdat) {
        for (j=0; j < jobdat->num_procs; j++) {
            if (NULL != slot_str[j]) {
                free(slot_str[j]);
            }
        }
        free(slot_str);
        slot_str = NULL;
    }
    
    return rc;
}
Code example #22
static void orte_iof_base_endpoint_write_handler(int sd, short flags, void *user)
{
    int errno_save;
    orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)user;

    /*
     * step through the list of queued fragments and attempt to write
     * until the output descriptor would block
    */
    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    while(opal_list_get_size(&endpoint->ep_sink_frags)) {
        orte_iof_base_frag_t* frag = (orte_iof_base_frag_t*)opal_list_get_first(&endpoint->ep_sink_frags);
        int rc;

        /* close connection on zero byte message */
        if(frag->frag_len == 0) {
            orte_iof_base_endpoint_closed(endpoint);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }

        /* progress pending messages */
        rc = write(endpoint->ep_fd, frag->frag_ptr, frag->frag_len);
        errno_save = errno;
        if (rc < 0) {
            if (EAGAIN == errno_save) {
                break;
            }
            if (EINTR == errno_save) {
                continue;
            }
            /* All other errors -- to include sigpipe -- mean that
               Something Bad happened and we should abort in
               despair. */
            orte_iof_base_endpoint_closed(endpoint);

            /* Send a ACK-AND-CLOSE back to the service so that it
               knows not to wait for any further ACKs */
            orte_iof_base_frag_ack(frag, true);

            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }
        frag->frag_len -= rc;
        frag->frag_ptr += rc;
        if(frag->frag_len > 0) {
            break;
        }
        opal_list_remove_item(&endpoint->ep_sink_frags, &frag->super.super);
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        orte_iof_base_frag_ack(frag, false);
        OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    }

    /* is there anything left to write? */
    if(opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
        opal_event_del(&endpoint->ep_event);
        if(orte_iof_base.iof_waiting) {
            opal_condition_signal(&orte_iof_base.iof_condition);
        }
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}