コード例 #1
0
static orte_rml_module_t*
rml_oob_init(int* priority)
{
    if (mca_oob_base_init() != ORTE_SUCCESS)
        return NULL;
    *priority = 1;
    
    OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.queued_routing_messages, opal_list_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.queued_lock, opal_mutex_t);
    /* Set default timeout for queued messages to be 1/2 second */
    orte_rml_oob_module.timeout.tv_sec = 0;
    orte_rml_oob_module.timeout.tv_usec = 500000;
    orte_rml_oob_module.timer_event = (opal_event_t *) malloc(sizeof(opal_event_t));
    if (NULL == orte_rml_oob_module.timer_event) {
        return NULL;
    }
    opal_evtimer_set(orte_rml_oob_module.timer_event, rml_oob_queued_progress,
                     NULL);

    orte_rml_oob_module.active_oob = &mca_oob;
    orte_rml_oob_module.active_oob->oob_exception_callback = 
        orte_rml_oob_exception_callback;

    return &orte_rml_oob_module.super;
}
コード例 #2
0
ファイル: oob_tcp_peer.c プロジェクト: aosm/openmpi
/*
 * This is the constructor function for the mca_oob_tcp_peer
 * struct. Note that this function and OBJ_NEW should NEVER
 * be called directly. Instead, use mca_oob_tcp_add_peer
 *
 * @param peer a pointer to the mca_oob_tcp_peer_t struct to be initialized
 * @retval none
 */
static void mca_oob_tcp_peer_construct(mca_oob_tcp_peer_t* peer) 
{ 
    OBJ_CONSTRUCT(&(peer->peer_send_queue), opal_list_t);
    OBJ_CONSTRUCT(&(peer->peer_lock), opal_mutex_t);
    memset(&peer->peer_send_event, 0, sizeof(peer->peer_send_event));
    memset(&peer->peer_recv_event, 0, sizeof(peer->peer_recv_event));
    memset(&peer->peer_timer_event, 0, sizeof(peer->peer_timer_event));
    opal_evtimer_set(&peer->peer_timer_event, mca_oob_tcp_peer_timer_handler, peer);
}
コード例 #3
0
ファイル: orted.c プロジェクト: aosm/openmpi
static void halt_vm(void)
{
    int ret;
    struct timeval tv = { 1, 0 };
    opal_event_t* event;
    opal_list_t attrs;
    opal_list_item_t *item;
    
    /* terminate the vm - this will also wake us up so we can exit */
    OBJ_CONSTRUCT(&attrs, opal_list_t);
    orte_rmgr.add_attribute(&attrs, ORTE_NS_INCLUDE_DESCENDANTS, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE);
    ret = orte_pls.terminate_orteds(0, &orte_abort_timeout, &attrs);
    while (NULL != (item = opal_list_remove_first(&attrs))) OBJ_RELEASE(item);
    OBJ_DESTRUCT(&attrs);
    
    /* setup a delay to give the orteds time to complete their departure */
    if (NULL != (event = (opal_event_t*)malloc(sizeof(opal_event_t)))) {
        opal_evtimer_set(event, exit_callback, NULL);
        opal_evtimer_add(event, &tv);
    }
}
コード例 #4
0
static int orte_pls_rsh_launch_threaded(orte_jobid_t jobid)
{
    struct timeval tv = { 0, 0 };
    struct opal_event event;
    struct orte_pls_rsh_stack_t stack;

    OBJ_CONSTRUCT(&stack, orte_pls_rsh_stack_t);

    stack.jobid = jobid;
    if( opal_event_progress_thread() ) {
        stack.rc = orte_pls_rsh_launch( jobid );
    } else {
        opal_evtimer_set(&event, orte_pls_rsh_launch_cb, &stack);
        opal_evtimer_add(&event, &tv);

        OPAL_THREAD_LOCK(&stack.mutex);
        while (stack.complete == false) {
            opal_condition_wait(&stack.cond, &stack.mutex);
        }
        OPAL_THREAD_UNLOCK(&stack.mutex);
    }
    OBJ_DESTRUCT(&stack);
    return stack.rc;
}
コード例 #5
0
ファイル: pls_base_orted_cmds.c プロジェクト: aosm/openmpi
int orte_pls_base_orted_kill_local_procs(opal_list_t *daemons, orte_jobid_t job, struct timeval *timeout)
{
    int rc;
    orte_buffer_t cmd;
    orte_daemon_cmd_flag_t command=ORTE_DAEMON_KILL_LOCAL_PROCS;
    opal_list_item_t *item;
    orte_pls_daemon_info_t *dmn;
    opal_event_t* event = NULL;

    OPAL_TRACE(1);

    OBJ_CONSTRUCT(&cmd, orte_buffer_t);
    
    /* pack the command */
    if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&cmd);
        return rc;
    }
    
    /* pack the jobid */
    if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &job, 1, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&cmd);
        return rc;
    }
    
    /* send the commands as fast as we can */
    for (item = opal_list_get_first(daemons);
         item != opal_list_get_end(daemons);
         item = opal_list_get_next(item)) {
        dmn = (orte_pls_daemon_info_t*)item;

        if (0 > orte_rml.send_buffer_nb(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED,
                                        0, orte_pls_base_orted_send_cb, NULL)) {
            ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
            OBJ_DESTRUCT(&cmd);
            return rc;
        }
            
        orted_cmd_num_active++;
    }
    OBJ_DESTRUCT(&cmd);

    /* post the receive for the ack's */
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED_ACK,
                                 ORTE_RML_NON_PERSISTENT, orte_pls_base_cmd_ack, NULL);
    if (rc != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* define the default completion status */
    completion_status = ORTE_SUCCESS;
    
    /* wait for all commands to have been received */
    OPAL_THREAD_LOCK(&orte_pls_base.orted_cmd_lock);
    if (orted_cmd_num_active > 0) {
        /* setup a delay to give the orteds time to complete their departure - wake us up if they
        * don't exit by the prescribed time
        */
        if (NULL != timeout &&  /* only do this if the user gave us a time to wait */
            NULL != (event = (opal_event_t*)malloc(sizeof(opal_event_t)))) {
            opal_evtimer_set(event, orte_pls_base_orted_default_wakeup, NULL);
            opal_evtimer_add(event, timeout);
        }
        /* now go to sleep until woken up */
        opal_condition_wait(&orte_pls_base.orted_cmd_cond, &orte_pls_base.orted_cmd_lock);
    }
    OPAL_THREAD_UNLOCK(&orte_pls_base.orted_cmd_lock);
    
    /* log an error if one occurred */
    if (ORTE_SUCCESS != completion_status) {
        ORTE_ERROR_LOG(completion_status);
    }

    /* if started, kill the timer event so it doesn't hit us later */
    if (NULL != event) {
        opal_evtimer_del(event);
        free(event);
    }    
    
    /* we're done! */
    return completion_status;
}
コード例 #6
0
ファイル: show_help.c プロジェクト: rjrpaz/MyOpenMPI
static int show_help(const char *filename, const char *topic,
                     const char *output, orte_process_name_t *sender)
{
    int rc;
    tuple_list_item_t *tli = NULL;
    orte_namelist_t *pnli;
    time_t now = time(NULL);

    /* If we're aggregating, check for duplicates.  Otherwise, don't
       track duplicates at all and always display the message. */
    if (orte_help_want_aggregate) {
        rc = get_tli(filename, topic, &tli);
    } else {
        rc = ORTE_ERR_NOT_FOUND;
    }

    /* Was it already displayed? */
    if (ORTE_SUCCESS == rc) {
        /* Yes.  But do we want to print anything?  That's complicated.

           We always show the first message of a given (filename,
           topic) tuple as soon as it arrives.  But we don't want to
           show duplicate notices often, because we could get overrun
           with them.  So we want to gather them up and say "We got N
           duplicates" every once in a while.

           And keep in mind that at termination, we'll unconditionally
           show all accumulated duplicate notices.

           A simple scheme is as follows:
           - when the first of a (filename, topic) tuple arrives
             - print the message
             - if a timer is not set, set T=now
           - when a duplicate (filename, topic) tuple arrives
             - if now>(T+5) and timer is not set (due to
               non-pre-emptiveness of our libevent, a timer *could* be
               set!)
               - print all accumulated duplicates
               - reset T=now
             - else if a timer was not set, set the timer for T+5
             - else if a timer was set, do nothing (just wait)
           - set T=now when the timer expires
        */
        ++tli->tli_count_since_last_display;
        if (now > show_help_time_last_displayed + 5 && !show_help_timer_set) {
            show_accumulated_duplicates(0, 0, NULL);
        } else if (!show_help_timer_set) {
            opal_evtimer_set(&show_help_timer_event,
                             show_accumulated_duplicates, NULL);
            opal_evtimer_add(&show_help_timer_event, &show_help_interval);
            show_help_timer_set = true;
        }
    }
    /* Not already displayed */
    else if (ORTE_ERR_NOT_FOUND == rc) {
        if (orte_xml_output) {
            char *tmp;
            tmp = xml_format((unsigned char*)output);
            fprintf(orte_xml_fp, "%s", tmp);
            fflush(orte_xml_fp);
            free(tmp);
        } else {
            fprintf(stderr, "%s", output);
        }
        if (!show_help_timer_set) {
            show_help_time_last_displayed = now;
        }
    }
    /* Some other error occurred */
    else {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* If we're aggregating, add this process name to the list */
    if (orte_help_want_aggregate) {
        pnli = OBJ_NEW(orte_namelist_t);
        if (NULL == pnli) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        pnli->name = *sender;
        opal_list_append(&(tli->tli_processes), &(pnli->item));
    }
    return ORTE_SUCCESS;
}