Ejemplo n.º 1
0
/*
 * Start udsensors sampling.
 *
 * If the component is configured to use its own progress thread, make sure
 * that thread's event base exists, create a sampler object and arm a timer
 * on that base that invokes perthread_udsensors_sample. Otherwise only
 * adopt the sensor framework's base sample rate.
 *
 * jobid is not used by this function.
 */
static void start(orte_jobid_t jobid)
{
    if (!mca_sensor_udsensors_component.use_progress_thread) {
        /* no dedicated thread: follow the base framework's rate */
        mca_sensor_udsensors_component.sample_rate = orcm_sensor_base.sample_rate;
        return;
    }

    /* bring up the dedicated progress thread the first time through */
    if (!orcm_sensor_udsensors.ev_active) {
        orcm_sensor_udsensors.ev_active = true;
        orcm_sensor_udsensors.ev_base = opal_progress_thread_init("udsensors");
        if (NULL == orcm_sensor_udsensors.ev_base) {
            /* no event base available - clear the flag and give up */
            orcm_sensor_udsensors.ev_active = false;
            return;
        }
    }

    /* the sampler object carries both the timer event and its period */
    udsensors_sampler = OBJ_NEW(orcm_sensor_sampler_t);

    /* a component rate of zero means "not provided": inherit the base rate */
    if (0 == mca_sensor_udsensors_component.sample_rate) {
        mca_sensor_udsensors_component.sample_rate = orcm_sensor_base.sample_rate;
    }

    udsensors_sampler->log_data = orcm_sensor_base.log_samples;
    udsensors_sampler->rate.tv_sec = mca_sensor_udsensors_component.sample_rate;

    /* arm the sampling timer on the udsensors event base */
    opal_event_evtimer_set(orcm_sensor_udsensors.ev_base, &udsensors_sampler->ev,
                           perthread_udsensors_sample, udsensors_sampler);
    opal_event_evtimer_add(&udsensors_sampler->ev, &udsensors_sampler->rate);
}
Ejemplo n.º 2
0
/*
 * Timer test driver: registers NEVENT timer events on opal_event_base, each
 * with a sub-second delay taken from rand_int(50000) microseconds, then runs
 * the event loop.
 *
 * Returns 0 when `called` reached NEVENT, non-zero otherwise (including
 * when startup or an allocation fails).
 * NOTE(review): `called` is presumably incremented by time_cb - confirm.
 */
int main(int argc, char **argv)
{
    struct timeval tv;
    int i;
#ifdef WIN32
    WORD wVersionRequested;
    WSADATA wsaData;
    int	err;

    wVersionRequested = MAKEWORD(2, 2);

    err = WSAStartup(wVersionRequested, &wsaData);
    if (0 != err) {
        /* Winsock could not be initialized; nothing below can work */
        return 1;
    }
#endif

    /* Initialize the event library */
    opal_init(&argc, &argv);

    for (i = 0; i < NEVENT; i++) {
        /* Initialize one event */
        ev[i] = (opal_event_t*)malloc(sizeof(opal_event_t));
        if (NULL == ev[i]) {
            /* never hand a NULL event to the event library */
            opal_finalize();
            return 1;
        }
        opal_event_evtimer_set(opal_event_base, ev[i], time_cb, ev[i]);
        tv.tv_sec = 0;
        tv.tv_usec = rand_int(50000);
        opal_event_evtimer_add(ev[i], &tv);
    }

    opal_event_dispatch(opal_event_base);

    opal_finalize();
    return (called < NEVENT);
}
Ejemplo n.º 3
0
/*
 * Start every registered sensor module for the given job and, if at least
 * one module exists and the base sampler is not yet running, arm the
 * periodic sampling timer.
 */
void orte_sensor_base_start(orte_jobid_t job)
{
    orte_sensor_active_module_t *active;
    int idx;

    opal_output_verbose(5, orte_sensor_base.output,
                        "%s sensor:base: starting sensors",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* walk the module array in index (priority) order; empty slots are skipped */
    for (idx = 0; idx < orte_sensor_base.modules.size; idx++) {
        active = (orte_sensor_active_module_t*)
            opal_pointer_array_get_item(&orte_sensor_base.modules, idx);
        if (NULL != active) {
            mods_active = true;
            /* the start entry point is optional */
            if (NULL != active->module->start) {
                active->module->start(job);
            }
        }
    }

    /* nothing to sample, or the sampler is already running */
    if (!mods_active || orte_sensor_base.active) {
        return;
    }

    /* buffer that collects the samples */
    orte_sensor_base.samples = OBJ_NEW(opal_buffer_t);

    /* timer that wakes us up for each data sample */
    orte_sensor_base.active = true;
    opal_event_evtimer_set(orte_event_base, &orte_sensor_base.sample_ev,
                           orte_sensor_base_sample, NULL);
    opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate);
}
Ejemplo n.º 4
0
/*
 * Arm the timer embedded in a request.
 *
 * req       request whose timer.event / timer.value are (re)initialized
 * timeout   delay to copy into the request's own timer value
 * max_tries accepted for the caller's benefit but not used here
 * cb        callback invoked with req as its argument when the timer fires
 */
void mca_oob_ud_req_timer_set (mca_oob_ud_req_t *req, const struct timeval *timeout,
                               int max_tries, void (*cb)(evutil_socket_t, short, void *))
{
    struct timeval *delay = &req->timer.value;

    /* keep a private copy so the caller's timeval need not outlive this call */
    delay->tv_sec  = timeout->tv_sec;
    delay->tv_usec = timeout->tv_usec;

    opal_event_evtimer_set (orte_event_base, &req->timer.event, cb, (void *) req);
    opal_event_evtimer_add (&req->timer.event, delay);
}
Ejemplo n.º 5
0
/*
 * Arm the peer's message-timeout timer, unless it is already active or the
 * peer has no entries in peer_flying_messages.
 */
void mca_oob_ud_peer_start_timer (mca_oob_ud_peer_t *peer)
{
    /* already running */
    if (peer->peer_timer.active) {
        return;
    }

    /* nothing in flight, so nothing can time out */
    if (0 == opal_list_get_size (&peer->peer_flying_messages)) {
        return;
    }

    peer->peer_timer.active = true;

    opal_event_evtimer_set (orte_event_base, &peer->peer_timer.event,
                            mca_oob_ud_peer_msg_timeout, (void *) peer);
    opal_event_evtimer_add (&peer->peer_timer.event, &peer->peer_timer.value);
}
Ejemplo n.º 6
0
/*
 * Arm the heartbeat check timer once.
 *
 * Does nothing when the check is already active or when `daemons` is NULL.
 * The check period is three times the base sensor rate (whole seconds).
 * job is not used by this function.
 */
static void start(orte_jobid_t job)
{
    if (check_active || NULL == daemons) {
        return;
    }

    /* period of the check event */
    check_time.tv_usec = 0;
    check_time.tv_sec = 3 * orte_sensor_base.rate.tv_sec;

    /* the event is handed to its own callback as the argument */
    opal_event_evtimer_set(orte_event_base, &check_ev, check_heartbeat, &check_ev);
    opal_event_evtimer_add(&check_ev, &check_time);
    check_active = true;
}
Ejemplo n.º 7
0
/* this function only gets called when FORCED_TERMINATE
 * has been invoked, which means that there is some
 * internal failure (e.g., to pack/unpack a correct value).
 * We could just exit, but that doesn't result in any
 * meaningful error message to the user. Likewise, just
 * printing something to stdout/stderr won't necessarily
 * get back to the user. Instead, we will send an error
 * report to mpirun and give it a chance to order our
 * termination. In order to ensure we _do_ terminate,
 * we set a timer - if it fires before we receive the
 * termination command, then we will exit on our own. This
 * protects us in the case that the failure is in the
 * messaging system itself
 *
 * error_code  exit status to record via ORTE_UPDATE_EXIT_STATUS
 * fmt         printf-style message format, or NULL for no message
 * ...         arguments consumed by fmt
 */
static void hnp_abort(int error_code, char *fmt, ...)
{
    va_list arglist;
    char *outmsg = NULL;
    orte_timer_t *timer;

    /* only do this once */
    if (orte_abnormal_term_ordered) {
        return;
    }

    /* ensure we exit with non-zero status */
    ORTE_UPDATE_EXIT_STATUS(error_code);

    /* set the aborting flag */
    orte_abnormal_term_ordered = true;

    /* If there was a message, construct it */
    va_start(arglist, fmt);
    if (NULL != fmt) {
        /* on failure vasprintf leaves the output pointer undefined,
         * so force it back to NULL rather than print garbage */
        if (0 > vasprintf(&outmsg, fmt, arglist)) {
            outmsg = NULL;
        }
    }
    va_end(arglist);

    /* use the show-help system to get the message out
     * NOTE(review): outmsg is never freed here; confirm whether
     * orte_show_help keeps the pointer before adding a free() */
    orte_show_help("help-errmgr-base.txt", "simple-message", true, outmsg);

    /* this could have happened very early, so see if it happened
     * before we started anything - if so, we can just finalize */
    if (orte_never_launched) {
        orte_quit(0, 0, NULL);
        return;
    }

    /* tell the daemons to terminate */
    if (ORTE_SUCCESS != orte_plm.terminate_orteds()) {
        orte_quit(0, 0, NULL);
        return;
    }

    /* set a timer for exiting - this also gives the message a chance
     * to get out! */
    if (NULL == (timer = OBJ_NEW(orte_timer_t))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return;
    }
    timer->tv.tv_sec = 5;
    timer->tv.tv_usec = 0;
    opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL);
    opal_event_set_priority(timer->ev, ORTE_ERROR_PRI);
    ORTE_POST_OBJECT(timer);
    opal_event_evtimer_add(timer->ev, &timer->tv);
}
Ejemplo n.º 8
0
/*
 * Append a request to the component's ud_event_queued_reqs list and make
 * sure the completion-dispatch event is scheduled (with a zero delay)
 * unless it is already pending.
 */
void mca_oob_ud_event_queue_completed (mca_oob_ud_req_t *req)
{
    struct timeval immediately = {0, 0};

    mca_oob_ud_req_append_to_list (req, &mca_oob_ud_component.ud_event_queued_reqs);

    /* a dispatch is already on its way; it will pick this request up */
    if (opal_event_evtimer_pending (&mca_oob_ud_component.ud_complete_event, &immediately)) {
        return;
    }

    opal_event_evtimer_set (orte_event_base, &mca_oob_ud_component.ud_complete_event,
                            mca_oob_ud_complete_dispatch, NULL);
    opal_event_add (&mca_oob_ud_component.ud_complete_event, &immediately);
}
Ejemplo n.º 9
0
/*
 * Start the ft_tester periodic timer.
 *
 * On the first call this allocates the timer event and arms it with a
 * period of mca_sensor_ft_tester_component.fail_rate seconds; the event is
 * passed to the `sample` callback as its argument. Later calls are no-ops
 * while sample_ev is set. If the allocation fails nothing is armed and
 * sample_ev stays NULL, so a later call can retry.
 *
 * jobid is not used by this function.
 */
static void start(orte_jobid_t jobid)
{
    if (NULL == sample_ev) {
        /* startup a timer to wake us up periodically */
        sample_ev =  (opal_event_t *) malloc(sizeof(opal_event_t));
        if (NULL == sample_ev) {
            /* out of memory: do not hand a NULL event to the event library */
            return;
        }
        opal_event_evtimer_set(orte_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_ft_tester_component.fail_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}
Ejemplo n.º 10
0
/*
 * Initialize the receive side of an ACK QoS channel.
 *
 * channel     the orte_qos_ack_channel_t to set up (passed as void *)
 * attributes  not used by this function
 *
 * Sets up the outstanding-message hotel and prepares (but does not arm)
 * the channel's ack-window timer event.
 *
 * Returns ORTE_SUCCESS, or the error reported by opal_hotel_init; in the
 * error case the timer event is not set up.
 */
static int ack_init_recv (void *channel, opal_list_t *attributes) {
    int32_t rc;
    uint32_t eviction_timeout;
    orte_qos_ack_channel_t *ack_chan;
    ack_chan = (orte_qos_ack_channel_t*) channel;
    /* TO DO - need to adjust eviction timeout according to window size
       lets keep max time out for the first pass
       NOTE(review): the multiplier is 100000, not 1000000 - confirm the
       time unit opal_hotel_init expects for its eviction timeout */
    eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000;
    /* init outstanding msg hotel - do not ignore a failure here, the
       channel is unusable without it */
    rc = opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS,
                          orte_event_base, eviction_timeout, 0,
                          orte_qos_ack_recv_msg_timeout_callback);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_open channel = %p init hotel timeout =%u",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)ack_chan, (unsigned int)eviction_timeout));
    opal_event_evtimer_set (orte_event_base, &ack_chan->msg_ack_timer_event,
                            orte_qos_ack_msg_window_timeout_callback, (void *) ack_chan);
    return rc;
}
Ejemplo n.º 11
0
/*
 * State-machine callback for the LAUNCH_APPS job state.
 *
 * fd, args  event-callback arguments, forwarded unchanged to
 *           orte_plm_base_launch_apps
 * cbdata    the orte_state_caddy_t describing the job and its state
 *
 * Registers the persistent receive for daemon sync requests, hands the job
 * to the base launch function, and then arms a one-second timer that
 * invokes heartbeat_with_AM_cb with the job as its argument.
 */
static void plm_yarn_launch_apps(int fd, short args, void *cbdata)
{
    int rc;
    orte_job_t *jdata;
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    opal_event_t *ev = NULL;
    struct timeval delay;

    /* convenience */
    jdata = caddy->jdata;

    /* this callback is only valid for the LAUNCH_APPS state */
    if (ORTE_JOB_STATE_LAUNCH_APPS != caddy->job_state) {
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* update job state */
    jdata->state = caddy->job_state;

    /* register recv callback for daemons sync request */
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                      ORTE_RML_TAG_YARN_SYNC_REQUEST,
                                                      ORTE_RML_PERSISTENT,
                                                      yarn_hnp_sync_recv, jdata))) {
        ORTE_ERROR_LOG(rc);
    }

    orte_plm_base_launch_apps(fd, args, cbdata);

    /* heartbeat with the AM: the event must outlive this call, so it is
     * heap-allocated */
    ev = (opal_event_t*) malloc(sizeof(opal_event_t));
    if (NULL == ev) {
        /* out of memory: report it instead of passing NULL to the event library */
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    delay.tv_sec = 1;
    delay.tv_usec = 0;

    opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata);
    opal_event_evtimer_add(ev, &delay);
}
Ejemplo n.º 12
0
static orte_rml_module_t*
rml_oob_init(int* priority)
{
    if (init_done) {
        *priority = 1;
        return &orte_rml_oob_module.super;
    }
    
    if (mca_oob_base_init() != ORTE_SUCCESS) {
        *priority = -1;
        return NULL;
    }

    *priority = 1;
    
    OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.queued_routing_messages, opal_list_t);
    OBJ_CONSTRUCT(&orte_rml_oob_module.queued_lock, opal_mutex_t);
    /* Set default timeout for queued messages to be 1/2 second */
    orte_rml_oob_module.timeout.tv_sec = 0;
    orte_rml_oob_module.timeout.tv_usec = 500000;
    orte_rml_oob_module.timer_event =  (opal_event_t *) malloc(sizeof(opal_event_t));
    if (NULL == orte_rml_oob_module.timer_event) {
        return NULL;
    }
    opal_event_evtimer_set(orte_event_base, orte_rml_oob_module.timer_event,
                           rml_oob_queued_progress,
                           NULL);

    orte_rml_oob_module.active_oob = &mca_oob;
    orte_rml_oob_module.active_oob->oob_exception_callback = 
        orte_rml_oob_exception_callback;
    
    init_done = true;
    return &orte_rml_oob_module.super;
}
Ejemplo n.º 13
0
/*
 * Initialize the ORTE layer.
 *
 * pargc, pargv  passed through to opal_init
 * flags         process type, stored in orte_process_info.proc_type
 *
 * Calls after the first only bump the orte_initialized counter and return
 * ORTE_SUCCESS. On failure the step that failed is reported through
 * orte_show_help (unless the error is ORTE_ERR_SILENT) and its error code
 * is returned.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }
    
    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }
    
    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }
    
    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* name the step that actually failed */
        error = "orte_show_help_init";
        goto error;
    }
    
    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (ORTE_PROC_IS_APP) {
#if !ORTE_DISABLE_FULL_SUPPORT && ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
        /* get a separate orte event base */
        orte_event_base = opal_event_base_create();
        /* setup the finalize event - we'll need it
         * to break the thread out of the event lib
         * when we want to stop it
         */
        opal_event_set(orte_event_base, &orte_finalize_event, -1, OPAL_EV_WRITE, ignore_callback, NULL);
        opal_event_set_priority(&orte_finalize_event, ORTE_ERROR_PRI);
#if 0
        {
            /* seems strange, but wake us up once a second just so we can check for new events */
            opal_event_t *ev;
            struct timeval tv = {1,0};
            ev = opal_event_alloc();
            opal_event_evtimer_set(orte_event_base,
                               ev, ignore_callback, ev);
            opal_event_set_priority(ev, ORTE_INFO_PRI);
            opal_event_evtimer_add(ev, &tv);
        }
#endif
        /* construct the thread object */
        OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
        /* fork off a thread to progress it */
        orte_progress_thread.t_run = orte_progress_thread_engine;
        if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
            error = "orte progress thread start";
            goto error;
        }
#else
        error = "event thread support is not configured";
        ret = ORTE_ERROR;
        goto error;
#endif
#else
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
#endif
    } else {
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }
    
    /* All done */
    return ORTE_SUCCESS;
    
 error:
    /* silent errors have already been reported by whoever raised them */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Ejemplo n.º 14
0
/*
 * Handle the loss of a daemon: mark its node down, flag every proc on that
 * node (and the job each belongs to) for restart, and schedule
 * launch_restart for each flagged job after a short back-off.
 *
 * daemon  name of the daemon that failed; its vpid indexes daemon_job->procs
 *
 * The back-off is (max restarts seen - 1) seconds, capped at 4 seconds.
 */
static void recover_procs(orte_process_name_t *daemon)
{
    orte_job_t *jdt;
    orte_proc_t *proc;
    orte_node_t *node=NULL;
    int i;
    struct timeval offset={0, 0};
    int32_t max_fails=0;
    orte_errmgr_caddy_t *cd;

    /* the thread is locked by the caller, so don't do anything here */

    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                         "%s ATTEMPTING TO RECOVER PROCS FROM DAEMON %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(daemon)));

    /* if not already done, mark this daemon as down */
    if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(daemon_job->procs, daemon->vpid))) {
        /* correctly track number of alive daemons */
        daemon_job->num_terminated++;
        orte_process_info.num_procs--;
        /* get the corresponding node */
        node = proc->node;
        /* detach the node BEFORE dropping our reference: the release may
         * free the object, so it must be the last thing that touches it */
        proc->node = NULL;
        /* maintain accounting */
        OBJ_RELEASE(proc);
    } else {
        /* if it has already been removed, then we need to find the node it was on.
         * this doesn't necessarily correspond to the daemon's vpid, so we have
         * to search the array
         */
        opal_output(0, "RECOVER PROCS - MISSING NODE");
        return;
    }
    /* mark the node as down so it won't be used in mapping
     * procs to be relaunched
     */
    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                         "%s MARKING NODE %s DOWN",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         node->name));

    node->state = ORTE_NODE_STATE_DOWN;
    node->daemon = NULL;
    /* mark all procs on this node as having terminated */
    for (i=0; i < node->procs->size; i++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
            continue;
        }
        /* get the job data object for this process */
        if (NULL == (jdt = orte_get_job_data_object(proc->name.jobid))) {
            /* major problem */
            opal_output(0, "%s COULD NOT GET JOB OBJECT FOR PROC %s(%d): state %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&proc->name), i,
                        orte_proc_state_to_str(proc->state));
            continue;
        }
        /* since the proc failed for reasons other than its own, this restart
         * does not count against its total - so mark it for restart
         */
        proc->state = ORTE_PROC_STATE_RESTART;
        proc->pid = 0;
        jdt->state = ORTE_JOB_STATE_RESTART;
        /* remember the worst restart count to size the back-off below */
        if (max_fails < proc->restarts) {
            max_fails = proc->restarts;
        }
        /* adjust the num terminated so that acctg works right */
        jdt->num_terminated++;
    }

    /* calculate a delay to avoid racy situation when a proc
     * is continuously failing due to, e.g., a bad command
     * syntax
     */
    if (1 < max_fails) {
        if (4 < max_fails) {
            /* cap the delay at 4 secs */
            offset.tv_sec = 4;
        } else {
            /* add a sec for each failure beyond the first */
            offset.tv_sec = max_fails - 1;
        }
    }

    /* now cycle thru the jobs and restart all those that were flagged */
    for (i=0; i < orte_job_data->size; i++) {
        if (NULL == (jdt = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, i))) {
            continue;
        }
        if (ORTE_JOB_STATE_RESTART == jdt->state) {
            OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                 "%s DELAYING RESTART OF JOB %s FOR %d SECS",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jdt->jobid), (int)offset.tv_sec));
            /* the caddy carries the job to the launch_restart callback */
            cd = OBJ_NEW(orte_errmgr_caddy_t);
            cd->jdata = jdt;
            opal_event_evtimer_set(opal_event_base, &cd->ev, launch_restart, cd);
            opal_event_evtimer_add(&cd->ev, &offset);
        }
    }
}
Ejemplo n.º 15
0
/*
 * Start file monitoring for a job.
 *
 * jobid  the job to monitor, or ORTE_JOBID_WILDCARD for every local job;
 *        a request for this process's own job is ignored
 *
 * For every matching entry in orte_local_jobdata, a file_tracker_t is
 * created from the "filename", "check_size", "check_access", "check_mod"
 * and "limit" parameters found in the first app context's environment,
 * falling back to the component defaults, and appended to `jobs`. If any
 * tracker exists and no sampling timer is running yet, the timer is armed
 * with the component's sample_rate (seconds).
 */
static void start(orte_jobid_t jobid)
{
    mca_base_component_t *c = &mca_sensor_file_component.super.base_version;
    opal_list_item_t *item;
    orte_odls_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int rc, tmp;
    char *filename;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* get the local jobdat for this job - advance with get_next so that
     * every entry is visited, not just the first one */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_next(item)) {
        jobdat = (orte_odls_job_t*)item;
        if (jobid == jobdat->jobid || ORTE_JOBID_WILDCARD == jobid) {
            /* must be at least one app_context, so use the first one found */
            app = NULL;
            for (tmp=0; tmp < jobdat->apps.size; tmp++) {
                if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(&jobdat->apps, tmp))) {
                    app = aptr;
                    break;
                }
            }
            if (NULL == app) {
                /* got a problem */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }
            
            /* search the environ to get the filename */
            if (ORTE_SUCCESS != (rc = mca_base_param_find_string(c, "filename", app->env, &filename))) {
                /* was a default file given */
                if (NULL == mca_sensor_file_component.file) {
                    /* can't do anything without a file */
                    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                         "%s sensor:file no file for job %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOBID_PRINT(jobid)));
                    continue;
                }
                filename = mca_sensor_file_component.file;
            }
            
            /* create the tracking object */
            ft = OBJ_NEW(file_tracker_t);
            ft->jobid = jobid;
            ft->file = strdup(filename);
            
            /* search the environ to see what we are checking */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_size", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_size) {
                    ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
                }
            } else {
                ft->check_size = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_access", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_access) {
                    ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
                }
            } else {
                ft->check_access = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_mod", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_mod) {
                    ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
                }
            } else {
                ft->check_mod = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "limit", app->env, &tmp))) {
                ft->limit = mca_sensor_file_component.limit;
            } else {
                ft->limit = tmp;
            }
            opal_list_append(&jobs, &ft->super);
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s file %s monitored for %s%s%s with limit %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file, ft->check_size ? "SIZE:" : " ",
                                 ft->check_access ? "ACCESS TIME:" : " ",
                                 ft->check_mod ? "MOD TIME" : " ", ft->limit));
        }
    }
    
    /* start sampling */
    if (NULL == sample_ev && !opal_list_is_empty(&jobs)) {
        /* startup a timer to wake us up periodically
         * for a data sample
         */
        sample_ev =  (opal_event_t *) malloc(sizeof(opal_event_t));
        if (NULL == sample_ev) {
            /* out of memory: leave sampling off; a later call can retry */
            return;
        }
        opal_event_evtimer_set(opal_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_file_component.sample_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}
Ejemplo n.º 16
0
/* Slurm does not understand our internal protocol, so the RML cannot be
 * used to talk to it; the request is instead a plain text command written
 * to a socket.
 *
 * Builds an "allocate" command for the given job, records a tracker for
 * it in `jobs`, arms a timeout timer for the reply, and sends the command
 * on socket_fd.
 *
 * Returns ORTE_ERR_NOT_FOUND when no Slurm configuration file was given,
 * otherwise ORTE_ERR_ALLOCATION_PENDING (even if the send itself failed,
 * which is only logged).
 *
 * NOTE(review): the results of OBJ_NEW and asprintf are used without being
 * checked - worth confirming whether that is acceptable here.
 */
static int dyn_allocate(orte_job_t *jdata)
{
    char **words = NULL;
    char *piece, *jobid_str, *hosts, *request;
    orte_app_context_t *app;
    local_jobtracker_t *tracker;
    struct timeval deadline;
    int64_t min_nodes, *min_nodes_ptr;
    int n;

    if (NULL == mca_ras_slurm_component.config_file) {
        opal_output(0, "Cannot perform dynamic allocation as no Slurm configuration file provided");
        return ORTE_ERR_NOT_FOUND;
    }

    /* remember that a request is outstanding for this job */
    tracker = OBJ_NEW(local_jobtracker_t);
    tracker->jobid = jdata->jobid;
    opal_list_append(&jobs, &tracker->super);

    /* The command is assembled word by word. The jdata structure has a
     * field for the minimum number of nodes, and the node list is taken
     * from the app_contexts. There is not yet a jdata-level notion of
     * "mandatory" vs "optional" allocations; a parameter for adjusting
     * the timeout may also be wanted someday.
     */
    opal_argv_append_nosize(&words, "allocate");

    /* jobid=<id> */
    orte_util_convert_jobid_to_string(&jobid_str, jdata->jobid);
    asprintf(&piece, "jobid=%s", jobid_str);
    opal_argv_append_nosize(&words, piece);
    free(jobid_str);
    free(piece);

    /* ask for the allocation for all apps in one shot
     *
     * RHC: the rest of the code base cannot currently handle
     * rolling allocations
     */
#if 0
    if (!mca_ras_slurm_component.rolling_alloc) {
        opal_argv_append_nosize(&words, "return=all");
    }
#else
    opal_argv_append_nosize(&words, "return=all");
#endif

    /* timeout=<value> */
    asprintf(&piece, "timeout=%d", mca_ras_slurm_component.timeout);
    opal_argv_append_nosize(&words, piece);
    free(piece);

    /* one group of words per app context */
    min_nodes_ptr = &min_nodes;
    for (n = 0; n < jdata->apps->size; n++) {
        app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n);
        if (NULL == app) {
            continue;
        }

        /* the app id, preceded by a colon separator */
        asprintf(&piece, ": app=%d", (int)app->idx);
        opal_argv_append_nosize(&words, piece);
        free(piece);

        /* number of process "slots" needed */
        asprintf(&piece, "np=%d", app->num_procs);
        opal_argv_append_nosize(&words, piece);
        free(piece);

        /* minimum number of nodes, only when the attribute is present */
        if (orte_get_attribute(&app->attributes, ORTE_APP_MIN_NODES, (void**)&min_nodes_ptr, OPAL_INT64)) {
            asprintf(&piece, "N=%ld", (long int)min_nodes);
            opal_argv_append_nosize(&words, piece);
            free(piece);
        }

        /* the list of nodes, if one was given, with each node
         * appearing only once
         */
        hosts = get_node_list(app);
        if (NULL != hosts) {
            asprintf(&piece, "node_list=%s", hosts);
            opal_argv_append_nosize(&words, piece);
            free(piece);
            free(hosts);
        }

        /* the mandatory/optional flag */
        opal_argv_append_nosize(&words,
                                orte_get_attribute(&app->attributes, ORTE_APP_MANDATORY, NULL, OPAL_BOOL)
                                ? "flag=mandatory" : "flag=optional");
    }

    /* flatten the words into the command that goes on the wire */
    request = opal_argv_join(words, ' ');
    opal_argv_free(words);

    /* arm a timer - if no response shows up within twice the configured
     * timeout, the timeout callback handles the unresponsive Slurm
     */
    deadline.tv_sec = mca_ras_slurm_component.timeout * 2;
    deadline.tv_usec = 0;
    opal_event_evtimer_set(orte_event_base, &tracker->timeout_ev, timeout, tracker);
    opal_event_evtimer_add(&tracker->timeout_ev, &deadline);

    opal_output_verbose(2, orte_ras_base_framework.framework_output,
                        "%s slurm:dynalloc cmd_str = %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        request);

    /* the terminating NUL is sent along with the text */
    if (send(socket_fd, request, strlen(request)+1, 0) < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
    }
    free(request);

    /* we are already inside an event, so we cannot block waiting for
     * the response. Tell the base functions an allocation is pending
     * so they do not progress the job yet
     */
    return ORTE_ERR_ALLOCATION_PENDING;
}
Ejemplo n.º 17
0
static void heartbeat_with_AM_cb(int fd, short event, void *data)
{
    int i, rc;
    orte_job_t *jdata = (orte_job_t*)data;
    orte_job_t* daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

    /* 1. create heartbeat request msg */
    /*
    message HeartbeatRequestProto {
    }
    */
    struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "HeartbeatRequestProto");
    if (!request_msg) {
        opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb: failed to create request_msg",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 2. send heartbeat request msg */
    rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_HEARTBEAT);
    if (rc != 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when send request_msg to AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 3. recv response and parse the msg*/
    /*
     message HeartbeatResponseProto {
         repeated ProcessStatusProto completed_processes = 1;
     }

     message ProcessStatusProto {
         optional ProcessNameProto name = 1;
         optional ProcessStateProto state = 2;
         optional int32 exit_value = 3;
     }

     enum ProcessStateProto {
         RUNNING = 1;
         COMPLETED = 2;
     }

     message ProcessNameProto {
         optional int32 jobid = 1;
         optional int32 vpid = 2;
     }
     */

    struct pbc_rmessage* response_msg = orte_hdclient_recv_message("HeartbeatResponseProto");
    if (!response_msg) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when recv HeartbeatResponseProto msg from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    int n = pbc_rmessage_size(response_msg, "completed_processes");
    if (n < 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: got n(=%d) < 0, please check",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        struct pbc_rmessage* completed_procs_msg = pbc_rmessage_message(response_msg, "completed_processes", i);
        if (!completed_procs_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse returned completed_procs_msg from AM",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(completed_procs_msg, "name", 0);
        if (!proc_name_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse proc_name_msg",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        uint32_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
        uint32_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);

        uint32_t exit_value = pbc_rmessage_integer(completed_procs_msg, "exit_value", 0, NULL);

        /* next, we will modify proc's state */
        orte_job_t* tmp_jdata = (orte_job_t*) opal_pointer_array_get_item(orte_job_data, local_jobid);
        orte_proc_t* proc = (orte_proc_t*) opal_pointer_array_get_item(tmp_jdata->procs, vpid);


        if (tmp_jdata->jobid == jdata->jobid) {
			num_completed_jdata_procs++;
		}

        if (exit_value == 0) {
        	proc->state = ORTE_PROC_STATE_TERMINATED;
        }

        /* if this process is already terminated, just skip over */
        if (proc->state >= ORTE_PROC_STATE_TERMINATED) {
            continue;
        }

        if (exit_value == -1000 || exit_value == -100 || exit_value == -101) {
            opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb proc failed to start", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ERROR_LOG(ORTE_ERROR);
            proc->state = ORTE_PROC_STATE_FAILED_TO_START;
            ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
        } else {
            /* here, means currently the proc's state < ORTE_PROC_STATE_TERMINATED,
             * however, from AM's heartbeat response, we got the proc's container is terminated,
             * to solve this dilemma , we set a timer event to reconfirm this proc's state,
             */
            opal_event_t *ev = NULL;
            ev = (opal_event_t*) malloc(sizeof(opal_event_t));

            struct timeval delay;
            delay.tv_sec = 15;
            delay.tv_usec = 0;

            opal_event_evtimer_set(orte_event_base, ev, process_state_monitor_cb, proc);
            opal_event_evtimer_add(ev, &delay);
        }
    }

cleanup:
    if (response_msg) {
        pbc_rmessage_delete(response_msg);
    }

    if (num_completed_jdata_procs == jdata->num_procs) {
        /*
         * all procs are completed, send finish request to AM,
         * modify job state to ORTE_JOB_STATE_TERMINATED
         */
        jdata->state = ORTE_JOB_STATE_TERMINATED;
        finish_app_master(0 == orte_exit_status);
        return;
    } else {
        /* next heartbeat */
        opal_event_t *ev = NULL;
        ev = (opal_event_t*) malloc(sizeof(opal_event_t));

        struct timeval delay;
        delay.tv_sec = 1;
        delay.tv_usec = 0;

        opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata);
		opal_event_evtimer_add(ev, &delay);
    }
}
Ejemplo n.º 18
0
/*
 * Turn on monitoring of the install-configuration directory.
 *
 * On first entry the function marks monitoring as enabled and then picks a
 * mechanism: an inotify watch on the directory when HAVE_SYS_INOTIFY_H is
 * defined and the watch can be added, otherwise a periodic timer driven by
 * the component's "rate". If neither can be set up, monitoring is marked
 * disabled again. On re-entry while already enabled it only calls
 * check_installed(true).
 *
 * The framework control (orcm_cfgi_base.ctl) is held while state is being
 * changed and is released before check_config() is invoked.
 */
static void activate(void)
{
    DIR *dirp;

    /* take control */
    ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl);

    if (enabled) {
        /* we get reentered when daemons reappear so that
         * any pending jobs can be started
         */
        check_installed(true);
        /* release control */
        ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
        return;
    }
    enabled = true;

    /* check for existence of the directory. If it doesn't yet
     * exist, then we have to use the timer until it shows up
     */
    if (NULL == (dirp = opendir(mca_orcm_cfgi_file_component.dir))) {
        if (0 < opal_output_get_verbosity(orcm_cfgi_base.output)) {
            orte_show_help("help-cfgi-file.txt", "no-dir",
                           true, mca_orcm_cfgi_file_component.dir);
        }
        timer_in_use = true;
        goto fallback;
    }
    /* the stream was opened only to prove the directory exists; close it
     * so the descriptor is not leaked
     */
    closedir(dirp);

#ifdef HAVE_SYS_INOTIFY_H
    /* setup to watch the config dir - CREATE always is followed by
     * a MODIFY event, so don't need both
     */
    if (0 > (watch = inotify_add_watch(notifier, mca_orcm_cfgi_file_component.dir,
                                           IN_DELETE | IN_MODIFY | IN_MOVE))) {
        /* error */
        close(notifier);
        goto fallback;
    }
    /* start the watcher event */
    probe_ev = (opal_event_t*)malloc(sizeof(opal_event_t));
    opal_event_set(opal_event_base, probe_ev, notifier,
                   OPAL_EV_READ|OPAL_EV_PERSIST, inotify_handler, NULL);
    timer_in_use = false;
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
    /* process it the first time */
    check_config(0, 0, NULL);
    return;
#endif

 fallback:

    /* setup the probe timer */
    if (0 <  mca_orcm_cfgi_file_component.rate) {
        probe_time.tv_sec = mca_orcm_cfgi_file_component.rate;
        probe_time.tv_usec = 0;
        probe_ev = (opal_event_t*)malloc(sizeof(opal_event_t));
        opal_event_evtimer_set(opal_event_base, probe_ev, check_config, NULL);
        timer_in_use = true;
        /* process it the first time */
        ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
        check_config(0, 0, NULL);
        return;
    }

    /* no usable rate for the timer, so nothing can watch the directory */
    opal_output(0, "%s CANNOT ACTIVATE INSTALL CONFIG MONITORING",
                   ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    enabled = false;
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
}
Ejemplo n.º 19
0
/*
 * Start file monitoring for a job.
 *
 * Looks up the job, takes its first app_context, and builds a file_tracker_t
 * describing which file to watch and which checks (size, access time,
 * modification time) to apply. Each setting comes from the app's
 * environment when find_value() locates it, otherwise from the component
 * defaults. The tracker is appended to the "jobs" list. When the component
 * is configured to use its own progress thread, a sampler timer is also
 * armed on that thread's event base.
 */
static void start(orte_jobid_t jobid)
{
    orte_job_t *job;
    orte_app_context_t *app = NULL;
    file_tracker_t *tracker;
    char *path;
    char *value;
    int idx;

    /* cannot monitor my own job */
    if (ORTE_JOBID_WILDCARD != jobid && ORTE_PROC_MY_NAME->jobid == jobid) {
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));

    /* look up the job object */
    job = orte_get_job_data_object(jobid);
    if (NULL == job) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* take the first populated app_context slot */
    for (idx = 0; NULL == app && idx < job->apps->size; idx++) {
        app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, idx);
    }
    if (NULL == app) {
        /* a job must carry at least one app_context */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* the file name: environment first, component default second */
    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_filename", &path)) {
        if (NULL == mca_sensor_file_component.file) {
            /* nothing to watch */
            OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                                 "%s sensor:file no file for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jobid)));
            return;
        }
        path = strdup(mca_sensor_file_component.file);
    }

    /* build the tracker; it keeps its own copy of the file name */
    tracker = OBJ_NEW(file_tracker_t);
    tracker->jobid = jobid;
    tracker->file = strdup(path);
    free(path);

    /* size check: an environment value wins, else a positive default */
    if (find_value(app, OPAL_MCA_PREFIX"sensor_file_check_size", &value)) {
        tracker->check_size = OPAL_INT_TO_BOOL(strtol(value, NULL, 10));
        free(value);
    } else if (0 < mca_sensor_file_component.check_size) {
        tracker->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
    }

    /* access-time check */
    if (find_value(app, OPAL_MCA_PREFIX"sensor_file_check_access", &value)) {
        tracker->check_access = OPAL_INT_TO_BOOL(strtol(value, NULL, 10));
        free(value);
    } else if (0 < mca_sensor_file_component.check_access) {
        tracker->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
    }

    /* modification-time check */
    if (find_value(app, OPAL_MCA_PREFIX"sensor_file_check_mod", &value)) {
        tracker->check_mod = OPAL_INT_TO_BOOL(strtol(value, NULL, 10));
        free(value);
    } else if (0 < mca_sensor_file_component.check_mod) {
        tracker->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
    }

    /* the limit always gets a value: environment or component default */
    if (find_value(app, OPAL_MCA_PREFIX"sensor_file_limit", &value)) {
        tracker->limit = strtol(value, NULL, 10);
        free(value);
    } else {
        tracker->limit = mca_sensor_file_component.limit;
    }

    opal_list_append(&jobs, &tracker->super);
    OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                         "%s file %s monitored for %s%s%s with limit %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         tracker->file, tracker->check_size ? "SIZE:" : " ",
                         tracker->check_access ? "ACCESS TIME:" : " ",
                         tracker->check_mod ? "MOD TIME" : " ", tracker->limit));

    /* everything below concerns only the dedicated progress thread */
    if (!mca_sensor_file_component.use_progress_thread) {
        return;
    }

    if (!orcm_sensor_file.ev_active) {
        orcm_sensor_file.ev_active = true;
        orcm_sensor_file.ev_base = opal_progress_thread_init("file");
        if (NULL == orcm_sensor_file.ev_base) {
            orcm_sensor_file.ev_active = false;
            return;
        }
    }

    /* create the sampler and arm its timer */
    file_sampler = OBJ_NEW(orcm_sensor_sampler_t);

    /* a zero component rate means "use the framework-wide rate" */
    if (!mca_sensor_file_component.sample_rate) {
        file_sampler->rate.tv_sec = orcm_sensor_base.sample_rate;
    } else {
        file_sampler->rate.tv_sec = mca_sensor_file_component.sample_rate;
    }
    file_sampler->log_data = orcm_sensor_base.log_samples;
    opal_event_evtimer_set(orcm_sensor_file.ev_base, &file_sampler->ev,
                           perthread_file_sample, file_sampler);
    opal_event_evtimer_add(&file_sampler->ev, &file_sampler->rate);
}
Ejemplo n.º 20
0
/*****************
 * Local Functions
 *****************/
/*
 * Record a failed application process for automatic recovery.
 *
 * If no checkpoint is available a help message is shown and nothing is
 * queued. Otherwise the component is told to ignore the current update and,
 * unless faults are currently masked, the proc is appended to
 * procs_pending_recovery and the recovery timer is armed if it is not
 * already running.
 *
 * jdata - job the proc belongs to (not referenced in this function)
 * proc  - name of the failed process
 * state - state reported for the process
 */
static void errmgr_autor_process_fault_app(orte_job_t *jdata,
                                           orte_process_name_t *proc,
                                           orte_proc_state_t state)
{
    errmgr_autor_wp_item_t *pending = NULL;
    struct timeval delay;

    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor): process_fault() Process fault! proc %s (0x%x)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         state));

    /* without a checkpoint there is nothing to recover from */
    if (!orte_sstore_base_is_checkpoint_available) {
        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "%s errmgr:hnp(autor): process_fault() No checkpoints are available for this job! Cannot Automaticly Recover!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ));
        opal_show_help("help-orte-errmgr-hnp.txt", "autor_failed_to_recover_proc", true,
                       ORTE_NAME_PRINT(proc), proc->vpid);
        return;
    }

    mca_errmgr_hnp_component.ignore_current_update = true;

    /* a recovery is already shutting the job down: mask this failure */
    if (autor_mask_faults) {
        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "%s errmgr:hnp(autor):process_fault() Currently recovering the job. Failure masked!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /* queue the process for the recovery pass */
    pending = OBJ_NEW(errmgr_autor_wp_item_t);
    pending->name.jobid = proc->jobid;
    pending->name.vpid = proc->vpid;
    ORTE_EPOCH_SET(pending->name.epoch,proc->epoch);
    pending->state = state;
    opal_list_append(procs_pending_recovery, &(pending->super));

    /* a timer that is already armed will pick this entry up as well */
    if (autor_timer_active) {
        return;
    }

    autor_timer_active = true;
    opal_event_evtimer_set(opal_event_base, autor_timer_event, errmgr_autor_recover_processes, NULL);
    delay.tv_sec  = mca_errmgr_hnp_component.autor_recovery_delay;
    delay.tv_usec = 0;
    opal_event_evtimer_add(autor_timer_event, &delay);
}
Ejemplo n.º 21
0
/*
 * Start the sensor framework for the given job.
 *
 * On the first call (no modules active yet) this requests the database
 * handle if that has not been done, invokes every registered module's
 * start() hook in array order, brings up the "sensor" progress thread if it
 * is not active, and - when at least one module exists and a positive
 * sample rate is configured - arms a periodic sampler on that thread's
 * event base.
 *
 * On later calls, with modules already active, it only restarts the
 * progress thread if it is currently marked inactive.
 */
void orcm_sensor_base_start(orte_jobid_t job)
{
    orcm_sensor_active_module_t *active;
    orcm_sensor_sampler_t *periodic;
    int idx;

    opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                        "%s sensor:base: sensor start called",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* nothing registered, nothing to start */
    if (0 == orcm_sensor_base.modules.size) {
        return;
    }

    /* modules already running: at most the progress thread needs a kick */
    if (mods_active) {
        if (!orcm_sensor_base.ev_active) {
            orcm_sensor_base.ev_active = true;
            orcm_restart_progress_thread("sensor");
        }
        return;
    }

    opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                        "%s sensor:base: starting sensors",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* ask for the database handle once */
    if (!orcm_sensor_base.dbhandle_requested) {
        orcm_db.open("sensor", NULL, db_open_cb, NULL);
        orcm_sensor_base.dbhandle_requested = true;
    }

    /* walk the module array in order and start each populated entry */
    for (idx = 0; idx < orcm_sensor_base.modules.size; idx++) {
        active = (orcm_sensor_active_module_t*)opal_pointer_array_get_item(&orcm_sensor_base.modules, idx);
        if (NULL != active) {
            mods_active = true;
            if (NULL != active->module->start) {
                active->module->start(job);
            }
        }
    }

    /* bring up the event base and progress engine if they are not running */
    if (!orcm_sensor_base.ev_active) {
        orcm_sensor_base.ev_active = true;
        orcm_sensor_base.ev_base = orcm_start_progress_thread("sensor", progress_thread_engine, NULL);
        if (NULL == orcm_sensor_base.ev_base) {
            orcm_sensor_base.ev_active = false;
            return;
        }
    }

    /* a sampler is only needed when something is active and a rate is set */
    if (!mods_active || orcm_sensor_base.sample_rate <= 0) {
        return;
    }

    /* arm a timer that wakes us periodically, handing it the sampler */
    opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                        "%s sensor:base: creating sampler with rate %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        orcm_sensor_base.sample_rate);
    periodic = OBJ_NEW(orcm_sensor_sampler_t);
    periodic->rate.tv_sec = orcm_sensor_base.sample_rate;
    periodic->log_data = orcm_sensor_base.log_samples;
    opal_event_evtimer_set(orcm_sensor_base.ev_base, &periodic->ev,
                           take_sample, periodic);
    opal_event_evtimer_add(&periodic->ev, &periodic->rate);
}
Ejemplo n.º 22
0
/*
 * Failure notifications come here.
 *
 * Unpacks a sequence of (name, pid, state, exit_code) records from "buffer",
 * applies each one to the matching proc object, and then:
 *   - for a proc reported as ORTE_PROC_STATE_KILLED_BY_CMD, removes it from
 *     its job and from the sender's node, dropping the job as well once it
 *     has no runnable procs left;
 *   - for any other state beyond ORTE_PROC_STATE_UNTERMINATED, marks the job
 *     ORTE_JOB_STATE_RESTART.
 * Afterwards every job marked for restart is scanned; procs under their
 * restart limit are flagged for restart and a launch_restart timer is
 * scheduled with a delay that grows with the failure count (capped at 4s).
 *
 * status, tag, msg, count, cbdata - not used here
 * sender - daemon that sent the update; its vpid indexes orte_node_pool
 * buffer - packed records to unpack
 */
static void remote_update(int status,
                          orte_process_name_t *sender,
                          orcm_pnp_tag_t tag,
                          struct iovec *msg,
                          int count,
                          opal_buffer_t *buffer,
                          void *cbdata)
{
    int rc, n, k, cnt;
    orte_process_name_t name;
    orte_job_t *jdata;
    orte_proc_t *proc, *pptr;
    orte_node_t *node;
    orte_app_context_t *app;
    orte_proc_state_t state;
    orte_exit_code_t exit_code;
    pid_t pid;
    bool restart_reqd, job_released, job_done;
    struct timeval offset={0, 0};
    int32_t max_fails=0;
    orte_errmgr_caddy_t *cd;

    OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output,
                         "%s errmgr:sched:receive proc state notification from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* get the node object for the sender */
    if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, sender->vpid))) {
        opal_output(0, "%s CANNOT FIND NODE FOR DAEMON %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender));
        return;
    }

    /* unpack the names of the procs */
    restart_reqd = false;
    n=1;
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &name, &n, ORTE_NAME))) {

        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s GOT UPDATE FOR %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name)));

        /* unpack the pid of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &pid, &n, OPAL_PID))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* unpack the state of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &state, &n, ORTE_PROC_STATE))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* unpack the exit_code of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &exit_code, &n, ORTE_EXIT_CODE))) {
            ORTE_ERROR_LOG(rc);
            return;
        }

        /* get the job object for this proc */
        if (NULL == (jdata = orte_get_job_data_object(name.jobid))) {
            /* BIG problem*/
            opal_output(0, "%s errmgr:sched JOB %s NOT FOUND",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(name.jobid));
            return;
        }

        /* get the proc object */
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid))) {
            /* unknown proc - race condition when killing a proc on cmd */
            OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                 "%s MISSING PROC %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name)));
            continue;
        }
        /* update data */
        proc->pid = pid;
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s CHANGING STATE OF PROC %s FROM %s TO %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name),
                             orte_proc_state_to_str(proc->state),
                             orte_proc_state_to_str(state)));
        proc->state = state;
        proc->exit_code = exit_code;
        /* if the proc has failed, mark the job for restart unless
         * it was killed by our own cmd
         */
        if (ORTE_PROC_STATE_UNTERMINATED < state) {
            /* reset the stats */
            OBJ_DESTRUCT(&proc->stats);
            OBJ_CONSTRUCT(&proc->stats, opal_pstats_t);
            if (ORTE_PROC_STATE_KILLED_BY_CMD == state) {
                /* this is a response to our killing a proc - remove it
                 * from the system
                 */
                opal_pointer_array_set_item(jdata->procs, name.vpid, NULL);
                jdata->num_procs--;
                /* clean it off of the node */
                for (k=0; k < node->procs->size; k++) {
                    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                        continue;
                    }
                    if (pptr->name.jobid == proc->name.jobid &&
                        pptr->name.vpid == proc->name.vpid) {
                        /* found it */
                        OPAL_OUTPUT_VERBOSE((7, orte_errmgr_base.output,
                                             "%s REMOVING ENTRY %d FOR PROC %s FROM NODE %s",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), k,
                                             ORTE_NAME_PRINT(&proc->name),
                                             ORTE_VPID_PRINT(sender->vpid)));
                        opal_pointer_array_set_item(node->procs, k, NULL);
                        node->num_procs--;
                        /* maintain acctg */
                        OBJ_RELEASE(proc);
                        break;
                    }
                }
                /* release the object */
                OBJ_RELEASE(proc);
                /* if the job is now empty, or if the only procs remaining are stopped
                 * due to exceeding restart (and thus cannot run), remove it too
                 */
                if (0 == jdata->num_procs) {
                    opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL);
                    OBJ_RELEASE(jdata);
                } else {
                    job_done = true;
                    for (k=0; k < jdata->procs->size; k++) {
                        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, k))) {
                            continue;
                        }
                        OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output,
                                             "%s CHECKING PROC %s STATE %s",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                             ORTE_NAME_PRINT(&pptr->name),
                                             orte_proc_state_to_str(pptr->state)));
                        /* any proc not in CANNOT_RESTART keeps the job alive */
                        if (pptr->state < ORTE_PROC_STATE_UNTERMINATED ||
                            ORTE_PROC_STATE_CANNOT_RESTART != pptr->state) {
                            job_done = false;
                            break;
                        }
                    }
                    if (job_done) {
                        opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL);
                        OBJ_RELEASE(jdata);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                     "%s FLAGGING JOB %s AS CANDIDATE FOR RESTART",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOBID_PRINT(jdata->jobid)));
                jdata->state = ORTE_JOB_STATE_RESTART;
                /* flag that at least one job requires restart */
                restart_reqd = true;
            }
        }
        /* prep for next round */
        n=1;
    }
    /* running off the end of the buffer is the normal way out of the loop */
    if (ORCM_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* if restart not reqd, nothing more to do */
    if (!restart_reqd) {
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s NO RESTARTS REQUIRED",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /* cycle thru the array of jobs looking for those requiring restart */
    for (n=1; n < orte_job_data->size; n++) {
        if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, n))) {
            continue;
        }
        if (ORTE_JOB_STATE_RESTART != jdata->state) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s JOB %s CANDIDATE FOR RESTART",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        /* find the proc that needs restarting */
        restart_reqd = false;
        /* NOTE(review): job_released is never set to true in this function,
         * so the check on it further down cannot fire - confirm whether a
         * release path is missing
         */
        job_released = false;
        max_fails = 0;
        offset.tv_sec = 0;
        for (cnt=0; cnt < jdata->procs->size; cnt++) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, cnt))) {
                continue;
            }
            if (ORTE_PROC_STATE_UNTERMINATED < proc->state &&
                ORTE_PROC_STATE_KILLED_BY_CMD != proc->state) {
                /* get the app for this proc */
                app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
                if (NULL == app) {
                    opal_output(0, "%s UNKNOWN APP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    continue;
                }

                /* check the number of restarts to see if the limit has been reached */
                if (app->max_restarts < 0 ||
                    proc->restarts < app->max_restarts) {
                    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                         "%s FLAGGING PROC %s FOR RESTART",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_NAME_PRINT(&proc->name)));
                    /* flag the proc for restart */
                    proc->state = ORTE_PROC_STATE_RESTART;
                    restart_reqd = true;
                    /* adjust accounting */
                    jdata->num_terminated++;
                    /* increment the restart counter since the proc will be restarted */
                    proc->restarts++;
                    /* track max failures */
                    if (max_fails < proc->restarts) {
                        max_fails = proc->restarts;
                    }
                } else {
                    /* limit reached - don't restart it */
                    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                         "%s PROC %s AT LIMIT - CANNOT RESTART",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_NAME_PRINT(&proc->name)));
                    /* leave the proc in the system so users can see that it
                     * reached the restart limit
                     */
                    proc->state = ORTE_PROC_STATE_CANNOT_RESTART;
                    proc->pid = 0;
                    /* increment his restarts this once so it shows as too high */
                    proc->restarts++;
                    /* adjust accounting */
                    jdata->num_procs--;
                    jdata->num_terminated++;
                    /* clean it off of the node */
                    if (NULL == (node = proc->node)) {
                        continue;
                    }
                    for (k=0; k < node->procs->size; k++) {
                        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                            continue;
                        }
                        if (pptr == proc) {
                            /* found it */
                            opal_pointer_array_set_item(node->procs, k, NULL);
                            node->num_procs--;
                            /* clear the back-pointer BEFORE dropping the
                             * node's reference: the proc must not be touched
                             * once it has been released
                             */
                            proc->node = NULL;
                            /* maintain acctg */
                            OBJ_RELEASE(proc);
                            break;
                        }
                    }
                }
            }
        }
        /* if the job was released, then move on */
        if (job_released) {
            continue;
        }
        /* if no procs require restart, then move on to next job */
        if (!restart_reqd) {
            jdata->state = ORTE_JOB_STATE_RUNNING;  /* reset this */
            continue;
        }

        /* calculate a delay to avoid racy situation when a proc
         * is continuously failing due to, e.g., a bad command
         * syntax
         */
        if (1 < max_fails) {
            if (4 < max_fails) {
                /* cap the delay at 4 secs */
                offset.tv_sec = 4;
            } else {
                /* add a sec for each failure beyond the first */
                offset.tv_sec = max_fails - 1;
            }
        }
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s DELAYING RESTART OF JOB %s FOR %d SECS",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid), (int)offset.tv_sec));
        /* hand the job to launch_restart once the delay expires */
        cd = OBJ_NEW(orte_errmgr_caddy_t);
        cd->jdata = jdata;
        opal_event_evtimer_set(opal_event_base, &cd->ev, launch_restart, cd);
        opal_event_evtimer_add(&cd->ev, &offset);
    }
}
Ejemplo n.º 23
0
/*
 * Display (or account for) one help message identified by the
 * (filename, topic) tuple.
 *
 * filename, topic - identify the help message; used to look up the
 *                   tracking item when aggregation is enabled
 * output          - the rendered text to display, or NULL for a control
 *                   message asking that the tuple be suppressed
 * sender          - name of the process the message came from; recorded
 *                   on the tuple's process list when aggregating
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_OUT_OF_RESOURCE if the
 * process-name list item cannot be allocated, or the error reported by
 * get_tli() when the lookup fails for a reason other than "not found".
 */
static int show_help(const char *filename, const char *topic,
                     const char *output, orte_process_name_t *sender)
{
    int rc;
    tuple_list_item_t *tli = NULL;
    orte_namelist_t *pnli;
    time_t now = time(NULL);

    /* If we're aggregating, check for duplicates.  Otherwise, don't
       track duplicates at all and always display the message. */
    if (orte_help_want_aggregate) {
        rc = get_tli(filename, topic, &tli);
    } else {
        rc = ORTE_ERR_NOT_FOUND;
    }

    /* If there's no output string (i.e., this is a control message
       asking us to suppress), then skip to the end. */
    if (NULL == output) {
        /* tli is only populated by get_tli(); when we are not
           aggregating (or the lookup produced nothing) there is no
           item to mark, so do not dereference it. */
        if (NULL != tli) {
            tli->tli_display = false;
        }
        goto after_output;
    }

    /* Was it already displayed? */
    if (ORTE_SUCCESS == rc) {
        /* Yes.  But do we want to print anything?  That's complicated.

           We always show the first message of a given (filename,
           topic) tuple as soon as it arrives.  But we don't want to
           show duplicate notices often, because we could get overrun
           with them.  So we want to gather them up and say "We got N
           duplicates" every once in a while.

           And keep in mind that at termination, we'll unconditionally
           show all accumulated duplicate notices.

           A simple scheme is as follows:
           - when the first of a (filename, topic) tuple arrives
             - print the message
             - if a timer is not set, set T=now
           - when a duplicate (filename, topic) tuple arrives
             - if now>(T+5) and timer is not set (due to
               non-pre-emptiveness of our libevent, a timer *could* be
               set!)
               - print all accumulated duplicates
               - reset T=now
             - else if a timer was not set, set the timer for T+5
             - else if a timer was set, do nothing (just wait)
           - set T=now when the timer expires
        */
        ++tli->tli_count_since_last_display;
        if (now > show_help_time_last_displayed + 5 && !show_help_timer_set) {
            show_accumulated_duplicates(0, 0, NULL);
        } else if (!show_help_timer_set) {
            opal_event_evtimer_set(orte_event_base, &show_help_timer_event,
                                   show_accumulated_duplicates, NULL);
            opal_event_evtimer_add(&show_help_timer_event, &show_help_interval);
            show_help_timer_set = true;
        }
    }
    /* Not already displayed */
    else if (ORTE_ERR_NOT_FOUND == rc) {
        if (orte_xml_output) {
            char *tmp;
            tmp = xml_format((unsigned char*)output);
            fprintf(orte_xml_fp, "%s", tmp);
            fflush(orte_xml_fp);
            free(tmp);
        } else {
            opal_output(orte_clean_output, "%s", output);
        }
        if (!show_help_timer_set) {
            show_help_time_last_displayed = now;
        }
    }
    /* Some other error occurred */
    else {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

 after_output:
    /* If we're aggregating, add this process name to the list.  The
       tli check protects the append when the lookup above did not
       yield a tracking item. */
    if (orte_help_want_aggregate && NULL != tli) {
        pnli = OBJ_NEW(orte_namelist_t);
        if (NULL == pnli) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        pnli->name = *sender;
        opal_list_append(&(tli->tli_processes), &(pnli->super));
    }
    return ORTE_SUCCESS;
}
Ejemplo n.º 24
0
/*
 * Send an abort command to the PMIx server and wait for it to be
 * acknowledged (or for a one-second timeout to fire).
 *
 * flag - status value packed into the abort message
 * msg  - optional string packed into the abort message; NULL is okay
 *
 * Returns OPAL_SUCCESS when no server URI is known, when the usock is
 * not connected, or once the wait completes; returns the opal_dss.pack
 * error code if building the message fails.
 */
static int native_abort(int flag, const char msg[])
{
    opal_buffer_t *bfr;
    pmix_cmd_t cmd = PMIX_ABORT_CMD;
    int rc;
    pmix_cb_t *cb;
    opal_event_t ev;
    struct timeval tv = {1, 0};

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native abort called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* create a buffer to hold the message */
        bfr = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the status flag */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &flag, 1, OPAL_INT))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the string message - a NULL is okay */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &msg, 1, OPAL_STRING))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }

        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        /* push a timeout event to wake us up just in case this
         * message cannot get thru - e.g., someone else may have
         * detected the failure of the server and ordered an abort */
        opal_event_evtimer_set(mca_pmix_native_component.evbase,
                               &ev, timeout, cb);
        opal_event_evtimer_add(&ev, &tv);

        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(bfr, wait_cbfunc, cb);

        /* wait for the release */
        PMIX_WAIT_FOR_COMPLETION(cb->active);

        /* the timer lives on our stack and references cb: if the reply
         * beat the timeout it is still pending, so remove it from the
         * event base before either goes away (a no-op if it already
         * fired) */
        opal_event_evtimer_del(&ev);
        OBJ_RELEASE(cb);
    }
    return OPAL_SUCCESS;
}