示例#1
0
static void time_cb(int fd, short event, void *arg)
{
    struct timeval tv;
    opal_event_t *tmp = (opal_event_t*)arg;

    called++;

    if (0 == (called % 1000)) {
        fprintf(stderr, "Fired event %d\n", called);
    }

    if (called < 10*NEVENT) {
        tv.tv_sec = 0;
        tv.tv_usec = rand_int(50000);
        opal_event_evtimer_add(tmp, &tv);
    }
}
示例#2
0
static int ack_recv (void *qos_channel, orte_rml_recv_t *msg) {
    orte_qos_ack_channel_t *ack_chan;
    ack_chan = (orte_qos_ack_channel_t*) (qos_channel);
    int32_t rc;
    struct timeval ack_timeout;
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_recv msg = %p seq_num = %d from peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, msg->seq_num,
                         ORTE_NAME_PRINT(&msg->sender)));
    /** HACK - drop every third msg to stimulate lost msg */
 /*   if ((msg->seq_num == 3) && (hack == 0)) {
        OBJ_RELEASE(msg);
        hack = 1;
        return ORTE_ERROR;
    }*/
    /* check if this is the next expected msg*/
    if((ack_chan->in_msg_seq_num + 1 == msg->seq_num) && (ack_chan->ack_msg_seq_num < msg->seq_num))
    {
        /* check if we are at the end of the window */
        if(ack_chan->window == (msg->seq_num - ack_chan->ack_msg_seq_num)) {
            /* stop window ack timer */
            opal_event_evtimer_del (&ack_chan->msg_ack_timer_event);
            rc = send_ack (ack_chan, msg->channel_num, ACK_WINDOW_COMPLETE, msg->seq_num);
        } else {
            if(ack_chan->in_msg_seq_num ==  ack_chan->ack_msg_seq_num) {
                /* begining window -start window ack timer */
                ack_timeout.tv_sec = ack_chan->timeout_secs;
                ack_timeout.tv_usec = 0;
                opal_event_evtimer_add (&ack_chan->msg_ack_timer_event, &ack_timeout);
            }
            rc = ORTE_SUCCESS;
        }
        ack_chan->in_msg_seq_num = msg->seq_num;
    }
    else {
        rc = process_out_of_order_msg(ack_chan, msg);
    }
    return rc;
}
示例#3
0
void orte_sensor_base_sample(int fd, short args, void *cbdata)
{
    orte_sensor_active_module_t *i_module;
    int i;
    
    if (!mods_active) {
        return;
    }

    /* see if we were ordered to stop */
    if (!orte_sensor_base.active) {
        return;
    }

    opal_output_verbose(5, orte_sensor_base.output,
                        "%s sensor:base: sampling sensors",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* call the sample function of all modules in priority order from
     * highest to lowest - the heartbeat should always be the lowest
     * priority, so it will send any collected data
     */
    for (i=0; i < orte_sensor_base.modules.size; i++) {
        if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) {
            continue;
        }
        if (NULL != i_module->module->sample) {
            opal_output_verbose(5, orte_sensor_base.output,
                                "%s sensor:base: sampling component %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                i_module->component->mca_component_name);
            i_module->module->sample();
        }
    }

    /* restart the timer */
    opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate);

    return;
}
示例#4
0
static void perthread_file_sample(int fd, short args, void *cbdata)
{
    orcm_sensor_sampler_t *sampler = (orcm_sensor_sampler_t*)cbdata;

    opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                            "%s sensor file : perthread_file_sample: called",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* this has fired in the sampler thread, so we are okay to
     * just go ahead and sample since we do NOT allow both the
     * base thread and the component thread to both be actively
     * calling this component */
    collect_sample(sampler);
    /* we now need to push the results into the base event thread
     * so it can add the data to the base bucket */
    ORCM_SENSOR_XFER(&sampler->bucket);
    /* clear the bucket */
    OBJ_DESTRUCT(&sampler->bucket);
    OBJ_CONSTRUCT(&sampler->bucket, opal_buffer_t);
    /* set ourselves to sample again */
    opal_event_evtimer_add(&sampler->ev, &sampler->rate);
}
示例#5
0
static void plm_yarn_launch_apps(int fd, short args, void *cbdata)
{
    int rc;
    orte_job_t *jdata;
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;

    /* convenience */
    jdata = caddy->jdata;

    if (ORTE_JOB_STATE_LAUNCH_APPS != caddy->job_state) {
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* update job state */
    jdata->state = caddy->job_state;

    /* register recv callback for daemons sync request */
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                      ORTE_RML_TAG_YARN_SYNC_REQUEST,
                                                      ORTE_RML_PERSISTENT,
                                                      yarn_hnp_sync_recv, jdata))) {
        ORTE_ERROR_LOG(rc);
    }

    orte_plm_base_launch_apps(fd, args, cbdata);

	//============heartbeat with AM======
	opal_event_t *ev = NULL;
	ev = (opal_event_t*) malloc(sizeof(opal_event_t));

	struct timeval delay;
	delay.tv_sec = 1;
	delay.tv_usec = 0;

	opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata);
	opal_event_evtimer_add(ev, &delay);
	//===================================
}
示例#6
0
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }
    
    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }
    
    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }
    
    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        error = "opal_output_init";
        goto error;
    }
    
    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (ORTE_PROC_IS_APP) {
#if !ORTE_DISABLE_FULL_SUPPORT && ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
        /* get a separate orte event base */
        orte_event_base = opal_event_base_create();
        /* setup the finalize event - we'll need it
         * to break the thread out of the event lib
         * when we want to stop it
         */
        opal_event_set(orte_event_base, &orte_finalize_event, -1, OPAL_EV_WRITE, ignore_callback, NULL);
        opal_event_set_priority(&orte_finalize_event, ORTE_ERROR_PRI);
#if 0
        {
            /* seems strange, but wake us up once a second just so we can check for new events */
            opal_event_t *ev;
            struct timeval tv = {1,0};
            ev = opal_event_alloc();
            opal_event_evtimer_set(orte_event_base,
                               ev, ignore_callback, ev);
            opal_event_set_priority(ev, ORTE_INFO_PRI);
            opal_event_evtimer_add(ev, &tv);
        }
#endif
        /* construct the thread object */
        OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
        /* fork off a thread to progress it */
        orte_progress_thread.t_run = orte_progress_thread_engine;
        if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
            error = "orte progress thread start";
            goto error;
        }
#else
        error = "event thread support is not configured";
        ret = ORTE_ERROR;
        goto error;
#endif
#else
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
#endif
    } else {
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }
    
    /* All done */
    return ORTE_SUCCESS;
    
 error:
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
static void sample(int fd, short event, void *arg)
{
    float prob;
    orte_proc_t *child;
    int i;

    /* if we are not sampling any more, then just return */
    if (NULL == sample_ev) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s sample:ft_tester considering killing something",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* are we including ourselves? */
    if (ORTE_PROC_IS_DAEMON && 0 < mca_sensor_ft_tester_component.daemon_fail_prob) {
        OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                             "%s sample:ft_tester considering killing me!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* roll the dice */
        prob = (double)random() / (double)INT32_MAX;
        if (prob < mca_sensor_ft_tester_component.daemon_fail_prob) {
            /* commit suicide */
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s sample:ft_tester committing suicide",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            orte_errmgr.abort(1, NULL);
            return;
        }
    }

    /* see if we should kill a child */
    for (i=0; i < orte_local_children->size; i++) {
        if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
            continue;
        }
        if (!child->alive || 0 == child->pid ||
            ORTE_PROC_STATE_UNTERMINATED < child->state) {
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s sample:ft_tester ignoring child: %s alive %s pid %lu state %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&child->name),
                                 child->alive ? "TRUE" : "FALSE",
                                 (unsigned long)child->pid, orte_proc_state_to_str(child->state)));
            continue;
        }
        /* roll the dice */
        prob = (double)random() / (double)INT32_MAX;
        OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                             "%s sample:ft_tester child: %s dice: %f prob %f",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&child->name),
                             prob, mca_sensor_ft_tester_component.fail_prob));
        if (prob < mca_sensor_ft_tester_component.fail_prob) {
            /* you shall die... */
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s sample:ft_tester killing %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&child->name)));
            kill(child->pid, SIGTERM);
            /* are we allowing multiple deaths */
            if (!mca_sensor_ft_tester_component.multi_fail) {
                break;
            }
        }
    }

    /* restart the timer */
    if (NULL != sample_ev) {
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
}
示例#8
0
/*****************
 * Local Functions
 *****************/
static void errmgr_autor_process_fault_app(orte_job_t *jdata,
                                           orte_process_name_t *proc,
                                           orte_proc_state_t state)
{
    errmgr_autor_wp_item_t *wp_item = NULL;
    struct timeval soon;

    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor): process_fault() "
                         "Process fault! proc %s (0x%x)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         state));

    if( !orte_sstore_base_is_checkpoint_available ) {
        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "%s errmgr:hnp(autor): process_fault() "
                             "No checkpoints are available for this job! Cannot Automaticly Recover!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ));
        opal_show_help("help-orte-errmgr-hnp.txt", "autor_failed_to_recover_proc", true,
                       ORTE_NAME_PRINT(proc), proc->vpid);
        return;
    }

    mca_errmgr_hnp_component.ignore_current_update = true;

    /*
     * If we are already in the shutdown stage of the recovery, then just skip it
     */
    if( autor_mask_faults ) {
        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "%s errmgr:hnp(autor):process_fault() "
                             "Currently recovering the job. Failure masked!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /*
     * Append this process to the list to process
     */
    wp_item = OBJ_NEW(errmgr_autor_wp_item_t);
    wp_item->name.jobid = proc->jobid;
    wp_item->name.vpid = proc->vpid;
    ORTE_EPOCH_SET(wp_item->name.epoch,proc->epoch);
    wp_item->state = state;

    opal_list_append(procs_pending_recovery, &(wp_item->super));

    /*
     * Activate the timer, if it is not already setup
     */
    if( !autor_timer_active ) {
        autor_timer_active = true;

        opal_event_evtimer_set(opal_event_base, autor_timer_event, errmgr_autor_recover_processes, NULL);
        soon.tv_sec  = mca_errmgr_hnp_component.autor_recovery_delay;
        soon.tv_usec = 0;
        opal_event_evtimer_add(autor_timer_event, &soon);
    }

    return;
}
示例#9
0
static void sample(int fd, short event, void *arg)
{
    struct stat buf;
    opal_list_item_t *item;
    file_tracker_t *ft;

    /* if we are not sampling any more, then just return */
    if (NULL == sample_ev) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s sampling files",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    for (item = opal_list_get_first(&jobs);
         item != opal_list_get_end(&jobs);
         item = opal_list_get_next(item)) {
        ft = (file_tracker_t*)item;
        
        /* stat the file and get its size */
        if (0 > stat(ft->file, &buf)) {
            /* cannot stat file */
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s could not stat %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file));
            continue;
        }
        
        OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                             "%s size %lu access %s\tmod %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime)));

        if (ft->check_size) {
            if (buf.st_size == ft->file_size) {
                ft->tick++;
                goto CHECK;
            } else {
                ft->tick = 0;
                ft->file_size = buf.st_size;
            }
        }
        if (ft->check_access) {
            if (buf.st_atime == ft->last_access) {
                ft->tick++;
                goto CHECK;
            } else {
                ft->tick = 0;
                ft->last_access = buf.st_atime;
            }
        }
        if (ft->check_mod) {
            if (buf.st_mtime == ft->last_mod) {
                ft->tick++;
                goto CHECK;
            } else {
                ft->tick = 0;
                ft->last_mod = buf.st_mtime;
            }
        }
        
    CHECK:
        OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                             "%s sampled file %s tick %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ft->file, ft->tick));

        if (ft->tick == ft->limit) {
            orte_show_help("help-orte-sensor-file.txt", "file-stalled", true,
                           ft->file, ft->file_size, ctime(&ft->last_access), ctime(&ft->last_mod));
            orte_errmgr.update_state(ft->jobid, ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED,
                                     NULL, ORTE_PROC_STATE_UNDEF,
                                     0, ORTE_ERR_PROC_STALLED);
        }
    }
        
    /* restart the timer */
        opal_event_evtimer_add(sample_ev, &sample_time);
}
示例#10
0
static void heartbeat_with_AM_cb(int fd, short event, void *data)
{
    int i, rc;
    orte_job_t *jdata = (orte_job_t*)data;
    orte_job_t* daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

    /* 1. create heartbeat request msg */
    /*
    message HeartbeatRequestProto {
    }
    */
    struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "HeartbeatRequestProto");
    if (!request_msg) {
        opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb: failed to create request_msg",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 2. send heartbeat request msg */
    rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_HEARTBEAT);
    if (rc != 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when send request_msg to AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 3. recv response and parse the msg*/
    /*
     message HeartbeatResponseProto {
         repeated ProcessStatusProto completed_processes = 1;
     }

     message ProcessStatusProto {
         optional ProcessNameProto name = 1;
         optional ProcessStateProto state = 2;
         optional int32 exit_value = 3;
     }

     enum ProcessStateProto {
         RUNNING = 1;
         COMPLETED = 2;
     }

     message ProcessNameProto {
         optional int32 jobid = 1;
         optional int32 vpid = 2;
     }
     */

    struct pbc_rmessage* response_msg = orte_hdclient_recv_message("HeartbeatResponseProto");
    if (!response_msg) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when recv HeartbeatResponseProto msg from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    int n = pbc_rmessage_size(response_msg, "completed_processes");
    if (n < 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: got n(=%d) < 0, please check",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        struct pbc_rmessage* completed_procs_msg = pbc_rmessage_message(response_msg, "completed_processes", i);
        if (!completed_procs_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse returned completed_procs_msg from AM",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(completed_procs_msg, "name", 0);
        if (!proc_name_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse proc_name_msg",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        uint32_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
        uint32_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);

        uint32_t exit_value = pbc_rmessage_integer(completed_procs_msg, "exit_value", 0, NULL);

        /* next, we will modify proc's state */
        orte_job_t* tmp_jdata = (orte_job_t*) opal_pointer_array_get_item(orte_job_data, local_jobid);
        orte_proc_t* proc = (orte_proc_t*) opal_pointer_array_get_item(tmp_jdata->procs, vpid);


        if (tmp_jdata->jobid == jdata->jobid) {
			num_completed_jdata_procs++;
		}

        if (exit_value == 0) {
        	proc->state = ORTE_PROC_STATE_TERMINATED;
        }

        /* if this process is already terminated, just skip over */
        if (proc->state >= ORTE_PROC_STATE_TERMINATED) {
            continue;
        }

        if (exit_value == -1000 || exit_value == -100 || exit_value == -101) {
            opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb proc failed to start", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ERROR_LOG(ORTE_ERROR);
            proc->state = ORTE_PROC_STATE_FAILED_TO_START;
            ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
        } else {
            /* here, means currently the proc's state < ORTE_PROC_STATE_TERMINATED,
             * however, from AM's heartbeat response, we got the proc's container is terminated,
             * to solve this dilemma , we set a timer event to reconfirm this proc's state,
             */
            opal_event_t *ev = NULL;
            ev = (opal_event_t*) malloc(sizeof(opal_event_t));

            struct timeval delay;
            delay.tv_sec = 15;
            delay.tv_usec = 0;

            opal_event_evtimer_set(orte_event_base, ev, process_state_monitor_cb, proc);
            opal_event_evtimer_add(ev, &delay);
        }
    }

cleanup:
    if (response_msg) {
        pbc_rmessage_delete(response_msg);
    }

    if (num_completed_jdata_procs == jdata->num_procs) {
        /*
         * all procs are completed, send finish request to AM,
         * modify job state to ORTE_JOB_STATE_TERMINATED
         */
        jdata->state = ORTE_JOB_STATE_TERMINATED;
        finish_app_master(0 == orte_exit_status);
        return;
    } else {
        /* next heartbeat */
        opal_event_t *ev = NULL;
        ev = (opal_event_t*) malloc(sizeof(opal_event_t));

        struct timeval delay;
        delay.tv_sec = 1;
        delay.tv_usec = 0;

        opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata);
		opal_event_evtimer_add(ev, &delay);
    }
}
示例#11
0
static void recover_procs(orte_process_name_t *daemon)
{
    orte_job_t *jdt;
    orte_proc_t *proc;
    orte_node_t *node=NULL;
    int i, rc;
    opal_buffer_t *bfr;
    uint16_t jfam;
    struct timeval offset={0, 0};
    int32_t max_fails=0;
    orte_errmgr_caddy_t *cd;

    /* the thread is locked by the caller, so don't do anything here */

    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                         "%s ATTEMPTING TO RECOVER PROCS FROM DAEMON %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(daemon)));

    /* if not already done, mark this daemon as down */
    if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(daemon_job->procs, daemon->vpid))) {
        /* correctly track number of alive daemons */
        daemon_job->num_terminated++;
        orte_process_info.num_procs--;
        /* get the corresponding node */
        node = proc->node;
        /* maintain accounting */
        OBJ_RELEASE(proc);
        proc->node = NULL;
    } else {
        /* if it has already been removed, then we need to find the node it was on.
         * this doesn't necessarily correspond to the daemon's vpid, so we have
         * to search the array
         */
        opal_output(0, "RECOVER PROCS - MISSING NODE");
        return;
    }
    /* mark the node as down so it won't be used in mapping
     * procs to be relaunched
     */
    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                         "%s MARKING NODE %s DOWN",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         node->name));

    node->state = ORTE_NODE_STATE_DOWN;
    node->daemon = NULL;
    max_fails = 0;
    /* mark all procs on this node as having terminated */
    for (i=0; i < node->procs->size; i++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
            continue;
        }
        /* get the job data object for this process */
        if (NULL == (jdt = orte_get_job_data_object(proc->name.jobid))) {
            /* major problem */
            opal_output(0, "%s COULD NOT GET JOB OBJECT FOR PROC %s(%d): state %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&proc->name), i,
                        orte_proc_state_to_str(proc->state));
            continue;
        }
        /* since the proc failed for reasons other than its own, this restart
         * does not count against its total - so mark it for restart
         */
        proc->state = ORTE_PROC_STATE_RESTART;
        proc->pid = 0;
        jdt->state = ORTE_JOB_STATE_RESTART;
        if (max_fails < proc->restarts) {
            max_fails = proc->restarts;
        }
        /* adjust the num terminated so that acctg works right */
        jdt->num_terminated++;
    }

    /* calculate a delay to avoid racy situation when a proc
     * is continuously failing due to, e.g., a bad command
     * syntax
     */
    if (1 < max_fails) {
        if (4 < max_fails) {
            /* cap the delay at 4 secs */
            offset.tv_sec = 4;
        } else {
            /* add a sec for each failure beyond the first */
            offset.tv_sec = max_fails - 1;
        }
    }

    /* now cycle thru the jobs and restart all those that were flagged */
    for (i=0; i < orte_job_data->size; i++) {
        if (NULL == (jdt = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, i))) {
            continue;
        }
        if (ORTE_JOB_STATE_RESTART == jdt->state) {
            OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                 "%s DELAYING RESTART OF JOB %s FOR %d SECS",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jdt->jobid), (int)offset.tv_sec));
            cd = OBJ_NEW(orte_errmgr_caddy_t);
            cd->jdata = jdt;
            opal_event_evtimer_set(opal_event_base, &cd->ev, launch_restart, cd);
            opal_event_evtimer_add(&cd->ev, &offset);
        }
    }
}
示例#12
0
/* failure notifications come here */
static void remote_update(int status,
                          orte_process_name_t *sender,
                          orcm_pnp_tag_t tag,
                          struct iovec *msg,
                          int count,
                          opal_buffer_t *buffer,
                          void *cbdata)
{
    int rc, n, k, cnt;
    orte_process_name_t name;
    uint8_t flag;
    orte_job_t *jdata;
    orte_proc_t *proc, *pptr;
    orte_node_t *node;
    orte_app_context_t *app;
    opal_buffer_t *bfr;
    orte_proc_state_t state;
    orte_exit_code_t exit_code;
    pid_t pid;
    bool restart_reqd, job_released, job_done;
    uint16_t jfam;
    struct timeval offset={0, 0};
    int32_t max_fails=0;
    orte_errmgr_caddy_t *cd;

    OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output,
                         "%s errmgr:sched:receive proc state notification from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* get the node object for the sender */
    if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, sender->vpid))) {
        opal_output(0, "%s CANNOT FIND NODE FOR DAEMON %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender));
        return;
    }

    /* unpack the names of the procs */
    restart_reqd = false;
    n=1;
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &name, &n, ORTE_NAME))) {

        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s GOT UPDATE FOR %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name)));

        /* unpack the pid of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &pid, &n, OPAL_PID))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* unpack the state of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &state, &n, ORTE_PROC_STATE))) {
            ORTE_ERROR_LOG(rc);
            return;
        }
        /* unpack the exit_code of the proc */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &exit_code, &n, ORTE_EXIT_CODE))) {
            ORTE_ERROR_LOG(rc);
            return;
        }

        /* get the job object for this proc */
        if (NULL == (jdata = orte_get_job_data_object(name.jobid))) {
            /* BIG problem*/
            opal_output(0, "%s errmgr:sched JOB %s NOT FOUND",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(name.jobid));
            return;
        }

        /* get the proc object */
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid))) {
            /* unknown proc - race condition when killing a proc on cmd */
            OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                 "%s MISSING PROC %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name)));
            continue;
        }
        /* update data */
        proc->pid = pid;
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s CHANGING STATE OF PROC %s FROM %s TO %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name),
                             orte_proc_state_to_str(proc->state),
                             orte_proc_state_to_str(state)));
        proc->state = state;
        proc->exit_code = exit_code;
        /* if the proc has failed, mark the job for restart unless
         * it was killed by our own cmd
         */
        if (ORTE_PROC_STATE_UNTERMINATED < state) {
            /* reset the stats */
            OBJ_DESTRUCT(&proc->stats);
            OBJ_CONSTRUCT(&proc->stats, opal_pstats_t);
            if (ORTE_PROC_STATE_KILLED_BY_CMD == state) {
                /* this is a response to our killing a proc - remove it
                 * from the system
                 */
                opal_pointer_array_set_item(jdata->procs, name.vpid, NULL);
                jdata->num_procs--;
                /* clean it off of the node */
                for (k=0; k < node->procs->size; k++) {
                    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                        continue;
                    }
                    if (pptr->name.jobid == proc->name.jobid &&
                        pptr->name.vpid == proc->name.vpid) {
                        /* found it */
                        OPAL_OUTPUT_VERBOSE((7, orte_errmgr_base.output,
                                             "%s REMOVING ENTRY %d FOR PROC %s FROM NODE %s",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), k,
                                             ORTE_NAME_PRINT(&proc->name),
                                             ORTE_VPID_PRINT(sender->vpid)));
                        opal_pointer_array_set_item(node->procs, k, NULL);
                        node->num_procs--;
                        /* maintain acctg */
                        OBJ_RELEASE(proc);
                        break;
                    }
                }
                /* release the object */
                OBJ_RELEASE(proc);
                /* if the job is now empty, or if the only procs remaining are stopped
                 * due to exceeding restart (and thus cannot run), remove it too
                 */
                if (0 == jdata->num_procs) {
                    opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL);
                    OBJ_RELEASE(jdata);
                } else {
                    job_done = true;
                    for (k=0; k < jdata->procs->size; k++) {
                        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, k))) {
                            continue;
                        }
                        OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output,
                                             "%s CHECKING PROC %s STATE %s",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                             ORTE_NAME_PRINT(&pptr->name),
                                             orte_proc_state_to_str(pptr->state)));
                        if (pptr->state < ORTE_PROC_STATE_UNTERMINATED ||
                            ORTE_PROC_STATE_CANNOT_RESTART != pptr->state) {
                            job_done = false;
                            break;
                        }
                    }
                    if (job_done) {
                        opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL);
                        OBJ_RELEASE(jdata);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                     "%s FLAGGING JOB %s AS CANDIDATE FOR RESTART",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOBID_PRINT(jdata->jobid)));
                jdata->state = ORTE_JOB_STATE_RESTART;
                /* flag that at least one job requires restart */
                restart_reqd = true;
            }
        }
        /* prep for next round */
        n=1;
    }
    if (ORCM_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* if restart not reqd, nothing more to do */
    if (!restart_reqd) {
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s NO RESTARTS REQUIRED",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /* cycle thru the array of jobs looking for those requiring restart */
    for (n=1; n < orte_job_data->size; n++) {
        if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, n))) {
            continue;
        }
        if (ORTE_JOB_STATE_RESTART != jdata->state) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s JOB %s CANDIDATE FOR RESTART",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        /* find the proc that needs restarting */
        restart_reqd = false;
        job_released = false;
        max_fails = 0;
        offset.tv_sec = 0;
        for (cnt=0; cnt < jdata->procs->size; cnt++) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, cnt))) {
                continue;
            }
            if (ORTE_PROC_STATE_UNTERMINATED < proc->state &&
                ORTE_PROC_STATE_KILLED_BY_CMD != proc->state) {
                /* get the app for this proc */
                app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
                if (NULL == app) {
                    opal_output(0, "%s UNKNOWN APP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    continue;
                }

                /* check the number of restarts to see if the limit has been reached */
                if (app->max_restarts < 0 ||
                    proc->restarts < app->max_restarts) {
                    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                         "%s FLAGGING PROC %s FOR RESTART",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_NAME_PRINT(&proc->name)));
                    /* flag the proc for restart */
                    proc->state = ORTE_PROC_STATE_RESTART;
                    restart_reqd = true;
                    /* adjust accounting */
                    jdata->num_terminated++;
                    /* increment the restart counter since the proc will be restarted */
                    proc->restarts++;
                    /* track max failures */
                    if (max_fails < proc->restarts) {
                        max_fails = proc->restarts;
                    }
                } else {
                    /* limit reached - don't restart it */
                    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                                         "%s PROC %s AT LIMIT - CANNOT RESTART",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_NAME_PRINT(&proc->name)));
                    /* leave the proc in the system so users can see that it
                     * reached the restart limit
                     */
                    proc->state = ORTE_PROC_STATE_CANNOT_RESTART;
                    proc->pid = 0;
                    /* increment his restarts this once so it shows as too high */
                    proc->restarts++;
                    /* adjust accounting */
                    jdata->num_procs--;
                    jdata->num_terminated++;
                    /* clean it off of the node */
                    if (NULL == (node = proc->node)) {
                        continue;
                    }
                    for (k=0; k < node->procs->size; k++) {
                        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                            continue;
                        }
                        if (pptr == proc) {
                            /* found it */
                            opal_pointer_array_set_item(node->procs, k, NULL);
                            node->num_procs--;
                            /* maintain acctg */
                            OBJ_RELEASE(proc);
                            proc->node = NULL;
                            break;
                        }
                    }
                }
            }
        }
        /* if the job was released, then move on */
        if (job_released) {
            continue;
        }
        /* if no procs require restart, then move on to next job */
        if (!restart_reqd) {
            jdata->state = ORTE_JOB_STATE_RUNNING;  /* reset this */
            continue;
        }

        /* calculate a delay to avoid racy situation when a proc
         * is continuously failing due to, e.g., a bad command
         * syntax
         */
        if (1 < max_fails) {
            if (4 < max_fails) {
                /* cap the delay at 4 secs */
                offset.tv_sec = 4;
            } else {
                /* add a sec for each failure beyond the first */
                offset.tv_sec = max_fails - 1;
            }
        }
        OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                             "%s DELAYING RESTART OF JOB %s FOR %d SECS",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid), (int)offset.tv_sec));
        cd = OBJ_NEW(orte_errmgr_caddy_t);
        cd->jdata = jdata;
        opal_event_evtimer_set(opal_event_base, &cd->ev, launch_restart, cd);
        opal_event_evtimer_add(&cd->ev, &offset);
    }
}
示例#13
0
static void check_config(int fd, short args, void *cbdata)
{
    DIR *dirp = NULL;
    struct dirent * dir_entry;
    struct stat buf;
    int i, rc, n, j, k, m;
    char *fullpath;
    orcm_cfgi_app_t *app, *app2, *aptr;
    orcm_cfgi_run_t *run;
    orcm_cfgi_exec_t *exec, *exec2, *eptr;
    orcm_cfgi_version_t *vers, *vers2, *vptr;
    orcm_cfgi_bin_t *bin;
    orte_job_t *jdat, *jptr;
    orte_app_context_t *ax;
    opal_pointer_array_t found_apps;
    bool found, dir_found;
    orcm_cfgi_caddy_t *caddy;

    /* take control */
    ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl);

    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                         "%s CHECKING CONFIG DIRECTORY %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         mca_orcm_cfgi_file_component.dir));

    /* Open the directory so we can get a listing */
    if (NULL == (dirp = opendir(mca_orcm_cfgi_file_component.dir))) {
        if (0 < opal_output_get_verbosity(orcm_cfgi_base.output)) {
            orte_show_help("help-cfgi-file.txt", "no-dir",
                           true, mca_orcm_cfgi_file_component.dir);
        }
        dir_found = false;
        goto restart;
    }
    dir_found = true;

    /* setup the array of apps */
    OBJ_CONSTRUCT(&found_apps, opal_pointer_array_t);
    opal_pointer_array_init(&found_apps, 16, INT_MAX, 16);

    /* cycle thru the directory */
    while (NULL != (dir_entry = readdir(dirp))) {
        /* Skip the obvious */
        if (0 == strncmp(dir_entry->d_name, ".", strlen(".")) ||
            0 == strncmp(dir_entry->d_name, "..", strlen(".."))) {
            continue;
        }
        /* Skip editor-related files */
        if (NULL != strstr(dir_entry->d_name, ".swp") ||
            NULL != strstr(dir_entry->d_name, ".swx") ||
            NULL != strchr(dir_entry->d_name, '~')) {
            continue;
        }
        if ('#' == dir_entry->d_name[0]) {
            continue;
        }

        /* parse the file, adding all found apps to the array */
        fullpath = opal_os_path(false, mca_orcm_cfgi_file_component.dir, dir_entry->d_name, NULL);
        if (ORCM_SUCCESS != (rc = parse_file(fullpath, &found_apps))) {
            OPAL_OUTPUT_VERBOSE((1, orcm_cfgi_base.output,
                                 "%s CANNOT PARSE FILE %s: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 dir_entry->d_name, ORTE_ERROR_NAME(rc)));
        }
        free(fullpath);
    }
    closedir(dirp);

    /* cycle thru the installed apps */
    for (i=0; i < orcm_cfgi_base.installed_apps.size; i++) {
        if (NULL == (app = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&orcm_cfgi_base.installed_apps, i))) {
            continue;
        }
        app->modified = false;
        /* is this app present in the found apps? */
        app2 = NULL;
        for (j=0; j < found_apps.size; j++) {
            if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
                continue;
            }
            if (0 == strcmp(app->application, aptr->application)) {
                app2 = aptr;
                /* remove it from the found_apps array as we will now process it */
                opal_pointer_array_set_item(&found_apps, j, NULL);
                break;
            }
        }
        if (NULL == app2) {
            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                 "%s APP %s IS NO LONGER INSTALLED",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->application));
            /* no longer present - remove this object from the installed array */
            opal_pointer_array_set_item(&orcm_cfgi_base.installed_apps, app->idx, NULL);
            /* find all instances */
            for (j=0; j < app->instances.size; j++) {
                if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, j))) {
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s IS NO LONGER INSTALLED - KILLING INSTANCE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, run->instance));
                /* remove it from the array */
                opal_pointer_array_set_item(&app->instances, j, NULL);
                run->app = NULL;
                run->app_idx = -1;
                /* delink all the binaries */
                for (k=0; k < run->binaries.size; k++) {
                    if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, k))) {
                        continue;
                    }
                    bin->vers = NULL;
                    bin->exec = NULL;
                }
                /* kill the associated executing job, if any */
                caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                caddy->cmd = ORCM_CFGI_KILL_JOB;
                /* retain the run object as it has -not- been removed from
                 * the running config
                 */
                OBJ_RETAIN(run);
                caddy->run = run;
                /* send it off to be processed */
                opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
            }
            /* release it */
            OBJ_RELEASE(app);
            continue;
        }
        /* app was present - did we modify it */
        if (app->max_instances != app2->max_instances) {
            app->max_instances = app2->max_instances;
            app->modified = true;
        }
        /* did we remove any executables? */
        for (j=0; j < app->executables.size; j++) {
            if (NULL == (exec = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                continue;
            }
            /* is it present in the found apps */
            exec2 = NULL;
            for (k=0; k < app2->executables.size; k++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                    continue;
                }
                if (0 == strcmp(exec->appname, eptr->appname)) {
                    exec2 = eptr;
                    break;
                }
            }
            if (NULL == exec2) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname));
                /* this executable has been removed */
                opal_pointer_array_set_item(&app->executables, j, NULL);
                /* find all instances
                 * that use this executable and kill associated binaries
                 */
                for (k=0; k < app->instances.size; k++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, k))) {
                        continue;
                    }
                    /* search the binaries to see if they include this executable */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an installed exec/version */
                            bin->vers = NULL;
                            bin->exec = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config
                             */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                OBJ_RELEASE(exec);
                continue;
            }
            /* kept the exec, but was it modified */
            if (exec->process_limit != exec2->process_limit) {
                exec->process_limit = exec2->process_limit;
                app->modified = true;
            }
            /* did we remove any versions */
            for (k=0; k < exec->versions.size; k++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, k))) {
                    continue;
                }
                /* is it present in the found app/exec */
                vers2 = NULL;
                for (n=0; n < exec2->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        /* since we have this version, we can remove it from
                         * the found app
                         */
                        opal_pointer_array_set_item(&exec2->versions, n, NULL);
                        break;
                    }
                }
                if (NULL != vers2) {
                    OBJ_RELEASE(vers2);
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXEC %s VERSION %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname, vers->version));
                /* nope - been removed, so take it out of the array */
                opal_pointer_array_set_item(&exec->versions, k, NULL);
                /* find all instances and kill this version */
                for (m=0; m < app->instances.size; m++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, m))) {
                        continue;
                    }
                    /* search the binaries to see if they include this version */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname) &&
                            0 == strcmp(bin->version, vers->version)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s VERSION %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, vers->version, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an installed version */
                            bin->vers = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config
                             */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                /* cleanup */
                OBJ_RELEASE(vers);
            }
        }
        /* did we add any executables or versions */
        for (k=0; k < app2->executables.size; k++) {
            if (NULL == (exec2 = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                continue;
            }
            exec = NULL;
            for (j=0; j < app->executables.size; j++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                    continue;
                }
                if (0 == strcmp(eptr->appname, exec2->appname)) {
                    exec = eptr;
                    break;
                }
            }
            if (NULL == exec) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s ADDING EXECUTABLE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec2->appname));
                /* added this exec - just move it across */
                opal_pointer_array_set_item(&app2->executables, k, NULL);
                exec2->idx = opal_pointer_array_add(&app->executables, exec2);
                app->modified = true;
                continue;
            }
            /* exec already present, and we dealt with mods above - so
             * see if any versions were added.
             */
            for (j=0; j < exec2->versions.size; j++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, j))) {
                    continue;
                }
                /* if already present, ignore */
                vers2 = NULL;
                for (n=0; n < exec->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        break;
                    }
                }
                if (NULL == vers2) {
                    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                         "%s APP %s ADDING EXECUTABLE %s VERSION %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         app->application, exec2->appname, vers->version));
                    opal_pointer_array_set_item(&exec2->versions, j, NULL);
                    vers->exec = exec;
                    vers->idx = opal_pointer_array_add(&exec->versions, vers);
                    app->modified = true;
                } else {
                    OBJ_RELEASE(vers2);
                }
            }
        }
        /* done with this entry */
        OBJ_RELEASE(app2);
    }

    /* any added applications get handled now - anything still in found_apps
     * would have been added
     */
    for (j=0; j < found_apps.size; j++) {
        if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                             "%s ADDING APP %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             aptr->application));
        /* just shift the entry to the installed_apps array */
        aptr->idx = opal_pointer_array_add(&orcm_cfgi_base.installed_apps, aptr);
        /* mark it as modified so it will be handled below */
        aptr->modified = true;
    }
    OBJ_DESTRUCT(&found_apps);

    /* check installed vs configd for anything needing starting,
     * but only check modified apps
     */
    check_installed(false);

 restart:
#ifdef HAVE_SYS_INOTIFY_H
    if (dir_found) {
        if (timer_in_use) {
            /* redefine the event to use inotify now
             * that the dir has been found
             */
            if (0 > (watch = inotify_add_watch(notifier, mca_orcm_cfgi_file_component.dir,
                                               IN_DELETE | IN_MODIFY | IN_MOVE))) {
                close(notifier);
                opal_event_evtimer_add(probe_ev, &probe_time);
            } else {
                opal_event_del(probe_ev);
                opal_event_set(opal_event_base, probe_ev, notifier,
                               OPAL_EV_READ|OPAL_EV_PERSIST, inotify_handler, NULL);
                opal_event_add(probe_ev, 0);
                timer_in_use = false;
            }
        } else {
            /* reset the event */
            opal_event_add(probe_ev, 0);
        }
    } else {
        /* restart the timer so we keep looking for it */
        opal_event_evtimer_add(probe_ev, &probe_time);
    }
#else
    /* restart the timer */
    opal_event_evtimer_add(probe_ev, &probe_time);
#endif
    /* release control */
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
}
static void 
rml_oob_queued_progress(int fd, short event, void *arg)
{
    orte_rml_oob_queued_msg_t *qmsg;
    orte_rml_oob_msg_header_t *hdr;
    int real_tag;
    int ret;
    orte_process_name_t next, origin;

    while (true) {
        OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
        qmsg = (orte_rml_oob_queued_msg_t*) opal_list_remove_first(&orte_rml_oob_module.queued_routing_messages);
        OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
        if (NULL == qmsg) break;

        hdr = (orte_rml_oob_msg_header_t*) qmsg->payload[0].iov_base;
        origin = hdr->origin;

        next = orte_routed.get_route(&hdr->destination);
        if (next.vpid == ORTE_VPID_INVALID) {
            opal_output(0,
                        "%s:queued progress tried routing message from %s to %s:%d, can't find route",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&hdr->origin),
                        ORTE_NAME_PRINT(&hdr->destination),
                        hdr->tag);
            opal_backtrace_print(stderr);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }

        if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
            opal_output(0, "%s:queued progress trying to get message from %s to %s:%d, routing loop",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&hdr->origin),
                        ORTE_NAME_PRINT(&hdr->destination),
                        hdr->tag);
            opal_backtrace_print(stderr);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }

        if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, &hdr->destination)) {
            real_tag = hdr->tag;
        } else {
            real_tag = ORTE_RML_TAG_RML_ROUTE;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                             "%s routing message from %s for %s to %s (tag: %d)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&hdr->origin),
                             ORTE_NAME_PRINT(&hdr->destination),
                             ORTE_NAME_PRINT(&next),
                             hdr->tag));

        ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);

        ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
                                                          &origin,
                                                          qmsg->payload,
                                                          1,
                                                          real_tag,
                                                          0,
                                                          rml_oob_recv_route_queued_send_callback,
                                                          qmsg);

        if (ORTE_SUCCESS != ret) {
            if (ORTE_ERR_ADDRESSEE_UNKNOWN == ret) {
                /* still no route -- try again */
                ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);
                OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
                opal_list_append(&orte_rml_oob_module.queued_routing_messages,
                                 &qmsg->super);
                if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
                    opal_event_evtimer_add(orte_rml_oob_module.timer_event,
                                           &orte_rml_oob_module.timeout);
                }
                OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
            } else {
                opal_output(0,
                            "%s failed to send message from %s to %s:%d %s (rc = %d)",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&next),
                            ORTE_NAME_PRINT(&origin),
                            real_tag,
                            ORTE_ERROR_NAME(ret),
                            ret);
                abort();
            }
        }
    }
}
示例#15
0
/* we cannot use the RML to communicate with SLURM as it doesn't
 * understand our internal protocol, so we have to do a bare-bones
 * exchange based on sockets
 */
static int dyn_allocate(orte_job_t *jdata)
{
    char *cmd_str, **cmd=NULL, *tmp, *jstring;
    char *node_list;
    orte_app_context_t *app;
    int i;
    struct timeval tv;
    local_jobtracker_t *jtrk;
    int64_t i64, *i64ptr;

    if (NULL == mca_ras_slurm_component.config_file) {
        opal_output(0, "Cannot perform dynamic allocation as no Slurm configuration file provided");
        return ORTE_ERR_NOT_FOUND;
    }

    /* track this request */
    jtrk = OBJ_NEW(local_jobtracker_t);
    jtrk->jobid = jdata->jobid;
    opal_list_append(&jobs, &jtrk->super);

    /* construct the command - note that the jdata structure contains
     * a field for the minimum number of nodes required for the job.
     * The node list can be constructed from the union of all the nodes
     * contained in the dash_host field of the app_contexts. So you'll
     * need to do a little work to build the command. We don't currently
     * have a field in the jdata structure for "mandatory" vs "optional"
     * allocations, so we'll have to add that someday. Likewise, you may
     * want to provide a param to adjust the timeout value
     */
    /* construct the cmd string */
    opal_argv_append_nosize(&cmd, "allocate");
    /* add the jobid */
    orte_util_convert_jobid_to_string(&jstring, jdata->jobid);
    asprintf(&tmp, "jobid=%s", jstring);
    opal_argv_append_nosize(&cmd, tmp);
    free(tmp);
    free(jstring);
    /* if we want the allocation for all apps in one shot,
     * then tell slurm
     *
     * RHC: we don't currently have the ability to handle
     * rolling allocations in the rest of the code base
     */
#if 0
    if (!mca_ras_slurm_component.rolling_alloc) {
        opal_argv_append_nosize(&cmd, "return=all");
    }
#else
    opal_argv_append_nosize(&cmd, "return=all");
#endif

    /* pass the timeout */
    asprintf(&tmp, "timeout=%d", mca_ras_slurm_component.timeout);
    opal_argv_append_nosize(&cmd, tmp);
    free(tmp);

    /* for each app, add its allocation request info */
    i64ptr = &i64;
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        /* add the app id, preceded by a colon separator */
        asprintf(&tmp, ": app=%d", (int)app->idx);
        opal_argv_append_nosize(&cmd, tmp);
        free(tmp);
        /* add the number of process "slots" we need */
        asprintf(&tmp, "np=%d", app->num_procs);
        opal_argv_append_nosize(&cmd, tmp);
        free(tmp);
        /* if we were given a minimum number of nodes, pass it along */
        if (orte_get_attribute(&app->attributes, ORTE_APP_MIN_NODES, (void**)&i64ptr, OPAL_INT64)) {
            asprintf(&tmp, "N=%ld", (long int)i64);
            opal_argv_append_nosize(&cmd, tmp);
            free(tmp);
        }
        /* add the list of nodes, if one was given, ensuring
         * that each node only appears once
         */
        node_list =  get_node_list(app);
        if (NULL != node_list) {
            asprintf(&tmp, "node_list=%s", node_list);
            opal_argv_append_nosize(&cmd, tmp);
            free(node_list);
            free(tmp);
        }
        /* add the mandatory/optional flag */
        if (orte_get_attribute(&app->attributes, ORTE_APP_MANDATORY, NULL, OPAL_BOOL)) {
            opal_argv_append_nosize(&cmd, "flag=mandatory");
        } else {
            opal_argv_append_nosize(&cmd, "flag=optional");
        }
    }

    /* assemble it into the final cmd to be sent */
    cmd_str = opal_argv_join(cmd, ' ');
    opal_argv_free(cmd);

    /* start a timer - if the response to our request doesn't appear
     * in the defined time, then we will error out as Slurm isn't
     * responding to us
     */
    opal_event_evtimer_set(orte_event_base, &jtrk->timeout_ev, timeout, jtrk);
    tv.tv_sec = mca_ras_slurm_component.timeout * 2;
    tv.tv_usec = 0;
    opal_event_evtimer_add(&jtrk->timeout_ev, &tv);

    opal_output_verbose(2, orte_ras_base_framework.framework_output,
                        "%s slurm:dynalloc cmd_str = %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        cmd_str);

    if (send(socket_fd, cmd_str, strlen(cmd_str)+1, 0) < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
    }
    free(cmd_str);

    /* we cannot wait here for a response as we
     * are already in an event. So return a value
     * that indicates we are waiting for an
     * allocation so the base functions know
     * that they shouldn't progress the job
     */
    return ORTE_ERR_ALLOCATION_PENDING;
}
static void
rml_oob_recv_route_callback(int status,
                            struct orte_process_name_t* peer,
                            struct iovec* iov,
                            int count,
                            orte_rml_tag_t tag,
                            void *cbdata)
{
    orte_rml_oob_msg_header_t *hdr = 
        (orte_rml_oob_msg_header_t*) iov[0].iov_base;
    int real_tag;
    int ret;
    orte_process_name_t next, origin;
    struct iovec *new_iov;

    /* BWB -- propogate errors here... */
    assert(status >= 0);

    ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);

    origin = hdr->origin;

    next = orte_routed.get_route(&hdr->destination);
    if (next.vpid == ORTE_VPID_INVALID) {
        opal_output(0, "%s:route_callback tried routing message from %s to %s:%d, can't find route",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&origin),
                    ORTE_NAME_PRINT(&hdr->destination),
                    hdr->tag);
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        return;
    }

    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
        opal_output(0, "%s:route_callback trying to get message from %s to %s:%d, routing loop",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&origin),
                    ORTE_NAME_PRINT(&hdr->destination),
                    hdr->tag);
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
    }

    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, &hdr->destination)) {
        real_tag = hdr->tag;
    } else {
        real_tag = ORTE_RML_TAG_RML_ROUTE;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                         "%s routing message from %s for %s to %s (tag: %d)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&hdr->origin),
                         ORTE_NAME_PRINT(&hdr->destination),
                         ORTE_NAME_PRINT(&next),
                         hdr->tag));

    ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);

    /* NTH: fix potential race condition. oob may modify iov before
       the oob send completes */
    new_iov = (struct iovec*) calloc (count, sizeof (struct iovec));
    if (NULL == new_iov) {
	opal_output (0, "%s:route_callback malloc error!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);    
    }

    memcpy (new_iov, iov, count * sizeof (struct iovec));

    ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
                                                      &origin,
                                                      new_iov,
                                                      count,
                                                      real_tag,
                                                      0,
                                                      rml_oob_recv_route_send_callback,
                                                      NULL);

    if (ORTE_SUCCESS != ret) {
        if (ORTE_ERR_ADDRESSEE_UNKNOWN == ret) {
            /* no route -- queue and hope we find a route */
            orte_rml_oob_queued_msg_t *qmsg = OBJ_NEW(orte_rml_oob_queued_msg_t);
            OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                                 "%s: no OOB information for %s.  Queuing for later.",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&next)));
            ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);
            qmsg->payload[0].iov_base = (IOVBASE_TYPE*) malloc(iov[0].iov_len);
            if (NULL == qmsg->payload[0].iov_base) abort();
            qmsg->payload[0].iov_len = iov[0].iov_len;
            memcpy(qmsg->payload[0].iov_base, iov[0].iov_base, iov[0].iov_len);
            OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
            opal_list_append(&orte_rml_oob_module.queued_routing_messages,
                             &qmsg->super);
            if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
                opal_event_evtimer_add(orte_rml_oob_module.timer_event,
                                       &orte_rml_oob_module.timeout);
            }
            OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
        } else {
            opal_output(0,
                        "%s failed to send message to %s: %s (rc = %d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&next),
                        opal_strerror(ret),
                        ret);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }
    }
}
示例#17
0
void orcm_sensor_base_start(orte_jobid_t job)
{
    orcm_sensor_active_module_t *i_module;
    int i;
    orcm_sensor_sampler_t *sampler;

    opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                        "%s sensor:base: sensor start called",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* if no modules are active, then there is nothing to do */
    if (0 == orcm_sensor_base.modules.size) {
        return;
    }

    if (!mods_active) {
        opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                            "%s sensor:base: starting sensors",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

        if (!orcm_sensor_base.dbhandle_requested) {
            orcm_db.open("sensor", NULL, db_open_cb, NULL);
            orcm_sensor_base.dbhandle_requested = true;
        }

        /* call the start function of all modules in priority order */
        for (i=0; i < orcm_sensor_base.modules.size; i++) {
            if (NULL == (i_module = (orcm_sensor_active_module_t*)opal_pointer_array_get_item(&orcm_sensor_base.modules, i))) {
                continue;
            }
            mods_active = true;
            if (NULL != i_module->module->start) {
                i_module->module->start(job);
            }
        }

        /* create the event base and start the progress engine, if necessary */
        if (!orcm_sensor_base.ev_active) {
            orcm_sensor_base.ev_active = true;
            if (NULL == (orcm_sensor_base.ev_base = orcm_start_progress_thread("sensor", progress_thread_engine, NULL))) {
                orcm_sensor_base.ev_active = false;
                return;
            }
        }


        if (mods_active && 0 < orcm_sensor_base.sample_rate) {
            /* startup a timer to wake us up periodically
             * for a data sample, and pass in the sampler
             */
            opal_output_verbose(5, orcm_sensor_base_framework.framework_output,
                                "%s sensor:base: creating sampler with rate %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                orcm_sensor_base.sample_rate);
            sampler = OBJ_NEW(orcm_sensor_sampler_t);
            sampler->rate.tv_sec = orcm_sensor_base.sample_rate;
            sampler->log_data = orcm_sensor_base.log_samples;
            opal_event_evtimer_set(orcm_sensor_base.ev_base, &sampler->ev,
                                   take_sample, sampler);
            opal_event_evtimer_add(&sampler->ev, &sampler->rate);
        }
    } else if (!orcm_sensor_base.ev_active) {
        orcm_sensor_base.ev_active = true;
        orcm_restart_progress_thread("sensor");
    }

    return;    
}
示例#18
0
/*
 * Start monitoring of local processes
 */
static void start(orte_jobid_t jobid)
{
    mca_base_component_t *c = &mca_sensor_file_component.super.base_version;
    opal_list_item_t *item;
    orte_odls_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int rc, tmp;
    char *filename;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* get the local jobdat for this job */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_end(&orte_local_jobdata)) {
        jobdat = (orte_odls_job_t*)item;
        if (jobid == jobdat->jobid || ORTE_JOBID_WILDCARD == jobid) {
            /* must be at least one app_context, so use the first one found */
            app = NULL;
            for (tmp=0; tmp < jobdat->apps.size; tmp++) {
                if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(&jobdat->apps, tmp))) {
                    app = aptr;
                    break;
                }
            }
            if (NULL == app) {
                /* got a problem */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }
            
            /* search the environ to get the filename */
            if (ORTE_SUCCESS != (rc = mca_base_param_find_string(c, "filename", app->env, &filename))) {
                /* was a default file given */
                if (NULL == mca_sensor_file_component.file) {
                    /* can't do anything without a file */
                    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                         "%s sensor:file no file for job %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOBID_PRINT(jobid)));
                    continue;
                }
                filename = mca_sensor_file_component.file;
            }
            
            /* create the tracking object */
            ft = OBJ_NEW(file_tracker_t);
            ft->jobid = jobid;
            ft->file = strdup(filename);
            
            /* search the environ to see what we are checking */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_size", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_size) {
                    ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
                }
            } else {
                ft->check_size = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_access", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_access) {
                    ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
                }
            } else {
                ft->check_access = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_mod", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_mod) {
                    ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
                }
            } else {
                ft->check_mod = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "limit", app->env, &tmp))) {
                ft->limit = mca_sensor_file_component.limit;
            } else {
                ft->limit = tmp;
            }
            opal_list_append(&jobs, &ft->super);
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s file %s monitored for %s%s%s with limit %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file, ft->check_size ? "SIZE:" : " ",
                                 ft->check_access ? "ACCESS TIME:" : " ",
                                 ft->check_mod ? "MOD TIME" : " ", ft->limit));
        }
    }
    
    /* start sampling */
    if (NULL == sample_ev && !opal_list_is_empty(&jobs)) {
        /* startup a timer to wake us up periodically
         * for a data sample
         */
        sample_ev =  (opal_event_t *) malloc(sizeof(opal_event_t));
        opal_event_evtimer_set(opal_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_file_component.sample_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}
示例#19
0
static int native_abort(int flag, const char msg[])
{
    opal_buffer_t *bfr;
    pmix_cmd_t cmd = PMIX_ABORT_CMD;
    int rc;
    pmix_cb_t *cb;
    opal_event_t ev;
    struct timeval tv = {1, 0};

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native abort called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* create a buffer to hold the message */
        bfr = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the status flag */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &flag, 1, OPAL_INT))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the string message - a NULL is okay */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &msg, 1, OPAL_STRING))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }

        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        /* push a timeout event to wake us up just in case this
         * message cannot get thru - e.g., someone else may have
         * detected the failure of the server and ordered an abort */
        opal_event_evtimer_set(mca_pmix_native_component.evbase,
                               &ev, timeout, cb);
        opal_event_evtimer_add(&ev, &tv);

        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(bfr, wait_cbfunc, cb);

        /* wait for the release */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }
    return OPAL_SUCCESS;
}
示例#20
0
文件: show_help.c 项目: ORNL/ompi
static int show_help(const char *filename, const char *topic,
                     const char *output, orte_process_name_t *sender)
{
    int rc;
    tuple_list_item_t *tli = NULL;
    orte_namelist_t *pnli;
    time_t now = time(NULL);

    /* If we're aggregating, check for duplicates.  Otherwise, don't
       track duplicates at all and always display the message. */
    if (orte_help_want_aggregate) {
        rc = get_tli(filename, topic, &tli);
    } else {
        rc = ORTE_ERR_NOT_FOUND;
    }

    /* If there's no output string (i.e., this is a control message
       asking us to suppress), then skip to the end. */
    if (NULL == output) {
        tli->tli_display = false;
        goto after_output;
    }

    /* Was it already displayed? */
    if (ORTE_SUCCESS == rc) {
        /* Yes.  But do we want to print anything?  That's complicated.

           We always show the first message of a given (filename,
           topic) tuple as soon as it arrives.  But we don't want to
           show duplicate notices often, because we could get overrun
           with them.  So we want to gather them up and say "We got N
           duplicates" every once in a while.

           And keep in mind that at termination, we'll unconditionally
           show all accumulated duplicate notices.

           A simple scheme is as follows:
           - when the first of a (filename, topic) tuple arrives
             - print the message
             - if a timer is not set, set T=now
           - when a duplicate (filename, topic) tuple arrives
             - if now>(T+5) and timer is not set (due to
               non-pre-emptiveness of our libevent, a timer *could* be
               set!)
               - print all accumulated duplicates
               - reset T=now
             - else if a timer was not set, set the timer for T+5
             - else if a timer was set, do nothing (just wait)
           - set T=now when the timer expires
        */           
        ++tli->tli_count_since_last_display;
        if (now > show_help_time_last_displayed + 5 && !show_help_timer_set) {
            show_accumulated_duplicates(0, 0, NULL);
        } else if (!show_help_timer_set) {
            opal_event_evtimer_set(orte_event_base, &show_help_timer_event,
                                   show_accumulated_duplicates, NULL);
            opal_event_evtimer_add(&show_help_timer_event, &show_help_interval);
            show_help_timer_set = true;
        }
    } 
    /* Not already displayed */
    else if (ORTE_ERR_NOT_FOUND == rc) {
        if (orte_xml_output) {
            char *tmp;
            tmp = xml_format((unsigned char*)output);
            fprintf(orte_xml_fp, "%s", tmp);
            fflush(orte_xml_fp);
            free(tmp);
        } else {
            opal_output(orte_clean_output, "%s", output);
        }
        if (!show_help_timer_set) {
            show_help_time_last_displayed = now;
        }
    }
    /* Some other error occurred */
    else {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

 after_output:
    /* If we're aggregating, add this process name to the list */
    if (orte_help_want_aggregate) {
        pnli = OBJ_NEW(orte_namelist_t);
        if (NULL == pnli) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        pnli->name = *sender;
        opal_list_append(&(tli->tli_processes), &(pnli->super));
    }
    return ORTE_SUCCESS;
}
示例#21
0
/*
 * Start monitoring of local processes
 */
static void start(orte_jobid_t jobid)
{
    orte_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int i;
    char *filename;
    file_tracker_t *ft;
    char *ptr;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));

    /* get the local jobdat for this job */
    if (NULL == (jobdat = orte_get_job_data_object(jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* must be at least one app_context, so use the first one found */
    app = NULL;
    for (i=0; i < jobdat->apps->size; i++) {
        if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, i))) {
            app = aptr;
            break;
        }
    }
    if (NULL == app) {
        /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* search the environ to get the filename */
    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_filename", &filename)) {
        /* was a default file given */
        if (NULL == mca_sensor_file_component.file) {
            /* can't do anything without a file */
            OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                                 "%s sensor:file no file for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jobid)));
            return;
        }
        filename = strdup(mca_sensor_file_component.file);
    }

    /* create the tracking object */
    ft = OBJ_NEW(file_tracker_t);
    ft->jobid = jobid;
    ft->file = strdup(filename);
    free(filename);

    /* search the environ to see what we are checking */
    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_check_size", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_size) {
            ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
        }
    } else {
        ft->check_size = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_check_access", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_access) {
            ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
        }
    } else {
        ft->check_access = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_check_mod", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_mod) {
            ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
        }
    } else {
        ft->check_mod = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    if (!find_value(app, OPAL_MCA_PREFIX"sensor_file_limit", &ptr)) {
        ft->limit = mca_sensor_file_component.limit;
    } else {
        ft->limit = strtol(ptr, NULL, 10);
        free(ptr);
    }
    opal_list_append(&jobs, &ft->super);
    OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                         "%s file %s monitored for %s%s%s with limit %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ft->file, ft->check_size ? "SIZE:" : " ",
                         ft->check_access ? "ACCESS TIME:" : " ",
                         ft->check_mod ? "MOD TIME" : " ", ft->limit));

    /* start a separate file progress thread for sampling */
    if (mca_sensor_file_component.use_progress_thread) {
        if (!orcm_sensor_file.ev_active) {
            orcm_sensor_file.ev_active = true;
            if (NULL == (orcm_sensor_file.ev_base = opal_progress_thread_init("file"))) {
                orcm_sensor_file.ev_active = false;
                return;
            }
        }

        /* setup file sampler */
        file_sampler = OBJ_NEW(orcm_sensor_sampler_t);

        /* check if file sample rate is provided for this*/
        if (mca_sensor_file_component.sample_rate) {
            file_sampler->rate.tv_sec = mca_sensor_file_component.sample_rate;
        } else {
            file_sampler->rate.tv_sec = orcm_sensor_base.sample_rate;
        }
        file_sampler->log_data = orcm_sensor_base.log_samples;
        opal_event_evtimer_set(orcm_sensor_file.ev_base, &file_sampler->ev,
                               perthread_file_sample, file_sampler);
        opal_event_evtimer_add(&file_sampler->ev, &file_sampler->rate);
    }
    return;
}