/*
 * Function to return a pointer to a job map
 */
orte_job_map_t* orte_rmaps_base_get_job_map(orte_jobid_t job)
{
    orte_job_t *jdata;
    orte_job_map_t *map;
    orte_job_t *daemons;
    orte_proc_t *proc;
    orte_std_cntr_t i;
    orte_node_t *node;
    
    /* lookup the job's data object */
    if (NULL == (jdata = orte_get_job_data_object(job))) {
        /* bad jobid */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return NULL;
    }
    
    /* locate the map */
    map = jdata->map;
    
    /* lookup the daemon's job data struct */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        /* bad jobid */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return NULL;
    }
    
    /* check to see if daemons have been launched since we
     * last updated the map
     */
    for (i=0; i < map->nodes->size; i++) {
        if (NULL != map->nodes->addr[i]) {
            node = (orte_node_t*)map->nodes->addr[i];
            if (NULL != node->daemon) {
                if (daemons->procs->size < (orte_std_cntr_t)node->daemon->name.vpid) {
                    /* well that is bad */
                    ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                    return NULL;                    
                }
                /* find the daemon's info */
                proc = (orte_proc_t*)daemons->procs->addr[node->daemon->name.vpid];
                if (NULL == proc) {
                    /* well that is bad too */
                    ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                    return NULL;
                }
                if (NULL != proc->rml_uri) {
                    node->daemon_launched = true;
                } else {
                    node->daemon_launched = false;
                }
            }
        }
    }
    
    return map;
}
Esempio n. 2
0
static int init(void)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s initializing heartbeat recvs",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* setup to receive heartbeats */
    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_CM) {
        if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                          ORTE_RML_TAG_HEARTBEAT,
                                                          ORTE_RML_PERSISTENT,
                                                          recv_beats, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    if (ORTE_PROC_IS_HNP) {
        daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    }

    return rc;
}
/* this function automatically gets periodically called
 * by the event library so we can check on the state
 * of the various orteds
 */
static void check_heartbeat(int fd, short dummy, void *arg)
{
    int v;
    orte_proc_t *proc;
    orte_job_t *daemons;
    struct timeval timeout;
    bool died = false;
    opal_event_t *tmp = (opal_event_t*)arg;
    struct timeval now;
    
    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:base:check_heartbeat",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* if we are aborting or shutting down, ignore this */
    if (orte_abnormal_term_ordered || 0 == orte_heartbeat_rate) {
        return;
    }
    
    /* get the job object for the daemons */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }
    
    /* get current time */
    gettimeofday(&timeout, NULL);
    
    /* cycle through the daemons - make sure we check them all
     * in case multiple daemons died so all of those that did die
     * can be appropriately flagged
     */
    for (v=1; v < daemons->procs->size; v++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, v))) {
            continue;
        }
        if ((timeout.tv_sec - proc->beat) > HEARTBEAT_CK*orte_heartbeat_rate) {
            /* declare this orted dead */
            proc->state = ORTE_PROC_STATE_ABORTED;
            proc->exit_code = ORTE_ERROR_DEFAULT_EXIT_CODE;
            if (NULL == daemons->aborted_proc) {
                daemons->aborted_proc = proc;
            }
            ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
            died = true;
        }
    }
    
    /* if any daemon died, abort */
    if (died) {
        orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1,
                                    ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_ABORTED);
        return;
    }
    
    /* reset the timer */
    now.tv_sec = HEARTBEAT_CK*orte_heartbeat_rate;
    now.tv_usec = 0;
    opal_evtimer_add(tmp, &now);
}
Esempio n. 4
0
static void alps_wait_cb(orte_proc_t *proc, void* cbdata){
    orte_job_t *jdata;

    /* According to the ALPS folks, alps always returns the highest exit
       code of our remote processes. Thus, a non-zero exit status doesn't
       necessarily mean that alps failed - it could be that an orted returned
       a non-zero exit status. Of course, that means the orted failed(!), so
       the end result is the same - the job didn't start.
    
       As a result, we really can't do much with the exit status itself - it
       could be something in errno (if alps itself failed), or it could be
       something returned by an orted, or it could be something returned by
       the OS (e.g., couldn't find the orted binary). Somebody is welcome
       to sort out all the options and pretty-print a better error message. For
       now, though, the only thing that really matters is that
       alps failed. Report the error and make sure that orterun
       wakes up - otherwise, do nothing!
    */
    jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    
    if (0 != proc->exit_code) {
        if (failed_launch) {
            /* report that the daemon has failed so we break out of the daemon
             * callback receive and exit
             */
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START);            
        } else {
            /* an orted must have died unexpectedly after launch - report
             * that the daemon has failed so we exit
             */
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED);
        }
    }
}
/**
* Terminate the orteds for a given job
 */
static int plm_slurm_terminate_orteds(void)
{
    int rc;
    orte_job_t *jdata;
    
    /* tell them to die without sending a reply - we will rely on the
     * waitpid to tell us when they have exited!
     */
    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
        ORTE_ERROR_LOG(rc);
    }
    
    /* check to see if the primary pid is set. If not, this indicates
     * that we never launched any additional daemons, so we cannot
     * not wait for a waitpid to fire and tell us it's okay to
     * exit. Instead, we simply trigger an exit for ourselves
     */
    if (!primary_pid_set) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: primary daemons complete!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
        jdata->state = ORTE_JOB_STATE_TERMINATED;
        /* need to set the #terminated value to avoid an incorrect error msg */
        jdata->num_terminated = jdata->num_procs;
        orte_trigger_event(&orteds_exit);
    }
    
    return rc;
}
Esempio n. 6
0
void orte_routed_base_coll_relay_routing(orte_grpcomm_collective_t *coll)
{
    opal_list_item_t *item, *itm;
    orte_namelist_t *nm, *n2, *n3;
    bool dup;
    orte_job_t *jdata;
    orte_proc_t *proc;

    if (ORTE_PROC_IS_HNP) {
        /* nobody to send to */
        return;
    }
    /* if we are a daemon, then we look at the list of
     * participants. If there is a wildcard, then we
     * know that all procs are participating, so we
     * can send it to our parent. If not, then we have
     * to send the collective to the daemon hosting
     * the participating proc
     */
    for (item = opal_list_get_first(&coll->participants);
         item != opal_list_get_end(&coll->participants);
         item = opal_list_get_next(item)) {
        n2 = (orte_namelist_t*)item;
        nm = OBJ_NEW(orte_namelist_t);
        nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
        dup = false;
        if (ORTE_VPID_WILDCARD == n2->name.vpid) {
            nm->name.vpid = ORTE_PROC_MY_PARENT->vpid;
        } else {
            jdata = orte_get_job_data_object(n2->name.jobid);
            proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n2->name.vpid);
            if (NULL == proc || NULL == proc->node || NULL == proc->node->daemon) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }
            nm->name.vpid = proc->node->daemon->name.vpid;
        }
        /* if it is me, then ignore */
        if (nm->name.vpid == ORTE_PROC_MY_NAME->vpid) {
            dup = true;
        } else {
            /* if it is already on the list, we ignore */
            for (itm = opal_list_get_first(&coll->targets);
                 itm != opal_list_get_end(&coll->targets);
                 itm = opal_list_get_next(itm)) {
                n3 = (orte_namelist_t*)itm;
                if (n3->name.vpid == nm->name.vpid) {
                    /* duplicate */
                    dup = true;
                    break;
                }
            }
        }
        if (dup) {
            OBJ_RELEASE(nm);
        } else {
            opal_list_append(&coll->targets, &nm->super);
        }
    }
}
static void srun_wait_cb(pid_t pid, int status, void* cbdata)
{
    orte_job_t *jdata;
    
    /* According to the SLURM folks, srun always returns the highest exit
     code of our remote processes. Thus, a non-zero exit status doesn't
     necessarily mean that srun failed - it could be that an orted returned
     a non-zero exit status. Of course, that means the orted failed(!), so
     the end result is the same - the job didn't start.
     
     As a result, we really can't do much with the exit status itself - it
     could be something in errno (if srun itself failed), or it could be
     something returned by an orted, or it could be something returned by
     the OS (e.g., couldn't find the orted binary). Somebody is welcome
     to sort out all the options and pretty-print a better error message. For
     now, though, the only thing that really matters is that
     srun failed. Report the error and make sure that orterun
     wakes up - otherwise, do nothing!
     
     Unfortunately, the pid returned here is the srun pid, not the pid of
     the proc that actually died! So, to avoid confusion, just use -1 as the
     pid so nobody thinks this is real
     */
    
    /* if we are in the launch phase, then any termination is bad */
    if (launching_daemons) {
        /* report that one or more daemons failed to launch so we can exit */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: daemon failed during launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
    } else {
        /* if this is after launch, then we need to abort only if the status
         * returned is non-zero - i.e., if the orteds exited with an error
         */
        if (0 != status) {
            /* an orted must have died unexpectedly after launch - report
             * that the daemon has failed so we exit
             */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurm: daemon failed while running",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
        }
        /* otherwise, check to see if this is the primary pid */
        if (primary_srun_pid == pid) {
            /* in this case, we just want to fire the proper trigger so
             * mpirun can exit
             */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurm: primary daemons complete!",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
            jdata->state = ORTE_JOB_STATE_TERMINATED;
            /* need to set the #terminated value to avoid an incorrect error msg */
            jdata->num_terminated = jdata->num_procs;
            orte_trigger_event(&orteds_exit);
        }
    }
}
Esempio n. 8
0
int orte_rml_base_get_contact_info(orte_jobid_t job, opal_buffer_t *data)
{
    int i;
    orte_job_t *jdata;
    orte_proc_t *proc;
    int rc;

    /* lookup the job */
    if (NULL == (jdata = orte_get_job_data_object(job))) {
        /* bad jobid */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* cycle through all procs in the job, adding their contact info to the buffer */
    for (i=0; i < jdata->procs->size; i++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
            continue;
        }
        /* if this proc doesn't have any contact info, ignore it */
        if (NULL == proc->rml_uri) {
            continue;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->rml_uri, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    return ORTE_SUCCESS;
}
Esempio n. 9
0
/**
* Terminate the orteds for a given job
 */
static int plm_slurm_terminate_orteds(void)
{
    int rc=ORTE_SUCCESS;
    orte_job_t *jdata;
        
    /* check to see if the primary pid is set. If not, this indicates
     * that we never launched any additional daemons, so we cannot
     * not wait for a waitpid to fire and tell us it's okay to
     * exit. Instead, we simply trigger an exit for ourselves
     */
    if (primary_pid_set) {
        if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
            ORTE_ERROR_LOG(rc);
        }
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                             "%s plm:slurm: primary daemons complete!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
        /* need to set the #terminated value to avoid an incorrect error msg */
        jdata->num_terminated = jdata->num_procs;
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
    }
    
    return rc;
}
Esempio n. 10
0
orte_vpid_t orte_get_lowest_vpid_alive(orte_jobid_t job)
{
    int i;
    orte_job_t *jdata;
    orte_proc_t *proc;

    if (NULL == (jdata = orte_get_job_data_object(job))) {
        return ORTE_VPID_INVALID;
    }

    if (ORTE_PROC_IS_DAEMON &&
        ORTE_PROC_MY_NAME->jobid == job &&
        NULL != orte_process_info.my_hnp_uri) {
        /* if we were started by an HNP, then the lowest vpid
         * is always 1
         */
        return 1;
    }

    for (i=0; i < jdata->procs->size; i++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
            continue;
        }
        if (proc->state == ORTE_PROC_STATE_RUNNING) {
            /* must be lowest one alive */
            return proc->name.vpid;
        }
    }
    /* only get here if no live proc found */
    return ORTE_VPID_INVALID;
}
Esempio n. 11
0
/**
* Terminate the orteds for a given job
 */
static int plm_alps_terminate_orteds(void)
{
    int rc;
    orte_job_t *jdata;

    OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
                            "%s plm:alps: terminating orteds",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* deregister the waitpid callback to ensure we don't make it look like
     * alps failed when it didn't. Since the alps may have already completed,
     * do NOT ERROR_LOG any return code to avoid confusing, duplicate error
     * messages
     */
    if (NULL != alpsrun) {
        orte_wait_cb_cancel(alpsrun);
    }

    /* now tell them to die */
    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
        ORTE_ERROR_LOG(rc);
    }

    jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);

    OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
                            "%s plm:alps: terminated orteds",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    return rc;
}
static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sender,
                                                      opal_buffer_t* buffer)
{
    opal_buffer_t answer;
    orte_std_cntr_t count;
    orte_job_t *jdata = NULL;
    orte_proc_t **procs = NULL;
    orte_process_name_t name;
    int rc;

    OBJ_CONSTRUCT(&answer, opal_buffer_t);

    /*
     * Unpack the data
     */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &name, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /*
     * Process the data
     */
    /* get the job data object for this proc */
    if (NULL == (jdata = orte_get_job_data_object(name.jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_UPDATE_EXIT_STATUS(1);
        orte_trigger_event(&orte_exit);
        goto CLEANUP;
    }
    /* get the proc object for it */
    procs = (orte_proc_t**)jdata->procs->addr;
    if (NULL == procs[name.vpid] || NULL == procs[name.vpid]->node) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_UPDATE_EXIT_STATUS(1);
        orte_trigger_event(&orte_exit);
        goto CLEANUP;
    }

    /*
     * Send back the answer
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &(procs[name.vpid]->node->name), 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        ORTE_UPDATE_EXIT_STATUS(1);
        orte_trigger_event(&orte_exit);
        goto CLEANUP;
    }

    if (0 > (rc = orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) {
        ORTE_ERROR_LOG(rc);
    }

 CLEANUP:
    OBJ_DESTRUCT(&answer);

}
static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sender,
                                                      opal_buffer_t* buffer)
{
    opal_buffer_t *answer;
    orte_std_cntr_t count;
    orte_job_t *jdata = NULL;
    orte_proc_t *proc = NULL;
    orte_process_name_t name;
    int rc;

    /*
     * Unpack the data
     */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &name, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /*
     * Process the data
     */
    /* get the job data object for this proc */
    if (NULL == (jdata = orte_get_job_data_object(name.jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }
    /* get the proc object for it */
    proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid);
    if (NULL == proc || NULL == proc->node) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /*
     * Send back the answer
     */
    answer = OBJ_NEW(opal_buffer_t);
    if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &(proc->node->name), 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(answer);
        return;
    }

    if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
                                          ORTE_RML_TAG_FILEM_BASE_RESP,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(answer);
        return;
    }
}
Esempio n. 14
0
/* after we allocate, we need to map the processes
 * so we know what nodes will be used
 */
static void allocation_complete(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata;
    orte_job_t *daemons;
    orte_topology_t *t;
    orte_node_t *node;
    int i;

    ORTE_ACQUIRE_OBJECT(caddy);
    jdata = state->jdata;

    jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;

    /* get the daemon job object */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto done;
    }
    /* mark that we are not using a VM */
    orte_set_attribute(&daemons->attributes, ORTE_JOB_NO_VM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);

    /* ensure that all nodes point to our topology - we
     * cannot support hetero nodes with this state machine
     */
    t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            continue;
        }
        node->topology = t;
    }
    if (!orte_managed_allocation) {
        if (NULL != orte_set_slots &&
            0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
                    OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
                                         "%s plm:base:setting slots for node %s by %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
                    orte_plm_base_set_slots(node);
                }
            }
        }
    }

    /* move to the map stage */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);

 done:
    /* cleanup */
    OBJ_RELEASE(state);
}
Esempio n. 15
0
void orte_routed_base_xcast_routing(orte_grpcomm_collective_t *coll,
                                    opal_list_t *my_children)
{
    opal_list_item_t *item;
    orte_routed_tree_t *child;
    orte_namelist_t *nm;
    int i;
    orte_proc_t *proc;
    orte_job_t *daemons;

    /* if we are the HNP and an abnormal termination is underway,
     * then send it directly to everyone
     */
    if (ORTE_PROC_IS_HNP) {
        if (orte_abnormal_term_ordered) {
            daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
            for (i=1; i < daemons->procs->size; i++) {
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, i))) {
                    continue;
                }
                /* exclude anyone known not alive */
                if (proc->alive) {
                    nm = OBJ_NEW(orte_namelist_t);
                    nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
                    nm->name.vpid = proc->name.vpid;
                    opal_list_append(&coll->targets, &nm->super);
                }
            }
            /* if nobody is known alive, then we need to die */
            if (0 == opal_list_get_size(&coll->targets)) {
                ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
            }
        } else {
            /* the xcast always goes to our children */
            for (item = opal_list_get_first(my_children);
                 item != opal_list_get_end(my_children);
                 item = opal_list_get_next(item)) {
                child = (orte_routed_tree_t*)item;
                nm = OBJ_NEW(orte_namelist_t);
                nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
                nm->name.vpid = child->vpid;
                opal_list_append(&coll->targets, &nm->super);
            }
        }
    } else {
        /* I am a daemon - route to my children */
        for (item = opal_list_get_first(my_children);
             item != opal_list_get_end(my_children);
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            nm = OBJ_NEW(orte_namelist_t);
            nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
            nm->name.vpid = child->vpid;
            opal_list_append(&coll->targets, &nm->super);
        }
    }
}
Esempio n. 16
0
static void mrhnp_complete(const orte_job_t *jdata)
{
    orte_job_t *jptr;
    orte_job_map_t *map;
    orte_proc_t *daemon;
    orte_iof_proc_t *proct;
    unsigned char data[1];
    opal_list_item_t *item;
    int i;
    orte_node_t *node;
    orte_jobid_t stdout_target, *jbptr;

    stdout_target = ORTE_JOBID_INVALID;
    jbptr = &stdout_target;
    if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
        /* nothing to do */
        return;
    }

    /* the job is complete - close out the stdin
     * of any procs it was feeding
     */
    jptr = orte_get_job_data_object(stdout_target);
    map = jptr->map;
    /* cycle thru the map to find any node that has at least
     * one proc from this job
     */
    for (i=0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        daemon = node->daemon;
        if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
            for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                 item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                 item = opal_list_get_next(item)) {
                proct = (orte_iof_proc_t*)item;
                if (proct->name.jobid == jptr->jobid) {
                    if (NULL != proct->sink) {
                        /* need to write a 0-byte event to clear the stream and close it */
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                        proct->sink = NULL;
                    }
                }
            }
        } else {
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s sending close stdin to daemon %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&daemon->name)));
                
            /* need to send a 0-byte message to clear the stream and close it */
            send_data(&daemon->name, ORTE_IOF_STDIN, jptr->jobid, data, 0);
        }
    }
}
Esempio n. 17
0
static orte_proc_t* find_proc(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    
    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        return NULL;
    }

    return (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
}
Esempio n. 18
0
/**
* Terminate the orteds for a given job
 */
static int plm_yarn_terminate_orteds(void) {
	finish_app_master(0 == orte_exit_status);
    orte_job_t* jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (ORTE_JOB_STATE_DAEMONS_TERMINATED != jdata->state) {
		/* need to set the #terminated value to avoid an incorrect error msg */
		jdata->num_terminated = jdata->num_procs;
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
    }
    return ORTE_SUCCESS;
}
Esempio n. 19
0
int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update)
{
    orte_node_t *node;
    int32_t i;
    int rc;
    opal_buffer_t buf;
    orte_job_t *daemons;
    orte_proc_t *dmn;

    /* if the daemon job has not been updated, then there is
     * nothing to send
     */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (update && !ORTE_FLAG_TEST(daemons, ORTE_JOB_FLAG_UPDATED)) {
        boptr->bytes = NULL;
        boptr->size = 0;
        return ORTE_SUCCESS;
    }

    /* setup a buffer for tmp use */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);

    /* send the number of nodes */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &daemons->num_procs, 1, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    for (i=0; i < daemons->procs->size; i++) {
        if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, i))) {
            continue;
        }
        /* if the daemon doesn't have a node, that's an error */
        if (NULL == (node = dmn->node)) {
            opal_output(0, "DAEMON %s HAS NO NODE", ORTE_NAME_PRINT(&dmn->name));
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &dmn->name.vpid, 1, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* pack the node */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node, 1, ORTE_NODE))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* transfer the payload to the byte object */
    opal_dss.unload(&buf, (void**)&boptr->bytes, &boptr->size);
    OBJ_DESTRUCT(&buf);

    return ORTE_SUCCESS;
}
Esempio n. 20
0
int orte_routed_base_process_callback(orte_jobid_t job, opal_buffer_t *buffer)
{
    orte_proc_t *proc;
    orte_job_t *jdata;
    orte_std_cntr_t cnt;
    char *rml_uri;
    orte_vpid_t vpid;
    int rc;

    /* lookup the job object for this process */
    if (NULL == (jdata = orte_get_job_data_object(job))) {
        /* came from a different job family - this is an error */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    
    /* unpack the data for each entry */
    cnt = 1;
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &vpid, &cnt, ORTE_VPID))) {

        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            continue;
        }
        
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_binomial:callback got uri %s for job %s rank %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == rml_uri) ? "NULL" : rml_uri,
                             ORTE_JOBID_PRINT(job), ORTE_VPID_PRINT(vpid)));
        
        if (NULL == rml_uri) {
            /* should not happen */
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            return ORTE_ERR_FATAL;
        }
        
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            continue;
        }
        
        /* update the record */
        proc->rml_uri = strdup(rml_uri);
        free(rml_uri);
        
        cnt = 1;
    }
    if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }    

    return ORTE_SUCCESS;    
}
Esempio n. 21
0
orte_proc_t* orte_get_proc_object(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    orte_proc_t *proct;

    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        return NULL;
    }
    proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
    return proct;
}
Esempio n. 22
0
static void srun_wait_cb(orte_proc_t *proc, void* cbdata){
    orte_job_t *jdata;

    /* According to the SLURM folks, srun always returns the highest exit
     code of our remote processes. Thus, a non-zero exit status doesn't
     necessarily mean that srun failed - it could be that an orted returned
     a non-zero exit status. Of course, that means the orted failed(!), so
     the end result is the same - the job didn't start.

     As a result, we really can't do much with the exit status itself - it
     could be something in errno (if srun itself failed), or it could be
     something returned by an orted, or it could be something returned by
     the OS (e.g., couldn't find the orted binary). Somebody is welcome
     to sort out all the options and pretty-print a better error message. For
     now, though, the only thing that really matters is that
     srun failed. Report the error and make sure that orterun
     wakes up - otherwise, do nothing!

     Unfortunately, the pid returned here is the srun pid, not the pid of
     the proc that actually died! So, to avoid confusion, just use -1 as the
     pid so nobody thinks this is real
     */

    jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

    /* abort only if the status returned is non-zero - i.e., if
    * the orteds exited with an error
     */
    if (0 != proc->exit_code) {
        /* an orted must have died unexpectedly - report
         * that the daemon has failed so we exit
         */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                             "%s plm:slurm: daemon failed while running",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED);
    } else {
        /* otherwise, check to see if this is the primary pid */
        if (primary_srun_pid == proc->pid) {
            /* in this case, we just want to fire the proper trigger so
             * mpirun can exit
             */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s plm:slurm: primary daemons complete!",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            /* need to set the #terminated value to avoid an incorrect error msg */
            jdata->num_terminated = jdata->num_procs;
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
        }
    }

    /* done with this dummy */
    OBJ_RELEASE(proc);
}
Esempio n. 23
0
static orte_proc_t* find_proc(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    
    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        opal_output(0, "CANNOT FIND JOB OBJECT FOR PROC %s", ORTE_NAME_PRINT(proc));
        return NULL;
    }

    return (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
}
Esempio n. 24
0
static void timeout(int fd, short args, void *cbdata)
{
    local_jobtracker_t *jtrk = (local_jobtracker_t*)cbdata;
    orte_job_t *jdata;

    orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-timeout", true);
    opal_output_verbose(2, orte_ras_base_framework.framework_output,
                        "%s Timed out on dynamic allocation",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    /* indicate that we failed to receive an allocation */
    jdata = orte_get_job_data_object(jtrk->jobid);
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED);
}
Esempio n. 25
0
static void get_routing_list(opal_list_t *coll)
{
    orte_namelist_t *nm;
    int32_t i;
    orte_job_t *jdata;
    orte_proc_t *proc;
    
    /* if I am anything other than daemons and the HNP, this
     * is a meaningless command as I am not allowed to route
     */
    if (!ORTE_PROC_IS_DAEMON || !ORTE_PROC_IS_HNP) {
        return;
    }
    
    /* daemons don't route */
    if (ORTE_PROC_IS_DAEMON) {
        return;
    }
    /* HNP sends direct to each daemon */
    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }
    for (i=1; i < jdata->procs->size; i++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
            continue;
        }
        if( proc->state <= ORTE_PROC_STATE_UNTERMINATED &&
            NULL != proc->rml_uri ) {
            OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
                                 "%s get_routing_tree: Adding process %s state %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&(proc->name)),
                                 orte_proc_state_to_str(proc->state)));

            nm = OBJ_NEW(orte_namelist_t);
            nm->name.jobid = proc->name.jobid;
            nm->name.vpid = proc->name.vpid;
            opal_list_append(coll, &nm->super);
        } else {
            OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
                                 "%s get_routing_tree: Skipped process %15s state %s (non functional daemon)",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&(proc->name)),
                                 orte_proc_state_to_str(proc->state)));
        }
    }
}
Esempio n. 26
0
static orte_proc_t* find_proc(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    orte_proc_t **procs;
    
    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        return NULL;
    }
    procs = (orte_proc_t**)jdata->procs->addr;
    
    if (jdata->num_procs < proc->vpid) {
        return NULL;
    }
    
    return procs[proc->vpid];
}
Esempio n. 27
0
orte_vpid_t orte_get_proc_daemon_vpid(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    orte_proc_t *proct;

    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        return ORTE_VPID_INVALID;
    }
    if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid))) {
        return ORTE_VPID_INVALID;
    }
    if (NULL == proct->node || NULL == proct->node->daemon) {
        return ORTE_VPID_INVALID;
    }
    return proct->node->daemon->name.vpid;
}
Esempio n. 28
0
int orte_errmgr_base_restart_job(orte_jobid_t jobid, char * global_handle, int seq_num)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_process_name_t loc_proc;
    orte_job_t *jdata;
    orte_sstore_base_handle_t prev_sstore_handle = ORTE_SSTORE_HANDLE_INVALID;

    /* JJH First determine if we can recover this way */

    /*
     * Find the corresponding sstore handle
     */
    prev_sstore_handle = orte_sstore_handle_last_stable;
    if( ORTE_SUCCESS != (ret = orte_sstore.request_restart_handle(&orte_sstore_handle_last_stable,
                                                                  NULL,
                                                                  global_handle,
                                                                  seq_num,
                                                                  NULL)) ) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /* get the job object */
    if (NULL == (jdata = orte_get_job_data_object(jobid))) {
        exit_status = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(exit_status);
        goto cleanup;
    }

    /*
     * Start the recovery
     */
    orte_snapc_base_has_recovered = false;
    loc_proc.jobid = jobid;
    loc_proc.vpid  = 0;
    ORTE_ACTIVATE_PROC_STATE(&loc_proc, ORTE_PROC_STATE_KILLED_BY_CMD);
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_CONTROL_RESTART);
    while( !orte_snapc_base_has_recovered ) {
        opal_progress();
    }
    orte_sstore_handle_last_stable = prev_sstore_handle;

 cleanup:
    return exit_status;
}
Esempio n. 29
0
static size_t num_routes(void)
{
    orte_job_t *jdata;

    if (!ORTE_PROC_IS_HNP) {
        return 0;
    }

    /* if I am the HNP, then the number of routes is
     * the number of daemons still alive (other than me)
     */
    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return 0;
    }

    return (jdata->num_procs - jdata->num_terminated - 1);
}
Esempio n. 30
0
/* after we allocate, we need to map the processes
 * so we know what nodes will be used
 */
static void allocation_complete(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata = state->jdata;
    orte_job_t *daemons;

    jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;

    /* get the daemon job object */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto done;
    }
    /* mark that we are not using a VM */
    daemons->controls |= ORTE_JOB_CONTROL_NO_VM;

#if OPAL_HAVE_HWLOC
    {
        hwloc_topology_t t;
        orte_node_t *node;
        int i;

        /* ensure that all nodes point to our topology - we
         * cannot support hetero nodes with this state machine
         */
        t = (hwloc_topology_t)opal_pointer_array_get_item(orte_node_topologies, 0);
        for (i=1; i < orte_node_pool->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                continue;
            }
            node->topology = t;
        }
    }
#endif
    
    /* move to the map stage */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);

 done:
    /* cleanup */
    OBJ_RELEASE(state);
}