Exemple #1
0
/***   NIDMAP UTILITIES   ***/
orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job)
{
    orte_jmap_t *candidate;
    int idx;

    /* Jobids are constructed values and therefore cannot double as
     * array indices, so the jobmap array has to be scanned linearly.
     * Entries are set to NULL when a job completes, which means empty
     * slots can appear anywhere in the array and must be skipped.
     */
    for (idx = 0; idx < orte_jobmap.size; ++idx) {
        candidate = (orte_jmap_t*)opal_pointer_array_get_item(&orte_jobmap, idx);
        if (NULL != candidate) {
            OPAL_OUTPUT_VERBOSE((10, orte_debug_output,
                                 "%s lookup:pmap: checking job %s for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(candidate->job), ORTE_JOBID_PRINT(job)));
            if (candidate->job == job) {
                return candidate;
            }
        }
    }

    /* no entry matches the requested job */
    return NULL;
}
Exemple #2
0
/*
 * Print a one-line message on output stream 0 describing a process
 * state change.
 *
 * state  the reported process state
 * proc   name of the affected process; when NULL nothing is printed
 *
 * States that have no associated message are silently ignored.
 */
void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
{
    if (NULL == proc) {
        return;
    }

    switch(state) {
    case ORTE_PROC_STATE_ABORTED:
    case ORTE_PROC_STATE_ABORTED_BY_SIG:
    case ORTE_PROC_STATE_TERM_WO_SYNC:
    case ORTE_PROC_STATE_TERMINATED:
    case ORTE_PROC_STATE_KILLED_BY_CMD:
    case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
        opal_output(0, "%d: Process %s is dead.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    case ORTE_PROC_STATE_HEARTBEAT_FAILED:
        opal_output(0, "%d: Process %s is unreachable.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        /* stop here: without this break the communication-failure
         * message below would be printed as well
         */
        break;

    case ORTE_PROC_STATE_COMM_FAILED:
        opal_output(0, "%d: Failed to communicate with process %s.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    case ORTE_PROC_STATE_CALLED_ABORT:
    case ORTE_PROC_STATE_FAILED_TO_START:
        /* NOTE(review): a failed start is reported with the
         * "called abort" wording - confirm this is intended
         */
        opal_output(0, "%d: Process %s has called abort.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    case ORTE_PROC_STATE_MIGRATING:
    default:
        /* nothing to report for these states */
        break;
    }
}
Exemple #3
0
/*****************
 * Local Functions
 *****************/
/*
 * Abort handling for a job: stop its sensors, set the global
 * termination flags, warn the user about non-zero exit codes and ask
 * the PLM to terminate the daemons.
 *
 * jdata  the job that triggered the abort
 *
 * Only the first caller proceeds; a call made while the abort lock is
 * already held just logs and returns.
 */
static void default_hnp_abort(orte_job_t *jdata)
{
    int rc;

    /* if we are already in progress, then ignore this call */
    if (opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                             "%s errmgr:default_hnp: abort in progress, ignoring abort on job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_hnp: abort called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* the job aborted - turn off any sensors on this job */
    orte_sensor.stop(jdata->jobid);

    /* set control params to indicate we are terminating */
    orte_job_term_ordered = true;
    orte_enable_recovery = false;

    /* if it is the daemon job that aborted, then we need
     * to flag an abnormal term - otherwise, just abort
     * the job cleanly
     */
    if (ORTE_PROC_MY_NAME->jobid == jdata->jobid) {
        orte_abnormal_term_ordered = true;
    }

    if (0 < jdata->num_non_zero_exit) {
        /* warn user - the format string supplies the sentence-ending
         * period, so the singular/plural tails must not carry one
         * themselves (otherwise the message reads "exit code.. Per")
         */
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
                    "-------------------------------------------------------",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
                    jdata->num_non_zero_exit,
                    (1 == jdata->num_non_zero_exit) ? "process returned\na non-zero exit code" :
                    "processes returned\nnon-zero exit codes");
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_hnp: ordering orted termination",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* tell the plm to terminate the orteds - they will automatically
     * kill their local procs
     */
    if (ORTE_SUCCESS != (rc = orte_plm.terminate_orteds())) {
        ORTE_ERROR_LOG(rc);
    }
}
Exemple #4
0
/*****************
 * Local Functions
 *****************/
static void default_hnp_abort(orte_job_t *jdata)
{
    int32_t num_nonzero = 0;
    int32_t *num_nonzero_ptr = &num_nonzero;
    int rc;

    /* only one abort may be in flight - a non-zero trylock result
     * means somebody else already holds the lock
     */
    if (opal_atomic_trylock(&orte_abort_inprogress_lock)) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                             "%s errmgr:default_hnp: abort in progress, ignoring abort on job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:default_hnp: abort called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* record globally that termination has been ordered */
    orte_job_term_ordered = true;
    orte_enable_recovery = false;

    /* an abort of our own (daemon) job counts as an abnormal
     * termination; any other job is simply aborted
     */
    if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
        orte_abnormal_term_ordered = true;
    }

    /* when the job carries the non-zero-exit attribute, tell the user
     * how many procs were affected
     */
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT,
                           (void**)&num_nonzero_ptr, OPAL_INT32)) {
        const char *tail;

        if (1 == num_nonzero) {
            tail = "process returned\na non-zero exit code";
        } else {
            tail = "processes returned\nnon-zero exit codes";
        }
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
                    "-------------------------------------------------------",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
                    num_nonzero, tail);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:default_hnp: ordering orted termination",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* have the plm take down the orteds; per the original note they
     * kill their local procs on their own
     */
    rc = orte_plm.terminate_orteds();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
}
Exemple #5
0
static void job_errors(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_job_state_t jobstate = caddy->job_state;
    char *msg;

    /*
     * if orte is trying to shutdown, just let it
     */
    if (orte_finalizing) {
        return;
    }

    /* if the jdata is NULL, then we abort as this
     * is reporting an unrecoverable error
     */
    if (NULL == caddy->jdata) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:orcm: jobid %s reported error state %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         orte_job_state_to_str(jobstate)));
        asprintf(&msg, "%s errmgr:orcm: jobid %s reported error state %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         orte_job_state_to_str(jobstate));
        /* notify this */
        ORTE_NOTIFIER_INTERNAL_ERROR(caddy->jdata, jobstate, ORTE_NOTIFIER_CRIT, 1, msg);
    /* cleanup */
    /* ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT);*/
        OBJ_RELEASE(caddy);
        return;
    }

    /* update the state */
    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:orcm: job %s reported error state %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(caddy->jdata->jobid),
                         orte_job_state_to_str(jobstate)));

    asprintf(&msg, "%s errmgr:orcm: jobid %s reported error state %s",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                     ORTE_JOBID_PRINT(caddy->jdata->jobid),
                     orte_job_state_to_str(jobstate));
    /* notify this */
    ORTE_NOTIFIER_INTERNAL_ERROR(caddy->jdata, jobstate, ORTE_NOTIFIER_WARN, 1, msg);

    /* cleanup */
    OBJ_RELEASE(caddy);
}
/*
 * STANDARD PRINT FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED
 */
int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t type)
{
    /* the caller sees NULL unless one of the printers fills it in */
    *output = NULL;

    /* jobids are the one type formatted here directly instead of
     * through the quick-print helper
     */
    if (ORTE_JOBID == type) {
        asprintf(output, "%sData Type: ORTE_JOBID\tData size: %lu\tValue: %s",
                 (NULL == prefix) ? "" : prefix, (unsigned long)sizeof(orte_jobid_t),
                 ORTE_JOBID_PRINT(*(orte_jobid_t*)src));
        return ORTE_SUCCESS;
    }

    switch(type) {
        case ORTE_STD_CNTR:
            orte_dt_quick_print(output, "ORTE_STD_CNTR", prefix, src, ORTE_STD_CNTR_T);
            return ORTE_SUCCESS;

        case ORTE_VPID:
            orte_dt_quick_print(output, "ORTE_VPID", prefix, src, ORTE_VPID_T);
            return ORTE_SUCCESS;

#if !ORTE_DISABLE_FULL_SUPPORT
        case ORTE_PROC_STATE:
            orte_dt_quick_print(output, "ORTE_PROC_STATE", prefix, src, ORTE_PROC_STATE_T);
            return ORTE_SUCCESS;

        case ORTE_JOB_STATE:
            orte_dt_quick_print(output, "ORTE_JOB_STATE", prefix, src, ORTE_JOB_STATE_T);
            return ORTE_SUCCESS;

        case ORTE_NODE_STATE:
            orte_dt_quick_print(output, "ORTE_NODE_STATE", prefix, src, ORTE_NODE_STATE_T);
            return ORTE_SUCCESS;

        case ORTE_EXIT_CODE:
            orte_dt_quick_print(output, "ORTE_EXIT_CODE", prefix, src, ORTE_EXIT_CODE_T);
            return ORTE_SUCCESS;

        case ORTE_RML_TAG:
            orte_dt_quick_print(output, "ORTE_RML_TAG", prefix, src, ORTE_RML_TAG_T);
            return ORTE_SUCCESS;

        case ORTE_DAEMON_CMD:
            orte_dt_quick_print(output, "ORTE_DAEMON_CMD", prefix, src, ORTE_DAEMON_CMD_T);
            return ORTE_SUCCESS;

        case ORTE_GRPCOMM_MODE:
            orte_dt_quick_print(output, "ORTE_GRPCOMM_MODE", prefix, src, ORTE_GRPCOMM_MODE_T);
            return ORTE_SUCCESS;

        case ORTE_IOF_TAG:
            orte_dt_quick_print(output, "ORTE_IOF_TAG", prefix, src, ORTE_IOF_TAG_T);
            return ORTE_SUCCESS;
#endif

        default:
            break;
    }

    /* anything else is a type this printer does not know about */
    ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE);
    return ORTE_ERR_UNKNOWN_DATA_TYPE;
}
static void failed_start(orte_odls_job_t *jobdat, orte_exit_code_t exit_code)
{
    orte_odls_child_t *kid;
    opal_list_item_t *cursor;

    /* record that the job failed to start */
    jobdat->state = ORTE_JOB_STATE_FAILED_TO_START;

    cursor = opal_list_get_first(&orte_local_children);
    while (cursor != opal_list_get_end(&orte_local_children)) {
        kid = (orte_odls_child_t*)cursor;
        /* children that never launched have no pipes to close and will
         * never be reaped, so mark both iof and waitpid as done to
         * avoid hanging while waiting on them
         */
        if (kid->name->jobid == jobdat->jobid &&
            (kid->state < ORTE_PROC_STATE_LAUNCHED ||
             kid->state == ORTE_PROC_STATE_FAILED_TO_START)) {
            kid->iof_complete = true;
            kid->waitpid_recvd = true;
        }
        cursor = opal_list_get_next(cursor);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:hnp: job %s reported incomplete start",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobdat->jobid)));
}
/*
 * Print the report for one mpirun: a header line underlined with
 * dashes, optionally the node table, then the job table.
 *
 * hnpinfo  the information collected for this mpirun
 *
 * Returns ORTE_SUCCESS.
 */
static int pretty_print(orte_ps_mpirun_info_t *hnpinfo) {
    int len_hdr;

    /*
     * Print header and remember header length. The header is written
     * directly and its length is taken from printf's return value, so
     * no temporary string is allocated and there is no allocation
     * failure that could leave a pointer undefined.
     */
    printf("\n\n");
    len_hdr = printf("Information from mpirun %s",
                     ORTE_JOBID_PRINT(hnpinfo->hnp->name.jobid));
    printf("\n");
    if (len_hdr < 0) {
        /* output error - do not ask for a negative-length underline */
        len_hdr = 0;
    }
    pretty_print_dashed_line(len_hdr);

    /*
     * Print Node Information
     */
    if( orte_ps_globals.nodes ) {
        pretty_print_nodes(hnpinfo->nodes, hnpinfo->num_nodes);
    }

    /*
     * Print Job Information
     */
    pretty_print_jobs(hnpinfo->jobs, hnpinfo->num_jobs);

    return ORTE_SUCCESS;
}
static void failed_start(orte_job_t *jobdat)
{
    orte_proc_t *kid;
    int slot;

    /* record that the job failed to start */
    jobdat->state = ORTE_JOB_STATE_FAILED_TO_START;

    for (slot = 0; slot < orte_local_children->size; ++slot) {
        kid = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, slot);
        /* ignore empty slots and children of other jobs */
        if (NULL == kid || kid->name.jobid != jobdat->jobid) {
            continue;
        }
        if (ORTE_PROC_STATE_FAILED_TO_START != kid->state) {
            continue;
        }
        /* the proc never launched: no pipes were opened and nothing
         * will be reaped, so flag iof and waitpid as complete to
         * keep us from hanging on them
         */
        ORTE_FLAG_SET(kid, ORTE_PROC_FLAG_IOF_COMPLETE);
        ORTE_FLAG_SET(kid, ORTE_PROC_FLAG_WAITPID);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:hnp: job %s reported incomplete start",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobdat->jobid)));
}
static int plm_yarn_actual_launch_procs(orte_job_t* jdata)
{
    int launched = 0;
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                    "%s plm:yarn:plm_yarn_actual_launch_procs for job %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_JOBID_PRINT(jdata->jobid)));

    /* hand the job to the common launcher, which reports back how
     * many procs it launched
     */
    rc = common_launch_process(jdata, false, &launched);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    /* the job only moves to RUNNING once every one of its procs has
     * been launched
     */
    if (jdata->num_procs == launched) {
        jdata->state = ORTE_JOB_STATE_RUNNING;
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                        "%s plm:yarn:plm_yarn_actual_launch_procs: launch jdata procs successfully with AM",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    }

    return ORTE_SUCCESS;
}
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_node_t *node,
                                    bool oversubscribed, orte_proc_t *proc)
{
    orte_node_t *existing;
    orte_std_cntr_t slot;
    bool already_mapped = false;
    int rc;

    /* look for this node among the ones the map already references,
     * comparing by node index and skipping empty array slots
     */
    for (slot = 0; slot < map->nodes->size; ++slot) {
        existing = (orte_node_t*)opal_pointer_array_get_item(map->nodes, slot);
        if (NULL != existing && existing->index == node->index) {
            already_mapped = true;
            break;
        }
    }

    if (!already_mapped) {
        /* first proc on this node - register the node with the map */
        OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
                             "%s rmaps:base: adding node %s to map",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == node->name) ? "NULL" : node->name));

        rc = opal_pointer_array_add(map->nodes, (void*)node);
        if (ORTE_SUCCESS > rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* the map now holds a reference to the node */
        OBJ_RETAIN(node);
        ++map->num_nodes;
    }

    /* attach the proc to the node's local procs; the proc is assumed
     * not to be there yet, since a duplicate would be a mapper error
     */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
                         "%s rmaps:base: mapping proc for job %s to node %s whose daemon is %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(proc->name.jobid),
                         (NULL == node->name) ? "NULL" : node->name,
                         (NULL == node->daemon) ? "NULL" : ORTE_NAME_PRINT(&(node->daemon->name))));

    rc = opal_pointer_array_add(node->procs, (void*)proc);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    /* the node now holds a reference to the proc */
    OBJ_RETAIN(proc);
    ++node->num_procs;

    /* remember whether the node is oversubscribed */
    node->oversubscribed = oversubscribed;

    return ORTE_SUCCESS;
}
Exemple #12
0
/*
 * Unpack (vpid, rml uri) pairs from a buffer and store each uri on
 * the matching proc object of the given job.
 *
 * job     the job the entries belong to
 * buffer  buffer holding the packed pairs
 *
 * Returns ORTE_SUCCESS once the buffer has been read to its end,
 * ORTE_ERR_NOT_FOUND if the job is unknown, ORTE_ERR_FATAL if an entry
 * carries a NULL uri, or the unpack error that stopped the loop.
 */
int orte_routed_base_process_callback(orte_jobid_t job, opal_buffer_t *buffer)
{
    orte_proc_t *proc;
    orte_job_t *jdata;
    orte_std_cntr_t cnt;
    char *rml_uri;
    orte_vpid_t vpid;
    int rc;

    /* lookup the job object for this process */
    if (NULL == (jdata = orte_get_job_data_object(job))) {
        /* came from a different job family - this is an error */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* unpack the data for each entry; cnt is reset to 1 before every
     * trip back to the loop condition so that exactly one vpid is
     * requested each time
     */
    cnt = 1;
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &vpid, &cnt, ORTE_VPID))) {

        rml_uri = NULL;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            cnt = 1;
            continue;
        }

        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_binomial:callback got uri %s for job %s rank %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == rml_uri) ? "NULL" : rml_uri,
                             ORTE_JOBID_PRINT(job), ORTE_VPID_PRINT(vpid)));

        if (NULL == rml_uri) {
            /* should not happen */
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            return ORTE_ERR_FATAL;
        }

        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            /* nobody takes the string, so release it here */
            free(rml_uri);
            cnt = 1;
            continue;
        }

        /* update the record - the unpacked string is handed over to
         * the proc as-is instead of being duplicated and then freed
         */
        proc->rml_uri = rml_uri;

        cnt = 1;
    }
    /* running off the end of the buffer is the normal way out */
    if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
Exemple #13
0
/* Report the checkpoint status over the notifier interface */
void orte_snapc_ckpt_state_notify(int state)
{
    const char *fmt;
    int severity;

    /* choose severity and message for the states that get reported;
     * every other state leaves without logging
     */
    switch(state) {
    case ORTE_SNAPC_CKPT_STATE_ESTABLISHED:
        severity = ORTE_NOTIFIER_INFO;
        fmt = "%d: Checkpoint established for process %s.";
        break;
    case ORTE_SNAPC_CKPT_STATE_NO_CKPT:
        severity = ORTE_NOTIFIER_WARN;
        fmt = "%d: Process %s is not checkpointable.";
        break;
    case ORTE_SNAPC_CKPT_STATE_ERROR:
        severity = ORTE_NOTIFIER_WARN;
        fmt = "%d: Failed to checkpoint process %s.";
        break;
    case ORTE_SNAPC_CKPT_STATE_RECOVERED:
        severity = ORTE_NOTIFIER_INFO;
        fmt = "%d: Successfully restarted process %s.";
        break;
    case ORTE_SNAPC_CKPT_STATE_NO_RESTART:
        severity = ORTE_NOTIFIER_WARN;
        fmt = "%d: Failed to restart process %s.";
        break;
    /* these states are listed for reference only - no notification
     * is sent for them at present
     */
    case ORTE_SNAPC_CKPT_STATE_NONE:
    case ORTE_SNAPC_CKPT_STATE_REQUEST:
    case ORTE_SNAPC_CKPT_STATE_PENDING:
    case ORTE_SNAPC_CKPT_STATE_RUNNING:
    case ORTE_SNAPC_CKPT_STATE_STOPPED:
    case ORTE_SNAPC_CKPT_STATE_MIGRATING:
    case ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL:
    default:
        return;
    }

    orte_notifier.log(severity, ORTE_SNAPC_CKPT_NOTIFY(state), fmt,
                      orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
}
Exemple #14
0
void orte_errmgr_base_migrate_state_notify(int state)
{
    const char *fmt;

    /* only failure and completion are reported; all other migration
     * states return without printing anything
     */
    switch(state) {
    case ORTE_ERRMGR_MIGRATE_STATE_ERROR:
    case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS:
        fmt = "%d: Migration failed for process %s.";
        break;
    case ORTE_ERRMGR_MIGRATE_STATE_FINISH:
        fmt = "%d: Migration successful for process %s.";
        break;

    case ORTE_ERRMGR_MIGRATE_STATE_NONE:
    case ORTE_ERRMGR_MIGRATE_STATE_REQUEST:
    case ORTE_ERRMGR_MIGRATE_STATE_RUNNING:
    case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT:
    case ORTE_ERRMGR_MIGRATE_STATE_STARTUP:
    case ORTE_ERRMGR_MIGRATE_MAX:
    default:
        return;
    }

    opal_output(0, fmt,
                orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
}
/*
 * This function gets called by the PLM when an orted notifies us that
 * a job failed to start.
 * Various components will follow their own strategy for dealing with
 * this situation. For this component, we simply kill the job.
 */
void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code)
{
    int rc;

    OPAL_TRACE(1);

    /* bail out if an abort is already being processed.
     * NOTE(review): the call is skipped when trylock returns zero,
     * whereas the original remark on this line said trylock "returns 1
     * if already locked" - confirm the test against the
     * opal_atomic_trylock contract of this code base.
     */
    if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                             "%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job), exit_code));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                         "%s errmgr:default: job %s reported incomplete start with status %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), exit_code));

    orte_job_term_ordered = true;

    /* ask the plm to take down every job */
    rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* record the exit status in case the caller did not; per the
     * original note it can only be set once, so an earlier value is
     * not overwritten
     */
    ORTE_UPDATE_EXIT_STATUS(exit_code);

    /* fire the exit trigger so that orterun can leave */
    orte_trigger_event(&orte_exit);
}
Exemple #16
0
static int xcast(orte_jobid_t job,
                 opal_buffer_t *buffer,
                 orte_rml_tag_t tag)
{
    opal_buffer_t relay;
    int rc = ORTE_SUCCESS;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));

    /* nothing to send counts as success */
    if (NULL == buffer) {
        return ORTE_SUCCESS;
    }

    /* build the relay message in a stack-constructed buffer */
    OBJ_CONSTRUCT(&relay, opal_buffer_t);

    rc = orte_grpcomm_base_app_pack_xcast(ORTE_DAEMON_PROCESS_AND_RELAY_CMD,
                                          job, &relay, buffer, tag);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_PROC_IS_HNP) {
        /* the HNP must not message itself, as that can create circular
         * logic in the RML. The macro instead sets up a zero-time event
         * that lets the cmd processor handle the buffer, and it copies
         * the buffer, so destructing ours below is fine
         */
        ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &relay, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
    } else {
        /* everyone else sends the message to the HNP for relay */
        rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &relay, ORTE_RML_TAG_DAEMON, 0);
        if (0 > rc) {
            ORTE_ERROR_LOG(rc);
        } else {
            rc = ORTE_SUCCESS;
        }
    }

    OBJ_DESTRUCT(&relay);
    return rc;
}
Exemple #17
0
/*
 * State-machine callback that marks a job as notified and sends it
 * back through the terminated state.
 *
 * fd, argc  unused event arguments
 * cbdata    the orte_state_caddy_t carrying the job; always released
 */
void orte_state_base_cleanup_job(int fd, short argc, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata = caddy->jdata;

    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                         "%s state:base:cleanup on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid)));

    /* the log statement above allows for a caddy without a job, so do
     * not dereference a NULL job here either
     */
    if (NULL != jdata) {
        /* flag that we were notified */
        jdata->state = ORTE_JOB_STATE_NOTIFIED;
        /* send us back thru job complete */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED);
    }
    OBJ_RELEASE(caddy);
}
Exemple #18
0
void orte_jmap_dump(orte_jmap_t *jmap)
{
    orte_pmap_t *entry;
    int slot;

    /* banner naming the job and its number of procs */
    opal_output(orte_clean_output, "****   DUMP OF JOB %s (%s procs)   ***",
                ORTE_JOBID_PRINT(jmap->job), ORTE_VPID_PRINT(jmap->num_procs));

    /* one line per populated slot of the proc map */
    for (slot = 0; slot < jmap->pmap.size; ++slot) {
        entry = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, slot);
        if (NULL != entry) {
            opal_output(orte_clean_output, "\tnode %d local_rank %d node_rank %d",
                        entry->node, (int)entry->local_rank, (int)entry->node_rank);
        }
    }

    opal_output(orte_clean_output, "\n");
}
Exemple #19
0
/*
 * Event callback that restarts a job: resets it, maps it again and
 * sends the launch data to the daemons.
 *
 * fd, args  unused event arguments
 * cbdata    the orte_errmgr_caddy_t carrying the job; always released
 */
static void launch_restart(int fd, short args, void *cbdata)
{
    orte_errmgr_caddy_t *cd = (orte_errmgr_caddy_t*)cbdata;
    int rc;
    opal_buffer_t *bfr;
    uint16_t jfam;

    OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output,
                         "%s RESTARTING JOB %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(cd->jdata->jobid)));

    /* reset the job */
    orte_plm_base_reset_job(cd->jdata);

    /* the resilient mapper will automatically avoid restarting the
     * proc on its former node
     */

    /* map the job again */
    if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(cd->jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    bfr = OBJ_NEW(opal_buffer_t);
    /* indicate the target DVM - a message without this field must not
     * be sent, so a pack failure ends the restart attempt
     */
    jfam = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
    if (ORTE_SUCCESS != (rc = opal_dss.pack(bfr, &jfam, 1, OPAL_UINT16))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(bfr);
        goto cleanup;
    }

    /* get the launch data */
    if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(bfr, cd->jdata->jobid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(bfr);
        goto cleanup;
    }
    /* send it to the daemons */
    if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL,
                                                 NULL, ORCM_PNP_TAG_COMMAND,
                                                 NULL, 0, bfr, cbfunc, NULL))) {
        /* NOTE(review): bfr is not released on this path - confirm
         * who owns the buffer when output_nb reports an error
         */
        ORTE_ERROR_LOG(rc);
    }

 cleanup:
    OBJ_RELEASE(cd);
}
/*
 * Write a notifier request to syslog, prefixed with a timestamp built
 * from the request's time field.
 *
 * req  the request to log; a NULL message is reported as "<N/A>" and a
 *      NULL job as ORTE_JOBID_INVALID
 */
static void mylog(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                           "notifier:syslog:mylog function called with severity %d errcode %d and messg %s",
                           (int)req->severity, req->errcode,
                           (NULL == req->msg) ? "<N/A>" : req->msg);

    /* format the timestamp; leave it empty if the conversion fails so
     * that the buffer is never read uninitialized
     */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }
    /* trim the newline - it is the last character before the
     * terminator, so index strlen - 1 (not strlen) must be cleared
     */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s %s: JOBID %s REPORTS ERROR %s: %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           orte_notifier_base_sev2str(req->severity),
           ORTE_JOBID_PRINT((NULL == req->jdata) ?
                            ORTE_JOBID_INVALID : req->jdata->jobid),
           orte_job_state_to_str(req->state),
           (NULL == req->msg) ? "<N/A>" : req->msg);
}
Exemple #21
0
static void track_jobs(int fd, short argc, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_plm_cmd_flag_t cmd = ORTE_PLM_UPDATE_PROC_STATE;
    opal_buffer_t *alert;
    int rc;

    /* only the local-launch-complete state triggers a report */
    if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE != caddy->job_state) {
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
                         "%s state:orted:track_jobs sending local launch complete for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(caddy->jdata->jobid)));

    /* build the update for the HNP: the update-state command first,
     * then the state of the job's procs
     */
    alert = OBJ_NEW(opal_buffer_t);
    rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD);
    if (ORTE_SUCCESS == rc) {
        rc = pack_state_update(alert, caddy->jdata);
    }
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(alert);
        goto cleanup;
    }

    /* hand the buffer to the non-blocking send; release it ourselves
     * if the send is refused
     */
    rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
                                 ORTE_RML_TAG_PLM,
                                 orte_rml_send_callback, NULL);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(alert);
    }

 cleanup:
    OBJ_RELEASE(caddy);
}
Exemple #22
0
static int update_state(orte_jobid_t job,
                        orte_job_state_t jobstate,
                        orte_process_name_t *proc,
                        orte_proc_state_t state,
                        pid_t pid,
                        orte_exit_code_t exit_code)
{
    orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_app: job %s reported state %s"
                         " for proc %s state %s exit_code %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job),
                         orte_job_state_to_str(jobstate),
                         (NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc),
                         orte_proc_state_to_str(state), exit_code));

    /* stay out of the way while orte is shutting down */
    if (orte_finalizing) {
        return ORTE_SUCCESS;
    }

    /* a communication failure is the only state acted upon */
    if (ORTE_PROC_STATE_COMM_FAILED != state) {
        return ORTE_SUCCESS;
    }

    /* losing the connection to ourselves is not an error */
    if (OPAL_EQUAL == orte_util_compare_name_fields(mask, ORTE_PROC_MY_NAME, proc)) {
        return ORTE_SUCCESS;
    }

    /* let the routed framework decide whether this was a lifeline */
    if (ORTE_SUCCESS == orte_routed.route_lost(proc)) {
        return ORTE_SUCCESS;
    }
    return ORTE_ERR_UNRECOVERABLE;
}
Exemple #23
0
/*
 * Broadcast a message to a job by handing it to the HNP for relay.
 *
 * The caller's buffer is wrapped by orte_grpcomm_base_pack_xcast() into a
 * freshly allocated buffer, which is then sent non-blocking to
 * ORTE_PROC_MY_HNP on ORTE_RML_TAG_XCAST.
 *
 * @param job     target job of the broadcast
 * @param buffer  payload to send; NULL means "nothing to send" and is
 *                treated as success
 * @param tag     RML tag passed to the packing routine
 *
 * @return ORTE_SUCCESS, or the error code from packing or sending.
 *
 * The relay buffer is released here on every failure path.  After a
 * successful send it is left to orte_rml_send_callback - presumably that
 * callback releases it; confirm against the RML.
 */
static int xcast(orte_jobid_t job,
                 opal_buffer_t *buffer,
                 orte_rml_tag_t tag)
{
    int rc = ORTE_SUCCESS;
    opal_buffer_t *buf;
    
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:bad:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));
    
    /* if there is no message to send, then just return ok */
    if (NULL == buffer) {
        return ORTE_SUCCESS;
    }
    
    /* prep the output buffer */
    buf = OBJ_NEW(opal_buffer_t);
    
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_xcast(job, buf, buffer, tag))) {
        ORTE_ERROR_LOG(rc);
        /* the buffer was never handed to the RML, so it is still ours
         * to release - otherwise it would be leaked
         */
        OBJ_RELEASE(buf);
        goto CLEANUP;
    }
    
    /* send it to the HNP (could be myself) for relay */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
                                          0, orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        goto CLEANUP;
    }
    /* a non-negative send result is reported to the caller as plain success */
    rc = ORTE_SUCCESS;
    
CLEANUP:
    return rc;
}
Exemple #24
0
/* Debugging aid, not used otherwise: dumps every job stored in
 * orte_job_data, followed by the name of each of its procs, through
 * opal_output stream 0. Empty slots in either array are skipped.
 */
void print_orte_job_data() {
    orte_job_t *job;
    orte_proc_t *proc;
    int job_idx, proc_idx;

    if (NULL == orte_job_data) {
        opal_output(0, "ORTE_JOB_DATA == NULL");
        return;
    }

    for (job_idx = 0; job_idx < orte_job_data->size; job_idx++) {
        job = (orte_job_t *) opal_pointer_array_get_item(orte_job_data, job_idx);
        if (NULL == job) {
            continue;
        }
        opal_output(0, "JOB: %s", ORTE_JOBID_PRINT(job->jobid));

        /* only the first num_procs slots of the proc array are examined */
        for (proc_idx = 0; proc_idx < job->num_procs; proc_idx++) {
            proc = (orte_proc_t *) opal_pointer_array_get_item(job->procs, proc_idx);
            if (NULL != proc) {
                opal_output(0, "    PROC: %s", ORTE_NAME_PRINT(&(proc->name)));
            }
        }
    }
}
/*
 * Terminate all processes of a job via the daemons.
 *
 * A stack-constructed orte_proc_t named (jobid, ORTE_VPID_WILDCARD) is
 * placed as the only entry in a temporary pointer array, which is passed
 * to orte_plm_base_orted_kill_local_procs().  Both temporaries are
 * destructed before returning.
 *
 * @param jobid  job whose processes are to be terminated
 * @return the return code of orte_plm_base_orted_kill_local_procs();
 *         a failure is also reported through ORTE_ERROR_LOG.
 */
int orte_plm_base_orted_terminate_job(orte_jobid_t jobid)
{
    opal_pointer_array_t targets;
    orte_proc_t wildcard;
    int rc;
    
    OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
                         "%s plm:base:orted_terminate job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* set up the one-slot array that will carry the request */
    OBJ_CONSTRUCT(&targets, opal_pointer_array_t);
    opal_pointer_array_init(&targets, 1, 1, 1);

    /* a single entry with a wildcard vpid stands for the whole job */
    OBJ_CONSTRUCT(&wildcard, orte_proc_t);
    wildcard.name.jobid = jobid;
    wildcard.name.vpid = ORTE_VPID_WILDCARD;
    opal_pointer_array_add(&targets, &wildcard);

    rc = orte_plm_base_orted_kill_local_procs(&targets);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* tear down the temporaries in the same order as before */
    OBJ_DESTRUCT(&targets);
    OBJ_DESTRUCT(&wildcard);
    return rc;
}
/*
 * Unpack a command-line checkpoint request from a buffer.
 *
 * The command itself has already been consumed by the caller; what is
 * left in the buffer is the checkpoint options followed by the jobid.
 * When peer names the HNP itself nothing is unpacked and the call
 * succeeds immediately.
 *
 * @param peer     process the request is associated with
 * @param buffer   buffer positioned just after the command
 * @param options  filled in by orte_snapc_base_unpack_options()
 * @param jobid    receives the unpacked jobid
 *
 * @return ORTE_SUCCESS, or the error code of the failing unpack step.
 */
int orte_snapc_base_global_coord_ckpt_init_cmd(orte_process_name_t* peer,
        opal_buffer_t* buffer,
        opal_crs_base_ckpt_options_t *options,
        orte_jobid_t *jobid)
{
    int ret;
    orte_std_cntr_t count;

    /* talking to ourselves makes no sense - bail out quietly */
    if (OPAL_EQUAL ==
            orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_HNP)) {
        OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                             "%s) base:ckpt_init_cmd: Error: Do not send to self!\n",
                             ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type)));
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                         "%s) base:ckpt_init_cmd: Receiving commands\n",
                         ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type)));

    /* first item in the request: the checkpoint options */
    ret = orte_snapc_base_unpack_options(buffer, options);
    if( ORTE_SUCCESS != ret ) {
        opal_output(orte_snapc_base_framework.framework_output,
                    "%s) base:ckpt_init_cmd: Error: Unpack (options) Failure (ret = %d)\n",
                    ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret );
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* second item: exactly one jobid */
    count = 1;
    ret = opal_dss.unpack(buffer, jobid, &count, ORTE_JOBID);
    if ( ORTE_SUCCESS != ret ) {
        opal_output(orte_snapc_base_framework.framework_output,
                    "%s) base:ckpt_init_cmd: Error: DSS Unpack (jobid) Failure (ret = %d) (LINE = %d)\n",
                    ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type),
                    ret, __LINE__);
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                         "%s) base:ckpt_init_cmd: Received [%d, %d, %s]\n",
                         ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type),
                         (int)(options->term),
                         (int)(options->stop),
                         ORTE_JOBID_PRINT(*jobid)));

    return ORTE_SUCCESS;
}
Exemple #27
0
/*
 * Destructor for orte_job_t.
 *
 * Releases, in this order: every app context and the apps array, the
 * objects referenced by the ORTE_JOB_FAILURE_TIMER_EVENT and
 * ORTE_JOB_ABORTED_PROC attributes, the job map, every proc and the
 * procs array, and finally the attribute list itself.  If the job has a
 * valid jobid, its slot in the global orte_job_data array is cleared.
 */
static void orte_job_destruct(orte_job_t* job)
{
    orte_proc_t *proc;
    orte_app_context_t *app;
    orte_job_t *entry;
    orte_timer_t *evtimer;
    int idx;

    /* probably just a race condition - nothing to clean up */
    if (NULL == job) {
        return;
    }
    
    if (orte_debug_flag) {
        opal_output(0, "%s Releasing job data for %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job->jobid));
    }
    
    /* drop our reference on each app context, then on the array */
    for (idx = 0; idx < job->apps->size; idx++) {
        app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, idx);
        if (NULL != app) {
            OBJ_RELEASE(app);
        }
    }
    OBJ_RELEASE(job->apps);
    
    /* attributes that hold object pointers: detach each one from the
     * list first, then release the object it pointed to
     */
    evtimer = NULL;
    if (orte_get_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT,
                           (void**)&evtimer, OPAL_PTR)) {
        orte_remove_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
        /* an orte_timer_t */
        OBJ_RELEASE(evtimer);
    }
    proc = NULL;
    if (orte_get_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC,
                           (void**)&proc, OPAL_PTR)) {
        orte_remove_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC);
        /* an orte_proc_t */
        OBJ_RELEASE(proc);
    }

    if (NULL != job->map) {
        OBJ_RELEASE(job->map);
        job->map = NULL;
    }
    
    /* same treatment for the procs as for the apps above */
    for (idx = 0; idx < job->procs->size; idx++) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(job->procs, idx);
        if (NULL != proc) {
            OBJ_RELEASE(proc);
        }
    }
    OBJ_RELEASE(job->procs);
    
    /* whatever is left on the attribute list goes with it */
    OPAL_LIST_DESTRUCT(&job->attributes);

    /* without a global array or a valid jobid there is no slot to clear */
    if (NULL == orte_job_data || ORTE_JOBID_INVALID == job->jobid) {
        return;
    }

    /* jobs are not stored by index, so scan for the first entry with
     * our jobid and clear its slot
     */
    for (idx = 0; idx < orte_job_data->size; idx++) {
        entry = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, idx);
        if (NULL != entry && entry->jobid == job->jobid) {
            opal_pointer_array_set_item(orte_job_data, idx, NULL);
            break;
        }
    }
}
/* this is the read handler for my own child procs and stdin
 *
 * Event callback. Reads at most ORTE_IOF_BASE_MSG_MAX bytes from fd and
 * then dispatches on the tag of the read event passed in cbdata:
 *
 *  - stdin:  the bytes are delivered to every job on the component's
 *            stdin_jobs array - written to the sinks of local procs, or
 *            sent to the remote daemon of each node in the job's map.
 *            The stdin event is then either released (short or empty
 *            read) or restarted. This path always returns before the
 *            generic handling below.
 *  - stdout with data: if the job has a stdout_target, the bytes are
 *            forwarded as stdin of that job; otherwise they are written
 *            to our own stdout.
 *  - any other tag with data: written to our stderr write event.
 *  - zero bytes on stdout/stderr/stddiag: the matching read event of the
 *            proc is released, and the proc is retired once all three
 *            of its read events are gone.
 *
 * fd     - descriptor that became readable
 * event  - not referenced in this function
 * cbdata - the orte_iof_read_event_t that fired
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    /* set only when stdout data reaches the end of a job chain and must
     * therefore be printed locally
     */
    bool write_out=false;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        /* NOTE(review): the result of ReadFile is not checked here, so
         * 'readed' is used whatever the call reported - confirm this is
         * intended
         */
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */
    
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */
        
        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(rev->ev, 0);
            return;
        } 

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order to
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }
    
    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired
     */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
        }
        /* note that the read event is not re-added on this path */
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it
         */
        mca_iof_mr_hnp_component.stdinev->active = false;    
        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs
         */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            map = jdata->map;
            /* walk every node the job is mapped to; the node's daemon
             * decides between local delivery and a remote send
             */
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;

                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                /* this leaves the whole handler: any remaining
                                 * procs, nodes and jobs do not receive this
                                 * chunk, and the stdin event is not restarted
                                 * here
                                 */
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* need to write a 0-byte event to clear the stream and close it */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                /* the sink pointer is only cleared here, not released */
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));
                
                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }
        /* if num_bytes was zero, then we need to terminate the event */
        /* a short read is treated the same way as an empty one; for any
         * positive buffer size the first test is already implied by the
         * second
         */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                /* presumably (0 sec, 10000 usec) - confirm against ORTE_TIMER_EVENT */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        /* stdin never reaches the stdout/stderr handling below */
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        /* NOTE(review): the result of orte_get_job_data_object is
         * dereferenced without a NULL check, both here and for the
         * target job below - confirm the lookup cannot fail for a proc
         * that is still producing output
         */
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;

            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        /* unlike the stdin path, the return value is not
                         * examined here, so there is no back-pressure
                         * handling for forwarded stdout
                         */
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                
                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
        /* forwarded data falls through with write_out still false, so
         * it is not echoed to our own stdout below
         */
    }
    
 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));
    
    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                /* this test presumably relies on OBJ_RELEASE leaving the
                 * released field NULL - confirm against the object system
                 */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                /* the list may have been modified, so stop iterating */
                break;
            }
        }
        /* the read event is not re-added after a zero-byte read */
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            /* every non-stdout tag that gets here goes to the stderr
             * write event
             */
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }
    
    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
}
/*
 * Initialize routing for the radix routed module.
 *
 * What happens depends on the kind of process we are and on whether
 * routing data is supplied:
 *
 *  - tool:   nothing to do.
 *  - daemon: with ndat == NULL (called during orte_init) the HNP's
 *            contact info is stored, its name is extracted, and the
 *            lifeline is set to the parent (static ports) or the HNP.
 *            With ndat != NULL the daemon contact info is updated.
 *  - HNP:    with ndat == NULL the lifeline is cleared. Otherwise the
 *            data is either a contact-info update for our own job or is
 *            passed to orte_routed_base_process_callback().
 *  - proc:   with ndat != NULL and a job from a different job family,
 *            the contact info is forwarded to the HNP and we wait for
 *            its ack. With ndat == NULL (called during orte_init) the
 *            HNP and local daemon names are set up, the daemon becomes
 *            our lifeline, and we register with it.
 *
 * @param job   job the routes are for
 * @param ndat  routing/contact data, or NULL during orte_init
 *
 * @return ORTE_SUCCESS, ORTE_ERR_FATAL when a required URI is missing,
 *         or the error code of the step that failed.
 */
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
{
    /* the radix module routes all proc communications through
     * the local daemon. Daemons must identify which of their
     * daemon-peers is "hosting" the specified recipient and
     * route the message to that daemon. Daemon contact info
     * is handled elsewhere, so all we need to do here is
     * ensure that the procs are told to route through their
     * local daemon, and that daemons are told how to route
     * for each proc
     */
    int rc;

    /* if I am a tool, then I stand alone - there is nothing to do */
    if (ORTE_PROC_IS_TOOL) {
        return ORTE_SUCCESS;
    }
    
    /* if I am a daemon or HNP, then I have to extract the routing info for this job
     * from the data sent to me for launch and update the routing tables to
     * point at the daemon for each proc
     */
    if (ORTE_PROC_IS_DAEMON) {
        
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes for daemon job %s\n\thnp_uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
        
        if (NULL == ndat) {
            /* indicates this is being called during orte_init.
             * Get the HNP's name for possible later use
             */
            if (NULL == orte_process_info.my_hnp_uri) {
                /* fatal error */
                ORTE_ERROR_LOG(ORTE_ERR_FATAL);
                return ORTE_ERR_FATAL;
            }
            /* set the contact info into the hash table */
            orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
            
            /* extract the hnp name and store it */
            if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
                                                               ORTE_PROC_MY_HNP, NULL))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }

            /* if we are using static ports, set my lifeline to point at my parent */
            if (orte_static_ports) {
                lifeline = ORTE_PROC_MY_PARENT;
            } else {
                /* set our lifeline to the HNP - we will abort if that connection is lost */
                lifeline = ORTE_PROC_MY_HNP;
            }
            
            /* daemons will send their contact info back to the HNP as
             * part of the message confirming they are read to go. HNP's
             * load their contact info during orte_init
             */
        } else {
            /* ndat != NULL means we are getting an update of RML info
             * for the daemons - so update our contact info and routes
             */
            if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) {
                ORTE_ERROR_LOG(rc);
            }
            /* success or failure, the update result is what we report */
            return rc;
        }

        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_radix: completed init routes",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        
        return ORTE_SUCCESS;
    }
    

    if (ORTE_PROC_IS_HNP) {
        
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes for HNP job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job)));
        
        if (NULL == ndat) {
            /* the HNP has no lifeline */
            lifeline = NULL;
        } else {
            /* if this is for my own jobid, then I am getting an update of RML info
             * for the daemons - so update our contact info and routes
             */
            if (ORTE_PROC_MY_NAME->jobid == job) {
                if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            } else {
                /* if not, then I need to process the callback */
                if (ORTE_SUCCESS != (rc = orte_routed_base_process_callback(job, ndat))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }
        }

        return ORTE_SUCCESS;
    }

    {  /* MUST BE A PROC */
        /* if ndat != NULL, then this is being invoked by the proc to
         * init a route to a specified process that is outside of our
         * job family. We want that route to go through our HNP, routed via
         * out local daemon - however, we cannot know for
         * certain that the HNP already knows how to talk to the specified
         * procs. For example, in OMPI's publish/subscribe procedures, the
         * DPM framework looks for an mca param containing the global ompi-server's
         * uri. This info will come here so the proc can setup a route to
         * the server - we need to pass the routing info to our HNP
         */
        if (NULL != ndat) {
            opal_buffer_t *xfer;
            orte_rml_cmd_flag_t cmd=ORTE_RML_UPDATE_CMD;
            bool ack_waiting;

            OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                                 "%s routed_radix: init routes w/non-NULL data",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            
            if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) != ORTE_JOB_FAMILY(job)) {
                /* if this is for a different job family, then we route via our HNP
                 * to minimize connection counts to entities such as ompi-server, so
                 * start by sending the contact info to the HNP for update
                 */
                OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                                     "%s routed_radix_init_routes: diff job family - sending update to %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
                
                /* prep the buffer for transmission to the HNP - if either
                 * step fails the buffer is incomplete, so release it and
                 * report the error instead of sending a partial message
                 */
                xfer = OBJ_NEW(opal_buffer_t);
                if (ORTE_SUCCESS != (rc = opal_dss.pack(xfer, &cmd, 1, ORTE_RML_CMD))) {
                    ORTE_ERROR_LOG(rc);
                    OBJ_RELEASE(xfer);
                    return rc;
                }
                if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(xfer, ndat))) {
                    ORTE_ERROR_LOG(rc);
                    OBJ_RELEASE(xfer);
                    return rc;
                }

                /* save any new connections for use in subsequent connect_accept calls */
                orte_routed_base_update_hnps(ndat);

                if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, xfer,
                                                      ORTE_RML_TAG_RML_INFO_UPDATE,
                                                      orte_rml_send_callback, NULL))) {
                    ORTE_ERROR_LOG(rc);
                    OBJ_RELEASE(xfer);
                    return rc;
                }

                /* wait right here until the HNP acks the update to ensure that
                 * any subsequent messaging can succeed
                 */
                ack_waiting = true;
                orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                        ORTE_RML_TAG_UPDATE_ROUTE_ACK,
                                        ORTE_RML_NON_PERSISTENT,
                                        recv_ack, &ack_waiting);
                ORTE_WAIT_FOR_COMPLETION(ack_waiting);

                OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                                     "%s routed_radix_init_routes: ack recvd",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                
                /* our get_route function automatically routes all messages for
                 * other job families via the HNP, so nothing more to do here
                 */
            }
            return ORTE_SUCCESS;
        }
        
        /* if ndat=NULL, then we are being called during orte_init. In this
         * case, we need to setup a few critical pieces of info
         */
        
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
                             (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
                
        if (NULL == orte_process_info.my_daemon_uri) {
            /* in this module, we absolutely MUST have this information - if
             * we didn't get it, then error out
             */
            opal_output(0, "%s ERROR: Failed to identify the local daemon's URI",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            opal_output(0, "%s ERROR: This is a fatal condition when the radix router",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            opal_output(0, "%s ERROR: has been selected - either select the unity router",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            opal_output(0, "%s ERROR: or ensure that the local daemon info is provided",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            return ORTE_ERR_FATAL;
        }
            
        /* we have to set the HNP's name, even though we won't route messages directly
         * to it. This is required to ensure that we -do- send messages to the correct
         * HNP name
         */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
                                                           ORTE_PROC_MY_HNP, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        
        /* Set the contact info in the RML - this won't actually establish
         * the connection, but just tells the RML how to reach the daemon
         * if/when we attempt to send to it
         */
        orte_rml.set_contact_info(orte_process_info.my_daemon_uri);
        /* extract the daemon's name so we can update the routing table */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
                                                           ORTE_PROC_MY_DAEMON, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        
        /* set our lifeline to the local daemon - we will abort if this connection is lost */
        lifeline = ORTE_PROC_MY_DAEMON;
        
        /* register ourselves -this sends a message to the daemon (warming up that connection)
         * and sends our contact info to the HNP when all local procs have reported
         *
         * NOTE: it may seem odd that we send our contact info to the HNP - after all,
         * the HNP doesn't really need to know how to talk to us directly if we are
         * using this routing method. However, this is good for two reasons:
         *
         * (1) some debuggers and/or tools may need RML contact
         *     info to set themselves up
         *
         * (2) doing so allows the HNP to "block" in a dynamic launch
         *     until all procs are reported running, thus ensuring that no communication
         *     is attempted until the overall ORTE system knows how to talk to everyone -
         *     otherwise, the system can just hang.
         */
        if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(true))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* no answer is expected or coming */
        
        return ORTE_SUCCESS;
    }
}
/*
 * Record that traffic destined for "target" should be relayed via "route".
 *
 * Returns ORTE_ERR_BAD_PARAM when the target's jobid or vpid is the
 * invalid sentinel, and ORTE_SUCCESS in every other case.
 *
 * Only targets belonging to a job family other than my own end up in
 * the orte_routed_jobfams array, and the entry is kept per job family
 * rather than per process.
 */
static int update_route(orte_process_name_t *target,
                        orte_process_name_t *route)
{
    orte_routed_jobfam_t *entry;
    uint16_t target_family;
    int idx;

    /* refuse targets that do not carry a usable name */
    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* application processes push everything through their local
     * daemon, so there is no route for them to maintain
     */
    if (ORTE_PROC_IS_APP) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_update: %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    /* the target is my HNP: if the proposed route is some other
     * process, just clear the hnp_direct flag and stop here
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
        if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) {
            hnp_direct = false;
            return ORTE_SUCCESS;
        }
    }

    /* nothing is recorded for targets inside my own job family */
    if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        return ORTE_SUCCESS;
    }

    /* from here on the target lives in a different job family */

    /* a daemon relays anything bound for another job family through
     * its HNP, so it has nothing to track either
     */
    if (ORTE_PROC_IS_DAEMON) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_update: diff job family routing job %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(target->jobid),
                         ORTE_NAME_PRINT(route)));

    /* look for an existing entry for this job family; the array may
     * contain NULL slots, so every index has to be inspected
     */
    target_family = ORTE_JOB_FAMILY(target->jobid);
    for (idx = 0; idx < orte_routed_jobfams.size; ++idx) {
        entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, idx);
        if (NULL == entry || entry->job_family != target_family) {
            continue;
        }

        /* found it - overwrite the stored route in place */
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_radix: updating route to %s via %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOB_FAMILY_PRINT(target->jobid),
                             ORTE_NAME_PRINT(route)));
        entry->route.jobid = route->jobid;
        entry->route.vpid = route->vpid;
        return ORTE_SUCCESS;
    }

    /* no entry yet, so create one FOR THE JOB FAMILY */
    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s routed_radix: adding route to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOB_FAMILY_PRINT(target->jobid)));
    entry = OBJ_NEW(orte_routed_jobfam_t);
    entry->job_family = target_family;
    entry->route.jobid = route->jobid;
    entry->route.vpid = route->vpid;
    opal_pointer_array_add(&orte_routed_jobfams, entry);

    return ORTE_SUCCESS;
}