/*** NIDMAP UTILITIES ***/

/*
 * Find the job map entry that belongs to the given jobid.
 *
 * A jobid is a constructed value, so entries cannot be stored by index
 * and the jobmap pointer array has to be scanned. Slots are set to NULL
 * as jobs complete, so the array is not left-justified and every slot
 * up to the array size must be examined.
 *
 * Returns the matching entry, or NULL when no entry matches.
 */
orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job)
{
    orte_jmap_t *entry;
    int idx;

    for (idx = 0; idx < orte_jobmap.size; ++idx) {
        entry = (orte_jmap_t*)opal_pointer_array_get_item(&orte_jobmap, idx);
        if (NULL != entry) {
            OPAL_OUTPUT_VERBOSE((10, orte_debug_output,
                                 "%s lookup:pmap: checking job %s for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(entry->job),
                                 ORTE_JOBID_PRINT(job)));
            if (entry->job == job) {
                return entry;
            }
        }
    }

    /* nothing matched */
    return NULL;
}
/*
 * Print a one-line message describing a process state change.
 *
 * @param state  the state being reported
 * @param proc   name of the affected process; nothing is printed when NULL
 *
 * Each group of states emits exactly one message. Every group ends in
 * its own break so that a heartbeat failure reports only "unreachable"
 * and does not also run into the communication-failure message.
 */
void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
{
    if (NULL == proc) {
        return;
    }

    switch (state) {
    case ORTE_PROC_STATE_ABORTED:
    case ORTE_PROC_STATE_ABORTED_BY_SIG:
    case ORTE_PROC_STATE_TERM_WO_SYNC:
    case ORTE_PROC_STATE_TERMINATED:
    case ORTE_PROC_STATE_KILLED_BY_CMD:
    case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
        opal_output(0, "%d: Process %s is dead.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    case ORTE_PROC_STATE_HEARTBEAT_FAILED:
        opal_output(0, "%d: Process %s is unreachable.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    case ORTE_PROC_STATE_COMM_FAILED:
        opal_output(0, "%d: Failed to communicate with process %s.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    /* NOTE(review): FAILED_TO_START shares the "has called abort" text
     * with CALLED_ABORT - confirm that this wording is intended. */
    case ORTE_PROC_STATE_CALLED_ABORT:
    case ORTE_PROC_STATE_FAILED_TO_START:
        opal_output(0, "%d: Process %s has called abort.",
                    orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
        break;

    /* no message for these */
    case ORTE_PROC_STATE_MIGRATING:
    default:
        break;
    }
}
/*****************
 * Local Functions
 *****************/

/*
 * Abort handling for the HNP: stop the sensors for the job, mark the
 * run as terminating, warn the user about non-zero exit codes and ask
 * the PLM to terminate the orteds.
 *
 * Only the first caller does the work; later callers find the abort
 * lock already held and return immediately.
 */
static void default_hnp_abort(orte_job_t *jdata)
{
    int rc;

    /* trylock returns 1 if the lock is already held, i.e. an abort is
     * already in progress */
    if (opal_atomic_trylock(&orte_abort_inprogress_lock)) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                             "%s errmgr:default_hnp: abort in progress, ignoring abort on job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_hnp: abort called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* the job aborted, so its sensors are no longer needed */
    orte_sensor.stop(jdata->jobid);

    /* control params: we are terminating and will not recover */
    orte_job_term_ordered = true;
    orte_enable_recovery = false;

    /* an abort of the daemon job itself is an abnormal termination;
     * any other job is simply aborted cleanly */
    if (ORTE_PROC_MY_NAME->jobid == jdata->jobid) {
        orte_abnormal_term_ordered = true;
    }

    if (0 < jdata->num_non_zero_exit) {
        const char *kind;
        const char *local_id;
        const char *tail;

        if (1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
            kind = "Primary";
            local_id = "";
        } else {
            kind = "Child";
            local_id = ORTE_LOCAL_JOBID_PRINT(jdata->jobid);
        }

        if (1 == jdata->num_non_zero_exit) {
            tail = "process returned\na non-zero exit code.";
        } else {
            tail = "processes returned\nnon-zero exit codes.";
        }

        /* warn user */
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
                    "-------------------------------------------------------",
                    kind, local_id, jdata->num_non_zero_exit, tail);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_hnp: ordering orted termination",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* the orteds kill their local procs automatically once they are
     * told to terminate */
    rc = orte_plm.terminate_orteds();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
}
/*****************
 * Local Functions
 *****************/

/*
 * Abort handling for the HNP: mark the run as terminating, warn the
 * user when the job carries a non-zero-exit count attribute, and ask
 * the PLM to terminate the orteds.
 *
 * Only the first caller does the work; later callers find the abort
 * lock already held and return immediately.
 */
static void default_hnp_abort(orte_job_t *jdata)
{
    int32_t nonzero_count;
    int32_t *count_ptr;
    int rc;

    /* trylock returns 1 if the lock is already held, i.e. an abort is
     * already in progress */
    if (opal_atomic_trylock(&orte_abort_inprogress_lock)) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                             "%s errmgr:default_hnp: abort in progress, ignoring abort on job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid)));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:default_hnp: abort called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* control params: we are terminating and will not recover */
    orte_job_term_ordered = true;
    orte_enable_recovery = false;

    /* an abort of the daemon job itself is an abnormal termination;
     * any other job is simply aborted cleanly */
    if (ORTE_PROC_MY_NAME->jobid == jdata->jobid) {
        orte_abnormal_term_ordered = true;
    }

    /* the attribute lookup stores its value through count_ptr */
    nonzero_count = 0;
    count_ptr = &nonzero_count;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT,
                           (void**)&count_ptr, OPAL_INT32)) {
        const char *kind;
        const char *local_id;
        const char *tail;

        if (1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
            kind = "Primary";
            local_id = "";
        } else {
            kind = "Child";
            local_id = ORTE_LOCAL_JOBID_PRINT(jdata->jobid);
        }

        if (1 == nonzero_count) {
            tail = "process returned\na non-zero exit code";
        } else {
            tail = "processes returned\nnon-zero exit codes";
        }

        /* warn user */
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
                    "-------------------------------------------------------",
                    kind, local_id, nonzero_count, tail);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:default_hnp: ordering orted termination",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* the orteds kill their local procs automatically once they are
     * told to terminate */
    rc = orte_plm.terminate_orteds();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
}
static void job_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_state_t jobstate = caddy->job_state; char *msg; /* * if orte is trying to shutdown, just let it */ if (orte_finalizing) { return; } /* if the jdata is NULL, then we abort as this * is reporting an unrecoverable error */ if (NULL == caddy->jdata) { OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:orcm: jobid %s reported error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_job_state_to_str(jobstate))); asprintf(&msg, "%s errmgr:orcm: jobid %s reported error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_job_state_to_str(jobstate)); /* notify this */ ORTE_NOTIFIER_INTERNAL_ERROR(caddy->jdata, jobstate, ORTE_NOTIFIER_CRIT, 1, msg); /* cleanup */ /* ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT);*/ OBJ_RELEASE(caddy); return; } /* update the state */ OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:orcm: job %s reported error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(caddy->jdata->jobid), orte_job_state_to_str(jobstate))); asprintf(&msg, "%s errmgr:orcm: jobid %s reported error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(caddy->jdata->jobid), orte_job_state_to_str(jobstate)); /* notify this */ ORTE_NOTIFIER_INTERNAL_ERROR(caddy->jdata, jobstate, ORTE_NOTIFIER_WARN, 1, msg); /* cleanup */ OBJ_RELEASE(caddy); }
/*
 * STANDARD PRINT FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED
 *
 * Renders the value at src into a newly built string stored in *output.
 * ORTE_JOBID is formatted here; every other supported type is mapped
 * to a label and an underlying type and handed to orte_dt_quick_print.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_UNKNOWN_DATA_TYPE (with *output
 * left NULL) when the type is not handled.
 */
int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t type)
{
    char *label;
    opal_data_type_t real_type;

    /* set default result */
    *output = NULL;

    switch (type) {
    case ORTE_JOBID:
        /* jobids have their own printable form */
        asprintf(output, "%sData Type: ORTE_JOBID\tData size: %lu\tValue: %s",
                 (NULL == prefix) ? "" : prefix,
                 (unsigned long)sizeof(orte_jobid_t),
                 ORTE_JOBID_PRINT(*(orte_jobid_t*)src));
        return ORTE_SUCCESS;

    case ORTE_STD_CNTR:
        label = "ORTE_STD_CNTR";
        real_type = ORTE_STD_CNTR_T;
        break;

    case ORTE_VPID:
        label = "ORTE_VPID";
        real_type = ORTE_VPID_T;
        break;

#if !ORTE_DISABLE_FULL_SUPPORT
    case ORTE_PROC_STATE:
        label = "ORTE_PROC_STATE";
        real_type = ORTE_PROC_STATE_T;
        break;

    case ORTE_JOB_STATE:
        label = "ORTE_JOB_STATE";
        real_type = ORTE_JOB_STATE_T;
        break;

    case ORTE_NODE_STATE:
        label = "ORTE_NODE_STATE";
        real_type = ORTE_NODE_STATE_T;
        break;

    case ORTE_EXIT_CODE:
        label = "ORTE_EXIT_CODE";
        real_type = ORTE_EXIT_CODE_T;
        break;

    case ORTE_RML_TAG:
        label = "ORTE_RML_TAG";
        real_type = ORTE_RML_TAG_T;
        break;

    case ORTE_DAEMON_CMD:
        label = "ORTE_DAEMON_CMD";
        real_type = ORTE_DAEMON_CMD_T;
        break;

    case ORTE_GRPCOMM_MODE:
        label = "ORTE_GRPCOMM_MODE";
        real_type = ORTE_GRPCOMM_MODE_T;
        break;

    case ORTE_IOF_TAG:
        label = "ORTE_IOF_TAG";
        real_type = ORTE_IOF_TAG_T;
        break;
#endif

    default:
        ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE);
        return ORTE_ERR_UNKNOWN_DATA_TYPE;
    }

    orte_dt_quick_print(output, label, prefix, src, real_type);
    return ORTE_SUCCESS;
}
/*
 * Mark a job as having failed to start and fix up its local children.
 *
 * Any child of the job whose state is below LAUNCHED, or is
 * FAILED_TO_START, is flagged as having completed IOF and waitpid, so
 * that nothing waits on pipes that were never opened.
 *
 * exit_code is accepted but not used here.
 */
static void failed_start(orte_odls_job_t *jobdat, orte_exit_code_t exit_code)
{
    opal_list_item_t *cursor;
    orte_odls_child_t *kid;

    /* set the state */
    jobdat->state = ORTE_JOB_STATE_FAILED_TO_START;

    cursor = opal_list_get_first(&orte_local_children);
    while (cursor != opal_list_get_end(&orte_local_children)) {
        kid = (orte_odls_child_t*)cursor;

        if (kid->name->jobid == jobdat->jobid &&
            (ORTE_PROC_STATE_LAUNCHED > kid->state ||
             ORTE_PROC_STATE_FAILED_TO_START == kid->state)) {
            /* this proc never launched: its pipes were never opened
             * and there is no pid to wait for, so mark both as done
             * or we would hang waiting on them */
            kid->iof_complete = true;
            kid->waitpid_recvd = true;
        }

        cursor = opal_list_get_next(cursor);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:hnp: job %s reported incomplete start",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobdat->jobid)));
}
/*
 * Print the report for one mpirun: a header line underlined with
 * dashes, then the node table (only when orte_ps_globals.nodes is set)
 * and the job table.
 *
 * The header is written straight to stdout and its length is taken
 * from printf's return value. This avoids a temporary heap string
 * whose allocation result would otherwise have to be checked before
 * printing and freeing it.
 *
 * Always returns ORTE_SUCCESS.
 */
static int pretty_print(orte_ps_mpirun_info_t *hnpinfo)
{
    int len_hdr;

    /*
     * Print header and remember header length
     */
    printf("\n\n");
    len_hdr = printf("Information from mpirun %s",
                     ORTE_JOBID_PRINT(hnpinfo->hnp->name.jobid));
    printf("\n");
    if (len_hdr < 0) {
        /* output error: do not pass a negative length on */
        len_hdr = 0;
    }
    pretty_print_dashed_line(len_hdr);

    /*
     * Print Node Information
     */
    if (orte_ps_globals.nodes) {
        pretty_print_nodes(hnpinfo->nodes, hnpinfo->num_nodes);
    }

    /*
     * Print Job Information
     */
    pretty_print_jobs(hnpinfo->jobs, hnpinfo->num_jobs);

    return ORTE_SUCCESS;
}
/*
 * Mark a job as having failed to start and fix up its local children.
 *
 * Every local child of the job that is in FAILED_TO_START state gets
 * its IOF-complete and waitpid flags set, so that nothing waits on
 * pipes that were never opened.
 */
static void failed_start(orte_job_t *jobdat)
{
    orte_proc_t *kid;
    int idx;

    /* set the state */
    jobdat->state = ORTE_JOB_STATE_FAILED_TO_START;

    for (idx = 0; idx < orte_local_children->size; ++idx) {
        kid = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx);

        /* skip empty slots and children of other jobs */
        if (NULL == kid || kid->name.jobid != jobdat->jobid) {
            continue;
        }

        if (ORTE_PROC_STATE_FAILED_TO_START == kid->state) {
            /* this proc never launched: its pipes were never opened
             * and there is no pid to wait for, so flag both as done
             * or we would hang waiting on them */
            ORTE_FLAG_SET(kid, ORTE_PROC_FLAG_IOF_COMPLETE);
            ORTE_FLAG_SET(kid, ORTE_PROC_FLAG_WAITPID);
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
                         "%s errmgr:hnp: job %s reported incomplete start",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobdat->jobid)));
}
/*
 * Launch the processes of a job through common_launch_process.
 *
 * The job state is set to RUNNING only when the number of launched
 * processes equals the job's process count.
 *
 * Returns the error from common_launch_process when it fails,
 * ORTE_SUCCESS otherwise.
 */
static int plm_yarn_actual_launch_procs(orte_job_t* jdata)
{
    int launched = 0;
    int status;

    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                         "%s plm:yarn:plm_yarn_actual_launch_procs for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    status = common_launch_process(jdata, false, &launched);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* a partial launch leaves the job state untouched */
    if (launched == jdata->num_procs) {
        jdata->state = ORTE_JOB_STATE_RUNNING;
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:yarn:plm_yarn_actual_launch_procs: launch jdata procs successfully with AM",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    }

    return ORTE_SUCCESS;
}
/*
 * Record that a proc has been mapped onto a node.
 *
 * The node is added to the map's node array (and retained) unless a
 * node with the same index is already there. The proc is then added
 * to the node's proc array and retained, the node's proc count is
 * bumped and its oversubscribed flag is set to the given value.
 *
 * Returns ORTE_SUCCESS, or the negative error code from
 * opal_pointer_array_add.
 */
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_node_t *node,
                                    bool oversubscribed, orte_proc_t *proc)
{
    orte_node_t *candidate;
    orte_std_cntr_t idx;
    bool already_mapped = false;
    int rc;

    /* is the node already part of this map? */
    for (idx = 0; idx < map->nodes->size; idx++) {
        candidate = (orte_node_t*)opal_pointer_array_get_item(map->nodes, idx);
        if (NULL != candidate && candidate->index == node->index) {
            already_mapped = true;
            break;
        }
    }

    if (!already_mapped) {
        /* first proc on this node for this map: add the node */
        OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
                             "%s rmaps:base: adding node %s to map",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == node->name) ? "NULL" : node->name));

        rc = opal_pointer_array_add(map->nodes, (void*)node);
        if (ORTE_SUCCESS > rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        OBJ_RETAIN(node);  /* maintain accounting on object */
        ++map->num_nodes;
    }

    /* add the proc to this node's local processes; the mapper is
     * expected never to add the same proc twice */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
                         "%s rmaps:base: mapping proc for job %s to node %s whose daemon is %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(proc->name.jobid),
                         (NULL == node->name) ? "NULL" : node->name,
                         (NULL == node->daemon) ? "NULL" : ORTE_NAME_PRINT(&(node->daemon->name))));

    rc = opal_pointer_array_add(node->procs, (void*)proc);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* the node now holds a reference to the proc */
    OBJ_RETAIN(proc);
    ++node->num_procs;

    /* update the oversubscribed state of the node */
    node->oversubscribed = oversubscribed;

    return ORTE_SUCCESS;
}
/*
 * Process a callback buffer holding (vpid, rml uri) pairs for a job
 * and store each uri on the matching proc object.
 *
 * @param job     job the entries belong to
 * @param buffer  buffer to unpack; read until it is exhausted
 *
 * @return ORTE_SUCCESS when the buffer was consumed to its end,
 *         ORTE_ERR_NOT_FOUND when the job has no data object,
 *         ORTE_ERR_FATAL when an entry carries a NULL uri, or the
 *         unpack error that ended the loop.
 *
 * The element count is reset to 1 before every unpack, including
 * after a skipped entry, and the unpacked uri string is freed on
 * every path that does not store it.
 */
int orte_routed_base_process_callback(orte_jobid_t job, opal_buffer_t *buffer)
{
    orte_proc_t *proc;
    orte_job_t *jdata;
    orte_std_cntr_t cnt;
    char *rml_uri;
    orte_vpid_t vpid;
    int rc;

    /* lookup the job object for this process */
    if (NULL == (jdata = orte_get_job_data_object(job))) {
        /* came from a different job family - this is an error */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* unpack the data for each entry */
    for (;;) {
        cnt = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &vpid, &cnt, ORTE_VPID))) {
            break;
        }

        cnt = 1;
        rml_uri = NULL;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            continue;
        }

        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_binomial:callback got uri %s for job %s rank %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == rml_uri) ? "NULL" : rml_uri,
                             ORTE_JOBID_PRINT(job), ORTE_VPID_PRINT(vpid)));

        if (NULL == rml_uri) {
            /* should not happen */
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            return ORTE_ERR_FATAL;
        }

        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            /* nobody will own this string, so release it here */
            free(rml_uri);
            continue;
        }

        /* update the record */
        /* NOTE(review): a previously stored proc->rml_uri is overwritten
         * without being freed - confirm that it is always unset here. */
        proc->rml_uri = strdup(rml_uri);
        free(rml_uri);
    }

    /* running off the end of the buffer is the normal way out */
    if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
/* Report the checkpoint status over the notifier interface */ void orte_snapc_ckpt_state_notify(int state) { switch(state) { case ORTE_SNAPC_CKPT_STATE_ESTABLISHED: orte_notifier.log(ORTE_NOTIFIER_INFO, ORTE_SNAPC_CKPT_NOTIFY(state), "%d: Checkpoint established for process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_SNAPC_CKPT_STATE_NO_CKPT: orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state), "%d: Process %s is not checkpointable.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_SNAPC_CKPT_STATE_ERROR: orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state), "%d: Failed to checkpoint process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_SNAPC_CKPT_STATE_RECOVERED: orte_notifier.log(ORTE_NOTIFIER_INFO, ORTE_SNAPC_CKPT_NOTIFY(state), "%d: Successfully restarted process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_SNAPC_CKPT_STATE_NO_RESTART: orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state), "%d: Failed to restart process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; /* ADK: We currently do not notify for these states, but good to * have them around anyways. */ case ORTE_SNAPC_CKPT_STATE_NONE: case ORTE_SNAPC_CKPT_STATE_REQUEST: case ORTE_SNAPC_CKPT_STATE_PENDING: case ORTE_SNAPC_CKPT_STATE_RUNNING: case ORTE_SNAPC_CKPT_STATE_STOPPED: case ORTE_SNAPC_CKPT_STATE_MIGRATING: case ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL: default: break; } }
void orte_errmgr_base_migrate_state_notify(int state) { switch(state) { case ORTE_ERRMGR_MIGRATE_STATE_ERROR: case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: opal_output(0, "%d: Migration failed for process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_ERRMGR_MIGRATE_STATE_FINISH: opal_output(0, "%d: Migration successful for process %s.", orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); break; case ORTE_ERRMGR_MIGRATE_STATE_NONE: case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: case ORTE_ERRMGR_MIGRATE_MAX: default: break; } }
/*
 * This function gets called by the PLM when an orted notifies us that
 * a job failed to start.
 * Various components will follow their own strategy for dealing with
 * this situation. For this component, we simply kill the job.
 *
 * @param job        the job that failed to start
 * @param exit_code  status to record as the exit status
 *
 * Only the first caller acts. opal_atomic_trylock returns 1 when the
 * lock is already held, so a non-zero result means an abort is already
 * in progress and this call is ignored.
 */
void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code)
{
    int rc;

    OPAL_TRACE(1);

    /* if we are already in progress, then ignore this call */
    if (opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                             "%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job), exit_code));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                         "%s errmgr:default: job %s reported incomplete start with status %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), exit_code));

    orte_job_term_ordered = true;

    /* tell the plm to terminate all jobs */
    if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) {
        ORTE_ERROR_LOG(rc);
    }

    /* set the exit status, just in case whomever called us failed
     * to do so - it can only be done once, so we are protected
     * from overwriting it
     */
    ORTE_UPDATE_EXIT_STATUS(exit_code);

    /* wakeup orterun so we can exit */
    orte_trigger_event(&orte_exit);
}
/*
 * Broadcast a buffer to a job by way of the HNP.
 *
 * The message is packed into a local buffer together with the
 * process-and-relay daemon command. On the HNP the buffer is handed to
 * the daemon command processor through a message event; elsewhere it
 * is sent to the HNP for relay.
 *
 * Returns ORTE_SUCCESS (also when buffer is NULL, in which case there
 * is nothing to send), or the pack/send error code.
 */
static int xcast(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag)
{
    opal_buffer_t relay;
    int rc = ORTE_SUCCESS;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));

    /* no message means nothing to do */
    if (NULL == buffer) {
        return ORTE_SUCCESS;
    }

    /* prep the output buffer */
    OBJ_CONSTRUCT(&relay, opal_buffer_t);

    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_app_pack_xcast(ORTE_DAEMON_PROCESS_AND_RELAY_CMD,
                                                               job, &relay, buffer, tag))) {
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_PROC_IS_HNP) {
        /* The HNP must not message itself, as that can create circular
         * logic in the RML. The macro instead sets a zero-time event
         * that gets the buffer processed by the cmd processor. It
         * copies the buffer, so destructing ours below is fine. */
        ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &relay, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
    } else if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &relay, ORTE_RML_TAG_DAEMON, 0))) {
        /* could not hand it to the HNP for relay */
        ORTE_ERROR_LOG(rc);
    } else {
        /* a non-negative send result is reported as plain success */
        rc = ORTE_SUCCESS;
    }

    OBJ_DESTRUCT(&relay);
    return rc;
}
void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata = caddy->jdata; OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, "%s state:base:cleanup on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid))); /* flag that we were notified */ jdata->state = ORTE_JOB_STATE_NOTIFIED; /* send us back thru job complete */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); OBJ_RELEASE(caddy); }
/*
 * Dump a job map to the clean output stream: a header with the job and
 * its proc count, one line per non-empty pmap slot, and a trailing
 * blank line.
 */
void orte_jmap_dump(orte_jmap_t *jmap)
{
    orte_pmap_t *entry;
    int idx;

    opal_output(orte_clean_output, "**** DUMP OF JOB %s (%s procs) ***",
                ORTE_JOBID_PRINT(jmap->job), ORTE_VPID_PRINT(jmap->num_procs));

    for (idx = 0; idx < jmap->pmap.size; ++idx) {
        entry = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, idx);
        if (NULL != entry) {
            opal_output(orte_clean_output, "\tnode %d local_rank %d node_rank %d",
                        entry->node, (int)entry->local_rank, (int)entry->node_rank);
        }
    }

    opal_output(orte_clean_output, "\n");
}
static void launch_restart(int fd, short args, void *cbdata) { orte_errmgr_caddy_t *cd = (orte_errmgr_caddy_t*)cbdata; int rc; opal_buffer_t *bfr; uint16_t jfam; OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s RESTARTING JOB %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(cd->jdata->jobid))); /* reset the job */ orte_plm_base_reset_job(cd->jdata); /* the resilient mapper will automatically avoid restarting the * proc on its former node */ /* map the job again */ if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(cd->jdata))) { ORTE_ERROR_LOG(rc); goto cleanup; } bfr = OBJ_NEW(opal_buffer_t); /* indicate the target DVM */ jfam = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); opal_dss.pack(bfr, &jfam, 1, OPAL_UINT16); /* get the launch data */ if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(bfr, cd->jdata->jobid))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(bfr); goto cleanup; } /* send it to the daemons */ if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL, NULL, ORCM_PNP_TAG_COMMAND, NULL, 0, bfr, cbfunc, NULL))) { ORTE_ERROR_LOG(rc); } cleanup: OBJ_RELEASE(cd); }
/*
 * Write a notifier request to syslog.
 *
 * @param req  the request; its time stamp, severity, job, state and
 *             message are formatted into a single syslog line
 *
 * ctime_r ends the time string with a newline, which is removed so
 * that it does not break up the log line. If the time cannot be
 * formatted, an empty time string is used. A missing message is shown
 * as "<N/A>" in both the verbose output and the syslog line.
 */
static void mylog(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                        "notifier:syslog:mylog function called with severity %d errcode %d and messg %s",
                        (int)req->severity, req->errcode,
                        (NULL == req->msg) ? "<N/A>" : req->msg);

    /* format the time stamp; fall back to an empty string on failure */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }

    /* trim the newline */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s %s: JOBID %s REPORTS ERROR %s: %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           orte_notifier_base_sev2str(req->severity),
           ORTE_JOBID_PRINT((NULL == req->jdata) ?
                            ORTE_JOBID_INVALID : req->jdata->jobid),
           orte_job_state_to_str(req->state),
           (NULL == req->msg) ? "<N/A>" : req->msg);
}
static void track_jobs(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; opal_buffer_t *alert; orte_plm_cmd_flag_t cmd; int rc; if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE == caddy->job_state) { OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output, "%s state:orted:track_jobs sending local launch complete for job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(caddy->jdata->jobid))); /* update the HNP with all proc states for this job */ alert = OBJ_NEW(opal_buffer_t); /* pack update state command */ cmd = ORTE_PLM_UPDATE_PROC_STATE; if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(alert); goto cleanup; } /* pack the job info */ if (ORTE_SUCCESS != (rc = pack_state_update(alert, caddy->jdata))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(alert); goto cleanup; } /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(alert); } } cleanup: OBJ_RELEASE(caddy); }
/*
 * Application-side state update handler.
 *
 * Only a communication failure is acted upon: a failure of our own
 * connection is ignored, and any other one is passed to the routed
 * framework to find out whether a lifeline was lost.
 *
 * Returns ORTE_ERR_UNRECOVERABLE when route_lost reports failure,
 * ORTE_SUCCESS in every other case (including during finalize).
 */
static int update_state(orte_jobid_t job, orte_job_state_t jobstate,
                        orte_process_name_t *proc, orte_proc_state_t state,
                        pid_t pid, orte_exit_code_t exit_code)
{
    orte_ns_cmp_bitmask_t mask;

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_app: job %s reported state %s"
                         " for proc %s state %s exit_code %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job),
                         orte_job_state_to_str(jobstate),
                         (NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc),
                         orte_proc_state_to_str(state), exit_code));

    /* if orte is trying to shutdown, just let it */
    if (orte_finalizing) {
        return ORTE_SUCCESS;
    }

    /* nothing to do unless communication failed */
    if (ORTE_PROC_STATE_COMM_FAILED != state) {
        return ORTE_SUCCESS;
    }

    /* if it is our own connection, ignore it */
    mask = ORTE_NS_CMP_ALL;
    if (OPAL_EQUAL == orte_util_compare_name_fields(mask, ORTE_PROC_MY_NAME, proc)) {
        return ORTE_SUCCESS;
    }

    /* see if this was a lifeline */
    return (ORTE_SUCCESS == orte_routed.route_lost(proc)) ? ORTE_SUCCESS
                                                           : ORTE_ERR_UNRECOVERABLE;
}
/*
 * Broadcast a buffer to a job by sending it to the HNP for relay.
 *
 * @param job     target job
 * @param buffer  message to send; NULL means there is nothing to send
 * @param tag     RML tag the message is destined for
 *
 * @return ORTE_SUCCESS, or the pack/send error code
 *
 * The relay buffer is heap-allocated and handed to the non-blocking
 * send. It is released here on every failure path, both when packing
 * fails and when the send cannot be started.
 */
static int xcast(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag)
{
    int rc = ORTE_SUCCESS;
    opal_buffer_t *buf;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:bad:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));

    /* if there is no message to send, then just return ok */
    if (NULL == buffer) {
        return ORTE_SUCCESS;
    }

    /* prep the output buffer */
    buf = OBJ_NEW(opal_buffer_t);

    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_xcast(job, buf, buffer, tag))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* send it to the HNP (could be myself) for relay */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
                                          0, orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* a non-negative send result is reported as plain success */
    return ORTE_SUCCESS;
}
/* Useful for debugging. Not used otherwise. */ void print_orte_job_data() { orte_job_t *jdata; orte_proc_t *pdata; int i, j; if (NULL == orte_job_data) { opal_output(0, "ORTE_JOB_DATA == NULL"); return; } for (i = 0; i < orte_job_data->size; i++) { if (NULL == (jdata = (orte_job_t *) opal_pointer_array_get_item(orte_job_data, i))) { continue; } opal_output(0, "JOB: %s", ORTE_JOBID_PRINT(jdata->jobid)); for (j = 0; j < jdata->num_procs; j++) { if (NULL == (pdata = (orte_proc_t *) opal_pointer_array_get_item(jdata->procs, j))) { continue; } opal_output(0, " PROC: %s", ORTE_NAME_PRINT(&(pdata->name))); } } }
/*
 * Ask the orteds to kill all local procs of a job.
 *
 * A temporary proc object naming the job with a wildcard vpid is put
 * into a one-entry pointer array, which is passed to
 * orte_plm_base_orted_kill_local_procs. Both temporaries live on the
 * stack and are destructed before returning.
 *
 * Returns the result of orte_plm_base_orted_kill_local_procs.
 */
int orte_plm_base_orted_terminate_job(orte_jobid_t jobid)
{
    opal_pointer_array_t targets;
    orte_proc_t wildcard;
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
                         "%s plm:base:orted_terminate job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));

    /* a one-slot array is all that is needed */
    OBJ_CONSTRUCT(&targets, opal_pointer_array_t);
    opal_pointer_array_init(&targets, 1, 1, 1);

    /* a single entry addressing every vpid of the job */
    OBJ_CONSTRUCT(&wildcard, orte_proc_t);
    wildcard.name.jobid = jobid;
    wildcard.name.vpid = ORTE_VPID_WILDCARD;
    opal_pointer_array_add(&targets, &wildcard);

    rc = orte_plm_base_orted_kill_local_procs(&targets);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    OBJ_DESTRUCT(&targets);
    OBJ_DESTRUCT(&wildcard);

    return rc;
}
/*
 * Receive the rest of a command-line checkpoint request.
 *
 * The command itself has already been received; this unpacks the
 * checkpoint options and then the jobid from the buffer. When the peer
 * is the HNP itself nothing is unpacked.
 *
 * Returns ORTE_SUCCESS, or the error code of the unpack that failed.
 */
int orte_snapc_base_global_coord_ckpt_init_cmd(orte_process_name_t* peer,
                                               opal_buffer_t* buffer,
                                               opal_crs_base_ckpt_options_t *options,
                                               orte_jobid_t *jobid)
{
    orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
    orte_std_cntr_t count;
    int ret;

    /*
     * Do not send to self, as that is silly.
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(mask, peer, ORTE_PROC_MY_HNP)) {
        OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                             "%s) base:ckpt_init_cmd: Error: Do not send to self!\n",
                             ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type)));
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                         "%s) base:ckpt_init_cmd: Receiving commands\n",
                         ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type)));

    /********************
     * Receive command line checkpoint request:
     * - Command (already received)
     * - options
     * - jobid
     ********************/
    ret = orte_snapc_base_unpack_options(buffer, options);
    if (ORTE_SUCCESS != ret) {
        opal_output(orte_snapc_base_framework.framework_output,
                    "%s) base:ckpt_init_cmd: Error: Unpack (options) Failure (ret = %d)\n",
                    ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret );
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    count = 1;
    ret = opal_dss.unpack(buffer, jobid, &count, ORTE_JOBID);
    if (ORTE_SUCCESS != ret) {
        opal_output(orte_snapc_base_framework.framework_output,
                    "%s) base:ckpt_init_cmd: Error: DSS Unpack (jobid) Failure (ret = %d) (LINE = %d)\n",
                    ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type),
                    ret, __LINE__);
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_snapc_base_framework.framework_output,
                         "%s) base:ckpt_init_cmd: Received [%d, %d, %s]\n",
                         ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type),
                         (int)(options->term),
                         (int)(options->stop),
                         ORTE_JOBID_PRINT(*jobid)));

    return ORTE_SUCCESS;
}
/**
 * Destructor for orte_job_t.
 *
 * Releases, in this order: every app context and the apps array, the
 * objects referenced by the ORTE_JOB_FAILURE_TIMER_EVENT and
 * ORTE_JOB_ABORTED_PROC attributes, the job map, every proc and the
 * procs array, and finally the attribute list itself. If the job has a
 * valid jobid and the global orte_job_data array exists, the array slot
 * holding a job with the same jobid is set to NULL.
 *
 * @param job  object being destroyed; a NULL pointer is ignored
 */
static void orte_job_destruct(orte_job_t* job)
{
    orte_app_context_t *app_ctx;
    orte_proc_t *aborted;
    orte_proc_t *member;
    orte_timer_t *fail_timer;
    orte_job_t *slot;
    int idx;

    /* tolerate a NULL object - most likely a harmless race */
    if (NULL == job) {
        return;
    }

    if (orte_debug_flag) {
        opal_output(0, "%s Releasing job data for %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job->jobid));
    }

    /* app contexts first, then the array that held them */
    for (idx = 0; idx < job->apps->size; idx++) {
        app_ctx = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, idx);
        if (NULL != app_ctx) {
            OBJ_RELEASE(app_ctx);
        }
    }
    OBJ_RELEASE(job->apps);

    /* objects that are only reachable through pointer-valued attributes:
     * the failure timer (an orte_timer_t) ... */
    fail_timer = NULL;
    if (orte_get_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT,
                           (void**)&fail_timer, OPAL_PTR)) {
        orte_remove_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
        OBJ_RELEASE(fail_timer);
    }
    /* ... and the aborted proc (an orte_proc_t) */
    aborted = NULL;
    if (orte_get_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC,
                           (void**)&aborted, OPAL_PTR)) {
        orte_remove_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC);
        OBJ_RELEASE(aborted);
    }

    if (NULL != job->map) {
        OBJ_RELEASE(job->map);
        job->map = NULL;
    }

    /* procs, then the array that held them */
    for (idx = 0; idx < job->procs->size; idx++) {
        member = (orte_proc_t*)opal_pointer_array_get_item(job->procs, idx);
        if (NULL != member) {
            OBJ_RELEASE(member);
        }
    }
    OBJ_RELEASE(job->procs);

    /* whatever attributes remain */
    OPAL_LIST_DESTRUCT(&job->attributes);

    /* nothing to clear if there is no global array or no valid jobid */
    if (NULL == orte_job_data || ORTE_JOBID_INVALID == job->jobid) {
        return;
    }

    /* clear the first global-array slot that carries our jobid */
    for (idx = 0; idx < orte_job_data->size; idx++) {
        slot = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, idx);
        if (NULL != slot && slot->jobid == job->jobid) {
            opal_pointer_array_set_item(orte_job_data, idx, NULL);
            break;
        }
    }
}
/* this is the read handler for my own child procs and stdin */ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) { orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata; unsigned char data[ORTE_IOF_BASE_MSG_MAX]; int32_t numbytes; opal_list_item_t *item; orte_iof_proc_t *proct; int i, j; orte_ns_cmp_bitmask_t mask; orte_job_t *jdata; orte_iof_job_t *iofjob; orte_node_t *node; orte_proc_t *daemon; orte_job_map_t *map; bool write_out=false; /* read up to the fragment size */ #if !defined(__WINDOWS__) numbytes = read(fd, data, sizeof(data)); #else { DWORD readed; HANDLE handle = (HANDLE)_get_osfhandle(fd); ReadFile(handle, data, sizeof(data), &readed, NULL); numbytes = (int)readed; } #endif /* !defined(__WINDOWS__) */ OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s iof:mrhnp:read handler read %d bytes from %s:%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), fd)); if (numbytes < 0) { /* either we have a connection error or it was a non-blocking read */ /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { opal_event_add(rev->ev, 0); return; } OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s iof:mrhnp:read handler %s Error on connection:%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&rev->name), fd)); /* Un-recoverable error. Allow the code to flow as usual in order to * to send the zero bytes message up the stream, and then close the * file descriptor and delete the event. 
*/ numbytes = 0; } /* if job termination has been ordered, just ignore the * data and delete the stdin read event, if that is what fired */ if (orte_job_term_ordered) { if (ORTE_IOF_STDIN & rev->tag) { OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev); } return; } if (ORTE_IOF_STDIN & rev->tag) { /* The event has fired, so it's no longer active until we * re-add it */ mca_iof_mr_hnp_component.stdinev->active = false; /* if this was read from my stdin, I need to send this input to all * daemons who host mapper procs */ for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) { if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) { continue; } jdata = iofjob->jdata; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s read %d bytes from stdin - writing to job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_JOBID_PRINT(jdata->jobid))); map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } daemon = node->daemon; if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) { /* if it is me, then send the bytes down the stdin pipe * for every local proc (they are all on my proct list) - we even send 0 byte events * down the pipe so it forces out any preceding data before * closing the output stream. We add a 0 byte message if * numbytes < sizeof(data) as this means the chunk we read * was the end of the file. 
*/ for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs); item != opal_list_get_end(&mca_iof_mr_hnp_component.procs); item = opal_list_get_next(item)) { proct = (orte_iof_proc_t*)item; if (proct->name.jobid == jdata->jobid) { if (NULL == proct->sink) { opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name)); continue; } if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) { /* getting too backed up - stop the read event for now if it is still active */ if (mca_iof_mr_hnp_component.stdinev->active) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "buffer backed up - holding")); mca_iof_mr_hnp_component.stdinev->active = false; } return; } if (0 < numbytes && numbytes < (int)sizeof(data)) { /* need to write a 0-byte event to clear the stream and close it */ orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev); proct->sink = NULL; } } } } else { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s sending %d bytes from stdin to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&daemon->name))); /* send the data to the daemon so it can * write it to all local procs from this job. 
* If the connection closed, * numbytes will be zero so zero bytes will be * sent - this will tell the daemon to close * the fd for stdin to that proc */ send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes); if (0 < numbytes && numbytes < (int)sizeof(data)) { /* need to send a 0-byte message to clear the stream and close it */ send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0); } } } } /* if num_bytes was zero, then we need to terminate the event */ if (0 == numbytes || numbytes < (int)sizeof(data)) { /* this will also close our stdin file descriptor */ if (NULL != mca_iof_mr_hnp_component.stdinev) { OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev); } } else { /* if we are looking at a tty, then we just go ahead and restart the * read event assuming we are not backgrounded */ if (orte_iof_mrhnp_stdin_check(fd)) { restart_stdin(fd, 0, NULL); } else { /* delay for awhile and then restart */ ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI); } } return; } if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) { /* see if we need to forward this output */ jdata = orte_get_job_data_object(rev->name.jobid); if (ORTE_JOBID_INVALID == jdata->stdout_target) { /* end of the chain - just output the info */ write_out = true; goto PROCESS; } /* it goes to the next job in the chain */ jdata = orte_get_job_data_object(jdata->stdout_target); map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } daemon = node->daemon; if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) { /* if it is me, then send the bytes down the stdin pipe * for every local proc (they are all on my proct list) */ for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs); item != opal_list_get_end(&mca_iof_mr_hnp_component.procs); item = opal_list_get_next(item)) { proct = (orte_iof_proc_t*)item; if (proct->name.jobid == jdata->jobid) { if (NULL == proct->sink) { opal_output(0, "NULL 
SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name)); continue; } orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev); } } } else { OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s sending %d bytes from stdout of %s to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), ORTE_NAME_PRINT(&daemon->name))); /* send the data to the daemon so it can * write it to all local procs from this job */ send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes); } } } PROCESS: OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s read %d bytes from %s of %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"), ORTE_NAME_PRINT(&rev->name))); if (0 == numbytes) { /* if we read 0 bytes from the stdout/err/diag, find this proc * on our list and * release the appropriate event. This will delete the * read event and close the file descriptor */ for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs); item != opal_list_get_end(&mca_iof_mr_hnp_component.procs); item = opal_list_get_next(item)) { proct = (orte_iof_proc_t*)item; mask = ORTE_NS_CMP_ALL; if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) { /* found it - release corresponding event. 
This deletes * the read event and closes the file descriptor */ if (rev->tag & ORTE_IOF_STDOUT) { OBJ_RELEASE(proct->revstdout); } else if (rev->tag & ORTE_IOF_STDERR) { OBJ_RELEASE(proct->revstderr); } else if (rev->tag & ORTE_IOF_STDDIAG) { OBJ_RELEASE(proct->revstddiag); } /* check to see if they are all done */ if (NULL == proct->revstdout && NULL == proct->revstderr && NULL == proct->revstddiag) { /* this proc's iof is complete */ opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item); ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE); OBJ_RELEASE(proct); } break; } } return; } else { /* output this to our local output */ if (ORTE_IOF_STDOUT & rev->tag) { if (write_out) { orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev); } } else { orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev); } } /* re-add the event */ opal_event_add(rev->ev, 0); return; }
/**
 * Initialize routing for the radix routed module.
 *
 * All proc communication is routed through the local daemon; daemons
 * figure out which peer daemon hosts the recipient. What this function
 * does depends on the role of the caller and on whether routing data is
 * supplied:
 *
 *  - tool:    nothing to do.
 *  - daemon:  ndat == NULL (called during orte_init): record the HNP
 *             contact info and name and pick a lifeline (parent when
 *             static ports are used, HNP otherwise).
 *             ndat != NULL: update contact info from the buffer.
 *  - HNP:     ndat == NULL: no lifeline.
 *             ndat != NULL: update contact info if the data is for my
 *             own jobid, otherwise process it as a callback.
 *  - proc:    ndat != NULL: if the job belongs to another job family,
 *             pass the contact info to the HNP and wait for its ack.
 *             ndat == NULL (called during orte_init): record HNP and
 *             daemon names, use the daemon as lifeline and register.
 *
 * @param job   job the routes are being set up for
 * @param ndat  routing/contact data, or NULL during orte_init
 *
 * @return ORTE_SUCCESS, ORTE_ERR_FATAL when required URIs are missing,
 *         or the error code of the failing helper.
 */
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
{
    int ret;
    opal_buffer_t *xfer;
    orte_rml_cmd_flag_t cmd=ORTE_RML_UPDATE_CMD;
    bool ack_waiting;

    /* ---- tool: stands alone ---- */
    if (ORTE_PROC_IS_TOOL) {
        return ORTE_SUCCESS;
    }

    /* ---- daemon ---- */
    if (ORTE_PROC_IS_DAEMON) {
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes for daemon job %s\n\thnp_uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));

        if (NULL != ndat) {
            /* an update of the daemons' RML info - refresh our contact
             * info and routes, and hand back whatever that returned */
            ret = orte_rml_base_update_contact_info(ndat);
            if (ORTE_SUCCESS != ret) {
                ORTE_ERROR_LOG(ret);
            }
            return ret;
        }

        /* called from orte_init: we cannot continue without the HNP uri */
        if (NULL == orte_process_info.my_hnp_uri) {
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            return ORTE_ERR_FATAL;
        }

        /* store the HNP contact info in the hash table */
        orte_rml.set_contact_info(orte_process_info.my_hnp_uri);

        /* pull the HNP name out of the uri and remember it */
        ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
                                       ORTE_PROC_MY_HNP, NULL);
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* with static ports the lifeline is my parent, otherwise it is
         * the HNP - we abort if that connection is lost */
        lifeline = orte_static_ports ? ORTE_PROC_MY_PARENT : ORTE_PROC_MY_HNP;

        /* daemons send their contact info back to the HNP as part of the
         * message confirming they are ready to go. HNPs load their
         * contact info during orte_init */

        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_radix: completed init routes",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_SUCCESS;
    }

    /* ---- HNP ---- */
    if (ORTE_PROC_IS_HNP) {
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes for HNP job %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job)));

        if (NULL == ndat) {
            /* the HNP has no lifeline */
            lifeline = NULL;
            return ORTE_SUCCESS;
        }

        if (ORTE_PROC_MY_NAME->jobid == job) {
            /* data for my own jobid is an update of the daemons' RML info */
            ret = orte_rml_base_update_contact_info(ndat);
        } else {
            /* anything else is a callback that has to be processed */
            ret = orte_routed_base_process_callback(job, ndat);
        }
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        return ORTE_SUCCESS;
    }

    /* ---- from here on we must be an application proc ---- */

    if (NULL != ndat) {
        /* The proc wants a route to a process outside of our job family.
         * That route goes through our HNP, reached via our local daemon.
         * We cannot be sure that the HNP already knows how to talk to the
         * target (e.g. the uri of a global ompi-server handed to us via
         * an mca param), so the routing info is passed on to the HNP. */
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix: init routes w/non-NULL data",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(job)) {
            /* same job family - nothing to forward */
            return ORTE_SUCCESS;
        }

        /* different job family: route via the HNP to keep the number of
         * connections to entities such as ompi-server small */
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix_init_routes: diff job family - sending update to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));

        /* build the message for the HNP: update command + payload */
        xfer = OBJ_NEW(opal_buffer_t);
        opal_dss.pack(xfer, &cmd, 1, ORTE_RML_CMD);
        opal_dss.copy_payload(xfer, ndat);

        /* remember any new connections for later connect_accept calls */
        orte_routed_base_update_hnps(ndat);

        ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, xfer,
                                      ORTE_RML_TAG_RML_INFO_UPDATE,
                                      orte_rml_send_callback, NULL);
        if (0 > ret) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(xfer);
            return ret;
        }

        /* block until the HNP acks the update so that subsequent
         * messaging can succeed */
        ack_waiting = true;
        orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                ORTE_RML_TAG_UPDATE_ROUTE_ACK,
                                ORTE_RML_NON_PERSISTENT,
                                recv_ack, &ack_waiting);
        ORTE_WAIT_FOR_COMPLETION(ack_waiting);

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                             "%s routed_radix_init_routes: ack recvd",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        /* get_route sends everything for other job families via the
         * HNP, so there is nothing more to do */
        return ORTE_SUCCESS;
    }

    /* ndat == NULL: called during orte_init - set up the critical info */
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                         (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
                         (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));

    /* this module cannot work without the local daemon's uri */
    if (NULL == orte_process_info.my_daemon_uri) {
        opal_output(0, "%s ERROR: Failed to identify the local daemon's URI",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        opal_output(0, "%s ERROR: This is a fatal condition when the radix router",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        opal_output(0, "%s ERROR: has been selected - either select the unity router",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        opal_output(0, "%s ERROR: or ensure that the local daemon info is provided",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERR_FATAL;
    }

    /* the HNP's name must be set even though we never route directly to
     * it - this makes sure messages are addressed to the correct HNP */
    ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
                                   ORTE_PROC_MY_HNP, NULL);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* tell the RML how to reach the daemon; no connection is opened
     * until we actually send to it */
    orte_rml.set_contact_info(orte_process_info.my_daemon_uri);

    /* get the daemon's name so the routing table can be updated */
    ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
                                   ORTE_PROC_MY_DAEMON, NULL);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* the local daemon is our lifeline - we abort if it goes away */
    lifeline = ORTE_PROC_MY_DAEMON;

    /* Register ourselves. This sends a message to the daemon (warming up
     * that connection) and sends our contact info to the HNP once all
     * local procs have reported.
     *
     * Sending our contact info to the HNP may look unnecessary with this
     * routing method, but it is useful for two reasons:
     *
     * (1) some debuggers and/or tools may need RML contact info to set
     *     themselves up
     *
     * (2) it lets the HNP "block" in a dynamic launch until all procs
     *     are reported running, so no communication is attempted before
     *     the overall ORTE system knows how to talk to everyone -
     *     otherwise the system can just hang.
     */
    ret = orte_routed_base_register_sync(true);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* no answer is expected or coming */
    return ORTE_SUCCESS;
}
/**
 * Record how to reach a target process.
 *
 * Only targets from a different job family are tracked, and only one
 * route per job family is kept (in orte_routed_jobfams). Application
 * procs and daemons never store such routes.
 *
 * @param target  process the route leads to
 * @param route   process through which target is reached
 *
 * @return ORTE_ERR_BAD_PARAM if target carries an invalid jobid or vpid,
 *         ORTE_SUCCESS otherwise.
 */
static int update_route(orte_process_name_t *target,
                        orte_process_name_t *route)
{
    orte_routed_jobfam_t *entry;
    uint16_t family;
    int idx;

    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* application procs send everything through the local daemon, so
     * they have no routes to maintain */
    if (ORTE_PROC_IS_APP) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_update: %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    /* target is my HNP but the route is some other process: just note
     * that the HNP is not reached directly */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) &&
        OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) {
        hnp_direct = false;
        return ORTE_SUCCESS;
    }

    /* targets inside my own job family need no bookkeeping */
    if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        return ORTE_SUCCESS;
    }

    /* a daemon routes everything for a foreign job family via its HNP,
     * so there is nothing to record */
    if (ORTE_PROC_IS_DAEMON) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_update: diff job family routing job %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(target->jobid), ORTE_NAME_PRINT(route)));

    /* overwrite the route if this job family is already known */
    family = ORTE_JOB_FAMILY(target->jobid);
    for (idx = 0; idx < orte_routed_jobfams.size; idx++) {
        entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, idx);
        if (NULL == entry || entry->job_family != family) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_radix: updating route to %s via %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOB_FAMILY_PRINT(target->jobid),
                             ORTE_NAME_PRINT(route)));
        entry->route.jobid = route->jobid;
        entry->route.vpid = route->vpid;
        return ORTE_SUCCESS;
    }

    /* unknown so far - add a route FOR THE JOB FAMILY */
    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s routed_radix: adding route to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOB_FAMILY_PRINT(target->jobid)));
    entry = OBJ_NEW(orte_routed_jobfam_t);
    entry->job_family = family;
    entry->route.jobid = route->jobid;
    entry->route.vpid = route->vpid;
    opal_pointer_array_add(&orte_routed_jobfams, entry);

    return ORTE_SUCCESS;
}