/** * Terminate the orteds for a given job */ static int plm_alps_terminate_orteds(void) { int rc; /* deregister the waitpid callback to ensure we don't make it look like * alps failed when it didn't. Since the alps may have already completed, * do NOT ERROR_LOG any return code to avoid confusing, duplicate error * messages */ orte_wait_cb_cancel(alps_pid); /* now tell them to die */ if (orte_abnormal_term_ordered) { /* cannot know if a daemon is able to * tell us it died, so just ensure they * all terminate */ if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_HALT_VM_CMD))) { ORTE_ERROR_LOG(rc); } } else { /* we need them to "phone home", though, * so we can know that they have exited */ if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) { ORTE_ERROR_LOG(rc); } } return rc; }
/** * Terminate the orteds for a given job */ static int plm_slurm_terminate_orteds(void) { int rc=ORTE_SUCCESS; orte_job_t *jdata; /* check to see if the primary pid is set. If not, this indicates * that we never launched any additional daemons, so we cannot * not wait for a waitpid to fire and tell us it's okay to * exit. Instead, we simply trigger an exit for ourselves */ if (primary_pid_set) { if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) { ORTE_ERROR_LOG(rc); } } else { OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:slurm: primary daemons complete!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); /* need to set the #terminated value to avoid an incorrect error msg */ jdata->num_terminated = jdata->num_procs; ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); } return rc; }
/** * Terminate the orteds for a given job */ static int plm_slurm_terminate_orteds(void) { int rc; orte_job_t *jdata; /* tell them to die without sending a reply - we will rely on the * waitpid to tell us when they have exited! */ if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) { ORTE_ERROR_LOG(rc); } /* check to see if the primary pid is set. If not, this indicates * that we never launched any additional daemons, so we cannot * not wait for a waitpid to fire and tell us it's okay to * exit. Instead, we simply trigger an exit for ourselves */ if (!primary_pid_set) { OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:slurm: primary daemons complete!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); jdata->state = ORTE_JOB_STATE_TERMINATED; /* need to set the #terminated value to avoid an incorrect error msg */ jdata->num_terminated = jdata->num_procs; orte_trigger_event(&orteds_exit); } return rc; }
/** * Terminate the orteds for a given job */ static int plm_alps_terminate_orteds(void) { int rc; orte_job_t *jdata; OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output, "%s plm:alps: terminating orteds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* deregister the waitpid callback to ensure we don't make it look like * alps failed when it didn't. Since the alps may have already completed, * do NOT ERROR_LOG any return code to avoid confusing, duplicate error * messages */ if (NULL != alpsrun) { orte_wait_cb_cancel(alpsrun); } /* now tell them to die */ if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) { ORTE_ERROR_LOG(rc); } jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output, "%s plm:alps: terminated orteds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); return rc; }
/**
 * Terminate the orteds for a given job.
 *
 * Issues ORTE_DAEMON_EXIT_CMD through orte_plm_base_orted_exit() and
 * logs any non-success status.
 *
 * @return the status returned by orte_plm_base_orted_exit()
 */
static int plm_lsf_terminate_orteds(void)
{
    int status = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/**
 * Terminate the orteds.
 *
 * Issues ORTE_DAEMON_EXIT_CMD through orte_plm_base_orted_exit() and
 * logs any non-success status.
 *
 * @return the status returned by orte_plm_base_orted_exit()
 */
static int isolated_terminate_orteds(void)
{
    /* send the exit command */
    int status = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/**
 * Terminate the orteds for a given job.
 *
 * Chooses the daemon command according to orte_abnormal_term_ordered
 * and issues it through orte_plm_base_orted_exit().
 *
 * @return the status returned by orte_plm_base_orted_exit()
 */
int plm_tm_terminate_orteds(void)
{
    int status;

    if (!orte_abnormal_term_ordered) {
        /* normal termination: the daemons must "phone home" so that
         * we can know they have exited
         */
        status = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);
    } else {
        /* abnormal termination: we cannot know whether a daemon is
         * able to tell us it died, so just ensure they all terminate
         */
        status = orte_plm_base_orted_exit(ORTE_DAEMON_HALT_VM_CMD);
    }

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/**
 * Terminate the orteds for a given job.
 *
 * Cancels the wait callback on alpsrun (when one is present) and then
 * orders the daemons to exit through orte_plm_base_orted_exit().
 *
 * @return the status returned by orte_plm_base_orted_exit()
 */
static int plm_alps_terminate_orteds(void)
{
    int status;

    /* Drop the waitpid callback so that an already-finished alps is not
     * reported as a failure.  The result of the cancel is deliberately
     * not logged, to avoid confusing, duplicate error messages.
     */
    if (alpsrun != NULL) {
        orte_wait_cb_cancel(alpsrun);
    }

    /* order the daemons to exit */
    status = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}