static void plm_yarn_launch_apps(int fd, short args, void *cbdata) { int rc; orte_job_t *jdata; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; /* convenience */ jdata = caddy->jdata; if (ORTE_JOB_STATE_LAUNCH_APPS != caddy->job_state) { ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); return; } /* update job state */ jdata->state = caddy->job_state; /* register recv callback for daemons sync request */ if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_YARN_SYNC_REQUEST, ORTE_RML_PERSISTENT, yarn_hnp_sync_recv, jdata))) { ORTE_ERROR_LOG(rc); } orte_plm_base_launch_apps(fd, args, cbdata); //============heartbeat with AM====== opal_event_t *ev = NULL; ev = (opal_event_t*) malloc(sizeof(opal_event_t)); struct timeval delay; delay.tv_sec = 1; delay.tv_usec = 0; opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata); opal_event_evtimer_add(ev, &delay); //=================================== }
/* When working in this function, ALWAYS jump to "cleanup" if
 * you encounter an error so that orterun will be woken up and
 * the job can cleanly terminate
 */
/* Launch the daemons for a job via SLURM's srun, then launch the apps.
 *
 * @param jdata  the job to launch
 * @return ORTE_SUCCESS, or an ORTE error code; on failure,
 *         orte_plm_base_launch_failed() is invoked from cleanup so the
 *         job terminates cleanly.
 */
static int plm_slurm_launch_job(orte_job_t *jdata)
{
    orte_app_context_t **apps;
    orte_node_t **nodes;
    orte_std_cntr_t n;
    orte_job_map_t *map;
    char *jobid_string = NULL;
    char *param;
    char **argv = NULL;
    int argc;
    int rc;
    char *tmp;
    char **env = NULL;
    char *var;
    /* FIX: NULL-init so cleanup can free unconditionally, and because the
     * "goto launch_apps" shortcut below jumps over the assignments */
    char *nodelist_flat = NULL;
    char **nodelist_argv;
    int nodelist_argc;
    char *name_string;
    char **custom_strings;
    int num_args, i;
    char *cur_prefix = NULL;
    struct timeval launchstart, launchstop;
    int proc_vpid_index;
    orte_jobid_t failed_job;
    bool failed_launch = true;

    /* flag the daemons as failing by default */
    failed_job = ORTE_PROC_MY_NAME->jobid;

    if (orte_timing) {
        if (0 != gettimeofday(&launchstart, NULL)) {
            opal_output(0, "plm_slurm: could not obtain job start time");
            launchstart.tv_sec = 0;
            launchstart.tv_usec = 0;
        }
    }

    /* indicate the state of the launch */
    launching_daemons = true;

    /* create a jobid for this job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:slurm: launching job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* setup the job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* set the active jobid */
    active_job = jdata->jobid;

    /* Get the map for this job */
    if (NULL == (map = orte_rmaps.get_job_map(active_job))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }
    apps = (orte_app_context_t**)jdata->apps->addr;
    nodes = (orte_node_t**)map->nodes->addr;

    if (0 == map->num_new_daemons) {
        /* no new daemons required - just launch apps */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: no new daemons to launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto launch_apps;
    }

    /* need integer value for command line parameter */
    asprintf(&jobid_string, "%lu", (unsigned long) jdata->jobid);

    /*
     * start building argv array
     */
    argv = NULL;
    argc = 0;

    /*
     * SLURM srun OPTIONS
     */

    /* add the srun command */
    opal_argv_append(&argc, &argv, "srun");

    /* Append user defined arguments to srun */
    if (NULL != mca_plm_slurm_component.custom_args) {
        custom_strings = opal_argv_split(mca_plm_slurm_component.custom_args, ' ');
        num_args = opal_argv_count(custom_strings);
        for (i = 0; i < num_args; ++i) {
            opal_argv_append(&argc, &argv, custom_strings[i]);
        }
        opal_argv_free(custom_strings);
    }

    asprintf(&tmp, "--nodes=%lu", (unsigned long) map->num_new_daemons);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    asprintf(&tmp, "--ntasks=%lu", (unsigned long) map->num_new_daemons);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    /* alert us if any orteds die during startup */
    opal_argv_append(&argc, &argv, "--kill-on-bad-exit");

    /* create nodelist */
    nodelist_argv = NULL;
    nodelist_argc = 0;

    for (n = 0; n < map->num_nodes; n++) {
        /* if the daemon already exists on this node, then
         * don't include it */
        if (nodes[n]->daemon_launched) {
            continue;
        }

        /* otherwise, add it to the list of nodes upon which
         * we need to launch a daemon */
        opal_argv_append(&nodelist_argc, &nodelist_argv, nodes[n]->name);
    }
    if (0 == opal_argv_count(nodelist_argv)) {
        orte_show_help("help-plm-slurm.txt", "no-hosts-in-list", true);
        rc = ORTE_ERR_FAILED_TO_START;
        goto cleanup;
    }
    nodelist_flat = opal_argv_join(nodelist_argv, ',');
    opal_argv_free(nodelist_argv);
    asprintf(&tmp, "--nodelist=%s", nodelist_flat);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output,
                         "%s plm:slurm: launching on nodes %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat));

    /*
     * ORTED OPTIONS
     */

    /* add the daemon command (as specified by user) */
    orte_plm_base_setup_orted_cmd(&argc, &argv);

    /* Add basic orted command line options, including debug flags */
    orte_plm_base_orted_append_basic_args(&argc, &argv,
                                          "slurm",
                                          &proc_vpid_index,
                                          false);

    /* tell the new daemons the base of the name list so they can compute
     * their own name on the other end */
    rc = orte_util_convert_vpid_to_string(&name_string, map->daemon_vpid_start);
    if (ORTE_SUCCESS != rc) {
        opal_output(0, "plm_slurm: unable to get daemon vpid as string");
        goto cleanup;
    }

    free(argv[proc_vpid_index]);
    argv[proc_vpid_index] = strdup(name_string);
    free(name_string);

    if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
        param = opal_argv_join(argv, ' ');
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: final top-level argv:\n\t%s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == param) ? "NULL" : param));
        if (NULL != param) free(param);
    }

    /* Copy the prefix-directory specified in the
       corresponding app_context.  If there are multiple,
       different prefix's in the app context, complain (i.e., only
       allow one --prefix option for the entire slurm run -- we
       don't support different --prefix'es for different nodes in
       the SLURM plm) */
    for (n = 0; n < jdata->num_apps; n++) {
        char *app_prefix_dir = apps[n]->prefix_dir;
        /* Check for already set cur_prefix_dir -- if different,
           complain */
        if (NULL != app_prefix_dir) {
            if (NULL != cur_prefix &&
                0 != strcmp(cur_prefix, app_prefix_dir)) {
                orte_show_help("help-plm-slurm.txt", "multiple-prefixes",
                               true, cur_prefix, app_prefix_dir);
                /* FIX: was "return ORTE_ERR_FATAL;", which violated the
                 * contract documented above this function -- it bypassed
                 * cleanup, leaking argv/env/jobid_string/nodelist_flat and
                 * never waking orterun via orte_plm_base_launch_failed */
                rc = ORTE_ERR_FATAL;
                goto cleanup;
            }

            /* If not yet set, copy it; iff set, then it's the
               same anyway */
            if (NULL == cur_prefix) {
                cur_prefix = strdup(app_prefix_dir);
                OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                     "%s plm:slurm: Set prefix:%s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     cur_prefix));
            }
        }
    }

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* add the nodelist */
    var = mca_base_param_environ_variable("orte", "slurm", "nodelist");
    opal_setenv(var, nodelist_flat, true, &env);
    free(nodelist_flat);
    nodelist_flat = NULL;   /* prevent double-free in cleanup */
    free(var);

    /* exec the daemon(s) */
    if (ORTE_SUCCESS != (rc = plm_slurm_start_proc(argc, argv, env, cur_prefix))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* do NOT wait for srun to complete. Srun only completes when the processes
     * it starts - in this case, the orteds - complete. Instead, we'll catch
     * any srun failures and deal with them elsewhere
     */

    /* wait for daemons to callback */
    if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: daemon launch failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

 launch_apps:
    /* get here if daemons launch okay - any failures now by apps */
    launching_daemons = false;
    failed_job = active_job;
    if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurm: launch of apps failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

    /* declare the launch a success */
    failed_launch = false;

    if (orte_timing) {
        if (0 != gettimeofday(&launchstop, NULL)) {
            opal_output(0, "plm_slurm: could not obtain stop time");
        } else {
            opal_output(0, "plm_slurm: total job launch time is %ld usec",
                        (launchstop.tv_sec - launchstart.tv_sec) * 1000000 +
                        (launchstop.tv_usec - launchstart.tv_usec));
        }
    }

    /* NOTE(review): rc is ORTE_SUCCESS whenever control reaches here, so
     * this check is effectively dead; retained to preserve behavior */
    if (ORTE_SUCCESS != rc) {
        opal_output(0, "plm:slurm: start_procs returned error %d", rc);
        goto cleanup;
    }

 cleanup:
    if (NULL != argv) {
        opal_argv_free(argv);
    }
    if (NULL != env) {
        opal_argv_free(env);
    }

    if (NULL != jobid_string) {
        free(jobid_string);
    }

    /* FIX: release strings that previously leaked -- nodelist_flat leaks on
     * any error path between its creation and the normal free above
     * (free(NULL) is a no-op thanks to the NULL inits), and cur_prefix was
     * never freed at all. plm_slurm_start_proc does not appear to take
     * ownership of cur_prefix -- TODO confirm. */
    free(nodelist_flat);
    free(cur_prefix);

    /* check for failed launch - if so, force terminate */
    if (failed_launch) {
        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE,
                                    ORTE_JOB_STATE_FAILED_TO_START);
    }

    return rc;
}