/*
 * Respond to a request for the name of the node on which a given
 * process resides.  Unpacks the process name from the incoming
 * buffer, looks up that proc's node in the job data, and sends the
 * node name back to the requester on the FILEM response tag.
 */
static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sender,
                                                      opal_buffer_t* buffer)
{
    opal_buffer_t reply;
    orte_std_cntr_t n = 1;
    orte_job_t *jdata = NULL;
    orte_proc_t **proc_array = NULL;
    orte_process_name_t proc_name;
    int ret;

    OBJ_CONSTRUCT(&reply, opal_buffer_t);

    /* unpack the name of the process we were asked about */
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &proc_name, &n, ORTE_NAME))) {
        ORTE_ERROR_LOG(ret);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    /* look up the job this process belongs to */
    jdata = orte_get_job_data_object(proc_name.jobid);
    if (NULL == jdata) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    /* find the proc object and verify it has a node assigned */
    proc_array = (orte_proc_t**)jdata->procs->addr;
    if (NULL == proc_array[proc_name.vpid] ||
        NULL == proc_array[proc_name.vpid]->node) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    /* pack the node name and ship the answer back to the requester */
    ret = opal_dss.pack(&reply, &(proc_array[proc_name.vpid]->node->name), 1, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    if (0 > (ret = orte_rml.send_buffer(sender, &reply, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) {
        ORTE_ERROR_LOG(ret);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

 CLEANUP:
    OBJ_DESTRUCT(&reply);
}
static void poll_spawns(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; int i, rc; bool failed_launch = true; int local_err; tm_event_t event; /* TM poll for all the spawns */ for (i = 0; i < launched; ++i) { rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err); if (TM_SUCCESS != rc) { opal_output(0, "plm:tm: failed to poll for a spawned daemon, return status = %d", rc); goto cleanup; } if (TM_SUCCESS != local_err) { opal_output(0, "plm:tm: failed to spawn daemon, error code = %d", local_err ); goto cleanup; } } failed_launch = false; #if 0 /* set a timer to tell us if one or more daemon's fails to start - use the * millisec/daemon timeout provided by the user to compute time */ if (0 < orte_startup_timeout) { OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:tm: setting startup timer for %d milliseconds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_startup_timeout)); ORTE_DETECT_TIMEOUT(map->num_new_daemons, orte_startup_timeout*1000, -1, failed_start, state->jdata); } #endif cleanup: /* cleanup */ OBJ_RELEASE(state); /* check for failed launch - if so, force terminate */ if (failed_launch) { ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); } }
/* after we allocate, we need to map the processes * so we know what nodes will be used */ static void allocation_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; orte_job_t *jdata = state->jdata; orte_job_t *daemons; jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; /* get the daemon job object */ if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); goto done; } /* mark that we are not using a VM */ daemons->controls |= ORTE_JOB_CONTROL_NO_VM; #if OPAL_HAVE_HWLOC { hwloc_topology_t t; orte_node_t *node; int i; /* ensure that all nodes point to our topology - we * cannot support hetero nodes with this state machine */ t = (hwloc_topology_t)opal_pointer_array_get_item(orte_node_topologies, 0); for (i=1; i < orte_node_pool->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; } node->topology = t; } } #endif /* move to the map stage */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); done: /* cleanup */ OBJ_RELEASE(state); }
static void plm_yarn_launch_apps(int fd, short args, void *cbdata) { int rc; orte_job_t *jdata; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; /* convenience */ jdata = caddy->jdata; if (ORTE_JOB_STATE_LAUNCH_APPS != caddy->job_state) { ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); return; } /* update job state */ jdata->state = caddy->job_state; /* register recv callback for daemons sync request */ if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_YARN_SYNC_REQUEST, ORTE_RML_PERSISTENT, yarn_hnp_sync_recv, jdata))) { ORTE_ERROR_LOG(rc); } orte_plm_base_launch_apps(fd, args, cbdata); //============heartbeat with AM====== opal_event_t *ev = NULL; ev = (opal_event_t*) malloc(sizeof(opal_event_t)); struct timeval delay; delay.tv_sec = 1; delay.tv_usec = 0; opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata); opal_event_evtimer_add(ev, &delay); //=================================== }
static void launch_daemons(int fd, short args, void *cbdata) { orte_job_map_t *map; char *jobid_string = NULL; char *param; char **argv = NULL; int argc; int rc; char *tmp; char** env = NULL; char *nodelist_flat; char **nodelist_argv; int nodelist_argc; char *vpid_string; char **custom_strings; int num_args, i; char *cur_prefix; int proc_vpid_index; orte_app_context_t *app; orte_node_t *node; orte_std_cntr_t nnode; orte_job_t *daemons; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) { state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); return; } /* start by setting up the virtual machine */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(state->jdata))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* if we don't want to launch, then don't attempt to * launch the daemons - the user really wants to just * look at the proposed process map */ if (orte_do_not_launch) { /* set the state to indicate the daemons reported - this * will trigger the daemons_reported event and cause the * job to move to the following step */ state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); return; } /* Get the map for this job */ if (NULL == (map = daemons->map)) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; goto cleanup; } if (0 == map->num_new_daemons) { /* set the state to indicate the daemons reported - this * will trigger the daemons_reported event and cause the * job to move to the following step */ OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:alps: no new daemons to launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); state->jdata->state = 
ORTE_JOB_STATE_DAEMONS_LAUNCHED; if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) { ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); } OBJ_RELEASE(state); return; } /* need integer value for command line parameter */ orte_util_convert_jobid_to_string(&jobid_string, daemons->jobid); /* * start building argv array */ argv = NULL; argc = 0; /* * ALPS aprun OPTIONS */ /* add the aprun command */ opal_argv_append(&argc, &argv, mca_plm_alps_component.aprun_cmd); /* Append user defined arguments to aprun */ if ( NULL != mca_plm_alps_component.custom_args ) { custom_strings = opal_argv_split(mca_plm_alps_component.custom_args, ' '); num_args = opal_argv_count(custom_strings); for (i = 0; i < num_args; ++i) { opal_argv_append(&argc, &argv, custom_strings[i]); } opal_argv_free(custom_strings); } /* number of processors needed */ opal_argv_append(&argc, &argv, "-n"); asprintf(&tmp, "%lu", (unsigned long) map->num_new_daemons); opal_argv_append(&argc, &argv, tmp); free(tmp); opal_argv_append(&argc, &argv, "-N"); opal_argv_append(&argc, &argv, "1"); opal_argv_append(&argc, &argv, "-cc"); opal_argv_append(&argc, &argv, "none"); /* create nodelist */ nodelist_argv = NULL; nodelist_argc = 0; for (nnode=0; nnode < map->nodes->size; nnode++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { continue; } /* if the daemon already exists on this node, then * don't include it */ if (node->daemon_launched) { continue; } /* otherwise, add it to the list of nodes upon which * we need to launch a daemon */ opal_argv_append(&nodelist_argc, &nodelist_argv, node->name); } if (0 == opal_argv_count(nodelist_argv)) { orte_show_help("help-plm-alps.txt", "no-hosts-in-list", true); rc = ORTE_ERR_FAILED_TO_START; goto cleanup; } nodelist_flat = opal_argv_join(nodelist_argv, ','); opal_argv_free(nodelist_argv); /* if we are using all allocated nodes, then alps * doesn't need a nodelist */ if (map->num_new_daemons < 
orte_num_allocated_nodes) { opal_argv_append(&argc, &argv, "-L"); opal_argv_append(&argc, &argv, nodelist_flat); } /* * ORTED OPTIONS */ /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, NULL, &proc_vpid_index, nodelist_flat); free(nodelist_flat); /* tell the new daemons the base of the name list so they can compute * their own name on the other end */ rc = orte_util_convert_vpid_to_string(&vpid_string, map->daemon_vpid_start); if (ORTE_SUCCESS != rc) { opal_output(0, "plm_alps: unable to create process name"); goto cleanup; } free(argv[proc_vpid_index]); argv[proc_vpid_index] = strdup(vpid_string); free(vpid_string); if (mca_plm_alps_component.debug) { param = opal_argv_join(argv, ' '); if (NULL != param) { opal_output(0, "plm:alps: final top-level argv:"); opal_output(0, "plm:alps: %s", param); free(param); } } /* Copy the prefix-directory specified in the corresponding app_context. 
If there are multiple, different prefix's in the app context, complain (i.e., only allow one --prefix option for the entire alps run -- we don't support different --prefix'es for different nodes in the ALPS plm) */ cur_prefix = NULL; for (i=0; i < state->jdata->apps->size; i++) { char *app_prefix_dir; if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, i))) { continue; } app_prefix_dir = app->prefix_dir; /* Check for already set cur_prefix_dir -- if different, complain */ if (NULL != app_prefix_dir) { if (NULL != cur_prefix && 0 != strcmp (cur_prefix, app_prefix_dir)) { orte_show_help("help-plm-alps.txt", "multiple-prefixes", true, cur_prefix, app_prefix_dir); goto cleanup; } /* If not yet set, copy it; iff set, then it's the same anyway */ if (NULL == cur_prefix) { cur_prefix = strdup(app_prefix_dir); if (mca_plm_alps_component.debug) { opal_output (0, "plm:alps: Set prefix:%s", cur_prefix); } } } } /* setup environment */ env = opal_argv_copy(orte_launch_environ); if (0 < opal_output_get_verbosity(orte_plm_globals.output)) { param = opal_argv_join(argv, ' '); OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:alps: final top-level argv:\n\t%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == param) ? 
"NULL" : param)); if (NULL != param) free(param); } /* exec the daemon(s) */ if (ORTE_SUCCESS != (rc = plm_alps_start_proc(argc, argv, env, cur_prefix))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* indicate that the daemons for this job were launched */ state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; /* flag that launch was successful, so far as we currently know */ failed_launch = false; cleanup: if (NULL != argv) { opal_argv_free(argv); } if (NULL != env) { opal_argv_free(env); } if(NULL != jobid_string) { free(jobid_string); } /* cleanup the caddy */ OBJ_RELEASE(state); /* check for failed launch - if so, force terminate */ if (failed_launch) { ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); } }
/* When working in this function, ALWAYS jump to "cleanup" if
 * you encounter an error so that orterun will be woken up and
 * the job can cleanly terminate
 */
/*
 * State-machine callback: launch the daemon VM for a job via the
 * PBS/Torque TM interface.  Builds the orted argv, connects to TM,
 * then issues one tm_spawn() per target node; completion of the
 * spawns is checked later by poll_spawns() using the file-scope
 * "launched" counter.
 *
 * NOTE(review): tm_events, tm_task_ids, lib_base and bin_base are
 * allocated here and never freed in this function.  The TM library
 * fills tm_task_ids/tm_events asynchronously when the spawn events
 * are polled, so they cannot simply be freed here - confirm where
 * ownership/cleanup is intended (presumably after poll_spawns).
 */
static void launch_daemons(int fd, short args, void *cbdata)
{
    orte_job_map_t *map = NULL;
    orte_app_context_t *app;
    orte_node_t *node;
    int proc_vpid_index;
    char *param;
    char **env = NULL;
    char *var;
    char **argv = NULL;
    char **nodeargv;
    int argc = 0;
    int rc;
    orte_std_cntr_t i;
    char *bin_base = NULL, *lib_base = NULL;
    tm_event_t *tm_events = NULL;
    tm_task_id *tm_task_ids = NULL;
    bool failed_launch = true;
    mode_t current_umask;
    char *nodelist;
    char* vpid_string;
    orte_job_t *daemons, *jdata;
    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;

    jdata = state->jdata;

    /* if we are launching debugger daemons, then just go
     * do it - no new daemons will be launched */
    if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & jdata->controls) {
        jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        OBJ_RELEASE(state);
        return;
    }

    /* setup the virtual machine */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* if we don't want to launch, then don't attempt to
     * launch the daemons - the user really wants to just
     * look at the proposed process map */
    if (orte_do_not_launch) {
        /* set the state to indicate the daemons reported - this
         * will trigger the daemons_reported event and cause the
         * job to move to the following step */
        jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        OBJ_RELEASE(state);
        return;
    }

    /* Get the map for this job */
    if (NULL == (map = daemons->map)) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    if (0 == map->num_new_daemons) {
        /* set the state to indicate the daemons reported - this
         * will trigger the daemons_reported event and cause the
         * job to move to the following step */
        jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        }
        OBJ_RELEASE(state);
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:tm: launching vm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* Allocate a bunch of TM events to use for tm_spawn()ing */
    tm_events = malloc(sizeof(tm_event_t) * map->num_new_daemons);
    if (NULL == tm_events) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    tm_task_ids = malloc(sizeof(tm_task_id) * map->num_new_daemons);
    if (NULL == tm_task_ids) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* add the daemon command (as specified by user) */
    orte_plm_base_setup_orted_cmd(&argc, &argv);

    /* create a list of nodes in this launch */
    nodeargv = NULL;
    for (i = 0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        /* if this daemon already exists, don't launch it! */
        if (node->daemon_launched) {
            continue;
        }
        /* add to list */
        opal_argv_append_nosize(&nodeargv, node->name);
    }
    nodelist = opal_argv_join(nodeargv, ',');
    opal_argv_free(nodeargv);

    /* Add basic orted command line options */
    orte_plm_base_orted_append_basic_args(&argc, &argv, "tm",
                                          &proc_vpid_index,
                                          nodelist);
    free(nodelist);

    if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
        param = opal_argv_join(argv, ' ');
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:tm: final top-level argv:\n\t%s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == param) ? "NULL" : param));
        if (NULL != param) free(param);
    }

    /* open the TM connection; "connected" is a file-scope flag */
    rc = plm_tm_connect();
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }
    connected = true;

    /* Figure out the basenames for the libdir and bindir.  There is a
       lengthy comment about this in plm_rsh_module.c explaining all
       the rationale for how / why we're doing this. */
    lib_base = opal_basename(opal_install_dirs.libdir);
    bin_base = opal_basename(opal_install_dirs.bindir);

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* enable local launch by the orteds */
    var = mca_base_param_env_var ("plm");
    opal_setenv(var, "rsh", true, &env);
    free(var);

    /* add our umask -- see big note in orted.c */
    current_umask = umask(0);
    umask(current_umask);
    asprintf(&var, "0%o", current_umask);
    opal_setenv("ORTE_DAEMON_UMASK_VALUE", var, true, &env);
    free(var);

    /* If we have a prefix, then modify the PATH and
       LD_LIBRARY_PATH environment variables.  We only allow
       a single prefix to be specified.  Since there will
       always be at least one app_context, we take it from
       there */
    app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
    if (NULL != app->prefix_dir) {
        char *newenv;

        for (i = 0; NULL != env && NULL != env[i]; ++i) {
            /* Reset PATH */
            if (0 == strncmp("PATH=", env[i], 5)) {
                asprintf(&newenv, "%s/%s:%s",
                         app->prefix_dir, bin_base, env[i] + 5);
                OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                     "%s plm:tm: resetting PATH: %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     newenv));
                opal_setenv("PATH", newenv, true, &env);
                free(newenv);
            }
            /* Reset LD_LIBRARY_PATH */
            else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
                asprintf(&newenv, "%s/%s:%s",
                         app->prefix_dir, lib_base, env[i] + 16);
                OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                     "%s plm:tm: resetting LD_LIBRARY_PATH: %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     newenv));
                opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
                free(newenv);
            }
        }
    }

    /* Iterate through each of the nodes and spin
     * up a daemon. */
    for (i = 0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        /* if this daemon already exists, don't launch it! */
        if (node->daemon_launched) {
            continue;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:tm: launching on node %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             node->name));

        /* setup process name: patch this daemon's vpid into the argv */
        rc = orte_util_convert_vpid_to_string(&vpid_string, node->daemon->name.vpid);
        if (ORTE_SUCCESS != rc) {
            /* NOTE(review): exit(-1) here bypasses the cleanup path -
             * confirm this hard exit is intended */
            opal_output(0, "plm:tm: unable to get daemon vpid as string");
            exit(-1);
        }
        free(argv[proc_vpid_index]);
        argv[proc_vpid_index] = strdup(vpid_string);
        free(vpid_string);

        /* exec the daemon */
        if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
            param = opal_argv_join(argv, ' ');
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:tm: executing:\n\t%s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == param) ? "NULL" : param));
            if (NULL != param) free(param);
        }

        rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + launched, tm_events + launched);
        if (TM_SUCCESS != rc) {
            orte_show_help("help-plm-tm.txt", "tm-spawn-failed",
                           true, argv[0], node->name, node->launch_id);
            rc = ORTE_ERROR;
            goto cleanup;
        }
        launched++;
    }

    /* indicate that the daemons for this job were launched */
    state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
    daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;

    /* flag that launch was successful, so far as we currently know */
    failed_launch = false;

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:tm:launch: finished spawning orteds",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

 cleanup:
    /* cleanup */
    OBJ_RELEASE(state);

    /* check for failed launch - if so, force terminate */
    if (failed_launch) {
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
    }
}
static void launch_daemons(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; orte_job_map_t *map; int rc; bool failed_launch = true; orte_job_t *daemons; int launched_proc_num = 0; OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:yarn:launch_daemons: LAUNCH DAEMONS CALLED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) { state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); return; } /* start by setting up the virtual machine */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(state->jdata))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* if we don't want to launch, then don't attempt to * launch the daemons - the user really wants to just * look at the proposed process map */ if (orte_do_not_launch) { /* set the state to indicate the daemons reported - this * will trigger the daemons_reported event and cause the * job to move to the following step */ state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); return; } /* Get the map for this job */ if (NULL == (map = daemons->map)) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; goto cleanup; } if (0 == map->num_new_daemons) { /* set the state to indicate the daemons reported - this * will trigger the daemons_reported event and cause the * job to move to the following step */ OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:yarn:launch_daemons: no new daemons to launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) { 
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); } OBJ_RELEASE(state); return; } rc = common_launch_process(daemons, true, &launched_proc_num); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); goto cleanup; } /* if all daemon procs are launched successfully, then modify the job's state */ if (launched_proc_num == (daemons->num_procs - 1)) { /* indicate that the daemons for this job were launched */ state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:yarn:launch_daemons: launch daemon proc successfully with AM", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } /* flag that launch was successful, so far as we currently know */ failed_launch = false; cleanup: /* cleanup the caddy */ OBJ_RELEASE(state); /* check for failed launch - if so, force terminate */ if (failed_launch) { ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); } }
static void proc_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata; orte_proc_t *pptr; orte_process_name_t *proc = &caddy->name; orte_proc_state_t state = caddy->proc_state; orte_proc_t *child, *ptr; opal_buffer_t *alert; orte_plm_cmd_flag_t cmd; int rc=ORTE_SUCCESS; orte_vpid_t null=ORTE_VPID_INVALID; orte_ns_cmp_bitmask_t mask=ORTE_NS_CMP_ALL; int i; /* * if orte is trying to shutdown, just let it */ if (orte_finalizing) { goto cleanup; } OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:default_orted:proc_errors process %s error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc), orte_proc_state_to_str(state))); /* if this is a heartbeat failure, let the HNP handle it */ if (ORTE_PROC_STATE_HEARTBEAT_FAILED == state) { goto cleanup; } /* if this was a failed comm, then see if it was to our * lifeline */ if (ORTE_PROC_STATE_COMM_FAILED == state) { /* if it is our own connection, ignore it */ if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, proc)) { goto cleanup; } /* was it a daemon? 
*/ if (proc->jobid != ORTE_PROC_MY_NAME->jobid) { /* nope - ignore */ goto cleanup; } OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:default:orted daemon %s exited", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* see if this was a lifeline */ if (ORTE_SUCCESS != orte_routed.route_lost(proc)) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:orted daemon %s was a lifeline - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* kill our children */ killprocs(ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD); /* terminate - our routed children will see * us leave and automatically die */ ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); goto cleanup; } /* are any of my children still alive */ for (i=0; i < orte_local_children->size; i++) { if (NULL != (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { if (child->alive && child->state < ORTE_PROC_STATE_UNTERMINATED) { goto cleanup; } } } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:default:orted all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_TERMINATE(0); } else { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:default:orted not exiting, num_routes() == %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)orte_routed.num_routes())); } /* if not, then we can continue */ goto cleanup; } /* get the job object */ if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { /* must already be complete */ goto cleanup; } pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); /* if there are no local procs for this job, we can * ignore this call */ if (0 == jdata->num_local_procs) { goto cleanup; } /* find this proc in the local children */ child = NULL; for (i=0; i < orte_local_children->size; i++) { if (NULL == (ptr = 
(orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { continue; } if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &ptr->name, proc)) { child = ptr; break; } } if (NULL == child) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); goto cleanup; } OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:default_orted got state %s for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_proc_state_to_str(state), ORTE_NAME_PRINT(proc))); if (ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED == state) { child->state = state; /* Decrement the number of local procs */ jdata->num_local_procs--; /* kill this proc */ killprocs(proc->jobid, proc->vpid); goto cleanup; } if (ORTE_PROC_STATE_TERM_NON_ZERO == state) { if (!orte_abort_non_zero_exit) { /* leave the child in orte_local_children so we can * later send the state info after full job termination */ child->state = state; child->waitpid_recvd = true; if (child->iof_complete) { /* the proc has terminated */ child->alive = false; /* Clean up the session directory as if we were the process * itself. This covers the case where the process died abnormally * and didn't cleanup its own session directory. 
*/ orte_session_dir_finalize(&child->name); /* track job status */ jdata->num_terminated++; } /* treat this as normal termination */ goto REPORT_STATE; } /* report this as abnormal termination to the HNP */ alert = OBJ_NEW(opal_buffer_t); /* pack update state command */ cmd = ORTE_PLM_UPDATE_PROC_STATE; if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { ORTE_ERROR_LOG(rc); return; } /* pack only the data for this proc - have to start with the jobid * so the receiver can unpack it correctly */ if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); return; } child->state = state; /* now pack the child's info */ if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, child))) { ORTE_ERROR_LOG(rc); return; } /* remove the child from our local array as it is no longer alive */ opal_pointer_array_set_item(orte_local_children, i, NULL); /* Decrement the number of local procs */ jdata->num_local_procs--; OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "%s errmgr:default_orted reporting proc %s abnormally terminated with non-zero status (local procs = %d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); /* release the child object */ OBJ_RELEASE(child); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, 0, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(alert); } return; } if (ORTE_PROC_STATE_FAILED_TO_START == state || ORTE_PROC_STATE_FAILED_TO_LAUNCH == state) { /* update the proc state */ child->state = state; /* count the proc as having "terminated" */ jdata->num_terminated++; /* leave the error report in this case to the * state machine, which will receive notice * when all local procs have attempted to start * so that we send a consolidated error report * back to the HNP */ goto cleanup; } if (ORTE_PROC_STATE_TERMINATED < state) { /* if the job hasn't completed and the state is abnormally * 
terminated, then we need to alert the HNP right away */ alert = OBJ_NEW(opal_buffer_t); /* pack update state command */ cmd = ORTE_PLM_UPDATE_PROC_STATE; if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { ORTE_ERROR_LOG(rc); return; } /* pack only the data for this proc - have to start with the jobid * so the receiver can unpack it correctly */ if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); return; } child->state = state; /* now pack the child's info */ if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, child))) { ORTE_ERROR_LOG(rc); return; } /* remove the child from our local array as it is no longer alive */ opal_pointer_array_set_item(orte_local_children, i, NULL); /* Decrement the number of local procs */ jdata->num_local_procs--; OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "%s errmgr:default_orted reporting proc %s aborted to HNP (local procs = %d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); /* release the child object */ OBJ_RELEASE(child); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, 0, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); } return; } REPORT_STATE: if (ORTE_PROC_STATE_REGISTERED == state) { /* see if everyone in this job has registered */ if (all_children_registered(proc->jobid)) { /* once everyone registers, send their contact info to * the HNP so it is available to debuggers and anyone * else that needs it */ OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "%s errmgr:default_orted: sending contact info to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); alert = OBJ_NEW(opal_buffer_t); /* pack init routes command */ cmd = ORTE_PLM_INIT_ROUTES_CMD; if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { ORTE_ERROR_LOG(rc); return; } /* pack the jobid */ if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) { 
ORTE_ERROR_LOG(rc); return; } /* pack all the local child vpids */ for (i=0; i < orte_local_children->size; i++) { if (NULL == (ptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { continue; } if (ptr->name.jobid == proc->jobid) { if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &ptr->name.vpid, 1, ORTE_VPID))) { ORTE_ERROR_LOG(rc); return; } } } /* pack an invalid marker */ if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &null, 1, ORTE_VPID))) { ORTE_ERROR_LOG(rc); return; } /* add in contact info for all procs in the job */ if (ORTE_SUCCESS != (rc = pack_child_contact_info(proc->jobid, alert))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&alert); return; } /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, 0, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); } } return; } /* only other state is terminated - see if anyone is left alive */ if (!any_live_children(proc->jobid)) { alert = OBJ_NEW(opal_buffer_t); /* pack update state command */ cmd = ORTE_PLM_UPDATE_PROC_STATE; if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { ORTE_ERROR_LOG(rc); return; } /* pack the data for the job */ if (ORTE_SUCCESS != (rc = pack_state_update(alert, jdata))) { ORTE_ERROR_LOG(rc); return; } OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "%s errmgr:default_orted reporting all procs in %s terminated", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); /* remove all of this job's children from the global list - do not lock * the thread as we are already locked */ for (i=0; i < orte_local_children->size; i++) { if (NULL == (ptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { continue; } if (jdata->jobid == ptr->name.jobid) { opal_pointer_array_set_item(orte_local_children, i, NULL); OBJ_RELEASE(ptr); } } /* ensure the job's local session directory tree is removed */ orte_session_dir_cleanup(jdata->jobid); /* remove this job from our local job data since it is 
complete */ opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL); OBJ_RELEASE(jdata); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, 0, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); } return; } cleanup: OBJ_RELEASE(caddy); }
/*
 * State-machine callback: a job-level error has been reported on this orted.
 * Depending on the error state we either handle it locally (kill local procs,
 * order termination) or defer to the HNP, and in most cases we relay an
 * updated proc-state report to the HNP via the PLM channel.
 *
 * Note: fd/args are unused event-library boilerplate; cbdata is an
 * orte_state_caddy_t* whose ownership transfers to us (released on all paths).
 */
static void job_errors(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata;
    orte_job_state_t jobstate;
    int rc;
    orte_plm_cmd_flag_t cmd;
    opal_buffer_t *alert;

    /*
     * if orte is trying to shutdown, just let it
     */
    if (orte_finalizing) {
        /* NOTE(review): caddy is NOT released on this path - presumably
         * acceptable during finalize, but verify against the state machine */
        return;
    }

    /* if the jdata is NULL, then we abort as this
     * is reporting an unrecoverable error
     */
    if (NULL == caddy->jdata) {
        ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT);
        OBJ_RELEASE(caddy);
        return;
    }

    /* update the state */
    jdata = caddy->jdata;
    jobstate = caddy->job_state;
    jdata->state = jobstate;

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base.output,
                         "%s errmgr:default_orted: job %s reported error state %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid),
                         orte_job_state_to_str(jobstate)));

    switch (jobstate) {
    case ORTE_JOB_STATE_FAILED_TO_START:
        /* file-local helper handles the failed-start bookkeeping */
        failed_start(jdata);
        break;
    case ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED:
        /* update all procs in job */
        update_local_children(jdata, jobstate, ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED);
        /* order all local procs for this job to be killed */
        killprocs(jdata->jobid, ORTE_VPID_WILDCARD);
        break;
    case ORTE_JOB_STATE_COMM_FAILED:
        /* kill all local procs */
        killprocs(ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD);
        /* order termination */
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        /* we are shutting down - do not report to the HNP */
        goto cleanup;
        break;
    case ORTE_JOB_STATE_HEARTBEAT_FAILED:
        /* let the HNP handle this */
        goto cleanup;
        break;
    default:
        /* all other states fall through to the HNP report below */
        break;
    }

    alert = OBJ_NEW(opal_buffer_t);
    /* pack update state command */
    cmd = ORTE_PLM_UPDATE_PROC_STATE;
    if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(alert);
        goto cleanup;
    }
    /* pack the job info */
    if (ORTE_SUCCESS != (rc = pack_state_update(alert, jdata))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(alert);
        goto cleanup;
    }
    /* send it - on success, ownership of alert passes to the RML;
     * on failure we must release it ourselves */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
                                          ORTE_RML_TAG_PLM, 0,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(alert);
    }

 cleanup:
    OBJ_RELEASE(caddy);
}
void orte_iof_base_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { /* indicates we are to close this stream */ OBJ_RELEASE(sink); return; } num_written = write(wev->fd, output->data, output->numbytes); if (num_written < 0) { if (EAGAIN == errno || EINTR == errno) { /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* if the list is getting too large, abort */ if (orte_iof_base.output_limit < opal_list_get_size(&wev->outputs)) { opal_output(0, "IO Forwarding is running too far behind - something is blocking us from writing"); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); goto ABORT; } /* leave the write event running so it will call us again * when the fd is ready. 
*/ return; } /* otherwise, something bad happened so all we can do is abort * this attempt */ OBJ_RELEASE(output); goto ABORT; } else if (num_written < output->numbytes) { /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* if the list is getting too large, abort */ if (orte_iof_base.output_limit < opal_list_get_size(&wev->outputs)) { opal_output(0, "IO Forwarding is running too far behind - something is blocking us from writing"); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); goto ABORT; } /* leave the write event running so it will call us again * when the fd is ready */ return; } OBJ_RELEASE(output); } ABORT: opal_event_del(wev->ev); wev->pending = false; }
/* * Function for selecting one component from all those that are * available. */ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { orte_job_t *jdata; orte_job_map_t *map; int rc; bool did_map; opal_list_item_t *item; orte_rmaps_base_selected_module_t *mod; orte_job_t *parent; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; /* convenience */ jdata = caddy->jdata; /* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT * ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN * DO SO, AND ALL PLM COMMANDS ARE RELAYED TO HNP */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* NOTE: CHECK FOR JDATA->MAP == NULL. IF IT IS, THEN USE * THE VALUES THAT WERE READ BY THE LOCAL MCA PARAMS. THE * PLM PROXY WILL SEND A JOB-OBJECT THAT WILL INCLUDE ANY * MAPPING DIRECTIVES - OTHERWISE, THAT OBJECT WILL HAVE A * NULL MAP FIELD * LONE EXCEPTION - WE COPY DISPLAY MAP ACROSS IF THEY * DIDN'T SET IT */ if (NULL == jdata->map) { opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps: creating new map for job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* create a map object where we will store the results */ map = OBJ_NEW(orte_job_map_t); if (NULL == map) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* load it with the system defaults */ map->mapping = orte_rmaps_base.mapping; map->ranking = orte_rmaps_base.ranking; #if OPAL_HAVE_HWLOC map->binding = opal_hwloc_binding_policy; #endif if (NULL != orte_rmaps_base.ppr) { map->ppr = strdup(orte_rmaps_base.ppr); } map->cpus_per_rank = orte_rmaps_base.cpus_per_rank; map->display_map = orte_rmaps_base.display_map; /* assign the map object to this job */ jdata->map = map; } else { opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps: setting mapping policies for job %s", ORTE_JOBID_PRINT(jdata->jobid)); if (!jdata->map->display_map) { jdata->map->display_map = 
orte_rmaps_base.display_map; } /* set the default mapping policy IFF it wasn't provided */ if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { ORTE_SET_MAPPING_POLICY(jdata->map->mapping, orte_rmaps_base.mapping); } if (!ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)); } /* ditto for rank and bind policies */ if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { ORTE_SET_RANKING_POLICY(jdata->map->ranking, orte_rmaps_base.ranking); } #if OPAL_HAVE_HWLOC if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { jdata->map->binding = opal_hwloc_binding_policy; } #endif } #if OPAL_HAVE_HWLOC /* if we are not going to launch, then we need to set any * undefined topologies to match our own so the mapper * can operate */ if (orte_do_not_launch) { orte_node_t *node; hwloc_topology_t t0; int i; node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); t0 = node->topology; for (i=1; i < orte_node_pool->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; } if (NULL == node->topology) { node->topology = t0; } } } #endif /* cycle thru the available mappers until one agrees to map * the job */ did_map = false; for (item = opal_list_get_first(&orte_rmaps_base.selected_modules); item != opal_list_get_end(&orte_rmaps_base.selected_modules); item = opal_list_get_next(item)) { mod = (orte_rmaps_base_selected_module_t*)item; if (ORTE_SUCCESS == (rc = mod->module->map_job(jdata))) { did_map = true; break; } /* mappers return "next option" if they didn't attempt to * map the job. anything else is a true error. 
*/ if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } /* if we get here without doing the map, or with zero procs in * the map, then that's an error */ if (!did_map || 0 == jdata->num_procs) { orte_show_help("help-orte-rmaps-base.txt", "failed-map", true); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* compute and save local ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } #if OPAL_HAVE_HWLOC /* compute and save bindings */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } #endif /* if it is a dynamic spawn, save the bookmark on the parent's job too */ if (ORTE_JOBID_INVALID != jdata->originator.jobid) { if (NULL != (parent = orte_get_job_data_object(jdata->originator.jobid))) { parent->bookmark = jdata->bookmark; } } /* if we wanted to display the map, now is the time to do it - ignore * daemon job */ if (jdata->map->display_map) { char *output; int i, j; orte_node_t *node; orte_proc_t *proc; if (orte_display_diffable_output) { /* intended solely to test mapping methods, this output * can become quite long when testing at scale. Rather * than enduring all the malloc/free's required to * create an arbitrary-length string, custom-generate * the output a line at a time here */ /* display just the procs in a diffable format */ opal_output(orte_clean_output, "<map>"); fflush(stderr); /* loop through nodes */ for (i=0; i < jdata->map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { continue; } opal_output(orte_clean_output, "\t<host name=%s>", (NULL == node->name) ? 
"UNKNOWN" : node->name); fflush(stderr); for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } #if OPAL_HAVE_HWLOC { char locale[64]; if (NULL != proc->locale) { hwloc_bitmap_list_snprintf(locale, 64, proc->locale->cpuset); } opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu locale=%s binding=%s[%s:%u]>", ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx, (unsigned long)proc->local_rank, (unsigned long)proc->node_rank, locale, (NULL == proc->cpu_bitmap) ? "NULL" : proc->cpu_bitmap, opal_hwloc_base_print_level(jdata->map->bind_level), proc->bind_idx); } #else opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu>", ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx, (unsigned long)proc->local_rank, (unsigned long)proc->node_rank); #endif fflush(stderr); } opal_output(orte_clean_output, "\t</host>"); fflush(stderr); } #if OPAL_HAVE_HWLOC { opal_hwloc_locality_t locality; orte_proc_t *p0; /* test locality - for the first node, print the locality of each proc relative to the first one */ node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, 0); p0 = (orte_proc_t*)opal_pointer_array_get_item(node->procs, 0); opal_output(orte_clean_output, "\t<locality>"); for (j=1; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } locality = opal_hwloc_base_get_relative_locality(node->topology, jdata->map->bind_level, p0->bind_idx, jdata->map->bind_level, proc->bind_idx); opal_output(orte_clean_output, "\t\t<bind_level=%s rank=%s bind_idx=%u rank=%s bind_idx=%u locality=%s>", opal_hwloc_base_print_level(jdata->map->bind_level), ORTE_VPID_PRINT(p0->name.vpid), p0->bind_idx, ORTE_VPID_PRINT(proc->name.vpid), proc->bind_idx, opal_hwloc_base_print_locality(locality)); } opal_output(orte_clean_output, "\t</locality>\n</map>"); 
fflush(stderr); } #else opal_output(orte_clean_output, "\n</map>"); fflush(stderr); #endif } else { opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP); if (orte_xml_output) { fprintf(orte_xml_fp, "%s\n", output); fflush(orte_xml_fp); } else { opal_output(orte_clean_output, "%s", output); } free(output); } } /* set the job state to the next position */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE); /* cleanup */ OBJ_RELEASE(caddy); }
/* process incoming messages in order of receipt */
/*
 * RML receive callback for PLM commands: job-launch requests from proxies,
 * proc-state updates from daemons, and init-routes commands. On any error,
 * an HNP wakes itself up to terminate.
 *
 * BUG FIX: in the ORTE_PLM_UPDATE_PROC_STATE handler, a failed proc-object
 * lookup logged the error and called ORTE_TERMINATE (which only activates a
 * state - it does not exit) and then fell through to dereference the NULL
 * proc pointer. We now jump to CLEANUP instead.
 */
void orte_plm_base_recv(int status, orte_process_name_t* sender,
                        opal_buffer_t* buffer, orte_rml_tag_t tag,
                        void* cbdata)
{
    orte_plm_cmd_flag_t command;
    orte_std_cntr_t count;
    orte_jobid_t job;
    orte_job_t *jdata, *parent;
    opal_buffer_t *answer;
    orte_vpid_t vpid;
    orte_proc_t *proc;
    orte_proc_state_t state;
    orte_exit_code_t exit_code;
    int32_t rc=ORTE_SUCCESS, ret;
    orte_app_context_t *app, *child_app;
    orte_process_name_t name;
    pid_t pid;
    bool running;

    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                         "%s plm:base:receive processing msg",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_PLM_CMD))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    switch (command) {
    case ORTE_PLM_LAUNCH_JOB_CMD:
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:base:receive job launch command from %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(sender)));

        /* unpack the job object */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jdata, &count, ORTE_JOB))) {
            ORTE_ERROR_LOG(rc);
            goto ANSWER_LAUNCH;
        }

        /* record the sender so we know who to respond to */
        jdata->originator.jobid = sender->jobid;
        jdata->originator.vpid = sender->vpid;

        /* get the parent's job object */
        if (NULL == (parent = orte_get_job_data_object(sender->jobid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            goto ANSWER_LAUNCH;
        }

        /* if the prefix was set in the parent's job, we need to transfer
         * that prefix to the child's app_context so any further launch of
         * orteds can find the correct binary. There always has to be at
         * least one app_context in both parent and child, so we don't
         * need to check that here. However, be sure not to overwrite
         * the prefix if the user already provided it!
         */
        app = (orte_app_context_t*)opal_pointer_array_get_item(parent->apps, 0);
        child_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
        if (NULL != app->prefix_dir && NULL == child_app->prefix_dir) {
            child_app->prefix_dir = strdup(app->prefix_dir);
        }

        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:base:receive adding hosts",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        /* process any add-hostfile and add-host options that were provided */
        if (ORTE_SUCCESS != (rc = orte_ras_base_add_hosts(jdata))) {
            ORTE_ERROR_LOG(rc);
            goto ANSWER_LAUNCH;
        }

        if( NULL == parent->bookmark ) {
            /* find the sender's node in the job map */
            if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, sender->vpid))) {
                /* set the bookmark so the child starts from that place - this means
                 * that the first child process could be co-located with the proc
                 * that called comm_spawn, assuming slots remain on that node. Otherwise,
                 * the procs will start on the next available node
                 */
                jdata->bookmark = proc->node;
            }
        } else {
            jdata->bookmark = parent->bookmark;
        }

        /* launch it */
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:base:receive calling spawn",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
            ORTE_ERROR_LOG(rc);
            goto ANSWER_LAUNCH;
        }
        break;

    ANSWER_LAUNCH:
        /* launch failed - return the error code to the requester */
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:base:receive - error on launch: %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc));

        /* setup the response */
        answer = OBJ_NEW(opal_buffer_t);

        /* pack the error code to be returned */
        if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) {
            ORTE_ERROR_LOG(ret);
        }

        /* send the response back to the sender */
        if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_PLM_PROXY, 0,
                                               orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(answer);
        }
        break;

    case ORTE_PLM_UPDATE_PROC_STATE:
        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                             "%s plm:base:receive update proc state command from %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(sender)));
        count = 1;
        /* the buffer contains one or more job sections, each a jobid
         * followed by a vpid-terminated list of (vpid, pid, state, exit_code)
         * tuples */
        while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &job, &count, ORTE_JOBID))) {
            OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                                 "%s plm:base:receive got update_proc_state for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(job)));

            name.jobid = job;
            running = false;
            /* get the job object */
            if (NULL == (jdata = orte_get_job_data_object(job))) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                goto CLEANUP;
            }
            count = 1;
            while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &vpid, &count, ORTE_VPID))) {
                if (ORTE_VPID_INVALID == vpid) {
                    /* flag indicates that this job is complete - move on */
                    break;
                }
                name.vpid = vpid;
                /* unpack the pid */
                count = 1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &pid, &count, OPAL_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }
                /* unpack the state */
                count = 1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &state, &count, ORTE_PROC_STATE))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }
                if (ORTE_PROC_STATE_RUNNING == state) {
                    running = true;
                }
                /* unpack the exit code */
                count = 1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &exit_code, &count, ORTE_EXIT_CODE))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }

                OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                                     "%s plm:base:receive got update_proc_state for vpid %lu state %s exit_code %d",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     (unsigned long)vpid, orte_proc_state_to_str(state), (int)exit_code));

                /* get the proc data object */
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
                    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                    ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    /* ORTE_TERMINATE does not exit - bail out rather than
                     * dereference the NULL proc below */
                    goto CLEANUP;
                }
                proc->state = state;
                proc->pid = pid;
                proc->exit_code = exit_code;
                ORTE_ACTIVATE_PROC_STATE(&name, state);
            }
            if (running) {
                jdata->num_daemons_reported++;
                if (orte_report_launch_progress) {
                    if (0 == jdata->num_daemons_reported % 100 ||
                        jdata->num_daemons_reported == orte_process_info.num_procs) {
                        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_REPORT_PROGRESS);
                    }
                }
            }
            /* prepare for next job */
            count = 1;
        }
        /* running off the end of the buffer is the normal loop exit */
        if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            ORTE_ERROR_LOG(rc);
        } else {
            rc = ORTE_SUCCESS;
        }
        break;

    case ORTE_PLM_INIT_ROUTES_CMD:
        count=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &job, &count, ORTE_JOBID))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        name.jobid = job;
        /* get the job object */
        if (NULL == (jdata = orte_get_job_data_object(job))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            goto CLEANUP;
        }
        count=1;
        while (ORTE_SUCCESS == opal_dss.unpack(buffer, &vpid, &count, ORTE_VPID)) {
            if (ORTE_VPID_INVALID == vpid) {
                break;
            }
            name.vpid = vpid;
            ORTE_ACTIVATE_PROC_STATE(&name, ORTE_PROC_STATE_REGISTERED);
            count=1;
        }
        /* pass the remainder of the buffer to the active module's
         * init_routes API
         */
        if (ORTE_SUCCESS != (rc = orte_routed.init_routes(job, buffer))) {
            ORTE_ERROR_LOG(rc);
            ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        }
        break;

    default:
        ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
        rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
        break;
    }

 CLEANUP:
    /* see if an error occurred - if so, wakeup the HNP so we can exit */
    if (ORTE_PROC_IS_HNP && ORTE_SUCCESS != rc) {
        jdata = NULL;
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                         "%s plm:base:receive done processing commands",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
}
static void recv_data(int fd, short args, void *cbdata) { bool found; int i, rc; orte_node_t *nd, *nd2; opal_list_t nds, ndtmp; opal_list_item_t *item, *itm; char recv_msg[8192]; int nbytes, idx, sjob; char **alloc, *nodelist, *tpn; local_jobtracker_t *ptr, *jtrk; local_apptracker_t *aptrk; orte_app_context_t *app; orte_jobid_t jobid; orte_job_t *jdata; opal_output_verbose(2, orte_ras_base_framework.framework_output, "%s ras:slurm: dynamic allocation - data recvd", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* read the data from the socket and put it in the * nodes field of op */ memset(recv_msg, 0, sizeof(recv_msg)); nbytes = read(fd, recv_msg, sizeof(recv_msg) - 1); opal_output_verbose(2, orte_ras_base_framework.framework_output, "%s ras:slurm: dynamic allocation msg: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg); /* check if we got something */ if (0 == nbytes || 0 == strlen(recv_msg) || strstr(recv_msg, "failure") != NULL) { /* show an error here - basically, a "nothing was available" * message */ orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, (0 == strlen(recv_msg)) ? 
"NO MSG" : recv_msg); ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALLOC_FAILED); return; } /* break the message into its component parts, separated by colons */ alloc = opal_argv_split(recv_msg, ':'); /* the first section contains the ORTE jobid for this allocation */ tpn = strchr(alloc[0], '='); orte_util_convert_string_to_jobid(&jobid, tpn+1); /* get the corresponding job object */ jdata = orte_get_job_data_object(jobid); jtrk = NULL; /* find the associated tracking object */ for (item = opal_list_get_first(&jobs); item != opal_list_get_end(&jobs); item = opal_list_get_next(item)) { ptr = (local_jobtracker_t*)item; if (ptr->jobid == jobid) { jtrk = ptr; break; } } if (NULL == jtrk) { orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, "NO JOB TRACKER"); ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALLOC_FAILED); opal_argv_free(alloc); return; } /* stop the timeout event */ opal_event_del(&jtrk->timeout_ev); /* cycle across all the remaining parts - each is the allocation for * an app in this job */ OBJ_CONSTRUCT(&nds, opal_list_t); OBJ_CONSTRUCT(&ndtmp, opal_list_t); idx = -1; sjob = -1; nodelist = NULL; for (i=1; NULL != alloc[i]; i++) { if (ORTE_SUCCESS != parse_alloc_msg(alloc[i], &idx, &sjob, &nodelist, &tpn)) { orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED); return; } if (idx < 0 || NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, idx))) { orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED); opal_argv_free(alloc); return; } /* track the Slurm jobid */ if (NULL == (aptrk = (local_apptracker_t*)opal_pointer_array_get_item(&jtrk->apps, idx))) { aptrk = OBJ_NEW(local_apptracker_t); opal_pointer_array_set_item(&jtrk->apps, idx, aptrk); } aptrk->sjob = sjob; /* release the current dash_host as that contained the *desired* allocation */ 
opal_argv_free(app->dash_host); app->dash_host = NULL; /* since the nodelist/tpn may contain regular expressions, parse them */ if (ORTE_SUCCESS != (rc = orte_ras_slurm_discover(nodelist, tpn, &ndtmp))) { ORTE_ERROR_LOG(rc); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED); opal_argv_free(alloc); return; } /* transfer the discovered nodes to our node list, and construct * the new dash_host entry to match what was allocated */ while (NULL != (item = opal_list_remove_first(&ndtmp))) { nd = (orte_node_t*)item; opal_argv_append_nosize(&app->dash_host, nd->name); /* check for duplicates */ found = false; for (itm = opal_list_get_first(&nds); itm != opal_list_get_end(&nds); itm = opal_list_get_next(itm)) { nd2 = (orte_node_t*)itm; if (0 == strcmp(nd->name, nd2->name)) { found = true; nd2->slots += nd->slots; OBJ_RELEASE(item); break; } } if (!found) { /* append the new node to our list */ opal_list_append(&nds, item); } } /* cleanup */ free(nodelist); free(tpn); } /* cleanup */ opal_argv_free(alloc); OBJ_DESTRUCT(&ndtmp); if (opal_list_is_empty(&nds)) { /* if we get here, then we were able to contact slurm, * which means we are in an actively managed cluster. * However, slurm indicated that nothing is currently * available that meets our requirements. This is a fatal * situation - we do NOT have the option of running on * user-specified hosts as the cluster is managed. 
*/ OBJ_DESTRUCT(&nds); orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); } /* store the found nodes */ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nds, jdata))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&nds); ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); return; } OBJ_DESTRUCT(&nds); /* default to no-oversubscribe-allowed for managed systems */ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); } /* flag that the allocation is managed */ orte_managed_allocation = true; /* move the job along */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATION_COMPLETE); /* all done */ return; }
/*
 * This function is responsible for:
 * - Constructing the remote absolute path for the specified file/dir
 * - Verify the existence of the file/dir
 * - Determine if the specified file/dir is in fact a file or dir or unknown if not found.
 *
 * Replies to `sender` with (absolute-path, file-type) packed in a buffer on
 * the FILEM_BASE_RESP tag.
 *
 * BUG FIX: the getcwd()/asprintf() return values were unchecked; on failure
 * tmp_name was indeterminate and later passed to opal_dss.pack and free().
 * We now fall back to the filename as given when either call fails.
 */
static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender,
                                                   opal_buffer_t* buffer)
{
    opal_buffer_t answer;
    orte_std_cntr_t count;
    char *filename = NULL;
    char *tmp_name = NULL;
    char cwd[OPAL_PATH_MAX];
    int file_type = ORTE_FILEM_TYPE_UNKNOWN;
    struct stat file_status;
    int rc;

    /*
     * Unpack the data
     */
    OBJ_CONSTRUCT(&answer, opal_buffer_t);
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &filename, &count, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    /*
     * Determine the absolute path of the file
     */
    if (filename[0] != '/') { /* if it is not an absolute path already */
        if (NULL == getcwd(cwd, sizeof(cwd)) ||
            0 > asprintf(&tmp_name, "%s/%s", cwd, filename)) {
            /* could not resolve the cwd (or allocation failed) - fall back
             * to the name as given rather than using an indeterminate
             * pointer */
            tmp_name = strdup(filename);
        }
    } else {
        tmp_name = strdup(filename);
    }

    opal_output_verbose(10, orte_filem_base_output,
                        "filem:base: process_get_remote_path_cmd: %s -> %s: Filename Requested (%s) translated to (%s)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(sender),
                        filename, tmp_name);

    /*
     * Determine if the file/dir exists at that absolute path
     * Determine if the file/dir is a file or a directory
     */
    if (0 != (rc = stat(tmp_name, &file_status))) {
        file_type = ORTE_FILEM_TYPE_UNKNOWN;
    } else {
        /* Is it a directory? */
        if (S_ISDIR(file_status.st_mode)) {
            file_type = ORTE_FILEM_TYPE_DIR;
        } else if (S_ISREG(file_status.st_mode)) {
            file_type = ORTE_FILEM_TYPE_FILE;
        }
    }

    /*
     * Pack up the response
     * Send back the reference type
     * - ORTE_FILEM_TYPE_FILE    = File
     * - ORTE_FILEM_TYPE_DIR     = Directory
     * - ORTE_FILEM_TYPE_UNKNOWN = Could not be determined, or does not exist
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &tmp_name, 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &file_type, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto CLEANUP;
    }

    if (0 > (rc = orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) {
        ORTE_ERROR_LOG(rc);
        ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

 CLEANUP:
    if (NULL != filename) {
        free(filename);
        filename = NULL;
    }
    if (NULL != tmp_name) {
        free(tmp_name);
        tmp_name = NULL;
    }
    OBJ_DESTRUCT(&answer);
}