opal_list_t* orcm_diag_base_prepare_db_input(struct timeval start_time,
                                             struct timeval end_time,
                                             char *nodename,
                                             char *diag_type,
                                             char *diag_subtype,
                                             char *diag_result)
{
    opal_list_t *db_input = NULL;
    opal_value_t *kv = NULL;

    db_input = OBJ_NEW(opal_list_t);
    if (NULL != db_input) {
        /* load the start time */
        kv = orcm_util_load_opal_value("start_time", &start_time, OPAL_TIMEVAL);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));

        /* load the end time */
        kv = orcm_util_load_opal_value("end_time", &end_time, OPAL_TIMEVAL);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));

        /* load the hostname */
        kv = orcm_util_load_opal_value("hostname", nodename, OPAL_STRING);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));

        /* load the diag type */
        kv = orcm_util_load_opal_value("diag_type", diag_type, OPAL_STRING);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));

        /* load the diag subtype */
        kv = orcm_util_load_opal_value("diag_subtype", diag_subtype, OPAL_STRING);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));

        /* load the diag result */
        kv = orcm_util_load_opal_value("test_result", diag_result, OPAL_STRING);
        if (NULL == kv) {
            goto memtestcleanup;
        }
        opal_list_append(db_input, &(kv->super));
    }
    return db_input;

memtestcleanup:
    OPAL_LIST_RELEASE(db_input);
    return NULL;
}
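/*
 * A minimal caller sketch (hypothetical; the db handle, node name, and
 * diagnostic labels are illustrative assumptions): build the key-value
 * list for one diagnostic result and hand ownership to the DB framework,
 * which releases the list via the completion callback. On a NULL return,
 * the helper above has already released everything.
 */
static void my_store_done(int dbhandle, int status, opal_list_t *kvs, void *cbdata)
{
    OPAL_LIST_RELEASE(kvs);  /* list ownership ends here */
}

static void store_diag_result(int dbhandle,
                              struct timeval start, struct timeval end)
{
    opal_list_t *kvs = orcm_diag_base_prepare_db_input(start, end, "node001",
                                                       "memory", "memtest",
                                                       "PASS");
    if (NULL == kvs) {
        return;  /* allocation failed; nothing left to release */
    }
    /* ownership of kvs passes to the callback */
    orcm_db.store(dbhandle, "diag", kvs, my_store_done, NULL);
}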
static void mycleanup(int dbhandle, int status, opal_list_t *kvs, void *cbdata)
{
    OPAL_LIST_RELEASE(kvs);
    if (ORTE_SUCCESS != status) {
        log_enabled = false;
    }
}
int orcm_finalize(void)
{
    --orcm_initialized;
    if (0 != orcm_initialized) {
        /* check for mismatched calls */
        if (0 > orcm_initialized) {
            opal_output(0, "%s MISMATCHED CALLS TO ORCM FINALIZE",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        }
        return ORCM_ERROR;
    }

    /* mark that orte is finalizing so that the system will work correctly */
    orte_finalizing = true;

    /* everyone must finalize and close the cfgi framework */
    (void) mca_base_framework_close(&orcm_cfgi_base_framework);

    /* cleanup any globals */
    if (NULL != orcm_clusters) {
        OPAL_LIST_RELEASE(orcm_clusters);
    }
    if (NULL != orcm_schedulers) {
        OPAL_LIST_RELEASE(orcm_schedulers);
    }

    (void) orte_ess.finalize();

    /* close the ess itself */
    (void) mca_base_framework_close(&orte_ess_base_framework);

    /* close the sst itself */
    (void) mca_base_framework_close(&orcm_sst_base_framework);

    /* cleanup the process info */
    orte_proc_info_finalize();
    orte_initialized = false;

    /* Close the general debug stream */
    opal_output_close(orte_debug_output);

    /* finalize the opal utilities */
    opal_finalize();

    return ORCM_SUCCESS;
}
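/*
 * Sketch of the reference-counted pairing this relies on. orcm_init()
 * (assumed here to increment orcm_initialized; ORCM_TOOL is an
 * illustrative proc type) must be matched one-for-one by orcm_finalize();
 * only the call that drops the count to zero tears the system down.
 */
static int run_orcm_tool(void)
{
    int rc;

    if (ORCM_SUCCESS != (rc = orcm_init(ORCM_TOOL))) {
        return rc;
    }
    /* ... do tool work ... */
    return orcm_finalize();
}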
void orcm_diag_base_db_cleanup(int db_handle, int status, opal_list_t *list,
                               opal_list_t *ret, void *cbdata)
{
    OPAL_LIST_RELEASE(list);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
}
static void mycbfunc(int status, void *cbdata)
{
    opal_list_t *info = (opal_list_t*)cbdata;

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    OPAL_LIST_RELEASE(info);
}
static void opal_value_destruct(opal_value_t* ptr)
{
    if (NULL != ptr->key) {
        free(ptr->key);
    }
    if (OPAL_STRING == ptr->type &&
        NULL != ptr->data.string) {
        free(ptr->data.string);
    } else if (OPAL_BYTE_OBJECT == ptr->type &&
               NULL != ptr->data.bo.bytes) {
        free(ptr->data.bo.bytes);
    } else if (OPAL_LIST == ptr->type &&
               NULL != ptr->data.ptr) {
        OPAL_LIST_RELEASE(ptr->data.ptr);
    }
}
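/*
 * Illustrative sketch (not part of the class implementation): the
 * destructor above defines the ownership contract for opal_value_t -
 * the object owns its key and any string/byte-object/list payload, so
 * callers hand over heap copies and a single OBJ_RELEASE frees it all.
 */
static void opal_value_ownership_example(void)
{
    opal_value_t *kv = OBJ_NEW(opal_value_t);
    kv->key = strdup("hostname");         /* freed by opal_value_destruct */
    kv->type = OPAL_STRING;
    kv->data.string = strdup("node001");  /* also freed by the destructor */
    OBJ_RELEASE(kv);                      /* runs opal_value_destruct */
}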
static void orte_iof_base_proc_destruct(orte_iof_proc_t* ptr)
{
    if (NULL != ptr->stdinev) {
        OBJ_RELEASE(ptr->stdinev);
    }
    if (NULL != ptr->revstdout) {
        OBJ_RELEASE(ptr->revstdout);
    }
    if (NULL != ptr->revstderr) {
        OBJ_RELEASE(ptr->revstderr);
    }
    if (NULL != ptr->revstddiag) {
        OBJ_RELEASE(ptr->revstddiag);
    }
    if (NULL != ptr->subscribers) {
        OPAL_LIST_RELEASE(ptr->subscribers);
    }
}
/*
 * Find a list of ibv_ports matching a set of criteria.
 */
opal_list_t *opal_common_verbs_find_ports(const char *if_include,
                                          const char *if_exclude,
                                          int flags, int stream)
{
    int32_t num_devs;
    struct ibv_device **devices;
    struct ibv_device *device;
    struct ibv_context *device_context;
    struct ibv_device_attr device_attr;
    struct ibv_port_attr port_attr;
    char **if_include_list = NULL, **if_exclude_list = NULL, **if_sanity_list = NULL;
    opal_common_verbs_device_item_t *di;
    opal_common_verbs_port_item_t *pi;
    int rc;
    uint32_t j;
    opal_list_t *port_list = NULL;
    bool want;

    /* Sanity check the include/exclude params */
    if (NULL != if_include && NULL != if_exclude) {
        return NULL;
    }

    /* Query all the IBV devices on the machine.  Use an ompi
       compatibility function, because how to get this list changed
       over the history of the IBV API. */
    devices = opal_ibv_get_device_list(&num_devs);
    if (0 == num_devs) {
        opal_output_verbose(5, stream, "no verbs interfaces found");
        return NULL;
    }

    opal_output_verbose(5, stream, "found %d verbs interface%s",
                        num_devs, (num_devs != 1) ? "s" : "");

    /* Allocate a list to fill */
    port_list = OBJ_NEW(opal_list_t);
    if (NULL == port_list) {
        return NULL;
    }

    if (NULL != if_include) {
        opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
                            if_include);
        if_include_list = opal_argv_split(if_include, ',');
        if_sanity_list = opal_argv_copy(if_include_list);
    } else if (NULL != if_exclude) {
        opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s",
                            if_exclude);
        if_exclude_list = opal_argv_split(if_exclude, ',');
        if_sanity_list = opal_argv_copy(if_exclude_list);
    }

    /* Now loop through all the devices.  Get the attributes for each
       port on each device to see if they match our selection
       criteria. */
    for (int32_t i = 0; i < num_devs; ++i) {
        /* See if this device is on the include/exclude sanity check
           list.  If it is, remove it from the sanity check list (i.e.,
           we should end up with an empty list at the end if all
           entries in the sanity check list exist) */
        device = devices[i];
        check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);

        opal_output_verbose(5, stream, "examining verbs interface: %s",
                            ibv_get_device_name(device));

        device_context = ibv_open_device(device);
        if (NULL == device_context) {
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_open_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            goto err_free_port_list;
        }

        if (ibv_query_device(device_context, &device_attr)) {
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_query_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            goto err_free_port_list;
        }

        /* Now that we have the attributes of this device, remove all
           ports of this device from the sanity check list.  Note that
           IBV ports are indexed from 1, not 0. */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {
            check_sanity(&if_sanity_list, ibv_get_device_name(device), j);
        }

        /* Check the device-specific flags to see if we want this device */
        want = false;

        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
            IBV_TRANSPORT_IB == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IB)",
                                ibv_get_device_name(device));
            want = true;
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IWARP &&
            IBV_TRANSPORT_IWARP == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IWARP)",
                                ibv_get_device_name(device));
            want = true;
        }

        /* Check for RC or UD QP support */
        if (flags & OPAL_COMMON_VERBS_FLAGS_RC) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream, "verbs interface %s supports RC QPs",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream, "verbs interface %s failed to make RC QP",
                                    ibv_get_device_name(device));
            }
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_UD) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream, "verbs interface %s supports UD QPs",
                                    ibv_get_device_name(device));
            } else if (OPAL_ERR_TYPE_MISMATCH == rc) {
                opal_output_verbose(5, stream,
                                    "verbs interface %s made an RC QP! we don't want RC-capable devices",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream, "verbs interface %s failed to make UD QP",
                                    ibv_get_device_name(device));
            }
        }

        /* If we didn't want it, go to the next device */
        if (!want) {
            continue;
        }

        /* Make a device_item_t to hold the device information */
        di = OBJ_NEW(opal_common_verbs_device_item_t);
        if (NULL == di) {
            goto err_free_port_list;
        }
        di->device = device;
        di->context = device_context;
        di->device_attr = device_attr;
        di->device_name = strdup(ibv_get_device_name(device));

        /* Note IBV ports are 1 based (not 0 based) */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {
            /* If we don't want this port (based on if_include /
               if_exclude lists), skip it */
            if (!want_this_port(if_include_list, if_exclude_list, di, j)) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: rejected by include/exclude",
                                    ibv_get_device_name(device), j);
                continue;
            }

            /* Query the port */
            if (ibv_query_port(device_context, (uint8_t) j, &port_attr)) {
                opal_show_help("help-opal-common-verbs.txt",
                               "ibv_query_port fail", true,
                               opal_proc_local_get()->proc_hostname,
                               ibv_get_device_name(device),
                               errno, strerror(errno));
                goto err_free_port_list;
            }

            /* We definitely only want ACTIVE ports */
            if (IBV_PORT_ACTIVE != port_attr.state) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: not ACTIVE",
                                    ibv_get_device_name(device), j);
                continue;
            }

            /* Check the port-specific flags to see if we want this port */
            want = false;
            if (0 == flags) {
                want = true;
            }
            if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                          OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) ==
                (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                 OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) {
                /* If they specified both link layers, then we want this port */
                want = true;
            } else if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                                 OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) {
                /* If they specified neither link layer, then we want this port */
                want = true;
            }
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
            else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB) {
                if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream,
                                        "verbs interface %s:%d has wrong link layer (has %s, want IB)",
                                        ibv_get_device_name(device), j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            } else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET) {
                if (IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream,
                                        "verbs interface %s:%d has wrong link layer (has %s, want Ethernet)",
                                        ibv_get_device_name(device), j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            }
#endif
            if (!want) {
                continue;
            }

            /* If we got this far, we want the port.  Make an item for it. */
            pi = OBJ_NEW(opal_common_verbs_port_item_t);
            if (NULL == pi) {
                goto err_free_port_list;
            }
            pi->device = di;
            pi->port_num = j;
            pi->port_attr = port_attr;
            OBJ_RETAIN(di);

            /* Add the port item to the list */
            opal_list_append(port_list, &pi->super);
            opal_output_verbose(5, stream, "found acceptable verbs interface %s:%d",
                                ibv_get_device_name(device), j);
        }

        /* We're done with the device; if some ports are using it, its
           ref count will be > 0, and therefore the device won't be
           deleted here. */
        OBJ_RELEASE(di);
    }

    /* Sanity check that the devices specified in the if_include /
       if_exclude lists actually existed.  If this is true, then the
       sanity list will now be empty.  If there are still items left on
       the list, then they didn't exist.  Bad.  Print a warning (if the
       warning is not disabled). */
    if (0 != opal_argv_count(if_sanity_list)) {
        if (opal_common_verbs_warn_nonexistent_if) {
            char *str = opal_argv_join(if_sanity_list, ',');
            opal_show_help("help-opal-common-verbs.txt", "nonexistent port",
                           true, opal_proc_local_get()->proc_hostname,
                           ((NULL != if_include) ? "in" : "ex"), str);
            free(str);

            /* Only warn once per process */
            opal_common_verbs_warn_nonexistent_if = false;
        }
    }
    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }
    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);

    /* All done! */
    opal_ibv_free_device_list(devices);
    return port_list;

err_free_port_list:
    OPAL_LIST_RELEASE(port_list);
    opal_ibv_free_device_list(devices);
    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }
    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);
    return NULL;
}
int orte_daemon(int argc, char *argv[])
{
    int ret = 0;
    opal_cmd_line_t *cmd_line = NULL;
    int i;
    opal_buffer_t *buffer;
    char hostname[OPAL_MAXHOSTNAMELEN];
#if OPAL_ENABLE_FT_CR == 1
    char *tmp_env_var = NULL;
#endif

    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals));
    /* initialize the singleton died pipe to an illegal value so we can
     * detect it was set */
    orted_globals.singleton_died_pipe = -1;
    bucket = OBJ_NEW(opal_buffer_t);

    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    if (OPAL_SUCCESS != opal_cmd_line_create(cmd_line, orte_cmd_line_opts)) {
        OBJ_RELEASE(cmd_line);
        exit(1);
    }
    mca_base_cmd_line_setup(cmd_line);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false, false,
                                                   argc, argv))) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        fprintf(stderr, "Usage: %s [OPTION]...\n%s\n", argv[0], args);
        free(args);
        OBJ_RELEASE(cmd_line);
        return ret;
    }

    /*
     * Since this process can now handle MCA/GMCA parameters, make sure to
     * process them.
     */
    mca_base_cmd_line_process_args(cmd_line, &environ, &environ);

    /* Ensure that enough of OPAL is setup for us to be able to run */
    /*
     * NOTE: (JJH)
     * We need to allow 'mca_base_cmd_line_process_args()' to process command
     * line arguments *before* calling opal_init_util() since the command
     * line could contain MCA parameters that affect the way opal_init_util()
     * functions. AMCA parameters are one such option normally received on the
     * command line that affect the way opal_init_util() behaves.
     * It is "safe" to call mca_base_cmd_line_process_args() before
     * opal_init_util() since mca_base_cmd_line_process_args() does *not*
     * depend upon opal_init_util() functionality.
     */
    if (OPAL_SUCCESS != opal_init_util(&argc, &argv)) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for launch purposes. This MUST be
     * done so that we can pass it to any local procs we
     * spawn - otherwise, those local procs won't see any
     * non-MCA envars that were set in the enviro when the
     * orted was executed - e.g., by .csh
     */
    orte_launch_environ = opal_argv_copy(environ);

    /* purge any ess/pmix flags set in the environ when we were launched */
    opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
    opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);

    /* if orte_daemon_debug is set, let someone know we are alive right
     * away just in case we have a problem along the way */
    if (orted_globals.debug) {
        gethostname(hostname, sizeof(hostname));
        fprintf(stderr, "Daemon was launched on %s - beginning to initialize\n",
                hostname);
    }

    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        orte_show_help("help-orted.txt", "orted:usage", false, argv[0], args);
        free(args);
        return 1;
    }

#if defined(HAVE_SETSID)
    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }
#endif

    /* see if they want us to spin until they can connect a debugger to us */
    i = 0;
    while (orted_spin_flag) {
        i++;
        if (1000 < i) {
            i = 0;
        }
    }

#if OPAL_ENABLE_FT_CR == 1
    /* Mark as a tool program */
    (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var);
    opal_setenv(tmp_env_var, "1", true, &environ);
    free(tmp_env_var);
#endif

    /* detach from controlling terminal
     * otherwise, remain attached so output can get to us
     */
    if (!orte_debug_flag && !orte_debug_daemons_flag &&
        orted_globals.daemonize) {
        opal_daemon_init(NULL);
    }

    /* Set the flag telling OpenRTE that I am NOT a
     * singleton, but am "infrastructure" - prevents setting
     * up incorrect infrastructure that only a singleton would
     * require.
     */
    if (orted_globals.hnp) {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    } else {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_DAEMON))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    }

    /* finalize the OPAL utils. As they are opened again from
     * orte_init->opal_init we continue to have a reference count on
     * them. So we have to finalize them twice...
     */
    opal_finalize_util();

    /* bind ourselves if so directed */
    if (NULL != orte_daemon_cores) {
        char **cores = NULL, tmp[128];
        hwloc_obj_t pu;
        hwloc_cpuset_t ours, res;
        int core;

        /* could be a collection of comma-delimited ranges, so
         * use our handy utility to parse it */
        orte_util_parse_range_options(orte_daemon_cores, &cores);
        if (NULL != cores) {
            ours = hwloc_bitmap_alloc();
            hwloc_bitmap_zero(ours);
            res = hwloc_bitmap_alloc();
            for (i = 0; NULL != cores[i]; i++) {
                core = strtoul(cores[i], NULL, 10);
                if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology,
                                                         core, OPAL_HWLOC_LOGICAL))) {
                    /* turn off the show help forwarding as we won't
                     * be able to cycle the event library to send */
                    orte_show_help_finalize();
                    /* the message will now come out locally */
                    orte_show_help("help-orted.txt", "orted:cannot-bind",
                                   true, orte_process_info.nodename,
                                   orte_daemon_cores);
                    ret = ORTE_ERR_NOT_SUPPORTED;
                    hwloc_bitmap_free(ours);
                    hwloc_bitmap_free(res);
                    goto DONE;
                }
                hwloc_bitmap_or(res, ours, pu->cpuset);
                hwloc_bitmap_copy(ours, res);
            }
            /* if the result is all zeros, then don't bind */
            if (!hwloc_bitmap_iszero(ours)) {
                (void) hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
                if (opal_hwloc_report_bindings) {
                    opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp),
                                                opal_hwloc_topology, ours);
                    opal_output(0, "Daemon %s is bound to cores %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
                }
            }
            /* cleanup */
            hwloc_bitmap_free(ours);
            hwloc_bitmap_free(res);
            opal_argv_free(cores);
        }
    }

    if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
        orted_globals.abort = false;
        /* some vpid was ordered to fail. The value can be positive
         * or negative, depending upon the desired method for failure,
         * so need to check both here */
        if (0 > orted_debug_failure) {
            orted_debug_failure = -1 * orted_debug_failure;
            orted_globals.abort = true;
        }
        /* are we the specified vpid? */
        if ((int)ORTE_PROC_MY_NAME->vpid == orted_debug_failure) {
            /* if the user specified we delay, then setup a timer
             * and have it kill us */
            if (0 < orted_debug_failure_delay) {
                ORTE_TIMER_EVENT(orted_debug_failure_delay, 0,
                                 shutdown_callback, ORTE_SYS_PRI);
            } else {
                opal_output(0, "%s is executing clean %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            orted_globals.abort ? "abort" : "abnormal termination");
                /* do -not- call finalize as this will send a message to
                 * the HNP indicating clean termination! Instead, just
                 * forcibly cleanup the local session_dir tree and exit */
                orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);

                /* if we were ordered to abort, do so */
                if (orted_globals.abort) {
                    abort();
                }

                /* otherwise, return with non-zero status */
                ret = ORTE_ERROR_DEFAULT_EXIT_CODE;
                goto DONE;
            }
        }
    }

    /* insert our contact info into our process_info struct so we
     * have it for later use and set the local daemon field to our name */
    orte_oob_base_get_addr(&orte_process_info.my_daemon_uri);
    if (NULL == orte_process_info.my_daemon_uri) {
        /* no way to communicate */
        ret = ORTE_ERROR;
        goto DONE;
    }
    ORTE_PROC_MY_DAEMON->jobid = ORTE_PROC_MY_NAME->jobid;
    ORTE_PROC_MY_DAEMON->vpid = ORTE_PROC_MY_NAME->vpid;

    /* if I am also the hnp, then update that contact info field too */
    if (ORTE_PROC_IS_HNP) {
        orte_process_info.my_hnp_uri = strdup(orte_process_info.my_daemon_uri);
        ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid;
        ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid;
    }

    /* setup the primary daemon command receive function */
    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
                            ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);

    /* output a message indicating we are alive, our name, and our pid
     * for debugging purposes */
    if (orte_debug_daemons_flag) {
        fprintf(stderr, "Daemon %s checking in as pid %ld on host %s\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                (long)orte_process_info.pid, orte_process_info.nodename);
    }

    /* We actually do *not* want the orted to voluntarily yield() the
       processor more than necessary. The orted already blocks when it
       is doing nothing, so it doesn't use any more CPU cycles than it
       should; but when it *is* doing something, we do not want it to
       be unnecessarily delayed because it voluntarily yielded the
       processor in the middle of its work.

       For example: when a message arrives at the orted, we want the
       OS to wake up the orted in a timely fashion (which most OS's
       seem good about doing) and then we want the orted to process
       the message as fast as possible. If the orted yields and lets
       aggressive MPI applications get the processor back, it may be a
       long time before the OS schedules the orted to run again
       (particularly if there is no IO event to wake it up). Hence,
       routed OOB messages (for example) may be significantly delayed
       before being delivered to MPI processes, which can be
       problematic in some scenarios (e.g., COMM_SPAWN, BTL's that
       require OOB messages for wireup, etc.). */
    opal_progress_set_yield_when_idle(false);

    /* Change the default behavior of libevent such that we want to
       continually block rather than blocking for the default timeout
       and then looping around the progress engine again. There should
       be nothing in the orted that cannot block in libevent until
       "something" happens (i.e., there's no need to keep cycling
       through progress because the only things that should happen will
       happen in libevent). This is a minor optimization, but what the
       heck... :-) */
    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);

    /* if requested, report my uri to the indicated pipe */
    if (orted_globals.uri_pipe > 0) {
        orte_job_t *jdata;
        orte_proc_t *proc;
        orte_node_t *node;
        orte_app_context_t *app;
        char *tmp, *nptr, *sysinfo;
        char **singenv = NULL, *string_key, *env_str;

        /* setup the singleton's job */
        jdata = OBJ_NEW(orte_job_t);
        /* default to ompi for now */
        opal_argv_append_nosize(&jdata->personality, "ompi");
        orte_plm_base_create_jobid(jdata);
        opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);

        /* must create a map for it (even though it has no
         * info in it) so that the job info will be picked
         * up in subsequent pidmaps or other daemons won't
         * know how to route
         */
        jdata->map = OBJ_NEW(orte_job_map_t);

        /* setup an app_context for the singleton */
        app = OBJ_NEW(orte_app_context_t);
        app->app = strdup("singleton");
        app->num_procs = 1;
        opal_pointer_array_add(jdata->apps, app);
        jdata->num_apps = 1;

        /* setup a proc object for the singleton - since we
         * -must- be the HNP, and therefore we stored our
         * node on the global node pool, and since the singleton
         * -must- be on the same node as us, indicate that
         */
        proc = OBJ_NEW(orte_proc_t);
        proc->name.jobid = jdata->jobid;
        proc->name.vpid = 0;
        proc->parent = 0;
        ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->app_idx = 0;
        /* obviously, it is on my node */
        node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
        proc->node = node;
        OBJ_RETAIN(node);  /* keep accounting straight */
        opal_pointer_array_add(jdata->procs, proc);
        jdata->num_procs = 1;
        /* add the node to the job map */
        OBJ_RETAIN(node);
        opal_pointer_array_add(jdata->map->nodes, node);
        jdata->map->num_nodes++;
        /* and it obviously is on the node */
        OBJ_RETAIN(proc);
        opal_pointer_array_add(node->procs, proc);
        node->num_procs++;
        /* and obviously it is one of my local procs */
        OBJ_RETAIN(proc);
        opal_pointer_array_add(orte_local_children, proc);
        jdata->num_local_procs = 1;
        /* set the trivial */
        proc->local_rank = 0;
        proc->node_rank = 0;
        proc->app_rank = 0;
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->app_idx = 0;
        ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);

        /* set the ORTE_JOB_TRANSPORT_KEY from the environment */
        orte_pre_condition_transports(jdata, NULL);

        /* register the singleton's nspace with our PMIx server */
        if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
        /* use setup fork to create the envars needed by the singleton */
        if (OPAL_SUCCESS != (ret = opal_pmix.server_setup_fork(&proc->name, &singenv))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }

        /* append the transport key to the envars needed by the singleton */
        if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY,
                                (void**)&string_key, OPAL_STRING) ||
            NULL == string_key) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            goto DONE;
        }
        asprintf(&env_str, OPAL_MCA_PREFIX"orte_precondition_transports=%s",
                 string_key);
        opal_argv_append_nosize(&singenv, env_str);
        free(env_str);

        nptr = opal_argv_join(singenv, '*');
        opal_argv_free(singenv);

        /* create a string that contains our uri + sysinfo + PMIx server URI envars */
        orte_util_convert_sysinfo_to_string(&sysinfo, orte_local_cpu_type,
                                            orte_local_cpu_model);
        asprintf(&tmp, "%s[%s]%s", orte_process_info.my_daemon_uri,
                 sysinfo, nptr);
        free(sysinfo);
        free(nptr);

        /* pass that info to the singleton */
        if (OPAL_SUCCESS != (ret = opal_fd_write(orted_globals.uri_pipe,
                                                 strlen(tmp) + 1, /* need to add 1 to get the NULL */
                                                 tmp))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }

        /* cleanup */
        free(tmp);
        close(orted_globals.uri_pipe);

        /* since a singleton spawned us, we need to harvest
         * any MCA params from the local environment so
         * we can pass them along to any subsequent daemons
         * we may start as the result of a comm_spawn
         */
        for (i = 0; NULL != environ[i]; i++) {
            if (0 == strncmp(environ[i], OPAL_MCA_PREFIX, 9)) {
                /* make a copy to manipulate */
                tmp = strdup(environ[i]);
                /* find the equal sign */
                nptr = strchr(tmp, '=');
                *nptr = '\0';
                nptr++;
                /* add the mca param to the orted cmd line */
                opal_argv_append_nosize(&orted_cmd_line, "-"OPAL_MCA_CMD_LINE_ID);
                opal_argv_append_nosize(&orted_cmd_line, &tmp[9]);
                opal_argv_append_nosize(&orted_cmd_line, nptr);
                free(tmp);
            }
        }
    }

    /* if we were given a pipe to monitor for singleton termination, set that up */
    if (orted_globals.singleton_died_pipe > 0) {
        /* register shutdown handler */
        pipe_handler = (opal_event_t*)malloc(sizeof(opal_event_t));
        opal_event_set(orte_event_base, pipe_handler,
                       orted_globals.singleton_died_pipe,
                       OPAL_EV_READ, pipe_closed, pipe_handler);
        opal_event_add(pipe_handler, NULL);
    }

    /* If I have a parent, then save his contact info so
     * any messages we send can flow thru him.
     */
    orte_parent_uri = NULL;
    (void) mca_base_var_register("orte", "orte", NULL, "parent_uri",
                                 "URI for the parent if tree launch is enabled.",
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0,
                                 MCA_BASE_VAR_FLAG_INTERNAL,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_CONSTANT,
                                 &orte_parent_uri);
    if (NULL != orte_parent_uri) {
        orte_process_name_t parent;
        opal_value_t val;

        /* set the contact info into our local database */
        ret = orte_rml_base_parse_uris(orte_parent_uri, &parent, NULL);
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            free(orte_parent_uri);
            orte_parent_uri = NULL;
            goto DONE;
        }
        OBJ_CONSTRUCT(&val, opal_value_t);
        val.key = OPAL_PMIX_PROC_URI;
        val.type = OPAL_STRING;
        val.data.string = orte_parent_uri;
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(&parent, &val))) {
            ORTE_ERROR_LOG(ret);
            OBJ_DESTRUCT(&val);
            goto DONE;
        }
        val.key = NULL;
        val.data.string = NULL;
        OBJ_DESTRUCT(&val);

        /* don't need this value anymore */
        free(orte_parent_uri);
        orte_parent_uri = NULL;

        /* tell the routed module that we have a path
         * back to the HNP
         */
        if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP,
                                                            &parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
        /* set the lifeline to point to our parent so that we
         * can handle the situation if that lifeline goes away
         */
        if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, &parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
    }

    /* if we are not the HNP...the only time we will be an HNP
     * is if we are launched by a singleton to provide support
     * for it
     */
    if (!ORTE_PROC_IS_HNP) {
        orte_process_name_t target;
        target.jobid = ORTE_PROC_MY_NAME->jobid;

        if (orte_fwd_mpirun_port || orte_static_ports) {
            /* setup the rollup callback */
            orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
                                    ORTE_RML_PERSISTENT, rollup, NULL);
            target.vpid = ORTE_PROC_MY_NAME->vpid;
            /* since we will be waiting for any children to send us
             * their rollup info before sending to our parent, save
             * a little time in the launch phase by "warming up" the
             * connection to our parent while we wait for our children */
            buffer = OBJ_NEW(opal_buffer_t);  // zero-byte message
            if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
                                                   ORTE_PROC_MY_PARENT, buffer,
                                                   ORTE_RML_TAG_WARMUP_CONNECTION,
                                                   orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(buffer);
                goto DONE;
            }
        } else {
            target.vpid = 0;
        }

        /* send the information to the orted report-back point - this function
         * will process the data, but also counts the number of
         * orteds that reported back so the launch procedure can continue.
         * We need to do this at the last possible second as the HNP
         * can turn right around and begin issuing orders to us
         */
        buffer = OBJ_NEW(opal_buffer_t);
        /* insert our name for rollup purposes */
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, ORTE_PROC_MY_NAME,
                                                 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }

        /* get any connection info we may have pushed */
        {
            opal_value_t *val = NULL, *kv;
            opal_list_t *modex;
            int32_t flag;

            if (OPAL_SUCCESS != (ret = opal_pmix.get(ORTE_PROC_MY_NAME, NULL,
                                                     NULL, &val)) ||
                NULL == val) {
                /* just pack a marker indicating we don't have any to share */
                flag = 0;
                if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) {
                    ORTE_ERROR_LOG(ret);
                    OBJ_RELEASE(buffer);
                    goto DONE;
                }
            } else {
                /* the data is returned as a list of key-value pairs in the
                 * opal_value_t */
                if (OPAL_PTR == val->type) {
                    modex = (opal_list_t*)val->data.ptr;
                    flag = (int32_t)opal_list_get_size(modex);
                    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) {
                        ORTE_ERROR_LOG(ret);
                        OBJ_RELEASE(buffer);
                        goto DONE;
                    }
                    OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
                        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &kv, 1, OPAL_VALUE))) {
                            ORTE_ERROR_LOG(ret);
                            OBJ_RELEASE(buffer);
                            goto DONE;
                        }
                    }
                    OPAL_LIST_RELEASE(modex);
                } else {
                    opal_output(0, "VAL KEY: %s", (NULL == val->key) ? "NULL" : val->key);
                    /* single value */
                    flag = 1;
                    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) {
                        ORTE_ERROR_LOG(ret);
                        OBJ_RELEASE(buffer);
                        goto DONE;
                    }
                    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &val, 1, OPAL_VALUE))) {
                        ORTE_ERROR_LOG(ret);
                        OBJ_RELEASE(buffer);
                        goto DONE;
                    }
                }
                OBJ_RELEASE(val);
            }
/* stuff proc attributes for sending back to a proc */
int orte_pmix_server_register_nspace(orte_job_t *jdata)
{
    int rc;
    orte_proc_t *pptr;
    int i, k, n;
    opal_list_t *info, *pmap;
    opal_value_t *kv;
    orte_node_t *node, *mynode;
    opal_vpid_t vpid;
    char **list, **procs, **micro, *tmp, *regex, *cpulist, *peerlist;
    orte_job_t *dmns;
    orte_job_map_t *map;
    orte_app_context_t *app;
    uid_t uid;
    gid_t gid;
    opal_list_t *cache;

    opal_output_verbose(2, orte_pmix_server_globals.output,
                        "%s register nspace for %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* setup the info list */
    info = OBJ_NEW(opal_list_t);
    uid = geteuid();
    gid = getegid();

    /* jobid */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOBID);
    kv->data.string = strdup(ORTE_JOBID_PRINT(jdata->jobid));
    kv->type = OPAL_STRING;
    opal_list_append(info, &kv->super);

    /* offset */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NPROC_OFFSET);
    kv->data.uint32 = jdata->offset;
    kv->type = OPAL_UINT32;
    opal_list_append(info, &kv->super);

    /* check for cached values to add to the job info */
    cache = NULL;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE,
                           (void**)&cache, OPAL_PTR) &&
        NULL != cache) {
        while (NULL != (kv = (opal_value_t*)opal_list_remove_first(cache))) {
            opal_list_append(info, &kv->super);
        }
        orte_remove_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE);
        OBJ_RELEASE(cache);
    }

    /* assemble the node and proc map info */
    list = NULL;
    procs = NULL;
    map = jdata->map;
    for (i = 0; i < map->nodes->size; i++) {
        micro = NULL;
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            opal_argv_append_nosize(&list, node->name);
            /* assemble all the ranks for this job that are on this node */
            for (k = 0; k < node->procs->size; k++) {
                if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                    if (jdata->jobid == pptr->name.jobid) {
                        opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
                    }
                }
            }
            /* assemble the rank/node map */
            if (NULL != micro) {
                tmp = opal_argv_join(micro, ',');
                opal_argv_free(micro);
                opal_argv_append_nosize(&procs, tmp);
                free(tmp);
            }
        }
    }

    /* let the PMIx server generate the nodemap regex */
    if (NULL != list) {
        tmp = opal_argv_join(list, ',');
        opal_argv_free(list);
        list = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_regex(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_NODE_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* let the PMIx server generate the procmap regex */
    if (NULL != procs) {
        tmp = opal_argv_join(procs, ';');
        opal_argv_free(procs);
        procs = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_ppn(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_PROC_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* get our local node */
    if (NULL == (dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, ORTE_PROC_MY_NAME->vpid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    mynode = pptr->node;
    if (NULL == mynode) {
        /* cannot happen */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }

    /* pass our node ID */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODEID);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->index;
    opal_list_append(info, &kv->super);

    /* pass our node size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODE_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->num_procs;
    opal_list_append(info, &kv->super);

    /* pass the number of nodes in the job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NUM_NODES);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = map->num_nodes;
    opal_list_append(info, &kv->super);

    /* univ size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* job size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_procs;
    opal_list_append(info, &kv->super);

    /* number of apps in this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_apps;
    opal_list_append(info, &kv->super);

    /* local size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_local_procs;
    opal_list_append(info, &kv->super);

    /* max procs */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_MAX_PROCS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* register any local clients */
    vpid = ORTE_VPID_MAX;
    for (i = 0; i < mynode->procs->size; i++) {
        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
            continue;
        }
        if (pptr->name.jobid == jdata->jobid) {
            if (pptr->name.vpid < vpid) {
                vpid = pptr->name.vpid;
            }
            /* go ahead and register this client */
            if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
                                                                       (void*)pptr, NULL, NULL))) {
                ORTE_ERROR_LOG(rc);
            }
        }
    }

    /* pass the local ldr */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCALLDR);
    kv->type = OPAL_VPID;
    kv->data.name.vpid = vpid;
    opal_list_append(info, &kv->super);

    /* for each proc in this job, create an object that
     * includes the info describing the proc so the recipient has a complete
     * picture. This allows procs to connect to each other without
     * any further info exchange, assuming the underlying transports
     * support it. We also pass all the proc-specific data here so
     * that each proc can lookup info about every other proc in the job */
    for (n = 0; n < map->nodes->size; n++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
            continue;
        }
        /* construct the list of local peers, while adding
         * each proc's locality info */
        list = NULL;
        procs = NULL;
        cpulist = NULL;
        peerlist = NULL;
        vpid = ORTE_VPID_MAX;
        for (i = 0; i < node->procs->size; i++) {
            if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                continue;
            }
            if (pptr->name.jobid == jdata->jobid) {
                opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
                if (pptr->name.vpid < vpid) {
                    vpid = pptr->name.vpid;
                }
                /* note that we have to pass the cpuset for each local
                 * peer so locality can be computed */
                tmp = NULL;
                if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP,
                                       (void**)&tmp, OPAL_STRING)) {
                    if (NULL != tmp) {
                        opal_argv_append_nosize(&procs, tmp);
                        free(tmp);
                    } else {
                        opal_argv_append_nosize(&procs, "UNBOUND");
                    }
                } else {
                    opal_argv_append_nosize(&procs, "UNBOUND");
                }
            }
        }
        /* construct the list of peers for transmission */
        if (NULL != list) {
            peerlist = opal_argv_join(list, ',');
            opal_argv_free(list);
            list = NULL;
        }
        /* construct the list of cpusets for transmission */
        if (NULL != procs) {
            cpulist = opal_argv_join(procs, ':');
            opal_argv_free(procs);
            procs = NULL;
        }

        /* if this is me, then pass the peers and cpusets to myself
         * in order to maintain backward compatibility for the non-pmix
         * components in OPAL/pmix */
        if (node == mynode) {
            /* pass the list of peers */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
            kv->type = OPAL_STRING;
            kv->data.string = strdup(peerlist);
            opal_list_append(info, &kv->super);

            /* pass the list of cpusets */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
            kv->type = OPAL_STRING;
            kv->data.string = strdup(cpulist);
            opal_list_append(info, &kv->super);
        }

        /* now cycle across each proc on this node, passing all data that
         * varies by proc */
        for (i = 0; i < node->procs->size; i++) {
            if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                continue;
            }
            /* only consider procs from this job */
            if (pptr->name.jobid != jdata->jobid) {
                continue;
            }
            /* setup the proc map object */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_PROC_DATA);
            kv->type = OPAL_PTR;
            kv->data.ptr = OBJ_NEW(opal_list_t);
            opal_list_append(info, &kv->super);
            pmap = kv->data.ptr;

            /* must start with rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid;
            opal_list_append(pmap, &kv->super);

            /* pass the list of peers */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
            kv->type = OPAL_STRING;
            kv->data.string = strdup(peerlist);
            opal_list_append(pmap, &kv->super);

            /* pass the list of cpusets */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
            kv->type = OPAL_STRING;
            kv->data.string = strdup(cpulist);
            opal_list_append(pmap, &kv->super);

            /* appnum */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_APPNUM);
            kv->type = OPAL_UINT32;
            kv->data.uint32 = pptr->app_idx;
            opal_list_append(pmap, &kv->super);

            /* app ldr */
            app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_APPLDR);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = app->first_rank;
            opal_list_append(pmap, &kv->super);

            /* global/univ rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid + jdata->offset;
            opal_list_append(pmap, &kv->super);

            /* app rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_APP_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->app_rank;
            opal_list_append(pmap, &kv->super);

            /* app size - per-proc data, so it belongs on the proc map,
             * not the job-level info list */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_APP_SIZE);
            kv->type = OPAL_UINT32;
            kv->data.uint32 = app->num_procs;
            opal_list_append(pmap, &kv->super);

            /* local rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->local_rank;
            opal_list_append(pmap, &kv->super);

            /* node rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODE_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->node_rank;
            opal_list_append(pmap, &kv->super);

            /* hostname */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_HOSTNAME);
            kv->type = OPAL_STRING;
            kv->data.string = strdup(pptr->node->name);
            opal_list_append(pmap, &kv->super);

            /* node ID */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODEID);
            kv->type = OPAL_UINT32;
            kv->data.uint32 = pptr->node->index;
            opal_list_append(pmap, &kv->super);
        }

        /* cleanup */
        if (NULL != cpulist) {
            free(cpulist);
        }
        if (NULL != peerlist) {
            free(peerlist);
        }
    }

    /* mark the job as registered */
    orte_set_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED,
                       ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);

    /* pass it down */
    /* we are in an event, so no need to callback */
    rc = opal_pmix.server_register_nspace(jdata->jobid, jdata->num_local_procs,
                                          info, NULL, NULL);
    OPAL_LIST_RELEASE(info);
    return rc;
}
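/*
 * Sketch of an assumed consumer (not part of the server code): the
 * job-info list built above nests one opal_list_t per proc under the
 * OPAL_PMIX_PROC_DATA key, so a reader walks the outer list and recurses
 * into entries of type OPAL_PTR.
 */
static void dump_proc_data(opal_list_t *info)
{
    opal_value_t *kv, *pk;

    OPAL_LIST_FOREACH(kv, info, opal_value_t) {
        if (OPAL_PTR == kv->type &&
            NULL != kv->key &&
            0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) {
            /* each proc-data entry is itself a list of key-value pairs */
            OPAL_LIST_FOREACH(pk, (opal_list_t*)kv->data.ptr, opal_value_t) {
                opal_output(0, "  proc-data key: %s", pk->key);
            }
        }
    }
}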
static void res_log(opal_buffer_t *sample)
{
    opal_pstats_t *st = NULL;
    opal_node_stats_t *nst = NULL;
    int rc, n;
    opal_list_t *vals;
    opal_value_t *kv;
    char *node;

    if (!log_enabled) {
        return;
    }

    /* unpack the node name */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &node, &n, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* unpack the node stats */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &nst, &n, OPAL_NODE_STAT))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    if (mca_sensor_resusage_component.log_node_stats) {
        vals = OBJ_NEW(opal_list_t);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("ctime");
        kv->type = OPAL_TIMEVAL;
        kv->data.tv.tv_sec = nst->sample_time.tv_sec;
        kv->data.tv.tv_usec = nst->sample_time.tv_usec;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("hostname");
        kv->type = OPAL_STRING;
        kv->data.string = strdup(node);
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("total_mem");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->total_mem;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("free_mem");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->free_mem;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("buffers");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->buffers;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("cached");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->cached;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("swap_total");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->swap_total;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("swap_free");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->swap_free;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("mapped");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->mapped;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("swap_cached");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->swap_cached;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("la");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->la;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("la5");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->la5;
        opal_list_append(vals, &kv->super);

        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup("la15");
        kv->type = OPAL_FLOAT;
        kv->data.fval = nst->la15;
        opal_list_append(vals, &kv->super);

        /* store it */
        if (0 <= orcm_sensor_base.dbhandle) {
            orcm_db.store(orcm_sensor_base.dbhandle, "nodestats", vals,
                          mycleanup, NULL);
        } else {
            OPAL_LIST_RELEASE(vals);
        }
    }
    OBJ_RELEASE(nst);

    if (mca_sensor_resusage_component.log_process_stats) {
        /* unpack all process stats */
        n = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(sample, &st, &n, OPAL_PSTAT))) {
            vals = OBJ_NEW(opal_list_t);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("node");
            kv->type = OPAL_STRING;
            kv->data.string = strdup(st->node);
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("rank");
            kv->type = OPAL_INT32;
            kv->data.int32 = st->rank;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("pid");
            kv->type = OPAL_PID;
            kv->data.pid = st->pid;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("cmd");
            kv->type = OPAL_STRING;
            kv->data.string = strdup(st->cmd);
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("state");
            kv->type = OPAL_STRING;
            kv->data.string = (char*)malloc(3 * sizeof(char));
            if (NULL == kv->data.string) {
                /* release everything built so far instead of leaking it */
                OBJ_RELEASE(kv);
                OPAL_LIST_RELEASE(vals);
                OBJ_RELEASE(st);
                return;
            }
            kv->data.string[0] = st->state[0];
            kv->data.string[1] = st->state[1];
            kv->data.string[2] = '\0';
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("time");
            kv->type = OPAL_TIMEVAL;
            kv->data.tv.tv_sec = st->time.tv_sec;
            kv->data.tv.tv_usec = st->time.tv_usec;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("percent_cpu");
            kv->type = OPAL_FLOAT;
            kv->data.fval = st->percent_cpu;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("priority");
            kv->type = OPAL_INT32;
            kv->data.int32 = st->priority;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("num_threads");
            kv->type = OPAL_INT16;
            kv->data.int16 = st->num_threads;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("vsize");
            kv->type = OPAL_FLOAT;
            kv->data.fval = st->vsize;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("rss");
            kv->type = OPAL_FLOAT;
            kv->data.fval = st->rss;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("peak_vsize");
            kv->type = OPAL_FLOAT;
            kv->data.fval = st->peak_vsize;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("processor");
            kv->type = OPAL_INT16;
            kv->data.int16 = st->processor;
            opal_list_append(vals, &kv->super);

            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup("sample_time");
            kv->type = OPAL_TIMEVAL;
            kv->data.tv.tv_sec = st->sample_time.tv_sec;
            kv->data.tv.tv_usec = st->sample_time.tv_usec;
            opal_list_append(vals, &kv->super);

            /* store it */
            if (0 <= orcm_sensor_base.dbhandle) {
                orcm_db.store(orcm_sensor_base.dbhandle, "procstats", vals,
                              mycleanup, NULL);
            } else {
                OPAL_LIST_RELEASE(vals);
            }
            OBJ_RELEASE(st);
            n = 1;
        }
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            ORTE_ERROR_LOG(rc);
        }
    }
}
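/*
 * The store path above repeats one pattern many times: allocate an
 * opal_value_t, set key/type/payload, append to the list. A small helper
 * (hypothetical, not part of the sensor component) makes that pattern
 * explicit for the float-valued samples:
 */
static void append_float_kv(opal_list_t *vals, const char *key, float fval)
{
    opal_value_t *kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(key);
    kv->type = OPAL_FLOAT;
    kv->data.fval = fval;
    opal_list_append(vals, &kv->super);
}
/* e.g. append_float_kv(vals, "total_mem", nst->total_mem); */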
static void opal_adjacency_list_destruct(opal_adjacency_list_t *aj_list)
{
    aj_list->vertex = NULL;
    OPAL_LIST_RELEASE(aj_list->edges);
}
static void opal_graph_destruct(opal_graph_t *graph)
{
    OPAL_LIST_RELEASE(graph->adjacency_list);
    graph->number_of_vertices = 0;
    graph->number_of_edges = 0;
}
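/*
 * Sketch of the cascading teardown these destructors provide (the usage
 * function itself is hypothetical): releasing the graph releases its
 * adjacency lists, and each adjacency list releases its edge list, so a
 * single OBJ_RELEASE frees the whole structure.
 */
static void graph_release_example(void)
{
    opal_graph_t *graph = OBJ_NEW(opal_graph_t);
    /* ... add vertices and edges ... */
    OBJ_RELEASE(graph);  /* opal_graph_destruct -> per-list
                          * opal_adjacency_list_destruct -> edges */
}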
/* stuff proc attributes for sending back to a proc */
int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
{
    int rc;
    orte_proc_t *pptr;
    int i, k, n;
    opal_list_t *info, *pmap;
    opal_value_t *kv;
    orte_node_t *node, *mynode;
    opal_vpid_t vpid;
    char **list, **procs, **micro, *tmp, *regex;
    orte_job_t *dmns;
    orte_job_map_t *map;
    orte_app_context_t *app;
    uid_t uid;
    gid_t gid;
    opal_list_t *cache;
    hwloc_obj_t machine;

    opal_output_verbose(2, orte_pmix_server_globals.output,
                        "%s register nspace for %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* setup the info list */
    info = OBJ_NEW(opal_list_t);
    uid = geteuid();
    gid = getegid();

    /* pass our nspace/rank */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_SERVER_NSPACE);
    kv->data.string = strdup(ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
    kv->type = OPAL_STRING;
    opal_list_append(info, &kv->super);

    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_SERVER_RANK);
    kv->data.uint32 = ORTE_PROC_MY_NAME->vpid;
    kv->type = OPAL_UINT32;
    opal_list_append(info, &kv->super);

    /* jobid */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOBID);
    kv->data.string = strdup(ORTE_JOBID_PRINT(jdata->jobid));
    kv->type = OPAL_STRING;
    opal_list_append(info, &kv->super);

    /* offset */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NPROC_OFFSET);
    kv->data.uint32 = jdata->offset;
    kv->type = OPAL_UINT32;
    opal_list_append(info, &kv->super);

    /* check for cached values to add to the job info */
    cache = NULL;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE,
                           (void**)&cache, OPAL_PTR) &&
        NULL != cache) {
        while (NULL != (kv = (opal_value_t*)opal_list_remove_first(cache))) {
            opal_list_append(info, &kv->super);
        }
        orte_remove_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE);
        OBJ_RELEASE(cache);
    }

    /* assemble the node and proc map info */
    list = NULL;
    procs = NULL;
    map = jdata->map;
    for (i = 0; i < map->nodes->size; i++) {
        micro = NULL;
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            opal_argv_append_nosize(&list, node->name);
            /* assemble all the ranks for this job that are on this node */
            for (k = 0; k < node->procs->size; k++) {
                if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                    if (jdata->jobid == pptr->name.jobid) {
                        opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
                    }
                }
            }
            /* assemble the rank/node map */
            if (NULL != micro) {
                tmp = opal_argv_join(micro, ',');
                opal_argv_free(micro);
                opal_argv_append_nosize(&procs, tmp);
                free(tmp);
            }
        }
    }

    /* let the PMIx server generate the nodemap regex */
    if (NULL != list) {
        tmp = opal_argv_join(list, ',');
        opal_argv_free(list);
        list = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_regex(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_NODE_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* let the PMIx server generate the procmap regex */
    if (NULL != procs) {
        tmp = opal_argv_join(procs, ';');
        opal_argv_free(procs);
        procs = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_ppn(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_PROC_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* get our local node */
    if (NULL == (dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, ORTE_PROC_MY_NAME->vpid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    mynode = pptr->node;
    if (NULL == mynode) {
        /* cannot happen */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }

    /* pass our node ID */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODEID);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->index;
    opal_list_append(info, &kv->super);

    /* pass our node size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODE_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->num_procs;
    opal_list_append(info, &kv->super);

    /* pass the number of nodes in the job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NUM_NODES);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = map->num_nodes;
    opal_list_append(info, &kv->super);

    /* univ size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* job size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_procs;
    opal_list_append(info, &kv->super);

    /* number of apps in this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_apps;
    opal_list_append(info, &kv->super);

    /* local size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_local_procs;
    opal_list_append(info, &kv->super);

    /* max procs */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_MAX_PROCS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* topology signature */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_TOPOLOGY_SIGNATURE);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_topo_signature);
    opal_list_append(info, &kv->super);

    /* total available physical memory */
    machine = hwloc_get_next_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_MACHINE, NULL);
    if (NULL != machine) {
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_AVAIL_PHYS_MEMORY);
        kv->type = OPAL_UINT64;
#if HWLOC_API_VERSION < 0x20000
        kv->data.uint64 = machine->memory.total_memory;
#else
        kv->data.uint64 = machine->total_memory;
#endif
        opal_list_append(info, &kv->super);
    }

    /* pass the mapping policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_MAPBY);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_rmaps_base_print_mapping(jdata->map->mapping));
    opal_list_append(info, &kv->super);

    /* pass the ranking policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_RANKBY);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_rmaps_base_print_ranking(jdata->map->ranking));
    opal_list_append(info, &kv->super);

    /* pass the binding policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_BINDTO);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(opal_hwloc_base_print_binding(jdata->map->binding));
    opal_list_append(info, &kv->super);

    /* register any local clients */
    vpid = ORTE_VPID_MAX;
    micro = NULL;
    for (i = 0; i < mynode->procs->size; i++) {
        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
            continue;
        }
        if (pptr->name.jobid == jdata->jobid) {
            opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
            if (pptr->name.vpid < vpid) {
                vpid = pptr->name.vpid;
            }
            /* go ahead and register this client */
            if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
                                                                       (void*)pptr, NULL, NULL))) {
                ORTE_ERROR_LOG(rc);
            }
        }
    }
    if (NULL != micro) {
        /* pass the local peers */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv->type = OPAL_STRING;
        kv->data.string = opal_argv_join(micro, ',');
        opal_argv_free(micro);
        opal_list_append(info, &kv->super);
    }

    /* pass the local ldr */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCALLDR);
    kv->type = OPAL_VPID;
    kv->data.name.vpid = vpid;
    opal_list_append(info, &kv->super);

    /* for each proc in this job, create an object that
     * includes the info describing the proc so the recipient has a complete
     * picture. This allows procs to connect to each other without
     * any further info exchange, assuming the underlying transports
     * support it. We also pass all the proc-specific data here so
     * that each proc can lookup info about every other proc in the job */
    for (n = 0; n < map->nodes->size; n++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
            continue;
        }
        /* cycle across each proc on this node, passing all data that
         * varies by proc */
        for (i = 0; i < node->procs->size; i++) {
            if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                continue;
            }
            /* only consider procs from this job */
            if (pptr->name.jobid != jdata->jobid) {
                continue;
            }
            /* setup the proc map object */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_PROC_DATA);
            kv->type = OPAL_PTR;
            kv->data.ptr = OBJ_NEW(opal_list_t);
            opal_list_append(info, &kv->super);
            pmap = kv->data.ptr;

            /* must start with rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid;
            opal_list_append(pmap, &kv->super);

            /* location, for local procs */
            if (node == mynode) {
                tmp = NULL;
                if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP,
                                       (void**)&tmp, OPAL_STRING) &&
                    NULL != tmp) {
                    kv = OBJ_NEW(opal_value_t);
                    kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
                    kv->type = OPAL_STRING;
                    kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
                    opal_list_append(pmap, &kv->super);
                    free(tmp);
                } else {
                    /* the proc is not bound */
                    kv = OBJ_NEW(opal_value_t);
                    kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
                    kv->type = OPAL_STRING;
                    kv->data.string = NULL;
                    opal_list_append(pmap, &kv->super);
                }
            }

            /* global/univ rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid + jdata->offset;
            opal_list_append(pmap, &kv->super);

            if (1 < jdata->num_apps) {
                /* appnum */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APPNUM);
                kv->type = OPAL_UINT32;
                kv->data.uint32 = pptr->app_idx;
                opal_list_append(pmap, &kv->super);

                /* app ldr */
                app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APPLDR);
                kv->type = OPAL_VPID;
                kv->data.name.vpid = app->first_rank;
                opal_list_append(pmap, &kv->super);

                /* app rank */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APP_RANK);
                kv->type = OPAL_VPID;
                kv->data.name.vpid = pptr->app_rank;
                opal_list_append(pmap, &kv->super);

                /* app size - per-proc data, so it belongs on the proc map,
                 * not the job-level info list */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APP_SIZE);
                kv->type = OPAL_UINT32;
                kv->data.uint32 = app->num_procs;
                opal_list_append(pmap, &kv->super);
            }

            /* local rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->local_rank;
            opal_list_append(pmap, &kv->super);

            /* node rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODE_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->node_rank;
            opal_list_append(pmap, &kv->super);

            /* node ID */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODEID);
            kv->type = OPAL_UINT32;
            kv->data.uint32 = pptr->node->index;
            opal_list_append(pmap, &kv->super);

            if (map->num_nodes < orte_hostname_cutoff) {
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_HOSTNAME);
                kv->type = OPAL_STRING;
                kv->data.string = strdup(pptr->node->name);
                opal_list_append(pmap, &kv->super);
            }
        }
    }

    /* mark the job as registered */
    orte_set_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED,
                       ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);

    /* pass it down */
    /* we are in an event, so no need to callback */
    rc = opal_pmix.server_register_nspace(jdata->jobid, jdata->num_local_procs,
                                          info, NULL, NULL);
    OPAL_LIST_RELEASE(info);

    /* if the user has connected us to an external server, then we must
     * assume there is going to be some cross-mpirun exchange, and so
     * we protect against that situation by publishing the job info
     * for this job - this allows any subsequent "connect" to retrieve
     * the job info */
    if (NULL != orte_data_server_uri) {
        opal_buffer_t buf;

        OBJ_CONSTRUCT(&buf, opal_buffer_t);
        if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &jdata, 1, ORTE_JOB))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&buf);
            return rc;
        }
        info = OBJ_NEW(opal_list_t);
        /* create a key-value with the key being the string jobid
         * and the value being the byte object */
        kv = OBJ_NEW(opal_value_t);
        orte_util_convert_jobid_to_string(&kv->key, jdata->jobid);
        kv->type = OPAL_BYTE_OBJECT;
        opal_dss.unload(&buf, (void**)&kv->data.bo.bytes, &kv->data.bo.size);
        OBJ_DESTRUCT(&buf);
        opal_list_append(info, &kv->super);

        /* set the range to be session */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_RANGE);
        kv->type = OPAL_UINT;
        kv->data.uint = OPAL_PMIX_RANGE_SESSION;
        opal_list_append(info, &kv->super);

        /* set the persistence to be app */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_PERSISTENCE);
        kv->type = OPAL_INT;
        kv->data.integer = OPAL_PMIX_PERSIST_APP;
        opal_list_append(info, &kv->super);

        /* add our effective userid to the directives */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_USERID);
        kv->type = OPAL_UINT32;
        kv->data.uint32 = geteuid();
        opal_list_append(info, &kv->super);

        /* now publish it */
        if (ORTE_SUCCESS != (rc = pmix_server_publish_fn(ORTE_PROC_MY_NAME,
                                                         info, mycbfunc, info))) {
            ORTE_ERROR_LOG(rc);
        }
    }

    return rc;
}
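/*
 * Ownership sketch for the publish tail above (the caller shown here is
 * hypothetical): the directive list is passed to pmix_server_publish_fn
 * with itself as cbdata, so mycbfunc, defined earlier in this section,
 * is what finally releases it once the publish completes. Whether the
 * callback still fires on an immediate error return is an assumption
 * worth verifying before adding a local release on that path.
 */
static void publish_directives_example(opal_list_t *directives)
{
    int rc;

    /* on completion, mycbfunc(status, directives) releases the list */
    if (ORTE_SUCCESS != (rc = pmix_server_publish_fn(ORTE_PROC_MY_NAME,
                                                     directives,
                                                     mycbfunc, directives))) {
        ORTE_ERROR_LOG(rc);
    }
}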