static int init(void) { char cwd[OPAL_PATH_MAX]; int rc; OPAL_OUTPUT_VERBOSE((5, orcm_pvsn_base_framework.framework_output, "%s pvsn:wwulf:init", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { return rc; } /* check to see if we can execute wwsh */ cmd = opal_path_findv("wwsh", X_OK, environ, cwd); if (NULL == cmd) { return ORTE_ERR_EXE_NOT_FOUND; } OPAL_OUTPUT_VERBOSE((5, orcm_pvsn_base_framework.framework_output, "%s pvsn:wwulf:init path to wwsh %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cmd)); return ORCM_SUCCESS; }
static void check_debugger(int fd, short event, void *arg) { struct timeval now; opal_event_t *tmp = (opal_event_t*)arg; orte_job_t *jdata; orte_app_context_t *app; char cwd[OPAL_PATH_MAX]; int rc; int32_t ljob; if (MPIR_being_debugged) { if (orte_debug_flag) { opal_output(0, "%s Launching debugger %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), MPIR_executable_path); } /* a debugger has attached! All the MPIR_Proctable * data is already available, so we only need to * check to see if we should spawn any daemons */ if ('\0' != MPIR_executable_path[0]) { /* this will be launched just like a regular job, * so we do not use the global orte_debugger_daemon * as this is reserved for co-location upon startup */ jdata = OBJ_NEW(orte_job_t); /* create a jobid for these daemons - this is done solely * to avoid confusing the rest of the system's bookkeeping */ orte_plm_base_create_jobid(jdata); /* flag the job as being debugger daemons */ jdata->controls |= ORTE_JOB_CONTROL_DEBUGGER_DAEMON; /* unless directed, we do not forward output */ if (!MPIR_forward_output) { jdata->controls &= ~ORTE_JOB_CONTROL_FORWARD_OUTPUT; } /* set the mapping policy to "pernode" so we only get * one debugger daemon on each node */ jdata->map = OBJ_NEW(orte_job_map_t); jdata->map->npernode = 1; /* add it to the global job pool */ ljob = ORTE_LOCAL_JOBID(jdata->jobid); opal_pointer_array_set_item(orte_job_data, ljob, jdata); /* create an app_context for the debugger daemon */ app = OBJ_NEW(orte_app_context_t); app->app = strdup((char*)MPIR_executable_path); if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { orte_show_help("help-orterun.txt", "orterun:init-failure", true, "get the cwd", rc); OBJ_RELEASE(jdata); goto RELEASE; } app->cwd = strdup(cwd); app->user_specified_cwd = false; opal_argv_append_nosize(&app->argv, app->app); build_debugger_args(app); opal_pointer_array_add(jdata->apps, &app->super); jdata->num_apps = 1; /* now go ahead and spawn this job */ if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) { ORTE_ERROR_LOG(rc); } } RELEASE: /* notify the debugger that all is ready */ MPIR_Breakpoint(); } else { /* reissue the timer to wake us up again */ now.tv_sec = orte_debugger_check_rate; now.tv_usec = 0; opal_evtimer_add(tmp, &now); } }
int main(int argc, char *argv[]) { int32_t ret, i; opal_cmd_line_t cmd_line; char **inpt; opal_buffer_t *buf; int count; char cwd[OPAL_PATH_MAX]; orcm_tool_cmd_t flag = ORCM_TOOL_STOP_CMD; int32_t master=0; uint16_t jfam=0; /*************** * Initialize ***************/ /* * Make sure to init util before parse_args * to ensure installdirs is setup properly * before calling mca_base_open(); */ if( ORTE_SUCCESS != (ret = orcm_init_util()) ) { return ret; } /* initialize the globals */ my_globals.help = false; my_globals.replicas = NULL; my_globals.sched = NULL; my_globals.hnp_uri = NULL; /* Parse the command line options */ opal_cmd_line_create(&cmd_line, cmd_line_opts); mca_base_open(); mca_base_cmd_line_setup(&cmd_line); ret = opal_cmd_line_parse(&cmd_line, true, argc, argv); /* extract the MCA/GMCA params */ mca_base_cmd_line_process_args(&cmd_line, &environ, &environ); /** * Now start parsing our specific arguments */ if (OPAL_SUCCESS != ret || my_globals.help) { char *args = NULL; args = opal_cmd_line_get_usage_msg(&cmd_line); orte_show_help("help-orcm-stop.txt", "usage", true, args); free(args); return ORTE_ERROR; } if (NULL != my_globals.sched) { if (0 == strncmp(my_globals.sched, "file", strlen("file")) || 0 == strncmp(my_globals.sched, "FILE", strlen("FILE"))) { char input[1024], *filename; FILE *fp; /* it is a file - get the filename */ filename = strchr(my_globals.sched, ':'); if (NULL == filename) { /* filename is not correctly formatted */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-bad", true, "scheduler", my_globals.sched); return ORTE_ERROR; } ++filename; /* space past the : */ if (0 >= strlen(filename)) { /* they forgot to give us the name! */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-bad", true, "scheduler", my_globals.sched); return ORTE_ERROR; } /* open the file and extract the pid */ fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-access", true, "scheduler", filename); return ORTE_ERROR; } if (NULL == fgets(input, 1024, fp)) { /* something malformed about file */ fclose(fp); orte_show_help("help-openrcm-runtime.txt", "hnp-file-bad", "scheduler", true, filename); return ORTE_ERROR; } fclose(fp); input[strlen(input)-1] = '\0'; /* remove newline */ /* convert the pid */ master = strtoul(input, NULL, 10); } else { /* should just be the master itself */ master = strtoul(my_globals.sched, NULL, 10); } } /* if we were given HNP contact info, parse it and * setup the process_info struct with that info */ if (NULL != my_globals.hnp_uri) { if (0 == strncmp(my_globals.hnp_uri, "file", strlen("file")) || 0 == strncmp(my_globals.hnp_uri, "FILE", strlen("FILE"))) { char input[1024], *filename; FILE *fp; /* it is a file - get the filename */ filename = strchr(my_globals.hnp_uri, ':'); if (NULL == filename) { /* filename is not correctly formatted */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-bad", true, "uri", my_globals.hnp_uri); goto cleanup; } ++filename; /* space past the : */ if (0 >= strlen(filename)) { /* they forgot to give us the name! */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-bad", true, "uri", my_globals.hnp_uri); goto cleanup; } /* open the file and extract the uri */ fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! */ orte_show_help("help-openrcm-runtime.txt", "hnp-filename-access", true, filename); goto cleanup; } if (NULL == fgets(input, 1024, fp)) { /* something malformed about file */ fclose(fp); orte_show_help("help-openrcm-runtime.txt", "hnp-file-bad", true, filename); goto cleanup; } fclose(fp); input[strlen(input)-1] = '\0'; /* remove newline */ /* put into the process info struct */ orte_process_info.my_hnp_uri = strdup(input); } else { /* should just be the uri itself */ orte_process_info.my_hnp_uri = strdup(my_globals.hnp_uri); } } if (OPAL_SUCCESS != opal_getcwd(cwd, sizeof(cwd))) { opal_output(orte_clean_output, "failed to get cwd\n"); return ORTE_ERR_NOT_FOUND; } /*************************** * We need all of OPAL and ORTE - this will * automatically connect us to the CM ***************************/ if (ORTE_SUCCESS != orcm_init(ORCM_TOOL)) { orcm_finalize(); return 1; } /* if we were given the hnp uri, extract the job family for the * master id */ if (NULL != my_globals.hnp_uri) { master = ORTE_JOB_FAMILY(ORTE_PROC_MY_HNP->jobid); } /* register to receive responses */ if (ORCM_SUCCESS != (ret = orcm_pnp.register_receive("orcm-stop", "0.1", "alpha", ORCM_PNP_GROUP_INPUT_CHANNEL, ORCM_PNP_TAG_TOOL, ack_recv, NULL))) { ORTE_ERROR_LOG(ret); goto cleanup; } /* announce my existence */ if (ORCM_SUCCESS != (ret = orcm_pnp.announce("orcm-stop", "0.1", "alpha", NULL))) { ORTE_ERROR_LOG(ret); goto cleanup; } /* setup the buffer to send our cmd */ buf = OBJ_NEW(opal_buffer_t); /* indicate the scheduler to be used */ jfam = master & 0x0000ffff; opal_dss.pack(buf, &jfam, 1, OPAL_UINT16); /* get the apps to stop */ inpt = NULL; opal_cmd_line_get_tail(&cmd_line, &count, &inpt); if (0 == count) { /* if no apps were given, then we stop the entire * DVM itself by telling the daemon's to terminate */ if (ORCM_SUCCESS != (ret = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL, NULL, ORCM_PNP_TAG_TERMINATE, NULL, 0, buf, cbfunc, NULL))) { ORTE_ERROR_LOG(ret); } goto cleanup; } else { /* load the stop cmd */ opal_dss.pack(buf, &flag, 1, ORCM_TOOL_CMD_T); /* for each app */ for (i=0; NULL != inpt[i]; i++) { opal_dss.pack(buf, &inpt[i], 1, OPAL_STRING); /* pack the replicas to be stopped */ opal_dss.pack(buf, &my_globals.replicas, 1, OPAL_STRING); } opal_argv_free(inpt); if (ORCM_SUCCESS != (ret = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL, NULL, ORCM_PNP_TAG_TOOL, NULL, 0, buf, cbfunc, NULL))) { ORTE_ERROR_LOG(ret); } } /* now wait for ack */ opal_event_dispatch(opal_event_base); /*************** * Cleanup ***************/ cleanup: orcm_finalize(); return ret; }