/*
 * Launch the debugger daemons for the job named by appspace and register
 * a handler for the termination of the daemon job.
 *
 * appspace - namespace of the application being debugged; passed to the
 *            launcher as the PMIX_DEBUG_JOB directive
 * myrel    - release object delivered to release_fn via
 *            PMIX_EVENT_RETURN_OBJECT; its nspace field is set to a heap
 *            copy of the daemon job's namespace
 *
 * Returns the failing status if the argv setup or PMIx_Spawn fails;
 * otherwise the status recorded in the registration lock once the event
 * handler registration has completed.
 */
static pmix_status_t spawn_debugger(char *appspace, myrel_t *myrel)
{
    pmix_status_t rc;
    pmix_info_t *dinfo;
    pmix_app_t *debugger;
    size_t dninfo;
    char cwd[1024];
    char dspace[PMIX_MAX_NSLEN+1];
    mylock_t mylock;
    pmix_status_t code = PMIX_ERR_JOB_TERMINATED;

    /* setup the debugger */
    PMIX_APP_CREATE(debugger, 1);
    debugger[0].cmd = strdup("./debuggerd");
    PMIX_ARGV_APPEND(rc, debugger[0].argv, "./debuggerd");
    if (PMIX_SUCCESS != rc) {
        fprintf(stderr, "Debugger: unable to build daemon argv: %s\n", PMIx_Error_string(rc));
        PMIX_APP_FREE(debugger, 1);
        return rc;
    }
    /* point us to our current directory; if it cannot be determined the
     * cwd field is left unset rather than copied from an unfilled buffer */
    if (NULL != getcwd(cwd, sizeof(cwd))) {
        debugger[0].cwd = strdup(cwd);
    } else {
        fprintf(stderr, "Debugger: unable to determine the working directory\n");
    }

    /* provide directives so the daemons go where we want, and
     * let the RM know these are debugger daemons - every directive
     * gets its own slot so none of them overwrites another */
    dninfo = 7;
    PMIX_INFO_CREATE(dinfo, dninfo);
    /* instruct the RM to launch one copy of the executable on each node */
    PMIX_INFO_LOAD(&dinfo[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);
    /* these are debugger daemons */
    PMIX_INFO_LOAD(&dinfo[1], PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL);
    /* the nspace being debugged */
    PMIX_INFO_LOAD(&dinfo[2], PMIX_DEBUG_JOB, appspace, PMIX_STRING);
    /* notify us when the debugger job completes */
    PMIX_INFO_LOAD(&dinfo[3], PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL);
    /* tell the daemon that the proc is waiting to be released */
    PMIX_INFO_LOAD(&dinfo[4], PMIX_DEBUG_WAITING_FOR_NOTIFY, NULL, PMIX_BOOL);
    /* forward stdout to me */
    PMIX_INFO_LOAD(&dinfo[5], PMIX_FWD_STDOUT, NULL, PMIX_BOOL);
    /* forward stderr to me */
    PMIX_INFO_LOAD(&dinfo[6], PMIX_FWD_STDERR, NULL, PMIX_BOOL);

    /* spawn the daemons */
    fprintf(stderr, "Debugger: spawning %s\n", debugger[0].cmd);
    rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, dspace);
    /* the directives and app description are no longer needed either way */
    PMIX_INFO_FREE(dinfo, dninfo);
    PMIX_APP_FREE(debugger, 1);
    if (PMIX_SUCCESS != rc) {
        fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", PMIx_Error_string(rc));
        return rc;
    }

    /* register callback for when this job terminates */
    myrel->nspace = strdup(dspace);
    PMIX_INFO_CREATE(dinfo, 2);
    PMIX_INFO_LOAD(&dinfo[0], PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER);
    /* only call me back when this specific job terminates */
    PMIX_INFO_LOAD(&dinfo[1], PMIX_NSPACE, dspace, PMIX_STRING);

    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(&code, 1, dinfo, 2,
                                release_fn, evhandler_reg_callbk, (void*)&mylock);
    /* block until the registration callback has recorded its status */
    DEBUG_WAIT_THREAD(&mylock);
    rc = mylock.status;
    DEBUG_DESTRUCT_LOCK(&mylock);
    PMIX_INFO_FREE(dinfo, 2);

    return rc;
}
/* KVS_Fence */ int PMI2_KVS_Fence(void) { pmix_status_t rc = PMIX_SUCCESS; PMI2_CHECK(); pmix_output_verbose(3, pmix_globals.debug_output, "PMI2_KVS_Fence"); if (PMIX_SUCCESS != (rc = PMIx_Commit())) { return convert_err(rc); } /* we want all data to be collected upon completion */ { pmix_info_t info[1]; bool val_data = 1; /* set controlling parameters * PMIX_COLLECT_DATA - meet legacy PMI2 requirement */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &val_data, PMIX_BOOL); rc = PMIx_Fence(NULL, 0, &info[0], 1); PMIX_INFO_DESTRUCT(&info[0]); } return convert_err(rc); }
int PMI2_Info_GetSize(int *size) { pmix_status_t rc = PMIX_ERROR; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; PMI2_CHECK(); if (NULL == size) { return PMI2_ERR_INVALID_ARGS; } /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
PMIX_EXPORT int PMI_Init(int *spawned) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { /* if we didn't see a PMIx server (e.g., missing envar), * then allow us to run as a singleton */ if (PMIX_ERR_INVALID_NAMESPACE == rc) { if (NULL != spawned) { *spawned = 0; } pmi_singleton = true; (void)strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); myproc.rank = 0; pmi_init = 1; return PMI_SUCCESS; } return PMI_ERR_INIT; } /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); proc.rank = PMIX_RANK_UNDEF; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); if (NULL != spawned) { /* get the spawned flag */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) { rc = convert_int(spawned, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* if not found, default to not spawned */ *spawned = 0; } } pmi_init = 1; rc = PMIX_SUCCESS; error: PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
/*
 * Walk the registered error handlers and invoke those that apply to
 * status.
 *
 * status        - the error being reported
 * procs, nprocs - passed through unchanged to each invoked handler
 * info, ninfo   - caller-supplied info; copied behind a leading
 *                 PMIX_ERROR_HANDLER_ID entry that carries the index of
 *                 the handler being invoked
 *
 * A handler registered with info entries is invoked when one of those
 * entries has the key PMIX_ERROR_NAME and an int32 value equal to status.
 * A handler registered without info is remembered as the default (the
 * last one found wins) and is invoked only if no specific handler fired.
 */
void pmix_errhandler_invoke(pmix_status_t status,
                            pmix_proc_t procs[], size_t nprocs,
                            pmix_info_t info[], size_t ninfo)
{
    int i, idflt = -1;
    size_t j, cnt;
    bool fired = false;
    pmix_error_reg_info_t *errreg, *errdflt = NULL;
    pmix_info_t *iptr;

    /* a NULL info array contributes no entries regardless of ninfo, so
     * handlers are never handed blank trailing elements */
    cnt = (NULL != info) ? ninfo + 1 : 1;
    PMIX_INFO_CREATE(iptr, cnt);
    if (NULL == iptr) {
        /* out of memory - nothing can be delivered */
        return;
    }
    (void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
    iptr[0].value.type = PMIX_INT;

    /* NOTE(review): the loader is handed the address of the source value's
     * data union; for pointer-valued types (e.g. strings) that is
     * presumably not the pointer the loader expects - confirm against the
     * PMIX_INFO_LOAD definition */
    for (j = 1; j < cnt; j++) {
        PMIX_INFO_LOAD(&iptr[j], info[j-1].key, &info[j-1].value.data, info[j-1].value.type);
    }

    for (i = 0; i < pmix_globals.errregs.size; i++) {
        errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
        if (NULL == errreg) {
            continue;
        }
        if (NULL == errreg->info || 0 == errreg->ninfo) {
            /* this is a general err handler - we will call it if there is no better match */
            errdflt = errreg;
            idflt = i;
            continue;
        }
        /* match error name key first */
        for (j = 0; j < errreg->ninfo; j++) {
            if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) &&
                (status == errreg->info[j].value.data.int32)) {
                iptr[0].value.data.integer = i;
                errreg->errhandler(status, procs, nprocs, iptr, cnt);
                fired = true;
                break;
            }
        }
    }

    /* if nothing fired and we found a general err handler, then fire it */
    if (!fired && NULL != errdflt) {
        iptr[0].value.data.integer = idflt;
        errdflt->errhandler(status, procs, nprocs, iptr, cnt);
    }

    /* cleanup */
    PMIX_INFO_FREE(iptr, cnt);
}
PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; pmix_proc_t proc = myproc; proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); if ((NULL == name) || (NULL == value) || (NULL == found)) { return PMI2_ERR_INVALID_ARG; } if (pmi2_singleton) { return PMI2_FAIL; } /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); *found = 0; /* TODO: does PMI2's "name" makes sense to PMIx? */ rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { (void)strncpy(value, val->data.string, valuelen); *found = 1; } PMIX_VALUE_RELEASE(val); } else if (PMIX_ERR_NOT_FOUND == rc) { rc = PMIX_SUCCESS; } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; PMI2_CHECK(); if ((NULL == name) || (NULL == value) || (NULL == found)) { return PMI2_ERR_INVALID_ARG; } /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); proc.rank = PMIX_RANK_UNDEF; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); *found = 0; rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { (void)strncpy(value, val->data.string, valuelen); *found = 1; } PMIX_VALUE_RELEASE(val); } else if (PMIX_ERR_NOT_FOUND == rc) { rc = PMIX_SUCCESS; } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
PMIX_EXPORT int PMI_Get_appnum(int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; pmix_proc_t proc = myproc; proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); if (NULL == appnum) { return PMI_ERR_INVALID_ARG; } if (pmi_singleton) { *appnum = 0; return PMI_SUCCESS; } /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); rc = PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); } else if( PMIX_ERR_NOT_FOUND == rc ){ /* this is optional value, set to 0 */ *appnum = 0; rc = PMIX_SUCCESS; } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
/* Barrier only applies to our own nspace, and we want all * data to be collected upon completion */ PMIX_EXPORT int PMI_Barrier(void) { pmix_status_t rc = PMIX_SUCCESS; pmix_info_t buf; int ninfo = 0; pmix_info_t *info = NULL; bool val = 1; PMI_CHECK(); if (pmi_singleton) { return PMI_SUCCESS; } info = &buf; PMIX_INFO_CONSTRUCT(info); PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); ninfo = 1; rc = PMIx_Fence(NULL, 0, info, ninfo); PMIX_INFO_DESTRUCT(info); return convert_err(rc); }
PMIX_EXPORT int PMI_Get_clique_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; pmix_proc_t proc = myproc; proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); if (NULL == size) { return PMI_ERR_INVALID_ARG; } if (pmi_singleton) { *size = 1; return PMI_SUCCESS; } /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { doabort = true; } } /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_VALUE_RELEASE(val); /* test something */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_SERVER_URI, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } pmix_output(0, "CLIENT SERVER URI: %s", val->data.string); PMIX_VALUE_RELEASE(val); /* register a handler specifically for when models declare */ active = true; ninfo = 1; PMIX_INFO_CREATE(iptr, ninfo); PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); code = PMIX_MODEL_DECLARED; PMIx_Register_event_handler(&code, 1, iptr, ninfo, model_callback, model_registration_callback, (void*)&active); while (active) { usleep(10); } PMIX_INFO_FREE(iptr, ninfo); /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, 
errhandler_reg_callbk, (void*)&active); while (active) { usleep(10); } /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } (void)asprintf(&tmp, "%s-%d-remote-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { 
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* check the returned data */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); continue; } if (NULL == val) { pmix_output(0, "Client ns %s rank %d: NULL value returned", myproc.nspace, myproc.rank); break; } if (PMIX_UINT64 != val->type) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); continue; } if (1234 != val->data.uint64) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (n != myproc.rank) { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { /* this data should _not_ be found as we are on the same node * and the data was "put" with a PMIX_REMOTE scope */ pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } } } } /* now get the data blob for myself */ pmix_output(0, "Client ns %s rank %d testing internal modex blob", myproc.nspace, myproc.rank); if (PMIX_SUCCESS == (rc = 
PMIx_Get(&myproc, NULL, NULL, 0, &val))) { if (PMIX_DATA_ARRAY != val->type) { pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else if (PMIX_INFO != val->data.darray->type) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); PMIX_VALUE_RELEASE(val); } else if (0 == val->data.darray->size) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else { pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array; for (n=0; n < val->data.darray->size; n++) { pmix_output(0, "\tKey: %s", iptr[n].key); } PMIX_VALUE_RELEASE(val); } } else { pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); } /* log something */ PMIX_INFO_CONSTRUCT(&info); PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { pmix_output(0, "Client ns %s rank %d - log_nb returned %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { while (active) { usleep(10); } } PMIX_INFO_DESTRUCT(&info); /* if requested and our rank is 0, call abort */ if (doabort) { if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_PROC_REQUESTED_ABORT, "CALLING ABORT", NULL, 0); } else { while(!completed) { usleep(10); } } } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } 
fflush(stderr); return(rc); }
int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; if (PMIX_SUCCESS != PMIx_Init(&myproc)) { return PMI2_ERR_INIT; } /* get the rank */ *rank = myproc.rank; /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); proc.rank = PMIX_RANK_UNDEF; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); if (NULL != size) { /* get the universe size - this will likely pull * down all attributes assigned to the job, thus * making all subsequent "get" operations purely * local */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* cannot continue without this info */ rc = PMIX_ERR_INIT; goto error; } } if (NULL != spawned) { /* get the spawned flag */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) { rc = convert_int(spawned, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* if not found, default to not spawned */ *spawned = 0; } } if (NULL != appnum) { /* get our appnum */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* if not found, default to 0 */ *appnum = 0; } } pmi2_init = 1; rc = PMIX_SUCCESS; error: PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
int main(int argc, char **argv) { pmix_status_t rc; pmix_proc_t myproc; pmix_info_t *info; pmix_app_t *app; size_t ninfo, napps; /* check for user directives - this would include: * - a flag indicating we want to attach to a specified application * - application info if we are launching a new app */ /* init us - if a PMIx server pid was provided, then pass it along */ if (0 < server_pid) { ninfo = 1; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_PIDINFO, server_pid, PMIX_UINT32); } else { info = NULL; ninfo = 0; } if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); exit(rc); } if (0 < ninfo) { PMIX_INFO_FREE(info, ninfo); } fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* if we are attaching to a running job, then attach to it */ if (attach) { ret = attach_to_running_job(argv[1]); } else { /* this is an initial launch - we need to launch the application * plus the debugger daemons, letting the RM know we are debugging * so that it will "pause" the app procs until we are ready */ napps = 2; PMIX_APP_CREATE(app, napps); /* setup the executable */ app[0].cmd = strdup("app"); app[0].argc = 1; app[0].argv = (char**)malloc(2*sizeof(char*)); app[0].argv[0] = strdup("app"); app[0].argv[1] = NULL; /* provide directives so the apps do what the user requested */ ninfo = 2; PMIX_INFO_CREATE(app[0].info, ninfo); PMIX_INFO_LOAD(&app[0].info[0], PMIX_NP, 128, PMIX_UINT64); PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "slot", PMIX_STRING); /* setup the name of the daemon executable to launch */ app[1].cmd = strdup("debuggerdaemon"); app[1].argc = 1; app[1].argv = (char**)malloc(2*sizeof(char*)); app[1].argv[0] = strdup("debuggerdaemon"); app[1].argv[1] = NULL; /* provide directives so the daemons go where we want, and * let the RM know these are debugger daemons */ ninfo = 2; PMIX_INFO_CREATE(app[1].info, ninfo); PMIX_INFO_LOAD(&app[1].info[0], PMIX_MAPBY, 
"ppr:1:node", PMIX_STRING); // instruct the RM to launch one copy of the executable on each node PMIX_INFO_LOAD(&app[1].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); // these are debugger daemons /* spawn the daemons */ PMIx_Spawn(NULL, 0, app, napps, dspace); /* cleanup */ PMIX_APP_FREE(app, napps); /* this is where a debugger tool would wait until the debug operation is complete */ } done: PMIx_tool_finalize(NULL, 0); return(ret); }
int main(int argc, char **argv, const char **environ) { pmix_status_t rc; pmix_info_t *info = NULL; bool flag; pmix_status_t retval; pmix_app_t *spawned_app = NULL; pmix_info_t *job_info = NULL; pmix_info_t *proc_info = NULL; int job_info_count = 0; int job_info_index = 0; int proc_info_count = 0; int proc_info_index = 0; char spawned_nsp[PMIX_MAX_NSLEN+1]; char *path_to_app = NULL; char *host_to_use = NULL; int number_of_clients = 0; int temp_counter = 0; done_flag = false; gethostname(hostn, 500); int spawned_app_argc = 0; char **scr_environ = NULL; int proc_count = 1; int node_count = 0; bool blocking_mode = true; char *node_list = NULL; bool forward_all_scr_envs = false; bool pmix_mode = false; const char *optstring = "+n:N:L:x:bB:pPvhe"; int temp_slen=0; /* todo: add arg parsing with ompi schizo */ verbose_print = false; int sleep_max = 30; const int fixed_sleep = 5; int c; while((c = getopt(argc, argv, optstring)) != -1){ switch(c){ case 'h': print_usage(argv[0]); exit(0); break; case 'n': proc_count = atoi(optarg); if(proc_count <= 0 || proc_count > 100){ printf("outside the range of allowable instances to spawn [1-100]\n"); exit(1); } if(verbose_print) { printf("proc_count = %d\n", proc_count); } break; case 'N': /* node_count = atoi(optarg); */ node_count = 1; if(verbose_print) { printf("node_count = %d\n", node_count); } break; case 'B': blocking_mode = true; sleep_max = atoi(optarg); if(sleep_max < 0){ printf("can't sleep for less than 0 seconds\n"); exit(1); } if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'b': blocking_mode = false; if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'L': node_list = optarg; host_to_use = node_list; if(verbose_print){ printf("node_list = '%s'\n", node_list); } break; case 'x': temp_slen = strlen(optarg); /* check if the string is the same length as 'SCR', if so compare them */ if(temp_slen == strlen(SCR_STRING)){ if(strncmp(optarg, SCR_STRING, 
strlen(SCR_STRING)) == 0){ /* if the string is SCR, then forward all SCR related env vars */ if(verbose_print) printf("all scr envs will be forwarded\n"); forward_all_scr_envs = true; } else{ /* handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } } else{ /*handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } break; case 'v': verbose_print = true; break; case 'p': pmix_mode = true; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'P': pmix_mode = false; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'e': experimental = true; break; case '?': printf("missing a required argument or invalid option: %x\n", optopt); print_usage(argv[0]); exit(1); break; default: printf("Unrecognized argument: %c\n", c); print_usage(argv[0]); exit(1); break; } } /* number of instances to spawn */ number_of_clients = proc_count; /* check to make sure an application was specified to launch */ if( optind < argc ){ /* if optind is < argc, it means there is at least one more arg * beyond the args for this program */ path_to_app = argv[optind]; spawned_app_argc = argc - optind; if(verbose_print) { printf("app to launch: %s @ %s:%d\n", path_to_app, __FILE__, __LINE__); } } else{ printf("program_to_spawn option was not provded\n"); print_usage(argv[0]); exit(1); } if(verbose_print){ printf("master process will spawn %d instances; app to run: %s\n\n", number_of_clients, path_to_app); printf("pmix version: %s (host: %s)\n", PMIx_Get_version(), hostn); } /* init pmix */ retval = PMIx_Init(&main_proc, NULL, 0); if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error initializing pmix"); exit(0); } if(verbose_print){ printf("rank %d, host '%s', nspace: '%s' init'd pmix succesfully\n\n", main_proc.rank, hostn, main_proc.nspace); } /* we need to attach to a "system" PMIx server so we * can ask it to spawn applications for us. 
There can * only be one such connection on a node, so we will * instruct the tool library to only look for it */ int ninfo = 1; PMIX_INFO_CREATE(info, ninfo); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_TO_SYSTEM, &flag, PMIX_BOOL); /* initialize the library and make the connection */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&tool_proc, NULL, 0 ))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc ); exit(rc); } if (0 < ninfo) { PMIX_INFO_FREE(info, ninfo); } /* first call fence to sync all processes */ retval = fence_helper(); if(retval != PMIX_SUCCESS) { error_helper(retval, hostn, "error fencing"); exit(retval); } /* Process SCR env vars if needed */ if(forward_all_scr_envs){ parse_all_scr_envs(&scr_environ, environ); } /* finalize the env array so a NULL is in place */ finalize_array(scr_environ); /* Setup info structs to pass to this: */ /* pmix_info_t *error_info = NULL; */ /* PMIX_INFO_CREATE(error_info, 1); */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_ABORT, PMIX_MAX_KEYLEN); error_info[0].value.type = PMIX_BOOL; error_info[0].value.data.flag = true; */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_SPAWN, PMIX_MAX_KEYLEN); int t_val = 1; pmix_value_load(&error_info[1].value, &t_val, PMIX_BOOL); */ /*error_info[1].value.type = PMIX_BOOL; error_info[1].value.data.flag = true; */ /* strncpy(error_info[2].key, PMIX_ERROR_GROUP_GENERAL, PMIX_MAX_KEYLEN); error_info[2].value.type = PMIX_BOOL; error_info[2].value.data.flag = true; */ /* TODO: setup error handling when implemented in pmix with the * following error codes: */ /* pmix_status_t registered_codes[5]; registered_codes[0] = PMIX_ERR_JOB_TERMINATED; registered_codes[1] = PMIX_ERR_PROC_ABORTED; registered_codes[2] = PMIX_ERR_PROC_ABORTING; */ PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler_cb, errhandler_reg_callbk, (void *) NULL); /* PMIX_INFO_DESTRUCT(error_info); */ /* allocate memory to hold the spawend app struct */ PMIX_APP_CREATE(spawned_app, 1); /* maxprocs isn't documented 
very well, but it appears to control * how many instances of the spanwed app are created */ spawned_app->maxprocs = number_of_clients; /* set the app to run */ (void)asprintf(&spawned_app->cmd, "%s", path_to_app); /* set argv for spawned app starting with remaining argv */ spawned_app->argv = &argv[optind]; /* set the environment pointer */ spawned_app->env = scr_environ; /*--START: add all proc level infos */ /* add things to the proc level info */ if(!pmix_mode){ job_info_count++; } if(host_to_use != NULL){ proc_info_count++; } if(verbose_print){ printf("enabling debug feature for forwarding stdout/stderr\n"); proc_info_count+=2; /* add PMIX_FWD_STDOUT and PMIX_FWD_STDERR later*/ } if(experimental){ job_info_count++; } if(node_count == 1){ job_info_count++; } /*--END: add all proc level infos */ /*--START: append actual proc level info */ PMIX_INFO_CREATE(job_info, job_info_count); PMIX_INFO_CREATE(proc_info, proc_info_count); /* PMIX_VAL_set_assign(_v, _field, _val ) */ /* PMIX_VAL_set_strdup(_v, _field, _val ) */ if(host_to_use != NULL){ /* add info struct to the spawned app itself for the host */ /* old way */ strncpy(proc_info[proc_info_index].key, PMIX_HOST, PMIX_MAX_KEYLEN); //proc_info[proc_info_index].value.type = PMIX_STRING; /* set the data for host list to use */ //proc_info[proc_info_index].value.data.string = host_to_use; /* end old way */ if(verbose_print) printf("about to set host val\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), string, host_to_use ); proc_info_index++; } if(!pmix_mode){ strncpy(job_info[job_info_index].key, PMIX_NON_PMI, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set non pmix flag\n"); PMIX_VAL_SET(&(job_info[job_info_index].value), flag, true); job_info_index++; } if(verbose_print){ strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDOUT, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stdout flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; 
strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDERR, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stderr flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; } if(experimental){ printf("attempting to perform experiment\n"); bool local_flag = true; PMIX_INFO_LOAD(&job_info[job_info_index], PMIX_NOTIFY_COMPLETION, &local_flag, PMIX_BOOL); job_info_index++; } if(node_count == 1){ strncpy(job_info[job_info_index].key, PMIX_PPR, PMIX_MAX_KEYLEN); PMIX_VAL_SET(&(job_info[job_info_index].value), string, "1:n"); job_info_index++; } /*--END: append actual proc level info */ /* sanity check to make sure we covered all the info structs */ if(proc_info_index != proc_info_count ){ printf("bug: mismatch with appending proc info\n"); exit(1); } if(job_info_index != job_info_count){ printf("bug: mismatch with appending job info\n"); exit(1); } /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ /* fill in job_info */ /* strncpy(job_info[0].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_INT; job_info[0].value.data.integer = 10; */ /* strncpy(job_info[0].key, PMIX_NOTIFY_COMPLETION, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true; */ /*strncpy(spawned_app->info[0].key, PMIX_DISPLAY_MAP, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true;*/ /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ spawned_app->info = proc_info; spawned_app->ninfo = proc_info_count; if(verbose_print){ printf("proc level info count: %d\n", proc_info_count); } /* call spawn */ retval = PMIx_Spawn(job_info, job_info_count, spawned_app, 1, spawned_nsp); if(verbose_print) { printf("rank %d (host %s) just called spawn; spawned nspace: %s, retval:%d\n", main_proc.rank, hostn, spawned_nsp, retval); } if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error with spawn"); goto done; } /* TODO: TEMPORARY WORKAROUND TO WAIT 
FOR A SPAWNED PROCESS */ if(blocking_mode){ sleep(fixed_sleep); /* wait until app completes: */ while(!done_flag){ sleep(fixed_sleep); temp_counter++; if(temp_counter*fixed_sleep >= sleep_max) { if(verbose_print) printf("broke out early\n"); break; } } if(verbose_print){ if(done_flag == true) { printf("done_flag was set to true!\n"); } } } done: /* fence first */ retval = fence_helper(); if(retval != PMIX_SUCCESS){ if(verbose_print) printf("error fencing, finalize may fail ! \n"); } /* finalize */ PMIx_Deregister_event_handler(_g_errhandler_ref, NULL, NULL); if(verbose_print){ fprintf(stdout, "spawn master process (rank %d) (host %s) finalizing\n", main_proc.rank, hostn); } /* clean up pmix */ retval = PMIx_tool_finalize(); if(retval == PMIX_SUCCESS) { if(verbose_print){ printf("spawn master process %d finalize success\n\n", main_proc.rank); } } else { printf("spawn master process %d pmix_finalize FAILURE: %d\n\n", main_proc.rank, retval); } retval = PMIx_Finalize(NULL, 0); fflush(stdout); /* cleanup before returning */ PMIX_INFO_FREE(job_info, job_info_count); spawned_app->argv = NULL; PMIX_APP_FREE(spawned_app, 1); if(verbose_print) printf("%s exiting cleanly :)\n", argv[0]); return 0; }
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t *val; pmix_proc_t proc; pmix_info_t *info; size_t ninfo; pmix_query_t *query; size_t nq, n; myquery_data_t myquery_data; pid_t pid; pmix_status_t code = PMIX_ERR_JOB_TERMINATED; mylock_t mylock; myrel_t myrel; uint16_t localrank; char *target = NULL; pid = getpid(); /* init us - since we were launched by the RM, our connection info * will have been provided at startup. */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "Debugger daemon: PMIx_tool_init failed: %d\n", rc); exit(0); } fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* register our default event handler */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&mylock); DEBUG_WAIT_THREAD(&mylock); if (PMIX_SUCCESS != mylock.status) { rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_DESTRUCT_LOCK(&mylock); /* get the nspace of the job we are to debug - it will be in our JOB info */ #ifdef PMIX_LOAD_PROCID PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); #else PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_KEYLEN); proc.rank = PMIX_RANK_WILDCARD; #endif if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) { fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - error %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc)); goto done; } if (NULL == val || PMIX_STRING != val->type || NULL == val->data.string) { fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - NULL data returned\n", myproc.nspace, myproc.rank, (unsigned long)pid); goto done; } /* save it for later */ target = strdup(val->data.string); PMIX_VALUE_RELEASE(val); fprintf(stderr, "[%s:%d:%lu] Debugging %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, target); /* get my local rank so 
I can determine which local proc is "mine" * to debug */ val = NULL; if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - error %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc)); goto done; } if (NULL == val) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - NULL data returned\n", myproc.nspace, myproc.rank, (unsigned long)pid); goto done; } if (PMIX_UINT16 != val->type) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - returned wrong type %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Data_type_string(val->type)); goto done; } /* save the data */ localrank = val->data.uint16; PMIX_VALUE_RELEASE(val); fprintf(stderr, "[%s:%d:%lu] my local rank %d\n", myproc.nspace, myproc.rank, (unsigned long)pid, (int)localrank); /* register another handler specifically for when the target * job completes */ DEBUG_CONSTRUCT_LOCK(&myrel.lock); myrel.nspace = strdup(proc.nspace); PMIX_INFO_CREATE(info, 2); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); /* only call me back when this specific job terminates */ PMIX_LOAD_PROCID(&proc, target, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&info[1], PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); fprintf(stderr, "[%s:%d:%lu] registering for termination of %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, proc.nspace); DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(&code, 1, info, 2, release_fn, evhandler_reg_callbk, (void*)&mylock); DEBUG_WAIT_THREAD(&mylock); if (PMIX_SUCCESS != mylock.status) { rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); PMIX_INFO_FREE(info, 2); goto done; } DEBUG_DESTRUCT_LOCK(&mylock); PMIX_INFO_FREE(info, 2); /* get our local proctable - for scalability reasons, we don't want to * have our "root" debugger process get the proctable for everybody and * send it out to us. 
So ask the local PMIx server for the pid's of * our local target processes */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); query[0].nqual = 1; PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, target, PMIX_STRING); // the nspace we are enquiring about /* setup the caddy to retrieve the data */ DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); myquery_data.info = NULL; myquery_data.ninfo = 0; /* execute the query */ if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) { fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); goto done; } DEBUG_WAIT_THREAD(&myquery_data.lock); DEBUG_DESTRUCT_LOCK(&myquery_data.lock); PMIX_QUERY_FREE(query, nq); if (PMIX_SUCCESS != myquery_data.status) { rc = myquery_data.status; goto done; } fprintf(stderr, "[%s:%d:%lu] Local proctable received\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* now that we have the proctable for our local processes, we can do our * magic debugger stuff and attach to them. We then send a "release" event * to them - i.e., it's the equivalent to setting the MPIR breakpoint. We * do this with the event notification system. 
For this example, we just * send it to all local procs of the job being debugged */ (void)strncpy(proc.nspace, target, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; ninfo = 2; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); // deliver to the target nspace PMIX_INFO_LOAD(&info[1], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); // deliver to the target nspace fprintf(stderr, "[%s:%u:%lu] Sending release\n", myproc.nspace, myproc.rank, (unsigned long)pid); rc = PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, NULL, PMIX_RANGE_CUSTOM, info, ninfo, NULL, NULL); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%s[%s:%u:%lu] Sending release failed with error %s(%d)\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc), rc); goto done; } /* do some debugger magic while waiting for the job to terminate */ DEBUG_WAIT_THREAD(&myrel.lock); done: if (NULL != target) { free(target); } /* finalize us */ fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Finalizing\n", myproc.nspace, myproc.rank, (unsigned long)pid); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank, (unsigned long)pid); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info; bool flag; volatile int active; pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any * debugger flag instructing us to stop-in-init. If such a directive * is included, then the process will be stopped in this call until * the "debugger release" notification arrives */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ active = -1; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&active); while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* check to see if we have been instructed to wait for a debugger * to attach to us. We won't get both a stop-in-init AND a * wait-for-notify directive, so we should never stop twice. 
This * directive is provided so that something like an MPI implementation * can do some initial setup in MPI_Init prior to pausing for the * debugger */ if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, PMIX_DEBUG_WAIT_FOR_NOTIFY, NULL, 0, &val))) { /* register for debugger release */ active = -1; PMIx_Register_event_handler(&dbg, 1, NULL, 0, release_fn, evhandler_reg_callbk, (void*)&active); /* wait for registration to complete */ while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Debug handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* wait for debugger release */ while (waiting_for_debugger) { sleep(1); } } /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, val->data.uint32); /* get the number of procs in our job - univ size is the total number of allocated * slots, not the number of procs in the job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d num procs %d\n", myproc.nspace, myproc.rank, nprocs); /* put a few values */ if (0 > asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Store_internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, 
&value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); /* push the data to our PMIx server */ if (PMIX_SUCCESS != (rc = PMIx_Commit())) { fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" * data from our peers */ PMIX_INFO_CREATE(info, 1); flag = true; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); /* check the returned data */ for (n=0; n < nprocs; n++) { if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_UINT64 != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (1234 != val->data.uint64) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (0 > 
asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_STRING != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (0 != strcmp(val->data.string, "1234")) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/*
 * Validate a peer's credential for the "native" security component.
 *
 * peer        connection being validated (a pmix_peer_t)
 * directives  optional directives; a PMIX_CRED_TYPE entry restricts which
 *             mechanisms may answer, as a comma-delimited list
 * ndirs       number of entries in directives
 * info/ninfo  optional output: on success, when info is non-NULL, *info
 *             receives a newly created 3-element array (credential type,
 *             uid, gid) and *ninfo its length
 * cred        credential blob; only read for PMIX_PROTOCOL_V2, where it is
 *             expected to hold a uid_t followed by a gid_t
 *
 * For PMIX_PROTOCOL_V1 the uid/gid are taken from the socket itself
 * (SO_PEERCRED or getpeereid, whichever the build provides). The resulting
 * uid/gid must equal those recorded in pr->info.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_INVALID_CRED, PMIX_ERR_NOT_SUPPORTED or
 * PMIX_ERR_NOMEM.
 */
static pmix_status_t validate_cred(struct pmix_peer_t *peer,
                                   const pmix_info_t directives[], size_t ndirs,
                                   pmix_info_t **info, size_t *ninfo,
                                   const pmix_byte_object_t *cred)
{
    pmix_peer_t *pr = (pmix_peer_t*)peer;

#if defined(SO_PEERCRED)
#ifdef HAVE_STRUCT_SOCKPEERCRED_UID
#define HAVE_STRUCT_UCRED_UID
    struct sockpeercred ucred;
#else
    struct ucred ucred;
#endif
    socklen_t crlen = sizeof (ucred);
#endif
    uid_t euid = -1;
    gid_t egid = -1;
    char *ptr;
    size_t ln;
    bool takeus;
    char **types;
    size_t n, m;
    uint32_t u32;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "psec: native validate_cred %s",
                        (NULL == cred) ? "NULL" : "NON-NULL");

    if (PMIX_PROTOCOL_V1 == pr->protocol) {
        /* usock protocol - get the remote side's uid/gid */
#if defined(SO_PEERCRED) && (defined(HAVE_STRUCT_UCRED_UID) || defined(HAVE_STRUCT_UCRED_CR_UID))
        /* Ignore received 'cred' and validate ucred for socket instead. */
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec:native checking getsockopt on socket %d for peer credentials", pr->sd);
        if (getsockopt(pr->sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "psec: getsockopt SO_PEERCRED failed: %s",
                                strerror (pmix_socket_errno));
            return PMIX_ERR_INVALID_CRED;
        }
#if defined(HAVE_STRUCT_UCRED_UID)
        euid = ucred.uid;
        egid = ucred.gid;
#else
        euid = ucred.cr_uid;
        egid = ucred.cr_gid;
#endif

#elif defined(HAVE_GETPEEREID)
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec:native checking getpeereid on socket %d for peer credentials", pr->sd);
        if (0 != getpeereid(pr->sd, &euid, &egid)) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "psec: getsockopt getpeereid failed: %s",
                                strerror (pmix_socket_errno));
            return PMIX_ERR_INVALID_CRED;
        }
#else
        return PMIX_ERR_NOT_SUPPORTED;
#endif
    } else if (PMIX_PROTOCOL_V2 == pr->protocol) {
        /* this is a tcp protocol, so the cred is actually the uid/gid
         * passed upwards from the client */
        if (NULL == cred) {
            /* not allowed */
            return PMIX_ERR_INVALID_CRED;
        }
        ln = cred->size;
        euid = 0;
        egid = 0;
        if (sizeof(uid_t) <= ln) {
            memcpy(&euid, cred->bytes, sizeof(uid_t));
            ln -= sizeof(uid_t);
            ptr = cred->bytes + sizeof(uid_t);
        } else {
            return PMIX_ERR_INVALID_CRED;
        }
        if (sizeof(gid_t) <= ln) {
            memcpy(&egid, ptr, sizeof(gid_t));
        } else {
            return PMIX_ERR_INVALID_CRED;
        }
    } else if (PMIX_PROTOCOL_UNDEF != pr->protocol) {
        /* don't recognize the protocol */
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we are responding to a local request to validate a credential,
     * then see if they specified a mechanism */
    if (NULL != directives && 0 < ndirs) {
        for (n=0; n < ndirs; n++) {
            if (0 == strncmp(directives[n].key, PMIX_CRED_TYPE, PMIX_MAX_KEYLEN)) {
                takeus = false;
                types = NULL;
                /* only a non-empty string value can name us; anything else
                 * is treated as "not our mechanism" instead of being
                 * dereferenced */
                if (PMIX_STRING == directives[n].value.type &&
                    NULL != directives[n].value.data.string) {
                    /* split the specified string */
                    types = pmix_argv_split(directives[n].value.data.string, ',');
                }
                for (m=0; NULL != types && NULL != types[m]; m++) {
                    if (0 == strcmp(types[m], "native")) {
                        /* it's us! */
                        takeus = true;
                        break;
                    }
                }
                if (NULL != types) {
                    pmix_argv_free(types);
                }
                if (!takeus) {
                    return PMIX_ERR_NOT_SUPPORTED;
                }
            }
        }
    }

    /* check uid */
    if (euid != pr->info->uid) {
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec: socket cred contains invalid uid %u", euid);
        return PMIX_ERR_INVALID_CRED;
    }

    /* check gid */
    if (egid != pr->info->gid) {
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec: socket cred contains invalid gid %u", egid);
        return PMIX_ERR_INVALID_CRED;
    }

    /* validated - mark that we did it */
    if (NULL != info) {
        PMIX_INFO_CREATE(*info, 3);
        if (NULL == *info) {
            return PMIX_ERR_NOMEM;
        }
        *ninfo = 3;
        /* "info" is the address of the caller's array pointer, so elements
         * must be reached through (*info)[i]; info[i] would step past the
         * caller's pointer instead of through the new array */
        /* mark that this came from us - the mechanism name matched against
         * the directives above is "native" */
        PMIX_INFO_LOAD(&(*info)[0], PMIX_CRED_TYPE, "native", PMIX_STRING);
        /* provide the uid it contained */
        u32 = euid;
        PMIX_INFO_LOAD(&(*info)[1], PMIX_USERID, &u32, PMIX_UINT32);
        /* provide the gid it contained */
        u32 = egid;
        PMIX_INFO_LOAD(&(*info)[2], PMIX_GRPID, &u32, PMIX_UINT32);
    }
    return PMIX_SUCCESS;
}
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info, *iptr; bool flag; mylock_t mylock; pmix_data_array_t *dptr; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&mylock); /* wait for registration to complete */ DEBUG_WAIT_THREAD(&mylock); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); goto done; } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. 
Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); /* inform the RM that we are preemptible, and that our checkpoint methods are * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ PMIX_INFO_CREATE(info, 2); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); /* can't use "load" to load a pmix_data_array_t */ (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); PMIX_DATA_ARRAY_CREATE(info[1].value.data.darray, 2, PMIX_INFO); dptr = info[1].value.data.darray; rc = SIGUSR2; iptr = (pmix_info_t*)dptr->array; PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); rc = PMIX_JCTRL_CHECKPOINT; PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); /* since this is informational and not a requested operation, the target parameter * doesn't mean anything and can be ignored */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(info, 2); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* now request that this process be monitored using heartbeats */ PMIX_INFO_CREATE(iptr, 1); 
PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); PMIX_INFO_CREATE(info, 3); PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); n = 5; // require a heartbeat every 5 seconds PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); n = 2; // two heartbeats can be missed before declaring us "stalled" PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); /* make the request */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, info, 3, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(iptr, 1); PMIX_INFO_FREE(info, 3); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* send a heartbeat */ PMIx_Heartbeat(); /* call fence to synchronize with our peers - no need to * collect any info as we didn't "put" anything */ PMIX_INFO_CREATE(info, 1); flag = false; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
static int attach_to_running_job(char *nspace) { pmix_status_t rc; pmix_proc_t myproc; pmix_query_t *query; size_t nq; myquery_data_t *q; /* query the active nspaces so we can verify that the * specified one exists */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACES); q = (myquery_data_t*)malloc(sizeof(myquery_data_t)); DEBUG_CONSTRUCT_LOCK(&q->lock); if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)q))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); return -1; } DEBUG_WAIT_THREAD(&q->lock); DEBUG_DESTRUCT_LOCK(&q->lock); if (NULL == q->info) { fprintf(stderr, "Query returned no info\n"); return -1; } /* the query should have returned a comma-delimited list of nspaces */ if (PMIX_STRING != q->info[0].value.type) { fprintf(stderr, "Query returned incorrect data type: %d\n", q->info[0].value.type); return -1; } if (NULL == q->info[0].value.data.string) { fprintf(stderr, "Query returned no active nspaces\n"); return -1; } fprintf(stderr, "Query returned %s\n", q->info[0].value.data.string); return 0; #if 0 /* split the returned string and look for the given nspace */ /* if not found, then we have an error */ PMIX_INFO_FREE(info, ninfo); /* get the proctable for this nspace */ ninfo = 1; PMIX_INFO_CREATE(info, ninfo); (void)strncpy(info[0].key, PMIX_QUERY_PROC_TABLE, PMIX_MAX_KEYLEN); (void)strncpy(info[0].qualifier, nspace, PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(info, ninfo, infocbfunc, (void*)&active))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info_nb failed: %d\n", myproc.nspace, myproc.rank, rc); return -1; } /* wait to get a response */ /* the query should have returned a data_array */ if (PMIX_DATA_ARRAY != info[0].type) { fprintf(stderr, "Query returned incorrect data type: %d\n", info[0].type); return -1; } if (NULL == info[0].data.darray.array) { fprintf(stderr, "Query returned no proctable info\n"); 
return -1; } /* the data array consists of a struct: * size_t size; * void* array; * * In this case, the array is composed of pmix_proc_info_t structs: * pmix_proc_t proc; // contains the nspace,rank of this proc * char* hostname; * char* executable_name; * pid_t pid; * int exit_code; * pmix_proc_state_t state; */ /* this is where a debugger tool would process the proctable to * create whatever blob it needs to provide to its daemons */ PMIX_INFO_FREE(info, ninfo); /* setup the debugger daemon spawn request */ napps = 1; PMIX_APP_CREATE(app, napps); /* setup the name of the daemon executable to launch */ app[0].cmd = strdup("debuggerdaemon"); app[0].argc = 1; app[0].argv = (char**)malloc(2*sizeof(char*)); app[0].argv[0] = strdup("debuggerdaemon"); app[0].argv[1] = NULL; /* provide directives so the daemons go where we want, and * let the RM know these are debugger daemons */ ninfo = 3; PMIX_INFO_CREATE(app[0].info, ninfo); PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING); // instruct the RM to launch one copy of the executable on each node PMIX_INFO_LOAD(&app[0].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); // these are debugger daemons PMIX_INFO_LOAD(&app[0].info[2], PMIX_DEBUG_TARGET, nspace, PMIX_STRING); // the "jobid" of the application to be debugged /* spawn the daemons */ PMIx_Spawn(NULL, 0, app, napps, dspace); /* cleanup */ PMIX_APP_FREE(app, napps); /* this is where a debugger tool would wait until the debug operation is complete */ return 0; #endif }
int main(int argc, char **argv) { pmix_status_t rc; pmix_info_t *info, *iptr; pmix_app_t *app; size_t ninfo, napps; char *nspace = NULL; int i; pmix_query_t *query; size_t nq, n; myquery_data_t myquery_data; bool cospawn = false, stop_on_exec = false, cospawn_reqd = false; char cwd[1024]; pmix_status_t code = PMIX_ERR_JOB_TERMINATED; mylock_t mylock; myrel_t myrel, launcher_ready, dbrel; pid_t pid; pmix_envar_t envar; char *launchers[] = { "prun", "mpirun", "mpiexec", "orterun", NULL }; pmix_proc_t proc; bool found; pmix_data_array_t darray; char *tmp; char clientspace[PMIX_MAX_NSLEN+1]; pid = getpid(); /* Process any arguments we were given */ for (i=1; i < argc; i++) { if (0 == strcmp(argv[i], "-h") || 0 == strcmp(argv[i], "--help")) { /* print the usage message and exit */ } if (0 == strcmp(argv[i], "-a") || 0 == strcmp(argv[i], "--attach")) { if (NULL != nspace) { /* can only support one */ fprintf(stderr, "Cannot attach to more than one nspace\n"); exit(1); } /* the next argument must be the nspace */ ++i; if (argc == i) { /* they goofed */ fprintf(stderr, "The %s option requires an <nspace> argument\n", argv[i]); exit(1); } nspace = strdup(argv[i]); } else if (0 == strcmp(argv[i], "-c") || 0 == strcmp(argv[i], "--cospawn")){ cospawn_reqd = true; break; } } info = NULL; ninfo = 0; /* use the system connection first, if available */ PMIX_INFO_CREATE(info, 1); PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL); /* init as a tool */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); exit(rc); } PMIX_INFO_FREE(info, ninfo); fprintf(stderr, "Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* construct the debugger termination release */ DEBUG_CONSTRUCT_LOCK(&dbrel.lock); /* register a default event handler */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, 
evhandler_reg_callbk, (void*)&mylock); DEBUG_WAIT_THREAD(&mylock); DEBUG_DESTRUCT_LOCK(&mylock); /* if we are attaching to a running job, then attach to it */ if (NULL != nspace) { if (PMIX_SUCCESS != (rc = attach_to_running_job(nspace))) { fprintf(stderr, "Failed to attach to nspace %s: error code %d\n", nspace, rc); goto done; } } done: DEBUG_DESTRUCT_LOCK(&myrel.lock); DEBUG_DESTRUCT_LOCK(&dbrel.lock); PMIx_tool_finalize(); return(rc); }
/*
 * Deliver an error to the registered error handlers.
 *
 * status        error being reported
 * procs/nprocs  procs associated with the error, passed through untouched
 * info/ninfo    info that accompanied the error (info may be NULL)
 *
 * Each handler is called with an info array of ninfo+1 entries: entry 0
 * carries PMIX_ERROR_HANDLER_ID (the handler's index in
 * pmix_globals.errregs), followed by a copy of the incoming info.
 *
 * A registration that carries info fires once if either
 *   - one of its entries is PMIX_ERROR_NAME with a value equal to status, or
 *   - failing that, one of its entries matches an incoming entry in both
 *     key and value (per pmix_value_cmp).
 * If no such registration fired, the last registration found without info
 * (the "default" handler) is called instead.
 */
void pmix_errhandler_invoke(pmix_status_t status,
                            pmix_proc_t procs[], size_t nprocs,
                            pmix_info_t info[], size_t ninfo)
{
    int i, idflt = -1;
    size_t j, k;
    bool fired = false;
    bool matched;
    pmix_error_reg_info_t *errreg, *errdflt = NULL;
    pmix_info_t *iptr;

    /* we will need to provide the errhandler reference id when
     * we provide the callback. Since the callback function doesn't
     * provide a param for that purpose, we have to add it to any
     * info array that came from the RM, so extend the array by 1 */
    PMIX_INFO_CREATE(iptr, ninfo+1);
    if (NULL == iptr) {
        /* nothing can be delivered without the array */
        return;
    }
    /* put the reference id in the first location */
    (void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
    iptr[0].value.type = PMIX_INT;
    /* we don't know the reference id yet, but we'll fill that in
     * later - for now, just copy the incoming info array across */
    if (NULL != info) {
        for (j=0; j < ninfo; j++) {
            if (PMIX_STRING == info[j].value.type) {
                /* strings are loaded from the char* itself, not from the
                 * address of the union that holds the pointer */
                PMIX_INFO_LOAD(&iptr[j+1], info[j].key, info[j].value.data.string, PMIX_STRING);
            } else {
                PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type);
            }
        }
    }

    /* search our array of errhandlers for a match. We take any specific
     * error status first, then take the group of the incoming status next.
     * If neither of those have been registered, then use any default
     * errhandler - otherwise, ignore it */
    for (i = 0; i < pmix_globals.errregs.size; i++) {
        if (NULL == (errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item(&pmix_globals.errregs, i))) {
            continue;
        }
        if (NULL == errreg->info || 0 == errreg->ninfo) {
            /* this is a general err handler - we will call it if there is no better match */
            errdflt = errreg;
            idflt = i;
            continue;
        }

        /* match error name key first */
        matched = false;
        for (j = 0; j < errreg->ninfo; j++) {
            if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) &&
                (status == errreg->info[j].value.data.int32)) {
                matched = true;
                break;
            }
        }
        if (!matched && NULL != info) {
            /* if no exact match was found, then we will fire the errhandler
             * for any matching info key. This may be too lax and need to be adjusted
             * later. Every registered entry (j) is compared against every
             * incoming entry (k), each index bounded by its own array */
            for (j = 0; j < errreg->ninfo && !matched; j++) {
                for (k = 0; k < ninfo; k++) {
                    if ((0 == strcmp(errreg->info[j].key, info[k].key)) &&
                        (pmix_value_cmp(&errreg->info[j].value, &info[k].value))) {
                        matched = true;
                        break;
                    }
                }
            }
        }
        if (matched) {
            /* tell the handler which registration it is being called for */
            iptr[0].value.data.integer = i;
            errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
            fired = true;
        }
    }

    /* if nothing fired and we found a general err handler, then fire it */
    if (!fired && NULL != errdflt) {
        iptr[0].value.data.integer = idflt;
        errdflt->errhandler(status, procs, nprocs, iptr, ninfo+1);
    }
    /* cleanup */
    PMIX_INFO_FREE(iptr, ninfo+1);
}
PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; pmix_proc_t proc = myproc; proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { /* if we didn't see a PMIx server (e.g., missing envar), * then allow us to run as a singleton */ if (PMIX_ERR_INVALID_NAMESPACE == rc) { if (NULL != spawned) { *spawned = 0; } if (NULL != size) { *size = 1; } if (NULL != rank) { *rank = 0; } if (NULL != appnum) { *appnum = 0; } pmi2_singleton = true; (void)strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); myproc.rank = 0; pmi2_init = 1; return PMI2_SUCCESS; } return PMI2_ERR_INIT; } /* get the rank */ *rank = myproc.rank; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); if (NULL != size) { /* get the universe size - this will likely pull * down all attributes assigned to the job, thus * making all subsequent "get" operations purely * local */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* cannot continue without this info */ rc = PMIX_ERR_INIT; goto error; } } if (NULL != spawned) { /* get the spawned flag */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) { rc = convert_int(spawned, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* if not found, default to not spawned */ *spawned = 0; } } if (NULL != appnum) { /* get our appnum */ if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } } else { /* if not found, default to 0 */ *appnum = 0; } } pmi2_init = 1; rc = PMIX_SUCCESS; error: 
PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
int MPIDU_bc_table_create(int rank, int size, int *nodemap, void *bc, int bc_len, int same_len, int roots_only, void **bc_table, size_t ** bc_indices) { int rc, mpi_errno = MPI_SUCCESS; int start, end, i; char *val = NULL, *val_p; int out_len, val_len, rem, flag; pmix_value_t value, *pvalue; pmix_info_t *info; pmix_proc_t proc; int local_rank, local_leader; size_t my_bc_len = bc_len; MPIR_NODEMAP_get_local_info(rank, size, nodemap, &local_size, &local_rank, &local_leader); /* if business cards can be different length, use the max value length */ if (!same_len) bc_len = VALLEN; mpi_errno = MPIDU_shm_seg_alloc(bc_len * size, (void **) &segment, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIDU_shm_seg_commit(&memory, &barrier, local_size, local_rank, local_leader, rank, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (size == 1) { memcpy(segment, bc, my_bc_len); goto single; } val = MPL_malloc(VALLEN, MPL_MEM_ADDRESS); memset(val, 0, VALLEN); val_p = val; rem = VALLEN; rc = MPL_str_add_binary_arg(&val_p, &rem, "mpi", (char *) bc, my_bc_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**buscard"); MPIR_Assert(rem >= 0); if (!roots_only || rank == local_leader) { value.type = PMIX_STRING; value.data.string = val; rc = PMIx_Put(PMIX_LOCAL, "bc", &value); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_put"); rc = PMIx_Put(PMIX_REMOTE, "bc", &value); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_put"); rc = PMIx_Commit(); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_commit"); } PMIX_INFO_CREATE(info, 1); PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); rc = PMIx_Fence(&MPIR_Process.pmix_wcproc, 1, info, 1); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_fence"); PMIX_INFO_FREE(info, 1); if (!roots_only) { start = local_rank * (size / local_size); end = start + (size / local_size); if (local_rank == local_size - 1) end += size % local_size; for (i = start; i < end; i++) 
{ PMIX_PROC_CONSTRUCT(&proc); MPL_strncpy(proc.nspace, MPIR_Process.pmix_proc.nspace, PMIX_MAX_NSLEN); proc.rank = i; rc = PMIx_Get(&proc, "bc", NULL, 0, &pvalue); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); PMIX_VALUE_RELEASE(pvalue); } } else { int num_nodes, *node_roots; MPIR_NODEMAP_get_node_roots(nodemap, size, &node_roots, &num_nodes); start = local_rank * (num_nodes / local_size); end = start + (num_nodes / local_size); if (local_rank == local_size - 1) end += num_nodes % local_size; for (i = start; i < end; i++) { PMIX_PROC_CONSTRUCT(&proc); MPL_strncpy(proc.nspace, MPIR_Process.pmix_proc.nspace, PMIX_MAX_NSLEN); proc.rank = i; rc = PMIx_Get(&proc, "bc", NULL, 0, &pvalue); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); PMIX_VALUE_RELEASE(pvalue); } } mpi_errno = MPIDU_shm_barrier(barrier, local_size); if (mpi_errno) MPIR_ERR_POP(mpi_errno); single: if (!same_len) { indices = MPL_malloc(size * sizeof(size_t), MPL_MEM_ADDRESS); for (i = 0; i < size; i++) indices[i] = bc_len * i; *bc_indices = indices; } fn_exit: MPL_free(val); *bc_table = segment; return mpi_errno; fn_fail: goto fn_exit; }
/**
 * Create a "native" credential for the given peer.
 *
 * If directives are supplied and one of them carries PMIX_CRED_TYPE, the
 * comma-separated list in that directive must contain "native"; otherwise
 * the request is rejected.  Only the first PMIX_CRED_TYPE directive is
 * examined.
 *
 * The credential content depends on the peer's protocol:
 *   - PMIX_PROTOCOL_V1: empty credential
 *   - PMIX_PROTOCOL_V2: effective uid followed by effective gid, as raw bytes
 *   - anything else:    PMIX_ERR_NOT_SUPPORTED
 *
 * @param peer        peer the credential is created for
 * @param directives  optional array of directives (may be NULL)
 * @param ndirs       number of entries in directives
 * @param info        [out, optional] receives a one-element array marking
 *                    the credential type as "native"
 * @param ninfo       [out] number of entries placed in *info
 * @param cred        [out] byte object receiving the credential; its bytes
 *                    are malloc'd here
 *
 * @return PMIX_SUCCESS, PMIX_ERR_NOT_SUPPORTED or PMIX_ERR_NOMEM.  On any
 *         error return cred holds no allocation.
 */
static pmix_status_t create_cred(struct pmix_peer_t *peer,
                                 const pmix_info_t directives[], size_t ndirs,
                                 pmix_info_t **info, size_t *ninfo,
                                 pmix_byte_object_t *cred)
{
    pmix_peer_t *pr = (pmix_peer_t*)peer;
    char **types;
    size_t n, m;
    bool takeus;
    uid_t euid;
    gid_t egid;
    char *tmp, *ptr;

    /* ensure initialization */
    PMIX_BYTE_OBJECT_CONSTRUCT(cred);

    /* we may be responding to a local request for a credential, so
     * see if they specified a mechanism */
    if (NULL != directives && 0 < ndirs) {
        /* cycle across the provided info and see if they specified
         * any desired credential types */
        takeus = true;
        for (n=0; n < ndirs; n++) {
            if (0 == strncmp(directives[n].key, PMIX_CRED_TYPE, PMIX_MAX_KEYLEN)) {
                /* see if we are included */
                types = pmix_argv_split(directives[n].value.data.string, ',');
                /* start by assuming they don't want us */
                takeus = false;
                /* the split yields NULL for an empty or missing list - in
                 * that case nothing was requested that we can provide */
                if (NULL != types) {
                    for (m=0; NULL != types[m]; m++) {
                        if (0 == strcmp(types[m], "native")) {
                            /* it's us! */
                            takeus = true;
                            break;
                        }
                    }
                    pmix_argv_free(types);
                }
                break;
            }
        }
        if (!takeus) {
            PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
            return PMIX_ERR_NOT_SUPPORTED;
        }
    }

    if (PMIX_PROTOCOL_V1 == pr->protocol) {
        /* usock protocol - nothing to do */
    } else if (PMIX_PROTOCOL_V2 == pr->protocol) {
        /* tcp protocol - need to provide our effective
         * uid and gid for validation on remote end */
        tmp = (char*)malloc(sizeof(uid_t) + sizeof(gid_t));
        if (NULL == tmp) {
            return PMIX_ERR_NOMEM;
        }
        euid = geteuid();
        memcpy(tmp, &euid, sizeof(uid_t));
        ptr = tmp + sizeof(uid_t);
        egid = getegid();
        memcpy(ptr, &egid, sizeof(gid_t));
        cred->bytes = tmp;
        cred->size = sizeof(uid_t) + sizeof(gid_t);
    } else {
        /* unrecognized protocol */
        PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
        return PMIX_ERR_NOT_SUPPORTED;
    }

    if (NULL != info) {
        /* mark that this came from us */
        PMIX_INFO_CREATE(*info, 1);
        if (NULL == *info) {
            /* don't hand back a half-built result: drop the credential
             * bytes allocated above (free(NULL) is a no-op for V1) */
            free(cred->bytes);
            cred->bytes = NULL;
            cred->size = 0;
            return PMIX_ERR_NOMEM;
        }
        if (NULL != ninfo) {
            *ninfo = 1;
        }
        /* info[0] is *info, i.e. the first element of the new array */
        PMIX_INFO_LOAD(info[0], PMIX_CRED_TYPE, "native", PMIX_STRING);
    }
    return PMIX_SUCCESS;
}
PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; pmix_proc_t proc = myproc; proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); if ((NULL == name) || (NULL == value) || (NULL == found)) { return PMI2_ERR_INVALID_ARG; } if (pmi2_singleton) { return PMI2_FAIL; } /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); /* PMI-2 expects resource manager to set * process mapping in ANL notation. */ if (!strcmp(name, ANL_MAPPING)) { /* we are looking in the job-data. If there is nothing there * we don't want to look in rank's data, thus set rank to widcard */ proc = myproc; proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && (NULL != val) && (PMIX_STRING == val->type)) { strncpy(value, val->data.string, valuelen); PMIX_VALUE_FREE(val, 1); *found = 1; return PMI2_SUCCESS; } else { /* artpol: * Some RM's (i.e. SLURM) already have ANL precomputed. The export it * through PMIX_ANL_MAP variable. * If we haven't found it we want to have our own packing functionality * since it's common. * Somebody else has to write it since I've already done that for * GPL'ed SLURM :) */ *found = 1; return PMI2_FAIL; } } *found = 0; rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { (void)strncpy(value, val->data.string, valuelen); *found = 1; } PMIX_VALUE_RELEASE(val); } else if (PMIX_ERR_NOT_FOUND == rc) { rc = PMIX_SUCCESS; } PMIX_INFO_DESTRUCT(&info[0]); return convert_err(rc); }
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t *val; pmix_proc_t proc; pmix_info_t *info; size_t ninfo; volatile int active; pmix_query_t *query; size_t nq, n; myquery_data_t myquery_data; fprintf(stderr, "I AM HERE\n"); fflush(stderr); sleep(10); exit(0); /* init us - since we were launched by the RM, our connection info * will have been provided at startup. */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d: PMIx_tool_init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Debugger daemon ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler */ active = -1; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&active); while (-1 == active) { usleep(10); } if (0 != active) { exit(active); } /* get the nspace of the job we are to debug */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) { fprintf(stderr, "[%s:%d] Failed to get job being debugged - error %d\n", myproc.nspace, myproc.rank, rc); goto done; } if (NULL == val) { fprintf(stderr, "Got NULL return\n"); goto done; } fprintf(stderr, "[%s:%d] Debugging %s\n", myproc.nspace, myproc.rank, val->data.string); /* get our local proctable - for scalability reasons, we don't want to * have our "root" debugger process get the proctable for everybody and * send it out to us. 
So ask the local PMIx server for the pid's of * our local target processes */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); query[0].nqual = 1; PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, val->data.string, PMIX_STRING); // the nspace we are enquiring about /* setup the caddy to retrieve the data */ myquery_data.info = NULL; myquery_data.ninfo = 0; myquery_data.active = true; /* execute the query */ if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) { fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); goto done; } while (myquery_data.active) { usleep(10); } fprintf(stderr, "[%s:%d] Local proctable received\n", myproc.nspace, myproc.rank); /* now that we have the proctable for our local processes, we can do our * magic debugger stuff and attach to them. We then send a "release" event * to them - i.e., it's the equivalent to setting the MPIR breakpoint. 
We * do this with the event notification system */ (void)strncpy(proc.nspace, val->data.string, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* we send the notification to just the local procs of the job being debugged */ ninfo = 1; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); // deliver to the target nspace fprintf(stderr, "[%s:%u] Sending release\n", myproc.nspace, myproc.rank); PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, NULL, PMIX_RANGE_LOCAL, info, ninfo, NULL, NULL); /* do some debugger magic */ n = 0; fprintf(stderr, "[%s:%u] Hanging around awhile, doing debugger magic\n", myproc.nspace, myproc.rank); while (n < 5) { usleep(1000); ++n; } done: /* finalize us */ fprintf(stderr, "Debugger daemon ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/**
 * Non-blocking log request.
 *
 * Clients and tools (anything that is neither a server nor a launcher)
 * pack the request and relay it to their server.  Servers and launchers
 * hand the request to the plog framework, adding themselves as the
 * PMIX_LOG_SOURCE directive when the caller did not name a source.
 *
 * @param data        array of entries to be logged (required)
 * @param ndata       number of entries in data (must be non-zero)
 * @param directives  optional array of directives; PMIX_LOG_GENERATE_TIMESTAMP
 *                    and PMIX_LOG_SOURCE are inspected here
 * @param ndirs       number of entries in directives (ignored when
 *                    directives is NULL)
 * @param cbfunc      completion callback
 * @param cbdata      opaque pointer handed back to cbfunc
 *
 * @return PMIX_ERR_INIT if the library is not initialized,
 *         PMIX_ERR_BAD_PARAM if no data was given,
 *         PMIX_ERR_UNREACH if a client/tool has no server connection,
 *         PMIX_ERR_NOMEM on allocation failure,
 *         PMIX_ERR_NOT_SUPPORTED if we are the recorded source of the
 *         request, otherwise the status of the pack/send/log operation.
 */
PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata,
                                      const pmix_info_t directives[], size_t ndirs,
                                      pmix_op_cbfunc_t cbfunc, void *cbdata)
{
    pmix_shift_caddy_t *cd;
    pmix_cmd_t cmd = PMIX_LOG_CMD;
    pmix_buffer_t *msg;
    pmix_status_t rc;
    size_t n;
    time_t timestamp = 0;
    pmix_proc_t *source = NULL;

    PMIX_ACQUIRE_THREAD(&pmix_global_lock);

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:log non-blocking");

    if (pmix_globals.init_cntr <= 0) {
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_INIT;
    }

    if (0 == ndata || NULL == data) {
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_BAD_PARAM;
    }

    /* a count without an array cannot be honoured - treat it as "no
     * directives" so nothing below walks a NULL array */
    if (NULL == directives) {
        ndirs = 0;
    }

    /* check the directives - if they requested a timestamp, then
     * get the time, also look for a source */
    for (n=0; n < ndirs; n++) {
        if (0 == strncmp(directives[n].key, PMIX_LOG_GENERATE_TIMESTAMP, PMIX_MAX_KEYLEN)) {
            if (PMIX_INFO_TRUE(&directives[n])) {
                /* pickup the timestamp */
                timestamp = time(NULL);
            }
        } else if (0 == strncmp(directives[n].key, PMIX_LOG_SOURCE, PMIX_MAX_KEYLEN)) {
            source = directives[n].value.data.proc;
        }
    }

    /* if we are a client or tool, we never do this ourselves - we
     * always pass this request to our server for execution */
    if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) &&
        !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) {
        /* if we aren't connected, don't attempt to send */
        if (!pmix_globals.connected) {
            PMIX_RELEASE_THREAD(&pmix_global_lock);
            return PMIX_ERR_UNREACH;
        }
        PMIX_RELEASE_THREAD(&pmix_global_lock);

        /* if we are not a server, then relay this request to the server */
        cd = PMIX_NEW(pmix_shift_caddy_t);
        if (NULL == cd) {
            return PMIX_ERR_NOMEM;
        }
        cd->cbfunc.opcbfn = cbfunc;
        cd->cbdata = cbdata;
        msg = PMIX_NEW(pmix_buffer_t);
        if (NULL == msg) {
            PMIX_RELEASE(cd);
            return PMIX_ERR_NOMEM;
        }

        /* the command */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &cmd, 1, PMIX_COMMAND);
        if (PMIX_SUCCESS != rc) {
            goto packfail;
        }
        /* provide the timestamp - zero will indicate
         * that it wasn't taken */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &timestamp, 1, PMIX_TIME);
        if (PMIX_SUCCESS != rc) {
            goto packfail;
        }
        /* pack the number of data entries */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &ndata, 1, PMIX_SIZE);
        if (PMIX_SUCCESS != rc) {
            goto packfail;
        }
        /* ndata was verified to be non-zero above */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, data, ndata, PMIX_INFO);
        if (PMIX_SUCCESS != rc) {
            goto packfail;
        }
        /* pack the number of directives, then the directives themselves */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &ndirs, 1, PMIX_SIZE);
        if (PMIX_SUCCESS != rc) {
            goto packfail;
        }
        if (0 < ndirs) {
            PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                             msg, directives, ndirs, PMIX_INFO);
            if (PMIX_SUCCESS != rc) {
                goto packfail;
            }
        }

        pmix_output_verbose(2, pmix_plog_base_framework.framework_output,
                            "pmix:log sending to server");
        PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver,
                           msg, log_cbfunc, (void*)cd);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(cd);
        }
        return rc;

      packfail:
        /* common exit for any failed pack: nothing was sent, so both the
         * message and the caddy are still ours to release */
        PMIX_ERROR_LOG(rc);
        PMIX_RELEASE(msg);
        PMIX_RELEASE(cd);
        return rc;
    }
    PMIX_RELEASE_THREAD(&pmix_global_lock);

    /* if no recorded source was found, then we must be it */
    if (NULL == source) {
        source = &pmix_globals.myid;
        cd = PMIX_NEW(pmix_shift_caddy_t);
        if (NULL == cd) {
            return PMIX_ERR_NOMEM;
        }
        cd->cbfunc.opcbfn = cbfunc;
        cd->cbdata = cbdata;
        /* copy the caller's directives and append ourselves as the source */
        cd->ndirs = ndirs + 1;
        PMIX_INFO_CREATE(cd->directives, cd->ndirs);
        if (NULL == cd->directives) {
            PMIX_RELEASE(cd);
            return PMIX_ERR_NOMEM;
        }
        for (n=0; n < ndirs; n++) {
            PMIX_INFO_XFER(&cd->directives[n], (pmix_info_t*)&directives[n]);
        }
        /* a PMIX_PROC payload is the proc itself - "source" already is
         * the pointer to it, so it must not be passed by address */
        PMIX_INFO_LOAD(&cd->directives[ndirs], PMIX_LOG_SOURCE, source, PMIX_PROC);
        /* call down to process the request - the various components
         * will thread shift as required */
        rc = pmix_plog.log(source, data, ndata, cd->directives, cd->ndirs, localcbfunc, cd);
        if (PMIX_SUCCESS != rc) {
            PMIX_INFO_FREE(cd->directives, cd->ndirs);
            PMIX_RELEASE(cd);
        }
    } else if (0 == strncmp(source->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN) &&
               source->rank == pmix_globals.myid.rank) {
        /* if I am the recorded source, then this is a re-submission of
         * something that got "upcalled" by a prior call. In this case,
         * we return a "not supported" error as clearly we couldn't
         * handle it, and neither could our host */
        rc = PMIX_ERR_NOT_SUPPORTED;
    } else {
        /* call down to process the request - the various components
         * will thread shift as required */
        rc = pmix_plog.log(source, data, ndata, directives, ndirs, cbfunc, cbdata);
    }

    return rc;
}