/* Spawn one debugger daemon on each node of the job being debugged,
 * then register an event handler so we are notified when the debugger
 * job terminates.
 *
 * appspace - nspace of the application job being debugged
 * myrel    - release tracker; its nspace is set to the debugger job's
 *            nspace and it is handed to the termination handler
 *
 * Returns PMIX_SUCCESS, or the error from the spawn/registration. */
static pmix_status_t spawn_debugger(char *appspace, myrel_t *myrel)
{
    pmix_status_t rc;
    pmix_info_t *dinfo;
    pmix_app_t *debugger;
    size_t dninfo;
    char cwd[1024];
    char dspace[PMIX_MAX_NSLEN+1];
    mylock_t mylock;
    pmix_status_t code = PMIX_ERR_JOB_TERMINATED;

    /* setup the debugger */
    PMIX_APP_CREATE(debugger, 1);
    debugger[0].cmd = strdup("./debuggerd");
    PMIX_ARGV_APPEND(rc, debugger[0].argv, "./debuggerd");
    /* point us to our current directory */
    if (NULL == getcwd(cwd, sizeof(cwd))) {
        /* BUGFIX: getcwd was unchecked - strdup of an indeterminate
         * buffer would follow on failure */
        PMIX_APP_FREE(debugger, 1);
        return PMIX_ERROR;
    }
    debugger[0].cwd = strdup(cwd);

    /* provide directives so the daemons go where we want, and
     * let the RM know these are debugger daemons */
    dninfo = 7;  /* BUGFIX: was 6, but seven directives are loaded below */
    PMIX_INFO_CREATE(dinfo, dninfo);
    PMIX_INFO_LOAD(&dinfo[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);   // launch one copy on each node
    PMIX_INFO_LOAD(&dinfo[1], PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL);  // these are debugger daemons
    /* BUGFIX: this previously reloaded &dinfo[1], overwriting (and
     * leaking) the PMIX_DEBUGGER_DAEMONS directive */
    PMIX_INFO_LOAD(&dinfo[2], PMIX_DEBUG_JOB, appspace, PMIX_STRING);   // the nspace being debugged
    PMIX_INFO_LOAD(&dinfo[3], PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); // notify us when the debugger job completes
    PMIX_INFO_LOAD(&dinfo[4], PMIX_DEBUG_WAITING_FOR_NOTIFY, NULL, PMIX_BOOL); // the proc is waiting to be released
    PMIX_INFO_LOAD(&dinfo[5], PMIX_FWD_STDOUT, NULL, PMIX_BOOL);        // forward stdout to me
    PMIX_INFO_LOAD(&dinfo[6], PMIX_FWD_STDERR, NULL, PMIX_BOOL);        // forward stderr to me

    /* spawn the daemons */
    fprintf(stderr, "Debugger: spawning %s\n", debugger[0].cmd);
    if (PMIX_SUCCESS != (rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, dspace))) {
        fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", PMIx_Error_string(rc));
        PMIX_INFO_FREE(dinfo, dninfo);
        PMIX_APP_FREE(debugger, 1);
        return rc;
    }
    /* cleanup */
    PMIX_INFO_FREE(dinfo, dninfo);
    PMIX_APP_FREE(debugger, 1);

    /* register callback for when this job terminates */
    myrel->nspace = strdup(dspace);
    PMIX_INFO_CREATE(dinfo, 2);
    PMIX_INFO_LOAD(&dinfo[0], PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER);
    /* only call me back when this specific job terminates */
    PMIX_INFO_LOAD(&dinfo[1], PMIX_NSPACE, dspace, PMIX_STRING);

    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(&code, 1, dinfo, 2,
                                release_fn, evhandler_reg_callbk, (void*)&mylock);
    DEBUG_WAIT_THREAD(&mylock);
    rc = mylock.status;
    DEBUG_DESTRUCT_LOCK(&mylock);
    PMIX_INFO_FREE(dinfo, 2);

    return rc;
}
int main(int argc, char **argv) { pmix_status_t rc; pmix_proc_t myproc; pmix_info_t *info; size_t ninfo; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); exit(rc); } pmix_output(0, "Tool ns %s rank %d: Running", myproc.nspace, myproc.rank); /* query something */ ninfo = 2; PMIX_INFO_CREATE(info, ninfo); (void)strncpy(info[0].key, "foobar", PMIX_MAX_KEYLEN); (void)strncpy(info[1].key, "spastic", PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info failed: %d", myproc.nspace, myproc.rank, rc); goto done; } if (0 != strncmp(info[0].key, "foobar", PMIX_MAX_KEYLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs foobar", myproc.nspace, myproc.rank, info[0].key); } if (0 != strncmp(info[1].key, "spastic", PMIX_MAX_KEYLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs spastic", myproc.nspace, myproc.rank, info[1].key); } if (PMIX_STRING != info[0].value.type) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d", myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING); } if (PMIX_STRING != info[1].value.type) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong type: %d vs %d", myproc.nspace, myproc.rank, info[1].value.type, PMIX_STRING); } if (0 != strcmp(info[0].value.data.string, "0")) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong value: %s vs 0", myproc.nspace, myproc.rank, info[1].value.data.string); } if (0 != strcmp(info[1].value.data.string, "1")) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong value: %s vs 1", myproc.nspace, myproc.rank, info[1].value.data.string); } PMIX_INFO_FREE(info, ninfo); done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { 
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(rc); }
/* Register a new error handler and return its slot index via *index.
 * The caller's info directives (if any) are deep-copied because we
 * cannot rely on them remaining in memory.
 * Returns PMIX_SUCCESS, or PMIX_ERROR if no slot could be obtained. */
pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
                                  pmix_info_t *info, int ninfo,
                                  int *index)
{
    pmix_error_reg_info_t *reg = PMIX_NEW(pmix_error_reg_info_t);

    reg->errhandler = err;
    reg->ninfo = ninfo;

    /* deep-copy the directives, if provided */
    if (0 < ninfo && NULL != info) {
        int i;
        PMIX_INFO_CREATE(reg->info, ninfo);
        for (i = 0; i < ninfo; i++) {
            (void)strncpy(reg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
            pmix_value_xfer(&reg->info[i].value, &info[i].value);
        }
    }

    *index = pmix_pointer_array_add(&pmix_globals.errregs, reg);
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix_add_errhandler index =%d", *index);
    if (0 > *index) {
        /* no slot available - discard the registration */
        PMIX_RELEASE(reg);
        return PMIX_ERROR;
    }
    return PMIX_SUCCESS;
}
/* Callback for the PMIx_Query API. The status indicates whether the
 * request was fully, partially, or not satisfied; 'info' holds the
 * returned data keyed by the query keys. We deep-copy the results
 * into the caller's tracker, invoke release_fn so the library can
 * clean up its copy, then clear the tracker's active flag so the
 * waiting loop can proceed. */
static void cbfunc(pmix_status_t status,
                   pmix_info_t *info, size_t ninfo,
                   void *cbdata,
                   pmix_release_cbfunc_t release_fn,
                   void *release_cbdata)
{
    myquery_data_t *tracker = (myquery_data_t*)cbdata;

    /* copy out the results - our copy is released later in release_fn */
    if (ninfo > 0) {
        size_t idx;
        tracker->ninfo = ninfo;
        PMIX_INFO_CREATE(tracker->info, ninfo);
        for (idx = 0; idx < ninfo; idx++) {
            fprintf(stderr, "Transferring %s\n", info[idx].key);
            PMIX_INFO_XFER(&tracker->info[idx], &info[idx]);
        }
    }
    /* let the library release its copy of the data */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }
    /* unblock the waiter */
    tracker->active = false;
}
/* Initialize the embedded PMIx server component.
 * NOTE(review): this definition is truncated in this view - the
 * function body continues beyond what is shown here. */
int pmix2x_server_init(opal_pmix_server_module_t *module, opal_list_t *info) {
    pmix_status_t rc;
    int dbg;
    opal_value_t *kv;
    pmix_info_t *pinfo;
    size_t sz, n;
    volatile bool active;
    /* propagate our verbosity level into the PMIx library via its
     * environment variable.
     * NOTE(review): 'dbgvalue' is not declared locally - presumably a
     * file-scope global; the string handed to putenv must stay alive,
     * so it is deliberately not freed here. asprintf's return value is
     * unchecked - confirm OOM policy. */
    if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
        asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
        putenv(dbgvalue);
    }
    /* convert the list to an array of pmix_info_t */
    if (NULL != info) {
        sz = opal_list_get_size(info);
        PMIX_INFO_CREATE(pinfo, sz);
        n = 0;
        OPAL_LIST_FOREACH(kv, info, opal_value_t) {
            /* copy key and translate the OPAL value into PMIx form */
            (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN);
            pmix2x_value_load(&pinfo[n].value, kv);
            ++n;
        }
void pmi_fence(int collect) { pmix_info_t *info = NULL; pmix_proc_t proc; bool value = 1; int ninfo = 0; int rc; if( collect ){ PMIX_INFO_CREATE(info, 1); (void)strncpy(info->key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); pmix_value_load(&info->value, &value, PMIX_BOOL); ninfo = 1; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, this_proc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, ninfo))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d", this_proc.nspace, this_proc.rank, rc); abort(); } if( collect ){ PMIX_INFO_FREE(info, ninfo); } }
/* Callback for the PMIx_Query API. The status reports whether the
 * request was fully, partially, or not satisfied; 'info' holds the
 * returned data keyed by the query keys. We record the status,
 * deep-copy the results (the library owns the originals and reclaims
 * them once release_fn is invoked), then wake the waiting thread. */
static void cbfunc(pmix_status_t status,
                   pmix_info_t *info, size_t ninfo,
                   void *cbdata,
                   pmix_release_cbfunc_t release_fn,
                   void *release_cbdata)
{
    myquery_data_t *query = (myquery_data_t*)cbdata;

    query->status = status;

    /* deep-copy the results before the library reclaims them */
    if (ninfo > 0) {
        size_t idx;
        query->ninfo = ninfo;
        PMIX_INFO_CREATE(query->info, ninfo);
        for (idx = 0; idx < ninfo; idx++) {
            fprintf(stderr, "Key %s Type %s(%d)\n",
                    info[idx].key,
                    PMIx_Data_type_string(info[idx].value.type),
                    info[idx].value.type);
            PMIX_INFO_XFER(&query->info[idx], &info[idx]);
        }
    }
    /* allow the library to clean up from the operation */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }
    /* wake the thread blocked on this query */
    DEBUG_WAKEUP_THREAD(&query->lock);
}
/* Handle a query response from the server: unpack the status, the
 * returned-info count, and (if present) the info array into a results
 * caddy, then hand everything to the caller's callback. The caddy is
 * released via relcbfunc once the caller is done with the data. */
static void query_cbfunc(struct pmix_peer_t *peer,
                         pmix_ptl_hdr_t *hdr,
                         pmix_buffer_t *buf, void *cbdata)
{
    pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
    pmix_status_t rc;
    pmix_shift_caddy_t *results;
    int cnt;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query cback from server");

    results = PMIX_NEW(pmix_shift_caddy_t);

    /* unpack the status */
    cnt = 1;
    PMIX_BFROPS_UNPACK(rc, peer, buf, &results->status, &cnt, PMIX_STATUS);
    if (PMIX_SUCCESS != rc) {
        PMIX_ERROR_LOG(rc);
        results->status = rc;
        goto complete;
    }
    if (PMIX_SUCCESS != results->status) {
        /* server reported failure - nothing else was packed */
        goto complete;
    }
    /* unpack any returned data */
    cnt = 1;
    PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE);
    if (PMIX_SUCCESS != rc) {
        PMIX_ERROR_LOG(rc);
        results->status = rc;
        goto complete;
    }
    if (0 < results->ninfo) {
        PMIX_INFO_CREATE(results->info, results->ninfo);
        cnt = results->ninfo;
        PMIX_BFROPS_UNPACK(rc, peer, buf, results->info, &cnt, PMIX_INFO);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            results->status = rc;
            goto complete;
        }
    }

  complete:
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query cback from server releasing");
    /* release the caller */
    if (NULL != cd->cbfunc) {
        /* relcbfunc releases the results caddy once the caller is done */
        cd->cbfunc(results->status, results->info, results->ninfo,
                   cd->cbdata, relcbfunc, results);
    } else {
        /* no one to hand the results to - release them ourselves
         * (BUGFIX: 'results' previously leaked when cbfunc was NULL) */
        PMIX_RELEASE(results);
    }
    PMIX_RELEASE(cd);
}
/* Unpack *num_vals pmix_query_t structures from 'buffer' into 'dest'.
 * For each query the wire format is: key count (int32), the keys
 * (strings), qualifier count (size_t), then the qualifiers (infos).
 * Returns PMIX_SUCCESS, or the first unpack error encountered.
 * NOTE(review): on an unpack failure, queries filled so far (including
 * any allocated key array) are returned as-is - presumably the caller
 * destructs them; confirm the ownership contract. */
pmix_status_t pmix_bfrops_base_unpack_query(pmix_pointer_array_t *regtypes,
                                            pmix_buffer_t *buffer, void *dest,
                                            int32_t *num_vals, pmix_data_type_t type)
{
    pmix_query_t *ptr;
    int32_t i, n, m;
    pmix_status_t ret;
    int32_t nkeys;

    pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output,
                        "pmix_bfrop_unpack: %d queries", *num_vals);

    ptr = (pmix_query_t *) dest;
    n = *num_vals;

    for (i = 0; i < n; ++i) {
        PMIX_QUERY_CONSTRUCT(&ptr[i]);
        /* unpack the number of keys */
        m=1;
        PMIX_BFROPS_UNPACK_TYPE(ret, buffer, &nkeys, &m, PMIX_INT32, regtypes);
        if (PMIX_SUCCESS != ret) {
            return ret;
        }
        if (0 < nkeys) {
            /* unpack the keys */
            /* allocate a NULL-terminated argv-style array */
            if (NULL == (ptr[i].keys = (char**)calloc(nkeys+1, sizeof(char*)))) {
                return PMIX_ERR_NOMEM;
            }
            /* unpack keys */
            m=nkeys;
            PMIX_BFROPS_UNPACK_TYPE(ret, buffer, ptr[i].keys, &m, PMIX_STRING, regtypes);
            if (PMIX_SUCCESS != ret) {
                return ret;
            }
        }
        /* unpack the number of qualifiers */
        m=1;
        PMIX_BFROPS_UNPACK_TYPE(ret, buffer, &ptr[i].nqual, &m, PMIX_SIZE, regtypes);
        if (PMIX_SUCCESS != ret) {
            return ret;
        }
        if (0 < ptr[i].nqual) {
            /* unpack the qualifiers */
            PMIX_INFO_CREATE(ptr[i].qualifiers, ptr[i].nqual);
            m = ptr[i].nqual;
            PMIX_BFROPS_UNPACK_TYPE(ret, buffer, ptr[i].qualifiers, &m, PMIX_INFO, regtypes);
            if (PMIX_SUCCESS != ret) {
                return ret;
            }
        }
    }
    return PMIX_SUCCESS;
}
/* Invoke the appropriate registered error handler(s) for 'status'.
 * Handlers registered with a matching PMIX_ERROR_NAME value are fired;
 * if none match, the (last-found) general handler is fired instead.
 * Each handler receives the caller's info array prepended with a
 * PMIX_ERROR_HANDLER_ID entry identifying the handler's slot. */
void pmix_errhandler_invoke(pmix_status_t status,
                            pmix_proc_t procs[], size_t nprocs,
                            pmix_info_t info[], size_t ninfo)
{
    /* We need to parse thru each registered handler and determine
     * which one to call for the specific error */
    int i, idflt = -1;          /* defensive init - only read when errdflt != NULL */
    size_t j;
    bool fired = false;
    pmix_error_reg_info_t *errreg, *errdflt = NULL;
    pmix_info_t *iptr;

    /* prepend a slot for the handler id, then copy the caller's infos */
    PMIX_INFO_CREATE(iptr, ninfo+1);
    (void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
    iptr[0].value.type = PMIX_INT;
    if (NULL != info) {
        for (j=0; j < ninfo; j++) {
            /* deep-copy key and value - pmix_value_xfer handles every
             * data type correctly (BUGFIX: PMIX_INFO_LOAD was passed
             * &info[j].value.data, the address of the value union,
             * which is wrong for string and other pointer-carrying
             * types - the pointer bytes themselves were copied) */
            (void)strncpy(iptr[j+1].key, info[j].key, PMIX_MAX_KEYLEN);
            pmix_value_xfer(&iptr[j+1].value, &info[j].value);
        }
    }

    for (i = 0; i < pmix_globals.errregs.size; i++) {
        errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
        if (NULL == errreg) {
            continue;
        }
        if (NULL == errreg->info || 0 == errreg->ninfo) {
            /* this is a general err handler - we will call it if there
             * is no better match */
            errdflt = errreg;
            idflt = i;
            continue;
        }
        iptr[0].value.data.integer = i;
        /* match error name key first */
        for (j = 0; j < errreg->ninfo; j++) {
            if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) &&
                (status == errreg->info[j].value.data.int32)) {
                iptr[0].value.data.integer = i;
                errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
                fired = true;
                break;
            }
        }
    }

    /* if nothing fired and we found a general err handler, then fire it */
    if (!fired && NULL != errdflt) {
        iptr[0].value.data.integer = idflt;
        errdflt->errhandler(status, procs, nprocs, iptr, ninfo+1);
    }

    /* cleanup */
    PMIX_INFO_FREE(iptr, ninfo+1);
}
/* Unpack *num_vals pmix_query_t structures from 'buffer' into 'dest'.
 * For each query the wire format is: key count (int32), the keys
 * (strings), qualifier count (size_t), then the qualifiers (infos).
 * Returns PMIX_SUCCESS, or the first unpack error encountered.
 * NOTE(review): on an unpack failure, queries filled so far (including
 * any allocated key array) are returned as-is - presumably the caller
 * destructs them; confirm the ownership contract. */
pmix_status_t pmix_bfrop_unpack_query(pmix_buffer_t *buffer, void *dest,
                                      int32_t *num_vals, pmix_data_type_t type)
{
    pmix_query_t *ptr;
    int32_t i, n, m;
    pmix_status_t ret;
    int32_t nkeys;

    pmix_output_verbose(20, pmix_globals.debug_output,
                        "pmix_bfrop_unpack: %d queries", *num_vals);

    ptr = (pmix_query_t *) dest;
    n = *num_vals;

    for (i = 0; i < n; ++i) {
        PMIX_QUERY_CONSTRUCT(&ptr[i]);
        /* unpack the number of keys */
        m=1;
        if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int32(buffer, &nkeys, &m, PMIX_INT32))) {
            return ret;
        }
        if (0 < nkeys) {
            /* unpack the keys */
            /* allocate a NULL-terminated argv-style array */
            if (NULL == (ptr[i].keys = (char**)calloc(nkeys+1, sizeof(char*)))) {
                return PMIX_ERR_NOMEM;
            }
            /* unpack keys */
            m=nkeys;
            if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, ptr[i].keys, &m, PMIX_STRING))) {
                return ret;
            }
        }
        /* unpack the number of qualifiers */
        m=1;
        if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].nqual, &m, PMIX_SIZE))) {
            return ret;
        }
        if (0 < ptr[i].nqual) {
            /* unpack the qualifiers */
            PMIX_INFO_CREATE(ptr[i].qualifiers, ptr[i].nqual);
            m = ptr[i].nqual;
            if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_info(buffer, ptr[i].qualifiers, &m, PMIX_INFO))) {
                return ret;
            }
        }
    }
    return PMIX_SUCCESS;
}
static void set_namespace(int nprocs, char *ranks, char *name) { size_t ninfo; pmix_info_t *info; ninfo = 8; char *regex, *ppn; PMIX_INFO_CREATE(info, ninfo); (void)strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); info[0].value.type = PMIX_UINT32; info[0].value.data.uint32 = nprocs; (void)strncpy(info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); info[1].value.type = PMIX_UINT32; info[1].value.data.uint32 = 0; (void)strncpy(info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); info[2].value.type = PMIX_UINT32; info[2].value.data.uint32 = nprocs; (void)strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); info[3].value.type = PMIX_STRING; info[3].value.data.string = strdup(ranks); PMIx_generate_regex(NODE_NAME, ®ex); (void)strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); info[4].value.type = PMIX_STRING; info[4].value.data.string = regex; PMIx_generate_ppn(ranks, &ppn); (void)strncpy(info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); info[5].value.type = PMIX_STRING; info[5].value.data.string = ppn; (void)strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); info[6].value.type = PMIX_UINT32; info[6].value.data.uint32 = nprocs; (void)strncpy(info[7].key, PMIX_APPNUM, PMIX_MAX_KEYLEN); info[7].value.type = PMIX_UINT32; info[7].value.data.uint32 = getpid (); int in_progress = 1, rc; if (PMIX_SUCCESS == (rc = PMIx_server_register_nspace(name, nprocs, info, ninfo, release_cb, &in_progress))) { PMIX_WAIT_FOR_COMPLETION(in_progress); } PMIX_INFO_FREE(info, ninfo); }
int main(int argc, char **argv) { pmix_status_t rc; pmix_proc_t myproc; pmix_info_t *info; size_t ninfo; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "PMIx_tool_init failed: %s\n", PMIx_Error_string(rc)); exit(rc); } fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* query something */ ninfo = 1; PMIX_INFO_CREATE(info, ninfo); (void)strncpy(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) { fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } if (0 != strncmp(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN)) { fprintf(stderr, "tool ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs %s\n", myproc.nspace, myproc.rank, info[0].key, PMIX_QUERY_NAMESPACES); } if (PMIX_STRING != info[0].value.type) { fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d\n", myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING); } fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] returned %s\n", myproc.nspace, myproc.rank, (NULL == info[0].value.data.string) ? "NULL" : info[0].value.data.string); PMIX_INFO_FREE(info, ninfo); done: /* finalize us */ fprintf(stderr, "Tool ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_tool_finalize())) { fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(rc); }
/* Register (or overwrite) an error handler.
 *
 * If *index is non-zero, the existing registration in that slot is
 * replaced; otherwise a new slot is allocated and returned in *index.
 * The caller's info directives are deep-copied since we cannot rely
 * on them remaining in memory.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_NOT_FOUND (stale index), or
 * PMIX_ERROR (no slot available). */
pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
                                  pmix_info_t *info, int ninfo,
                                  int *index)
{
    int i;
    pmix_error_reg_info_t *errreg;

    if (0 != *index) {
        /* overwrite an existing entry */
        errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, *index);
        if (NULL == errreg) {
            return PMIX_ERR_NOT_FOUND;
        }
        errreg->errhandler = err;
        PMIX_INFO_FREE(errreg->info, errreg->ninfo);
        /* BUGFIX: clear the stale pointer - if the caller passes no
         * new info below, errreg->info would otherwise dangle */
        errreg->info = NULL;
        errreg->ninfo = ninfo;
    } else {
        errreg = PMIX_NEW(pmix_error_reg_info_t);
        errreg->errhandler = err;
        errreg->ninfo = ninfo;
        *index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "pmix_add_errhandler index =%d", *index);
        if (*index < 0) {
            PMIX_RELEASE(errreg);
            return PMIX_ERROR;
        }
    }
    /* sadly, we have to copy the info objects as we cannot
     * rely on them to remain in-memory */
    if (NULL != info && 0 < ninfo) {
        PMIX_INFO_CREATE(errreg->info, ninfo);
        for (i=0; i < ninfo; i++) {
            /* if this is a specific, single errhandler, then
             * mark it accordingly */
            if (0 == strncmp(info[i].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
                errreg->sglhdlr = true;
            }
            (void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
            pmix_value_xfer(&errreg->info[i].value, &info[i].value);
        }
    }
    return PMIX_SUCCESS;
}
/* Register a new error handler, deep-copying its directives, and
 * return the allocated slot via *index.
 * Returns PMIX_SUCCESS, or PMIX_ERROR if no slot could be obtained. */
pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
                                  pmix_info_t *info, int ninfo,
                                  int *index)
{
    int i;
    pmix_status_t rc = PMIX_SUCCESS;
    pmix_error_reg_info_t *errreg = PMIX_NEW(pmix_error_reg_info_t);

    errreg->errhandler = err;
    errreg->ninfo = ninfo;
    /* deep-copy the directives (BUGFIX: guard against a NULL info
     * array / non-positive count - copying from NULL is undefined
     * behavior) */
    if (NULL != info && 0 < ninfo) {
        PMIX_INFO_CREATE(errreg->info, ninfo);
        for (i=0; i < ninfo; i++) {
            /* strncpy matches the sibling implementations; the
             * destination was zeroed by PMIX_INFO_CREATE */
            (void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
            pmix_value_xfer(&errreg->info[i].value, &info[i].value);
        }
    }
    *index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
    if (-1 == *index) {
        /* BUGFIX: release the registration on failure - it was leaked */
        PMIX_RELEASE(errreg);
        rc = PMIX_ERROR;
    }
    return rc;
}
/* Receive the info array returned by the server: deep-copy it into
 * the xfer tracker, let the library clean up its copy, and wake the
 * thread waiting on the tracker's lock. */
static void infocbfunc(pmix_status_t status,
                       pmix_info_t *info, size_t ninfo,
                       void *cbdata,
                       pmix_release_cbfunc_t release_fn,
                       void *release_cbdata)
{
    myxfer_t *x = (myxfer_t*)cbdata;

    /* deep-copy the returned data, if any */
    if (NULL != info) {
        size_t k;
        x->ninfo = ninfo;
        PMIX_INFO_CREATE(x->info, x->ninfo);
        for (k = 0; k < ninfo; k++) {
            PMIX_INFO_XFER(&x->info[k], &info[k]);
        }
    }
    /* allow the library to reclaim its copy */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }
    /* unblock the waiter */
    OPAL_PMIX_WAKEUP_THREAD(&x->lock);
}
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info; bool flag; volatile int active; pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any * debugger flag instructing us to stop-in-init. If such a directive * is included, then the process will be stopped in this call until * the "debugger release" notification arrives */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ active = -1; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&active); while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* check to see if we have been instructed to wait for a debugger * to attach to us. We won't get both a stop-in-init AND a * wait-for-notify directive, so we should never stop twice. 
This * directive is provided so that something like an MPI implementation * can do some initial setup in MPI_Init prior to pausing for the * debugger */ if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, PMIX_DEBUG_WAIT_FOR_NOTIFY, NULL, 0, &val))) { /* register for debugger release */ active = -1; PMIx_Register_event_handler(&dbg, 1, NULL, 0, release_fn, evhandler_reg_callbk, (void*)&active); /* wait for registration to complete */ while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Debug handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* wait for debugger release */ while (waiting_for_debugger) { sleep(1); } } /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, val->data.uint32); /* get the number of procs in our job - univ size is the total number of allocated * slots, not the number of procs in the job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d num procs %d\n", myproc.nspace, myproc.rank, nprocs); /* put a few values */ if (0 > asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Store_internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, 
&value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); /* push the data to our PMIx server */ if (PMIX_SUCCESS != (rc = PMIx_Commit())) { fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" * data from our peers */ PMIX_INFO_CREATE(info, 1); flag = true; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); /* check the returned data */ for (n=0; n < nprocs; n++) { if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_UINT64 != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (1234 != val->data.uint64) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (0 > 
asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_STRING != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (0 != strcmp(val->data.string, "1234")) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/* Validate the credential presented by a connecting peer.
 *
 * usock (protocol V1) connections are checked against the kernel's
 * socket peer credentials; tcp (protocol V2) connections carry the
 * peer's effective uid/gid in 'cred'. Either way, the values are
 * compared to the uid/gid recorded for the peer. On success, and when
 * the caller requested it, an info array describing the validation is
 * returned in *info/*ninfo (caller owns it). */
static pmix_status_t validate_cred(struct pmix_peer_t *peer,
                                   const pmix_info_t directives[], size_t ndirs,
                                   pmix_info_t **info, size_t *ninfo,
                                   const pmix_byte_object_t *cred)
{
    pmix_peer_t *pr = (pmix_peer_t*)peer;
#if defined(SO_PEERCRED)
#ifdef HAVE_STRUCT_SOCKPEERCRED_UID
#define HAVE_STRUCT_UCRED_UID
    struct sockpeercred ucred;
#else
    struct ucred ucred;
#endif
    socklen_t crlen = sizeof (ucred);
#endif
    uid_t euid = -1;
    gid_t egid = -1;
    char *ptr;
    size_t ln;
    bool takeus;
    char **types;
    size_t n, m;
    uint32_t u32;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "psec: native validate_cred %s",
                        (NULL == cred) ? "NULL" : "NON-NULL");

    if (PMIX_PROTOCOL_V1 == pr->protocol) {
        /* usock protocol - get the remote side's uid/gid */
#if defined(SO_PEERCRED) && (defined(HAVE_STRUCT_UCRED_UID) || defined(HAVE_STRUCT_UCRED_CR_UID))
        /* Ignore received 'cred' and validate ucred for socket instead. */
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec:native checking getsockopt on socket %d for peer credentials", pr->sd);
        if (getsockopt(pr->sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "psec: getsockopt SO_PEERCRED failed: %s",
                                strerror (pmix_socket_errno));
            return PMIX_ERR_INVALID_CRED;
        }
#if defined(HAVE_STRUCT_UCRED_UID)
        euid = ucred.uid;
        egid = ucred.gid;
#else
        euid = ucred.cr_uid;
        egid = ucred.cr_gid;
#endif
#elif defined(HAVE_GETPEEREID)
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec:native checking getpeereid on socket %d for peer credentials", pr->sd);
        if (0 != getpeereid(pr->sd, &euid, &egid)) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "psec: getsockopt getpeereid failed: %s",
                                strerror (pmix_socket_errno));
            return PMIX_ERR_INVALID_CRED;
        }
#else
        return PMIX_ERR_NOT_SUPPORTED;
#endif
    } else if (PMIX_PROTOCOL_V2 == pr->protocol) {
        /* this is a tcp protocol, so the cred is actually the uid/gid
         * passed upwards from the client */
        if (NULL == cred) {
            /* not allowed */
            return PMIX_ERR_INVALID_CRED;
        }
        ln = cred->size;
        euid = 0;
        egid = 0;
        /* uid comes first, then gid */
        if (sizeof(uid_t) <= ln) {
            memcpy(&euid, cred->bytes, sizeof(uid_t));
            ln -= sizeof(uid_t);
            ptr = cred->bytes + sizeof(uid_t);
        } else {
            return PMIX_ERR_INVALID_CRED;
        }
        if (sizeof(gid_t) <= ln) {
            memcpy(&egid, ptr, sizeof(gid_t));
        } else {
            return PMIX_ERR_INVALID_CRED;
        }
    } else if (PMIX_PROTOCOL_UNDEF != pr->protocol) {
        /* don't recognize the protocol */
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we are responding to a local request to validate a credential,
     * then see if they specified a mechanism */
    if (NULL != directives && 0 < ndirs) {
        for (n=0; n < ndirs; n++) {
            if (0 == strncmp(directives[n].key, PMIX_CRED_TYPE, PMIX_MAX_KEYLEN)) {
                /* split the specified string */
                types = pmix_argv_split(directives[n].value.data.string, ',');
                takeus = false;
                for (m=0; NULL != types[m]; m++) {
                    if (0 == strcmp(types[m], "native")) {
                        /* it's us! */
                        takeus = true;
                        break;
                    }
                }
                pmix_argv_free(types);
                if (!takeus) {
                    return PMIX_ERR_NOT_SUPPORTED;
                }
            }
        }
    }

    /* check uid */
    if (euid != pr->info->uid) {
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec: socket cred contains invalid uid %u", euid);
        return PMIX_ERR_INVALID_CRED;
    }
    /* check gid */
    if (egid != pr->info->gid) {
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "psec: socket cred contains invalid gid %u", egid);
        return PMIX_ERR_INVALID_CRED;
    }

    /* validated - mark that we did it */
    if (NULL != info) {
        PMIX_INFO_CREATE(*info, 3);
        if (NULL == *info) {
            return PMIX_ERR_NOMEM;
        }
        *ninfo = 3;
        /* mark that this came from us
         * (BUGFIX: this is the "native" module - it was mislabeled "munge") */
        PMIX_INFO_LOAD(&(*info)[0], PMIX_CRED_TYPE, "native", PMIX_STRING);
        /* provide the uid it contained
         * (BUGFIX: info[1]/info[2] indexed past the single pointer the
         * caller passed; the elements live in the array at *info) */
        u32 = euid;
        PMIX_INFO_LOAD(&(*info)[1], PMIX_USERID, &u32, PMIX_UINT32);
        /* provide the gid it contained */
        u32 = egid;
        PMIX_INFO_LOAD(&(*info)[2], PMIX_GRPID, &u32, PMIX_UINT32);
    }
    return PMIX_SUCCESS;
}
/* Construct the credential we present to the remote side.
 *
 * usock (protocol V1) connections need nothing; tcp (protocol V2)
 * connections get our effective uid and gid packed into the byte
 * object (uid first, then gid). If the caller supplied directives
 * naming acceptable credential types, we first verify that "native"
 * is among them. On success, and when requested, an info array
 * identifying the credential type is returned in *info/*ninfo. */
static pmix_status_t create_cred(struct pmix_peer_t *peer,
                                 const pmix_info_t directives[], size_t ndirs,
                                 pmix_info_t **info, size_t *ninfo,
                                 pmix_byte_object_t *cred)
{
    pmix_peer_t *pxp = (pmix_peer_t*)peer;
    char **tarray;
    size_t i, j;
    bool mine;
    uid_t euid;
    gid_t egid;
    char *blob, *pos;

    /* ensure initialization */
    PMIX_BYTE_OBJECT_CONSTRUCT(cred);

    /* we may be responding to a local request for a credential, so
     * see if they specified a mechanism */
    if (NULL != directives && 0 < ndirs) {
        /* cycle across the provided info and see if they specified
         * any desired credential types */
        mine = true;
        for (i=0; i < ndirs; i++) {
            if (0 != strncmp(directives[i].key, PMIX_CRED_TYPE, PMIX_MAX_KEYLEN)) {
                continue;
            }
            /* see if we are included - start by assuming they
             * don't want us */
            mine = false;
            tarray = pmix_argv_split(directives[i].value.data.string, ',');
            for (j=0; NULL != tarray[j]; j++) {
                if (0 == strcmp(tarray[j], "native")) {
                    /* it's us! */
                    mine = true;
                    break;
                }
            }
            pmix_argv_free(tarray);
            break;
        }
        if (!mine) {
            PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
            return PMIX_ERR_NOT_SUPPORTED;
        }
    }

    if (PMIX_PROTOCOL_V1 == pxp->protocol) {
        /* usock protocol - nothing to do */
        goto complete;
    }
    if (PMIX_PROTOCOL_V2 == pxp->protocol) {
        /* tcp protocol - need to provide our effective
         * uid and gid for validation on remote end */
        blob = (char*)malloc(sizeof(uid_t) + sizeof(gid_t));
        if (NULL == blob) {
            return PMIX_ERR_NOMEM;
        }
        euid = geteuid();
        memcpy(blob, &euid, sizeof(uid_t));
        pos = blob + sizeof(uid_t);
        egid = getegid();
        memcpy(pos, &egid, sizeof(gid_t));
        cred->bytes = blob;
        cred->size = sizeof(uid_t) + sizeof(gid_t);
        goto complete;
    }
    /* unrecognized protocol */
    PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
    return PMIX_ERR_NOT_SUPPORTED;

  complete:
    if (NULL != info) {
        /* mark that this came from us */
        PMIX_INFO_CREATE(*info, 1);
        if (NULL == *info) {
            return PMIX_ERR_NOMEM;
        }
        *ninfo = 1;
        PMIX_INFO_LOAD(info[0], PMIX_CRED_TYPE, "native", PMIX_STRING);
    }
    return PMIX_SUCCESS;
}
/*
 * PMIx client exerciser: initializes as a "TEST" programming model,
 * queries the job size and server URI, registers a model-declared event
 * handler and a default error handler, then for MAXCNT rounds Puts a
 * local-scope and a remote-scope key, Commits, Fences, and cross-checks
 * the values retrieved from every peer rank (remote-scope keys must NOT
 * be visible to same-node peers).  It finishes by dumping its own modex
 * blob (NULL-key PMIx_Get), exercising PMIx_Log_nb, optionally calling
 * PMIx_Abort on rank 0 (-abort flag), and finalizing.
 *
 * NOTE(review): the strings asprintf()'d into 'tmp' are not freed on
 * every path (the Put calls and the failed-Get 'continue' path leak
 * them) — presumably tolerated because this is a short-lived test;
 * confirm before reuse.
 */
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { doabort = true; } } /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_VALUE_RELEASE(val); /* test something */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_SERVER_URI, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } pmix_output(0, "CLIENT SERVER URI: %s", val->data.string); PMIX_VALUE_RELEASE(val); /* register a handler specifically for when models declare */ active = true; ninfo = 1; PMIX_INFO_CREATE(iptr, ninfo); PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); code = PMIX_MODEL_DECLARED; PMIx_Register_event_handler(&code, 1, iptr, ninfo, model_callback, model_registration_callback, (void*)&active); while (active) { usleep(10); } PMIX_INFO_FREE(iptr, ninfo); /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, 
/* default error-handler registration continues; afterwards we fetch the
 * universe size and enter the Put/Commit/Fence loop */
errhandler_reg_callbk, (void*)&active); while (active) { usleep(10); } /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } (void)asprintf(&tmp, "%s-%d-remote-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { 
/* check the fence result, then read back every rank's local keys from
 * all prior rounds; remote keys must be absent for same-node peers */
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* check the returned data */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); continue; } if (NULL == val) { pmix_output(0, "Client ns %s rank %d: NULL value returned", myproc.nspace, myproc.rank); break; } if (PMIX_UINT64 != val->type) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); continue; } if (1234 != val->data.uint64) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (n != myproc.rank) { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { /* this data should _not_ be found as we are on the same node * and the data was "put" with a PMIX_REMOTE scope */ pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } } } } /* now get the data blob for myself */ pmix_output(0, "Client ns %s rank %d testing internal modex blob", myproc.nspace, myproc.rank); if (PMIX_SUCCESS == (rc = 
/* fetch our own full modex blob (NULL key) and dump its keys; it must
 * come back as a non-empty PMIX_DATA_ARRAY of PMIX_INFO */
PMIx_Get(&myproc, NULL, NULL, 0, &val))) { if (PMIX_DATA_ARRAY != val->type) { pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else if (PMIX_INFO != val->data.darray->type) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); PMIX_VALUE_RELEASE(val); } else if (0 == val->data.darray->size) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else { pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array; for (n=0; n < val->data.darray->size; n++) { pmix_output(0, "\tKey: %s", iptr[n].key); } PMIX_VALUE_RELEASE(val); } } else { pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); } /* log something */ PMIX_INFO_CONSTRUCT(&info); PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { pmix_output(0, "Client ns %s rank %d - log_nb returned %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { while (active) { usleep(10); } } PMIX_INFO_DESTRUCT(&info); /* if requested and our rank is 0, call abort */ if (doabort) { if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_PROC_REQUESTED_ABORT, "CALLING ABORT", NULL, 0); } else { while(!completed) { usleep(10); } } } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } 
/* flush diagnostics and propagate the last PMIx status as exit code */
fflush(stderr); return(rc); }
/*
 * PMIx job-control / monitoring exerciser: initialize, register a
 * default event handler (blocking on a DEBUG lock until registration
 * completes), query the universe size, then declare ourselves
 * preemptible with checkpoint methods "signal SIGUSR2" and event
 * PMIX_JCTRL_CHECKPOINT via PMIx_Job_control_nb.  Next, request
 * heartbeat monitoring of ourselves via PMIx_Process_monitor_nb
 * (id MONITOR1, 5s period, 2 missed beats allowed), emit one
 * PMIx_Heartbeat, fence with peers (no data collection), and finalize.
 *
 * NOTE(review): on the PMIx_Fence failure path the 'info' array from
 * PMIX_INFO_CREATE(info, 1) is leaked (goto done skips the free) —
 * tolerable in a test, but worth confirming.
 */
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info, *iptr; bool flag; mylock_t mylock; pmix_data_array_t *dptr; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&mylock); /* wait for registration to complete */ DEBUG_WAIT_THREAD(&mylock); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); goto done; } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. 
Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); /* inform the RM that we are preemptible, and that our checkpoint methods are * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ PMIX_INFO_CREATE(info, 2); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); /* can't use "load" to load a pmix_data_array_t */ (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); PMIX_DATA_ARRAY_CREATE(info[1].value.data.darray, 2, PMIX_INFO); dptr = info[1].value.data.darray; rc = SIGUSR2; iptr = (pmix_info_t*)dptr->array; PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); rc = PMIX_JCTRL_CHECKPOINT; PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); /* since this is informational and not a requested operation, the target parameter * doesn't mean anything and can be ignored */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(info, 2); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* now request that this process be monitored using heartbeats */ PMIX_INFO_CREATE(iptr, 1); 
/* describe the heartbeat monitor: id MONITOR1, a beat expected every
 * 5 seconds, alert after 2 consecutive missed beats */
PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); PMIX_INFO_CREATE(info, 3); PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); n = 5; // require a heartbeat every 5 seconds PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); n = 2; // two heartbeats can be missed before declaring us "stalled" PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); /* make the request */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, info, 3, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(iptr, 1); PMIX_INFO_FREE(info, 3); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* send a heartbeat */ PMIx_Heartbeat(); /* call fence to synchronize with our peers - no need to * collect any info as we didn't "put" anything */ PMIX_INFO_CREATE(info, 1); flag = false; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/*
 * PMIx publish/lookup/unpublish exerciser: rank 0 publishes two keys
 * ("FOOBAR" = uint8 1, "PANDA" = size_t 123456); all other ranks look
 * up FOOBAR and validate its source nspace/rank and its value; rank 0
 * then unpublishes both keys.  PMIx_Fence calls separate each phase so
 * readers never race the writer.
 *
 * NOTE(review): this file uses the PMIx 1.x signatures
 * PMIx_Init(&proc) and PMIx_Finalize(void), unlike the other clients
 * here which use the v2 (NULL, 0) forms — confirm against the pmix.h
 * this test is built with.
 */
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; pmix_info_t *info; pmix_pdata_t *pdata; pmix_proc_t myproc; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* publish something */ if (0 == myproc.rank) { PMIX_INFO_CREATE(info, 2); (void)strncpy(info[0].key, "FOOBAR", PMIX_MAX_KEYLEN); info[0].value.type = PMIX_UINT8; info[0].value.data.uint8 = 1; (void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN); info[1].value.type = PMIX_SIZE; info[1].value.data.size = 123456; if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Publish failed: %d", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 2); } /* call fence again so all procs know the data * has been published */ if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* lookup something */ if (0 != myproc.rank) { PMIX_PDATA_CREATE(pdata, 1); (void)strncpy(pdata[0].key, "FOOBAR", 
/* (key continues) non-root ranks validate that FOOBAR came from rank 0
 * of our own nspace with the published type and value */
PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* check the return for value and source */ if (0 != strncmp(myproc.nspace, pdata[0].proc.nspace, PMIX_MAX_NSLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong nspace: %s", myproc.nspace, myproc.rank, pdata[0].proc.nspace); goto done; } if (0 != pdata[0].proc.rank) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong rank: %d", myproc.nspace, myproc.rank, pdata[0].proc.rank); goto done; } if (PMIX_UINT8 != pdata[0].value.type) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong type: %d", myproc.nspace, myproc.rank, pdata[0].value.type); goto done; } if (1 != pdata[0].value.data.uint8) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong value: %d", myproc.nspace, myproc.rank, (int)pdata[0].value.data.uint8); goto done; } PMIX_PDATA_FREE(pdata, 1); pmix_output(0, "PUBLISH-LOOKUP SUCCEEDED"); } /* call fence again so rank 0 waits before leaving */ if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } if (0 == myproc.rank) { char **keys = NULL; pmix_argv_append_nosize(&keys, "FOOBAR"); pmix_argv_append_nosize(&keys, "PANDA"); if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Unpublish failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "UNPUBLISH SUCCEEDED"); } /* call fence again so everyone waits for rank 0 before leaving */ proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if 
/* finalize (1.x no-argument form) and report the outcome */
(PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; char hostname[PMIX_MAXHOSTNAMELEN]; pmix_proc_t *peers; size_t npeers, ntmp=0; char *nodelist; gethostname(hostname, sizeof(hostname)); /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); app->cmd = strdup("gumby"); app->maxprocs = 2; pmix_argv_append_nosize(&app->argv, "gumby"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); PMIX_INFO_CREATE(app->info, 2); (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); app->info[0].value.type = PMIX_INT8; app->info[0].value.data.int8 = 12; (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); app->info[1].value.type = PMIX_DOUBLE; app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", 
myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn failed: %d", myproc.nspace, myproc.rank, rc); goto done; } PMIX_APP_FREE(app, 1); /* check to see if we got the expected info back */ if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); goto done; } else { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); } /* get their universe size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || NULL == val) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the connect/disconnect functions */ if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Connect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Connect succeeded", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Disonnect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Disconnect succeeded", myproc.nspace, myproc.rank); /* finally, test the resolve functions */ if (0 == myproc.rank) { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } if ((nprocs+ntmp) != npeers) { pmix_output(0, "Client ns %s 
rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } else { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, myproc.nspace, rc); goto done; } if (nprocs != npeers) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, nprocs, (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } PMIX_PROC_FREE(peers, npeers); free(nodelist); done: /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", 
myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/*
 * Build the per-rank data for the local namespace and append it to
 * 'lresp': the scheduler-assigned job id ("jobid.stepid"), this node's
 * id, and then one PMIX_PROC_DATA info-array per task containing its
 * rank, appnum/appldr (hard-coded 0 until multi-app support exists),
 * global and app rank, local/node rank (local tasks only), and the
 * hostname taken from the namespace's task map.
 *
 * Ownership note: the per-rank key/value items are shallow-copied into
 * the PMIX_INFO_ARRAY, so the 'rankinfo' list is destroyed WITHOUT
 * freeing the copied fields - the array takes them over.
 */
/* * information about relative ranks as assigned by the RM */ static void _set_procdatas(List lresp) { pmixp_namespace_t *nsptr = pmixp_nspaces_local(); pmix_info_t *kvp, *tkvp; char *p = NULL; int i; /* (char*) jobid assigned by scheduler */ xstrfmtcat(p, "%d.%d", pmixp_info_jobid(), pmixp_info_stepid()); PMIXP_ALLOC_KEY(kvp, PMIX_JOBID); PMIX_VAL_SET(&kvp->value, string, p); xfree(p); list_append(lresp, kvp); PMIXP_ALLOC_KEY(kvp, PMIX_NODEID); PMIX_VAL_SET(&kvp->value, uint32_t, nsptr->node_id); list_append(lresp, kvp); /* store information about local processes */ for (i = 0; i < pmixp_info_tasks(); i++) { List rankinfo; ListIterator it; int count, j, localid, nodeid; char *nodename; pmix_info_t *info; rankinfo = list_create(pmixp_xfree_xmalloced); PMIXP_ALLOC_KEY(kvp, PMIX_RANK); PMIX_VAL_SET(&kvp->value, int, i); list_append(rankinfo, kvp); /* TODO: always use 0 so far. this is not the general case though * (see SLURM MIMD: man srun, section MULTIPLE PROGRAM CONFIGURATION) */ PMIXP_ALLOC_KEY(kvp, PMIX_APPNUM); PMIX_VAL_SET(&kvp->value, int, 0); list_append(rankinfo, kvp); /* TODO: the same as for previous here */ PMIXP_ALLOC_KEY(kvp, PMIX_APPLDR); PMIX_VAL_SET(&kvp->value, int, 0); list_append(rankinfo, kvp); /* TODO: fix when several apps will appear */ PMIXP_ALLOC_KEY(kvp, PMIX_GLOBAL_RANK); PMIX_VAL_SET(&kvp->value, uint32_t, i); list_append(rankinfo, kvp); /* TODO: fix when several apps will appear */ PMIXP_ALLOC_KEY(kvp, PMIX_APP_RANK); PMIX_VAL_SET(&kvp->value, uint32_t, i); list_append(rankinfo, kvp); localid = pmixp_info_taskid2localid(i); /* this rank is local, store local info ab't it! 
*/ if (0 <= localid) { PMIXP_ALLOC_KEY(kvp, PMIX_LOCAL_RANK); PMIX_VAL_SET(&kvp->value, uint16_t, localid); list_append(rankinfo, kvp); /* TODO: fix when several apps will appear */ PMIXP_ALLOC_KEY(kvp, PMIX_NODE_RANK); PMIX_VAL_SET(&kvp->value, uint16_t, localid); list_append(rankinfo, kvp); } nodeid = nsptr->task_map[i]; nodename = hostlist_nth(nsptr->hl, nodeid); PMIXP_ALLOC_KEY(kvp, PMIX_HOSTNAME); PMIX_VAL_SET(&kvp->value, string, nodename); list_append(rankinfo, kvp); free(nodename); /* merge rankinfo into one PMIX_PROC_DATA key */ count = list_count(rankinfo); PMIXP_ALLOC_KEY(kvp, PMIX_PROC_DATA); kvp->value.type = PMIX_INFO_ARRAY; kvp->value.data.array.size = count; PMIX_INFO_CREATE(info, count); it = list_iterator_create(rankinfo); j = 0; while (NULL != (tkvp = list_next(it))) { /* Just copy all the fields here. We will free original kvp's * using list_destroy without free'ing their fields so it is * safe to do so. */ info[j] = *tkvp; j++; } list_destroy(rankinfo); kvp->value.data.array.array = (pmix_info_t *)info; info = NULL; /* put the complex key to the list */ list_append(lresp, kvp); } }
int pmixp_libpmix_job_set(void) { List lresp; pmix_info_t *info; int ninfo; ListIterator it; pmix_info_t *kvp; int i, rc; uid_t uid = pmixp_info_jobuid(); gid_t gid = pmixp_info_jobgid(); _register_caddy_t *register_caddy; register_caddy = xmalloc(sizeof(_register_caddy_t)*(pmixp_info_tasks_loc()+1)); pmixp_debug_hang(0); /* Use list to safely expand/reduce key-value pairs. */ lresp = list_create(pmixp_xfree_xmalloced); _general_proc_info(lresp); _set_tmpdirs(lresp); _set_procdatas(lresp); _set_sizeinfo(lresp); if (SLURM_SUCCESS != _set_mapsinfo(lresp)) { list_destroy(lresp); PMIXP_ERROR("Can't build nodemap"); return SLURM_ERROR; } _set_localinfo(lresp); ninfo = list_count(lresp); PMIX_INFO_CREATE(info, ninfo); it = list_iterator_create(lresp); i = 0; while (NULL != (kvp = list_next(it))) { info[i] = *kvp; i++; } list_destroy(lresp); register_caddy[0].active = 1; rc = PMIx_server_register_nspace(pmixp_info_namespace(), pmixp_info_tasks_loc(), info, ninfo, _release_cb, ®ister_caddy[0]); if (PMIX_SUCCESS != rc) { PMIXP_ERROR("Cannot register namespace %s, nlocalproc=%d, " "ninfo = %d", pmixp_info_namespace(), pmixp_info_tasks_loc(), ninfo); return SLURM_ERROR; } PMIXP_DEBUG("task initialization"); for (i = 0; i < pmixp_info_tasks_loc(); i++) { pmix_proc_t proc; register_caddy[i+1].active = 1; strncpy(proc.nspace, pmixp_info_namespace(), PMIX_MAX_NSLEN); proc.rank = pmixp_info_taskid(i); rc = PMIx_server_register_client(&proc, uid, gid, NULL, _release_cb, ®ister_caddy[i + 1]); if (PMIX_SUCCESS != rc) { PMIXP_ERROR("Cannot register client %d(%d) in namespace %s", pmixp_info_taskid(i), i, pmixp_info_namespace()); return SLURM_ERROR; } } /* wait for all registration actions to finish */ while( 1 ){ int exit_flag = 1; struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 100; for(i=0; i < pmixp_info_tasks_loc() + 1; i++){ if( register_caddy[i].active ){ exit_flag = 0; } } if( exit_flag ){ break; } nanosleep(&ts, NULL); } PMIX_INFO_FREE(info, ninfo); xfree(register_caddy); 
return SLURM_SUCCESS; }
/* Xfer FUNCTIONS FOR GENERIC PMIX TYPES */

/*
 * Deep-copy the payload of 'src' into 'p' according to src->type.
 * Scalar types are copied by value (memcpy where alignment may be an
 * issue); pointer-bearing types (strings, byte objects, arrays, ...)
 * get freshly allocated storage, so 'p' owns its data afterwards.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_NOMEM on allocation failure,
 * PMIX_ERR_NOT_SUPPORTED for nested data arrays, or
 * PMIX_ERR_UNKNOWN_DATA_TYPE / PMIX_ERROR for unrecognized types.
 *
 * Fixes vs. the previous version:
 *  - deprecated PMIX_INFO_ARRAY case wrote through the never-allocated
 *    p->data.array pointer (UB) and iterated with the wrong bound
 *    (src->data.darray->size instead of src->data.array->size)
 *  - darray PMIX_STRING case now calloc's the pointer table so entries
 *    whose source string is NULL are NULL instead of uninitialized
 *  - added missing allocation checks
 */
pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, pmix_value_t *src)
{
    size_t n, m;
    pmix_status_t rc;
    char **prarray, **strarray;
    pmix_value_t *pv, *sv;
    pmix_info_t *p1, *s1;
    pmix_app_t *pa, *sa;
    pmix_pdata_t *pd, *sd;
    pmix_buffer_t *pb, *sb;
    pmix_byte_object_t *pbo, *sbo;
    pmix_kval_t *pk, *sk;
    pmix_modex_data_t *pm, *sm;
    pmix_proc_info_t *pi, *si;
    pmix_query_t *pq, *sq;
    pmix_envar_t *pe, *se;

    /* copy the right field */
    p->type = src->type;
    switch (src->type) {
    case PMIX_UNDEF:
        break;
    case PMIX_BOOL:
        p->data.flag = src->data.flag;
        break;
    case PMIX_BYTE:
        p->data.byte = src->data.byte;
        break;
    case PMIX_STRING:
        if (NULL != src->data.string) {
            p->data.string = strdup(src->data.string);
        } else {
            p->data.string = NULL;
        }
        break;
    case PMIX_SIZE:
        p->data.size = src->data.size;
        break;
    case PMIX_PID:
        p->data.pid = src->data.pid;
        break;
    case PMIX_INT:
        /* to avoid alignment issues */
        memcpy(&p->data.integer, &src->data.integer, sizeof(int));
        break;
    case PMIX_INT8:
        p->data.int8 = src->data.int8;
        break;
    case PMIX_INT16:
        /* to avoid alignment issues */
        memcpy(&p->data.int16, &src->data.int16, 2);
        break;
    case PMIX_INT32:
        /* to avoid alignment issues */
        memcpy(&p->data.int32, &src->data.int32, 4);
        break;
    case PMIX_INT64:
        /* to avoid alignment issues */
        memcpy(&p->data.int64, &src->data.int64, 8);
        break;
    case PMIX_UINT:
        /* to avoid alignment issues */
        memcpy(&p->data.uint, &src->data.uint, sizeof(unsigned int));
        break;
    case PMIX_UINT8:
        p->data.uint8 = src->data.uint8;
        break;
    case PMIX_UINT16:
        /* to avoid alignment issues */
        memcpy(&p->data.uint16, &src->data.uint16, 2);
        break;
    case PMIX_UINT32:
        /* to avoid alignment issues */
        memcpy(&p->data.uint32, &src->data.uint32, 4);
        break;
    case PMIX_UINT64:
        /* to avoid alignment issues */
        memcpy(&p->data.uint64, &src->data.uint64, 8);
        break;
    case PMIX_FLOAT:
        p->data.fval = src->data.fval;
        break;
    case PMIX_DOUBLE:
        p->data.dval = src->data.dval;
        break;
    case PMIX_TIMEVAL:
        memcpy(&p->data.tv, &src->data.tv, sizeof(struct timeval));
        break;
    case PMIX_TIME:
        memcpy(&p->data.time, &src->data.time, sizeof(time_t));
        break;
    case PMIX_STATUS:
        memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t));
        break;
    case PMIX_PROC:
        PMIX_PROC_CREATE(p->data.proc, 1);
        if (NULL == p->data.proc) {
            return PMIX_ERR_NOMEM;
        }
        memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t));
        break;
    case PMIX_PROC_RANK:
        memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t));
        break;
    case PMIX_BYTE_OBJECT:
    case PMIX_COMPRESSED_STRING:
        memset(&p->data.bo, 0, sizeof(pmix_byte_object_t));
        if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) {
            p->data.bo.bytes = malloc(src->data.bo.size);
            if (NULL == p->data.bo.bytes) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.bo.bytes, src->data.bo.bytes,
                   src->data.bo.size);
            p->data.bo.size = src->data.bo.size;
        } else {
            p->data.bo.bytes = NULL;
            p->data.bo.size = 0;
        }
        break;
    case PMIX_PERSIST:
        memcpy(&p->data.persist, &src->data.persist,
               sizeof(pmix_persistence_t));
        break;
    case PMIX_SCOPE:
        memcpy(&p->data.scope, &src->data.scope, sizeof(pmix_scope_t));
        break;
    case PMIX_DATA_RANGE:
        memcpy(&p->data.range, &src->data.range, sizeof(pmix_data_range_t));
        break;
    case PMIX_PROC_STATE:
        memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t));
        break;
    case PMIX_PROC_INFO:
        PMIX_PROC_INFO_CREATE(p->data.pinfo, 1);
        if (NULL == p->data.pinfo) {
            return PMIX_ERR_NOMEM;
        }
        if (NULL != src->data.pinfo->hostname) {
            p->data.pinfo->hostname = strdup(src->data.pinfo->hostname);
        }
        if (NULL != src->data.pinfo->executable_name) {
            p->data.pinfo->executable_name =
                strdup(src->data.pinfo->executable_name);
        }
        memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t));
        memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code,
               sizeof(int));
        memcpy(&p->data.pinfo->state, &src->data.pinfo->state,
               sizeof(pmix_proc_state_t));
        break;
    case PMIX_DATA_ARRAY:
        p->data.darray = (pmix_data_array_t*)calloc(1,
                                                    sizeof(pmix_data_array_t));
        if (NULL == p->data.darray) {
            return PMIX_ERR_NOMEM;
        }
        p->data.darray->type = src->data.darray->type;
        p->data.darray->size = src->data.darray->size;
        if (0 == p->data.darray->size || NULL == src->data.darray->array) {
            p->data.darray->array = NULL;
            p->data.darray->size = 0;
            break;
        }
        /* allocate space and do the copy */
        switch (src->data.darray->type) {
        case PMIX_UINT8:
        case PMIX_INT8:
        case PMIX_BYTE:
            p->data.darray->array = (char*)malloc(src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size);
            break;
        case PMIX_UINT16:
        case PMIX_INT16:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(uint16_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(uint16_t));
            break;
        case PMIX_UINT32:
        case PMIX_INT32:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(uint32_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(uint32_t));
            break;
        case PMIX_UINT64:
        case PMIX_INT64:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(uint64_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(uint64_t));
            break;
        case PMIX_BOOL:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(bool));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(bool));
            break;
        case PMIX_SIZE:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(size_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(size_t));
            break;
        case PMIX_PID:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(pid_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pid_t));
            break;
        case PMIX_STRING:
            /* calloc so entries whose source string is NULL end up NULL
             * instead of uninitialized (previously malloc'd) */
            p->data.darray->array =
                (char**)calloc(src->data.darray->size, sizeof(char*));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            prarray = (char**)p->data.darray->array;
            strarray = (char**)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != strarray[n]) {
                    prarray[n] = strdup(strarray[n]);
                }
            }
            break;
        case PMIX_INT:
        case PMIX_UINT:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(int));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(int));
            break;
        case PMIX_FLOAT:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(float));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(float));
            break;
        case PMIX_DOUBLE:
            p->data.darray->array =
                (char*)malloc(src->data.darray->size * sizeof(double));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(double));
            break;
        case PMIX_TIMEVAL:
            p->data.darray->array =
                (struct timeval*)malloc(src->data.darray->size *
                                        sizeof(struct timeval));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(struct timeval));
            break;
        case PMIX_TIME:
            p->data.darray->array =
                (time_t*)malloc(src->data.darray->size * sizeof(time_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(time_t));
            break;
        case PMIX_STATUS:
            p->data.darray->array =
                (pmix_status_t*)malloc(src->data.darray->size *
                                       sizeof(pmix_status_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_status_t));
            break;
        case PMIX_VALUE:
            PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pv = (pmix_value_t*)p->data.darray->array;
            sv = (pmix_value_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (PMIX_SUCCESS != (rc = pmix_value_xfer(&pv[n], &sv[n]))) {
                    PMIX_VALUE_FREE(pv, src->data.darray->size);
                    return rc;
                }
            }
            break;
        case PMIX_PROC:
            PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_proc_t));
            break;
        case PMIX_APP:
            PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pa = (pmix_app_t*)p->data.darray->array;
            sa = (pmix_app_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != sa[n].cmd) {
                    pa[n].cmd = strdup(sa[n].cmd);
                }
                if (NULL != sa[n].argv) {
                    pa[n].argv = pmix_argv_copy(sa[n].argv);
                }
                if (NULL != sa[n].env) {
                    pa[n].env = pmix_argv_copy(sa[n].env);
                }
                if (NULL != sa[n].cwd) {
                    pa[n].cwd = strdup(sa[n].cwd);
                }
                pa[n].maxprocs = sa[n].maxprocs;
                if (0 < sa[n].ninfo && NULL != sa[n].info) {
                    PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo);
                    if (NULL == pa[n].info) {
                        PMIX_APP_FREE(pa, src->data.darray->size);
                        return PMIX_ERR_NOMEM;
                    }
                    pa[n].ninfo = sa[n].ninfo;
                    for (m = 0; m < pa[n].ninfo; m++) {
                        PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]);
                    }
                }
            }
            break;
        case PMIX_INFO:
            PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            p1 = (pmix_info_t*)p->data.darray->array;
            s1 = (pmix_info_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                PMIX_INFO_XFER(&p1[n], &s1[n]);
            }
            break;
        case PMIX_PDATA:
            PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pd = (pmix_pdata_t*)p->data.darray->array;
            sd = (pmix_pdata_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                PMIX_PDATA_XFER(&pd[n], &sd[n]);
            }
            break;
        case PMIX_BUFFER:
            p->data.darray->array =
                (pmix_buffer_t*)malloc(src->data.darray->size *
                                       sizeof(pmix_buffer_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pb = (pmix_buffer_t*)p->data.darray->array;
            sb = (pmix_buffer_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                PMIX_CONSTRUCT(&pb[n], pmix_buffer_t);
                pmix_bfrops_base_copy_payload(&pb[n], &sb[n]);
            }
            break;
        case PMIX_BYTE_OBJECT:
        case PMIX_COMPRESSED_STRING:
            p->data.darray->array =
                (pmix_byte_object_t*)malloc(src->data.darray->size *
                                            sizeof(pmix_byte_object_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pbo = (pmix_byte_object_t*)p->data.darray->array;
            sbo = (pmix_byte_object_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != sbo[n].bytes && 0 < sbo[n].size) {
                    pbo[n].size = sbo[n].size;
                    pbo[n].bytes = (char*)malloc(pbo[n].size);
                    if (NULL == pbo[n].bytes) {
                        return PMIX_ERR_NOMEM;
                    }
                    memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size);
                } else {
                    pbo[n].bytes = NULL;
                    pbo[n].size = 0;
                }
            }
            break;
        case PMIX_KVAL:
            p->data.darray->array =
                (pmix_kval_t*)calloc(src->data.darray->size,
                                     sizeof(pmix_kval_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pk = (pmix_kval_t*)p->data.darray->array;
            sk = (pmix_kval_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != sk[n].key) {
                    pk[n].key = strdup(sk[n].key);
                }
                if (NULL != sk[n].value) {
                    PMIX_VALUE_CREATE(pk[n].value, 1);
                    if (NULL == pk[n].value) {
                        free(p->data.darray->array);
                        return PMIX_ERR_NOMEM;
                    }
                    if (PMIX_SUCCESS !=
                        (rc = pmix_value_xfer(pk[n].value, sk[n].value))) {
                        return rc;
                    }
                }
            }
            break;
        case PMIX_MODEX:
            PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pm = (pmix_modex_data_t*)p->data.darray->array;
            sm = (pmix_modex_data_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t));
                if (NULL != sm[n].blob && 0 < sm[n].size) {
                    pm[n].blob = (uint8_t*)malloc(sm[n].size);
                    if (NULL == pm[n].blob) {
                        return PMIX_ERR_NOMEM;
                    }
                    memcpy(pm[n].blob, sm[n].blob, sm[n].size);
                    pm[n].size = sm[n].size;
                } else {
                    pm[n].blob = NULL;
                    pm[n].size = 0;
                }
            }
            break;
        case PMIX_PERSIST:
            p->data.darray->array =
                (pmix_persistence_t*)malloc(src->data.darray->size *
                                            sizeof(pmix_persistence_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_persistence_t));
            break;
        case PMIX_POINTER:
            /* shallow copy: pointers are transferred, not what they
             * point at */
            p->data.darray->array =
                (char**)malloc(src->data.darray->size * sizeof(char*));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            prarray = (char**)p->data.darray->array;
            strarray = (char**)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                prarray[n] = strarray[n];
            }
            break;
        case PMIX_SCOPE:
            p->data.darray->array =
                (pmix_scope_t*)malloc(src->data.darray->size *
                                      sizeof(pmix_scope_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_scope_t));
            break;
        case PMIX_DATA_RANGE:
            p->data.darray->array =
                (pmix_data_range_t*)malloc(src->data.darray->size *
                                           sizeof(pmix_data_range_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_data_range_t));
            break;
        case PMIX_COMMAND:
            p->data.darray->array =
                (pmix_cmd_t*)malloc(src->data.darray->size *
                                    sizeof(pmix_cmd_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_cmd_t));
            break;
        case PMIX_INFO_DIRECTIVES:
            p->data.darray->array =
                (pmix_info_directives_t*)malloc(src->data.darray->size *
                                                sizeof(pmix_info_directives_t));
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            memcpy(p->data.darray->array, src->data.darray->array,
                   src->data.darray->size * sizeof(pmix_info_directives_t));
            break;
        case PMIX_PROC_INFO:
            PMIX_PROC_INFO_CREATE(p->data.darray->array,
                                  src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pi = (pmix_proc_info_t*)p->data.darray->array;
            si = (pmix_proc_info_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t));
                if (NULL != si[n].hostname) {
                    pi[n].hostname = strdup(si[n].hostname);
                } else {
                    pi[n].hostname = NULL;
                }
                if (NULL != si[n].executable_name) {
                    pi[n].executable_name = strdup(si[n].executable_name);
                } else {
                    pi[n].executable_name = NULL;
                }
                pi[n].pid = si[n].pid;
                pi[n].exit_code = si[n].exit_code;
                pi[n].state = si[n].state;
            }
            break;
        case PMIX_DATA_ARRAY:
            PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
            return PMIX_ERR_NOT_SUPPORTED;  // don't support iterative arrays
        case PMIX_QUERY:
            PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pq = (pmix_query_t*)p->data.darray->array;
            sq = (pmix_query_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != sq[n].keys) {
                    pq[n].keys = pmix_argv_copy(sq[n].keys);
                }
                if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) {
                    PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual);
                    if (NULL == pq[n].qualifiers) {
                        PMIX_QUERY_FREE(pq, src->data.darray->size);
                        return PMIX_ERR_NOMEM;
                    }
                    for (m = 0; m < sq[n].nqual; m++) {
                        PMIX_INFO_XFER(&pq[n].qualifiers[m],
                                       &sq[n].qualifiers[m]);
                    }
                    pq[n].nqual = sq[n].nqual;
                } else {
                    pq[n].qualifiers = NULL;
                    pq[n].nqual = 0;
                }
            }
            break;
        case PMIX_ENVAR:
            PMIX_ENVAR_CREATE(p->data.darray->array, src->data.darray->size);
            if (NULL == p->data.darray->array) {
                return PMIX_ERR_NOMEM;
            }
            pe = (pmix_envar_t*)p->data.darray->array;
            se = (pmix_envar_t*)src->data.darray->array;
            for (n = 0; n < src->data.darray->size; n++) {
                if (NULL != se[n].envar) {
                    pe[n].envar = strdup(se[n].envar);
                }
                if (NULL != se[n].value) {
                    pe[n].value = strdup(se[n].value);
                }
                pe[n].separator = se[n].separator;
            }
            break;
        default:
            return PMIX_ERR_UNKNOWN_DATA_TYPE;
        }
        break;
    case PMIX_POINTER:
        memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*));
        break;
    case PMIX_ENVAR:
        PMIX_ENVAR_CONSTRUCT(&p->data.envar);
        if (NULL != src->data.envar.envar) {
            p->data.envar.envar = strdup(src->data.envar.envar);
        }
        if (NULL != src->data.envar.value) {
            p->data.envar.value = strdup(src->data.envar.value);
        }
        p->data.envar.separator = src->data.envar.separator;
        break;

    /**** DEPRECATED ****/
    case PMIX_INFO_ARRAY:
        /* bug fix: p->data.array was previously dereferenced without
         * ever being allocated, and the copy loop iterated over
         * src->data.darray->size instead of src->data.array->size */
        p->data.array = (pmix_info_array_t*)calloc(1,
                                                   sizeof(pmix_info_array_t));
        if (NULL == p->data.array) {
            return PMIX_ERR_NOMEM;
        }
        p->data.array->size = src->data.array->size;
        if (0 < src->data.array->size) {
            p->data.array->array =
                (pmix_info_t*)malloc(src->data.array->size *
                                     sizeof(pmix_info_t));
            if (NULL == p->data.array->array) {
                return PMIX_ERR_NOMEM;
            }
            p1 = (pmix_info_t*)p->data.array->array;
            s1 = (pmix_info_t*)src->data.array->array;
            for (n = 0; n < src->data.array->size; n++) {
                PMIX_INFO_XFER(&p1[n], &s1[n]);
            }
        }
        break;
    /********************/

    default:
        pmix_output(0, "XFER-PMIX-VALUE: UNSUPPORTED TYPE %d",
                    (int)src->type);
        assert(0);
        return PMIX_ERROR;
    }
    return PMIX_SUCCESS;
}
pmix_status_t pmix_bfrop_unpack_app(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type) { pmix_app_t *ptr; int32_t i, k, n, m; pmix_status_t ret; int32_t nval; char *tmp; pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack: %d apps", *num_vals); ptr = (pmix_app_t *) dest; n = *num_vals; for (i = 0; i < n; ++i) { /* initialize the fields */ PMIX_APP_CONSTRUCT(&ptr[i]); /* unpack cmd */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].cmd, &m, PMIX_STRING))) { return ret; } /* unpack argc */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &nval, &m, PMIX_INT32))) { return ret; } /* unpack argv */ for (k=0; k < nval; k++) { m=1; tmp = NULL; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { return ret; } if (NULL == tmp) { return PMIX_ERROR; } pmix_argv_append_nosize(&ptr[i].argv, tmp); free(tmp); } /* unpack env */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int32(buffer, &nval, &m, PMIX_INT32))) { return ret; } for (k=0; k < nval; k++) { m=1; tmp = NULL; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { return ret; } if (NULL == tmp) { return PMIX_ERROR; } pmix_argv_append_nosize(&ptr[i].env, tmp); free(tmp); } /* unpack cwd */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].cwd, &m, PMIX_STRING))) { return ret; } /* unpack maxprocs */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &ptr[i].maxprocs, &m, PMIX_INT))) { return ret; } /* unpack info array */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].ninfo, &m, PMIX_SIZE))) { return ret; } if (0 < ptr[i].ninfo) { PMIX_INFO_CREATE(ptr[i].info, ptr[i].ninfo); m = ptr[i].ninfo; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_info(buffer, ptr[i].info, &m, PMIX_INFO))) { return ret; } } } return PMIX_SUCCESS; }
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t *val; pmix_proc_t proc; pmix_info_t *info; size_t ninfo; volatile int active; pmix_query_t *query; size_t nq, n; myquery_data_t myquery_data; fprintf(stderr, "I AM HERE\n"); fflush(stderr); sleep(10); exit(0); /* init us - since we were launched by the RM, our connection info * will have been provided at startup. */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d: PMIx_tool_init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Debugger daemon ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler */ active = -1; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&active); while (-1 == active) { usleep(10); } if (0 != active) { exit(active); } /* get the nspace of the job we are to debug */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) { fprintf(stderr, "[%s:%d] Failed to get job being debugged - error %d\n", myproc.nspace, myproc.rank, rc); goto done; } if (NULL == val) { fprintf(stderr, "Got NULL return\n"); goto done; } fprintf(stderr, "[%s:%d] Debugging %s\n", myproc.nspace, myproc.rank, val->data.string); /* get our local proctable - for scalability reasons, we don't want to * have our "root" debugger process get the proctable for everybody and * send it out to us. 
So ask the local PMIx server for the pid's of * our local target processes */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); query[0].nqual = 1; PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, val->data.string, PMIX_STRING); // the nspace we are enquiring about /* setup the caddy to retrieve the data */ myquery_data.info = NULL; myquery_data.ninfo = 0; myquery_data.active = true; /* execute the query */ if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) { fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); goto done; } while (myquery_data.active) { usleep(10); } fprintf(stderr, "[%s:%d] Local proctable received\n", myproc.nspace, myproc.rank); /* now that we have the proctable for our local processes, we can do our * magic debugger stuff and attach to them. We then send a "release" event * to them - i.e., it's the equivalent to setting the MPIR breakpoint. 
We * do this with the event notification system */ (void)strncpy(proc.nspace, val->data.string, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* we send the notification to just the local procs of the job being debugged */ ninfo = 1; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); // deliver to the target nspace fprintf(stderr, "[%s:%u] Sending release\n", myproc.nspace, myproc.rank); PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, NULL, PMIX_RANGE_LOCAL, info, ninfo, NULL, NULL); /* do some debugger magic */ n = 0; fprintf(stderr, "[%s:%u] Hanging around awhile, doing debugger magic\n", myproc.nspace, myproc.rank); while (n < 5) { usleep(1000); ++n; } done: /* finalize us */ fprintf(stderr, "Debugger daemon ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
static void rollup(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) { int ret; orte_process_name_t child; int32_t flag, cnt; opal_byte_object_t *boptr; pmix_data_buffer_t pbkt; pmix_info_t *info; pmix_proc_t proc; pmix_status_t prc; ncollected++; /* if the sender is ourselves, then we save that buffer * so we can insert it at the beginning */ if (sender->jobid == ORTE_PROC_MY_NAME->jobid && sender->vpid == ORTE_PROC_MY_NAME->vpid) { mybucket = OBJ_NEW(opal_buffer_t); opal_dss.copy_payload(mybucket, buffer); } else { /* xfer the contents of the rollup to our bucket */ opal_dss.copy_payload(bucket, buffer); /* the first entry in the bucket will be from our * direct child - harvest it for connection info */ cnt = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &child, &cnt, ORTE_NAME))) { ORTE_ERROR_LOG(ret); goto report; } cnt = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT32))) { ORTE_ERROR_LOG(ret); goto report; } if (0 < flag) { (void)opal_snprintf_jobid(proc.nspace, PMIX_MAX_NSLEN, sender->jobid); proc.rank = sender->vpid; /* we have connection info */ cnt = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &boptr, &cnt, OPAL_BYTE_OBJECT))) { ORTE_ERROR_LOG(ret); goto report; } /* it was packed using PMIx, so unpack it the same way */ PMIX_DATA_BUFFER_LOAD(&pbkt, boptr->bytes, boptr->size); PMIX_INFO_CREATE(info, (size_t)flag); if (PMIX_SUCCESS != (prc = PMIx_Data_unpack(&proc, &pbkt, (void*)info, &flag, PMIX_INFO))) { PMIX_ERROR_LOG(prc); goto report; } for (cnt=0; cnt < flag; cnt++) { prc = PMIx_Store_internal(&proc, PMIX_PROC_URI, &info[cnt].value); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_INFO_FREE(info, (size_t)flag); goto report; } } PMIX_INFO_FREE(info, (size_t)flag); } } report: report_orted(); }
/*
 * Non-blocking log request. Clients/tools relay the request to their
 * server; servers (and launchers) hand it to the plog framework,
 * inserting a PMIX_LOG_SOURCE directive when none was supplied.
 *
 * Fixes vs. the previous version:
 *  - repaired mojibake: "×tamp" was a mangled "&timestamp" in the
 *    pack call
 *  - PMIX_INFO_LOAD of PMIX_LOG_SOURCE was handed "&source"
 *    (pmix_proc_t **) where the load expects a pmix_proc_t *, so the
 *    stored proc was stack garbage; now passes "source"
 */
PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata,
                                      const pmix_info_t directives[],
                                      size_t ndirs,
                                      pmix_op_cbfunc_t cbfunc, void *cbdata)
{
    pmix_shift_caddy_t *cd;
    pmix_cmd_t cmd = PMIX_LOG_CMD;
    pmix_buffer_t *msg;
    pmix_status_t rc;
    size_t n;
    time_t timestamp = 0;
    pmix_proc_t *source = NULL;

    PMIX_ACQUIRE_THREAD(&pmix_global_lock);

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:log non-blocking");

    if (pmix_globals.init_cntr <= 0) {
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_INIT;
    }

    if (0 == ndata || NULL == data) {
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_BAD_PARAM;
    }

    /* check the directives - if they requested a timestamp, then
     * get the time, also look for a source */
    if (NULL != directives) {
        for (n=0; n < ndirs; n++) {
            if (0 == strncmp(directives[n].key, PMIX_LOG_GENERATE_TIMESTAMP,
                             PMIX_MAX_KEYLEN)) {
                if (PMIX_INFO_TRUE(&directives[n])) {
                    /* pickup the timestamp */
                    timestamp = time(NULL);
                }
            } else if (0 == strncmp(directives[n].key, PMIX_LOG_SOURCE,
                                    PMIX_MAX_KEYLEN)) {
                source = directives[n].value.data.proc;
            }
        }
    }

    /* if we are a client or tool, we never do this ourselves - we
     * always pass this request to our server for execution */
    if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) &&
        !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) {
        /* if we aren't connected, don't attempt to send */
        if (!pmix_globals.connected) {
            PMIX_RELEASE_THREAD(&pmix_global_lock);
            return PMIX_ERR_UNREACH;
        }
        PMIX_RELEASE_THREAD(&pmix_global_lock);

        /* if we are not a server, then relay this request to the server */
        cd = PMIX_NEW(pmix_shift_caddy_t);
        cd->cbfunc.opcbfn = cbfunc;
        cd->cbdata = cbdata;
        msg = PMIX_NEW(pmix_buffer_t);
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &cmd, 1, PMIX_COMMAND);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(msg);
            PMIX_RELEASE(cd);
            return rc;
        }
        /* provide the timestamp - zero will indicate
         * that it wasn't taken */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &timestamp, 1, PMIX_TIME);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(msg);
            PMIX_RELEASE(cd);
            return rc;
        }
        /* pack the number of data entries */
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &ndata, 1, PMIX_SIZE);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(msg);
            PMIX_RELEASE(cd);
            return rc;
        }
        if (0 < ndata) {
            PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                             msg, data, ndata, PMIX_INFO);
            if (PMIX_SUCCESS != rc) {
                PMIX_ERROR_LOG(rc);
                PMIX_RELEASE(msg);
                PMIX_RELEASE(cd);
                return rc;
            }
        }
        PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                         msg, &ndirs, 1, PMIX_SIZE);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(msg);
            PMIX_RELEASE(cd);
            return rc;
        }
        if (0 < ndirs) {
            PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
                             msg, directives, ndirs, PMIX_INFO);
            if (PMIX_SUCCESS != rc) {
                PMIX_ERROR_LOG(rc);
                PMIX_RELEASE(msg);
                PMIX_RELEASE(cd);
                return rc;
            }
        }

        pmix_output_verbose(2, pmix_plog_base_framework.framework_output,
                            "pmix:log sending to server");
        PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver,
                           msg, log_cbfunc, (void*)cd);
        if (PMIX_SUCCESS != rc) {
            PMIX_ERROR_LOG(rc);
            PMIX_RELEASE(cd);
        }
        return rc;
    }
    PMIX_RELEASE_THREAD(&pmix_global_lock);

    /* if no recorded source was found, then we must be it */
    if (NULL == source) {
        source = &pmix_globals.myid;
        cd = PMIX_NEW(pmix_shift_caddy_t);
        cd->cbfunc.opcbfn = cbfunc;
        cd->cbdata = cbdata;
        cd->ndirs = ndirs + 1;
        PMIX_INFO_CREATE(cd->directives, cd->ndirs);
        for (n=0; n < ndirs; n++) {
            PMIX_INFO_XFER(&cd->directives[n], (pmix_info_t*)&directives[n]);
        }
        /* load the proc itself, not the address of the local pointer
         * variable (the previous "&source" copied stack garbage) */
        PMIX_INFO_LOAD(&cd->directives[ndirs], PMIX_LOG_SOURCE,
                       source, PMIX_PROC);
        /* call down to process the request - the various components
         * will thread shift as required */
        rc = pmix_plog.log(source, data, ndata, cd->directives, cd->ndirs,
                           localcbfunc, cd);
        if (PMIX_SUCCESS != rc) {
            PMIX_INFO_FREE(cd->directives, cd->ndirs);
            PMIX_RELEASE(cd);
        }
    } else if (0 == strncmp(source->nspace, pmix_globals.myid.nspace,
                            PMIX_MAX_NSLEN) &&
               source->rank == pmix_globals.myid.rank) {
        /* if I am the recorded source, then this is a re-submission of
         * something that got "upcalled" by a prior call. In this case,
         * we return a "not supported" error as clearly we couldn't
         * handle it, and neither could our host */
        rc = PMIX_ERR_NOT_SUPPORTED;
    } else {
        /* call down to process the request - the various components
         * will thread shift as required */
        rc = pmix_plog.log(source, data, ndata, directives, ndirs,
                           cbfunc, cbdata);
    }
    return rc;
}