int test_connect_disconnect(char *my_nspace, int my_rank) { int rc; pmix_proc_t proc; char nspace[PMIX_MAX_NSLEN+1]; pmix_rank_t newrank; cd_cbdata cbdata; (void)strncpy(proc.nspace, my_nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Connect(&proc, 1, NULL, 0, nspace, &newrank); if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: Connect blocking test failed.", my_nspace, my_rank)); return PMIX_ERROR; } TEST_VERBOSE(("%s:%d: Connect blocking test succeded to nspace %s.", my_nspace, my_rank, nspace)); rc = PMIx_Disconnect(nspace, NULL, 0); if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: Disconnect blocking test failed.", my_nspace, my_rank)); return PMIX_ERROR; } TEST_VERBOSE(("%s:%d: Disconnect blocking test succeded.", my_nspace, my_rank)); cbdata.in_progress = 1; rc = PMIx_Connect_nb(&proc, 1, NULL, 0, cnct_cb, &cbdata); if (PMIX_SUCCESS == rc) { PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress); rc = cbdata.status; } if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: Connect non-blocking test failed.", my_nspace, my_rank)); return PMIX_ERROR; } TEST_VERBOSE(("%s:%d: Connect non-blocking test succeded.", my_nspace, my_rank)); cbdata.in_progress = 1; rc = PMIx_Disconnect_nb(nspace, NULL, 0, cd_cb, &cbdata); if (PMIX_SUCCESS == rc) { PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress); rc = cbdata.status; } if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: Disconnect non-blocking test failed.", my_nspace, my_rank)); return PMIX_ERROR; } TEST_VERBOSE(("%s:%d: Disconnect non-blocking test succeded.", my_nspace, my_rank)); return PMIX_SUCCESS; }
/*
 * Common driver for connect/disconnect tests.
 *
 * procs/nprocs - the processes participating in the operation
 * blocking     - nonzero to use the blocking API variants
 * disconnect   - nonzero to disconnect, zero to connect
 * Returns the operation status; a PMIX_EXISTS result from the host
 * server's check callback is treated as success.
 */
int test_cd_common(pmix_proc_t *procs, size_t nprocs, int blocking, int disconnect)
{
    int ret;

    if (blocking) {
        /* blocking path: call straight through */
        ret = disconnect
            ? PMIx_Disconnect(procs, nprocs, NULL, 0)
            : PMIx_Connect(procs, nprocs, NULL, 0);
    } else {
        /* non-blocking path: spin on the completion flag */
        cd_cbdata tracker;
        tracker.in_progress = 1;
        if (disconnect) {
            ret = PMIx_Disconnect_nb(procs, nprocs, NULL, 0, cd_cb, (void*)&tracker);
        } else {
            ret = PMIx_Connect_nb(procs, nprocs, NULL, 0, cd_cb, (void*)&tracker);
        }
        if (PMIX_SUCCESS == ret) {
            PMIX_WAIT_FOR_COMPLETION(tracker.in_progress);
            ret = tracker.status;
        }
    }

    /* the host server callback currently returns PMIX_EXISTS status
     * for checking purposes */
    if (PMIX_EXISTS == ret) {
        ret = PMIX_SUCCESS;
    }
    return ret;
}
/*
 * Blocking publish: push the provided info keys to the server's data
 * directory and wait for the server to acknowledge the request.
 *
 * info/ninfo - array of key/value pairs to publish
 * Returns the status reported by the server.
 */
PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], size_t ninfo)
{
    pmix_status_t ret;
    pmix_cb_t *tracker;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called");

    /* library must be initialized */
    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }
    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object to let us know when it is done */
    tracker = PMIX_NEW(pmix_cb_t);
    tracker->active = true;

    ret = PMIx_Publish_nb(info, ninfo, op_cbfunc, tracker);
    if (PMIX_SUCCESS != ret) {
        PMIX_ERROR_LOG(ret);
        PMIX_RELEASE(tracker);
        return ret;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(tracker->active);
    ret = (pmix_status_t)tracker->status;
    PMIX_RELEASE(tracker);
    return ret;
}
static int test_spawn_common(char *my_nspace, int my_rank, int blocking) { int rc; pmix_app_t *apps; size_t napps; char nspace[PMIX_MAX_NSLEN+1]; memset(nspace, 0, PMIX_MAX_NSLEN+1); napps = 1; PMIX_APP_CREATE(apps, napps); if (blocking) { if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, apps, napps, nspace))) { PMIX_APP_FREE(apps, napps); return rc; } } else { spawn_cbdata cbdata; cbdata.in_progress = 1; memset(cbdata.nspace, 0, PMIX_MAX_NSLEN); rc = PMIx_Spawn_nb(NULL, 0, apps, napps, spawn_cb, (void*)&cbdata); if (PMIX_SUCCESS != rc) { PMIX_APP_FREE(apps, napps); return rc; } PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress); strncpy(nspace, cbdata.nspace, strlen(cbdata.nspace)+1); } PMIX_APP_FREE(apps, napps); if (strncmp(nspace, "foobar", strlen(nspace)+1)) { return PMIX_ERROR; } return rc; }
/*
 * Blocking unpublish: remove the specified keys from the server's data
 * directory and wait for the server to acknowledge the request.
 *
 * keys       - NULL-terminated argv array of keys to remove
 * info/ninfo - directives qualifying the request
 * Returns the status reported by the server.
 */
PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, const pmix_info_t info[], size_t ninfo)
{
    pmix_status_t rc;
    pmix_cb_t *cb;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called");

    /* CONSISTENCY FIX: every other blocking entry point in this library
     * (Publish, Fence, Spawn, Disconnect) guards against use before init
     * and without a server connection - do the same here */
    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }
    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = cb->status;
    PMIX_RELEASE(cb);

    return rc;
}
/*
 * Blocking disconnect: sever our participation in the collective with
 * the given procs, waiting for the server to confirm completion.
 *
 * procs/nprocs - the processes we are disconnecting from
 * Returns the status reported by the server.
 */
int PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs)
{
    int ret;
    pmix_cb_t *tracker;

    /* library must be initialized */
    if (pmix_client_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }
    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    tracker = PMIX_NEW(pmix_cb_t);
    tracker->active = true;

    ret = PMIx_Disconnect_nb(procs, nprocs, op_cbfunc, tracker);
    if (PMIX_SUCCESS != ret) {
        PMIX_RELEASE(tracker);
        return ret;
    }

    /* wait for the disconnect to complete */
    PMIX_WAIT_FOR_COMPLETION(tracker->active);
    ret = tracker->status;
    PMIX_RELEASE(tracker);

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: disconnect completed");
    return ret;
}
/*
 * Blocking lookup: retrieve previously-published values for the keys
 * named in the pdata array.  Results are stored back into pdata by the
 * lookup_cbfunc callback.
 *
 * scope      - data range to search
 * info/ninfo - directives qualifying the request
 * pdata      - in: keys to look up; out: retrieved values
 * ndata      - number of elements in pdata
 * Returns the status reported by the server.
 */
int PMIx_Lookup(pmix_data_range_t scope, const pmix_info_t info[], size_t ninfo,
                pmix_pdata_t pdata[], size_t ndata)
{
    int rc;
    pmix_cb_t *cb;
    char **keys = NULL;
    size_t i;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called");

    /* bozo protection */
    if (NULL == pdata) {
        return PMIX_ERR_BAD_PARAM;
    }

    /* transfer the pdata keys to the keys argv array */
    for (i=0; i < ndata; i++) {
        if ('\0' != pdata[i].key[0]) {
            pmix_argv_append_nosize(&keys, pdata[i].key);
        }
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->cbdata = (void*)pdata;
    cb->nvals = ndata;
    cb->active = true;

    if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(scope, keys, info, ninfo, lookup_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        pmix_argv_free(keys);
        return rc;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* the data has been stored in the info array by lookup_cbfunc, so
     * nothing more for us to do */
    rc = cb->status;
    PMIX_RELEASE(cb);
    /* BUGFIX: keys was only freed on the error path, leaking the argv
     * array on every successful lookup */
    pmix_argv_free(keys);
    return rc;
}
/*
 * Register a test namespace with the PMIx server library, supplying the
 * minimal set of job-level attributes the clients will query: universe
 * size, spawned flag, local size/peers, node/proc maps, job size, and
 * app number.  Blocks until the server signals registration complete.
 *
 * nprocs - number of procs in the namespace (also used as local size)
 * ranks  - comma-delimited list of local ranks
 * name   - the namespace string to register
 */
static void set_namespace(int nprocs, char *ranks, char *name)
{
    size_t ninfo;
    pmix_info_t *info;
    ninfo = 8;
    char *regex, *ppn;
    PMIX_INFO_CREATE(info, ninfo);

    (void)strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN);
    info[0].value.type = PMIX_UINT32;
    info[0].value.data.uint32 = nprocs;

    (void)strncpy(info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN);
    info[1].value.type = PMIX_UINT32;
    info[1].value.data.uint32 = 0;

    /* single-node test: local size equals the universe size */
    (void)strncpy(info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN);
    info[2].value.type = PMIX_UINT32;
    info[2].value.data.uint32 = nprocs;

    (void)strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN);
    info[3].value.type = PMIX_STRING;
    info[3].value.data.string = strdup(ranks);

    /* node map: a regex over the single test node name */
    PMIx_generate_regex(NODE_NAME, &regex);
    (void)strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN);
    info[4].value.type = PMIX_STRING;
    info[4].value.data.string = regex;

    /* proc map: ppn string generated from the rank list */
    PMIx_generate_ppn(ranks, &ppn);
    (void)strncpy(info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN);
    info[5].value.type = PMIX_STRING;
    info[5].value.data.string = ppn;

    (void)strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN);
    info[6].value.type = PMIX_UINT32;
    info[6].value.data.uint32 = nprocs;

    /* NOTE(review): using the server's pid as the app number looks like
     * a test-harness shortcut - confirm clients don't rely on a real
     * app index here */
    (void)strncpy(info[7].key, PMIX_APPNUM, PMIX_MAX_KEYLEN);
    info[7].value.type = PMIX_UINT32;
    info[7].value.data.uint32 = getpid ();

    /* register and block until release_cb flips in_progress */
    int in_progress = 1, rc;
    if (PMIX_SUCCESS == (rc = PMIx_server_register_nspace(name, nprocs, info, ninfo,
                                                          release_cb, &in_progress))) {
        PMIX_WAIT_FOR_COMPLETION(in_progress);
    }
    PMIX_INFO_FREE(info, ninfo);
}
/*
 * Blocking spawn: launch the given applications and wait for the result.
 * On success the nspace of the spawned job is copied into the caller's
 * nspace buffer (if one was provided).
 *
 * job_info/ninfo - job-level directives
 * apps/napps     - applications to launch
 * nspace         - out: nspace of the child job (may be NULL)
 * Returns the status reported by the server.
 */
PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
                                     const pmix_app_t apps[], size_t napps,
                                     char nspace[])
{
    pmix_status_t ret;
    pmix_cb_t *tracker;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called");

    /* library must be initialized */
    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }
    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* ensure the nspace (if provided) is initialized */
    if (NULL != nspace) {
        memset(nspace, 0, PMIX_MAX_NSLEN+1);
    }

    /* create a callback object */
    tracker = PMIX_NEW(pmix_cb_t);
    tracker->active = true;

    ret = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, tracker);
    if (PMIX_SUCCESS != ret) {
        PMIX_RELEASE(tracker);
        return ret;
    }

    /* wait for the result */
    PMIX_WAIT_FOR_COMPLETION(tracker->active);
    ret = tracker->status;
    if (NULL != nspace) {
        /* buffer was zeroed above, so the bounded copy stays terminated */
        (void)strncpy(nspace, tracker->nspace, PMIX_MAX_NSLEN);
    }
    PMIX_RELEASE(tracker);
    return ret;
}
/*
 * Blocking fence: execute a barrier (with optional data exchange) across
 * the specified procs and wait for its completion.
 *
 * procs/nprocs - participating processes
 * info/ninfo   - directives qualifying the fence
 * Returns the status reported by the server.
 */
PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
                                     const pmix_info_t info[], size_t ninfo)
{
    pmix_cb_t *tracker;
    pmix_status_t ret;

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: executing fence");

    /* library must be initialized */
    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }
    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    tracker = PMIX_NEW(pmix_cb_t);
    tracker->active = true;

    /* push the message into our event base to send to the server */
    ret = PMIx_Fence_nb(procs, nprocs, info, ninfo, op_cbfunc, tracker);
    if (PMIX_SUCCESS != ret) {
        PMIX_RELEASE(tracker);
        return ret;
    }

    /* wait for the fence to complete */
    PMIX_WAIT_FOR_COMPLETION(tracker->active);
    ret = tracker->status;
    PMIX_RELEASE(tracker);

    pmix_output_verbose(2, pmix_globals.debug_output, "pmix: fence released");
    return ret;
}
/*
 * Error handler bridging PMIx notifications into the OPAL errhandler
 * framework: converts the PMIx status, procs, and info arrays into
 * OPAL equivalents and invokes the base errhandler, blocking until the
 * completion callback fires.
 */
static void myerr(pmix_status_t status, pmix_proc_t procs[], size_t nprocs,
                  pmix_info_t info[], size_t ninfo)
{
    int rc;
    opal_list_t plist, ilist;
    opal_namelist_t *nm;
    opal_value_t *iptr;
    volatile int cond = 1;
    size_t n;

    /* convert the incoming status */
    rc = pmix1_convert_rc(status);

    /* convert the array of procs */
    OBJ_CONSTRUCT(&plist, opal_list_t);
    for (n=0; n < nprocs; n++) {
        nm = OBJ_NEW(opal_namelist_t);
        nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
        nm->name.vpid = procs[n].rank;
        opal_list_append(&plist, &nm->super);
    }

    /* convert the array of info */
    OBJ_CONSTRUCT(&ilist, opal_list_t);
    for (n=0; n < ninfo; n++) {
        iptr = OBJ_NEW(opal_value_t);
        iptr->key = strdup(info[n].key);
        pmix1_value_unload(iptr, &info[n].value);
        /* BUGFIX: the converted info values were being appended to plist,
         * corrupting the proc list and leaving ilist empty */
        opal_list_append(&ilist, &iptr->super);
    }

    /* call the base errhandler and wait for its completion callback */
    opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&cond);
    PMIX_WAIT_FOR_COMPLETION(cond);

    OPAL_LIST_DESTRUCT(&plist);
    OPAL_LIST_DESTRUCT(&ilist);
}
/*
 * Execute a blocking fence (barrier + data exchange) across the given
 * procs: packs a fence command plus any locally-cached put() data,
 * sends it to the PMIx server, waits for the collective to complete,
 * then unpacks each contributor's data blobs into the internal dstore.
 *
 * procs/nprocs - participating process names (0 procs is allowed and
 *                is packed as a count of zero)
 * Returns an OPAL status code; OPAL_SUCCESS when no server is available.
 *
 * NOTE(review): the error returns after PMIX_WAIT_FOR_COMPLETION leave
 * cb (and sometimes msg) unreleased - possible leaks on unpack failure.
 */
static int native_fence(opal_process_name_t *procs, size_t nprocs)
{
    opal_buffer_t *msg, *bptr;
    pmix_cmd_t cmd = PMIX_FENCE_CMD;
    pmix_cb_t *cb;
    int rc, ret;
    opal_pmix_scope_t scope;
    int32_t cnt;
    opal_value_t *kp;
    opal_process_name_t id;
    size_t i;
    uint64_t np;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native executing fence on %u procs",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs);

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    msg = OBJ_NEW(opal_buffer_t);
    /* pack the fence cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* pack the number of procs */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &nprocs, 1, OPAL_SIZE))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    if (0 < nprocs) {
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, procs, nprocs, OPAL_NAME))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
    }
    /* if we haven't already done it, ensure we have committed our values:
     * each non-empty cache is packed as (scope, buffer) and then released,
     * which also marks it as flushed */
    if (NULL != mca_pmix_native_component.cache_local) {
        scope = PMIX_LOCAL;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_local, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_local);
    }
    if (NULL != mca_pmix_native_component.cache_remote) {
        scope = PMIX_REMOTE;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_remote, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_remote);
    }
    if (NULL != mca_pmix_native_component.cache_global) {
        scope = PMIX_GLOBAL;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_global, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_global);
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the fence to complete */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* get the number of contributors */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &np, &cnt, OPAL_UINT64))) {
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    /* if data was returned, unpack and store it */
    for (i=0; i < np; i++) {
        /* get the buffer that contains the data for the next proc */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &msg, &cnt, OPAL_BUFFER))) {
            if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
                /* fewer blobs than contributors is not an error */
                break;
            }
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        /* extract the id of the contributor from the blob */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &id, &cnt, OPAL_NAME))) {
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        /* extract all blobs from this proc, starting with the scope */
        cnt = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(msg, &scope, &cnt, PMIX_SCOPE_T))) {
            /* extract the blob for this scope */
            cnt = 1;
            if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &bptr, &cnt, OPAL_BUFFER))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            /* now unpack and store the values - everything goes into our internal store */
            cnt = 1;
            while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
                if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &id, kp))) {
                    OPAL_ERROR_LOG(ret);
                }
                OBJ_RELEASE(kp);
                cnt = 1;
            }
            OBJ_RELEASE(bptr);
            cnt = 1;
        }
        /* running off the end of the buffer is the normal loop exit */
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        }
        OBJ_RELEASE(msg);
        if (OPAL_SUCCESS != rc && OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        } else {
            rc = OPAL_SUCCESS;
        }
    }
    OBJ_RELEASE(cb);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native fence released",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return OPAL_SUCCESS;
}
/*
 * PMIx client test program: initializes, queries the universe size,
 * stores/puts several values (internal, local, remote scopes), commits,
 * then retrieves everyone's values via non-blocking gets interleaved
 * with a non-blocking fence, and finishes with a blocking fence before
 * finalizing.
 *
 * NOTE(review): each asprintf() result (tmp) is handed to PMIx_Get_nb as
 * cbdata or simply overwritten - the strings appear to be leaked or
 * freed by valcbfunc; confirm ownership.  Also exit(0) on init failure
 * reports success to the harness - presumably intentional for this test.
 */
int main(int argc, char **argv)
{
    int rc;
    pmix_value_t value;
    pmix_value_t *val = &value;
    char *tmp;
    pmix_proc_t proc;
    uint32_t n, num_gets;
    bool active;

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc);
        exit(0);
    }
    pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);

    /* get our universe size (wildcard rank = job-level data) */
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }
    nprocs = val->data.uint32;
    PMIX_VALUE_RELEASE(val);
    pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);

    /* put a few values - one per scope, with per-rank unique keys */
    (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank);
    value.type = PMIX_UINT32;
    value.data.uint32 = 1234;
    if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank);
    value.type = PMIX_UINT64;
    value.data.uint64 = 1234;
    if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank);
    value.type = PMIX_STRING;
    value.data.string = "1234";
    if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* introduce a delay by one rank so we can check what happens
     * if a "get" is received prior to data being provided */
    if (0 == myproc.rank) {
        sleep(2);
    }

    /* commit the data to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Commit failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* call fence_nb, but don't return any data */
    PMIX_PROC_CONSTRUCT(&proc);
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    active = true;
    if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &active))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* get the committed data - ask for someone who doesn't exist as well */
    num_gets = 0;
    for (n=0; n < nprocs; n++) {
        (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n);
        proc.rank = n;
        if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, NULL, 0, valcbfunc, tmp))) {
            pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc);
            goto done;
        }
        ++num_gets;
        (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n);
        if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, NULL, 0, valcbfunc, tmp))) {
            pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc);
            goto done;
        }
        ++num_gets;
    }

    /* wait for the first fence to finish */
    PMIX_WAIT_FOR_COMPLETION(active);

    /* wait for all my "get" calls to complete - getcount is incremented
     * by valcbfunc; poll with a short sleep */
    while (getcount < num_gets) {
        struct timespec ts;
        ts.tv_sec = 0;
        ts.tv_nsec = 100000;
        nanosleep(&ts, NULL);
    }

    /* call fence again so everyone waits before leaving */
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

done:
    /* finalize us */
    pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return(0);
}
/*
 * Look up a job-level attribute.  First tries the local dstore; on a
 * miss, requests the full attribute blob from the PMIx server, caches
 * every value it contains (with special handling for the local topology,
 * local cpusets, and the proc map), sets our process name, and finally
 * computes and stores relative locality for each local peer.
 *
 * attr - the attribute key being requested
 * kv   - out: the retained value if found (caller owns the reference)
 * Returns true if the attribute was found, false otherwise.
 *
 * NOTE(review): several mid-loop error paths return false without
 * releasing bptr/cb - possible leaks on malformed server data.
 */
static bool native_get_attr(const char *attr, opal_value_t **kv)
{
    opal_buffer_t *msg, *bptr;
    opal_list_t vals;
    opal_value_t *kp, *lclpeers=NULL, kvn;
    pmix_cmd_t cmd = PMIX_GETATTR_CMD;
    char **ranks;
    int rc, ret;
    int32_t cnt;
    bool found=false;
    opal_hwloc_locality_t locality;
    pmix_cb_t *cb;
    uint32_t i, myrank;
    opal_process_name_t id;
    char *cpuset;
    opal_buffer_t buf, buf2;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native get_attr called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* try to retrieve the requested value from the dstore */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, attr, &vals)) {
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
        OPAL_LIST_DESTRUCT(&vals);
        return true;
    }

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return false;
    }

    /* if the value isn't yet available, then we should try to retrieve
     * all the available attributes and store them for future use */
    msg = OBJ_NEW(opal_buffer_t);
    /* pack the cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return false;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the data to return */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* we have received the entire data blob for this process - unpack
     * and cache all values, keeping the one we requested to return
     * to the caller */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(cb);
        return false;
    }
    if (OPAL_SUCCESS == ret) {
        /* unpack the buffer containing the values */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(cb);
            return false;
        }
        cnt = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s unpacked attr %s",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key);
            /* if this is the local topology, we need to save it in a special way */
#if OPAL_HAVE_HWLOC
            {
                hwloc_topology_t topo;
                if (0 == strcmp(PMIX_LOCAL_TOPO, kp->key)) {
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s saving topology",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                    /* transfer the byte object for unpacking */
                    OBJ_CONSTRUCT(&buf, opal_buffer_t);
                    opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                    kp->data.bo.bytes = NULL;  // protect the data region
                    kp->data.bo.size = 0;
                    OBJ_RELEASE(kp);
                    /* extract the topology */
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &topo, &cnt, OPAL_HWLOC_TOPO))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        continue;
                    }
                    OBJ_DESTRUCT(&buf);
                    /* only adopt the topology if we don't already have one */
                    if (NULL == opal_hwloc_topology) {
                        opal_hwloc_topology = topo;
                    } else {
                        hwloc_topology_destroy(topo);
                    }
                    cnt = 1;
                    continue;
                }
            }
#endif
            /* if this is the local cpuset blob, then unpack and store its contents */
            if (0 == strcmp(PMIX_LOCAL_CPUSETS, kp->key)) {
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s received local cpusets",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* transfer the byte object for unpacking */
                OBJ_CONSTRUCT(&buf, opal_buffer_t);
                opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                kp->data.bo.bytes = NULL;  // protect the data region
                kp->data.bo.size = 0;
                OBJ_RELEASE(kp);
                /* blob is a sequence of (name, cpuset-string) pairs */
                cnt=1;
                while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &id, &cnt, OPAL_NAME))) {
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &cpuset, &cnt, OPAL_STRING))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        cnt = 1;
                        continue;
                    }
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s saving cpuset %s for local peer %s",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                        (NULL == cpuset) ? "NULL" : cpuset,
                                        OPAL_NAME_PRINT(id));
                    OBJ_CONSTRUCT(&kvn, opal_value_t);
                    kvn.key = strdup(OPAL_DSTORE_CPUSET);
                    kvn.type = OPAL_STRING;
                    kvn.data.string = cpuset;
                    if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, &kvn))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&kvn);
                        cnt = 1;
                        continue;
                    }
                    OBJ_DESTRUCT(&kvn);
                }
                OBJ_DESTRUCT(&buf);
                /* running past end of buffer is the normal loop exit */
                if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                    OPAL_ERROR_LOG(rc);
                    return false;
                }
                cnt=1;
                continue;
            } else if (0 == strcmp(PMIX_PROC_MAP, kp->key)) {
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s received proc map",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* transfer the byte object for unpacking */
                OBJ_CONSTRUCT(&buf, opal_buffer_t);
                opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                kp->data.bo.bytes = NULL;  // protect the data region
                kp->data.bo.size = 0;
                OBJ_RELEASE(kp);
                /* get the jobid - the map blob must start with PMIX_JOBID */
                cnt=1;
                if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                    OPAL_ERROR_LOG(rc);
                    OBJ_DESTRUCT(&buf);
                    cnt = 1;
                    return false;
                }
                if (0 != strcmp(PMIX_JOBID, kp->key)) {
                    OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                    OBJ_DESTRUCT(&buf);
                    OBJ_RELEASE(kp);
                    cnt = 1;
                    return false;
                }
                id.jobid = kp->data.uint32;
                OBJ_RELEASE(kp);
                /* unpack the data for each rank: (PMIX_RANK, PMIX_PROC_MAP blob) pairs */
                cnt=1;
                while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                    if (0 != strcmp(PMIX_RANK, kp->key)) {
                        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                        OBJ_DESTRUCT(&buf);
                        OBJ_RELEASE(kp);
                        cnt = 1;
                        return false;
                    }
                    id.vpid = kp->data.uint32;
                    /* unpack the blob for this rank */
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        cnt = 1;
                        return false;
                    }
                    if (0 != strcmp(PMIX_PROC_MAP, kp->key)) {
                        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                        OBJ_DESTRUCT(&buf);
                        OBJ_RELEASE(kp);
                        cnt = 1;
                        return false;
                    }
                    /* transfer the byte object for unpacking */
                    OBJ_CONSTRUCT(&buf2, opal_buffer_t);
                    opal_dss.load(&buf2, kp->data.bo.bytes, kp->data.bo.size);
                    kp->data.bo.bytes = NULL;  // protect the data region
                    kp->data.bo.size = 0;
                    OBJ_RELEASE(kp);
                    /* unpack and store the map */
                    cnt=1;
                    while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf2, &kp, &cnt, OPAL_VALUE))) {
                        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                            "%s storing key %s for peer %s",
                                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                            kp->key, OPAL_NAME_PRINT(id));
                        if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, kp))) {
                            OPAL_ERROR_LOG(rc);
                            OBJ_RELEASE(kp);
                            OBJ_DESTRUCT(&buf2);
                            return false;
                        }
                    }
                    OBJ_DESTRUCT(&buf2);
                    if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                        OPAL_ERROR_LOG(rc);
                        return false;
                    }
                    cnt=1;
                }
                OBJ_DESTRUCT(&buf);
                if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                    OPAL_ERROR_LOG(rc);
                    return false;
                }
                cnt=1;
                continue;
            }
            /* otherwise, it is a single piece of info, so store it */
            if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kp))) {
                OPAL_ERROR_LOG(rc);
                OBJ_RELEASE(kp);
                cnt = 1;
                continue;
            }
            /* save the list of local peers (retained for locality pass below) */
            if (0 == strcmp(PMIX_LOCAL_PEERS, kp->key)) {
                OBJ_RETAIN(kp);
                lclpeers = kp;
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s saving local peers %s",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), lclpeers->data.string);
            } else if (0 == strcmp(PMIX_JOBID, kp->key)) {
                native_pname.jobid = kp->data.uint32;
            } else if (0 == strcmp(PMIX_RANK, kp->key)) {
                native_pname.vpid = kp->data.uint32;
            }
            /* if this is the attribute the caller asked for, hand back a reference */
            if (0 == strcmp(attr, kp->key)) {
                OBJ_RETAIN(kp);
                *kv = kp;
                found = true;
            }
            OBJ_RELEASE(kp);
            cnt = 1;
        }
        OBJ_RELEASE(bptr);
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
            return false;
        }
    } else {
        OPAL_ERROR_LOG(ret);
        OBJ_RELEASE(cb);
        return false;
    }
    OBJ_RELEASE(cb);

    /* now that jobid/vpid are known, set our process name */
    opal_proc_set_name(&native_pname);

    /* if the list of local peers wasn't included, then we are done */
    if (NULL == lclpeers) {
        opal_output_verbose(0, opal_pmix_base_framework.framework_output,
                            "%s no local peers reported",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        return found;
    }

    /* baseline all the procs as nonlocal */
    myrank = native_pname.vpid;
    id.jobid = native_pname.jobid;

#if OPAL_HAVE_HWLOC
    /* fetch my cpuset */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == (rc = opal_dstore.fetch(opal_dstore_internal, &native_pname,
                                                OPAL_DSTORE_CPUSET, &vals))) {
        kp = (opal_value_t*)opal_list_get_first(&vals);
        cpuset = strdup(kp->data.string);
    } else {
        cpuset = NULL;
    }
    OPAL_LIST_DESTRUCT(&vals);
#endif

    /* we only need to set locality for each local rank as "not found"
     * equates to "non local" */
    ranks = opal_argv_split(lclpeers->data.string, ',');
    for (i=0; NULL != ranks[i]; i++) {
        uint32_t vid = strtoul(ranks[i], NULL, 10);
        if (myrank == vid) {
            /* skip ourselves */
            continue;
        }
        id.vpid = vid;
#if OPAL_HAVE_HWLOC
        OBJ_CONSTRUCT(&vals, opal_list_t);
        if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &id,
                                                    OPAL_DSTORE_CPUSET, &vals))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s cpuset for local proc %s not found",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                OPAL_NAME_PRINT(id));
            OPAL_LIST_DESTRUCT(&vals);
            /* even though the cpuset wasn't found, we at least know it is
             * on the same node with us */
            locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
        } else {
            kp = (opal_value_t*)opal_list_get_first(&vals);
            if (NULL == kp->data.string) {
                /* if we share a node, but we don't know anything more, then
                 * mark us as on the node as this is all we know */
                locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* determine relative location on our node */
                locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                                 cpuset, kp->data.string);
            }
            OPAL_LIST_DESTRUCT(&vals);
        }
#else
        /* all we know is we share a node */
        locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                             "%s pmix:native proc %s locality %s",
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                             OPAL_NAME_PRINT(id),
                             opal_hwloc_base_print_locality(locality)));

        /* cache the computed locality for this peer */
        OBJ_CONSTRUCT(&kvn, opal_value_t);
        kvn.key = strdup(OPAL_DSTORE_LOCALITY);
        kvn.type = OPAL_UINT16;
        kvn.data.uint16 = locality;
        (void)opal_dstore.store(opal_dstore_internal, &id, &kvn);
        OBJ_DESTRUCT(&kvn);
    }
#if OPAL_HAVE_HWLOC
    if (NULL != cpuset) {
        free(cpuset);
    }
#endif
    opal_argv_free(ranks);

    return found;
}
/*
 * Send an abort command to the PMIx server with the given status flag
 * and optional message, then block until the server acknowledges (or a
 * one-second timeout fires, in case the server has already died).
 *
 * flag - status code to report
 * msg  - optional human-readable message (NULL is allowed)
 * Returns an OPAL status code; OPAL_SUCCESS when no server is available.
 *
 * NOTE(review): ev is a stack-allocated event armed via evtimer_add but
 * never deleted before return - confirm the timeout callback cannot fire
 * after this frame unwinds.
 */
static int native_abort(int flag, const char msg[])
{
    opal_buffer_t *bfr;
    pmix_cmd_t cmd = PMIX_ABORT_CMD;
    int rc;
    pmix_cb_t *cb;
    opal_event_t ev;
    struct timeval tv = {1, 0};  /* 1s safety timeout */

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native abort called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* create a buffer to hold the message */
        bfr = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the status flag */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &flag, 1, OPAL_INT))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the string message - a NULL is okay */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &msg, 1, OPAL_STRING))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;
        /* push a timeout event to wake us up just in case this
         * message cannot get thru - e.g., someone else may have
         * detected the failure of the server and ordered an abort */
        opal_event_evtimer_set(mca_pmix_native_component.evbase, &ev, timeout, cb);
        opal_event_evtimer_add(&ev, &tv);
        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(bfr, wait_cbfunc, cb);
        /* wait for the release */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }
    return OPAL_SUCCESS;
}
/*
 * Finalize the native PMIx component: decrement the init refcount and,
 * on the last reference, send a finalize sync to the server (if we are
 * connected), wait for the ack, stop the progress thread, and close the
 * server socket.
 *
 * Returns an OPAL status code.
 */
static int native_fini(void)
{
    opal_buffer_t *msg;
    pmix_cb_t *cb;
    pmix_cmd_t cmd = PMIX_FINALIZE_CMD;
    int rc;

    /* refcounted init: only tear down on the final release */
    if (1 != init_cntr) {
        --init_cntr;
        return OPAL_SUCCESS;
    }
    init_cntr = 0;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native finalize called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* nothing was setup, so return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* setup a cmd message to notify the PMIx
         * server that we are normally terminating */
        msg = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:native sending finalize sync to server",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

        /* wait for the ack to return */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }

    /* stop the progress thread before closing the socket it services */
    if (NULL != mca_pmix_native_component.evbase) {
        opal_stop_progress_thread("opal_async", true);
        mca_pmix_native_component.evbase = NULL;
    }

    if (0 <= mca_pmix_native_component.sd) {
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
    }
    return OPAL_SUCCESS;
}
int main(int argc, char **argv) { char **client_env=NULL; char **client_argv=NULL; int rc; struct stat stat_buf; struct timeval tv; double test_start; cli_state_t order[CLI_TERM+1]; test_params params; INIT_TEST_PARAMS(params); int test_fail = 0; char *tmp; int ns_nprocs; gettimeofday(&tv, NULL); test_start = tv.tv_sec + 1E-6*tv.tv_usec; /* smoke test */ if (PMIX_SUCCESS != 0) { TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS)); exit(1); } TEST_VERBOSE(("Testing version %s", PMIx_Get_version())); parse_cmd(argc, argv, ¶ms); TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout)); /* verify executable */ if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){ TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno))); FREE_TEST_PARAMS(params); return 0; } else if( !S_ISREG(stat_buf.st_mode) ){ TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary)); FREE_TEST_PARAMS(params); return 0; }else if( !(stat_buf.st_mode & S_IXUSR) ){ TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary)); FREE_TEST_PARAMS(params); return 0; } /* setup the server library */ pmix_info_t info[1]; (void)strncpy(info[0].key, PMIX_SOCKET_MODE, PMIX_MAX_KEYLEN); info[0].value.type = PMIX_UINT32; info[0].value.data.uint32 = 0666; if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) { TEST_ERROR(("Init failed with error %d", rc)); FREE_TEST_PARAMS(params); return rc; } /* register the errhandler */ PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler, errhandler_reg_callbk, NULL); order[CLI_UNINIT] = CLI_FORKED; order[CLI_FORKED] = CLI_FIN; order[CLI_CONNECTED] = CLI_UNDEF; order[CLI_FIN] = CLI_TERM; order[CLI_DISCONN] = CLI_UNDEF; order[CLI_TERM] = CLI_UNDEF; cli_init(params.nprocs, order); /* set common argv and env */ client_env = pmix_argv_copy(environ); set_client_argv(¶ms, &client_argv); tmp = pmix_argv_join(client_argv, ' '); TEST_VERBOSE(("Executing 
test: %s", tmp)); free(tmp); int launched = 0; /* set namespaces and fork clients */ if (NULL == params.ns_dist) { /* we have a single namespace for all clients */ ns_nprocs = params.nprocs; rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); if (PMIX_SUCCESS != rc) { FREE_TEST_PARAMS(params); return rc; } launched += ns_nprocs; } else { char *pch; pch = strtok(params.ns_dist, ":"); while (NULL != pch) { ns_nprocs = (int)strtol(pch, NULL, 10); if (params.nprocs < (uint32_t)(launched+ns_nprocs)) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); FREE_TEST_PARAMS(params); return PMIX_ERROR; } if (0 < ns_nprocs) { rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); if (PMIX_SUCCESS != rc) { FREE_TEST_PARAMS(params); return rc; } } pch = strtok (NULL, ":"); launched += ns_nprocs; } } if (params.nprocs != (uint32_t)launched) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); cli_kill_all(); test_fail = 1; } /* hang around until the client(s) finalize */ while (!test_terminated()) { // To avoid test hang we want to interrupt the loop each 0.1s double test_current; // check if we exceed the max time gettimeofday(&tv, NULL); test_current = tv.tv_sec + 1E-6*tv.tv_usec; if( (test_current - test_start) > params.timeout ){ break; } cli_wait_all(0); } if( !test_terminated() ){ TEST_ERROR(("Test exited by a timeout!")); cli_kill_all(); test_fail = 1; } if( test_abort ){ TEST_ERROR(("Test was aborted!")); /* do not simply kill the clients as that generates * event notifications which these tests then print * out, flooding the log */ // cli_kill_all(); test_fail = 1; } if (0 != params.test_spawn) { PMIX_WAIT_FOR_COMPLETION(spawn_wait); } pmix_argv_free(client_argv); pmix_argv_free(client_env); /* deregister the errhandler */ PMIx_Deregister_event_handler(0, op_callbk, NULL); cli_wait_all(1.0); /* finalize the server library */ if 
(PMIX_SUCCESS != (rc = PMIx_server_finalize())) { TEST_ERROR(("Finalize failed with error %d", rc)); } FREE_TEST_PARAMS(params); if (0 == test_fail) { TEST_OUTPUT(("Test finished OK!")); } return test_fail; }
/*
 * Retrieve the value for "key" posted by process "id".
 *
 * First checks the local internal dstore; on a miss (and if a server is
 * available) it requests the *entire* data blob for that process from the
 * server, caches every value it receives into the dstore, and hands back
 * the one matching "key" (ownership transfers to the caller via *kv).
 *
 * Returns OPAL_SUCCESS and sets *kv on success; otherwise sets *kv=NULL
 * and returns OPAL_ERR_NOT_FOUND, an unpack error, or the server-reported
 * status - whichever occurred first.
 */
static int native_get(const opal_process_name_t *id,
                      const char *key,
                      opal_value_t **kv)
{
    opal_buffer_t *msg, *bptr;
    pmix_cmd_t cmd = PMIX_GET_CMD;
    pmix_cb_t *cb;
    int rc, ret;
    int32_t cnt;
    opal_list_t vals;
    opal_value_t *kp;
    bool found;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native getting value for proc %s key %s",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        OPAL_NAME_PRINT(*id), key);

    /* first see if we already have the info in our dstore */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, id,
                                          key, &vals)) {
        /* take the first (only expected) hit; caller owns it now */
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
        OPAL_LIST_DESTRUCT(&vals);
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:native value retrieved from dstore",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        return OPAL_SUCCESS;
    }

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_ERR_NOT_FOUND;
    }

    /* nope - see if we can get it */
    msg = OBJ_NEW(opal_buffer_t);
    /* pack the get cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* pack the request information - we'll get the entire blob
     * for this proc, so we don't need to pass the key */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, id, 1, OPAL_NAME))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the data to return */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* we have received the entire data blob for this process - unpack
     * and cache all values, keeping the one we requested to return
     * to the caller */
    cnt = 1;
    /* first field is the server's status for the request; it is only
     * consulted later if the key is not found in the blob */
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(cb);
        return rc;
    }
    found = false;
    cnt = 1;
    /* outer loop: one OPAL_BUFFER per data blob; inner loop drains each
     * blob's opal_value_t entries.  A successful unpack leaves cnt == 1,
     * so only the outer loop needs to re-prime it */
    while (OPAL_SUCCESS == (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) {
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s pmix:native retrieved %s (%s) from server for proc %s",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key,
                                (OPAL_STRING == kp->type) ? kp->data.string : "NS",
                                OPAL_NAME_PRINT(*id));
            /* cache every value locally so later gets hit the dstore;
             * NOTE(review): this reuses "ret", clobbering the server
             * status unpacked above - presumably intentional, verify */
            if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, id, kp))) {
                OPAL_ERROR_LOG(ret);
            }
            if (0 == strcmp(key, kp->key)) {
                /* this is the one the caller asked for - keep it */
                *kv = kp;
                found = true;
            } else {
                OBJ_RELEASE(kp);
            }
        }
        /* READ_PAST_END_OF_BUFFER is the normal "blob exhausted" signal */
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        }
        OBJ_RELEASE(bptr);
        cnt = 1;
    }
    /* likewise for the outer message stream */
    if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        OPAL_ERROR_LOG(rc);
    } else {
        rc = OPAL_SUCCESS;
    }
    OBJ_RELEASE(cb);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native get completed",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    if (found) {
        return OPAL_SUCCESS;
    }
    /* we didn't find the requested data - pass back a
     * status that indicates the source of the problem,
     * either during the data fetch, message unpacking,
     * or not found */
    *kv = NULL;
    if (OPAL_SUCCESS == rc) {
        if (OPAL_SUCCESS == ret) {
            rc = OPAL_ERR_NOT_FOUND;
        } else {
            rc = ret;
        }
    }
    return rc;
}
int main(int argc, char **argv) { char **client_env=NULL; char **client_argv=NULL; int rc; struct stat stat_buf; struct timeval tv; double test_start; test_params params; INIT_TEST_PARAMS(params); int test_fail = 0; char *tmp; int ns_nprocs; gettimeofday(&tv, NULL); test_start = tv.tv_sec + 1E-6*tv.tv_usec; /* smoke test */ if (PMIX_SUCCESS != 0) { TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS)); exit(1); } TEST_VERBOSE(("Testing version %s", PMIx_Get_version())); parse_cmd(argc, argv, ¶ms); TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout)); /* set common argv and env */ client_env = pmix_argv_copy(environ); set_client_argv(¶ms, &client_argv); tmp = pmix_argv_join(client_argv, ' '); TEST_VERBOSE(("Executing test: %s", tmp)); free(tmp); /* verify executable */ if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){ TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno))); FREE_TEST_PARAMS(params); return 0; } else if( !S_ISREG(stat_buf.st_mode) ){ TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary)); FREE_TEST_PARAMS(params); return 0; }else if( !(stat_buf.st_mode & S_IXUSR) ){ TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary)); FREE_TEST_PARAMS(params); return 0; } if (PMIX_SUCCESS != (rc = server_init(¶ms))) { FREE_TEST_PARAMS(params); return rc; } cli_init(params.lsize); int launched = 0; /* set namespaces and fork clients */ if (NULL == params.ns_dist) { uint32_t i; int base_rank = 0; /* compute my start counter */ for(i = 0; i < (uint32_t)my_server_id; i++) { base_rank += (params.nprocs % params.nservers) > (uint32_t)i ? 
params.nprocs / params.nservers + 1 : params.nprocs / params.nservers; } /* we have a single namespace for all clients */ ns_nprocs = params.nprocs; launched += server_launch_clients(params.lsize, params.nprocs, base_rank, ¶ms, &client_env, &client_argv); } else { char *pch; pch = strtok(params.ns_dist, ":"); while (NULL != pch) { ns_nprocs = (int)strtol(pch, NULL, 10); if (params.nprocs < (uint32_t)(launched+ns_nprocs)) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); FREE_TEST_PARAMS(params); return PMIX_ERROR; } if (0 < ns_nprocs) { launched += server_launch_clients(ns_nprocs, ns_nprocs, 0, ¶ms, &client_env, &client_argv); } pch = strtok (NULL, ":"); } } if (params.lsize != (uint32_t)launched) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); cli_kill_all(); test_fail = 1; } /* hang around until the client(s) finalize */ while (!test_terminated()) { // To avoid test hang we want to interrupt the loop each 0.1s double test_current; // check if we exceed the max time gettimeofday(&tv, NULL); test_current = tv.tv_sec + 1E-6*tv.tv_usec; if( (test_current - test_start) > params.timeout ){ break; } cli_wait_all(0); } if( !test_terminated() ){ TEST_ERROR(("Test exited by a timeout!")); cli_kill_all(); test_fail = 1; } if( test_abort ){ TEST_ERROR(("Test was aborted!")); /* do not simply kill the clients as that generates * event notifications which these tests then print * out, flooding the log */ // cli_kill_all(); test_fail = 1; } if (0 != params.test_spawn) { PMIX_WAIT_FOR_COMPLETION(spawn_wait); } /* deregister the errhandler */ PMIx_Deregister_event_handler(0, op_callbk, NULL); cli_wait_all(1.0); test_fail += server_finalize(¶ms); FREE_TEST_PARAMS(params); pmix_argv_free(client_argv); pmix_argv_free(client_env); return test_fail; }