int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; pid_t pid; pmix_proc_t myproc; if (2 != argc) { fprintf(stderr, "Usage: probe <nspace>\n"); exit(1); } pid = getpid(); fprintf(stderr, "Client %lu: Running\n", (unsigned long)pid); /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any * debugger flag instructing us to stop-in-init. If such a directive * is included, then the process will be stopped in this call until * the "debugger release" notification arrives */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* get our universe size */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, val->data.uint32); /* now get the universe size of the specified nspace */ (void)strncpy(proc.nspace, argv[1], PMIX_MAX_NSLEN); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get of target nspace %s universe size failed: %d\n", myproc.nspace, myproc.rank, argv[1], rc); goto done; } fprintf(stderr, "Client %s:%d target nspace %s universe size %d\n", myproc.nspace, myproc.rank, argv[1], val->data.uint32); done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
void pmi_fence(int collect) { pmix_info_t *info = NULL; pmix_proc_t proc; bool value = 1; int ninfo = 0; int rc; if( collect ){ PMIX_INFO_CREATE(info, 1); (void)strncpy(info->key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); pmix_value_load(&info->value, &value, PMIX_BOOL); ninfo = 1; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, this_proc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, ninfo))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d", this_proc.nspace, this_proc.rank, rc); abort(); } if( collect ){ PMIX_INFO_FREE(info, ninfo); } }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info; bool flag; volatile int active; pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any * debugger flag instructing us to stop-in-init. If such a directive * is included, then the process will be stopped in this call until * the "debugger release" notification arrives */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ active = -1; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&active); while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* check to see if we have been instructed to wait for a debugger * to attach to us. We won't get both a stop-in-init AND a * wait-for-notify directive, so we should never stop twice. This * directive is provided so that something like an MPI implementation * can do some initial setup in MPI_Init prior to pausing for the * debugger */ if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, PMIX_DEBUG_WAIT_FOR_NOTIFY, NULL, 0, &val))) { /* register for debugger release */ active = -1; PMIx_Register_event_handler(&dbg, 1, NULL, 0, release_fn, evhandler_reg_callbk, (void*)&active); /* wait for registration to complete */ while (-1 == active) { sleep(1); } if (0 != active) { fprintf(stderr, "[%s:%d] Debug handler registration failed\n", myproc.nspace, myproc.rank); exit(active); } /* wait for debugger release */ while (waiting_for_debugger) { sleep(1); } } /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, val->data.uint32); /* get the number of procs in our job - univ size is the total number of allocated * slots, not the number of procs in the job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d num procs %d\n", myproc.nspace, myproc.rank, nprocs); /* put a few values */ if (0 > asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Store_internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); /* push the data to our PMIx server */ if (PMIX_SUCCESS != (rc = PMIx_Commit())) { fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" * data from our peers */ PMIX_INFO_CREATE(info, 1); flag = true; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); /* check the returned data */ for (n=0; n < nprocs; n++) { if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_UINT64 != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (1234 != val->data.uint64) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { exit(1); } if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); goto done; } if (PMIX_STRING != val->type) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (0 != strcmp(val->data.string, "1234")) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t n, num_gets; bool active; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %d", myproc.nspace, myproc.rank, rc); goto done; } (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank); value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc); goto done; } (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank); value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* introduce a delay by one rank so we can check what happens * if a "get" is received prior to data being provided */ if (0 == myproc.rank) { sleep(2); } /* commit the data to the server */ if (PMIX_SUCCESS != (rc = PMIx_Commit())) { pmix_output(0, "Client ns %s rank %d: PMIx_Commit failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* call fence_nb, but don't return any data */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; active = true; if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &active))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* get the committed data - ask for someone who doesn't exist as well */ num_gets = 0; for (n=0; n < nprocs; n++) { (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); proc.rank = n; if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, NULL, 0, valcbfunc, tmp))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); goto done; } ++num_gets; (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, NULL, 0, valcbfunc, tmp))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); goto done; } ++num_gets; } /* wait for the first fence to finish */ PMIX_WAIT_FOR_COMPLETION(active); /* wait for all my "get" calls to complete */ while (getcount < num_gets) { struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } /* call fence again so everyone waits before leaving */ proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { doabort = true; } } /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } PMIX_VALUE_RELEASE(val); /* test something */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_SERVER_URI, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } pmix_output(0, "CLIENT SERVER URI: %s", val->data.string); PMIX_VALUE_RELEASE(val); /* register a handler specifically for when models declare */ active = true; ninfo = 1; PMIX_INFO_CREATE(iptr, ninfo); PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); code = PMIX_MODEL_DECLARED; PMIx_Register_event_handler(&code, 1, iptr, ninfo, model_callback, model_registration_callback, (void*)&active); while (active) { usleep(10); } PMIX_INFO_FREE(iptr, ninfo); /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, errhandler_reg_callbk, (void*)&active); while (active) { usleep(10); } /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } (void)asprintf(&tmp, "%s-%d-remote-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); goto done; } /* check the returned data */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); continue; } if (NULL == val) { pmix_output(0, "Client ns %s rank %d: NULL value returned", myproc.nspace, myproc.rank); break; } if (PMIX_UINT64 != val->type) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); continue; } if (1234 != val->data.uint64) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); if (n != myproc.rank) { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { /* this data should _not_ be found as we are on the same node * and the data was "put" with a PMIX_REMOTE scope */ pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); continue; } pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } } } } /* now get the data blob for myself */ pmix_output(0, "Client ns %s rank %d testing internal modex blob", myproc.nspace, myproc.rank); if (PMIX_SUCCESS == (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) { if (PMIX_DATA_ARRAY != val->type) { pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else if (PMIX_INFO != val->data.darray->type) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); PMIX_VALUE_RELEASE(val); } else if (0 == val->data.darray->size) { pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", myproc.nspace, myproc.rank); PMIX_VALUE_RELEASE(val); } else { pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array; for (n=0; n < val->data.darray->size; n++) { pmix_output(0, "\tKey: %s", iptr[n].key); } PMIX_VALUE_RELEASE(val); } } else { pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); } /* log something */ PMIX_INFO_CONSTRUCT(&info); PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { pmix_output(0, "Client ns %s rank %d - log_nb returned %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { while (active) { usleep(10); } } PMIX_INFO_DESTRUCT(&info); /* if requested and our rank is 0, call abort */ if (doabort) { if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_PROC_REQUESTED_ABORT, "CALLING ABORT", NULL, 0); } else { while(!completed) { usleep(10); } } } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(rc); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ PMIx_Register_errhandler(NULL, 0, notification_fn, errhandler_reg_callbk, NULL); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* rank=0 calls abort */ if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_OUT_OF_RESOURCE, "Eat rocks", &proc, 1); pmix_output(0, "Client ns %s rank %d: Abort called", myproc.nspace, myproc.rank); } /* everyone simply waits */ while (!completed) { struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); PMIx_Deregister_errhandler(0, op_callbk, NULL); if (PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; char hostname[PMIX_MAXHOSTNAMELEN]; pmix_proc_t *peers; size_t npeers, ntmp=0; char *nodelist; gethostname(hostname, sizeof(hostname)); /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); app->cmd = strdup("gumby"); app->maxprocs = 2; pmix_argv_append_nosize(&app->argv, "gumby"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); PMIX_INFO_CREATE(app->info, 2); (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); app->info[0].value.type = PMIX_INT8; app->info[0].value.data.int8 = 12; (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); app->info[1].value.type = PMIX_DOUBLE; app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn failed: %d", myproc.nspace, myproc.rank, rc); goto done; } PMIX_APP_FREE(app, 1); /* check to see if we got the expected info back */ if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); goto done; } else { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); } /* get their universe size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || NULL == val) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the connect/disconnect functions */ if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Connect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Connect succeeded", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Disonnect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Disconnect succeeded", myproc.nspace, myproc.rank); /* finally, test the resolve functions */ if (0 == myproc.rank) { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } if ((nprocs+ntmp) != npeers) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } else { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, myproc.nspace, rc); goto done; } if (nprocs != npeers) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, nprocs, (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } PMIX_PROC_FREE(peers, npeers); free(nodelist); done: /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; pmix_info_t *info; pmix_pdata_t *pdata; pmix_proc_t myproc; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* publish something */ if (0 == myproc.rank) { PMIX_INFO_CREATE(info, 2); (void)strncpy(info[0].key, "FOOBAR", PMIX_MAX_KEYLEN); info[0].value.type = PMIX_UINT8; info[0].value.data.uint8 = 1; (void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN); info[1].value.type = PMIX_SIZE; info[1].value.data.size = 123456; if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Publish failed: %d", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 2); } /* call fence again so all procs know the data * has been published */ if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* lookup something */ if (0 != myproc.rank) { PMIX_PDATA_CREATE(pdata, 1); (void)strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* check the return for value and source */ if (0 != strncmp(myproc.nspace, pdata[0].proc.nspace, PMIX_MAX_NSLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong nspace: %s", myproc.nspace, myproc.rank, pdata[0].proc.nspace); goto done; } if (0 != pdata[0].proc.rank) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong rank: %d", myproc.nspace, myproc.rank, pdata[0].proc.rank); goto done; } if (PMIX_UINT8 != pdata[0].value.type) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong type: %d", myproc.nspace, myproc.rank, pdata[0].value.type); goto done; } if (1 != pdata[0].value.data.uint8) { pmix_output(0, "Client ns %s rank %d: PMIx_Lookup returned wrong value: %d", myproc.nspace, myproc.rank, (int)pdata[0].value.data.uint8); goto done; } PMIX_PDATA_FREE(pdata, 1); pmix_output(0, "PUBLISH-LOOKUP SUCCEEDED"); } /* call fence again so rank 0 waits before leaving */ if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } if (0 == myproc.rank) { char **keys = NULL; pmix_argv_append_nosize(&keys, "FOOBAR"); pmix_argv_append_nosize(&keys, "PANDA"); if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Unpublish failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "UNPUBLISH SUCCEEDED"); } /* call fence again so everyone waits for rank 0 before leaving */ proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { char nspace[PMIX_MAX_NSLEN+1]; int rank; int rc; pmix_value_t value; pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; uint32_t nprocs, n; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(nspace, &rank))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", nspace, rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", nspace, rank); /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(nspace, rank, PMIX_UNIV_SIZE, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", nspace, rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", nspace, rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", nspace, rank); value.type = PMIX_UINT32; value.data.uint32 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Store_internal(nspace, rank, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %d", nspace, rank, rc); goto done; } (void)asprintf(&tmp, "%s-%d-local", nspace, rank); value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", nspace, rank, rc); goto done; } (void)asprintf(&tmp, "%s-%d-remote", nspace, rank); value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", nspace, rank, rc); goto done; } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { pmix_output(0, "Client ns %s rank %d: PMIx_Commit failed: %d", nspace, rank, rc); goto done; } /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, true))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", nspace, rank, rc); goto done; } /* check the returned data */ for (n=0; n < nprocs; n++) { (void)asprintf(&tmp, "%s-%d-local", nspace, n); if (PMIX_SUCCESS != (rc = PMIx_Get(nspace, n, tmp, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", nspace, rank, tmp, rc); goto done; } if (PMIX_UINT64 != val->type) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d", nspace, rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (1234 != val->data.uint64) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d", nspace, rank, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned correct", nspace, rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); (void)asprintf(&tmp, "%s-%d-remote", nspace, n); if (PMIX_SUCCESS != (rc = PMIx_Get(nspace, n, tmp, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", nspace, rank, tmp, rc); goto done; } if (PMIX_STRING != val->type) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d", nspace, rank, tmp, val->type); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } if (0 != strcmp(val->data.string, "1234")) { pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s", nspace, rank, tmp, val->data.string); PMIX_VALUE_RELEASE(val); free(tmp); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Get %s returned correct", nspace, rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", nspace, rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", nspace, rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", nspace, rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info, *iptr; bool flag; mylock_t mylock; pmix_data_array_t *dptr; /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&mylock); /* wait for registration to complete */ DEBUG_WAIT_THREAD(&mylock); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); goto done; } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. Setup * a name to retrieve such values */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); /* inform the RM that we are preemptible, and that our checkpoint methods are * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ PMIX_INFO_CREATE(info, 2); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); /* can't use "load" to load a pmix_data_array_t */ (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); PMIX_DATA_ARRAY_CREATE(info[1].value.data.darray, 2, PMIX_INFO); dptr = info[1].value.data.darray; rc = SIGUSR2; iptr = (pmix_info_t*)dptr->array; PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); rc = PMIX_JCTRL_CHECKPOINT; PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); /* since this is informational and not a requested operation, the target parameter * doesn't mean anything and can be ignored */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(info, 2); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* now request that this process be monitored using heartbeats */ PMIX_INFO_CREATE(iptr, 1); PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); PMIX_INFO_CREATE(info, 3); PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); n = 5; // require a heartbeat every 5 seconds PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); n = 2; // two heartbeats can be missed before declaring us "stalled" PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); /* make the request */ DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, info, 3, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(iptr, 1); PMIX_INFO_FREE(info, 3); rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* send a heartbeat */ PMIx_Heartbeat(); /* call fence to synchronize with our peers - no need to * collect any info as we didn't "put" anything */ PMIX_INFO_CREATE(info, 1); flag = false; PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } PMIX_INFO_FREE(info, 1); done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int MPIDU_bc_table_create(int rank, int size, int *nodemap, void *bc, int bc_len, int same_len, int roots_only, void **bc_table, size_t ** bc_indices) { int rc, mpi_errno = MPI_SUCCESS; int start, end, i; char *val = NULL, *val_p; int out_len, val_len, rem, flag; pmix_value_t value, *pvalue; pmix_info_t *info; pmix_proc_t proc; int local_rank, local_leader; size_t my_bc_len = bc_len; MPIR_NODEMAP_get_local_info(rank, size, nodemap, &local_size, &local_rank, &local_leader); /* if business cards can be different length, use the max value length */ if (!same_len) bc_len = VALLEN; mpi_errno = MPIDU_shm_seg_alloc(bc_len * size, (void **) &segment, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIDU_shm_seg_commit(&memory, &barrier, local_size, local_rank, local_leader, rank, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (size == 1) { memcpy(segment, bc, my_bc_len); goto single; } val = MPL_malloc(VALLEN, MPL_MEM_ADDRESS); memset(val, 0, VALLEN); val_p = val; rem = VALLEN; rc = MPL_str_add_binary_arg(&val_p, &rem, "mpi", (char *) bc, my_bc_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**buscard"); MPIR_Assert(rem >= 0); if (!roots_only || rank == local_leader) { value.type = PMIX_STRING; value.data.string = val; rc = PMIx_Put(PMIX_LOCAL, "bc", &value); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_put"); rc = PMIx_Put(PMIX_REMOTE, "bc", &value); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_put"); rc = PMIx_Commit(); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_commit"); } PMIX_INFO_CREATE(info, 1); PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); rc = PMIx_Fence(&MPIR_Process.pmix_wcproc, 1, info, 1); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_fence"); PMIX_INFO_FREE(info, 1); if (!roots_only) { start = local_rank * (size / local_size); end = start + (size / local_size); if (local_rank == local_size - 1) end += size % local_size; for (i = start; i < end; i++) { PMIX_PROC_CONSTRUCT(&proc); MPL_strncpy(proc.nspace, MPIR_Process.pmix_proc.nspace, PMIX_MAX_NSLEN); proc.rank = i; rc = PMIx_Get(&proc, "bc", NULL, 0, &pvalue); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); PMIX_VALUE_RELEASE(pvalue); } } else { int num_nodes, *node_roots; MPIR_NODEMAP_get_node_roots(nodemap, size, &node_roots, &num_nodes); start = local_rank * (num_nodes / local_size); end = start + (num_nodes / local_size); if (local_rank == local_size - 1) end += num_nodes % local_size; for (i = start; i < end; i++) { PMIX_PROC_CONSTRUCT(&proc); MPL_strncpy(proc.nspace, MPIR_Process.pmix_proc.nspace, PMIX_MAX_NSLEN); proc.rank = i; rc = PMIx_Get(&proc, "bc", NULL, 0, &pvalue); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmix_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); PMIX_VALUE_RELEASE(pvalue); } } mpi_errno = MPIDU_shm_barrier(barrier, local_size); if (mpi_errno) MPIR_ERR_POP(mpi_errno); single: if (!same_len) { indices = MPL_malloc(size * sizeof(size_t), MPL_MEM_ADDRESS); for (i = 0; i < size; i++) indices[i] = bc_len * i; *bc_indices = indices; } fn_exit: MPL_free(val); *bc_table = segment; return mpi_errno; fn_fail: goto fn_exit; }
int main(int argc, char **argv) { pmix_status_t rc; pmix_value_t *val; pmix_proc_t proc; pmix_info_t *info; size_t ninfo; pmix_query_t *query; size_t nq, n; myquery_data_t myquery_data; pid_t pid; pmix_status_t code = PMIX_ERR_JOB_TERMINATED; mylock_t mylock; myrel_t myrel; uint16_t localrank; char *target = NULL; pid = getpid(); /* init us - since we were launched by the RM, our connection info * will have been provided at startup. */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { fprintf(stderr, "Debugger daemon: PMIx_tool_init failed: %d\n", rc); exit(0); } fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* register our default event handler */ DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, (void*)&mylock); DEBUG_WAIT_THREAD(&mylock); if (PMIX_SUCCESS != mylock.status) { rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); goto done; } DEBUG_DESTRUCT_LOCK(&mylock); /* get the nspace of the job we are to debug - it will be in our JOB info */ #ifdef PMIX_LOAD_PROCID PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); #else PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_KEYLEN); proc.rank = PMIX_RANK_WILDCARD; #endif if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) { fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - error %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc)); goto done; } if (NULL == val || PMIX_STRING != val->type || NULL == val->data.string) { fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - NULL data returned\n", myproc.nspace, myproc.rank, (unsigned long)pid); goto done; } /* save it for later */ target = strdup(val->data.string); PMIX_VALUE_RELEASE(val); fprintf(stderr, "[%s:%d:%lu] Debugging %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, target); /* get my local rank so I can determine which local proc is "mine" * to debug */ val = NULL; if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - error %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc)); goto done; } if (NULL == val) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - NULL data returned\n", myproc.nspace, myproc.rank, (unsigned long)pid); goto done; } if (PMIX_UINT16 != val->type) { fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - returned wrong type %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Data_type_string(val->type)); goto done; } /* save the data */ localrank = val->data.uint16; PMIX_VALUE_RELEASE(val); fprintf(stderr, "[%s:%d:%lu] my local rank %d\n", myproc.nspace, myproc.rank, (unsigned long)pid, (int)localrank); /* register another handler specifically for when the target * job completes */ DEBUG_CONSTRUCT_LOCK(&myrel.lock); myrel.nspace = strdup(proc.nspace); PMIX_INFO_CREATE(info, 2); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); /* only call me back when this specific job terminates */ PMIX_LOAD_PROCID(&proc, target, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&info[1], PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); fprintf(stderr, "[%s:%d:%lu] registering for termination of %s\n", myproc.nspace, myproc.rank, (unsigned long)pid, proc.nspace); DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(&code, 1, info, 2, release_fn, evhandler_reg_callbk, (void*)&mylock); DEBUG_WAIT_THREAD(&mylock); if (PMIX_SUCCESS != mylock.status) { rc = mylock.status; DEBUG_DESTRUCT_LOCK(&mylock); PMIX_INFO_FREE(info, 2); goto done; } DEBUG_DESTRUCT_LOCK(&mylock); PMIX_INFO_FREE(info, 2); /* get our local proctable - for scalability reasons, we don't want to * have our "root" debugger process get the proctable for everybody and * send it out to us. So ask the local PMIx server for the pid's of * our local target processes */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); query[0].nqual = 1; PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, target, PMIX_STRING); // the nspace we are enquiring about /* setup the caddy to retrieve the data */ DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); myquery_data.info = NULL; myquery_data.ninfo = 0; /* execute the query */ if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) { fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); goto done; } DEBUG_WAIT_THREAD(&myquery_data.lock); DEBUG_DESTRUCT_LOCK(&myquery_data.lock); PMIX_QUERY_FREE(query, nq); if (PMIX_SUCCESS != myquery_data.status) { rc = myquery_data.status; goto done; } fprintf(stderr, "[%s:%d:%lu] Local proctable received\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* now that we have the proctable for our local processes, we can do our * magic debugger stuff and attach to them. We then send a "release" event * to them - i.e., it's the equivalent to setting the MPIR breakpoint. We * do this with the event notification system. For this example, we just * send it to all local procs of the job being debugged */ (void)strncpy(proc.nspace, target, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; ninfo = 2; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); // deliver to the target nspace PMIX_INFO_LOAD(&info[1], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); // deliver to the target nspace fprintf(stderr, "[%s:%u:%lu] Sending release\n", myproc.nspace, myproc.rank, (unsigned long)pid); rc = PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, NULL, PMIX_RANGE_CUSTOM, info, ninfo, NULL, NULL); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%s[%s:%u:%lu] Sending release failed with error %s(%d)\n", myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc), rc); goto done; } /* do some debugger magic while waiting for the job to terminate */ DEBUG_WAIT_THREAD(&myrel.lock); done: if (NULL != target) { free(target); } /* finalize us */ fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Finalizing\n", myproc.nspace, myproc.rank, (unsigned long)pid); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank, (unsigned long)pid); } fflush(stderr); return(0); }