static int test_spawn_common(char *my_nspace, int my_rank, int blocking) { int rc; pmix_app_t *apps; size_t napps; char nspace[PMIX_MAX_NSLEN+1]; memset(nspace, 0, PMIX_MAX_NSLEN+1); napps = 1; PMIX_APP_CREATE(apps, napps); if (blocking) { if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, apps, napps, nspace))) { PMIX_APP_FREE(apps, napps); return rc; } } else { spawn_cbdata cbdata; cbdata.in_progress = 1; memset(cbdata.nspace, 0, PMIX_MAX_NSLEN); rc = PMIx_Spawn_nb(NULL, 0, apps, napps, spawn_cb, (void*)&cbdata); if (PMIX_SUCCESS != rc) { PMIX_APP_FREE(apps, napps); return rc; } PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress); strncpy(nspace, cbdata.nspace, strlen(cbdata.nspace)+1); } PMIX_APP_FREE(apps, napps); if (strncmp(nspace, "foobar", strlen(nspace)+1)) { return PMIX_ERROR; } return rc; }
int PMI2_Job_Spawn(int count, const char * cmds[], int argcs[], const char ** argvs[], const int maxprocs[], const int info_keyval_sizes[], const PMI_keyval_t *info_keyval_vectors[], int preput_keyval_size, const PMI_keyval_t *preput_keyval_vector[], char jobId[], int jobIdSize, int errors[]) { pmix_status_t rc = PMIX_SUCCESS; pmix_app_t *apps; int i, k; size_t j; char *evar; PMI2_CHECK(); if (NULL == cmds) { return PMI2_ERR_INVALID_ARGS; } /* setup the apps */ PMIX_APP_CREATE(apps, count); for (i=0; i < count; i++) { apps[i].cmd = strdup(cmds[i]); apps[i].maxprocs = maxprocs[i]; apps[i].argv = pmix_argv_copy((char**)argvs[i]); apps[i].argc = pmix_argv_count(apps[i].argv); apps[i].ninfo = info_keyval_sizes[i]; apps[i].info = (pmix_info_t*)malloc(apps[i].ninfo * sizeof(pmix_info_t)); /* copy the info objects */ for (j=0; j < apps[i].ninfo; j++) { (void)strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); apps[i].info[j].value.type = PMIX_STRING; apps[i].info[j].value.data.string = strdup(info_keyval_vectors[i][j].val); } /* push the preput values into the apps environ */ for (k=0; k < preput_keyval_size; k++) { (void)asprintf(&evar, "%s=%s", preput_keyval_vector[j]->key, preput_keyval_vector[j]->val); pmix_argv_append_nosize(&apps[i].env, evar); free(evar); } } rc = PMIx_Spawn(NULL, 0, apps, count, NULL); /* tear down the apps array */ for (i=0; i < count; i++) { PMIX_APP_DESTRUCT(&apps[i]); } free(apps); if (NULL != errors) { for (i=0; i < count; i++) { errors[i] = convert_err(rc); } } return convert_err(rc); }
/*
 * Launch the "./debuggerd" daemons for the application in namespace
 * appspace and register release_fn to be called when that daemon job
 * terminates.
 *
 * appspace - namespace of the job being debugged
 * myrel    - release object; its nspace member receives a copy of the
 *            daemon job's namespace and the object itself is handed to
 *            the event handler as PMIX_EVENT_RETURN_OBJECT
 *
 * Returns the PMIx_Spawn status if the launch fails, otherwise the status
 * recorded in the lock by evhandler_reg_callbk.
 */
static pmix_status_t spawn_debugger(char *appspace, myrel_t *myrel)
{
    pmix_status_t rc;
    pmix_info_t *dinfo;
    pmix_app_t *debugger;
    size_t dninfo;
    char cwd[1024];
    char dspace[PMIX_MAX_NSLEN+1];
    mylock_t mylock;
    pmix_status_t code = PMIX_ERR_JOB_TERMINATED;

    /* setup the debugger */
    PMIX_APP_CREATE(debugger, 1);
    debugger[0].cmd = strdup("./debuggerd");
    PMIX_ARGV_APPEND(rc, debugger[0].argv, "./debuggerd");
    /* point us to our current directory; if it cannot be determined the
     * buffer contents are undefined, so leave cwd unset in that case */
    if (NULL != getcwd(cwd, sizeof(cwd))) {
        debugger[0].cwd = strdup(cwd);
    } else {
        fprintf(stderr, "Debugger: unable to determine the current directory\n");
    }

    /* provide directives so the daemons go where we want, and
     * let the RM know these are debugger daemons.
     * Each directive needs its own slot: seven directives, indices 0..6 */
    dninfo = 7;
    PMIX_INFO_CREATE(dinfo, dninfo);
    PMIX_INFO_LOAD(&dinfo[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);  /* instruct the RM to launch one copy of the executable on each node */
    PMIX_INFO_LOAD(&dinfo[1], PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); /* these are debugger daemons */
    PMIX_INFO_LOAD(&dinfo[2], PMIX_DEBUG_JOB, appspace, PMIX_STRING);  /* the nspace being debugged */
    PMIX_INFO_LOAD(&dinfo[3], PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); /* notify us when the debugger job completes */
    PMIX_INFO_LOAD(&dinfo[4], PMIX_DEBUG_WAITING_FOR_NOTIFY, NULL, PMIX_BOOL); /* tell the daemon that the proc is waiting to be released */
    PMIX_INFO_LOAD(&dinfo[5], PMIX_FWD_STDOUT, NULL, PMIX_BOOL);       /* forward stdout to me */
    PMIX_INFO_LOAD(&dinfo[6], PMIX_FWD_STDERR, NULL, PMIX_BOOL);       /* forward stderr to me */

    /* spawn the daemons */
    fprintf(stderr, "Debugger: spawning %s\n", debugger[0].cmd);
    if (PMIX_SUCCESS != (rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, dspace))) {
        fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", PMIx_Error_string(rc));
        PMIX_INFO_FREE(dinfo, dninfo);
        PMIX_APP_FREE(debugger, 1);
        return rc;
    }

    /* cleanup */
    PMIX_INFO_FREE(dinfo, dninfo);
    PMIX_APP_FREE(debugger, 1);

    /* register callback for when this job terminates */
    myrel->nspace = strdup(dspace);
    PMIX_INFO_CREATE(dinfo, 2);
    PMIX_INFO_LOAD(&dinfo[0], PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER);
    /* only call me back when this specific job terminates */
    PMIX_INFO_LOAD(&dinfo[1], PMIX_NSPACE, dspace, PMIX_STRING);

    /* the registration is asynchronous: block until evhandler_reg_callbk
     * releases the lock, then pick up the status it stored there */
    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(&code, 1, dinfo, 2,
                                release_fn, evhandler_reg_callbk, (void*)&mylock);
    DEBUG_WAIT_THREAD(&mylock);
    rc = mylock.status;
    DEBUG_DESTRUCT_LOCK(&mylock);
    PMIX_INFO_FREE(dinfo, 2);

    return rc;
}
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; char hostname[PMIX_MAXHOSTNAMELEN]; pmix_proc_t *peers; size_t npeers, ntmp=0; char *nodelist; gethostname(hostname, sizeof(hostname)); /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); exit(0); } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); app->cmd = strdup("gumby"); app->maxprocs = 2; pmix_argv_append_nosize(&app->argv, "gumby"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); PMIX_INFO_CREATE(app->info, 2); (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); app->info[0].value.type = PMIX_INT8; app->info[0].value.data.int8 = 12; (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); app->info[1].value.type = PMIX_DOUBLE; app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", 
myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn failed: %d", myproc.nspace, myproc.rank, rc); goto done; } PMIX_APP_FREE(app, 1); /* check to see if we got the expected info back */ if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); goto done; } else { pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); } /* get their universe size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || NULL == val) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the connect/disconnect functions */ if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Connect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Connect succeeded", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Disonnect failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Disconnect succeeded", myproc.nspace, myproc.rank); /* finally, test the resolve functions */ if (0 == myproc.rank) { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } if ((nprocs+ntmp) != npeers) { pmix_output(0, "Client ns %s 
rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } else { if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d", myproc.nspace, myproc.rank, myproc.nspace, rc); goto done; } if (nprocs != npeers) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d", myproc.nspace, myproc.rank, nprocs, (int)npeers); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers", myproc.nspace, myproc.rank, (int)npeers); if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d", myproc.nspace, myproc.rank, rc); goto done; } pmix_output(0, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); } PMIX_PROC_FREE(peers, npeers); free(nodelist); done: /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", 
myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
/* Xfer FUNCTIONS FOR GENERIC PMIX TYPES */ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, pmix_value_t *src) { size_t n, m; pmix_status_t rc; char **prarray, **strarray; pmix_value_t *pv, *sv; pmix_info_t *p1, *s1; pmix_app_t *pa, *sa; pmix_pdata_t *pd, *sd; pmix_buffer_t *pb, *sb; pmix_byte_object_t *pbo, *sbo; pmix_kval_t *pk, *sk; pmix_modex_data_t *pm, *sm; pmix_proc_info_t *pi, *si; pmix_query_t *pq, *sq; pmix_envar_t *pe, *se; /* copy the right field */ p->type = src->type; switch (src->type) { case PMIX_UNDEF: break; case PMIX_BOOL: p->data.flag = src->data.flag; break; case PMIX_BYTE: p->data.byte = src->data.byte; break; case PMIX_STRING: if (NULL != src->data.string) { p->data.string = strdup(src->data.string); } else { p->data.string = NULL; } break; case PMIX_SIZE: p->data.size = src->data.size; break; case PMIX_PID: p->data.pid = src->data.pid; break; case PMIX_INT: /* to avoid alignment issues */ memcpy(&p->data.integer, &src->data.integer, sizeof(int)); break; case PMIX_INT8: p->data.int8 = src->data.int8; break; case PMIX_INT16: /* to avoid alignment issues */ memcpy(&p->data.int16, &src->data.int16, 2); break; case PMIX_INT32: /* to avoid alignment issues */ memcpy(&p->data.int32, &src->data.int32, 4); break; case PMIX_INT64: /* to avoid alignment issues */ memcpy(&p->data.int64, &src->data.int64, 8); break; case PMIX_UINT: /* to avoid alignment issues */ memcpy(&p->data.uint, &src->data.uint, sizeof(unsigned int)); break; case PMIX_UINT8: p->data.uint8 = src->data.uint8; break; case PMIX_UINT16: /* to avoid alignment issues */ memcpy(&p->data.uint16, &src->data.uint16, 2); break; case PMIX_UINT32: /* to avoid alignment issues */ memcpy(&p->data.uint32, &src->data.uint32, 4); break; case PMIX_UINT64: /* to avoid alignment issues */ memcpy(&p->data.uint64, &src->data.uint64, 8); break; case PMIX_FLOAT: p->data.fval = src->data.fval; break; case PMIX_DOUBLE: p->data.dval = src->data.dval; break; case PMIX_TIMEVAL: memcpy(&p->data.tv, 
&src->data.tv, sizeof(struct timeval)); break; case PMIX_TIME: memcpy(&p->data.time, &src->data.time, sizeof(time_t)); break; case PMIX_STATUS: memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); break; case PMIX_PROC: PMIX_PROC_CREATE(p->data.proc, 1); if (NULL == p->data.proc) { return PMIX_ERR_NOMEM; } memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t)); break; case PMIX_PROC_RANK: memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t)); break; case PMIX_BYTE_OBJECT: case PMIX_COMPRESSED_STRING: memset(&p->data.bo, 0, sizeof(pmix_byte_object_t)); if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) { p->data.bo.bytes = malloc(src->data.bo.size); memcpy(p->data.bo.bytes, src->data.bo.bytes, src->data.bo.size); p->data.bo.size = src->data.bo.size; } else { p->data.bo.bytes = NULL; p->data.bo.size = 0; } break; case PMIX_PERSIST: memcpy(&p->data.persist, &src->data.persist, sizeof(pmix_persistence_t)); break; case PMIX_SCOPE: memcpy(&p->data.scope, &src->data.scope, sizeof(pmix_scope_t)); break; case PMIX_DATA_RANGE: memcpy(&p->data.range, &src->data.range, sizeof(pmix_data_range_t)); break; case PMIX_PROC_STATE: memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t)); break; case PMIX_PROC_INFO: PMIX_PROC_INFO_CREATE(p->data.pinfo, 1); if (NULL != src->data.pinfo->hostname) { p->data.pinfo->hostname = strdup(src->data.pinfo->hostname); } if (NULL != src->data.pinfo->executable_name) { p->data.pinfo->executable_name = strdup(src->data.pinfo->executable_name); } memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t)); memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code, sizeof(int)); memcpy(&p->data.pinfo->state, &src->data.pinfo->state, sizeof(pmix_proc_state_t)); break; case PMIX_DATA_ARRAY: p->data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); p->data.darray->type = src->data.darray->type; p->data.darray->size = src->data.darray->size; if (0 == p->data.darray->size || NULL == 
src->data.darray->array) { p->data.darray->array = NULL; p->data.darray->size = 0; break; } /* allocate space and do the copy */ switch (src->data.darray->type) { case PMIX_UINT8: case PMIX_INT8: case PMIX_BYTE: p->data.darray->array = (char*)malloc(src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size); break; case PMIX_UINT16: case PMIX_INT16: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint16_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint16_t)); break; case PMIX_UINT32: case PMIX_INT32: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint32_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint32_t)); break; case PMIX_UINT64: case PMIX_INT64: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint64_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint64_t)); break; case PMIX_BOOL: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(bool)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(bool)); break; case PMIX_SIZE: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(size_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(size_t)); break; case PMIX_PID: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(pid_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, 
src->data.darray->array, src->data.darray->size * sizeof(pid_t)); break; case PMIX_STRING: p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } prarray = (char**)p->data.darray->array; strarray = (char**)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != strarray[n]) { prarray[n] = strdup(strarray[n]); } } break; case PMIX_INT: case PMIX_UINT: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(int)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(int)); break; case PMIX_FLOAT: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(float)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(float)); break; case PMIX_DOUBLE: p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(double)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(double)); break; case PMIX_TIMEVAL: p->data.darray->array = (struct timeval*)malloc(src->data.darray->size * sizeof(struct timeval)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(struct timeval)); break; case PMIX_TIME: p->data.darray->array = (time_t*)malloc(src->data.darray->size * sizeof(time_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(time_t)); break; case PMIX_STATUS: p->data.darray->array = (pmix_status_t*)malloc(src->data.darray->size * sizeof(pmix_status_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, 
src->data.darray->array, src->data.darray->size * sizeof(pmix_status_t)); break; case PMIX_VALUE: PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pv = (pmix_value_t*)p->data.darray->array; sv = (pmix_value_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (PMIX_SUCCESS != (rc = pmix_value_xfer(&pv[n], &sv[n]))) { PMIX_VALUE_FREE(pv, src->data.darray->size); return rc; } } break; case PMIX_PROC: PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_proc_t)); break; case PMIX_APP: PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pa = (pmix_app_t*)p->data.darray->array; sa = (pmix_app_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != sa[n].cmd) { pa[n].cmd = strdup(sa[n].cmd); } if (NULL != sa[n].argv) { pa[n].argv = pmix_argv_copy(sa[n].argv); } if (NULL != sa[n].env) { pa[n].env = pmix_argv_copy(sa[n].env); } if (NULL != sa[n].cwd) { pa[n].cwd = strdup(sa[n].cwd); } pa[n].maxprocs = sa[n].maxprocs; if (0 < sa[n].ninfo && NULL != sa[n].info) { PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo); if (NULL == pa[n].info) { PMIX_APP_FREE(pa, src->data.darray->size); return PMIX_ERR_NOMEM; } pa[n].ninfo = sa[n].ninfo; for (m=0; m < pa[n].ninfo; m++) { PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]); } } } break; case PMIX_INFO: PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size); p1 = (pmix_info_t*)p->data.darray->array; s1 = (pmix_info_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { PMIX_INFO_XFER(&p1[n], &s1[n]); } break; case PMIX_PDATA: PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pd 
= (pmix_pdata_t*)p->data.darray->array; sd = (pmix_pdata_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { PMIX_PDATA_XFER(&pd[n], &sd[n]); } break; case PMIX_BUFFER: p->data.darray->array = (pmix_buffer_t*)malloc(src->data.darray->size * sizeof(pmix_buffer_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pb = (pmix_buffer_t*)p->data.darray->array; sb = (pmix_buffer_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); pmix_bfrops_base_copy_payload(&pb[n], &sb[n]); } break; case PMIX_BYTE_OBJECT: case PMIX_COMPRESSED_STRING: p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pbo = (pmix_byte_object_t*)p->data.darray->array; sbo = (pmix_byte_object_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != sbo[n].bytes && 0 < sbo[n].size) { pbo[n].size = sbo[n].size; pbo[n].bytes = (char*)malloc(pbo[n].size); memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size); } else { pbo[n].bytes = NULL; pbo[n].size = 0; } } break; case PMIX_KVAL: p->data.darray->array = (pmix_kval_t*)calloc(src->data.darray->size , sizeof(pmix_kval_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pk = (pmix_kval_t*)p->data.darray->array; sk = (pmix_kval_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != sk[n].key) { pk[n].key = strdup(sk[n].key); } if (NULL != sk[n].value) { PMIX_VALUE_CREATE(pk[n].value, 1); if (NULL == pk[n].value) { free(p->data.darray->array); return PMIX_ERR_NOMEM; } if (PMIX_SUCCESS != (rc = pmix_value_xfer(pk[n].value, sk[n].value))) { return rc; } } } break; case PMIX_MODEX: PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pm = (pmix_modex_data_t*)p->data.darray->array; sm = 
(pmix_modex_data_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t)); if (NULL != sm[n].blob && 0 < sm[n].size) { pm[n].blob = (uint8_t*)malloc(sm[n].size); if (NULL == pm[n].blob) { return PMIX_ERR_NOMEM; } memcpy(pm[n].blob, sm[n].blob, sm[n].size); pm[n].size = sm[n].size; } else { pm[n].blob = NULL; pm[n].size = 0; } } break; case PMIX_PERSIST: p->data.darray->array = (pmix_persistence_t*)malloc(src->data.darray->size * sizeof(pmix_persistence_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_persistence_t)); break; case PMIX_POINTER: p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } prarray = (char**)p->data.darray->array; strarray = (char**)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { prarray[n] = strarray[n]; } break; case PMIX_SCOPE: p->data.darray->array = (pmix_scope_t*)malloc(src->data.darray->size * sizeof(pmix_scope_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_scope_t)); break; case PMIX_DATA_RANGE: p->data.darray->array = (pmix_data_range_t*)malloc(src->data.darray->size * sizeof(pmix_data_range_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_data_range_t)); break; case PMIX_COMMAND: p->data.darray->array = (pmix_cmd_t*)malloc(src->data.darray->size * sizeof(pmix_cmd_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_cmd_t)); break; case PMIX_INFO_DIRECTIVES: p->data.darray->array = 
(pmix_info_directives_t*)malloc(src->data.darray->size * sizeof(pmix_info_directives_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_info_directives_t)); break; case PMIX_PROC_INFO: PMIX_PROC_INFO_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pi = (pmix_proc_info_t*)p->data.darray->array; si = (pmix_proc_info_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t)); if (NULL != si[n].hostname) { pi[n].hostname = strdup(si[n].hostname); } else { pi[n].hostname = NULL; } if (NULL != si[n].executable_name) { pi[n].executable_name = strdup(si[n].executable_name); } else { pi[n].executable_name = NULL; } pi[n].pid = si[n].pid; pi[n].exit_code = si[n].exit_code; pi[n].state = si[n].state; } break; case PMIX_DATA_ARRAY: PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays case PMIX_QUERY: PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pq = (pmix_query_t*)p->data.darray->array; sq = (pmix_query_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != sq[n].keys) { pq[n].keys = pmix_argv_copy(sq[n].keys); } if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) { PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual); if (NULL == pq[n].qualifiers) { PMIX_QUERY_FREE(pq, src->data.darray->size); return PMIX_ERR_NOMEM; } for (m=0; m < sq[n].nqual; m++) { PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]); } pq[n].nqual = sq[n].nqual; } else { pq[n].qualifiers = NULL; pq[n].nqual = 0; } } break; case PMIX_ENVAR: PMIX_ENVAR_CREATE(p->data.darray->array, src->data.darray->size); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; } pe = 
(pmix_envar_t*)p->data.darray->array; se = (pmix_envar_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { if (NULL != se[n].envar) { pe[n].envar = strdup(se[n].envar); } if (NULL != se[n].value) { pe[n].value = strdup(se[n].value); } pe[n].separator = se[n].separator; } break; default: return PMIX_ERR_UNKNOWN_DATA_TYPE; } break; case PMIX_POINTER: memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*)); break; case PMIX_ENVAR: PMIX_ENVAR_CONSTRUCT(&p->data.envar); if (NULL != src->data.envar.envar) { p->data.envar.envar = strdup(src->data.envar.envar); } if (NULL != src->data.envar.value) { p->data.envar.value = strdup(src->data.envar.value); } p->data.envar.separator = src->data.envar.separator; break; /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: p->data.array->size = src->data.array->size; if (0 < src->data.array->size) { p->data.array->array = (pmix_info_t*)malloc(src->data.array->size * sizeof(pmix_info_t)); if (NULL == p->data.array->array) { return PMIX_ERR_NOMEM; } p1 = (pmix_info_t*)p->data.array->array; s1 = (pmix_info_t*)src->data.array->array; for (n=0; n < src->data.darray->size; n++) { PMIX_INFO_XFER(&p1[n], &s1[n]); } } break; /********************/ default: pmix_output(0, "XFER-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)src->type); assert(0); return PMIX_ERROR; } return PMIX_SUCCESS; }
static int attach_to_running_job(char *nspace) { pmix_status_t rc; pmix_proc_t myproc; pmix_query_t *query; size_t nq; myquery_data_t *q; /* query the active nspaces so we can verify that the * specified one exists */ nq = 1; PMIX_QUERY_CREATE(query, nq); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACES); q = (myquery_data_t*)malloc(sizeof(myquery_data_t)); DEBUG_CONSTRUCT_LOCK(&q->lock); if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)q))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); return -1; } DEBUG_WAIT_THREAD(&q->lock); DEBUG_DESTRUCT_LOCK(&q->lock); if (NULL == q->info) { fprintf(stderr, "Query returned no info\n"); return -1; } /* the query should have returned a comma-delimited list of nspaces */ if (PMIX_STRING != q->info[0].value.type) { fprintf(stderr, "Query returned incorrect data type: %d\n", q->info[0].value.type); return -1; } if (NULL == q->info[0].value.data.string) { fprintf(stderr, "Query returned no active nspaces\n"); return -1; } fprintf(stderr, "Query returned %s\n", q->info[0].value.data.string); return 0; #if 0 /* split the returned string and look for the given nspace */ /* if not found, then we have an error */ PMIX_INFO_FREE(info, ninfo); /* get the proctable for this nspace */ ninfo = 1; PMIX_INFO_CREATE(info, ninfo); (void)strncpy(info[0].key, PMIX_QUERY_PROC_TABLE, PMIX_MAX_KEYLEN); (void)strncpy(info[0].qualifier, nspace, PMIX_MAX_KEYLEN); if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(info, ninfo, infocbfunc, (void*)&active))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info_nb failed: %d\n", myproc.nspace, myproc.rank, rc); return -1; } /* wait to get a response */ /* the query should have returned a data_array */ if (PMIX_DATA_ARRAY != info[0].type) { fprintf(stderr, "Query returned incorrect data type: %d\n", info[0].type); return -1; } if (NULL == info[0].data.darray.array) { fprintf(stderr, "Query returned no proctable info\n"); 
return -1; } /* the data array consists of a struct: * size_t size; * void* array; * * In this case, the array is composed of pmix_proc_info_t structs: * pmix_proc_t proc; // contains the nspace,rank of this proc * char* hostname; * char* executable_name; * pid_t pid; * int exit_code; * pmix_proc_state_t state; */ /* this is where a debugger tool would process the proctable to * create whatever blob it needs to provide to its daemons */ PMIX_INFO_FREE(info, ninfo); /* setup the debugger daemon spawn request */ napps = 1; PMIX_APP_CREATE(app, napps); /* setup the name of the daemon executable to launch */ app[0].cmd = strdup("debuggerdaemon"); app[0].argc = 1; app[0].argv = (char**)malloc(2*sizeof(char*)); app[0].argv[0] = strdup("debuggerdaemon"); app[0].argv[1] = NULL; /* provide directives so the daemons go where we want, and * let the RM know these are debugger daemons */ ninfo = 3; PMIX_INFO_CREATE(app[0].info, ninfo); PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING); // instruct the RM to launch one copy of the executable on each node PMIX_INFO_LOAD(&app[0].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); // these are debugger daemons PMIX_INFO_LOAD(&app[0].info[2], PMIX_DEBUG_TARGET, nspace, PMIX_STRING); // the "jobid" of the application to be debugged /* spawn the daemons */ PMIx_Spawn(NULL, 0, app, napps, dspace); /* cleanup */ PMIX_APP_FREE(app, napps); /* this is where a debugger tool would wait until the debug operation is complete */ return 0; #endif }
int main(int argc, char **argv, const char **environ) { pmix_status_t rc; pmix_info_t *info = NULL; bool flag; pmix_status_t retval; pmix_app_t *spawned_app = NULL; pmix_info_t *job_info = NULL; pmix_info_t *proc_info = NULL; int job_info_count = 0; int job_info_index = 0; int proc_info_count = 0; int proc_info_index = 0; char spawned_nsp[PMIX_MAX_NSLEN+1]; char *path_to_app = NULL; char *host_to_use = NULL; int number_of_clients = 0; int temp_counter = 0; done_flag = false; gethostname(hostn, 500); int spawned_app_argc = 0; char **scr_environ = NULL; int proc_count = 1; int node_count = 0; bool blocking_mode = true; char *node_list = NULL; bool forward_all_scr_envs = false; bool pmix_mode = false; const char *optstring = "+n:N:L:x:bB:pPvhe"; int temp_slen=0; /* todo: add arg parsing with ompi schizo */ verbose_print = false; int sleep_max = 30; const int fixed_sleep = 5; int c; while((c = getopt(argc, argv, optstring)) != -1){ switch(c){ case 'h': print_usage(argv[0]); exit(0); break; case 'n': proc_count = atoi(optarg); if(proc_count <= 0 || proc_count > 100){ printf("outside the range of allowable instances to spawn [1-100]\n"); exit(1); } if(verbose_print) { printf("proc_count = %d\n", proc_count); } break; case 'N': /* node_count = atoi(optarg); */ node_count = 1; if(verbose_print) { printf("node_count = %d\n", node_count); } break; case 'B': blocking_mode = true; sleep_max = atoi(optarg); if(sleep_max < 0){ printf("can't sleep for less than 0 seconds\n"); exit(1); } if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'b': blocking_mode = false; if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'L': node_list = optarg; host_to_use = node_list; if(verbose_print){ printf("node_list = '%s'\n", node_list); } break; case 'x': temp_slen = strlen(optarg); /* check if the string is the same length as 'SCR', if so compare them */ if(temp_slen == strlen(SCR_STRING)){ if(strncmp(optarg, SCR_STRING, 
strlen(SCR_STRING)) == 0){ /* if the string is SCR, then forward all SCR related env vars */ if(verbose_print) printf("all scr envs will be forwarded\n"); forward_all_scr_envs = true; } else{ /* handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } } else{ /*handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } break; case 'v': verbose_print = true; break; case 'p': pmix_mode = true; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'P': pmix_mode = false; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'e': experimental = true; break; case '?': printf("missing a required argument or invalid option: %x\n", optopt); print_usage(argv[0]); exit(1); break; default: printf("Unrecognized argument: %c\n", c); print_usage(argv[0]); exit(1); break; } } /* number of instances to spawn */ number_of_clients = proc_count; /* check to make sure an application was specified to launch */ if( optind < argc ){ /* if optind is < argc, it means there is at least one more arg * beyond the args for this program */ path_to_app = argv[optind]; spawned_app_argc = argc - optind; if(verbose_print) { printf("app to launch: %s @ %s:%d\n", path_to_app, __FILE__, __LINE__); } } else{ printf("program_to_spawn option was not provded\n"); print_usage(argv[0]); exit(1); } if(verbose_print){ printf("master process will spawn %d instances; app to run: %s\n\n", number_of_clients, path_to_app); printf("pmix version: %s (host: %s)\n", PMIx_Get_version(), hostn); } /* init pmix */ retval = PMIx_Init(&main_proc, NULL, 0); if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error initializing pmix"); exit(0); } if(verbose_print){ printf("rank %d, host '%s', nspace: '%s' init'd pmix succesfully\n\n", main_proc.rank, hostn, main_proc.nspace); } /* we need to attach to a "system" PMIx server so we * can ask it to spawn applications for us. 
There can * only be one such connection on a node, so we will * instruct the tool library to only look for it */ int ninfo = 1; PMIX_INFO_CREATE(info, ninfo); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_TO_SYSTEM, &flag, PMIX_BOOL); /* initialize the library and make the connection */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&tool_proc, NULL, 0 ))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc ); exit(rc); } if (0 < ninfo) { PMIX_INFO_FREE(info, ninfo); } /* first call fence to sync all processes */ retval = fence_helper(); if(retval != PMIX_SUCCESS) { error_helper(retval, hostn, "error fencing"); exit(retval); } /* Process SCR env vars if needed */ if(forward_all_scr_envs){ parse_all_scr_envs(&scr_environ, environ); } /* finalize the env array so a NULL is in place */ finalize_array(scr_environ); /* Setup info structs to pass to this: */ /* pmix_info_t *error_info = NULL; */ /* PMIX_INFO_CREATE(error_info, 1); */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_ABORT, PMIX_MAX_KEYLEN); error_info[0].value.type = PMIX_BOOL; error_info[0].value.data.flag = true; */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_SPAWN, PMIX_MAX_KEYLEN); int t_val = 1; pmix_value_load(&error_info[1].value, &t_val, PMIX_BOOL); */ /*error_info[1].value.type = PMIX_BOOL; error_info[1].value.data.flag = true; */ /* strncpy(error_info[2].key, PMIX_ERROR_GROUP_GENERAL, PMIX_MAX_KEYLEN); error_info[2].value.type = PMIX_BOOL; error_info[2].value.data.flag = true; */ /* TODO: setup error handling when implemented in pmix with the * following error codes: */ /* pmix_status_t registered_codes[5]; registered_codes[0] = PMIX_ERR_JOB_TERMINATED; registered_codes[1] = PMIX_ERR_PROC_ABORTED; registered_codes[2] = PMIX_ERR_PROC_ABORTING; */ PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler_cb, errhandler_reg_callbk, (void *) NULL); /* PMIX_INFO_DESTRUCT(error_info); */ /* allocate memory to hold the spawend app struct */ PMIX_APP_CREATE(spawned_app, 1); /* maxprocs isn't documented 
very well, but it appears to control * how many instances of the spanwed app are created */ spawned_app->maxprocs = number_of_clients; /* set the app to run */ (void)asprintf(&spawned_app->cmd, "%s", path_to_app); /* set argv for spawned app starting with remaining argv */ spawned_app->argv = &argv[optind]; /* set the environment pointer */ spawned_app->env = scr_environ; /*--START: add all proc level infos */ /* add things to the proc level info */ if(!pmix_mode){ job_info_count++; } if(host_to_use != NULL){ proc_info_count++; } if(verbose_print){ printf("enabling debug feature for forwarding stdout/stderr\n"); proc_info_count+=2; /* add PMIX_FWD_STDOUT and PMIX_FWD_STDERR later*/ } if(experimental){ job_info_count++; } if(node_count == 1){ job_info_count++; } /*--END: add all proc level infos */ /*--START: append actual proc level info */ PMIX_INFO_CREATE(job_info, job_info_count); PMIX_INFO_CREATE(proc_info, proc_info_count); /* PMIX_VAL_set_assign(_v, _field, _val ) */ /* PMIX_VAL_set_strdup(_v, _field, _val ) */ if(host_to_use != NULL){ /* add info struct to the spawned app itself for the host */ /* old way */ strncpy(proc_info[proc_info_index].key, PMIX_HOST, PMIX_MAX_KEYLEN); //proc_info[proc_info_index].value.type = PMIX_STRING; /* set the data for host list to use */ //proc_info[proc_info_index].value.data.string = host_to_use; /* end old way */ if(verbose_print) printf("about to set host val\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), string, host_to_use ); proc_info_index++; } if(!pmix_mode){ strncpy(job_info[job_info_index].key, PMIX_NON_PMI, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set non pmix flag\n"); PMIX_VAL_SET(&(job_info[job_info_index].value), flag, true); job_info_index++; } if(verbose_print){ strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDOUT, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stdout flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; 
strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDERR, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stderr flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; } if(experimental){ printf("attempting to perform experiment\n"); bool local_flag = true; PMIX_INFO_LOAD(&job_info[job_info_index], PMIX_NOTIFY_COMPLETION, &local_flag, PMIX_BOOL); job_info_index++; } if(node_count == 1){ strncpy(job_info[job_info_index].key, PMIX_PPR, PMIX_MAX_KEYLEN); PMIX_VAL_SET(&(job_info[job_info_index].value), string, "1:n"); job_info_index++; } /*--END: append actual proc level info */ /* sanity check to make sure we covered all the info structs */ if(proc_info_index != proc_info_count ){ printf("bug: mismatch with appending proc info\n"); exit(1); } if(job_info_index != job_info_count){ printf("bug: mismatch with appending job info\n"); exit(1); } /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ /* fill in job_info */ /* strncpy(job_info[0].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_INT; job_info[0].value.data.integer = 10; */ /* strncpy(job_info[0].key, PMIX_NOTIFY_COMPLETION, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true; */ /*strncpy(spawned_app->info[0].key, PMIX_DISPLAY_MAP, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true;*/ /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ spawned_app->info = proc_info; spawned_app->ninfo = proc_info_count; if(verbose_print){ printf("proc level info count: %d\n", proc_info_count); } /* call spawn */ retval = PMIx_Spawn(job_info, job_info_count, spawned_app, 1, spawned_nsp); if(verbose_print) { printf("rank %d (host %s) just called spawn; spawned nspace: %s, retval:%d\n", main_proc.rank, hostn, spawned_nsp, retval); } if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error with spawn"); goto done; } /* TODO: TEMPORARY WORKAROUND TO WAIT 
FOR A SPAWNED PROCESS */ if(blocking_mode){ sleep(fixed_sleep); /* wait until app completes: */ while(!done_flag){ sleep(fixed_sleep); temp_counter++; if(temp_counter*fixed_sleep >= sleep_max) { if(verbose_print) printf("broke out early\n"); break; } } if(verbose_print){ if(done_flag == true) { printf("done_flag was set to true!\n"); } } } done: /* fence first */ retval = fence_helper(); if(retval != PMIX_SUCCESS){ if(verbose_print) printf("error fencing, finalize may fail ! \n"); } /* finalize */ PMIx_Deregister_event_handler(_g_errhandler_ref, NULL, NULL); if(verbose_print){ fprintf(stdout, "spawn master process (rank %d) (host %s) finalizing\n", main_proc.rank, hostn); } /* clean up pmix */ retval = PMIx_tool_finalize(); if(retval == PMIX_SUCCESS) { if(verbose_print){ printf("spawn master process %d finalize success\n\n", main_proc.rank); } } else { printf("spawn master process %d pmix_finalize FAILURE: %d\n\n", main_proc.rank, retval); } retval = PMIx_Finalize(NULL, 0); fflush(stdout); /* cleanup before returning */ PMIX_INFO_FREE(job_info, job_info_count); spawned_app->argv = NULL; PMIX_APP_FREE(spawned_app, 1); if(verbose_print) printf("%s exiting cleanly :)\n", argv[0]); return 0; }
int main(int argc, char **argv) { pmix_status_t rc; pmix_proc_t myproc; pmix_info_t *info; pmix_app_t *app; size_t ninfo, napps; /* check for user directives - this would include: * - a flag indicating we want to attach to a specified application * - application info if we are launching a new app */ /* init us - if a PMIx server pid was provided, then pass it along */ if (0 < server_pid) { ninfo = 1; PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_PIDINFO, server_pid, PMIX_UINT32); } else { info = NULL; ninfo = 0; } if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); exit(rc); } if (0 < ninfo) { PMIX_INFO_FREE(info, ninfo); } fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* if we are attaching to a running job, then attach to it */ if (attach) { ret = attach_to_running_job(argv[1]); } else { /* this is an initial launch - we need to launch the application * plus the debugger daemons, letting the RM know we are debugging * so that it will "pause" the app procs until we are ready */ napps = 2; PMIX_APP_CREATE(app, napps); /* setup the executable */ app[0].cmd = strdup("app"); app[0].argc = 1; app[0].argv = (char**)malloc(2*sizeof(char*)); app[0].argv[0] = strdup("app"); app[0].argv[1] = NULL; /* provide directives so the apps do what the user requested */ ninfo = 2; PMIX_INFO_CREATE(app[0].info, ninfo); PMIX_INFO_LOAD(&app[0].info[0], PMIX_NP, 128, PMIX_UINT64); PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "slot", PMIX_STRING); /* setup the name of the daemon executable to launch */ app[1].cmd = strdup("debuggerdaemon"); app[1].argc = 1; app[1].argv = (char**)malloc(2*sizeof(char*)); app[1].argv[0] = strdup("debuggerdaemon"); app[1].argv[1] = NULL; /* provide directives so the daemons go where we want, and * let the RM know these are debugger daemons */ ninfo = 2; PMIX_INFO_CREATE(app[1].info, ninfo); PMIX_INFO_LOAD(&app[1].info[0], PMIX_MAPBY, 
"ppr:1:node", PMIX_STRING); // instruct the RM to launch one copy of the executable on each node PMIX_INFO_LOAD(&app[1].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); // these are debugger daemons /* spawn the daemons */ PMIx_Spawn(NULL, 0, app, napps, dspace); /* cleanup */ PMIX_APP_FREE(app, napps); /* this is where a debugger tool would wait until the debug operation is complete */ } done: PMIx_tool_finalize(NULL, 0); return(ret); }