/*
 * Handle a PMI-1 "get" request: look up one key and reply with its value.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request; its body is parsed by this function
 * RET the return code of client_resp_send()
 *
 * The reply carries rc=0 plus the value when the key is found and rc=1
 * otherwise.
 */
static int
_handle_get(int fd, int lrank, client_req_t *req)
{
	int rc;
	client_resp_t *resp;
	char *kvsname = NULL, *key = NULL, *val = NULL;

	debug3("mpi/pmi2: in _handle_get");

	client_req_parse_body(req);
	client_req_get_str(req, KVSNAME_KEY, &kvsname); /* not used */
	client_req_get_str(req, KEY_KEY, &key);

	/*
	 * The strings handed back by client_req_get_str() belong to us
	 * (_handle_kvs_put releases them the same way), so drop them as soon
	 * as they are no longer needed instead of leaking them per request.
	 */
	xfree(kvsname);

	/* a request without a key is answered like a lookup miss */
	if (key != NULL)
		val = kvs_get(key);
	xfree(key);

	/*
	 * NOTE(review): val is not released here; presumably it still belongs
	 * to the KVS - confirm against kvs_get().
	 */
	resp = client_resp_new();
	if (val != NULL) {
		client_resp_append(resp, CMD_KEY"="GETRESULT_CMD" "
				   RC_KEY"=0 " VALUE_KEY"=%s\n", val);
	} else {
		client_resp_append(resp, CMD_KEY"="GETRESULT_CMD" "
				   RC_KEY"=1\n");
	}
	rc = client_resp_send(resp, fd);
	client_resp_free(resp);

	debug3("mpi/pmi2: out _handle_get");
	return rc;
}
/*
 * Handle a PMI-2 "fullinit" request.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task, used to index job_info.gtids
 * IN req   - client request; its body is parsed by this function
 * RET the return code of client_resp_send()
 *
 * The job id, rank and threaded fields must all be present; the first one
 * found missing is logged and reported to the client as
 * PMI2_ERR_INVALID_ARG. Their values are parsed but not used otherwise.
 * A reply is sent in every case.
 */
static int
_handle_fullinit(int fd, int lrank, client_req_t *req)
{
	int pmi_jobid, pmi_rank;
	bool threaded;
	int rc = PMI2_SUCCESS;
	client_resp_t *resp;

	debug3("mpi/pmi2: _handle_fullinit");

	client_req_parse_body(req);

	/* fields are checked in order; validation stops at the first miss */
	if (!client_req_get_int(req, PMIJOBID_KEY, &pmi_jobid)) {
		error(PMIJOBID_KEY" missing in fullinit command");
		rc = PMI2_ERR_INVALID_ARG;
	} else if (!client_req_get_int(req, PMIRANK_KEY, &pmi_rank)) {
		error(PMIRANK_KEY" missing in fullinit command");
		rc = PMI2_ERR_INVALID_ARG;
	} else if (!client_req_get_bool(req, THREADED_KEY, &threaded)) {
		error(THREADED_KEY" missing in fullinit command");
		rc = PMI2_ERR_INVALID_ARG;
	}
	/* TODO: use threaded */

	resp = client_resp_new();
	/* what's the difference between DEBUGGED and VERBOSE? */
	/* TODO: APPNUM */
	client_resp_append(resp,
			   CMD_KEY"="FULLINITRESP_CMD";" RC_KEY"=%d;"
			   PMIVERSION_KEY"=%d;" PMISUBVER_KEY"=%d;"
			   RANK_KEY"=%d;" SIZE_KEY"=%d;"
			   APPNUM_KEY"=-1;" DEBUGGED_KEY"="FALSE_VAL";"
			   PMIVERBOSE_KEY"=%s;",
			   rc,
			   PMI20_VERSION, PMI20_SUBVERSION,
			   job_info.gtids[lrank], job_info.ntasks,
			   (job_info.pmi_debugged ? TRUE_VAL : FALSE_VAL));

	/* the spawner job id is only advertised when one is recorded */
	if (job_info.spawner_jobid) {
		client_resp_append(resp, SPAWNERJOBID_KEY"=%s;",
				   job_info.spawner_jobid);
	}

	rc = client_resp_send(resp, fd);
	client_resp_free(resp);

	debug3("mpi/pmi2: fullinit done");
	return rc;
}
/*
 * Handle a PMI-2 "kvs-put" request.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request; its body is parsed by this function
 * RET the return code of client_resp_send()
 *
 * The result of temp_kvs_add() is reported to the client in the reply.
 */
static int
_handle_kvs_put(int fd, int lrank, client_req_t *req)
{
	char *put_key = NULL, *put_val = NULL;
	client_resp_t *reply;
	int add_rc, send_rc;

	debug3("mpi/pmi2: in _handle_kvs_put");

	client_req_parse_body(req);
	client_req_get_str(req, KEY_KEY, &put_key);
	client_req_get_str(req, VALUE_KEY, &put_val);

	/* no need to add k-v to hash. just get it ready to be up-forward */
	add_rc = temp_kvs_add(put_key, put_val);

	/* our copies of the request strings are no longer needed */
	xfree(put_key);
	xfree(put_val);

	reply = client_resp_new();
	client_resp_append(reply, CMD_KEY"="KVSPUTRESP_CMD";" RC_KEY"=%d;",
			   add_rc);
	send_rc = client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_kvs_put");
	return send_rc;
}
/*
 * Handle a PMI-1 "put" request.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request; its body is parsed by this function
 * RET the return code of client_resp_send()
 *
 * The reply carries rc=0 when temp_kvs_add() returns SLURM_SUCCESS and
 * rc=1 otherwise.
 */
static int
_handle_put(int fd, int lrank, client_req_t *req)
{
	int rc = SLURM_SUCCESS;
	client_resp_t *resp;
	char *kvsname = NULL, *key = NULL, *val = NULL;

	debug3("mpi/pmi2: in _handle_put");

	client_req_parse_body(req);
	client_req_get_str(req, KVSNAME_KEY, &kvsname); /* not used */
	client_req_get_str(req, KEY_KEY, &key);
	client_req_get_str(req, VALUE_KEY, &val);

	/* no need to add k-v to hash. just get it ready to be up-forward */
	rc = temp_kvs_add(key, val);

	/*
	 * Release the strings obtained from the request, exactly as
	 * _handle_kvs_put does after temp_kvs_add(); without this every
	 * put request leaked three strings.
	 */
	xfree(kvsname);
	xfree(key);
	xfree(val);

	/* map the internal status onto the 0/1 code used on the wire */
	rc = (rc == SLURM_SUCCESS) ? 0 : 1;

	resp = client_resp_new();
	client_resp_append(resp, CMD_KEY"="PUTRESULT_CMD" " RC_KEY"=%d\n", rc);
	rc = client_resp_send(resp, fd);
	client_resp_free(resp);

	debug3("mpi/pmi2: out _handle_put");
	return rc;
}
extern int node_attr_put(char *key, char *val) { nag_req_t *req = NULL, **pprev = NULL; client_resp_t *resp = NULL; int rc = SLURM_SUCCESS; debug3("mpi/pmi2: node_attr_put: %s=%s", key, val); if (na_cnt * 2 >= na_size) { na_size += NODE_ATTR_SIZE_INC; xrealloc(node_attr, na_size * sizeof(char*)); } node_attr[KEY_INDEX(na_cnt)] = xstrdup(key); node_attr[VAL_INDEX(na_cnt)] = xstrdup(val); na_cnt ++; /* process pending requests */ pprev = &nag_req_list; req = *pprev; while (req != NULL) { if (strncmp(key, req->key, PMI2_MAX_KEYLEN)) { pprev = &req->next; req = *pprev; } else { debug("mpi/pmi2: found pending request from rank %d", req->rank); /* send response msg */ if (! resp) { resp = client_resp_new(); client_resp_append(resp, CMD_KEY"=" GETNODEATTRRESP_CMD";" RC_KEY"=0;" FOUND_KEY"="TRUE_VAL";" VALUE_KEY"=%s;", val); } rc = client_resp_send(resp, req->fd); if (rc != SLURM_SUCCESS) { error("mpi/pmi2: failed to send '" GETNODEATTRRESP_CMD "' to task %d", req->rank); } /* remove the request */ *pprev = req->next; _free_nag_req(req); req = *pprev; } } if (resp) { client_resp_free (resp); } debug3("mpi/pmi2: out node_attr_put"); return SLURM_SUCCESS; }
/* send fence_resp/barrier_out to tasks */ extern int send_kvs_fence_resp_to_clients(int rc, char *errmsg) { int i = 0; client_resp_t *resp; char *msg; resp = client_resp_new(); if ( is_pmi11() ) { if (rc != 0 && errmsg != NULL) { // XXX: pmi1.1 does not check the rc msg = _str_replace(errmsg, ' ', '_'); client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" " RC_KEY"=%d "MSG_KEY"=%s\n", rc, msg); xfree(msg); } else { client_resp_append(resp, CMD_KEY"="BARRIEROUT_CMD" " RC_KEY"=%d\n", rc); } } else if (is_pmi20()) { if (rc != 0 && errmsg != NULL) { // TODO: pmi2.0 accept escaped ';' (";;") msg = _str_replace(errmsg, ';', '_'); client_resp_append(resp, CMD_KEY"="KVSFENCERESP_CMD";" RC_KEY"=%d;"ERRMSG_KEY"=%s;", rc, msg); xfree(msg); } else { client_resp_append(resp, CMD_KEY"="KVSFENCERESP_CMD";" RC_KEY"=%d;", rc); } } for (i = 0; i < job_info.ltasks; i ++) { rc = client_resp_send(resp, STEPD_PMI_SOCK(i)); } client_resp_free(resp); return rc; }
/*
 * Handle a PMI-2 "job-getid" request by replying with job_info.pmi_jobid.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request (not inspected here)
 * RET the return code of client_resp_send()
 */
static int
_handle_job_getid(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply;
	int send_rc;

	debug3("mpi/pmi2: in _handle_job_getid");

	reply = client_resp_new();
	client_resp_append(reply,
			   CMD_KEY"="JOBGETIDRESP_CMD";" RC_KEY"=0;"
			   JOBID_KEY"=%s;",
			   job_info.pmi_jobid);
	send_rc = client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_job_getid");
	return send_rc;
}
/*
 * Handle a PMI-1 "get_my_kvsname" request.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request (not inspected here)
 * RET the return code of client_resp_send()
 *
 * The KVS name sent back is "<jobid>.<stepid>", taken from job_info.
 */
static int
_handle_get_my_kvsname(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply;
	int send_rc;

	debug3("mpi/pmi2: in _handle_get_my_kvsname");

	reply = client_resp_new();
	/* the reported rc is always 0 */
	client_resp_append(reply,
			   CMD_KEY"="GETMYKVSNAMERESP_CMD" " RC_KEY"=%d "
			   KVSNAME_KEY"=%u.%u\n",
			   0, job_info.jobid, job_info.stepid);
	send_rc = client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_get_my_kvsname");
	return send_rc;
}
/*
 * Handle a PMI-2 "finalize" request.
 *
 * IN fd    - PMI socket of the task; it is shut down and closed here
 * IN lrank - local rank of the task, passed on to task_finalize()
 * IN req   - client request (not inspected here)
 * RET the return code of client_resp_send()
 *
 * The response is sent before the socket is torn down.
 */
static int
_handle_finalize(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply = client_resp_new();
	int send_rc;

	/* the reported rc is always 0 */
	client_resp_append(reply, CMD_KEY"="FINALIZERESP_CMD";"
			   RC_KEY"=%d;", 0);
	send_rc = client_resp_send(reply, fd);
	client_resp_free(reply);

	/* shutdown the PMI fd */
	shutdown(fd, SHUT_RDWR);
	close(fd);

	task_finalize(lrank);
	return send_rc;
}
/*
 * Handle a PMI-1 "get_universe_size" request by replying with
 * job_info.ntasks.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request (not inspected here)
 * RET always SLURM_SUCCESS; the send result is deliberately ignored
 */
static int
_handle_get_universe_size(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply;

	debug3("mpi/pmi2: in _handle_get_universe_size");

	reply = client_resp_new();
	/* the reported rc is always 0 */
	client_resp_append(reply,
			   CMD_KEY"="UNIVSIZE_CMD" " RC_KEY"=%d "
			   SIZE_KEY"=%d\n",
			   0, job_info.ntasks);
	(void) client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_get_universe_size");
	return SLURM_SUCCESS;
}
/*
 * Handle a PMI-1 "get_maxes" request by replying with the compile-time
 * limits MAXKVSNAME, MAXKEYLEN and MAXVALLEN.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request (not inspected here)
 * RET always SLURM_SUCCESS; the send result is deliberately ignored
 */
static int
_handle_get_maxes(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply;

	debug3("mpi/pmi2: in _handle_get_maxes");

	reply = client_resp_new();
	/* the reported rc is always 0 */
	client_resp_append(reply,
			   CMD_KEY"="MAXES_CMD" " RC_KEY"=%d "
			   KVSNAMEMAX_KEY"=%d " KEYLENMAX_KEY"=%d "
			   VALLENMAX_KEY"=%d\n",
			   0, MAXKVSNAME, MAXKEYLEN, MAXVALLEN);
	(void) client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_get_maxes");
	return SLURM_SUCCESS;
}
/*
 * Handle a PMI-1 "finalize" request.
 *
 * IN fd    - PMI socket of the task; it is shut down and closed here
 * IN lrank - local rank of the task, passed on to task_finalize()
 * IN req   - client request (not inspected here)
 * RET the return code of client_resp_send()
 *
 * The acknowledgement is sent before the socket is torn down.
 */
static int
_handle_finalize(int fd, int lrank, client_req_t *req)
{
	client_resp_t *ack;
	int send_rc;

	debug3("mpi/pmi2: in _handle_finalize");

	ack = client_resp_new();
	/* the reported rc is always 0 */
	client_resp_append(ack, CMD_KEY"="FINALIZEACK_CMD" "
			   RC_KEY"=%d\n", 0);
	send_rc = client_resp_send(ack, fd);
	client_resp_free(ack);

	debug3("mpi/pmi2: out _handle_finalize");

	/* shutdown the PMI fd */
	shutdown(fd, SHUT_RDWR);
	close(fd);

	task_finalize(lrank);
	return send_rc;
}
/*
 * Handle a PMI-1 "get_appnum" request; the reported application number
 * is always -1.
 *
 * IN fd    - socket the reply is written to
 * IN lrank - local rank of the requesting task (not used here)
 * IN req   - client request (not inspected here)
 * RET always SLURM_SUCCESS; the send result is deliberately ignored
 */
static int
_handle_get_appnum(int fd, int lrank, client_req_t *req)
{
	client_resp_t *reply;

	debug3("mpi/pmi2: in _handle_get_appnum");

	reply = client_resp_new();
	/*
	 * TODO: spawn_multiple: order number of command
	 *       spawn: 0
	 *       otherwise: -1, since no way to get the order
	 *         number from multi-prog conf
	 */
	client_resp_append(reply,
			   CMD_KEY"="APPNUM_CMD" " RC_KEY"=%d "
			   APPNUM_KEY"=-1\n",
			   0);
	(void) client_resp_send(reply, fd);
	client_resp_free(reply);

	debug3("mpi/pmi2: out _handle_get_appnum");
	return SLURM_SUCCESS;
}
/*
 * Handle one PMI-1 "mcmd" message, i.e. one sub-command of a (possibly
 * multi-command) spawn request.
 *
 * IN fd    - PMI socket of the requesting task
 * IN lrank - local rank of the requesting task
 * IN req   - client request; its body is parsed by this function
 * RET SLURM_SUCCESS, SLURM_ERROR for a malformed or failed request, or
 *     the return code of spawn_req_send_to_srun()
 *
 * Sub-commands are collected in the file-level pmi1_spawn request. The
 * message numbered 1 creates the request and carries the totals; once the
 * message whose number equals the total arrives, the whole request is sent
 * to srun. On success the srun sequence number is queued with
 * spawn_psr_enqueue() and no response is sent to the task from here.
 *
 * All counters come from the client and are validated before they are
 * used as allocation sizes or array indexes.
 */
static int
_handle_mcmd(int fd, int lrank, client_req_t *req)
{
	spawn_subcmd_t *subcmd = NULL;
	spawn_resp_t *spawn_resp = NULL;
	client_resp_t *task_resp = NULL;
	int spawnssofar = 0, rc = SLURM_SUCCESS, i;
	int totspawns = 0, preput_num = 0;
	int fail_rc;
	char buf[64];

	debug3("mpi/pmi2: in _handle_mcmd");

	client_req_parse_body(req);
	subcmd = client_req_parse_spawn_subcmd(req);

	debug3("mpi/pmi2: got subcmd");

	client_req_get_int(req, SPAWNSSOFAR_KEY, &spawnssofar);
	if (spawnssofar == 1) {
		/*
		 * Read the totals into plain ints first so that a missing or
		 * negative value is rejected before it sizes an allocation.
		 */
		client_req_get_int(req, TOTSPAWNS_KEY, &totspawns);
		client_req_get_int(req, PREPUTNUM_KEY, &preput_num);
		if (totspawns < 1 || preput_num < 0) {
			/*
			 * NOTE(review): subcmd is not released on this path;
			 * no release helper for it is visible in this chunk.
			 */
			error("mpi/pmi2: invalid " TOTSPAWNS_KEY "=%d or "
			      PREPUTNUM_KEY "=%d in spawn command",
			      totspawns, preput_num);
			rc = SLURM_ERROR;
			goto out;
		}

		pmi1_spawn = spawn_req_new();
		pmi1_spawn->subcmd_cnt = totspawns;
		pmi1_spawn->subcmds = xmalloc(pmi1_spawn->subcmd_cnt *
					      sizeof(spawn_subcmd_t *));
		pmi1_spawn->preput_cnt = preput_num;
		pmi1_spawn->pp_keys = xmalloc(pmi1_spawn->preput_cnt *
					      sizeof(char *));
		pmi1_spawn->pp_vals = xmalloc(pmi1_spawn->preput_cnt *
					      sizeof(char *));
		for (i = 0; i < preput_num; i ++) {
			snprintf(buf, sizeof(buf), PREPUTKEY_KEY"%d", i);
			client_req_get_str(req, buf, &pmi1_spawn->pp_keys[i]);
			snprintf(buf, sizeof(buf), PREPUTVAL_KEY"%d", i);
			client_req_get_str(req, buf, &pmi1_spawn->pp_vals[i]);
		}
	}

	/*
	 * The sub-command number indexes subcmds[]; without this check a
	 * missing number (0), an out-of-range one, or a message arriving
	 * before message 1 would write outside the array or through a NULL
	 * request. The lower bound is tested first so the upper-bound
	 * comparison never sees a negative value.
	 */
	if (pmi1_spawn == NULL || spawnssofar < 1 ||
	    spawnssofar > pmi1_spawn->subcmd_cnt) {
		error("mpi/pmi2: invalid " SPAWNSSOFAR_KEY "=%d in spawn command",
		      spawnssofar);
		/*
		 * Drop the partially collected request.
		 * NOTE(review): assumes spawn_req_free() copes with unfilled
		 * subcmd slots, and subcmd itself is not released here -
		 * confirm both.
		 */
		if (pmi1_spawn != NULL) {
			spawn_req_free(pmi1_spawn);
			pmi1_spawn = NULL;
		}
		rc = SLURM_ERROR;
		goto out;
	}
	pmi1_spawn->subcmds[spawnssofar - 1] = subcmd;

	if (spawnssofar == pmi1_spawn->subcmd_cnt) {
		debug3("mpi/pmi2: got whole spawn req");
		/* a resp will be sent back from srun.
		   this will not be forwarded to the tasks */
		rc = spawn_req_send_to_srun(pmi1_spawn, &spawn_resp);

		/*
		 * spawn_resp is still NULL when no response was produced;
		 * treat that as a failure instead of dereferencing it.
		 */
		if (spawn_resp == NULL || spawn_resp->rc != SLURM_SUCCESS) {
			if (spawn_resp != NULL)
				fail_rc = spawn_resp->rc;
			else if (rc != SLURM_SUCCESS)
				fail_rc = rc;
			else
				fail_rc = SLURM_ERROR;

			task_resp = client_resp_new();
			client_resp_append(task_resp, CMD_KEY"="SPAWNRESP_CMD";"
					   RC_KEY"=%d;"
					   ERRMSG_KEY"=spawn failed;",
					   fail_rc);
			client_resp_send(task_resp, fd);
			client_resp_free(task_resp);

			if (spawn_resp != NULL)
				spawn_resp_free(spawn_resp);
			spawn_req_free(pmi1_spawn);
			pmi1_spawn = NULL;
			error("mpi/pmi2: spawn failed");
			rc = SLURM_ERROR;
			goto out;
		}

		debug("mpi/pmi2: spawn request sent to srun");
		spawn_psr_enqueue(spawn_resp->seq, fd, lrank, NULL);

		spawn_resp_free(spawn_resp);
		spawn_req_free(pmi1_spawn);
		pmi1_spawn = NULL;
	}
out:
	debug3("mpi/pmi2: out _handle_mcmd");
	return rc;
}
/* ring_out messages come in from our parent, * we process this and send ring_out messages to each of our children: * count - starting rank for our leftmost application process * left - left value for leftmost application process in our subtree * right - right value for rightmost application process in our subtree */ int pmix_ring_out(int count, char* left, char* right) { int rc = SLURM_SUCCESS; debug3("mpi/pmi2: in pmix_ring_out rank=%d count=%d left=%s right=%s", pmix_stepd_rank, count, left, right); /* our parent will send us a pmix_ring_out message, the count value * contained in this message will be the rank of the first process * in our subtree, the left value will be the left value for the * first process in the subtree, and the right value will be the * right value for the last process in our subtree */ /* allocate a structure to compute values to send to each child */ pmix_ring_msg* outmsgs = (pmix_ring_msg*) xmalloc(pmix_ring_children * sizeof(pmix_ring_msg)); /* initialize messages to all children */ int i; for (i = 0; i < pmix_ring_children; i++) { outmsgs[i].count = 0; outmsgs[i].left = NULL; outmsgs[i].right = NULL; } /* iterate over all msgs and set count and left neighbor */ for (i = 0; i < pmix_ring_children; i++) { /* store current count in output message */ outmsgs[i].count = count; /* add count for this child to our running total */ count += pmix_ring_msgs[i].count; /* set left value for this child */ outmsgs[i].left = left; /* get right value from child, if it exists, * it will be the left neighbor of the next child, * otherwise, reuse the current left value */ char* next = pmix_ring_msgs[i].right; if (next != NULL) { left = next; } } /* now set all right values (iterate backwards through children) */ for (i = (pmix_ring_children - 1); i >= 0; i--) { /* set right value for this child */ outmsgs[i].right = right; /* get left value from child, if it exists, * it will be the right neighbor of the next child, * otherwise, reuse the current right 
value */ char* next = pmix_ring_msgs[i].left; if (next != NULL) { right = next; } } /* send messages to children in stepd tree, * we do this first to get the message down the tree quickly */ for (i = 0; i < pmix_stepd_children; i++) { /* get pointer to message data for this child */ int ring_id = pmix_app_children + i; pmix_ring_msg* msg = &outmsgs[ring_id]; /* TODO: do we need hton translation? */ /* construct message */ Buf buf = init_buf(1024); pack16(TREE_CMD_RING_RESP, buf); /* specify message type (RING_OUT) */ pack32((uint32_t) msg->count, buf); /* send count value */ packstr(msg->left, buf); /* send left value */ packstr(msg->right, buf); /* send right value */ /* get global rank of our i-th child stepd */ int rank = pmix_stepd_rank_child(i); debug3("mpi/pmi2: rank=%d sending RING_OUT to rank=%d count=%d left=%s right=%s", pmix_stepd_rank, rank, msg->count, msg->left, msg->right); /* send message to child */ rc = pmix_stepd_send(get_buf_data(buf), (uint32_t) size_buf(buf), rank); /* TODO: use tmp_rc here to catch any failure */ /* free message */ free_buf(buf); } /* now send messages to children app procs, * and set their state back to normal */ for (i = 0; i < pmix_app_children; i++) { /* get pointer to message data for this child */ pmix_ring_msg* msg = &outmsgs[i]; /* TODO: want to catch send failure here? 
*/ /* construct message and send to client */ client_resp_t *resp = client_resp_new(); client_resp_append(resp, "%s=%s;%s=%d;%s=%d;%s=%s;%s=%s;", CMD_KEY, RINGRESP_CMD, RC_KEY, 0, RING_COUNT_KEY, msg->count, RING_LEFT_KEY, msg->left, RING_RIGHT_KEY, msg->right); client_resp_send(resp, STEPD_PMI_SOCK(i)); client_resp_free(resp); } /* delete messages, note that we don't need to free * left and right strings in each message since they * are pointers to strings allocated in pmix_ring_msgs */ xfree(outmsgs); /* clear the pmix_ring_in messages for next ring operation */ for (i = 0; i < pmix_ring_children; i++) { pmix_ring_msg* msg = &pmix_ring_msgs[i]; msg->count = 0; if (msg->left != NULL) { xfree(msg->left); msg->left = NULL; } if (msg->right != NULL) { xfree(msg->right); msg->right = NULL; } } /* reset our ring count */ pmix_ring_count = 0; debug3("mpi/pmi2: out pmix_ring_out"); return rc; }