int pmixp_server_health_chk(char *hostlist, const char *addr) { send_header_t hdr; char nhdr[sizeof(send_header_t)]; size_t hsize; Buf buf = pmixp_server_new_buf(); char *data = get_buf_data(buf); int rc; hdr.magic = PMIX_SERVER_MSG_MAGIC; hdr.type = PMIXP_MSG_HEALTH_CHK; hdr.msgsize = 1; hdr.seq = 0; /* Store global nodeid that is * independent from exact collective */ hdr.nodeid = pmixp_info_nodeid_job(); hsize = _send_pack_hdr(&hdr, nhdr); memcpy(data, nhdr, hsize); grow_buf(buf, sizeof(char)); pack8('\n', buf); rc = pmixp_stepd_send(hostlist, addr, data, get_buf_offset(buf), 4, 14, 1); if (SLURM_SUCCESS != rc) { PMIXP_ERROR("Was unable to wait for the parent %s to become alive on addr %s", hostlist, addr); } return rc; }
/*
 * Allocate a server buffer whose first PMIXP_SERVER_BUFFER_OFFS bytes are
 * set aside for the message header.
 *
 * The returned buffer has its offset positioned right after that
 * reserved area; the header itself is filled in right before sending.
 */
Buf pmixp_server_buf_new(void)
{
	size_t hdr_space = PMIXP_SERVER_BUFFER_OFFS;
	Buf new_buf = create_buf(xmalloc(hdr_space), hdr_space);
	uint32_t *svc = (uint32_t *)get_buf_data(new_buf);

	/*
	 * The first uint32_t cell holds the payload offset.
	 * 0 is a special value: the buffer was not finalized yet.
	 */
	svc[0] = 0;
#ifdef PMIXP_DEBUG_SERVER
	xassert(PMIXP_BASE_HDR_MAX >= sizeof(uint32_t));
	/*
	 * Tag the buffer so that later checks can make sure only buffers
	 * allocated through this call (with reserved header space) are used.
	 */
	svc[1] = PMIXP_SERVER_BUF_MAGIC;
#endif

	/* Skip the header area */
	set_buf_offset(new_buf, hdr_space);
	return new_buf;
}
/*
 * Write a length-prefixed response to fd and release the buffer.
 *
 * The packed size (network byte order, 32 bit) is written first, followed
 * by the packed bytes. The buffer is freed on every path.
 *
 * Returns SLURM_SUCCESS if everything was written, SLURM_ERROR otherwise.
 */
static int _send_resp(slurm_fd_t fd, Buf buffer)
{
	int status = SLURM_ERROR;
	uint32_t remaining, net_len;
	ssize_t written;
	char *cursor;

	if ((fd < 0) || (!fd_writeable(fd)))
		goto done;

	/* length prefix */
	remaining = get_buf_offset(buffer);
	net_len = htonl(remaining);
	if (!fd_writeable(fd))
		goto done;
	written = write(fd, &net_len, sizeof(net_len));
	if (written != sizeof(net_len))
		goto done;

	/* body, possibly over several partial writes */
	cursor = get_buf_data(buffer);
	while (remaining > 0) {
		if (!fd_writeable(fd))
			goto done;
		written = write(fd, cursor, remaining);
		if (written <= 0)
			goto done;
		cursor += written;
		remaining -= written;
	}
	status = SLURM_SUCCESS;

done:
	free_buf(buffer);
	return status;
}
int io_init_msg_write_to_fd(int fd, struct slurm_io_init_msg *msg) { Buf buf; void *ptr; int n; xassert(msg); debug2("Entering io_init_msg_write_to_fd"); msg->version = IO_PROTOCOL_VERSION; buf = init_buf(io_init_msg_packed_size()); debug2(" msg->nodeid = %d", msg->nodeid); io_init_msg_pack(msg, buf); ptr = get_buf_data(buf); again: if ((n = write(fd, ptr, io_init_msg_packed_size())) < 0) { if (errno == EINTR) goto again; free_buf(buf); return SLURM_ERROR; } if (n != io_init_msg_packed_size()) { error("io init msg write too small"); free_buf(buf); return SLURM_ERROR; } free_buf(buf); debug2("Leaving io_init_msg_write_to_fd"); return SLURM_SUCCESS; }
/*
 * Callback that turns the supplied data into a DMDX_RESPONSE message and
 * sends it back to the host recorded in the caddy.
 *
 * status - status placed into the response header
 * data   - payload bytes to send
 * sz     - payload size
 * cbdata - dmdx_caddy_t describing the requester; released here
 */
static void _dmdx_pmix_cb(pmix_status_t status, char *data, size_t sz,
			  void *cbdata)
{
	dmdx_caddy_t *req_info = (dmdx_caddy_t *)cbdata;
	Buf resp = pmixp_server_new_buf();
	char *usock;
	int send_rc;

	/* response header followed by the payload */
	_setup_header(resp, DMDX_RESPONSE, req_info->proc.nspace,
		      req_info->proc.rank, status);
	packmem(data, sz, resp);

	/* address of the requesting namespace */
	usock = pmixp_info_nspace_usock(req_info->sender_ns);

	send_rc = pmixp_server_send(req_info->sender_host, PMIXP_MSG_DMDX,
				    req_info->seq_num, usock,
				    get_buf_data(resp), get_buf_offset(resp),
				    1);
	if (SLURM_SUCCESS != send_rc) {
		/* not much we can do here. Caller will react by timeout */
		PMIXP_ERROR("Cannot send direct modex response to %s",
			    req_info->sender_host);
	}

	xfree(usock);
	free_buf(resp);
	_dmdx_free_caddy(req_info);
}
extern int name_unpublish_up(char *name) { Buf buf = NULL, resp_buf = NULL; uint32_t size, tmp_32; int rc; buf = init_buf(1024); pack16((uint16_t)TREE_CMD_NAME_UNPUBLISH, buf); packstr(name, buf); size = get_buf_offset(buf); rc = tree_msg_to_srun_with_resp(size, get_buf_data(buf), &resp_buf); free_buf(buf); if (rc == SLURM_SUCCESS) { safe_unpack32(&tmp_32, resp_buf); rc = (int) tmp_32; } unpack_error: if (resp_buf) free_buf(resp_buf); return rc; }
/*
 * Pack the bytes written so far into the Buf passed as `in` into `buffer`
 * as one memory block. rpc_version is not used.
 */
static void _pack_buffer(void *in, uint16_t rpc_version, Buf buffer)
{
	Buf src = (Buf)in;

	packmem(get_buf_data(src), get_buf_offset(src), buffer);
}
/*
 * Send the packed content of `buffer` over a persistent connection as a
 * 32-bit network-order length followed by the data.
 *
 * When the connection reports -1 from slurm_persist_conn_writeable(), it
 * is reopened (only if PERSIST_FLAG_RECONNECT is set) and the whole
 * message, length prefix included, is sent again.
 *
 * Returns SLURM_SUCCESS when everything was written, EAGAIN when the
 * connection is not usable right now or a write failed,
 * ESLURM_ACCESS_DENIED if errno carries that value when a reopen is
 * considered, and SLURM_ERROR for a NULL buffer or when reconnecting is
 * not allowed.
 */
extern int slurm_persist_send_msg(slurm_persist_conn_t *persist_conn,
				  Buf buffer)
{
	uint32_t left, net_len;
	char *cursor;
	ssize_t sent;
	int ready, reopen_tries = 0;
	bool lost_conn;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return EAGAIN;

	if (!buffer)
		return SLURM_ERROR;

	ready = slurm_persist_conn_writeable(persist_conn);
	for (;;) {
		if (ready == -1) {
			if (reopen_tries++ > 3)
				return EAGAIN;
			/*
			 * if errno is ACCESS_DENIED do not try to reopen the
			 * connection, just return that
			 */
			if (errno == ESLURM_ACCESS_DENIED)
				return ESLURM_ACCESS_DENIED;
			if (!(persist_conn->flags & PERSIST_FLAG_RECONNECT))
				return SLURM_ERROR;

			slurm_persist_conn_reopen(persist_conn, true);
			ready = slurm_persist_conn_writeable(persist_conn);
		}
		if (ready < 1)
			return EAGAIN;

		/* length prefix */
		left = get_buf_offset(buffer);
		net_len = htonl(left);
		sent = write(persist_conn->fd, &net_len, sizeof(net_len));
		if (sent != sizeof(net_len))
			return EAGAIN;

		/* message body */
		cursor = get_buf_data(buffer);
		lost_conn = false;
		while (left > 0) {
			ready = slurm_persist_conn_writeable(persist_conn);
			if (ready == -1) {
				/* go back to the reopen logic above */
				lost_conn = true;
				break;
			}
			if (ready < 1)
				return EAGAIN;
			sent = write(persist_conn->fd, cursor, left);
			if (sent <= 0)
				return EAGAIN;
			cursor += sent;
			left -= sent;
		}

		if (!lost_conn)
			return SLURM_SUCCESS;
	}
}
/*
 * save_cred_state - save the current credential state to a file
 * IN ctx - credential context to pack and save
 * RET int - SLURM_SUCCESS, or an error code (errno of the failing call,
 *           or SLURM_ERROR for a short write)
 *
 * The state is written to "<spooldir>/cred_state.new"; afterwards the
 * previous "cred_state" is kept as "cred_state.old" and the new file is
 * linked in as "cred_state".
 */
int save_cred_state(slurm_cred_ctx_t ctx)
{
	char *old_file, *new_file, *reg_file;
	int cred_fd = -1, error_code = SLURM_SUCCESS;
	ssize_t wrote;
	Buf buffer = NULL;
	static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;

	old_file = xstrdup(conf->spooldir);
	xstrcat(old_file, "/cred_state.old");
	reg_file = xstrdup(conf->spooldir);
	xstrcat(reg_file, "/cred_state");
	new_file = xstrdup(conf->spooldir);
	xstrcat(new_file, "/cred_state.new");

	slurm_mutex_lock(&state_mutex);
	if ((cred_fd = creat(new_file, 0600)) < 0) {
		/* capture errno before logging can change it */
		int saved_errno = errno;

		error("creat(%s): %m", new_file);
		if (saved_errno == ENOSPC)
			_drain_node("SlurmdSpoolDir is full");
		error_code = saved_errno;
		goto cleanup;
	}

	buffer = init_buf(1024);
	slurm_cred_ctx_pack(ctx, buffer);
	wrote = write(cred_fd, get_buf_data(buffer), get_buf_offset(buffer));
	if ((wrote < 0) || ((size_t) wrote != get_buf_offset(buffer))) {
		/* errno is only meaningful when write() itself failed */
		int saved_errno = (wrote < 0) ? errno : 0;

		error("write %s error %m", new_file);
		(void) unlink(new_file);
		if (saved_errno == ENOSPC)
			_drain_node("SlurmdSpoolDir is full");
		/* a short write must never be reported as success */
		error_code = saved_errno ? saved_errno : SLURM_ERROR;
		goto cleanup;
	}

	/* rotate: cred_state -> cred_state.old, cred_state.new -> cred_state */
	(void) unlink(old_file);
	if (link(reg_file, old_file))
		debug4("unable to create link for %s -> %s: %m",
		       reg_file, old_file);
	(void) unlink(reg_file);
	if (link(new_file, reg_file))
		debug4("unable to create link for %s -> %s: %m",
		       new_file, reg_file);
	(void) unlink(new_file);

cleanup:
	slurm_mutex_unlock(&state_mutex);
	xfree(old_file);
	xfree(reg_file);
	xfree(new_file);
	if (buffer)
		free_buf(buffer);
	/* 0 is a valid descriptor, only -1 means "not opened" */
	if (cred_fd >= 0)
		close(cred_fd);
	return error_code;
}
/*
 * Issue a direct modex request for (nspace, rank) to the host that
 * pmixp_nspace_resolve() returns for it.
 *
 * The request is recorded in _dmdx_requests (sequence number, callback,
 * callback data, timestamp) before it is sent.
 *
 * Returns SLURM_ERROR if the host cannot be resolved or the send fails
 * (in the latter case cbfunc is invoked with PMIX_ERROR), otherwise the
 * status of pmixp_server_send().
 */
int pmixp_dmdx_get(const char *nspace, int rank,
		   pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	dmdx_req_info_t *pending;
	char *usock, *target;
	Buf msg;
	int send_rc;
	uint32_t req_seq;

	/* find out where the request has to go */
	target = pmixp_nspace_resolve(nspace, rank);
	xassert(NULL != target);
	if (NULL == target) {
		return SLURM_ERROR;
	}

	/* message: header only */
	msg = pmixp_server_new_buf();
	_setup_header(msg, DMDX_REQUEST, nspace, rank, SLURM_SUCCESS);

	/* generate namespace usocket name */
	usock = pmixp_info_nspace_usock(nspace);

	/* take the current sequence number and advance the counter */
	req_seq = _dmdx_seq_num++;

	/* remember this request */
	pending = xmalloc(sizeof(dmdx_req_info_t));
	pending->seq_num = req_seq;
	pending->cbfunc = cbfunc;
	pending->cbdata = cbdata;
	pending->ts = time(NULL);
#ifndef NDEBUG
	strncpy(pending->nspace, nspace, PMIX_MAX_NSLEN);
	pending->rank = rank;
#endif
	list_append(_dmdx_requests, pending);

	send_rc = pmixp_server_send(target, PMIXP_MSG_DMDX, req_seq, usock,
				    get_buf_data(msg), get_buf_offset(msg),
				    1);

	/* the message and the address are no longer needed */
	xfree(usock);
	free_buf(msg);

	if (SLURM_SUCCESS != send_rc) {
		/*
		 * NOTE(review): the request stays in _dmdx_requests although
		 * cbfunc is invoked right here - confirm that it cannot be
		 * invoked a second time for the same request.
		 */
		PMIXP_ERROR("Cannot send direct modex request to %s", target);
		cbfunc(PMIX_ERROR, NULL, 0, cbdata, NULL, NULL);
		return SLURM_ERROR;
	}

	return send_rc;
}
/*
 * Copy the part of `inbuf` that follows the packed ranges into a newly
 * created buffer.
 *
 * inbuf  - source buffer; its offset on entry marks the end of the data
 *          and is restored before returning
 * offs   - position in inbuf where the packed ranges start
 * outbuf - receives the new buffer holding the copied bytes
 *
 * Returns the status of pmixp_coll_unpack_ranges(). The copy is made
 * regardless of that status.
 */
static int _copy_payload(Buf inbuf, size_t offs, Buf *outbuf)
{
	pmixp_coll_type_t coll_type = 0;
	pmix_proc_t *ranges = NULL;
	size_t nranges = 0;
	size_t end_offs, payload_len;
	char *payload;
	Buf copy;
	int rc;

	/* remember where the data ends, then rewind to the ranges */
	end_offs = get_buf_offset(inbuf);
	set_buf_offset(inbuf, offs);

	/* unpacking is only used to step over the ranges */
	rc = pmixp_coll_unpack_ranges(inbuf, &coll_type, &ranges, &nranges);
	xfree(ranges);

	/* everything from the current position up to the end is payload */
	payload = get_buf_data(inbuf) + get_buf_offset(inbuf);
	payload_len = end_offs - get_buf_offset(inbuf);

	copy = init_buf(payload_len);
	memcpy(get_buf_data(copy), payload, payload_len);
	*outbuf = copy;

	set_buf_offset(inbuf, end_offs);
	return rc;
}
/*
 * Write the packed content of `buffer` to an archive file.
 *
 * The file name is produced by _make_archive_name() from the period,
 * cluster name, directory, type and archive period. Calls are serialized
 * through a function-local mutex.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if the name cannot be built, the
 * file cannot be created, or a write fails.
 */
extern int archive_write_file(Buf buffer, char *cluster_name,
			      time_t period_start, time_t period_end,
			      char *arch_dir, char *arch_type,
			      uint32_t archive_period)
{
	int fd = 0;
	int rc = SLURM_SUCCESS;
	char *new_file = NULL;
	static pthread_mutex_t local_file_lock = PTHREAD_MUTEX_INITIALIZER;

	xassert(buffer);

	slurm_mutex_lock(&local_file_lock);

	/* write the buffer to file */
	new_file = _make_archive_name(period_start, period_end,
				      cluster_name, arch_dir,
				      arch_type, archive_period);
	if (!new_file) {
		error("%s: Unable to make archive file name.", __func__);
		/* do not leave the lock held on the early exit */
		slurm_mutex_unlock(&local_file_lock);
		return SLURM_ERROR;
	}

	debug("Storing %s archive for %s at %s",
	      arch_type, cluster_name, new_file);

	fd = creat(new_file, 0600);
	if (fd < 0) {
		error("Can't save archive, create file %s error %m", new_file);
		rc = SLURM_ERROR;
	} else {
		ssize_t amount;
		uint32_t pos = 0, nwrite = get_buf_offset(buffer);
		char *data = (char *)get_buf_data(buffer);

		while (nwrite > 0) {
			amount = write(fd, &data[pos], nwrite);
			if (amount < 0) {
				/*
				 * An interrupted write transferred nothing:
				 * retry without touching the counters.
				 */
				if (errno == EINTR)
					continue;
				error("Error writing file %s, %m", new_file);
				rc = SLURM_ERROR;
				break;
			}
			nwrite -= amount;
			pos += amount;
		}
		fsync(fd);
		close(fd);
	}

	xfree(new_file);
	slurm_mutex_unlock(&local_file_lock);

	return rc;
}
/*
 * Send a step completion message to slurmstepd over fd and read back the
 * result.
 *
 * Returns the return code read from fd and sets errno to the errno value
 * read from fd. Returns SLURM_ERROR for an unsupported protocol version
 * and -1 if reading from or writing to fd fails.
 */
int stepd_completion(int fd, uint16_t protocol_version,
		     step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;

	buffer = init_buf(0);

	debug("Entering stepd_completion for %u.%u, range_first = %d, range_last = %d",
	      sent->job_id, sent->job_step_id,
	      sent->range_first, sent->range_last);

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &req, sizeof(int));
		safe_write(fd, &sent->range_first, sizeof(int));
		safe_write(fd, &sent->range_last, sizeof(int));
		safe_write(fd, &sent->step_rc, sizeof(int));

		/*
		 * We must not use setinfo over a pipe with slurmstepd here.
		 * Indeed, slurmd makes heavy use of getinfo over a pipe
		 * with slurmstepd and doing the reverse can result in
		 * a deadlock scenario with slurmstepd:
		 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
		 * Do pack/unpack instead to be sure slurmd and slurmstepd
		 * stay independent.
		 */
		jobacctinfo_pack(sent->jobacct, protocol_version,
				 PROTOCOL_TYPE_SLURM, buffer);
		len = get_buf_offset(buffer);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, get_buf_data(buffer), len);

		/*
		 * Reset the pointer after freeing: the reads below may still
		 * jump to rwfail, which must not free the buffer again.
		 */
		free_buf(buffer);
		buffer = NULL;

		/* Receive the return code and errno */
		safe_read(fd, &rc, sizeof(int));
		safe_read(fd, &errnum, sizeof(int));
	} else {
		error("%s: bad protocol version %hu",
		      __func__, protocol_version);
		rc = SLURM_ERROR;
		/* nothing was packed or sent; release the unused buffer */
		free_buf(buffer);
		buffer = NULL;
	}

	errno = errnum;
	return rc;

rwfail:
	/* presumably a no-op for a NULL pointer - confirm macro definition */
	FREE_NULL_BUFFER(buffer);
	return -1;
}
/*
 * switch functions for global state save/restore
 */

/*
 * Save the switch state into "<dir_name>/switch_cray_state".
 *
 * Without HAVE_NATIVE_CRAY this does nothing and returns SLURM_SUCCESS.
 * Otherwise returns SLURM_SUCCESS, or SLURM_ERROR if the file cannot be
 * created or written.
 */
int switch_p_libstate_save(char *dir_name)
{
#ifdef HAVE_NATIVE_CRAY
	Buffer_placeholder_unused:;
#endif
#ifdef HAVE_NATIVE_CRAY
	Buf state;
	char *path;
	int status = SLURM_SUCCESS;
	int out_fd;

	xassert(dir_name != NULL);

	if (debug_flags & DEBUG_FLAG_SWITCH)
		CRAY_INFO("save to %s", dir_name);

	state = init_buf(SWITCH_BUF_SIZE);
	_state_write_buf(state);

	path = xstrdup(dir_name);
	xstrcat(path, "/switch_cray_state");

	/* always start from a fresh file */
	(void) unlink(path);
	out_fd = creat(path, 0600);
	if (out_fd >= 0) {
		char *cursor = get_buf_data(state);
		size_t left = get_buf_offset(state);

		/* the loop ends once write() reports that 0 bytes went out */
		for (;;) {
			int wrote = write(out_fd, cursor, left);

			if (wrote > 0) {
				cursor += wrote;
				left -= wrote;
				continue;
			}
			if (wrote == 0)
				break;
			if (errno == EINTR)
				continue;
			CRAY_ERR("Can't save switch state: %m");
			status = SLURM_ERROR;
			break;
		}
		close(out_fd);
	} else {
		CRAY_ERR("Can't save state, error creating file %s %m",
			 path);
		status = SLURM_ERROR;
	}

	xfree(path);
	if (state)
		free_buf(state);

	return status;
#else
	return SLURM_SUCCESS;
#endif
}
/*
 * Forward a contribution to the next peer of the ring.
 *
 * coll_ctx   - ring collective context
 * contrib_id - id of the original contributor, placed into the header
 * hop_seq    - hop counter, placed into the header
 * data, size - payload to append after the packed ring header
 *
 * Returns SLURM_ERROR if no forward buffer is available, otherwise the
 * status of pmixp_server_send_nb().
 */
static int _ring_forward_data(pmixp_coll_ring_ctx_t *coll_ctx,
			      uint32_t contrib_id, uint32_t hop_seq,
			      void *data, size_t size)
{
	pmixp_coll_ring_msg_hdr_t hdr;
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	pmixp_coll_ring_t *ring = &coll->state.ring;
	hdr.nodeid = coll->my_peerid;
	hdr.msgsize = size;
	hdr.seq = coll_ctx->seq;
	hdr.hop_seq = hop_seq;
	hdr.contrib_id = contrib_id;
	pmixp_ep_t *ep = (pmixp_ep_t*)xmalloc(sizeof(*ep));
	pmixp_coll_ring_cbdata_t *cbdata = NULL;
	uint32_t offset = 0;
	Buf buf = _get_fwd_buf(coll_ctx);
	int rc = SLURM_SUCCESS;

	pmixp_coll_ring_ctx_sanity_check(coll_ctx);

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: transit data to nodeid=%d, seq=%d, hop=%d, size=%lu, contrib=%d",
		    coll_ctx, _ring_next_id(coll), hdr.seq,
		    hdr.hop_seq, hdr.msgsize, hdr.contrib_id);
#endif
	if (!buf) {
		/* nothing was handed to the send layer yet: release ep */
		xfree(ep);
		rc = SLURM_ERROR;
		goto exit;
	}
	ep->type = PMIXP_EP_NOIDEID;
	ep->ep.nodeid = ring->next_peerid;

	/* pack ring info */
	_pack_coll_ring_info(coll, &hdr, buf);

	/* insert payload to buf */
	offset = get_buf_offset(buf);
	pmixp_server_buf_reserve(buf, size);
	memcpy(get_buf_data(buf) + offset, data, size);
	set_buf_offset(buf, offset + size);

	/* context handed to _ring_sent_cb together with the send */
	cbdata = xmalloc(sizeof(pmixp_coll_ring_cbdata_t));
	cbdata->buf = buf;
	cbdata->coll = coll;
	cbdata->coll_ctx = coll_ctx;
	cbdata->seq = coll_ctx->seq;

	/*
	 * NOTE(review): ownership of ep after this call cannot be told
	 * from here - confirm whether pmixp_server_send_nb() keeps it.
	 */
	rc = pmixp_server_send_nb(ep, PMIXP_MSG_RING, coll_ctx->seq, buf,
				  _ring_sent_cb, cbdata);
exit:
	return rc;
}
/*
 * Put the packed header right in front of the payload and return the
 * start of the resulting contiguous message.
 *
 * buf   - buffer whose first PMIXP_SERVER_BUFFER_OFFS bytes are reserved
 * nhdr  - packed header bytes
 * hsize - header size
 * dsize - receives the message size (header + payload)
 *
 * The first uint32_t cell of the buffer stores the header position once
 * the buffer was finalized; 0 means "not finalized yet". A finalized
 * buffer is left untouched and its stored position is reused.
 */
static void *_buf_finalize(Buf buf, void *nhdr, size_t hsize, size_t *dsize)
{
	char *base = get_buf_data(buf);
	uint32_t *svc = (uint32_t *)base;
	size_t hdr_offs;

	if (svc[0]) {
		/* This buffer was already finalized */
		hdr_offs = svc[0];
#ifdef PMIXP_DEBUG_SERVER
		/* We expect header to be the same */
		xassert(0 == memcmp(base + hdr_offs, nhdr, hsize));
#endif
	} else {
		hdr_offs = PMIXP_SERVER_BUFFER_OFFS - hsize;
#ifdef PMIXP_DEBUG_SERVER
		xassert(PMIXP_BASE_HDR_MAX >= hsize);
		xassert(PMIXP_BASE_HDR_MAX <= get_buf_offset(buf));
		/*
		 * Make sure the buffer carries the magic tag, i.e. that it
		 * was allocated with space reserved for the header.
		 */
		xassert(PMIXP_SERVER_BUF_MAGIC == svc[1]);
#endif
		/*
		 * Space for the header was reserved when the buffer was
		 * initialized: put the header in place right before the
		 * payload.
		 */
		if (hsize) {
			memcpy(base + hdr_offs, nhdr, hsize);
		}
		svc[0] = hdr_offs;
	}

	*dsize = get_buf_offset(buf) - hdr_offs;
	return base + hdr_offs;
}
/**************************************************************************** * Functions for agent to manage queue of pending message for the Slurm DBD ****************************************************************************/ static Buf _load_dbd_rec(int fd) { ssize_t size, rd_size; uint32_t msg_size, magic; char *msg; Buf buffer; size = sizeof(msg_size); rd_size = read(fd, &msg_size, size); if (rd_size == 0) return (Buf) NULL; if (rd_size != size) { error("slurmdbd: state recover error: %m"); return (Buf) NULL; } if (msg_size > MAX_DBD_MSG_LEN) { error("slurmdbd: state recover error, msg_size=%u", msg_size); return (Buf) NULL; } buffer = init_buf((int) msg_size); set_buf_offset(buffer, msg_size); msg = get_buf_data(buffer); size = msg_size; while (size) { rd_size = read(fd, msg, size); if ((rd_size > 0) && (rd_size <= size)) { msg += rd_size; size -= rd_size; } else if ((rd_size == -1) && (errno == EINTR)) continue; else { error("slurmdbd: state recover error: %m"); free_buf(buffer); return (Buf) NULL; } } size = sizeof(magic); rd_size = read(fd, &magic, size); if ((rd_size != size) || (magic != DBD_MAGIC)) { error("slurmdbd: state recover error"); free_buf(buffer); return (Buf) NULL; } return buffer; }
/*
 * Bring a server buffer back to its "not finalized, no payload" state:
 * clear the payload-offset cell and move the buffer offset back to
 * PMIXP_SERVER_BUFFER_OFFS.
 *
 * Returns PMIXP_SERVER_BUFFER_OFFS.
 */
size_t pmixp_server_buf_reset(Buf buf)
{
	uint32_t *svc = (uint32_t *)get_buf_data(buf);

	/* 0 in the first cell marks the buffer as not finalized */
	svc[0] = 0;
#ifdef PMIXP_DEBUG_SERVER
	xassert(PMIXP_BASE_HDR_MAX >= sizeof(uint32_t));
	xassert(PMIXP_BASE_HDR_MAX <= get_buf_offset(buf));
	/*
	 * Tag the buffer so that later checks can make sure it has space
	 * reserved for the header.
	 */
	svc[1] = PMIXP_SERVER_BUF_MAGIC;
#endif

	set_buf_offset(buf, PMIXP_SERVER_BUFFER_OFFS);
	return PMIXP_SERVER_BUFFER_OFFS;
}
extern int temp_kvs_init(void) { uint16_t cmd; uint32_t nodeid, num_children, size; Buf buf = NULL; xfree(temp_kvs_buf); temp_kvs_cnt = 0; temp_kvs_size = TEMP_KVS_SIZE_INC; temp_kvs_buf = xmalloc(temp_kvs_size); /* put the tree cmd here to simplify message sending */ if (in_stepd()) { cmd = TREE_CMD_KVS_FENCE; } else { cmd = TREE_CMD_KVS_FENCE_RESP; } buf = init_buf(1024); pack16(cmd, buf); if (in_stepd()) { nodeid = job_info.nodeid; /* XXX: TBC */ num_children = tree_info.num_children + 1; pack32(nodeid, buf); /* from_nodeid */ packstr(tree_info.this_node, buf); /* from_node */ pack32(num_children, buf); /* num_children */ pack32(kvs_seq, buf); } else { pack32(kvs_seq, buf); } size = get_buf_offset(buf); if (temp_kvs_cnt + size > temp_kvs_size) { temp_kvs_size += TEMP_KVS_SIZE_INC; xrealloc(temp_kvs_buf, temp_kvs_size); } memcpy(&temp_kvs_buf[temp_kvs_cnt], get_buf_data(buf), size); temp_kvs_cnt += size; free_buf(buf); tasks_to_wait = 0; children_to_wait = 0; return SLURM_SUCCESS; }
/*
 * Pack a spawn response behind a TREE_CMD_SPAWN_RESP command code and
 * send it to srun.
 *
 * Returns the status of tree_msg_to_srun().
 */
extern int spawn_resp_send_to_srun(spawn_resp_t *resp)
{
	const uint16_t tree_cmd = TREE_CMD_SPAWN_RESP;
	Buf msg = init_buf(1024);
	int status;

	pack16(tree_cmd, msg);
	spawn_resp_pack(resp, msg);

	status = tree_msg_to_srun(get_buf_offset(msg), get_buf_data(msg));

	free_buf(msg);
	return status;
}
/*
 * Pack a spawn response and send it over fd.
 *
 * Unlike the other spawn response senders, no command code is put in
 * front of the response (sync with spawn_req_send_to_srun).
 *
 * Returns the status of slurm_msg_sendto().
 */
extern int spawn_resp_send_to_fd(spawn_resp_t *resp, int fd)
{
	Buf msg = init_buf(1024);
	int status;

	spawn_resp_pack(resp, msg);

	status = slurm_msg_sendto(fd, get_buf_data(msg), get_buf_offset(msg),
				  SLURM_PROTOCOL_NO_SEND_RECV_FLAGS);

	free_buf(msg);
	return status;
}
/*
 * Send a step completion message to slurmstepd over fd and read back the
 * result.
 *
 * Returns the return code read from fd and sets errno to the errno value
 * read from fd. Returns -1 if reading from or writing to fd fails.
 */
int stepd_completion(int fd, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;
	int version = SLURM_PROTOCOL_VERSION;

	buffer = init_buf(0);

	debug("Entering stepd_completion, range_first = %d, range_last = %d",
	      sent->range_first, sent->range_last);
	safe_write(fd, &req, sizeof(int));
	safe_write(fd, &version, sizeof(int));
	safe_write(fd, &sent->range_first, sizeof(int));
	safe_write(fd, &sent->range_last, sizeof(int));
	safe_write(fd, &sent->step_rc, sizeof(int));

	/*
	 * We must not use setinfo over a pipe with slurmstepd here.
	 * Indeed, slurmd makes heavy use of getinfo over a pipe
	 * with slurmstepd and doing the reverse can result in a deadlock
	 * scenario with slurmstepd:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to be sure slurmd and slurmstepd stay
	 * independent.
	 */
	jobacctinfo_pack(sent->jobacct, SLURM_PROTOCOL_VERSION,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);

	/* mark the buffer as released so that rwfail does not free it again */
	free_buf(buffer);
	buffer = NULL;

	/* Receive the return code and errno */
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));

	errno = errnum;
	return rc;

rwfail:
	/* a failed write above leaves the buffer allocated: release it */
	if (buffer)
		free_buf(buffer);
	return -1;
}
/* save and purge the libstate if free_flag is true */ static int _switch_p_libstate_save ( char * dir_name, bool free_flag ) { Buf buffer; char *file_name; int ret = SLURM_SUCCESS; int state_fd; buffer = init_buf(NRT_LIBSTATE_LEN); (void) nrt_libstate_save(buffer, free_flag); file_name = xstrdup(dir_name); xstrcat(file_name, "/nrt_state"); (void) unlink(file_name); state_fd = creat(file_name, 0600); if (state_fd < 0) { error("Can't save state, error creating file %s %m", file_name); ret = SLURM_ERROR; } else { char *buf = get_buf_data(buffer); size_t len = get_buf_offset(buffer); while (1) { int wrote = write (state_fd, buf, len); if ((wrote < 0) && (errno == EINTR)) continue; if (wrote == 0) break; if (wrote < 0) { error("Can't save switch state: %m"); ret = SLURM_ERROR; break; } buf += wrote; len -= wrote; } close(state_fd); } xfree(file_name); if (buffer) free_buf(buffer); return ret; }
static void _respond_with_error(int seq_num, char *sender_host, char *sender_ns, int status) { Buf buf = create_buf(NULL, 0); char *addr; int rc; /* rank doesn't matter here, don't send it */ _setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status); /* generate namespace usocket name */ addr = pmixp_info_nspace_usock(sender_ns); /* send response */ rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr, get_buf_data(buf), get_buf_offset(buf), 1); if (SLURM_SUCCESS != rc) { PMIXP_ERROR("Cannot send direct modex error" " response to %s", sender_host); } xfree(addr); free_buf(buf); }
/*
 * Send a spawn request (prefixed with TREE_CMD_SPAWN) to srun and unpack
 * the response into *resp_ptr.
 *
 * Returns the status of tree_msg_to_srun_with_resp() if the exchange
 * failed, otherwise the status of spawn_resp_unpack().
 */
extern int spawn_req_send_to_srun(spawn_req_t *req, spawn_resp_t **resp_ptr)
{
	const uint16_t tree_cmd = TREE_CMD_SPAWN;
	Buf request = init_buf(2048);
	Buf reply = NULL;
	int status;

	pack16(tree_cmd, request);
	spawn_req_pack(req, request);

	status = tree_msg_to_srun_with_resp(get_buf_offset(request),
					    get_buf_data(request), &reply);
	free_buf(request);

	if (status != SLURM_SUCCESS)
		return status;

	status = spawn_resp_unpack(resp_ptr, reply);
	free_buf(reply);
	return status;
}
/*
 * Append the not yet consumed part of `buf` (from its current offset to
 * its end) to the temporary KVS storage, enlarging the storage by the
 * appended size when it does not fit.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int temp_kvs_merge(Buf buf)
{
	uint32_t nbytes = remaining_buf(buf);
	uint32_t start;
	char *bytes;

	if (nbytes == 0) {
		/* nothing to merge */
		return SLURM_SUCCESS;
	}

	bytes = get_buf_data(buf);
	start = get_buf_offset(buf);

	if (temp_kvs_cnt + nbytes > temp_kvs_size) {
		temp_kvs_size += nbytes;
		xrealloc(temp_kvs_buf, temp_kvs_size);
	}

	memcpy(&temp_kvs_buf[temp_kvs_cnt], &bytes[start], nbytes);
	temp_kvs_cnt += nbytes;

	return SLURM_SUCCESS;
}
/*
 * Store a contribution in the ring buffer of the context and, unless it
 * originates from the next node of the ring, forward it.
 *
 * coll_ctx   - ring collective context
 * contrib_id - id of the contributor
 * hop        - hop counter passed on to _ring_forward_data()
 * data, size - contribution bytes
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if forwarding failed.
 */
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id, uint32_t hop,
				      char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *slot = NULL;
	int fwd_rc;

	/* change the state */
	coll->ts = time(NULL);

	/* make room for the contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if (remaining_buf(coll_ctx->ring_buf) < size) {
		uint32_t extra = size_buf(coll_ctx->ring_buf) +
				 size * _ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, extra);
	}
	grow_buf(coll_ctx->ring_buf, size);

	/* save contribution at the current end of the ring buffer */
	slot = get_buf_data(coll_ctx->ring_buf) +
	       get_buf_offset(coll_ctx->ring_buf);
	memcpy(slot, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* a contribution of the next node went around the whole ring */
	if (contrib_id == _ring_next_id(coll))
		return SLURM_SUCCESS;

	/* forward data to the next node */
	fwd_rc = _ring_forward_data(coll_ctx, contrib_id, hop, slot, size);
	if (fwd_rc) {
		PMIXP_ERROR("Cannot forward ring data");
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
/*
 * Progress step for the PMIXP_COLL_UPFWD_WPC state.
 *
 * Does nothing until coll->contrib_prnt is set. Then the collective is
 * moved to PMIXP_COLL_DOWNFWD and, if a callback is installed, the data
 * stored in dfwd_buf after dfwd_offset is delivered through it.
 *
 * Returns false if nothing happened, true if the state changed and
 * another progress iteration is needed.
 */
static int _progress_ufwd_wpc(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_UPFWD_WPC == coll->state);

	if (!coll->contrib_prnt) {
		return false;
	}

	/* Need to wait only for the local completion callback if installed */
	coll->dfwd_status = PMIXP_COLL_SND_ACTIVE;
	coll->dfwd_cb_wait = 0;

	/* move to the next state */
	coll->state = PMIXP_COLL_DOWNFWD;

	/* local delivery */
	if (coll->cbfunc) {
		pmixp_coll_cbdata_t *cb_ctx =
			xmalloc(sizeof(pmixp_coll_cbdata_t));
		char *payload;
		size_t payload_len;

		cb_ctx->coll = coll;
		cb_ctx->seq = coll->seq;
		cb_ctx->refcntr = 1;

		payload = get_buf_data(coll->dfwd_buf) + coll->dfwd_offset;
		payload_len = get_buf_offset(coll->dfwd_buf) -
			      coll->dfwd_offset;

		coll->cbfunc(PMIX_SUCCESS, payload, payload_len, coll->cbdata,
			     _libpmix_cb, (void *)cb_ctx);
		coll->dfwd_cb_wait++;
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: local delivery, size = %lu",
			    coll, (uint64_t)payload_len);
#endif
	}

	/* events observed - need another iteration */
	return true;
}
/*
 * Pack a spawn response behind a TREE_CMD_SPAWN_RESP command code and
 * send it to the stepd(s) on the host list created from `node`.
 *
 * Returns the status of tree_msg_to_stepds().
 */
extern int spawn_resp_send_to_stepd(spawn_resp_t *resp, char *node)
{
	const uint16_t tree_cmd = TREE_CMD_SPAWN_RESP;
	Buf msg = init_buf(1024);
	hostlist_t targets;
	int status;

	pack16(tree_cmd, msg);
	spawn_resp_pack(resp, msg);

	targets = hostlist_create(node);
	status = tree_msg_to_stepds(targets, get_buf_offset(msg),
				    get_buf_data(msg));
	hostlist_destroy(targets);

	free_buf(msg);
	return status;
}
/*
 * Record the local contribution of this node in the collective.
 *
 * Under coll->lock: a collective in PMIXP_COLL_SYNC is switched to
 * PMIXP_COLL_FAN_IN, the contribution is marked as received and the data
 * is appended to coll->buf. After the lock is dropped the fan-in
 * progress routine is run.
 *
 * Always returns SLURM_SUCCESS.
 */
int pmixp_coll_contrib_local(pmixp_coll_t *coll, char *data, size_t size)
{
	char *dst;

	PMIXP_DEBUG("%s:%d: get local contribution",
		    pmixp_info_namespace(), pmixp_info_nodeid());

	/* sanity check */
	pmixp_coll_sanity_check(coll);

	slurm_mutex_lock(&coll->lock);

	/* the first contribution starts the fan-in phase */
	if (PMIXP_COLL_SYNC == coll->state) {
		PMIXP_DEBUG(
			"%s:%d: get local contribution: switch to PMIXP_COLL_FAN_IN",
			pmixp_info_namespace(), pmixp_info_nodeid());
		coll->state = PMIXP_COLL_FAN_IN;
		coll->ts = time(NULL);
	}
	xassert(PMIXP_COLL_FAN_IN == coll->state);

	/* save & mark local contribution */
	coll->contrib_local = true;
	grow_buf(coll->buf, size);
	dst = get_buf_data(coll->buf) + get_buf_offset(coll->buf);
	memcpy(dst, data, size);
	set_buf_offset(coll->buf, get_buf_offset(coll->buf) + size);

	slurm_mutex_unlock(&coll->lock);

	/* check if the collective is ready to progress */
	_progress_fan_in(coll);

	PMIXP_DEBUG("%s:%d: get local contribution: finish",
		    pmixp_info_namespace(), pmixp_info_nodeid());

	return SLURM_SUCCESS;
}