/*
 * Given a buffer containing a network byte order 32-bit integer (length),
 * and an arbitrary data string, copy the data string into the location
 * specified by valp. Also return the size of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: valp is set to point into a newly created buffer,
 * the caller is responsible for calling free() on *valp
 * if non-NULL (set to NULL on zero size buffer value)
 */
int unpackmem_malloc(char **valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	/* Need at least the 4-byte length prefix. */
	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;
	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);	/* length is stored in network byte order */
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_MEM_LEN) {
		/* Reject absurd lengths before attempting a huge malloc. */
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_MEM_LEN);
		return SLURM_ERROR;
	} else if (*size_valp > 0) {
		/* Payload must be fully present in the buffer. */
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		*valp = malloc(*size_valp);
		if (*valp == NULL) {
			/* Out-of-memory is treated as fatal here. */
			log_oom(__FILE__, __LINE__, __func__);
			abort();
		}
		memcpy(*valp, &buffer->head[buffer->processed], *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = NULL;	/* zero-length value: no allocation */
	return SLURM_SUCCESS;
}
/*
 * Given a buffer containing a network byte order 32-bit integer (length),
 * and an arbitrary data string, copy the data string into the location
 * specified by valp. Also return the size of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: The caller is responsible for the management of valp and
 * insuring it has sufficient size
 */
int unpackmem(char *valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	/* Need at least the 4-byte length prefix. */
	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;
	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);	/* length is stored in network byte order */
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_MEM_LEN) {
		/* Reject absurd lengths; caller's buffer may be smaller. */
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_MEM_LEN);
		return SLURM_ERROR;
	} else if (*size_valp > 0) {
		/* Payload must be fully present in the buffer. */
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		memcpy(valp, &buffer->head[buffer->processed], *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = 0;	/* zero-length value: NUL-terminate in place */
	return SLURM_SUCCESS;
}
/*
 * Given a pointer to memory (valp) and a size (size_val), convert
 * size_val to network byte order and store at buffer followed by
 * the data at valp. Adjust buffer counters.
 */
void packmem(char *valp, uint32_t size_val, Buf buffer)
{
	uint32_t len_nbo;

	/* Refuse to pack anything larger than the per-item limit. */
	if (size_val > MAX_PACK_MEM_LEN) {
		error("%s: Buffer to be packed is too large (%u > %u)",
		      __func__, size_val, MAX_PACK_MEM_LEN);
		return;
	}

	len_nbo = htonl(size_val);

	/* Grow the buffer when the length word plus payload will not fit. */
	if (remaining_buf(buffer) < (sizeof(len_nbo) + size_val)) {
		if ((buffer->size + size_val + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + size_val + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc_nz(buffer->head, buffer->size);
	}

	/* Write the 32-bit length prefix, then the payload (if any). */
	memcpy(&buffer->head[buffer->processed], &len_nbo, sizeof(len_nbo));
	buffer->processed += sizeof(len_nbo);
	if (size_val) {
		memcpy(&buffer->head[buffer->processed], valp, size_val);
		buffer->processed += size_val;
	}
}
/*
 * Given 'buffer' pointing to a network byte order 32-bit integer
 * (size) and a array of strings store the number of strings in
 * 'size_valp' and the array of strings in valp
 * NOTE: valp is set to point into a newly created buffer,
 * the caller is responsible for calling xfree on *valp
 * if non-NULL (set to NULL on zero size buffer value)
 */
int unpackstr_array(char ***valp, uint32_t * size_valp, Buf buffer)
{
	int i;
	uint32_t ns;
	uint32_t uint32_tmp;

	/* Need at least the 4-byte element-count prefix. */
	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;
	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_ARRAY_LEN) {
		/* Reject absurd element counts before allocating. */
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_ARRAY_LEN);
		return SLURM_ERROR;
	} else if (*size_valp > 0) {
		/* One extra slot for the NULL terminator below. */
		*valp = xmalloc_nz(sizeof(char *) * (*size_valp + 1));
		for (i = 0; i < *size_valp; i++) {
			/* NOTE(review): on failure mid-loop the strings
			 * unpacked so far are not freed here — presumably
			 * the caller discards everything; confirm. */
			if (unpackmem_xmalloc(&(*valp)[i], &uint32_tmp, buffer))
				return SLURM_ERROR;
		}
		(*valp)[i] = NULL;	/* NULL terminated array so that execle */
		/*    can detect end of array */
	} else
		*valp = NULL;
	return SLURM_SUCCESS;
}
/*
 * Given a double, multiple by FLOAT_MULT and then
 * typecast to a uint64_t in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 * NOTE: There is an IEEE standard format for double.
 */
void packdouble(double val, Buf buffer)
{
	uint64_t nl;
	/* Union used to type-pun the double's bit pattern into a
	 * uint64_t without violating strict aliasing. */
	union {
		double d;
		uint64_t u;
	} uval;

	/* The 0.5 is here to round off. We have found on systems going out
	 * more than 15 decimals will mess things up, but this corrects it.
	 * NOTE(review): no 0.5 term actually appears in this expression —
	 * the comment looks stale; confirm against other versions of this
	 * function before relying on rounding behavior. */
	uval.d = (val * FLOAT_MULT);
	nl = HTON_uint64(uval.u);
	/* Grow the buffer when the 8-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(nl)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}
	memcpy(&buffer->head[buffer->processed], &nl, sizeof(nl));
	buffer->processed += sizeof(nl);
}
/*
 * Given a buffer containing a network byte order 8-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack8(uint8_t * valp, Buf buffer)
{
	/* A single byte needs no byte-order conversion. */
	if (remaining_buf(buffer) < sizeof(*valp))
		return SLURM_ERROR;

	memcpy(valp, &buffer->head[buffer->processed], sizeof(*valp));
	buffer->processed += sizeof(*valp);
	return SLURM_SUCCESS;
}
/*
 * Given a buffer containing a network byte order 32-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack32(uint32_t * valp, Buf buffer)
{
	uint32_t net_val;

	/* Fail without consuming anything if 4 bytes are not available. */
	if (remaining_buf(buffer) < sizeof(net_val))
		return SLURM_ERROR;

	memcpy(&net_val, &buffer->head[buffer->processed], sizeof(net_val));
	buffer->processed += sizeof(net_val);
	*valp = ntohl(net_val);	/* network -> host byte order */
	return SLURM_SUCCESS;
}
/*
 * Given a pointer to memory (valp), size (size_valp), and buffer,
 * store the buffer contents into memory
 */
int unpackmem_array(char *valp, uint32_t size_valp, Buf buffer)
{
	/* Guard clause: not enough data left in the buffer. */
	if (remaining_buf(buffer) < size_valp) {
		*valp = 0;	/* zero the first byte on failure, as before */
		return SLURM_ERROR;
	}

	memcpy(valp, &buffer->head[buffer->processed], size_valp);
	buffer->processed += size_valp;
	return SLURM_SUCCESS;
}
/*
 * Given a buffer containing a network byte order 16-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack16(uint16_t * valp, Buf buffer)
{
	uint16_t net16;

	/* Fail without consuming anything if 2 bytes are not available. */
	if (remaining_buf(buffer) < sizeof(net16))
		return SLURM_ERROR;

	memcpy(&net16, &buffer->head[buffer->processed], sizeof(net16));
	buffer->processed += sizeof(net16);
	*valp = ntohs(net16);	/* network -> host byte order */
	return SLURM_SUCCESS;
}
/*
 * Unpack a network byte order int64_t from 'buffer' and store it at
 * 'valp' as a host byte order time_t, adjusting buffer counters.
 */
int unpack_time(time_t * valp, Buf buffer)
{
	int64_t net_time;

	/* Times are transported as 8-byte signed integers. */
	if (remaining_buf(buffer) < sizeof(net_time))
		return SLURM_ERROR;

	memcpy(&net_time, &buffer->head[buffer->processed], sizeof(net_time));
	*valp = (time_t) NTOH_int64(net_time);
	buffer->processed += sizeof(net_time);
	return SLURM_SUCCESS;
}
/*
 * Given a buffer containing a network byte order 32-bit integer (length),
 * and an arbitrary data string, copy the data string into the location
 * specified by valp. Also return the size of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: The caller is responsible for the management of valp and
 * insuring it has sufficient size
 */
int unpackmem(char *valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	/* Need at least the 4-byte length prefix. */
	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;
	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);	/* length is stored in network byte order */
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_MEM_LEN) {
		/* Log before rejecting — previously this failed silently,
		 * unlike every other unpack routine in this family. */
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_MEM_LEN);
		return SLURM_ERROR;
	} else if (*size_valp > 0) {
		/* Payload must be fully present in the buffer. */
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		memcpy(valp, &buffer->head[buffer->processed], *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = 0;	/* zero-length value: NUL-terminate in place */
	return SLURM_SUCCESS;
}
/*
 * Given a 8-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack8(uint8_t val, Buf buffer)
{
	/* A single byte is byte-order neutral; just ensure space. */
	if (remaining_buf(buffer) < sizeof(val)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack8: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &val, sizeof(val));
	buffer->processed += sizeof(val);
}
/*
 * Given a pointer to memory (valp), size (size_val), and buffer,
 * store the memory contents into the buffer
 */
void packmem_array(char *valp, uint32_t size_val, Buf buffer)
{
	if (remaining_buf(buffer) < size_val) {
		/* Check size_val against the limit first: the old single
		 * test (MAX_BUF_SIZE - size_val - BUF_SIZE) underflowed in
		 * unsigned arithmetic for huge size_val, letting oversized
		 * requests slip past the limit check. */
		if ((size_val > (MAX_BUF_SIZE - BUF_SIZE)) ||
		    (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE))) {
			error("packmem_array: buffer size too large");
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc(buffer->head, buffer->size);
	}
	memcpy(&buffer->head[buffer->processed], valp, size_val);
	buffer->processed += size_val;
}
/*
 * Given a 8-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack8(uint8_t val, Buf buffer)
{
	/* A single byte is byte-order neutral; just ensure space. */
	if (remaining_buf(buffer) < sizeof(val)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &val, sizeof(val));
	buffer->processed += sizeof(val);
}
/*
 * Given a time_t in host byte order, promote it to int64_t, convert to
 * network byte order, store in buffer and adjust buffer acc'd'ngly
 */
void pack_time(time_t val, Buf buffer)
{
	int64_t net_time = HTON_int64((int64_t) val);

	/* Grow the buffer when the 8-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(net_time)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack_time: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &net_time, sizeof(net_time));
	buffer->processed += sizeof(net_time);
}
/*
 * Given a 32-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack32(uint32_t val, Buf buffer)
{
	uint32_t net_val = htonl(val);

	/* Grow the buffer when the 4-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(net_val)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack32: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &net_val, sizeof(net_val));
	buffer->processed += sizeof(net_val);
}
/*
 * Given a 16-bit integer in host byte order, convert to network byte order,
 * store in buffer and adjust buffer counters.
 */
void pack16(uint16_t val, Buf buffer)
{
	uint16_t net_val = htons(val);

	/* Grow the buffer when the 2-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(net_val)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack16: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &net_val, sizeof(net_val));
	buffer->processed += sizeof(net_val);
}
/*
 * Given a pointer to memory (valp), size (size_val), and buffer,
 * store the memory contents into the buffer
 */
void packmem_array(char *valp, uint32_t size_val, Buf buffer)
{
	if (remaining_buf(buffer) < size_val) {
		/* Do the limit test in 64-bit arithmetic: the 32-bit sum
		 * (buffer->size + size_val + BUF_SIZE) could wrap and let an
		 * oversized request slip past this check. */
		if (((uint64_t) buffer->size + size_val + BUF_SIZE) >
		    MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + size_val + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc_nz(buffer->head, buffer->size);
	}
	memcpy(&buffer->head[buffer->processed], valp, size_val);
	buffer->processed += size_val;
}
/*
 * Given a 16-bit integer in host byte order, convert to network byte order,
 * store in buffer and adjust buffer counters.
 */
void pack16(uint16_t val, Buf buffer)
{
	uint16_t ns = htons(val);

	/* Grow the buffer when the 2-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(ns)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			/* %u for the unsigned arguments; "%d" here
			 * mismatched the argument types and disagreed with
			 * every sibling pack function. */
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}
	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);
}
/*
 * Given a time_t in host byte order, promote it to int64_t, convert to
 * network byte order, store in buffer and adjust buffer acc'd'ngly
 */
void pack_time(time_t val, Buf buffer)
{
	int64_t net_time = HTON_int64((int64_t) val);

	/* Grow the buffer when the 8-byte value will not fit. */
	if (remaining_buf(buffer) < sizeof(net_time)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &net_time, sizeof(net_time));
	buffer->processed += sizeof(net_time);
}
/*
 * Given a buffer containing a network byte order 64-bit integer,
 * typecast as double, and divide by FLOAT_MULT
 * store a host double at 'valp', and adjust buffer counters.
 * NOTE: There is an IEEE standard format for double.
 */
int unpackdouble(double *valp, Buf buffer)
{
	uint64_t net_val;
	/* Union used to type-pun the uint64_t bit pattern back into a
	 * double without violating strict aliasing. */
	union {
		double d;
		uint64_t u;
	} uval;

	/* Fail without consuming anything if 8 bytes are not available. */
	if (remaining_buf(buffer) < sizeof(net_val))
		return SLURM_ERROR;

	memcpy(&net_val, &buffer->head[buffer->processed], sizeof(net_val));
	buffer->processed += sizeof(net_val);

	uval.u = NTOH_uint64(net_val);
	*valp = uval.d / FLOAT_MULT;	/* undo the scaling from packdouble */
	return SLURM_SUCCESS;
}
/*
 * Given a pointer to memory (valp) and a size (size_val), convert
 * size_val to network byte order and store at buffer followed by
 * the data at valp. Adjust buffer counters.
 */
void packmem(char *valp, uint32_t size_val, Buf buffer)
{
	uint32_t ns = htonl(size_val);

	if (remaining_buf(buffer) < (sizeof(ns) + size_val)) {
		/* Check size_val against the limit first: the old single
		 * test (MAX_BUF_SIZE - size_val - BUF_SIZE) underflowed in
		 * unsigned arithmetic for huge size_val, letting oversized
		 * requests slip past the limit check. */
		if ((size_val > (MAX_BUF_SIZE - BUF_SIZE)) ||
		    (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE))) {
			error("packmem: buffer size too large");
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc(buffer->head, buffer->size);
	}
	/* Write the 32-bit length prefix, then the payload (if any). */
	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);
	if (size_val) {
		memcpy(&buffer->head[buffer->processed], valp, size_val);
		buffer->processed += size_val;
	}
}
/*
 * Wrap 'buffer' (plus its auth credential and header metadata) in a
 * freshly allocated slurm_msg_t and hand it to the message aggregator.
 * No-op when the aggregation collection is not running.
 */
extern void msg_aggr_add_comp(Buf buffer, void *auth_cred, header_t *header)
{
	slurm_msg_t *comp_msg;

	/* Nothing to do unless aggregation is active. */
	if (!msg_collection.running)
		return;

	comp_msg = xmalloc_nz(sizeof(slurm_msg_t));
	slurm_msg_t_init(comp_msg);

	/* Carry the header metadata over onto the new message. */
	comp_msg->msg_type = header->msg_type;
	comp_msg->flags = header->flags;
	comp_msg->protocol_version = header->version;

	comp_msg->auth_cred = auth_cred;
	comp_msg->data = buffer;
	comp_msg->data_size = remaining_buf(buffer);

	msg_aggr_add_msg(comp_msg, 0, NULL);
}
/*
 * Append one ring-collective contribution to the context's ring buffer
 * and, unless this contribution originated from the next rank in the
 * ring, forward it one hop further. Returns SLURM_SUCCESS or SLURM_ERROR.
 */
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id, uint32_t hop,
				      char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *data_ptr = NULL;
	int ret;

	/* change the state */
	coll->ts = time(NULL);

	/* save contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		/* First contribution: size for one chunk per peer. */
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if(remaining_buf(coll_ctx->ring_buf) < size) {
		/* Not enough room: extend by enough for the contributions
		 * still expected to arrive. */
		uint32_t new_size = size_buf(coll_ctx->ring_buf) + size *
			_ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, new_size);
	}
	/* NOTE(review): this extra unconditional grow_buf() follows the two
	 * conditional ones above — presumably a belt-and-braces guarantee
	 * that 'size' bytes fit; confirm it is intentional. */
	grow_buf(coll_ctx->ring_buf, size);
	data_ptr = get_buf_data(coll_ctx->ring_buf) +
		get_buf_offset(coll_ctx->ring_buf);
	memcpy(data_ptr, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* check for ring is complete */
	if (contrib_id != _ring_next_id(coll)) {
		/* forward data to the next node */
		ret = _ring_forward_data(coll_ctx, contrib_id, hop,
					 data_ptr, size);
		if (ret) {
			PMIXP_ERROR("Cannot forward ring data");
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}
/*
 * Given a pointer to array of char * (char ** or char *[] ) and a size
 * (size_val), convert size_val to network byte order and store in the
 * buffer followed by the data at valp. Adjust buffer counters.
 */
void packstr_array(char **valp, uint32_t size_val, Buf buffer)
{
	uint32_t count_nbo = htonl(size_val);
	int idx;

	/* Grow the buffer when the 4-byte count will not fit; packstr()
	 * below grows it again for each string as needed. */
	if (remaining_buf(buffer) < sizeof(count_nbo)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("packstr_array: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &count_nbo,
	       sizeof(count_nbo));
	buffer->processed += sizeof(count_nbo);

	for (idx = 0; idx < size_val; idx++)
		packstr(valp[idx], buffer);
}
/*
 * Append the unread remainder of 'buf' to the global temp_kvs_buf,
 * growing that buffer as needed. Always returns SLURM_SUCCESS.
 */
extern int temp_kvs_merge(Buf buf)
{
	uint32_t nbytes = remaining_buf(buf);
	char *src;
	uint32_t src_off;

	/* Nothing left to merge. */
	if (nbytes == 0)
		return SLURM_SUCCESS;

	src = get_buf_data(buf);
	src_off = get_buf_offset(buf);

	/* Grow the accumulation buffer if the new data will not fit. */
	if (temp_kvs_cnt + nbytes > temp_kvs_size) {
		temp_kvs_size += nbytes;
		xrealloc(temp_kvs_buf, temp_kvs_size);
	}

	memcpy(&temp_kvs_buf[temp_kvs_cnt], &src[src_off], nbytes);
	temp_kvs_cnt += nbytes;
	return SLURM_SUCCESS;
}
/*
 * Progress the fan-out phase of a collective: deliver the received data
 * to the registered callback (if any) and reset the collective so it can
 * be reused. Ownership of 'buf' passes to the callback via pmixp_free_Buf.
 */
void _progres_fan_out(pmixp_coll_t *coll, Buf buf)
{
	PMIXP_DEBUG("%s:%d: start", pmixp_info_namespace(),
		    pmixp_info_nodeid());

	pmixp_coll_sanity_check(coll);

	/* Only legal in (or transitioning through) the fan-out state. */
	xassert(PMIXP_COLL_FAN_OUT == coll->state ||
		PMIXP_COLL_FAN_OUT_IN == coll->state);

	/* update the database */
	if (NULL != coll->cbfunc) {
		/* Hand the unread portion of the buffer to the callback;
		 * pmixp_free_Buf releases 'buf' when the callback is done. */
		void *data = get_buf_data(buf) + get_buf_offset(buf);
		size_t size = remaining_buf(buf);
		PMIXP_DEBUG("%s:%d: use the callback", pmixp_info_namespace(),
			    pmixp_info_nodeid());
		coll->cbfunc(PMIX_SUCCESS, data, size, coll->cbdata,
			     pmixp_free_Buf, (void *)buf);
	}
	/* Prepare for the next collective operation */
	_fan_out_finished(coll);

	PMIXP_DEBUG("%s:%d: collective is prepared for the next use",
		    pmixp_info_namespace(), pmixp_info_nodeid());
}
/*
 * Given a pointer to array of char * (char ** or char *[] ) and a size
 * (size_val), convert size_val to network byte order and store in the
 * buffer followed by the data at valp. Adjust buffer counters.
 */
void packstr_array(char **valp, uint32_t size_val, Buf buffer)
{
	uint32_t count_nbo = htonl(size_val);
	int idx;

	/* Grow the buffer when the 4-byte count will not fit; packstr()
	 * below grows it again for each string as needed. */
	if (remaining_buf(buffer) < sizeof(count_nbo)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &count_nbo,
	       sizeof(count_nbo));
	buffer->processed += sizeof(count_nbo);

	for (idx = 0; idx < size_val; idx++)
		packstr(valp[idx], buffer);
}
/*
 * Thread body that forwards a message down one branch of the forwarding
 * tree. Walks the hostlist, connects to the first reachable node, sends
 * header + payload, collects responses, and records every per-node
 * success/failure in fwd_struct->ret_list (under forward_mutex).
 * 'arg' is a forward_msg_t *, owned and freed by this thread.
 * Always returns NULL.
 */
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		/* Resolve the next candidate node's address. */
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
			free(name);
			if (hostlist_count(hl) > 0) {
				/* More candidates remain — try the next. */
				slurm_mutex_unlock(
					&fwd_struct->forward_mutex);
				continue;
			}
			/* NOTE: mutex intentionally still held here;
			 * cleanup unlocks it. */
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(
					&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* Connected: point the forward list at the remaining hosts
		 * so the receiver can continue fanning out. */
		buf = hostlist_ranged_string_xmalloc(hl);
		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed +
				fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS ) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				/* Reset the send buffer for the retry. */
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(
					&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			/* Record every remaining host as handled too. */
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list,
					  ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			/* Scale the receive timeout by the expected depth
			 * of the remaining forwarding tree. */
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			steps = (fwd_msg->header.forward.cnt+1) /
				fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0 &&
				  list_count(ret_list) <= 1)) {
			/* Nothing (or only our own reply) came back. */
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(
					&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1)
			   != list_count(ret_list)) {
			/* this should never be called since the above
			   should catch the failed forwards and pipe
			   them back down, but this is here so we
			   never have to worry about a locked
			   mutex */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here.  We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			/* Cross-check each expected host against the
			 * responses and fail the missing ones. */
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						     ret_data_info->
						     node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		/* Tag unnamed responses with the directly-contacted node
		 * and move everything onto the shared return list. */
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	/* Paths that jump here hold forward_mutex; it is released below. */
	if ((fd >= 0) && slurm_close(fd) < 0)
		error ("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);

	return (NULL);
}
/*
 * load_all_part_state - load the partition state from file, recover on
 *	slurmctld restart. execute this after loading the configuration
 *	file data.
 * Returns 0 on success; ENOENT / EINVAL / EFAULT on failure.
 * NOTE: READ lock_slurmctld config before entry
 * NOTE: the safe_unpack* macros jump to the unpack_error label on a
 *	short or corrupt buffer.
 */
int load_all_part_state(void)
{
	char *part_name = NULL, *allow_groups = NULL, *nodes = NULL;
	char *state_file, *data = NULL;
	uint32_t max_time, default_time, max_nodes, min_nodes;
	uint32_t max_cpus_per_node = INFINITE, grace_time = 0;
	time_t time;
	uint16_t flags;
	uint16_t max_share, preempt_mode, priority, state_up, cr_type;
	struct part_record *part_ptr;
	uint32_t data_size = 0, name_len;
	int data_allocated, data_read = 0, error_code = 0, part_cnt = 0;
	int state_fd;
	Buf buffer;
	char *ver_str = NULL;
	char* allow_alloc_nodes = NULL;
	uint16_t protocol_version = (uint16_t)NO_VAL;
	char* alternate = NULL;

	/* read the file */
	lock_state_files();
	state_fd = _open_part_state_file(&state_file);
	if (state_fd < 0) {
		info("No partition state file (%s) to recover", state_file);
		error_code = ENOENT;
	} else {
		/* Slurp the whole state file into 'data', growing the
		 * allocation one BUF_SIZE chunk at a time. */
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;	/* retry on signal */
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	/* create_buf takes ownership of 'data'; free_buf releases it. */
	buffer = create_buf(data, data_size);

	/* Map the state-file version string to a protocol version. */
	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in part_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, PART_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		} else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) {
			protocol_version = SLURM_2_5_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t)NO_VAL) {
		error("**********************************************************");
		error("Can not recover partition state, data version incompatible");
		error("**********************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);
	safe_unpack_time(&time, buffer);

	/* One iteration per partition record in the file. */
	while (remaining_buf(buffer) > 0) {
		if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_cpus_per_node, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			/* Track the highest priority seen across loads. */
			if (priority > part_max_priority)
				part_max_priority = priority;

			safe_unpack16(&state_up, buffer);
			safe_unpack16(&cr_type, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);

			/* The *_CLR bits are command flags, never valid in
			 * a stored state record. */
			if ((flags & PART_FLAG_DEFAULT_CLR)   ||
			    (flags & PART_FLAG_HIDDEN_CLR)    ||
			    (flags & PART_FLAG_NO_ROOT_CLR)   ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
			/* Older record layout: no max_cpus_per_node or
			 * cr_type fields. */
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;

			cr_type = 0;	/* Default value */

			safe_unpack16(&state_up, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);

			if ((flags & PART_FLAG_DEFAULT_CLR)   ||
			    (flags & PART_FLAG_HIDDEN_CLR)    ||
			    (flags & PART_FLAG_NO_ROOT_CLR)   ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else {
			error("load_all_part_state: protocol_version "
			      "%hu not supported", protocol_version);
			goto unpack_error;
		}
		/* validity test as possible */
		if (state_up > PARTITION_UP) {
			error("Invalid data for partition %s: state_up=%u",
			      part_name, state_up);
			error_code = EINVAL;
		}
		if (error_code) {
			/* Bail out of the load loop on the first bad
			 * record, freeing this record's strings. */
			error("No more partition data will be processed from "
			      "the checkpoint file");
			xfree(allow_groups);
			xfree(allow_alloc_nodes);
			xfree(alternate);
			xfree(part_name);
			xfree(nodes);
			error_code = EINVAL;
			break;
		}

		/* find record and perform update */
		part_ptr = list_find_first(part_list, &list_find_part,
					   part_name);
		part_cnt++;
		if (part_ptr == NULL) {
			/* Partition in state file but not slurm.conf:
			 * create a record for it anyway. */
			info("load_all_part_state: partition %s missing from "
			     "configuration file", part_name);
			part_ptr = create_part_record();
			xfree(part_ptr->name);
			part_ptr->name = xstrdup(part_name);
		}

		part_ptr->flags = flags;
		if (part_ptr->flags & PART_FLAG_DEFAULT) {
			xfree(default_part_name);
			default_part_name = xstrdup(part_name);
			default_part_loc = part_ptr;
		}
		part_ptr->max_time       = max_time;
		part_ptr->default_time   = default_time;
		part_ptr->max_cpus_per_node = max_cpus_per_node;
		part_ptr->max_nodes      = max_nodes;
		part_ptr->max_nodes_orig = max_nodes;
		part_ptr->min_nodes      = min_nodes;
		part_ptr->min_nodes_orig = min_nodes;
		part_ptr->max_share      = max_share;
		part_ptr->grace_time     = grace_time;
		if (preempt_mode != (uint16_t) NO_VAL)
			part_ptr->preempt_mode = preempt_mode;
		part_ptr->priority       = priority;
		part_ptr->state_up       = state_up;
		part_ptr->cr_type	 = cr_type;

		/* The part_ptr record takes ownership of the unpacked
		 * strings; only part_name is freed here. */
		xfree(part_ptr->allow_groups);
		part_ptr->allow_groups   = allow_groups;
		xfree(part_ptr->allow_alloc_nodes);
		part_ptr->allow_alloc_nodes = allow_alloc_nodes;
		xfree(part_ptr->alternate);
		part_ptr->alternate      = alternate;
		xfree(part_ptr->nodes);
		part_ptr->nodes          = nodes;
		xfree(part_name);
	}

	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return error_code;

unpack_error:
	error("Incomplete partition data checkpoint file");
	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return EFAULT;
}