Beispiel #1
0
/*
 * Given a buffer containing a network byte order 16-bit integer,
 * and an arbitrary data string, copy the data string into the location
 * specified by valp.  Also return the sizes of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: valp is set to point into a newly created buffer,
 *	the caller is responsible for calling free() on *valp
 *	if non-NULL (set to NULL on zero size buffer value)
 */
int unpackmem_malloc(char **valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;

	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);
	buffer->processed += sizeof(ns);
	if (*size_valp > MAX_PACK_MEM_LEN) {
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_MEM_LEN);
		return SLURM_ERROR;
	}
	else if (*size_valp > 0) {
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		*valp = malloc(*size_valp);
		if (*valp == NULL) {
			log_oom(__FILE__, __LINE__, __func__);
			abort();
		}
		memcpy(*valp, &buffer->head[buffer->processed],
		       *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = NULL;
	return SLURM_SUCCESS;
}
Beispiel #2
0
/*
 * Given a buffer containing a network byte order 16-bit integer,
 * and an arbitrary data string, copy the data string into the location
 * specified by valp.  Also return the sizes of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: The caller is responsible for the management of valp and
 * insuring it has sufficient size
 */
int unpackmem(char *valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;

	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_MEM_LEN) {
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_MEM_LEN);
		return SLURM_ERROR;
	}
	else if (*size_valp > 0) {
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		memcpy(valp, &buffer->head[buffer->processed], *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = 0;
	return SLURM_SUCCESS;
}
Beispiel #3
0
/*
 * Given a pointer to memory (valp) and a size (size_val), convert
 * size_val to network byte order and store at buffer followed by
 * the data at valp. Adjust buffer counters.
 */
void packmem(char *valp, uint32_t size_val, Buf buffer)
{
	uint32_t ns = htonl(size_val);

	if (size_val > MAX_PACK_MEM_LEN) {
		error("%s: Buffer to be packed is too large (%u > %u)",
		      __func__, size_val, MAX_PACK_MEM_LEN);
		return;
	}
	if (remaining_buf(buffer) < (sizeof(ns) + size_val)) {
		if ((buffer->size + size_val + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + size_val + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);

	if (size_val) {
		memcpy(&buffer->head[buffer->processed], valp, size_val);
		buffer->processed += size_val;
	}
}
Beispiel #4
0
/*
 * Given 'buffer' pointing to a network byte order 16-bit integer
 * (size) and a array of strings  store the number of strings in
 * 'size_valp' and the array of strings in valp
 * NOTE: valp is set to point into a newly created buffer,
 *	the caller is responsible for calling xfree on *valp
 *	if non-NULL (set to NULL on zero size buffer value)
 */
int unpackstr_array(char ***valp, uint32_t * size_valp, Buf buffer)
{
	int i;
	uint32_t ns;
	uint32_t uint32_tmp;

	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;

	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_ARRAY_LEN) {
		error("%s: Buffer to be unpacked is too large (%u > %u)",
		      __func__, *size_valp, MAX_PACK_ARRAY_LEN);
		return SLURM_ERROR;
	}
	else if (*size_valp > 0) {
		*valp = xmalloc_nz(sizeof(char *) * (*size_valp + 1));
		for (i = 0; i < *size_valp; i++) {
			if (unpackmem_xmalloc(&(*valp)[i], &uint32_tmp, buffer))
				return SLURM_ERROR;
		}
		(*valp)[i] = NULL;	/* NULL terminated array so that execle */
		/*    can detect end of array */
	} else
		*valp = NULL;
	return SLURM_SUCCESS;
}
Beispiel #5
0
/*
 * Given a double, multiple by FLOAT_MULT and then
 * typecast to a uint64_t in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 * NOTE: There is an IEEE standard format for double.
 */
void 	packdouble(double val, Buf buffer)
{
	uint64_t nl;
	union {
		double d;
		uint64_t u;
	} uval;

	 /* The 0.5 is here to round off.  We have found on systems going out
	  * more than 15 decimals will mess things up, but this corrects it. */
	uval.d =  (val * FLOAT_MULT);
	nl =  HTON_uint64(uval.u);
	if (remaining_buf(buffer) < sizeof(nl)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &nl, sizeof(nl));
	buffer->processed += sizeof(nl);
}
Beispiel #6
0
/*
 * Given a buffer containing a network byte order 8-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack8(uint8_t * valp, Buf buffer)
{
	if (remaining_buf(buffer) < sizeof(uint8_t))
		return SLURM_ERROR;

	memcpy(valp, &buffer->head[buffer->processed], sizeof(uint8_t));
	buffer->processed += sizeof(uint8_t);
	return SLURM_SUCCESS;
}
Beispiel #7
0
/*
 * Given a buffer containing a network byte order 32-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack32(uint32_t * valp, Buf buffer)
{
	uint32_t nl;
	if (remaining_buf(buffer) < sizeof(nl))
		return SLURM_ERROR;

	memcpy(&nl, &buffer->head[buffer->processed], sizeof(nl));
	*valp = ntohl(nl);
	buffer->processed += sizeof(nl);
	return SLURM_SUCCESS;
}
Beispiel #8
0
/*
 * Given a pointer to memory (valp), size (size_val), and buffer,
 * store the buffer contents into memory
 */
int unpackmem_array(char *valp, uint32_t size_valp, Buf buffer)
{
	if (remaining_buf(buffer) >= size_valp) {
		memcpy(valp, &buffer->head[buffer->processed], size_valp);
		buffer->processed += size_valp;
		return SLURM_SUCCESS;
	} else {
		*valp = 0;
		return SLURM_ERROR;
	}
}
Beispiel #9
0
/*
 * Given a buffer containing a network byte order 16-bit integer,
 * store a host integer at 'valp', and adjust buffer counters.
 */
int unpack16(uint16_t * valp, Buf buffer)
{
	uint16_t ns;

	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;

	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*valp = ntohs(ns);
	buffer->processed += sizeof(ns);
	return SLURM_SUCCESS;
}
Beispiel #10
0
int unpack_time(time_t * valp, Buf buffer)
{
	int64_t n64;

	if (remaining_buf(buffer) < sizeof(n64))
		return SLURM_ERROR;

	memcpy(&n64, &buffer->head[buffer->processed], sizeof(n64));
	buffer->processed += sizeof(n64);
	*valp = (time_t) NTOH_int64(n64);
	return SLURM_SUCCESS;
}
Beispiel #11
0
/*
 * Given a buffer containing a network byte order 16-bit integer,
 * and an arbitrary data string, copy the data string into the location
 * specified by valp.  Also return the sizes of 'valp' in bytes.
 * Adjust buffer counters.
 * NOTE: The caller is responsible for the management of valp and
 * insuring it has sufficient size
 */
int unpackmem(char *valp, uint32_t * size_valp, Buf buffer)
{
	uint32_t ns;

	if (remaining_buf(buffer) < sizeof(ns))
		return SLURM_ERROR;

	memcpy(&ns, &buffer->head[buffer->processed], sizeof(ns));
	*size_valp = ntohl(ns);
	buffer->processed += sizeof(ns);

	if (*size_valp > MAX_PACK_MEM_LEN)
		return SLURM_ERROR;
	else if (*size_valp > 0) {
		if (remaining_buf(buffer) < *size_valp)
			return SLURM_ERROR;
		memcpy(valp, &buffer->head[buffer->processed], *size_valp);
		buffer->processed += *size_valp;
	} else
		*valp = 0;
	return SLURM_SUCCESS;
}
Beispiel #12
0
/*
 * Given a 8-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack8(uint8_t val, Buf buffer)
{
	if (remaining_buf(buffer) < sizeof(uint8_t)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack8: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &val, sizeof(uint8_t));
	buffer->processed += sizeof(uint8_t);
}
Beispiel #13
0
/*
 * Given a pointer to memory (valp), size (size_val), and buffer,
 * store the memory contents into the buffer
 */
void packmem_array(char *valp, uint32_t size_val, Buf buffer)
{
	if (remaining_buf(buffer) < size_val) {
		if (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE)) {
			error("packmem_array: buffer size too large");
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], valp, size_val);
	buffer->processed += size_val;
}
Beispiel #14
0
/*
 * Given a 8-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack8(uint8_t val, Buf buffer)
{
	if (remaining_buf(buffer) < sizeof(uint8_t)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &val, sizeof(uint8_t));
	buffer->processed += sizeof(uint8_t);
}
Beispiel #15
0
/*
 * Given a time_t in host byte order, promote it to int64_t, convert to
 * network byte order, store in buffer and adjust buffer acc'd'ngly
 */
void pack_time(time_t val, Buf buffer)
{
	int64_t n64 = HTON_int64((int64_t) val);

	if (remaining_buf(buffer) < sizeof(n64)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack_time: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &n64, sizeof(n64));
	buffer->processed += sizeof(n64);
}
Beispiel #16
0
/*
 * Given a 32-bit integer in host byte order, convert to network byte order
 * store in buffer, and adjust buffer counters.
 */
void pack32(uint32_t val, Buf buffer)
{
	uint32_t nl = htonl(val);

	if (remaining_buf(buffer) < sizeof(nl)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack32: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &nl, sizeof(nl));
	buffer->processed += sizeof(nl);
}
Beispiel #17
0
/*
 * Given a 16-bit integer in host byte order, convert to network byte order,
 * store in buffer and adjust buffer counters.
 */
void pack16(uint16_t val, Buf buffer)
{
	uint16_t ns = htons(val);

	if (remaining_buf(buffer) < sizeof(ns)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("pack16: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);
}
Beispiel #18
0
/*
 * Given a pointer to memory (valp), size (size_val), and buffer,
 * store the memory contents into the buffer
 */
void packmem_array(char *valp, uint32_t size_val, Buf buffer)
{
	if (remaining_buf(buffer) < size_val) {
		if ((buffer->size + size_val + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + size_val + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], valp, size_val);
	buffer->processed += size_val;
}
/*
 * Given a 16-bit integer in host byte order, convert to network byte order,
 * store in buffer and adjust buffer counters.
 */
void pack16(uint16_t val, Buf buffer)
{
	uint16_t ns = htons(val);

	if (remaining_buf(buffer) < sizeof(ns)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%d > %d)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);
}
Beispiel #20
0
/*
 * Given a time_t in host byte order, promote it to int64_t, convert to
 * network byte order, store in buffer and adjust buffer acc'd'ngly
 */
void pack_time(time_t val, Buf buffer)
{
	int64_t n64 = HTON_int64((int64_t) val);

	if (remaining_buf(buffer) < sizeof(n64)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &n64, sizeof(n64));
	buffer->processed += sizeof(n64);
}
Beispiel #21
0
/*
 * Given a buffer containing a network byte order 64-bit integer,
 * typecast as double, and  divide by FLOAT_MULT
 * store a host double at 'valp', and adjust buffer counters.
 * NOTE: There is an IEEE standard format for double.
 */
int	unpackdouble(double *valp, Buf buffer)
{
	uint64_t nl;
	union {
		double d;
		uint64_t u;
	} uval;

	if (remaining_buf(buffer) < sizeof(nl))
		return SLURM_ERROR;

	memcpy(&nl, &buffer->head[buffer->processed], sizeof(nl));
	buffer->processed += sizeof(nl);

	uval.u = NTOH_uint64(nl);
	*valp = uval.d / FLOAT_MULT;

	return SLURM_SUCCESS;
}
Beispiel #22
0
/*
 * Given a pointer to memory (valp) and a size (size_val), convert
 * size_val to network byte order and store at buffer followed by
 * the data at valp. Adjust buffer counters.
 */
void packmem(char *valp, uint32_t size_val, Buf buffer)
{
	uint32_t ns = htonl(size_val);

	if (remaining_buf(buffer) < (sizeof(ns) + size_val)) {
		if (buffer->size > (MAX_BUF_SIZE -  size_val - BUF_SIZE)) {
			error("packmem: buffer size too large");
			return;
		}
		buffer->size += (size_val + BUF_SIZE);
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);

	if (size_val) {
		memcpy(&buffer->head[buffer->processed], valp, size_val);
		buffer->processed += size_val;
	}
}
Beispiel #23
0
extern void msg_aggr_add_comp(Buf buffer, void *auth_cred, header_t *header)
{
	slurm_msg_t *msg;

	if (!msg_collection.running)
		return;

	msg = xmalloc_nz(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	msg->protocol_version = header->version;
	msg->msg_type = header->msg_type;
	msg->flags = header->flags;

	msg->auth_cred = auth_cred;

	msg->data = buffer;
	msg->data_size = remaining_buf(buffer);

	msg_aggr_add_msg(msg, 0, NULL);
}
Beispiel #24
0
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id,
				      uint32_t hop, char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *data_ptr = NULL;
	int ret;

	/* change the state */
	coll->ts = time(NULL);

	/* save contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if(remaining_buf(coll_ctx->ring_buf) < size) {
		uint32_t new_size = size_buf(coll_ctx->ring_buf) + size *
			_ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, new_size);
	}
	grow_buf(coll_ctx->ring_buf, size);
	data_ptr = get_buf_data(coll_ctx->ring_buf) +
		get_buf_offset(coll_ctx->ring_buf);
	memcpy(data_ptr, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* check for ring is complete */
	if (contrib_id != _ring_next_id(coll)) {
		/* forward data to the next node */
		ret = _ring_forward_data(coll_ctx, contrib_id, hop,
					 data_ptr, size);
		if (ret) {
			PMIXP_ERROR("Cannot forward ring data");
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}
Beispiel #25
0
/*
 * Given a pointer to array of char * (char ** or char *[] ) and a size
 * (size_val), convert size_val to network byte order and store in the
 * buffer followed by the data at valp. Adjust buffer counters.
 */
void packstr_array(char **valp, uint32_t size_val, Buf buffer)
{
	int i;
	uint32_t ns = htonl(size_val);

	if (remaining_buf(buffer) < sizeof(ns)) {
		if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) {
			error("packstr_array: buffer size too large");
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);

	for (i = 0; i < size_val; i++) {
		packstr(valp[i], buffer);
	}

}
Beispiel #26
0
extern int
temp_kvs_merge(Buf buf)
{
	char *data;
	uint32_t offset, size;

	size = remaining_buf(buf);
	if (size == 0) {
		return SLURM_SUCCESS;
	}
	data = get_buf_data(buf);
	offset = get_buf_offset(buf);

	if (temp_kvs_cnt + size > temp_kvs_size) {
		temp_kvs_size += size;
		xrealloc(temp_kvs_buf, temp_kvs_size);
	}
	memcpy(&temp_kvs_buf[temp_kvs_cnt], &data[offset], size);
	temp_kvs_cnt += size;

	return SLURM_SUCCESS;
}
Beispiel #27
0
void _progres_fan_out(pmixp_coll_t *coll, Buf buf)
{
	PMIXP_DEBUG("%s:%d: start", pmixp_info_namespace(), pmixp_info_nodeid());

	pmixp_coll_sanity_check(coll);

	xassert(PMIXP_COLL_FAN_OUT == coll->state || PMIXP_COLL_FAN_OUT_IN == coll->state);

	/* update the database */
	if (NULL != coll->cbfunc) {
		void *data = get_buf_data(buf) + get_buf_offset(buf);
		size_t size = remaining_buf(buf);
		PMIXP_DEBUG("%s:%d: use the callback", pmixp_info_namespace(),
				pmixp_info_nodeid());
		coll->cbfunc(PMIX_SUCCESS, data, size, coll->cbdata,
				pmixp_free_Buf, (void *)buf);
	}
	/* Prepare for the next collective operation */
	_fan_out_finished(coll);

	PMIXP_DEBUG("%s:%d: collective is prepared for the next use",
			pmixp_info_namespace(), pmixp_info_nodeid());
}
Beispiel #28
0
/*
 * Given a pointer to array of char * (char ** or char *[] ) and a size
 * (size_val), convert size_val to network byte order and store in the
 * buffer followed by the data at valp. Adjust buffer counters.
 */
void packstr_array(char **valp, uint32_t size_val, Buf buffer)
{
	int i;
	uint32_t ns = htonl(size_val);

	if (remaining_buf(buffer) < sizeof(ns)) {
		if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) {
			error("%s: Buffer size limit exceeded (%u > %u)",
			      __func__, (buffer->size + BUF_SIZE),
			      MAX_BUF_SIZE);
			return;
		}
		buffer->size += BUF_SIZE;
		xrealloc_nz(buffer->head, buffer->size);
	}

	memcpy(&buffer->head[buffer->processed], &ns, sizeof(ns));
	buffer->processed += sizeof(ns);

	for (i = 0; i < size_val; i++) {
		packstr(valp[i], buffer);
	}

}
Beispiel #29
0
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
 			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				continue;
			}
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}
		buf = hostlist_ranged_string_xmalloc(hl);

		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed + fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS ) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list, ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			steps = (fwd_msg->header.forward.cnt+1) /
					fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now  + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0
				  && list_count(ret_list) <= 1)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1)
			  != list_count(ret_list)) {
			/* this should never be called since the above
			   should catch the failed forwards and pipe
			   them back down, but this is here so we
			   never have to worry about a locked
			   mutex */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here.  We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						   ret_data_info->node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	if ((fd >= 0) && slurm_close(fd) < 0)
		error ("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);

	return (NULL);
}
Beispiel #30
0
/*
 * load_all_part_state - load the partition state from file, recover on
 *	slurmctld restart. execute this after loading the configuration
 *	file data.
 * NOTE: READ lock_slurmctld config before entry
 */
int load_all_part_state(void)
{
	char *part_name = NULL, *allow_groups = NULL, *nodes = NULL;
	char *state_file, *data = NULL;
	uint32_t max_time, default_time, max_nodes, min_nodes;
	uint32_t max_cpus_per_node = INFINITE, grace_time = 0;
	time_t time;
	uint16_t flags;
	uint16_t max_share, preempt_mode, priority, state_up, cr_type;
	struct part_record *part_ptr;
	uint32_t data_size = 0, name_len;
	int data_allocated, data_read = 0, error_code = 0, part_cnt = 0;
	int state_fd;
	Buf buffer;
	char *ver_str = NULL;
	char* allow_alloc_nodes = NULL;
	uint16_t protocol_version = (uint16_t)NO_VAL;
	char* alternate = NULL;

	/* read the file */
	lock_state_files();
	state_fd = _open_part_state_file(&state_file);
	if (state_fd < 0) {
		info("No partition state file (%s) to recover",
		     state_file);
		error_code = ENOENT;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if  (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in part_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, PART_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		} else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) {
			protocol_version = SLURM_2_5_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t)NO_VAL) {
		error("**********************************************************");
		error("Can not recover partition state, data version incompatible");
		error("**********************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);
	safe_unpack_time(&time, buffer);

	while (remaining_buf(buffer) > 0) {
		if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_cpus_per_node, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;

			safe_unpack16(&state_up, buffer);
			safe_unpack16(&cr_type, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;
			cr_type = 0;	/* Default value */

			safe_unpack16(&state_up, buffer);
			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else {
			error("load_all_part_state: protocol_version "
			      "%hu not supported", protocol_version);
			goto unpack_error;
		}
		/* validity test as possible */
		if (state_up > PARTITION_UP) {
			error("Invalid data for partition %s: state_up=%u",
			      part_name, state_up);
			error_code = EINVAL;
		}
		if (error_code) {
			error("No more partition data will be processed from "
			      "the checkpoint file");
			xfree(allow_groups);
			xfree(allow_alloc_nodes);
			xfree(alternate);
			xfree(part_name);
			xfree(nodes);
			error_code = EINVAL;
			break;
		}

		/* find record and perform update */
		part_ptr = list_find_first(part_list, &list_find_part,
					   part_name);
		part_cnt++;
		if (part_ptr == NULL) {
			info("load_all_part_state: partition %s missing from "
				"configuration file", part_name);
			part_ptr = create_part_record();
			xfree(part_ptr->name);
			part_ptr->name = xstrdup(part_name);
		}

		part_ptr->flags          = flags;
		if (part_ptr->flags & PART_FLAG_DEFAULT) {
			xfree(default_part_name);
			default_part_name = xstrdup(part_name);
			default_part_loc = part_ptr;
		}
		part_ptr->max_time       = max_time;
		part_ptr->default_time   = default_time;
		part_ptr->max_cpus_per_node = max_cpus_per_node;
		part_ptr->max_nodes      = max_nodes;
		part_ptr->max_nodes_orig = max_nodes;
		part_ptr->min_nodes      = min_nodes;
		part_ptr->min_nodes_orig = min_nodes;
		part_ptr->max_share      = max_share;
		part_ptr->grace_time     = grace_time;
		if (preempt_mode != (uint16_t) NO_VAL)
			part_ptr->preempt_mode   = preempt_mode;
		part_ptr->priority       = priority;
		part_ptr->state_up       = state_up;
		part_ptr->cr_type	 = cr_type;
		xfree(part_ptr->allow_groups);
		part_ptr->allow_groups   = allow_groups;
		xfree(part_ptr->allow_alloc_nodes);
		part_ptr->allow_alloc_nodes   = allow_alloc_nodes;
		xfree(part_ptr->alternate);
		part_ptr->alternate      = alternate;
		xfree(part_ptr->nodes);
		part_ptr->nodes = nodes;

		xfree(part_name);
	}

	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return error_code;

      unpack_error:
	error("Incomplete partition data checkpoint file");
	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return EFAULT;
}