Ejemplo n.º 1
0
/*
 * slurm_ckpt_unpack_job - unpack checkpoint job information from a buffer
 * IN/OUT jobinfo - checkpoint job record (struct check_job_info) to fill in
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * RET SLURM_SUCCESS, or SLURM_ERROR if the buffer could not be unpacked
 *
 * The record starts with a 16-bit id followed by a 32-bit payload size.
 * When the id is not CHECK_POE the payload is skipped instead of decoded.
 * Nothing is read for versions below SLURM_MIN_PROTOCOL_VERSION.
 * On error check_ptr->error_msg is released.
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	uint32_t uint32_tmp;
	struct check_job_info *check_ptr =
		(struct check_job_info *)jobinfo;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		uint16_t id;
		uint32_t size;

		safe_unpack16(&id, buffer);
		safe_unpack32(&size, buffer);
		if (id != CHECK_POE) {
			uint32_t x;

			/*
			 * "size" is read from the buffer itself, so it must
			 * not be trusted: refuse to move the read offset past
			 * the data that is actually available.
			 */
			if (size > remaining_buf(buffer))
				goto unpack_error;
			x = get_buf_offset(buffer);
			set_buf_offset(buffer, x + size);
		} else {
			safe_unpack16(&check_ptr->disabled, buffer);
			safe_unpack16(&check_ptr->node_cnt, buffer);
			safe_unpack16(&check_ptr->reply_cnt, buffer);
			safe_unpack16(&check_ptr->wait_time, buffer);
			safe_unpack32(&check_ptr->error_code, buffer);
			safe_unpackstr_xmalloc(&check_ptr->error_msg,
					       &uint32_tmp, buffer);
			safe_unpack_time(&check_ptr->time_stamp, buffer);
		}
	}

	return SLURM_SUCCESS;

    unpack_error:
	xfree(check_ptr->error_msg);
	return SLURM_ERROR;
}
Ejemplo n.º 2
0
/*
 * slurm_ckpt_unpack_job - read checkpoint job information out of a buffer
 * IN/OUT jobinfo - checkpoint job record (struct check_job_info) to fill in
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * RET SLURM_SUCCESS, or SLURM_ERROR if the buffer could not be unpacked
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	struct check_job_info *info = (struct check_job_info *) jobinfo;
	uint32_t msg_len;

	/* Versions older than 2.3: nothing is read, report success */
	if (protocol_version < SLURM_2_3_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	/* Counters and flags */
	safe_unpack16(&info->disabled, buffer);
	safe_unpack16(&info->node_cnt, buffer);
	safe_unpack16(&info->reply_cnt, buffer);
	safe_unpack16(&info->wait_time, buffer);

	/* Last error and its time stamp */
	safe_unpack32(&info->error_code, buffer);
	safe_unpackstr_xmalloc(&info->error_msg, &msg_len, buffer);
	safe_unpack_time(&info->time_stamp, buffer);

	return SLURM_SUCCESS;

unpack_error:
	xfree(info->error_msg);
	return SLURM_ERROR;
}
Ejemplo n.º 3
0
/*
 * _unpack_node_state_msg - unpack a node state message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * reason_uid defaults to NO_VAL. For versions below
 * SLURM_MIN_PROTOCOL_VERSION nothing is read and success is returned.
 */
static int _unpack_node_state_msg(dbd_node_state_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	uint32_t str_len;
	dbd_node_state_msg_t *node_msg = xmalloc(sizeof(dbd_node_state_msg_t));

	*msg = node_msg;
	node_msg->reason_uid = NO_VAL;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpackstr_xmalloc(&node_msg->hostlist, &str_len, buffer);
	safe_unpackstr_xmalloc(&node_msg->reason, &str_len, buffer);
	safe_unpack32(&node_msg->reason_uid, buffer);
	safe_unpack16(&node_msg->new_state, buffer);
	safe_unpack_time(&node_msg->event_time, buffer);
	safe_unpack32(&node_msg->state, buffer);
	safe_unpackstr_xmalloc(&node_msg->tres_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_node_state_msg(node_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 4
0
/*
 * acct_gather_energy_unpack - unpack an energy accounting record
 * OUT energy - newly allocated record, set to NULL on failure
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version)
{
	acct_gather_energy_t *rec = acct_gather_energy_alloc();

	*energy = rec;

	/* These four fields are read for every protocol version */
	safe_unpack32(&rec->base_consumed_energy, buffer);
	safe_unpack32(&rec->base_watts, buffer);
	safe_unpack32(&rec->consumed_energy, buffer);
	safe_unpack32(&rec->current_watts, buffer);

	/* Two trailing fields are read only for 2.6 and later */
	if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
		safe_unpack32(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	acct_gather_energy_destroy(rec);
	*energy = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 5
0
/*
 * slurmdbd_unpack_usage_msg - unpack a usage request/response message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN type - message type, selects which record unpacker is used
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * An unrecognized type is reported through fatal().
 */
extern int slurmdbd_unpack_usage_msg(dbd_usage_msg_t **msg,
				     uint16_t rpc_version,
				     slurmdbd_msg_type_t type,
				     Buf buffer)
{
	int (*unpack_rec) (void **object, uint16_t rpc_version, Buf buffer);
	dbd_usage_msg_t *usage_msg = xmalloc(sizeof(dbd_usage_msg_t));

	*msg = usage_msg;

	/* Pick the record unpacker matching the message type */
	if ((type == DBD_GET_ASSOC_USAGE) || (type == DBD_GOT_ASSOC_USAGE)) {
		unpack_rec = slurmdb_unpack_assoc_rec;
	} else if ((type == DBD_GET_CLUSTER_USAGE) ||
		   (type == DBD_GOT_CLUSTER_USAGE)) {
		unpack_rec = slurmdb_unpack_cluster_rec;
	} else if ((type == DBD_GET_WCKEY_USAGE) ||
		   (type == DBD_GOT_WCKEY_USAGE)) {
		unpack_rec = slurmdb_unpack_wckey_rec;
	} else {
		fatal("Unknown pack type");
		return SLURM_ERROR;
	}

	/* The embedded record comes first, then the time window */
	if (unpack_rec(&usage_msg->rec, rpc_version, buffer) == SLURM_ERROR)
		goto unpack_error;

	safe_unpack_time(&usage_msg->start, buffer);
	safe_unpack_time(&usage_msg->end, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_usage_msg(usage_msg, type);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 6
0
/*
 * acct_gather_energy_unpack - unpack an energy accounting record
 * IN/OUT energy - if need_alloc is set, receives a newly allocated record
 *	(NULL on failure); otherwise points at the record to fill in
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * IN need_alloc - true to allocate the record here
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * On failure a caller-supplied record is zeroed rather than freed.
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version, bool need_alloc)
{
	acct_gather_energy_t *rec;
	uint32_t narrow;

	if (!need_alloc) {
		rec = *energy;
	} else {
		rec = acct_gather_energy_alloc(1);
		*energy = rec;
	}

	if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) {
		safe_unpack64(&rec->base_consumed_energy, buffer);
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack64(&rec->consumed_energy, buffer);
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack64(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
	} else if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		/* Energy counters are read as 32 bits here and widened */
		safe_unpack32(&narrow, buffer);
		rec->base_consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->previous_consumed_energy = (uint64_t) narrow;
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	if (!need_alloc) {
		memset(rec, 0, sizeof(acct_gather_energy_t));
		return SLURM_ERROR;
	}

	acct_gather_energy_destroy(rec);
	*energy = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 7
0
/*
 * _unpack_roll_usage_msg - unpack a roll-usage message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_roll_usage_msg(dbd_roll_usage_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	dbd_roll_usage_msg_t *roll_msg;

	roll_msg = xmalloc(sizeof(dbd_roll_usage_msg_t));
	*msg = roll_msg;

	/* Below the minimum version nothing is read */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&roll_msg->archive_data, buffer);
	safe_unpack_time(&roll_msg->end, buffer);
	safe_unpack_time(&roll_msg->start, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_roll_usage_msg(roll_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 8
0
/*
 * _unpack_job_suspend_msg - unpack a job suspend message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_job_suspend_msg(dbd_job_suspend_msg_t **msg,
				   uint16_t rpc_version, Buf buffer)
{
	dbd_job_suspend_msg_t *suspend_msg;

	suspend_msg = xmalloc(sizeof(dbd_job_suspend_msg_t));
	*msg = suspend_msg;

	/* Below the minimum version nothing is read */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack32(&suspend_msg->assoc_id, buffer);
	safe_unpack64(&suspend_msg->db_index, buffer);
	safe_unpack32(&suspend_msg->job_id, buffer);
	safe_unpack32(&suspend_msg->job_state, buffer);
	safe_unpack_time(&suspend_msg->submit_time, buffer);
	safe_unpack_time(&suspend_msg->suspend_time, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_job_suspend_msg(suspend_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 9
0
/*
 * _unpack_step_start_msg - unpack a step start message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * A version below SLURM_MIN_PROTOCOL_VERSION is treated as an unpack error.
 */
static int _unpack_step_start_msg(dbd_step_start_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	dbd_step_start_msg_t *step_msg;
	uint32_t str_len = 0;

	step_msg = xmalloc(sizeof(dbd_step_start_msg_t));
	*msg = step_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		goto unpack_error;

	safe_unpack32(&step_msg->assoc_id, buffer);
	safe_unpack64(&step_msg->db_index, buffer);
	safe_unpack32(&step_msg->job_id, buffer);
	safe_unpackstr_xmalloc(&step_msg->name, &str_len, buffer);
	safe_unpackstr_xmalloc(&step_msg->nodes, &str_len, buffer);
	safe_unpackstr_xmalloc(&step_msg->node_inx, &str_len, buffer);
	safe_unpack32(&step_msg->node_cnt, buffer);
	safe_unpack_time(&step_msg->start_time, buffer);
	safe_unpack_time(&step_msg->job_submit_time, buffer);
	safe_unpack32(&step_msg->req_cpufreq_min, buffer);
	safe_unpack32(&step_msg->req_cpufreq_max, buffer);
	safe_unpack32(&step_msg->req_cpufreq_gov, buffer);
	safe_unpack32(&step_msg->step_id, buffer);
	safe_unpack32(&step_msg->task_dist, buffer);
	safe_unpack32(&step_msg->total_tasks, buffer);
	safe_unpackstr_xmalloc(&step_msg->tres_alloc_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_start_msg:"
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_start_msg(step_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 10
0
/*
 * _unpack_job_complete_msg - unpack a job completion message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * A version below SLURM_MIN_PROTOCOL_VERSION is treated as an unpack error.
 */
static int _unpack_job_complete_msg(dbd_job_comp_msg_t **msg,
				    uint16_t rpc_version, Buf buffer)
{
	dbd_job_comp_msg_t *comp_msg;
	uint32_t str_len;

	comp_msg = xmalloc(sizeof(dbd_job_comp_msg_t));
	*msg = comp_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		goto unpack_error;

	safe_unpackstr_xmalloc(&comp_msg->admin_comment, &str_len, buffer);
	safe_unpack32(&comp_msg->assoc_id, buffer);
	safe_unpackstr_xmalloc(&comp_msg->comment, &str_len, buffer);
	safe_unpack64(&comp_msg->db_index, buffer);
	safe_unpack32(&comp_msg->derived_ec, buffer);
	safe_unpack_time(&comp_msg->end_time, buffer);
	safe_unpack32(&comp_msg->exit_code, buffer);
	safe_unpack32(&comp_msg->job_id, buffer);
	safe_unpack32(&comp_msg->job_state, buffer);
	safe_unpackstr_xmalloc(&comp_msg->nodes, &str_len, buffer);
	safe_unpack32(&comp_msg->req_uid, buffer);
	safe_unpack_time(&comp_msg->start_time, buffer);
	safe_unpack_time(&comp_msg->submit_time, buffer);
	safe_unpackstr_xmalloc(&comp_msg->system_comment, &str_len, buffer);
	safe_unpackstr_xmalloc(&comp_msg->tres_alloc_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_job_complete_msg(comp_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 11
0
/*
 * _unpack_step_complete_msg - unpack a step completion message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * A version below SLURM_MIN_PROTOCOL_VERSION is treated as an unpack error.
 */
static int _unpack_step_complete_msg(dbd_step_comp_msg_t **msg,
				     uint16_t rpc_version, Buf buffer)
{
	uint32_t uint32_tmp;
	dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t));
	*msg = msg_ptr;

	if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack32(&msg_ptr->assoc_id, buffer);
		safe_unpack64(&msg_ptr->db_index, buffer);
		safe_unpack_time(&msg_ptr->end_time, buffer);
		safe_unpack32(&msg_ptr->exit_code, buffer);
		/*
		 * The accounting record is unpacked by a helper that is not
		 * one of the safe_* macros, so its result has to be checked
		 * by hand. Continuing after a failure would read the
		 * remaining fields from the wrong offset.
		 */
		if (jobacctinfo_unpack(
			    (struct jobacctinfo **)&msg_ptr->jobacct,
			    rpc_version, PROTOCOL_TYPE_DBD, buffer, 1)
		    != SLURM_SUCCESS)
			goto unpack_error;
		safe_unpack32(&msg_ptr->job_id, buffer);
		safe_unpack_time(&msg_ptr->job_submit_time, buffer);
		safe_unpackstr_xmalloc(&msg_ptr->job_tres_alloc_str,
				       &uint32_tmp, buffer);
		safe_unpack32(&msg_ptr->req_uid, buffer);
		safe_unpack_time(&msg_ptr->start_time, buffer);
		safe_unpack16(&msg_ptr->state, buffer);
		safe_unpack32(&msg_ptr->step_id, buffer);
		safe_unpack32(&msg_ptr->total_tasks, buffer);
	} else
		goto unpack_error;

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_complete_msg:"
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_complete_msg(msg_ptr);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 12
0
/*
 * ext_sensors_data_unpack - unpack an external sensors data record
 * OUT ext_sensors - newly allocated record, NULL on failure
 * IN buffer - buffer to read from
 * IN protocol_version - not consulted by this function
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors, Buf buffer,
				     uint16_t protocol_version)
{
	ext_sensors_data_t *sensors = ext_sensors_alloc();

	*ext_sensors = sensors;
	if (!sensors)
		return SLURM_ERROR;

	safe_unpack32(&sensors->consumed_energy, buffer);
	safe_unpack32(&sensors->temperature, buffer);
	safe_unpack_time(&sensors->energy_update_time, buffer);
	safe_unpack32(&sensors->current_watts, buffer);

	return SLURM_SUCCESS;

unpack_error:
	ext_sensors_destroy(sensors);
	*ext_sensors = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 13
0
/*
 * _unpack_cluster_tres_msg - unpack a cluster TRES message
 * OUT msg - newly allocated message, set to NULL on failure
 * IN rpc_version - protocol version the data was packed with
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_cluster_tres_msg(dbd_cluster_tres_msg_t **msg,
				    uint16_t rpc_version, Buf buffer)
{
	uint32_t str_len;
	dbd_cluster_tres_msg_t *tres_msg = xmalloc(sizeof(dbd_cluster_tres_msg_t));

	*msg = tres_msg;

	/* Below the minimum version nothing is read */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpackstr_xmalloc(&tres_msg->cluster_nodes, &str_len, buffer);
	safe_unpack_time(&tres_msg->event_time, buffer);
	safe_unpackstr_xmalloc(&tres_msg->tres_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_cluster_tres_msg(tres_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
Ejemplo n.º 14
0
/*
 * load_all_part_state - load the partition state from file, recover on
 *	slurmctld restart. execute this after loading the configuration
 *	file data.
 * RET 0 or an errno-style code: EINVAL when a record holds invalid data,
 *	EFAULT when the file version is not recognized or the data is
 *	incomplete
 * NOTE: READ lock_slurmctld config before entry
 */
int load_all_part_state(void)
{
	char *part_name = NULL, *allow_groups = NULL, *nodes = NULL;
	char *state_file, *data = NULL;
	uint32_t max_time, default_time, max_nodes, min_nodes;
	uint32_t max_cpus_per_node = INFINITE, grace_time = 0;
	time_t time;
	uint16_t flags;
	uint16_t max_share, preempt_mode, priority, state_up, cr_type;
	struct part_record *part_ptr;
	uint32_t data_size = 0, name_len;
	int data_allocated, data_read = 0, error_code = 0, part_cnt = 0;
	int state_fd;
	Buf buffer;
	char *ver_str = NULL;
	char* allow_alloc_nodes = NULL;
	uint16_t protocol_version = (uint16_t)NO_VAL;
	char* alternate = NULL;

	/* read the file */
	lock_state_files();
	state_fd = _open_part_state_file(&state_file);
	if (state_fd < 0) {
		info("No partition state file (%s) to recover",
		     state_file);
		error_code = ENOENT;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		/* Grow the buffer so BUF_SIZE bytes are always free to read */
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if  (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	/* The header's version string selects the record layout */
	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in part_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, PART_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		} else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) {
			protocol_version = SLURM_2_5_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t)NO_VAL) {
		error("**********************************************************");
		error("Can not recover partition state, data version incompatible");
		error("**********************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);
	safe_unpack_time(&time, buffer);

	while (remaining_buf(buffer) > 0) {
		if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_cpus_per_node, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;

			safe_unpack16(&state_up, buffer);
			safe_unpack16(&cr_type, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
			/* Same layout without max_cpus_per_node and cr_type */
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;
			cr_type = 0;	/* Default value */

			safe_unpack16(&state_up, buffer);
			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else {
			error("load_all_part_state: protocol_version "
			      "%hu not supported", protocol_version);
			goto unpack_error;
		}
		/* validity test as possible */
		if (state_up > PARTITION_UP) {
			error("Invalid data for partition %s: state_up=%u",
			      part_name, state_up);
			error_code = EINVAL;
		}
		if (error_code) {
			error("No more partition data will be processed from "
			      "the checkpoint file");
			xfree(allow_groups);
			xfree(allow_alloc_nodes);
			xfree(alternate);
			xfree(part_name);
			xfree(nodes);
			error_code = EINVAL;
			break;
		}

		/* find record and perform update */
		part_ptr = list_find_first(part_list, &list_find_part,
					   part_name);
		part_cnt++;
		if (part_ptr == NULL) {
			info("load_all_part_state: partition %s missing from "
				"configuration file", part_name);
			part_ptr = create_part_record();
			xfree(part_ptr->name);
			part_ptr->name = xstrdup(part_name);
		}

		part_ptr->flags          = flags;
		if (part_ptr->flags & PART_FLAG_DEFAULT) {
			xfree(default_part_name);
			default_part_name = xstrdup(part_name);
			default_part_loc = part_ptr;
		}
		part_ptr->max_time       = max_time;
		part_ptr->default_time   = default_time;
		part_ptr->max_cpus_per_node = max_cpus_per_node;
		part_ptr->max_nodes      = max_nodes;
		part_ptr->max_nodes_orig = max_nodes;
		part_ptr->min_nodes      = min_nodes;
		part_ptr->min_nodes_orig = min_nodes;
		part_ptr->max_share      = max_share;
		part_ptr->grace_time     = grace_time;
		if (preempt_mode != (uint16_t) NO_VAL)
			part_ptr->preempt_mode   = preempt_mode;
		part_ptr->priority       = priority;
		part_ptr->state_up       = state_up;
		part_ptr->cr_type	 = cr_type;

		/*
		 * The unpacked strings are handed over to the partition
		 * record. Clear the local pointers so that a later
		 * unpack_error never frees memory the record now owns.
		 */
		xfree(part_ptr->allow_groups);
		part_ptr->allow_groups   = allow_groups;
		allow_groups             = NULL;
		xfree(part_ptr->allow_alloc_nodes);
		part_ptr->allow_alloc_nodes   = allow_alloc_nodes;
		allow_alloc_nodes             = NULL;
		xfree(part_ptr->alternate);
		part_ptr->alternate      = alternate;
		alternate                = NULL;
		xfree(part_ptr->nodes);
		part_ptr->nodes = nodes;
		nodes           = NULL;

		xfree(part_name);
	}

	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return error_code;

      unpack_error:
	error("Incomplete partition data checkpoint file");
	info("Recovered state of %d partitions", part_cnt);
	/* Release whatever was unpacked for the incomplete record */
	xfree(allow_groups);
	xfree(allow_alloc_nodes);
	xfree(alternate);
	xfree(part_name);
	xfree(nodes);
	free_buf(buffer);
	return EFAULT;
}
Ejemplo n.º 15
0
/*
 * load_all_front_end_state - Load the front_end node state from file, recover
 *	on slurmctld restart. Execute this after loading the configuration
 *	file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason
 *	Use this to overwrite the "UNKNOWN" state typically used in slurm.conf
 * RET 0 or error code (EFAULT when the file version is not recognized or
 *	the data is incomplete); always 0 when built without HAVE_FRONT_END
 * NOTE: READ lock_slurmctld config before entry
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
	int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
	uint16_t node_state;
	uint32_t data_size = 0, name_len;
	uint32_t reason_uid = NO_VAL;
	time_t reason_time = 0;
	front_end_record_t *front_end_ptr;
	int state_fd;
	time_t time_stamp;
	Buf buffer;
	char *ver_str = NULL;
	uint16_t protocol_version = (uint16_t) NO_VAL;

	/* read the file */
	lock_state_files ();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd < 0) {
		info ("No node state file (%s) to recover", state_file);
		error_code = ENOENT;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		/* Grow the buffer so BUF_SIZE bytes are always free to read */
		while (1) {
			data_read = read(state_fd, &data[data_size], BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error ("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close (state_fd);
	}
	xfree (state_file);
	unlock_state_files ();

	buffer = create_buf (data, data_size);

	/* Only a header matching FRONT_END_STATE_VERSION is accepted */
	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, FRONT_END_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	/* One record per iteration until the buffer is exhausted */
	while (remaining_buf (buffer) > 0) {
		uint16_t base_state;
		if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
			safe_unpack16 (&node_state,  buffer);
			safe_unpackstr_xmalloc (&reason,    &name_len, buffer);
			safe_unpack_time (&reason_time, buffer);
			safe_unpack32 (&reason_uid,  buffer);
			base_state = node_state & NODE_STATE_BASE;
		} else
			goto unpack_error;

		/* validity test as possible */

		/* find record and perform update */
		front_end_ptr = find_front_end_record(node_name);
		if (front_end_ptr == NULL) {
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (state_only) {
			/*
			 * Only merge the saved DOWN/DRAIN/FAIL state into a
			 * record that is currently UNKNOWN, and only fill in
			 * the reason when the record has none.
			 */
			uint16_t orig_flags;
			orig_flags = front_end_ptr->node_state &
				     NODE_STATE_FLAGS;
			node_cnt++;
			if (IS_NODE_UNKNOWN(front_end_ptr)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					front_end_ptr->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN) {
					 front_end_ptr->node_state |=
						 NODE_STATE_DRAIN;
				}
				if (node_state & NODE_STATE_FAIL) {
					front_end_ptr->node_state |=
						NODE_STATE_FAIL;
				}
			}
			if (front_end_ptr->reason == NULL) {
				front_end_ptr->reason = reason;
				reason = NULL;	/* Nothing to free */
				front_end_ptr->reason_time = reason_time;
				front_end_ptr->reason_uid = reason_uid;
			}
		} else {
			/* Full restore: saved state and reason replace the record's */
			node_cnt++;
			front_end_ptr->node_state = node_state;
			xfree(front_end_ptr->reason);
			front_end_ptr->reason	= reason;
			reason			= NULL;	/* Nothing to free */
			front_end_ptr->reason_time	= reason_time;
			front_end_ptr->reason_uid	= reason_uid;
			front_end_ptr->last_response	= (time_t) 0;
		}

		/* reason is freed here only if it was not handed to the record */
		xfree(node_name);
		xfree(reason);
	}

fini:	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf (buffer);
	return error_code;

unpack_error:
	/* Report the truncated file, then share the common exit path */
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	xfree (node_name);
	xfree(reason);
	goto fini;
#else
	return 0;
#endif
}
Ejemplo n.º 16
0
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location)
{
	Buf buffer = NULL;
	char *data = NULL, *state_file;
	time_t buf_time;
	uint16_t ver = 0;
	uint32_t data_size = 0;
	int state_fd;
	int data_allocated, data_read = 0, error_code = SLURM_SUCCESS;
	slurmdb_federation_rec_t *ret_fed = NULL;

	state_file = xstrdup_printf("%s/%s", state_save_location,
				    FED_MGR_STATE_FILE);
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		error("No fed_mgr state file (%s) to recover", state_file);
		xfree(state_file);
		return NULL;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);

	buffer = create_buf(data, data_size);

	safe_unpack16(&ver, buffer);

	debug3("Version in fed_mgr_state header is %u", ver);
	if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) {
		error("***********************************************");
		error("Can not recover fed_mgr state, incompatible version, "
		      "got %u need > %u <= %u", ver,
		      SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		free_buf(buffer);
		return NULL;
	}

	safe_unpack_time(&buf_time, buffer);

	error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver,
						   buffer);
	if (error_code != SLURM_SUCCESS)
		goto unpack_error;
	else if (!ret_fed || !ret_fed->name ||
		 !list_count(ret_fed->cluster_list)) {
		slurmdb_destroy_federation_rec(ret_fed);
		ret_fed = NULL;
		error("No feds retrieved");
	} else {
		/* We want to free the connections here since they don't exist
		 * anymore, but they were packed when state was saved. */
		slurmdb_cluster_rec_t *cluster;
		ListIterator itr = list_iterator_create(
			ret_fed->cluster_list);
		while ((cluster = list_next(itr))) {
			slurm_persist_conn_destroy(cluster->fed.recv);
			cluster->fed.recv = NULL;
			slurm_persist_conn_destroy(cluster->fed.send);
			cluster->fed.send = NULL;
		}
		list_iterator_destroy(itr);
	}

	free_buf(buffer);

	return ret_fed;

unpack_error:
	free_buf(buffer);

	return NULL;
}
Ejemplo n.º 17
0
/*
 * _read_last_decay_ran - read the two saved time stamps from the
 *	"priority_last_decay_ran" file in the state save location
 * OUT last_ran - first time stamp stored in the file, 0 if unavailable
 * OUT last_reset - second time stamp stored in the file, 0 if unavailable
 *
 * Both outputs are zeroed first, so they stay 0 when the file is missing.
 */
static void _read_last_decay_ran(time_t *last_ran, time_t *last_reset)
{
	int data_allocated, data_read = 0;
	uint32_t data_size = 0;
	int state_fd;
	char *data = NULL, *state_file;
	Buf buffer;

	xassert(last_ran);
	xassert(last_reset);

	(*last_ran) = 0;
	(*last_reset) = 0;

	/* read the file */
	state_file = xstrdup(slurmctld_conf.state_save_location);
	xstrcat(state_file, "/priority_last_decay_ran");
	lock_state_files();
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		info("No last decay (%s) to recover", state_file);
		/* The path is heap allocated; release it on this exit too */
		xfree(state_file);
		unlock_state_files();
		return;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		/* Grow the buffer so BUF_SIZE bytes are always free to read */
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);
	safe_unpack_time(last_ran, buffer);
	safe_unpack_time(last_reset, buffer);
	free_buf(buffer);
	if (priority_debug)
		info("Last ran decay on jobs at %ld", (long)*last_ran);

	return;

unpack_error:
	error("Incomplete priority last decay file returning");
	free_buf(buffer);
	return;

}
Ejemplo n.º 18
0
Archivo: sicp.c Proyecto: rohgarg/slurm
/*
 * _load_sicp_state - load saved SICP job records from the "sicp_state"
 *	file in the state save location
 *
 * Each record read (job id and job state) is appended to sicp_job_list
 * and added to the job hash, with update_time set to the current time.
 * Nothing is loaded if the file cannot be opened or stat'ed, is smaller
 * than 10 bytes, or does not carry a usable protocol version header.
 */
static void _load_sicp_state(void)
{
    int data_allocated, data_read = 0;
    uint32_t data_size = 0;
    int state_fd, sicp_cnt = 0;
    char *data = NULL, *state_file;
    struct stat stat_buf;
    Buf buffer;
    char *ver_str = NULL;
    uint32_t ver_str_len;
    uint16_t protocol_version = (uint16_t)NO_VAL;
    uint32_t job_id = 0;
    uint32_t job_state = 0;
    sicp_job_t *sicp_ptr;
    time_t buf_time, now;

    /* read the file */
    lock_state_files();
    state_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(state_file, "/sicp_state");
    state_fd = open(state_file, O_RDONLY);
    if (state_fd < 0) {
        error("Could not open job state file %s: %m", state_file);
        unlock_state_files();
        xfree(state_file);
        return;
    } else if (fstat(state_fd, &stat_buf) < 0) {
        error("Could not stat job state file %s: %m", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    } else if (stat_buf.st_size < 10) {
        error("Job state file %s too small", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    }

    data_allocated = BUF_SIZE;
    data = xmalloc(data_allocated);
    /* Grow the buffer so BUF_SIZE bytes are always free to read */
    while (1) {
        data_read = read(state_fd, &data[data_size], BUF_SIZE);
        if (data_read < 0) {
            if (errno == EINTR)
                continue;
            else {
                error("Read error on %s: %m", state_file);
                break;
            }
        } else if (data_read == 0)	/* eof */
            break;
        data_size      += data_read;
        data_allocated += data_read;
        xrealloc(data, data_allocated);
    }
    close(state_fd);
    xfree(state_file);
    unlock_state_files();

    buffer = create_buf(data, data_size);

    /* The version number follows a literal "PROTOCOL_VERSION" tag */
    safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer);
    debug3("Version string in sicp_state header is %s", ver_str);
    if (ver_str && !strcmp(ver_str, "PROTOCOL_VERSION"))
        safe_unpack16(&protocol_version, buffer);
    xfree(ver_str);

    if (protocol_version == (uint16_t)NO_VAL) {
        error("************************************************");
        error("Can not recover SICP state, incompatible version");
        error("************************************************");
        free_buf(buffer);
        return;
    }
    safe_unpack_time(&buf_time, buffer);

    now = time(NULL);
    while (remaining_buf(buffer) > 0) {
        safe_unpack32(&job_id,    buffer);
        safe_unpack32(&job_state, buffer);
        sicp_ptr = xmalloc(sizeof(sicp_job_t));
        sicp_ptr->job_id      = job_id;
        sicp_ptr->job_state   = job_state;
        sicp_ptr->update_time = now;
        list_append(sicp_job_list, sicp_ptr);
        _add_job_hash(sicp_ptr);
        sicp_cnt++;
    }

    free_buf(buffer);
    info("Recovered information about %d sicp jobs", sicp_cnt);
    if (slurm_get_debug_flags() & DEBUG_FLAG_SICP)
        _log_sicp_recs();
    return;

unpack_error:
    error("Incomplete sicp data checkpoint file");
    info("Recovered information about %d sicp jobs", sicp_cnt);
    /* Still allocated if unpacking the version number itself failed */
    xfree(ver_str);
    free_buf(buffer);
    return;
}
Ejemplo n.º 19
0
/*
 * _unpack_job_start_msg - read a dbd_job_start_msg_t out of a buffer.
 *
 * IN/OUT msg     - set to the newly xmalloc'd message; reset to NULL when
 *                  unpacking fails
 * IN rpc_version - protocol version the buffer contents were packed with
 * IN buffer      - buffer to read the fields from
 * RET SLURM_SUCCESS, or SLURM_ERROR if rpc_version is not supported or a
 *     field cannot be unpacked
 *
 * The two supported layouts share every field except for two spots:
 *  - rpc_version >= SLURM_19_05_PROTOCOL_VERSION carries constraints and
 *    db_flags after assoc_id, plus state_reason_prev after job_state;
 *  - older versions (>= SLURM_MIN_PROTOCOL_VERSION) carry one string after
 *    assoc_id (block_id) that is read and thrown away.
 * The fields are therefore read in one pass, branching only at those spots.
 *
 * NOTE: the safe_unpack*() macros are expected to jump to unpack_error
 * on failure, which is why that label has a single explicit goto.
 */
static int _unpack_job_start_msg(void **msg,
				 uint16_t rpc_version, Buf buffer)
{
	uint32_t str_len;
	int has_19_05_fields;
	dbd_job_start_msg_t *job_msg = xmalloc(sizeof(dbd_job_start_msg_t));

	/* Hand the record to the caller right away; the error path below
	 * takes it back. */
	*msg = job_msg;

	/* Values set before anything is read from the buffer */
	job_msg->array_job_id = 0;
	job_msg->array_task_id = NO_VAL;

	has_19_05_fields = (rpc_version >= SLURM_19_05_PROTOCOL_VERSION);
	if (!has_19_05_fields && (rpc_version < SLURM_MIN_PROTOCOL_VERSION))
		goto unpack_error;

	/* Leading fields, identical in both layouts */
	safe_unpackstr_xmalloc(&job_msg->account, &str_len, buffer);
	safe_unpack32(&job_msg->alloc_nodes, buffer);
	safe_unpack32(&job_msg->array_job_id, buffer);
	safe_unpack32(&job_msg->array_max_tasks, buffer);
	safe_unpack32(&job_msg->array_task_id, buffer);
	safe_unpackstr_xmalloc(&job_msg->array_task_str, &str_len, buffer);
	safe_unpack32(&job_msg->array_task_pending, buffer);
	safe_unpack32(&job_msg->assoc_id, buffer);

	/* First point where the layouts differ */
	if (has_19_05_fields) {
		safe_unpackstr_xmalloc(&job_msg->constraints,
				       &str_len, buffer);
		safe_unpack32(&job_msg->db_flags, buffer);
	} else {
		char *block_id = NULL;

		/* Consume the string so the following reads line up,
		 * then drop it. */
		safe_unpackstr_xmalloc(&block_id, &str_len, buffer);
		xfree(block_id); /* block_id */
	}

	safe_unpack64(&job_msg->db_index, buffer);
	safe_unpack_time(&job_msg->eligible_time, buffer);
	safe_unpack32(&job_msg->gid, buffer);
	safe_unpackstr_xmalloc(&job_msg->gres_alloc, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->gres_req, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->gres_used, &str_len, buffer);
	safe_unpack32(&job_msg->job_id, buffer);
	safe_unpack32(&job_msg->job_state, buffer);

	/* Second point where the layouts differ */
	if (has_19_05_fields)
		safe_unpack32(&job_msg->state_reason_prev, buffer);

	/* Trailing fields, identical in both layouts */
	safe_unpackstr_xmalloc(&job_msg->mcs_label, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->name, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->nodes, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->node_inx, &str_len, buffer);
	safe_unpack32(&job_msg->pack_job_id, buffer);
	safe_unpack32(&job_msg->pack_job_offset, buffer);
	safe_unpackstr_xmalloc(&job_msg->partition, &str_len, buffer);
	safe_unpack32(&job_msg->priority, buffer);
	safe_unpack32(&job_msg->qos_id, buffer);
	safe_unpack32(&job_msg->req_cpus, buffer);
	safe_unpack64(&job_msg->req_mem, buffer);
	safe_unpack32(&job_msg->resv_id, buffer);
	safe_unpack_time(&job_msg->start_time, buffer);
	safe_unpack_time(&job_msg->submit_time, buffer);
	safe_unpack32(&job_msg->timelimit, buffer);
	safe_unpackstr_xmalloc(&job_msg->tres_alloc_str, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->tres_req_str, &str_len, buffer);
	safe_unpack32(&job_msg->uid, buffer);
	safe_unpackstr_xmalloc(&job_msg->wckey, &str_len, buffer);
	safe_unpackstr_xmalloc(&job_msg->work_dir, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	/* Release the partially filled record and make sure the caller is
	 * not left holding a pointer to it. */
	slurmdbd_free_job_start_msg(job_msg);
	*msg = NULL;
	return SLURM_ERROR;
}