예제 #1
0
/*
 * Unpack a jobacct_id_t from a buffer: a 32-bit nodeid followed by a
 * 16-bit taskid.
 * OUT jobacct_id  - structure filled in place
 * IN  rpc_version - not consulted by this function
 * IN  buffer      - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR when control reaches unpack_error
 */
static int _unpack_jobacct_id(jobacct_id_t *jobacct_id,
			      uint16_t rpc_version, Buf buffer)
{
	/* NOTE(review): the safe_unpack* macros are the only possible
	 * source of a jump to unpack_error here - confirm in pack.h */
	safe_unpack32(&jobacct_id->nodeid, buffer);
	safe_unpack16(&jobacct_id->taskid, buffer);

	return SLURM_SUCCESS;
unpack_error:
	return SLURM_ERROR;
}
예제 #2
0
/*
 * Allocate a dbd_register_ctld_msg_t and fill it from buffer.
 * OUT msg         - set to the new message, or to NULL on unpack failure
 * IN  rpc_version - nothing is unpacked when this is below
 *		     SLURM_MIN_PROTOCOL_VERSION
 * IN  buffer      - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_register_ctld_msg(dbd_register_ctld_msg_t **msg,
				     uint16_t rpc_version, Buf buffer)
{
	dbd_register_ctld_msg_t *ctld_msg = xmalloc(sizeof(*ctld_msg));

	*msg = ctld_msg;

	/* Unsupported versions: return the freshly allocated record as is */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&ctld_msg->dimensions, buffer);
	safe_unpack32(&ctld_msg->flags, buffer);
	safe_unpack32(&ctld_msg->plugin_id_select, buffer);
	safe_unpack16(&ctld_msg->port, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_register_ctld_msg(ctld_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
예제 #3
0
/*
 * Unpack checkpoint job information written by a checkpoint plugin.
 * IN/OUT jobinfo       - really a struct check_job_info *, filled in place
 * IN  buffer           - buffer to read from
 * IN  protocol_version - selects the wire layout
 * RET SLURM_SUCCESS or SLURM_ERROR (error_msg is released on error)
 *
 * For SLURM_14_03_PROTOCOL_VERSION and later the record is prefixed with a
 * 16-bit plugin id and a 32-bit payload size. A payload whose id is not
 * CHECK_BLCR is skipped rather than decoded.
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	uint32_t uint32_tmp;
	struct check_job_info *check_ptr =
		(struct check_job_info *)jobinfo;

	if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) {
		uint16_t id;
		uint32_t size;

		safe_unpack16(&id, buffer);
		safe_unpack32(&size, buffer);
		if (id != CHECK_BLCR) {
			/* "size" comes straight off the wire: refuse to
			 * move the read offset beyond the data actually
			 * present in the buffer. */
			if (size > remaining_buf(buffer))
				goto unpack_error;
			set_buf_offset(buffer, get_buf_offset(buffer) + size);
		} else {
			safe_unpack16(&check_ptr->disabled, buffer);
			safe_unpack_time(&check_ptr->time_stamp, buffer);
			safe_unpack32(&check_ptr->error_code, buffer);
			safe_unpackstr_xmalloc(&check_ptr->error_msg,
					       &uint32_tmp, buffer);
		}

	} else if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
		/* Older layout: same fields, no id/size prefix */
		safe_unpack16(&check_ptr->disabled, buffer);
		safe_unpack_time(&check_ptr->time_stamp, buffer);
		safe_unpack32(&check_ptr->error_code, buffer);
		safe_unpackstr_xmalloc(&check_ptr->error_msg,
				       &uint32_tmp, buffer);
	}
	return SLURM_SUCCESS;

unpack_error:
	xfree(check_ptr->error_msg);
	return SLURM_ERROR;
}
예제 #4
0
파일: bg_node_info.c 프로젝트: BYUHPC/slurm
/*
 * Allocate a node_subgrp_t and fill it from buffer.
 * OUT subgrp_pptr      - set to the new record, or to NULL on failure
 * IN  buffer           - buffer to read from
 * IN  bitmap_size      - number of bits in the bitmap to build
 * IN  protocol_version - nothing is unpacked when this is below
 *			  SLURM_MIN_PROTOCOL_VERSION
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * The bitmap is rebuilt from the packed range string: bitfmt2int() turns it
 * into (first, last) pairs terminated by a negative value, and each pair is
 * set in a bitmap of bitmap_size bits.
 */
static int _unpack_node_subgrp(node_subgrp_t **subgrp_pptr, Buf buffer,
			       uint16_t bitmap_size, uint16_t protocol_version)
{
	node_subgrp_t *subgrp = xmalloc(sizeof(node_subgrp_t));
	int j;
	uint32_t uint32_tmp;
	uint16_t uint16_tmp;

	*subgrp_pptr = subgrp;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpackstr_xmalloc(&subgrp->str, &uint32_tmp, buffer);
		if (!subgrp->str)
			subgrp->inx = bitfmt2int("");
		else
			subgrp->inx = bitfmt2int(subgrp->str);
		if (!subgrp->inx)
			goto unpack_error;

		subgrp->bitmap = bit_alloc(bitmap_size);

		for (j = 0; subgrp->inx[j] >= 0; j += 2) {
			int first = subgrp->inx[j];
			int last  = subgrp->inx[j+1];

			/* The ranges come from the peer: reject any that
			 * fall outside the bitmap instead of handing them
			 * to bit_nset(). */
			if ((last < 0) ||
			    (first >= (int) bitmap_size) ||
			    (last >= (int) bitmap_size))
				goto unpack_error;
			bit_nset(subgrp->bitmap, first, last);
		}

		safe_unpack16(&subgrp->cnode_cnt, buffer);
		/* state is stored via a 16-bit temporary */
		safe_unpack16(&uint16_tmp, buffer);
		subgrp->state = uint16_tmp;
	}
	return SLURM_SUCCESS;

unpack_error:
	_free_node_subgrp(subgrp);
	*subgrp_pptr = NULL;
	return SLURM_ERROR;
}
예제 #5
0
/*
 * Unpack checkpoint job information into an existing record.
 * IN/OUT jobinfo       - really a struct check_job_info *, filled in place
 * IN  buffer           - buffer to read from
 * IN  protocol_version - nothing is unpacked when this is below
 *			  SLURM_2_3_PROTOCOL_VERSION
 * RET SLURM_SUCCESS or SLURM_ERROR (error_msg is released on error)
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	struct check_job_info *info = (struct check_job_info *)jobinfo;
	uint32_t msg_len;

	if (protocol_version < SLURM_2_3_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&info->disabled, buffer);
	safe_unpack16(&info->reply_cnt, buffer);
	safe_unpack16(&info->wait_time, buffer);

	safe_unpack32(&info->error_code, buffer);
	safe_unpackstr_xmalloc(&info->error_msg, &msg_len, buffer);
	safe_unpack_time(&info->time_stamp, buffer);

	return SLURM_SUCCESS;

unpack_error:
	xfree(info->error_msg);
	return SLURM_ERROR;
}
예제 #6
0
파일: switch_cray.c 프로젝트: RPI-HPC/slurm
/*
 * Restore switch/cray port reservation state from a buffer.
 * IN buffer - saved state: a 16-bit version, then min port, max port and
 *             last allocated port (32 bits each), then the reserved-port
 *             bitmap, either as a packed bit string (SLURM_14_11 and later)
 *             or as PORT_CNT individual bytes
 *
 * Updates the file-level last_alloc_port and port_resv. Logs an error and
 * returns if the version is too old or the saved port range does not match
 * MIN_PORT/MAX_PORT.
 */
static void _state_read_buf(Buf buffer)
{
    uint16_t version = (uint16_t) NO_VAL;
    uint32_t min_port, max_port;
    int idx;

    /* Validate state version */
    safe_unpack16(&version, buffer);
    debug3("Version in switch_cray header is %u", version);
    if (version < SLURM_MIN_PROTOCOL_VERSION) {
        error("******************************************************");
        error("Can't recover switch/cray state, incompatible version");
        error("******************************************************");
        return;
    }

    /* Every supported version starts with the same three words */
    safe_unpack32(&min_port, buffer);
    safe_unpack32(&max_port, buffer);
    safe_unpack32(&last_alloc_port, buffer);

    if (version >= SLURM_14_11_PROTOCOL_VERSION) {
        unpack_bit_str(&port_resv, buffer);
    } else {
        /* Older layout: one byte per port, non-zero means reserved */
        uint8_t in_use = 0;

        port_resv = bit_alloc(PORT_CNT);
        for (idx = 0; idx < PORT_CNT; idx++) {
            safe_unpack8(&in_use, buffer);
            if (in_use)
                bit_set(port_resv, idx);
        }
    }

    if ((min_port == MIN_PORT) && (max_port == MAX_PORT))
        return;

    error("******************************************************");
    error("Can not recover switch/cray state");
    error("Changed MIN_PORT (%u != %u) and/or MAX_PORT (%u != %u)",
          min_port, MIN_PORT, max_port, MAX_PORT);
    error("******************************************************");
    return;

unpack_error:
    CRAY_ERR("unpack error");
}
예제 #7
0
/*
 * Unpack switch/generic node information into an existing record.
 * IN/OUT switch_node   - really a sw_gen_node_info_t *, filled in place
 * IN  buffer           - buffer to read from
 * IN  protocol_version - not consulted by this function
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * Layout: 16-bit interface count, node name, then for each interface its
 * address, family and name strings. On success the record is passed to
 * _cache_node_info(). On failure everything hanging off the record is
 * released and ifa_cnt is reset to 0.
 */
extern int switch_p_unpack_node_info(switch_node_info_t *switch_node,
				     Buf buffer, uint16_t protocol_version)
{
	sw_gen_node_info_t *gen_node_info = (sw_gen_node_info_t *) switch_node;
	sw_gen_ifa_t *ifa_ptr;
	uint32_t uint32_tmp;
	int i;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_unpack_node_info() starting");
	safe_unpack16(&gen_node_info->ifa_cnt, buffer);
	gen_node_info->ifa_array = xmalloc(sizeof(sw_gen_ifa_t *) *
					   gen_node_info->ifa_cnt);
	safe_unpackstr_xmalloc(&gen_node_info->node_name, &uint32_tmp,
			       buffer);
	for (i = 0; i < gen_node_info->ifa_cnt; i++) {
		ifa_ptr = xmalloc(sizeof(sw_gen_ifa_t));
		gen_node_info->ifa_array[i] = ifa_ptr;
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_addr, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_family, &uint32_tmp,
				       buffer);
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_name, &uint32_tmp, buffer);
		if (debug_flags & DEBUG_FLAG_SWITCH) {
			info("%s: node=%s name=%s ip_family=%s address=%s",
			     plugin_type, gen_node_info->node_name,
			     ifa_ptr->ifa_name, ifa_ptr->ifa_family,
			     ifa_ptr->ifa_addr);
		}
	}

	_cache_node_info(gen_node_info);

	return SLURM_SUCCESS;

unpack_error:
	/* A failure part-way through leaves the slots after the failing
	 * interface unallocated, and a failure on the very first unpack
	 * may leave ifa_array itself unset. Skip both instead of
	 * dereferencing them.
	 * NOTE(review): assumes xmalloc() zero-fills so untouched slots
	 * are NULL - confirm. */
	if (gen_node_info->ifa_array) {
		for (i = 0; i < gen_node_info->ifa_cnt; i++) {
			ifa_ptr = gen_node_info->ifa_array[i];
			if (!ifa_ptr)
				continue;
			xfree(ifa_ptr->ifa_addr);
			xfree(ifa_ptr->ifa_family);
			xfree(ifa_ptr->ifa_name);
			xfree(gen_node_info->ifa_array[i]);
		}
		xfree(gen_node_info->ifa_array);
	}
	xfree(gen_node_info->node_name);
	gen_node_info->ifa_cnt = 0;
	return SLURM_ERROR;
}
예제 #8
0
파일: io_hdr.c 프로젝트: A1ve5/slurm
/*
 * Unpack an I/O initialisation message header.
 * OUT hdr    - header filled in place: version, nodeid, stdout_objs,
 *		stderr_objs and cred_signature
 * IN  buffer - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR (after logging) when an unpack fails or
 *     the signature length is not SLURM_IO_KEY_SIZE
 */
static int
io_init_msg_unpack(struct slurm_io_init_msg *hdr, Buf buffer)
{
	uint32_t val;
	safe_unpack16(&hdr->version, buffer);
	safe_unpack32(&hdr->nodeid, buffer);
	safe_unpack32(&hdr->stdout_objs, buffer);
	safe_unpack32(&hdr->stderr_objs, buffer);
	/* NOTE(review): the length is only validated after safe_unpackmem
	 * has already written into hdr->cred_signature; whether that copy
	 * is bounded depends on safe_unpackmem - confirm. */
	safe_unpackmem((char *) hdr->cred_signature, &val, buffer);
	if (val != SLURM_IO_KEY_SIZE)
		goto unpack_error;

	return SLURM_SUCCESS;

    unpack_error:
	error("unpack error in io_init_msg_unpack");
	return SLURM_ERROR;
}
예제 #9
0
/*
 * Unpack a message received on a persistent connection.
 * IN/OUT persist_conn - connection; its flags and version select the
 *			 unpacker, and its auth_cred may be replaced
 * OUT resp_msg        - receives the message type and payload
 * IN  buffer          - buffer to read from
 * RET return code of the underlying unpacker, or SLURM_ERROR if the message
 *     type itself could not be read
 */
extern int slurm_persist_msg_unpack(slurm_persist_conn_t *persist_conn,
				    persist_msg_t *resp_msg, Buf buffer)
{
	int rc;

	xassert(persist_conn);
	xassert(resp_msg);

	if (persist_conn->flags & PERSIST_FLAG_DBD) {
		rc = unpack_slurmdbd_msg((slurmdbd_msg_t *)resp_msg,
					 persist_conn->version,
					 buffer);
	} else {
		slurm_msg_t msg;

		slurm_msg_t_init(&msg);

		msg.protocol_version = persist_conn->version;

		safe_unpack16(&msg.msg_type, buffer);

		rc = unpack_msg(&msg, buffer);

		resp_msg->msg_type = msg.msg_type;
		resp_msg->data = msg.data;
	}

	/* Here we transfer the auth_cred to the persist_conn just in case in
	 * the future we need to use it in some way to verify things for
	 * messages that don't have one that will follow on the connection.
	 *
	 * The payload is only touched when one was actually produced: a
	 * failed unpack can leave resp_msg->data NULL while msg_type is
	 * already set.
	 */
	if ((resp_msg->msg_type == REQUEST_PERSIST_INIT) && resp_msg->data) {
		slurm_msg_t *msg = resp_msg->data;
		if (persist_conn->auth_cred)
			g_slurm_auth_destroy(persist_conn->auth_cred);

		persist_conn->auth_cred = msg->auth_cred;
		msg->auth_cred = NULL;
	}

	return rc;
unpack_error:
	return SLURM_ERROR;
}
예제 #10
0
/*
 * Allocate a dbd_roll_usage_msg_t and fill it from buffer.
 * OUT msg         - set to the new message, or to NULL on unpack failure
 * IN  rpc_version - nothing is unpacked when this is below
 *		     SLURM_MIN_PROTOCOL_VERSION
 * IN  buffer      - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_roll_usage_msg(dbd_roll_usage_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	dbd_roll_usage_msg_t *roll_msg = xmalloc(sizeof(*roll_msg));

	*msg = roll_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&roll_msg->archive_data, buffer);
	safe_unpack_time(&roll_msg->end, buffer);
	safe_unpack_time(&roll_msg->start, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_roll_usage_msg(roll_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
예제 #11
0
/*
 * Allocate a persist_rc_msg_t and fill it from buffer.
 * OUT msg              - set to the new message, or to NULL on failure
 * IN  buffer           - buffer to read from
 * IN  protocol_version - not consulted by this function
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * Wire order: comment string, 32-bit rc, 16-bit ret_info.
 */
extern int slurm_persist_unpack_rc_msg(
	persist_rc_msg_t **msg, Buf buffer, uint16_t protocol_version)
{
	persist_rc_msg_t *rc_msg = xmalloc(sizeof(*rc_msg));
	uint32_t comment_len;

	*msg = rc_msg;

	safe_unpackstr_xmalloc(&rc_msg->comment, &comment_len, buffer);
	safe_unpack32(&rc_msg->rc, buffer);
	safe_unpack16(&rc_msg->ret_info, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurm_persist_free_rc_msg(rc_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
예제 #12
0
/*
 * Consume checkpoint job information from a buffer without decoding it.
 * IN  jobinfo          - not consulted by this function
 * IN  buffer           - buffer to read from
 * IN  protocol_version - nothing is read when this is below
 *			  SLURM_MIN_PROTOCOL_VERSION
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * The record starts with a 16-bit plugin id and a 32-bit payload size. A
 * payload whose id is not CHECK_NONE is skipped.
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	uint16_t id;
	uint32_t size;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack16(&id, buffer);
		safe_unpack32(&size, buffer);
		if (id != CHECK_NONE) {
			/* "size" comes straight off the wire: refuse to
			 * move the read offset beyond the data actually
			 * present in the buffer. */
			if (size > remaining_buf(buffer))
				goto unpack_error;
			set_buf_offset(buffer, get_buf_offset(buffer) + size);
		}
	}
	return SLURM_SUCCESS;

unpack_error:
	return SLURM_ERROR;
}
예제 #13
0
파일: select_cray.c 프로젝트: mrhaoji/slurm
/*
 * Allocate a select_jobinfo_t and fill it from buffer.
 * OUT jobinfo_pptr     - set to the new record, or to NULL on failure
 * IN  buffer           - buffer to read from
 * IN  protocol_version - not consulted by this function
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * Only the 16-bit "cleaning" field travels on the wire; magic is stamped
 * locally with JOBINFO_MAGIC.
 */
static int _select_jobinfo_unpack(select_jobinfo_t **jobinfo_pptr,
				  Buf buffer, uint16_t protocol_version)
{
	select_jobinfo_t *new_info = xmalloc(sizeof(struct select_jobinfo));

	new_info->magic = JOBINFO_MAGIC;
	*jobinfo_pptr = new_info;

	safe_unpack16(&new_info->cleaning, buffer);
	return SLURM_SUCCESS;

unpack_error:
	select_p_select_jobinfo_free(new_info);
	*jobinfo_pptr = NULL;
	return SLURM_ERROR;
}
예제 #14
0
/*
 * Allocate a dbd_step_comp_msg_t and fill it from buffer.
 * OUT msg         - set to the new message, or to NULL on failure
 * IN  rpc_version - must be at least SLURM_MIN_PROTOCOL_VERSION, otherwise
 *		     the call fails
 * IN  buffer      - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR (logged at debug2)
 */
static int _unpack_step_complete_msg(dbd_step_comp_msg_t **msg,
				     uint16_t rpc_version, Buf buffer)
{
	uint32_t uint32_tmp;
	dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t));
	*msg = msg_ptr;

	if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack32(&msg_ptr->assoc_id, buffer);
		safe_unpack64(&msg_ptr->db_index, buffer);
		safe_unpack_time(&msg_ptr->end_time, buffer);
		safe_unpack32(&msg_ptr->exit_code, buffer);
		/* A failed accounting unpack must abort the whole message:
		 * continuing would read the remaining fields from the
		 * wrong offset. */
		if (jobacctinfo_unpack(
			    (struct jobacctinfo **)&msg_ptr->jobacct,
			    rpc_version, PROTOCOL_TYPE_DBD, buffer, 1)
		    != SLURM_SUCCESS)
			goto unpack_error;
		safe_unpack32(&msg_ptr->job_id, buffer);
		safe_unpack_time(&msg_ptr->job_submit_time, buffer);
		safe_unpackstr_xmalloc(&msg_ptr->job_tres_alloc_str,
				       &uint32_tmp, buffer);
		safe_unpack32(&msg_ptr->req_uid, buffer);
		safe_unpack_time(&msg_ptr->start_time, buffer);
		safe_unpack16(&msg_ptr->state, buffer);
		safe_unpack32(&msg_ptr->step_id, buffer);
		safe_unpack32(&msg_ptr->total_tasks, buffer);
	} else
		goto unpack_error;

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_complete_msg:"
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_complete_msg(msg_ptr);
	*msg = NULL;
	return SLURM_ERROR;
}
예제 #15
0
/*
 * fed_mgr_state_load - read the saved federation state file and unpack it.
 * IN state_save_location - directory containing FED_MGR_STATE_FILE
 * RET the unpacked federation record, with every cluster's fed.recv and
 *     fed.send connection destroyed and set to NULL; or NULL when the file
 *     cannot be opened, its version is outside
 *     [SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION], unpacking
 *     fails, or the record has no name or no clusters
 */
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location)
{
	Buf buffer = NULL;
	char *data = NULL, *state_file;
	time_t buf_time;
	uint16_t ver = 0;
	uint32_t data_size = 0;
	int state_fd;
	int data_allocated, data_read = 0, error_code = SLURM_SUCCESS;
	slurmdb_federation_rec_t *ret_fed = NULL;

	state_file = xstrdup_printf("%s/%s", state_save_location,
				    FED_MGR_STATE_FILE);
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		error("No fed_mgr state file (%s) to recover", state_file);
		xfree(state_file);
		return NULL;
	} else {
		/* Slurp the whole file, growing the buffer so that there is
		 * always room for one more BUF_SIZE read. */
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);

	/* buffer is handed "data"; it is released through free_buf() */
	buffer = create_buf(data, data_size);

	safe_unpack16(&ver, buffer);

	debug3("Version in fed_mgr_state header is %u", ver);
	if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) {
		error("***********************************************");
		/* The accepted range is inclusive at both ends, so the
		 * message says ">=" for the lower bound. */
		error("Can not recover fed_mgr state, incompatible version, "
		      "got %u need >= %u <= %u", ver,
		      SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		free_buf(buffer);
		return NULL;
	}

	/* Save time stamp: read to advance the buffer, otherwise unused */
	safe_unpack_time(&buf_time, buffer);

	error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver,
						   buffer);
	if (error_code != SLURM_SUCCESS)
		goto unpack_error;
	else if (!ret_fed || !ret_fed->name ||
		 !list_count(ret_fed->cluster_list)) {
		slurmdb_destroy_federation_rec(ret_fed);
		ret_fed = NULL;
		error("No feds retrieved");
	} else {
		/* We want to free the connections here since they don't exist
		 * anymore, but they were packed when state was saved. */
		slurmdb_cluster_rec_t *cluster;
		ListIterator itr = list_iterator_create(
			ret_fed->cluster_list);
		while ((cluster = list_next(itr))) {
			slurm_persist_conn_destroy(cluster->fed.recv);
			cluster->fed.recv = NULL;
			slurm_persist_conn_destroy(cluster->fed.send);
			cluster->fed.send = NULL;
		}
		list_iterator_destroy(itr);
	}

	free_buf(buffer);

	return ret_fed;

unpack_error:
	free_buf(buffer);

	return NULL;
}
예제 #16
0
/*
 * Restore switch/cray port reservation state from a buffer.
 * IN buffer - saved state: a 16-bit version, then min port, max port and
 *             last allocated port (32 bits each), then the reserved-port
 *             bitmap, either as a hex bit string (SLURM_14_11 and later)
 *             or as PORT_CNT individual bytes
 *
 * last_alloc_port and port_resv are updated while holding port_mutex. If
 * the restored bitmap is missing or not PORT_CNT bits long it is
 * reallocated to PORT_CNT. Logs an error and returns if the version is too
 * old or the saved port range does not match MIN_PORT/MAX_PORT.
 */
static void _state_read_buf(Buf buffer)
{
	uint16_t protocol_version = (uint16_t) NO_VAL;
	uint32_t min_port, max_port;
	int i;
	int locked = 0;	/* non-zero while this function holds port_mutex */

	/* Validate state version */
	safe_unpack16(&protocol_version, buffer);
	debug3("Version in switch_cray header is %u", protocol_version);
	if (protocol_version < SLURM_MIN_PROTOCOL_VERSION) {
		error("******************************************************");
		error("Can't recover switch/cray state, incompatible version");
		error("******************************************************");
		return;
	}

	pthread_mutex_lock(&port_mutex);
	locked = 1;
	if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) {
		safe_unpack32(&min_port, buffer);
		safe_unpack32(&max_port, buffer);
		safe_unpack32(&last_alloc_port, buffer);
		/* make sure we are NULL here */
		FREE_NULL_BITMAP(port_resv);
		unpack_bit_str_hex(&port_resv, buffer);
	} else if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		uint8_t port_set = 0;
		safe_unpack32(&min_port, buffer);
		safe_unpack32(&max_port, buffer);
		safe_unpack32(&last_alloc_port, buffer);
		/* make sure we are NULL here */
		FREE_NULL_BITMAP(port_resv);
		port_resv = bit_alloc(PORT_CNT);
		/* Older layout: one byte per port, non-zero means reserved */
		for (i = 0; i < PORT_CNT; i++) {
			safe_unpack8(&port_set, buffer);
			if (port_set)
				bit_set(port_resv, i);
		}
	}

	if (!port_resv || (bit_size(port_resv) != PORT_CNT)) {
		error("_state_read_buf: Reserve Port size was %d not %d, "
		      "reallocating",
		      port_resv ? bit_size(port_resv) : -1, PORT_CNT);
		port_resv = bit_realloc(port_resv, PORT_CNT);
	}
	pthread_mutex_unlock(&port_mutex);
	locked = 0;

	if ((min_port != MIN_PORT) || (max_port != MAX_PORT)) {
		error("******************************************************");
		error("Can not recover switch/cray state");
		error("Changed MIN_PORT (%u != %u) and/or MAX_PORT (%u != %u)",
		      min_port, MIN_PORT, max_port, MAX_PORT);
		error("******************************************************");
		return;
	}

	return;

unpack_error:
	/* The safe_unpack* calls inside the critical section jump here
	 * with port_mutex still held; release it so later callers do not
	 * deadlock. The version unpack fails before the lock is taken,
	 * hence the flag. */
	if (locked)
		pthread_mutex_unlock(&port_mutex);
	CRAY_ERR("unpack error");
	return;
}
예제 #17
0
/*
 * Allocate a slurm_step_layout_t and fill it from buffer.
 * OUT layout           - set to the new layout; set to NULL on unpack
 *			  failure; left untouched when no layout was packed
 *			  or the protocol version is not handled
 * IN  buffer           - buffer to read from
 * IN  protocol_version - SLURM_2_3 and later carry a front_end string;
 *			  SLURM_2_1 up to that do not; anything older is
 *			  not unpacked at all
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * The record starts with a 16-bit flag; zero means no layout follows.
 */
extern int unpack_slurm_step_layout(slurm_step_layout_t **layout, Buf buffer,
				    uint16_t protocol_version)
{
	uint16_t present;
	uint32_t tid_cnt, str_len;
	slurm_step_layout_t *new_layout = NULL;
	int node;
	int has_front_end =
		(protocol_version >= SLURM_2_3_PROTOCOL_VERSION);

	if (!has_front_end &&
	    (protocol_version < SLURM_2_1_PROTOCOL_VERSION))
		return SLURM_SUCCESS;

	safe_unpack16(&present, buffer);
	if (!present)
		return SLURM_SUCCESS;

	new_layout = xmalloc(sizeof(slurm_step_layout_t));
	*layout = new_layout;

	/* The two supported layouts differ only by this leading string */
	if (has_front_end) {
		safe_unpackstr_xmalloc(&new_layout->front_end,
				       &str_len, buffer);
	}
	safe_unpackstr_xmalloc(&new_layout->node_list, &str_len, buffer);
	safe_unpack32(&new_layout->node_cnt, buffer);
	safe_unpack32(&new_layout->task_cnt, buffer);
	safe_unpack16(&new_layout->task_dist, buffer);

	/* One task count and one task-id array per node */
	new_layout->tasks =
		xmalloc(sizeof(uint32_t) * new_layout->node_cnt);
	new_layout->tids =
		xmalloc(sizeof(uint32_t *) * new_layout->node_cnt);
	for (node = 0; node < new_layout->node_cnt; node++) {
		safe_unpack32_array(&(new_layout->tids[node]),
				    &tid_cnt, buffer);
		new_layout->tasks[node] = tid_cnt;
	}

	return SLURM_SUCCESS;

unpack_error:
	slurm_step_layout_destroy(new_layout);
	*layout = NULL;
	return SLURM_ERROR;
}
예제 #18
0
파일: sicp.c 프로젝트: rohgarg/slurm
/*
 * Load saved SICP job records from "<state_save_location>/sicp_state".
 *
 * The file holds a version string, a 16-bit protocol version (only when the
 * string is "PROTOCOL_VERSION"), a time stamp, then (job_id, job_state)
 * pairs of 32-bit values until the buffer is exhausted. Each pair becomes a
 * sicp_job_t appended to sicp_job_list and added to the job hash.
 *
 * Returns early, after logging, if the file cannot be opened or stat'ed, is
 * smaller than 10 bytes, or carries no usable version. A truncated file
 * keeps whatever records were read before the truncation.
 */
static void _load_sicp_state(void)
{
    int data_allocated, data_read = 0;
    uint32_t data_size = 0;
    int state_fd, sicp_cnt = 0;
    char *data = NULL, *state_file;
    struct stat stat_buf;
    Buf buffer;
    char *ver_str = NULL;
    uint32_t ver_str_len;
    uint16_t protocol_version = (uint16_t)NO_VAL;
    uint32_t job_id = 0;
    uint32_t job_state = 0;
    sicp_job_t *sicp_ptr;
    time_t buf_time, now;

    /* read the file */
    lock_state_files();
    state_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(state_file, "/sicp_state");
    state_fd = open(state_file, O_RDONLY);
    if (state_fd < 0) {
        error("Could not open job state file %s: %m", state_file);
        unlock_state_files();
        xfree(state_file);
        return;
    } else if (fstat(state_fd, &stat_buf) < 0) {
        error("Could not stat job state file %s: %m", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    } else if (stat_buf.st_size < 10) {
        error("Job state file %s too small", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    }

    /* Slurp the whole file, growing the buffer so that there is always
     * room for one more BUF_SIZE read. */
    data_allocated = BUF_SIZE;
    data = xmalloc(data_allocated);
    while (1) {
        data_read = read(state_fd, &data[data_size], BUF_SIZE);
        if (data_read < 0) {
            if (errno == EINTR)
                continue;
            else {
                error("Read error on %s: %m", state_file);
                break;
            }
        } else if (data_read == 0)	/* eof */
            break;
        data_size      += data_read;
        data_allocated += data_read;
        xrealloc(data, data_allocated);
    }
    close(state_fd);
    xfree(state_file);
    unlock_state_files();

    buffer = create_buf(data, data_size);
    safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer);
    debug3("Version string in sicp_state header is %s", ver_str);
    if (ver_str && !strcmp(ver_str, "PROTOCOL_VERSION"))
        safe_unpack16(&protocol_version, buffer);
    xfree(ver_str);

    if (protocol_version == (uint16_t)NO_VAL) {
        error("************************************************");
        error("Can not recover SICP state, incompatible version");
        error("************************************************");
        free_buf(buffer);
        return;
    }
    safe_unpack_time(&buf_time, buffer);

    now = time(NULL);
    while (remaining_buf(buffer) > 0) {
        /* Unpack both fields before allocating so that a short record
         * cannot leave a half-built entry behind. */
        safe_unpack32(&job_id,    buffer);
        safe_unpack32(&job_state, buffer);
        sicp_ptr = xmalloc(sizeof(sicp_job_t));
        sicp_ptr->job_id      = job_id;
        sicp_ptr->job_state   = job_state;
        sicp_ptr->update_time = now;
        list_append(sicp_job_list, sicp_ptr);
        _add_job_hash(sicp_ptr);
        sicp_cnt++;
    }

    free_buf(buffer);
    info("Recovered information about %d sicp jobs", sicp_cnt);
    if (slurm_get_debug_flags() & DEBUG_FLAG_SICP)
        _log_sicp_recs();
    return;

unpack_error:
    error("Incomplete sicp data checkpoint file");
    info("Recovered information about %d sicp jobs", sicp_cnt);
    /* ver_str is still allocated when the protocol version unpack is
     * what failed; release it here so that path does not leak. */
    xfree(ver_str);
    free_buf(buffer);
    return;
}
예제 #19
0
파일: bg_job_info.c 프로젝트: jsollom/slurm
/*
 * Unpack a select job credential from a buffer.
 * OUT jobinfo_pptr     - the select job credential read; set to NULL on
 *			  unpack failure
 * IN  buffer           - buffer holding the credential, read from its
 *			  current offset
 * IN  protocol_version - slurm protocol version of the client; versions
 *			  below SLURM_2_5_PROTOCOL_VERSION are logged as
 *			  unsupported and nothing is unpacked
 * RET slurm error code
 * NOTE: returned value must be freed using free_jobinfo
 */
extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer,
				 uint16_t protocol_version)
{
	int dim;
	uint32_t str_len;
	uint16_t mp_cnode_cnt;
	int dim_total = slurmdb_setup_cluster_dims();
	select_jobinfo_t *jobinfo = xmalloc(sizeof(struct select_jobinfo));
	char *bit_fmt = NULL;

	*jobinfo_pptr = jobinfo;
	jobinfo->magic = JOBINFO_MAGIC;

	if (protocol_version < SLURM_2_5_PROTOCOL_VERSION) {
		error("unpack_select_jobinfo: protocol_version "
		      "%hu not supported", protocol_version);
		return SLURM_SUCCESS;
	}

	/* The packed dimension count overrides the local cluster's */
	safe_unpack16(&jobinfo->dim_cnt, buffer);
	xassert(jobinfo->dim_cnt);
	dim_total = jobinfo->dim_cnt;

	for (dim = 0; dim < dim_total; dim++) {
		safe_unpack16(&(jobinfo->geometry[dim]), buffer);
		safe_unpack16(&(jobinfo->conn_type[dim]), buffer);
		safe_unpack16(&(jobinfo->start_loc[dim]), buffer);
	}

	safe_unpack16(&(jobinfo->reboot), buffer);
	safe_unpack16(&(jobinfo->rotate), buffer);

	safe_unpack32(&(jobinfo->block_cnode_cnt), buffer);
	safe_unpack32(&(jobinfo->cnode_cnt), buffer);

	safe_unpackstr_xmalloc(&(jobinfo->bg_block_id), &str_len, buffer);
	safe_unpackstr_xmalloc(&(jobinfo->mp_str), &str_len, buffer);
	safe_unpackstr_xmalloc(&(jobinfo->ionode_str), &str_len, buffer);

	safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), &str_len, buffer);
	safe_unpackstr_xmalloc(&(jobinfo->linuximage), &str_len, buffer);
	safe_unpackstr_xmalloc(&(jobinfo->mloaderimage), &str_len, buffer);
	safe_unpackstr_xmalloc(&(jobinfo->ramdiskimage), &str_len, buffer);

	/* Two bitmaps of mp_cnode_cnt bits, each sent as a format string;
	 * a bitmap is only created when its string is present. */
	safe_unpack16(&mp_cnode_cnt, buffer);

	safe_unpackstr_xmalloc(&bit_fmt, &str_len, buffer);
	if (bit_fmt) {
		jobinfo->units_avail = bit_alloc(mp_cnode_cnt);
		bit_unfmt(jobinfo->units_avail, bit_fmt);
		xfree(bit_fmt);
	}

	safe_unpackstr_xmalloc(&bit_fmt, &str_len, buffer);
	if (bit_fmt) {
		jobinfo->units_used = bit_alloc(mp_cnode_cnt);
		bit_unfmt(jobinfo->units_used, bit_fmt);
		xfree(bit_fmt);
	}

	return SLURM_SUCCESS;

unpack_error:
	free_select_jobinfo(jobinfo);
	*jobinfo_pptr = NULL;
	return SLURM_ERROR;
}
/*
 * Unpack a "lite" slurmd configuration into an existing slurmd_conf_t.
 * IN/OUT conf - configuration record filled in place
 * IN  buffer  - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR; on error the strings and arrays hanging
 *     off conf that this function unpacks are released
 *
 * The stream begins with a string. If it equals PROTOCOL_VERSION a 16-bit
 * protocol version follows; otherwise the string is the hostname of a
 * SLURM_16_05_PROTOCOL_VERSION sender. From SLURM_17_02_PROTOCOL_VERSION on
 * the hostname is sent explicitly and real_memory_size is 64 bits wide
 * rather than 32. All other fields are identical across versions.
 */
extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer)
{
	uint32_t uint32_tmp;
	uint16_t protocol_version;
	char *ver_str = NULL;

	safe_unpackstr_xmalloc(&ver_str, &uint32_tmp, buffer);
	if (ver_str && !xstrcmp(ver_str, PROTOCOL_VERSION)) {
		/* Release the marker before the next unpack so that a
		 * failure there cannot leak it. */
		xfree(ver_str);
		safe_unpack16(&protocol_version, buffer);
	} else {
		/* Kludge to add protocol version in existing data structure:
		 * use "hostname" as a version marker. Eliminate this in
		 * the future. */
		conf->hostname = ver_str;
		protocol_version = SLURM_16_05_PROTOCOL_VERSION;
	}

	if (protocol_version >= SLURM_17_02_PROTOCOL_VERSION) {
		safe_unpackstr_xmalloc(&conf->hostname, &uint32_tmp, buffer);
	}
	safe_unpack16(&conf->cpus, buffer);
	safe_unpack16(&conf->boards, buffer);
	safe_unpack16(&conf->sockets, buffer);
	safe_unpack16(&conf->cores, buffer);
	safe_unpack16(&conf->threads, buffer);
	if (protocol_version >= SLURM_17_02_PROTOCOL_VERSION) {
		safe_unpack64(&conf->real_memory_size, buffer);
	} else {
		/* Older senders pack the memory size as 32 bits */
		uint32_t real_memory_size = 0;
		safe_unpack32(&real_memory_size, buffer);
		conf->real_memory_size = real_memory_size;
	}
	safe_unpack16(&conf->block_map_size, buffer);
	safe_unpack16_array(&conf->block_map, &uint32_tmp, buffer);
	safe_unpack16_array(&conf->block_map_inv,  &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->spooldir,    &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->node_name,   &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->logfile,     &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->task_prolog, &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->task_epilog, &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->job_acct_gather_freq, &uint32_tmp,
			       buffer);
	safe_unpackstr_xmalloc(&conf->job_acct_gather_type, &uint32_tmp,
			       buffer);
	safe_unpack16(&conf->propagate_prio, buffer);
	safe_unpack64(&conf->debug_flags, buffer);
	/* The next three fields travel as 32-bit values and are narrowed
	 * or converted on assignment. */
	safe_unpack32(&uint32_tmp, buffer);
	conf->debug_level = uint32_tmp;
	safe_unpack32(&uint32_tmp, buffer);
	conf->daemonize = uint32_tmp;
	safe_unpack32(&uint32_tmp, buffer);
	conf->slurm_user_id = (uid_t)uint32_tmp;
	safe_unpack16(&conf->use_pam, buffer);
	safe_unpack32(&conf->task_plugin_param, buffer);
	safe_unpackstr_xmalloc(&conf->node_topo_addr, &uint32_tmp, buffer);
	safe_unpackstr_xmalloc(&conf->node_topo_pattern, &uint32_tmp, buffer);
	safe_unpack32(&uint32_tmp, buffer);
	conf->port = uint32_tmp;
	safe_unpack16(&conf->log_fmt, buffer);
	safe_unpack16(&conf->mem_limit_enforce, buffer);
	safe_unpack64(&conf->msg_aggr_window_msgs, buffer);

	return SLURM_SUCCESS;

unpack_error:
	error("unpack_error in unpack_slurmd_conf_lite_no_alloc: %m");
	xfree(conf->job_acct_gather_freq);
	xfree(conf->job_acct_gather_type);
	xfree(conf->hostname);
	xfree(conf->spooldir);
	xfree(conf->node_name);
	xfree(conf->logfile);
	xfree(conf->task_prolog);
	xfree(conf->task_epilog);
	xfree(conf->node_topo_addr);
	xfree(conf->node_topo_pattern);
	/* The block maps are unpacked here too, so release them like the
	 * strings above.
	 * NOTE(review): assumes safe_unpack16_array allocates with
	 * xmalloc - confirm. */
	xfree(conf->block_map);
	xfree(conf->block_map_inv);
	return SLURM_ERROR;
}
예제 #21
0
/*
 * Unpack a slurmdbd message: read the 16-bit message type, then dispatch to
 * the unpacker for that type, which stores its result in resp->data.
 * OUT resp        - msg_type is always set once read; data is set by the
 *		     type-specific unpacker (left untouched for the types
 *		     that carry no payload)
 * IN  rpc_version - protocol version of the sender; versions below
 *		     SLURM_MIN_PROTOCOL_VERSION are rejected
 * IN  buffer      - buffer to read from
 * RET return code of the type-specific unpacker, or SLURM_ERROR when the
 *     type cannot be read, the version is too old or the type is unknown
 */
extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp,
			       uint16_t rpc_version, Buf buffer)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t msg;

	safe_unpack16(&resp->msg_type, buffer);

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION) {
		error("slurmdbd: Invalid message version=%hu, type:%hu",
		      rpc_version, resp->msg_type);
		return SLURM_ERROR;
	}

	switch (resp->msg_type) {
	case PERSIST_RC:
		/* Unpacked through the generic unpack_msg() using a
		 * temporary slurm_msg_t; only its data is kept. */
		slurm_msg_t_init(&msg);

		msg.protocol_version = rpc_version;
		msg.msg_type = resp->msg_type;

		rc = unpack_msg(&msg, buffer);

		resp->data = msg.data;
		break;
	case REQUEST_PERSIST_INIT:
		/* Here resp->data is a whole slurm_msg_t, not just a
		 * payload. */
		resp->data = xmalloc(sizeof(slurm_msg_t));
		slurm_msg_t_init(resp->data);
		rc = slurm_unpack_received_msg(
			(slurm_msg_t *)resp->data, 0, buffer);
		break;
	/* Messages carrying a list of records */
	case DBD_ADD_ACCOUNTS:
	case DBD_ADD_TRES:
	case DBD_ADD_ASSOCS:
	case DBD_ADD_CLUSTERS:
	case DBD_ADD_FEDERATIONS:
	case DBD_ADD_RES:
	case DBD_ADD_USERS:
	case DBD_GOT_ACCOUNTS:
	case DBD_GOT_TRES:
	case DBD_GOT_ASSOCS:
	case DBD_GOT_CLUSTERS:
	case DBD_GOT_EVENTS:
	case DBD_GOT_FEDERATIONS:
	case DBD_GOT_JOBS:
	case DBD_GOT_LIST:
	case DBD_GOT_PROBS:
	case DBD_ADD_QOS:
	case DBD_GOT_QOS:
	case DBD_GOT_RESVS:
	case DBD_GOT_RES:
	case DBD_ADD_WCKEYS:
	case DBD_GOT_WCKEYS:
	case DBD_GOT_TXN:
	case DBD_GOT_USERS:
	case DBD_GOT_CONFIG:
	case DBD_SEND_MULT_JOB_START:
	case DBD_GOT_MULT_JOB_START:
	case DBD_SEND_MULT_MSG:
	case DBD_GOT_MULT_MSG:
	case DBD_FIX_RUNAWAY_JOB:
		rc = slurmdbd_unpack_list_msg(
			(dbd_list_msg_t **)&resp->data, rpc_version,
			resp->msg_type, buffer);
		break;
	case DBD_ADD_ACCOUNT_COORDS:
	case DBD_REMOVE_ACCOUNT_COORDS:
		rc = _unpack_acct_coord_msg(
			(dbd_acct_coord_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	case DBD_ARCHIVE_LOAD:
		rc = slurmdb_unpack_archive_rec(
			&resp->data, rpc_version, buffer);
		break;
	case DBD_CLUSTER_TRES:
	case DBD_FLUSH_JOBS:
		rc = _unpack_cluster_tres_msg(
			(dbd_cluster_tres_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	/* Messages carrying a query/removal condition */
	case DBD_GET_ACCOUNTS:
	case DBD_GET_TRES:
	case DBD_GET_ASSOCS:
	case DBD_GET_CLUSTERS:
	case DBD_GET_EVENTS:
	case DBD_GET_FEDERATIONS:
	case DBD_GET_JOBS_COND:
	case DBD_GET_PROBS:
	case DBD_GET_QOS:
	case DBD_GET_RESVS:
	case DBD_GET_RES:
	case DBD_GET_TXN:
	case DBD_GET_USERS:
	case DBD_GET_WCKEYS:
	case DBD_REMOVE_ACCOUNTS:
	case DBD_REMOVE_ASSOCS:
	case DBD_REMOVE_CLUSTERS:
	case DBD_REMOVE_FEDERATIONS:
	case DBD_REMOVE_QOS:
	case DBD_REMOVE_RES:
	case DBD_REMOVE_WCKEYS:
	case DBD_REMOVE_USERS:
	case DBD_ARCHIVE_DUMP:
		rc = _unpack_cond_msg(
			(dbd_cond_msg_t **)&resp->data, rpc_version,
			resp->msg_type, buffer);
		break;
	case DBD_GET_ASSOC_USAGE:
	case DBD_GOT_ASSOC_USAGE:
	case DBD_GET_CLUSTER_USAGE:
	case DBD_GOT_CLUSTER_USAGE:
	case DBD_GET_WCKEY_USAGE:
	case DBD_GOT_WCKEY_USAGE:
		rc = slurmdbd_unpack_usage_msg(
			(dbd_usage_msg_t **)&resp->data, rpc_version,
			resp->msg_type, buffer);
		break;
	case DBD_FINI:
		rc = slurmdbd_unpack_fini_msg((dbd_fini_msg_t **)&resp->data,
					      rpc_version,
					      buffer);
		break;
	case DBD_JOB_COMPLETE:
		rc = _unpack_job_complete_msg(
			(dbd_job_comp_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	case DBD_JOB_START:
		rc = _unpack_job_start_msg(
			&resp->data, rpc_version, buffer);
		break;
	case DBD_ID_RC:
		rc = slurmdbd_unpack_id_rc_msg(
			&resp->data, rpc_version, buffer);
		break;
	case DBD_JOB_SUSPEND:
		rc = _unpack_job_suspend_msg(
			(dbd_job_suspend_msg_t **)&resp->data, rpc_version,
			buffer);
		break;
	case DBD_MODIFY_ACCOUNTS:
	case DBD_MODIFY_ASSOCS:
	case DBD_MODIFY_CLUSTERS:
	case DBD_MODIFY_FEDERATIONS:
	case DBD_MODIFY_JOB:
	case DBD_MODIFY_QOS:
	case DBD_MODIFY_RES:
	case DBD_MODIFY_USERS:
		rc = _unpack_modify_msg(
			(dbd_modify_msg_t **)&resp->data,
			rpc_version,
			resp->msg_type,
			buffer);
		break;
	case DBD_NODE_STATE:
		rc = _unpack_node_state_msg(
			(dbd_node_state_msg_t **)&resp->data, rpc_version,
			buffer);
		break;
	case DBD_STEP_COMPLETE:
		rc = _unpack_step_complete_msg(
			(dbd_step_comp_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	case DBD_STEP_START:
		rc = _unpack_step_start_msg(
			(dbd_step_start_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	case DBD_REGISTER_CTLD:
		rc = _unpack_register_ctld_msg(
			(dbd_register_ctld_msg_t **)&resp->data,
			rpc_version, buffer);
		break;
	case DBD_ROLL_USAGE:
		rc = _unpack_roll_usage_msg(
			(dbd_roll_usage_msg_t **)&resp->data, rpc_version,
			buffer);
		break;
	case DBD_ADD_RESV:
	case DBD_REMOVE_RESV:
	case DBD_MODIFY_RESV:
		rc = _unpack_rec_msg(
			(dbd_rec_msg_t **)&resp->data, rpc_version,
			resp->msg_type, buffer);
		break;
	case DBD_GET_CONFIG:
		rc = _unpack_config_name(
			(char **)&resp->data, rpc_version, buffer);
		break;
	case DBD_RECONFIG:
	case DBD_GET_STATS:
	case DBD_CLEAR_STATS:
	case DBD_SHUTDOWN:
		/* No message to unpack */
		break;
	case DBD_GOT_STATS:
		rc = slurmdb_unpack_stats_msg(
			(void **)&resp->data, rpc_version, buffer);
		break;
	default:
		error("slurmdbd: Invalid message type unpack %u(%s)",
		      resp->msg_type,
		      slurmdbd_msg_type_2_str(resp->msg_type, 1));
		return SLURM_ERROR;
	}
	return rc;

	/* Reached only when the message type itself could not be read */
unpack_error:
	return SLURM_ERROR;
}
예제 #22
0
파일: front_end.c 프로젝트: Cray/slurm
/*
 * load_all_front_end_state - Load the front_end node state from file, recover
 *	on slurmctld restart. Execute this after loading the configuration
 *	file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason
 *	Use this to overwrite the "UNKNOWN" state typically used in slurm.conf
 * RET 0 or error code (EFAULT when the version string does not match or the
 *	saved data cannot be unpacked); always 0 when built without
 *	HAVE_FRONT_END
 * NOTE: READ lock_slurmctld config before entry
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
	int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
	uint16_t node_state;
	uint32_t data_size = 0, name_len;
	uint32_t reason_uid = NO_VAL;
	time_t reason_time = 0;
	front_end_record_t *front_end_ptr;
	int state_fd;
	time_t time_stamp;
	Buf buffer;
	char *ver_str = NULL;
	uint16_t protocol_version = (uint16_t) NO_VAL;

	/* Read the whole state file into memory while holding the file lock */
	lock_state_files ();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd < 0) {
		info ("No node state file (%s) to recover", state_file);
		/*
		 * NOTE(review): execution continues below with data == NULL
		 * and data_size == 0; it looks like every later path then
		 * returns EFAULT rather than this ENOENT -- confirm.
		 */
		error_code = ENOENT;
	} else {
		/*
		 * The buffer starts at BUF_SIZE bytes and grows by the number
		 * of bytes just read, so there is always BUF_SIZE bytes of
		 * free space past data_size for the next read().
		 */
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size], BUF_SIZE);
			if (data_read < 0) {
				/* Retry interrupted reads, give up on others */
				if (errno == EINTR)
					continue;
				else {
					error ("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* end of file */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close (state_fd);
	}
	xfree (state_file);
	unlock_state_files ();

	/* Wrap the raw bytes in a buffer; it is released with free_buf() */
	buffer = create_buf (data, data_size);

	/* Header: a version string, then a time stamp */
	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, FRONT_END_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		}
	}

	/* protocol_version is still NO_VAL if the version string was absent
	 * or did not match FRONT_END_STATE_VERSION */
	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	/* One record per iteration until the buffer is exhausted */
	while (remaining_buf (buffer) > 0) {
		uint16_t base_state;
		if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
			safe_unpack16 (&node_state,  buffer);
			safe_unpackstr_xmalloc (&reason,    &name_len, buffer);
			safe_unpack_time (&reason_time, buffer);
			safe_unpack32 (&reason_uid,  buffer);
			base_state = node_state & NODE_STATE_BASE;
		} else
			goto unpack_error;

		/* validity test as possible (none is currently performed) */

		/* find record and perform update */
		front_end_ptr = find_front_end_record(node_name);
		if (front_end_ptr == NULL) {
			/* Saved node is not configured any more: skip it */
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (state_only) {
			uint16_t orig_flags;
			orig_flags = front_end_ptr->node_state &
				     NODE_STATE_FLAGS;
			node_cnt++;
			/*
			 * Only a record for which IS_NODE_UNKNOWN() holds has
			 * its state changed: a saved DOWN base state replaces
			 * it (keeping the current flags minus COMPLETING) and
			 * saved DRAIN / FAIL flags are OR-ed in.
			 */
			if (IS_NODE_UNKNOWN(front_end_ptr)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					front_end_ptr->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN) {
					 front_end_ptr->node_state |=
						 NODE_STATE_DRAIN;
				}
				if (node_state & NODE_STATE_FAIL) {
					front_end_ptr->node_state |=
						NODE_STATE_FAIL;
				}
			}
			/* Adopt the saved reason only if the record has none */
			if (front_end_ptr->reason == NULL) {
				front_end_ptr->reason = reason;
				reason = NULL;	/* now owned by the record */
				front_end_ptr->reason_time = reason_time;
				front_end_ptr->reason_uid = reason_uid;
			}
		} else {
			/* Full recovery: saved state and reason replace the
			 * record's current values */
			node_cnt++;
			front_end_ptr->node_state = node_state;
			xfree(front_end_ptr->reason);
			front_end_ptr->reason	= reason;
			reason			= NULL;	/* now owned by the record */
			front_end_ptr->reason_time	= reason_time;
			front_end_ptr->reason_uid	= reason_uid;
			front_end_ptr->last_response	= (time_t) 0;
		}

		/* Release the per-record temporaries still owned locally */
		xfree(node_name);
		xfree(reason);
	}

	/* Common exit, also reached from unpack_error below */
fini:	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf (buffer);
	return error_code;

	/* Target of the failed safe_unpack* calls and of the explicit goto */
unpack_error:
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	xfree (node_name);
	xfree(reason);
	goto fini;
#else
	return 0;
#endif
}
예제 #23
0
/*
 * unpack_slurmd_conf_lite_no_alloc - unpack a slurmd configuration record
 *	from a buffer into a structure supplied by the caller
 * IN/OUT conf - structure to fill in; the strings and arrays stored in it
 *	are allocated here and released again if unpacking fails
 * IN buffer - buffer holding a version string, a 16-bit protocol version and
 *	then the configuration fields
 * RET SLURM_SUCCESS, or SLURM_ERROR if the buffer could not be unpacked
 * NOTE: when the protocol version read from the buffer is below
 *	SLURM_MIN_PROTOCOL_VERSION no field is unpacked and SLURM_SUCCESS is
 *	still returned.
 */
extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer)
{
	uint32_t uint32_tmp;
	uint16_t protocol_version;
	char *ver_str = NULL;

	/* The version string is only consumed, never inspected */
	safe_unpackstr_xmalloc(&ver_str, &uint32_tmp, buffer);
	safe_unpack16(&protocol_version, buffer);
	xfree(ver_str);

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpackstr_xmalloc(&conf->hostname, &uint32_tmp, buffer);
		safe_unpack16(&conf->cpus, buffer);
		safe_unpack16(&conf->boards, buffer);
		safe_unpack16(&conf->sockets, buffer);
		safe_unpack16(&conf->cores, buffer);
		safe_unpack16(&conf->threads, buffer);
		safe_unpack64(&conf->real_memory_size, buffer);
		safe_unpack16(&conf->block_map_size, buffer);
		safe_unpack16_array(&conf->block_map, &uint32_tmp, buffer);
		safe_unpack16_array(&conf->block_map_inv,  &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->spooldir,    &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->node_name,   &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->logfile,     &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->task_prolog, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->task_epilog, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->job_acct_gather_freq, &uint32_tmp,
				       buffer);
		safe_unpackstr_xmalloc(&conf->job_acct_gather_type, &uint32_tmp,
				       buffer);
		safe_unpack16(&conf->propagate_prio, buffer);
		safe_unpack64(&conf->debug_flags, buffer);
		/* These fields travel as 32-bit values and are narrowed or
		 * converted to the member type on assignment */
		safe_unpack32(&uint32_tmp, buffer);
		conf->debug_level = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		conf->daemonize = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		conf->slurm_user_id = (uid_t)uint32_tmp;
		safe_unpack16(&conf->use_pam, buffer);
		safe_unpack32(&conf->task_plugin_param, buffer);
		safe_unpackstr_xmalloc(&conf->node_topo_addr, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->node_topo_pattern, &uint32_tmp, buffer);
		safe_unpack32(&uint32_tmp, buffer);
		conf->port = uint32_tmp;
		safe_unpack16(&conf->log_fmt, buffer);
		safe_unpack16(&conf->mem_limit_enforce, buffer);
		safe_unpack64(&conf->msg_aggr_window_msgs, buffer);
		safe_unpackstr_xmalloc(&conf->tmpfs, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&conf->x11_params, &uint32_tmp, buffer);
	}

	return SLURM_SUCCESS;

	/* Reached from any safe_unpack* call above that fails */
unpack_error:
	/* Log first so that %m still reflects the failure */
	error("unpack_error in unpack_slurmd_conf_lite_no_alloc: %m");
	/* ver_str is still allocated if the protocol version was unreadable */
	xfree(ver_str);
	xfree(conf->block_map);
	xfree(conf->block_map_inv);
	xfree(conf->job_acct_gather_freq);
	xfree(conf->job_acct_gather_type);
	xfree(conf->hostname);
	xfree(conf->spooldir);
	xfree(conf->node_name);
	xfree(conf->logfile);
	xfree(conf->task_prolog);
	xfree(conf->task_epilog);
	xfree(conf->node_topo_addr);
	xfree(conf->node_topo_pattern);
	xfree(conf->tmpfs);
	xfree(conf->x11_params);
	return SLURM_ERROR;
}
예제 #24
0
파일: slurmdbd_agent.c 프로젝트: mej/slurm
/*
 * _handle_mult_rc_ret - receive one response from slurmdbd_conn and act on it
 *
 * A DBD_GOT_MULT_MSG response carries a list of buffers, each holding a
 * return code. While agent_list exists, every buffer that unpacks
 * successfully causes one entry to be dequeued from agent_list and freed;
 * processing stops at the first buffer that does not.
 * A PERSIST_RC response carries a single return code, which is logged when
 * it is not SLURM_SUCCESS (and is fatal for a failed DBD_REGISTER_CTLD when
 * accounting storage enforcement is on).
 *
 * RET SLURM_ERROR if nothing was received or unpacked, otherwise the last
 *	return code obtained from the response
 */
static int _handle_mult_rc_ret(void)
{
	int rc = SLURM_ERROR;
	uint16_t msg_type;
	persist_rc_msg_t *rc_msg = NULL;
	dbd_list_msg_t *mult_msg = NULL;
	Buf buffer = slurm_persist_recv_msg(slurmdbd_conn);

	if (!buffer)
		return rc;

	safe_unpack16(&msg_type, buffer);

	if (msg_type == DBD_GOT_MULT_MSG) {
		if (slurmdbd_unpack_list_msg(&mult_msg, slurmdbd_conn->version,
					     DBD_GOT_MULT_MSG, buffer)
		    != SLURM_SUCCESS) {
			error("slurmdbd: unpack message error");
			goto unpack_error;
		}

		slurm_mutex_lock(&agent_lock);
		if (agent_list) {
			Buf rc_buf;
			Buf acked;
			ListIterator iter =
				list_iterator_create(mult_msg->my_list);

			while ((rc_buf = list_next(iter))) {
				rc = _unpack_return_code(slurmdbd_conn->version,
							 rc_buf);
				if (rc != SLURM_SUCCESS)
					break;

				/* One queued request per return code */
				acked = list_dequeue(agent_list);
				if (!acked) {
					error("slurmdbd: DBD_GOT_MULT_MSG "
					      "unpack message error");
					continue;
				}
				free_buf(acked);
			}
			list_iterator_destroy(iter);
		}
		slurm_mutex_unlock(&agent_lock);
		slurmdbd_free_list_msg(mult_msg);
	} else if (msg_type == PERSIST_RC) {
		if (slurm_persist_unpack_rc_msg(&rc_msg, buffer,
						slurmdbd_conn->version)
		    != SLURM_SUCCESS) {
			error("slurmdbd: unpack message error");
			goto unpack_error;
		}

		rc = rc_msg->rc;
		if (rc != SLURM_SUCCESS) {
			if ((rc_msg->ret_info == DBD_REGISTER_CTLD) &&
			    slurm_get_accounting_storage_enforce()) {
				error("slurmdbd: PERSIST_RC is %d from "
				      "%s(%u): %s",
				      rc,
				      slurmdbd_msg_type_2_str(
					      rc_msg->ret_info, 1),
				      rc_msg->ret_info,
				      rc_msg->comment);
				fatal("You need to add this cluster "
				      "to accounting if you want to "
				      "enforce associations, or no "
				      "jobs will ever run.");
			} else {
				debug("slurmdbd: PERSIST_RC is %d from "
				      "%s(%u): %s",
				      rc,
				      slurmdbd_msg_type_2_str(
					      rc_msg->ret_info, 1),
				      rc_msg->ret_info,
				      rc_msg->comment);
			}
		}
		slurm_persist_free_rc_msg(rc_msg);
	} else {
		error("slurmdbd: bad message type %d != PERSIST_RC", msg_type);
	}

	/* Common exit: also the target of a failed safe_unpack16() */
unpack_error:
	free_buf(buffer);
	return rc;
}
예제 #25
0
/*
 * load_all_part_state - load the partition state from file, recover on
 *	slurmctld restart. execute this after loading the configuration
 *	file data.
 * RET 0 if all records were processed, EINVAL if a record held invalid data
 *	(processing stops at that record), EFAULT if the version string is
 *	not recognized or the saved data cannot be unpacked
 * NOTE: READ lock_slurmctld config before entry
 */
int load_all_part_state(void)
{
	char *part_name = NULL, *allow_groups = NULL, *nodes = NULL;
	char *state_file, *data = NULL;
	uint32_t max_time, default_time, max_nodes, min_nodes;
	uint32_t max_cpus_per_node = INFINITE, grace_time = 0;
	time_t time_stamp;
	uint16_t flags;
	uint16_t max_share, preempt_mode, priority, state_up, cr_type;
	struct part_record *part_ptr;
	uint32_t data_size = 0, name_len;
	int data_allocated, data_read = 0, error_code = 0, part_cnt = 0;
	int state_fd;
	Buf buffer;
	char *ver_str = NULL;
	char *allow_alloc_nodes = NULL;
	uint16_t protocol_version = (uint16_t)NO_VAL;
	char *alternate = NULL;

	/* Read the whole state file into memory while holding the file lock */
	lock_state_files();
	state_fd = _open_part_state_file(&state_file);
	if (state_fd < 0) {
		info("No partition state file (%s) to recover",
		     state_file);
		error_code = ENOENT;
	} else {
		/*
		 * The buffer starts at BUF_SIZE bytes and grows by the number
		 * of bytes just read, so BUF_SIZE bytes are always free past
		 * data_size for the next read().
		 */
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if  (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	/* Header: a version string, then a time stamp */
	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in part_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, PART_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		} else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) {
			protocol_version = SLURM_2_5_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t)NO_VAL) {
		error("**********************************************************");
		error("Can not recover partition state, data version incompatible");
		error("**********************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);
	safe_unpack_time(&time_stamp, buffer);

	/* One partition record per iteration until the buffer is exhausted */
	while (remaining_buf(buffer) > 0) {
		if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_cpus_per_node, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;

			safe_unpack16(&state_up, buffer);
			safe_unpack16(&cr_type, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
		} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
			/* Older layout: no max_cpus_per_node and no cr_type */
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;
			cr_type = 0;	/* Default value */

			safe_unpack16(&state_up, buffer);
			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
		} else {
			error("load_all_part_state: protocol_version "
			      "%hu not supported", protocol_version);
			goto unpack_error;
		}

		/* validity test as possible: none of the *_CLR flag bits may
		 * be set in a saved record, and state_up must be in range */
		if ((flags & PART_FLAG_DEFAULT_CLR) ||
		    (flags & PART_FLAG_HIDDEN_CLR)  ||
		    (flags & PART_FLAG_NO_ROOT_CLR) ||
		    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
		    (flags & PART_FLAG_REQ_RESV_CLR)) {
			error("Invalid data for partition %s: flags=%u",
			      part_name, flags);
			error_code = EINVAL;
		}
		if (state_up > PARTITION_UP) {
			error("Invalid data for partition %s: state_up=%u",
			      part_name, state_up);
			error_code = EINVAL;
		}
		if (error_code) {
			error("No more partition data will be processed from "
			      "the checkpoint file");
			xfree(allow_groups);
			xfree(allow_alloc_nodes);
			xfree(alternate);
			xfree(part_name);
			xfree(nodes);
			error_code = EINVAL;
			break;
		}

		/* find record and perform update */
		part_ptr = list_find_first(part_list, &list_find_part,
					   part_name);
		part_cnt++;
		if (part_ptr == NULL) {
			info("load_all_part_state: partition %s missing from "
				"configuration file", part_name);
			part_ptr = create_part_record();
			xfree(part_ptr->name);
			part_ptr->name = xstrdup(part_name);
		}

		part_ptr->flags          = flags;
		if (part_ptr->flags & PART_FLAG_DEFAULT) {
			xfree(default_part_name);
			default_part_name = xstrdup(part_name);
			default_part_loc = part_ptr;
		}
		part_ptr->max_time       = max_time;
		part_ptr->default_time   = default_time;
		part_ptr->max_cpus_per_node = max_cpus_per_node;
		part_ptr->max_nodes      = max_nodes;
		part_ptr->max_nodes_orig = max_nodes;
		part_ptr->min_nodes      = min_nodes;
		part_ptr->min_nodes_orig = min_nodes;
		part_ptr->max_share      = max_share;
		part_ptr->grace_time     = grace_time;
		/* A saved NO_VAL leaves the record's preempt mode untouched */
		if (preempt_mode != (uint16_t) NO_VAL)
			part_ptr->preempt_mode   = preempt_mode;
		part_ptr->priority       = priority;
		part_ptr->state_up       = state_up;
		part_ptr->cr_type	 = cr_type;

		/*
		 * The unpacked strings now belong to the partition record.
		 * Clear the locals so that a failure while unpacking the next
		 * record cannot free memory the record still points at.
		 */
		xfree(part_ptr->allow_groups);
		part_ptr->allow_groups   = allow_groups;
		allow_groups             = NULL;
		xfree(part_ptr->allow_alloc_nodes);
		part_ptr->allow_alloc_nodes   = allow_alloc_nodes;
		allow_alloc_nodes             = NULL;
		xfree(part_ptr->alternate);
		part_ptr->alternate      = alternate;
		alternate                = NULL;
		xfree(part_ptr->nodes);
		part_ptr->nodes = nodes;
		nodes           = NULL;

		xfree(part_name);
	}

	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return error_code;

	/* Target of the failed safe_unpack* calls and of the explicit goto */
      unpack_error:
	error("Incomplete partition data checkpoint file");
	info("Recovered state of %d partitions", part_cnt);
	/* Release whatever the interrupted record had already unpacked */
	xfree(part_name);
	xfree(allow_groups);
	xfree(allow_alloc_nodes);
	xfree(alternate);
	xfree(nodes);
	free_buf(buffer);
	return EFAULT;
}
예제 #26
0
/*
 * unpack_slurmd_conf_lite_no_alloc - unpack a slurmd configuration record
 *      from a buffer into a structure supplied by the caller
 * IN/OUT conf - structure to fill in; the strings and arrays stored in it
 *      are allocated here and released again if unpacking fails
 * IN buffer - buffer holding the packed configuration fields
 * RET SLURM_SUCCESS, or SLURM_ERROR if the buffer could not be unpacked
 */
extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer)
{
    uint32_t uint32_tmp;

    safe_unpackstr_xmalloc(&conf->hostname, &uint32_tmp, buffer);
    safe_unpack16(&conf->cpus, buffer);
    safe_unpack16(&conf->boards, buffer);
    safe_unpack16(&conf->sockets, buffer);
    safe_unpack16(&conf->cores, buffer);
    safe_unpack16(&conf->threads, buffer);
    safe_unpack32(&conf->real_memory_size, buffer);
    safe_unpack16(&conf->block_map_size, buffer);
    safe_unpack16_array(&conf->block_map, &uint32_tmp, buffer);
    safe_unpack16_array(&conf->block_map_inv,  &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->spooldir,    &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->node_name,   &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->logfile,     &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->task_prolog, &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->task_epilog, &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->job_acct_gather_freq, &uint32_tmp,
                           buffer);
    safe_unpackstr_xmalloc(&conf->job_acct_gather_type, &uint32_tmp,
                           buffer);
    safe_unpack16(&conf->propagate_prio, buffer);
    safe_unpack32(&conf->debug_flags, buffer);
    /* These fields travel as 32-bit values and are narrowed or converted
     * to the member type on assignment */
    safe_unpack32(&uint32_tmp, buffer);
    conf->debug_level = uint32_tmp;
    safe_unpack32(&uint32_tmp, buffer);
    conf->daemonize = uint32_tmp;
    safe_unpack32(&uint32_tmp, buffer);
    conf->slurm_user_id = (uid_t)uint32_tmp;
    safe_unpack16(&conf->use_pam, buffer);
    safe_unpack16(&conf->task_plugin_param, buffer);
    safe_unpackstr_xmalloc(&conf->node_topo_addr, &uint32_tmp, buffer);
    safe_unpackstr_xmalloc(&conf->node_topo_pattern, &uint32_tmp, buffer);
    /* Store the port as soon as it is read instead of carrying it in the
     * scratch variable across the following unpack calls */
    safe_unpack32(&uint32_tmp, buffer);
    conf->port = uint32_tmp;
    safe_unpack16(&conf->log_fmt, buffer);
    safe_unpack16(&conf->mem_limit_enforce, buffer);
    return SLURM_SUCCESS;

    /* Reached from any safe_unpack* call above that fails */
unpack_error:
    /* Log first so that %m still reflects the failure */
    error("unpack_error in unpack_slurmd_conf_lite_no_alloc: %m");
    xfree(conf->block_map);
    xfree(conf->block_map_inv);
    xfree(conf->job_acct_gather_freq);
    xfree(conf->job_acct_gather_type);
    xfree(conf->hostname);
    xfree(conf->spooldir);
    xfree(conf->node_name);
    xfree(conf->logfile);
    xfree(conf->task_prolog);
    xfree(conf->task_epilog);
    xfree(conf->node_topo_addr);
    xfree(conf->node_topo_pattern);
    return SLURM_ERROR;
}