Exemple #1
0
extern Buf slurm_persist_make_rc_msg(slurm_persist_conn_t *persist_conn,
				     uint32_t rc, char *comment,
				     uint16_t ret_info)
{
	persist_rc_msg_t msg;
	persist_msg_t resp;

	memset(&msg, 0, sizeof(persist_rc_msg_t));
	memset(&resp, 0, sizeof(persist_msg_t));

	msg.rc = rc;
	msg.comment = comment;
	msg.ret_info = ret_info;

	resp.msg_type = PERSIST_RC;
	resp.data = &msg;

	return slurm_persist_msg_pack(persist_conn, &resp);
}
Exemple #2
0
/* Send an RPC to the SlurmDBD. Do not wait for the reply. The RPC
 * will be queued and processed later if the SlurmDBD is not responding.
 * NOTE: slurm_open_slurmdbd_conn() must have been called with callbacks set
 *
 * Returns SLURM_SUCCESS or an error code */
extern int send_slurmdbd_msg(uint16_t rpc_version, slurmdbd_msg_t *req)
{
	Buf buffer;
	int cnt, rc = SLURM_SUCCESS;
	static time_t syslog_time = 0;
	static int max_agent_queue = 0;

	/*
	 * Whatever our max job count is multiplied by 2 plus node count
	 * multiplied by 4 or MAX_AGENT_QUEUE which ever is bigger.
	 */
	if (!max_agent_queue)
		max_agent_queue =
			MAX(MAX_AGENT_QUEUE,
			    ((slurmctld_conf.max_job_cnt * 2) +
			     (node_record_count * 4)));

	buffer = slurm_persist_msg_pack(
		slurmdbd_conn, (persist_msg_t *)req);
	if (!buffer)	/* pack error */
		return SLURM_ERROR;

	slurm_mutex_lock(&agent_lock);
	if ((agent_tid == 0) || (agent_list == NULL)) {
		_create_agent();
		if ((agent_tid == 0) || (agent_list == NULL)) {
			slurm_mutex_unlock(&agent_lock);
			free_buf(buffer);
			return SLURM_ERROR;
		}
	}
	cnt = list_count(agent_list);
	if ((cnt >= (max_agent_queue / 2)) &&
	    (difftime(time(NULL), syslog_time) > 120)) {
		/* Record critical error every 120 seconds */
		syslog_time = time(NULL);
		error("slurmdbd: agent queue filling (%d), RESTART SLURMDBD NOW",
		      cnt);
		syslog(LOG_CRIT, "*** RESTART SLURMDBD NOW ***");
		if (slurmdbd_conn->trigger_callbacks.dbd_fail)
			(slurmdbd_conn->trigger_callbacks.dbd_fail)();
	}
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_step_req();
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_job_start_req();
	if (cnt < max_agent_queue) {
		if (list_enqueue(agent_list, buffer) == NULL)
			fatal("list_enqueue: memory allocation failure");
	} else {
		error("slurmdbd: agent queue is full (%u), discarding %s:%u request",
		      cnt,
		      slurmdbd_msg_type_2_str(req->msg_type, 1),
		      req->msg_type);
		if (slurmdbd_conn->trigger_callbacks.acct_full)
			(slurmdbd_conn->trigger_callbacks.acct_full)();
		free_buf(buffer);
		rc = SLURM_ERROR;
	}

	slurm_cond_broadcast(&agent_cond);
	slurm_mutex_unlock(&agent_lock);
	return rc;
}