/*
 * _thread_ipmi_run is the thread calling ipmi and launching _thread_ipmi_write
 */
static void *_thread_ipmi_run(void *no_data)
{
// need input (attr)
	struct timeval tvnow;
	struct timespec abs;

	flag_energy_accounting_shutdown = false;
	if (debug_flags & DEBUG_FLAG_ENERGY)
		info("ipmi-thread: launched");

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	slurm_mutex_lock(&ipmi_mutex);
	if (_thread_init() != SLURM_SUCCESS) {
		if (debug_flags & DEBUG_FLAG_ENERGY)
			info("ipmi-thread: aborted");
		slurm_mutex_unlock(&ipmi_mutex);

		slurm_cond_signal(&launch_cond);

		return NULL;
	}

	(void) pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);

	slurm_mutex_unlock(&ipmi_mutex);
	flag_thread_started = true;

	slurm_cond_signal(&launch_cond);

	/* setup timer */
	gettimeofday(&tvnow, NULL);
	abs.tv_sec = tvnow.tv_sec;
	abs.tv_nsec = tvnow.tv_usec * 1000;

	//loop until slurm stop
	while (!flag_energy_accounting_shutdown) {
		slurm_mutex_lock(&ipmi_mutex);

		_thread_update_node_energy();

		/* Sleep until the next time. */
		abs.tv_sec += slurm_ipmi_conf.freq;
		slurm_cond_timedwait(&ipmi_cond, &ipmi_mutex, &abs);

		slurm_mutex_unlock(&ipmi_mutex);
	}

	if (debug_flags & DEBUG_FLAG_ENERGY)
		info("ipmi-thread: ended");

	return NULL;
}
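The loop above builds an absolute deadline from gettimeofday() and advances it by slurm_ipmi_conf.freq before every timed wait, so a signal on ipmi_cond at shutdown wakes the sampler early instead of letting it sleep out the full interval. A minimal stand-alone sketch of the same absolute-deadline polling pattern, assuming the slurm_mutex_*/slurm_cond_* macros are thin wrappers around their pthread counterparts (poll_loop, sample and shutdown_requested are illustrative names, not part of Slurm):

#include <pthread.h>
#include <stdbool.h>
#include <sys/time.h>

static pthread_mutex_t poll_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  poll_cond  = PTHREAD_COND_INITIALIZER;
static bool shutdown_requested = false;	/* set and signalled at shutdown */

/* Sample every freq seconds until shutdown_requested is set and poll_cond
 * is signalled; advancing one absolute deadline avoids drift. */
static void poll_loop(int freq, void (*sample)(void))
{
	struct timeval now;
	struct timespec abs;

	gettimeofday(&now, NULL);
	abs.tv_sec  = now.tv_sec;
	abs.tv_nsec = now.tv_usec * 1000;

	pthread_mutex_lock(&poll_mutex);
	while (!shutdown_requested) {
		sample();
		abs.tv_sec += freq;
		pthread_cond_timedwait(&poll_cond, &poll_mutex, &abs);
	}
	pthread_mutex_unlock(&poll_mutex);
}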
Example #2
/* Terminate the gang scheduling thread and free its data structures */
extern void gs_fini(void)
{
	if (!(slurmctld_conf.preempt_mode & PREEMPT_MODE_GANG))
		return;

	/* terminate the timeslicer thread */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_fini");
	slurm_mutex_lock(&thread_flag_mutex);
	if (thread_running) {
		slurm_mutex_lock(&term_lock);
		thread_shutdown = true;
		slurm_cond_signal(&term_cond);
		slurm_mutex_unlock(&term_lock);
		slurm_mutex_unlock(&thread_flag_mutex);
		usleep(120000);
		if (timeslicer_thread_id)
			error("gang: timeslicer pthread still running");
	} else {
		slurm_mutex_unlock(&thread_flag_mutex);
	}

	FREE_NULL_LIST(preempt_job_list);

	slurm_mutex_lock(&data_mutex);
	FREE_NULL_LIST(gs_part_list);
	gs_part_list = NULL;
	xfree(gs_bits_per_node);
	slurm_mutex_unlock(&data_mutex);
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_fini");
}
Example #3
/* Terminate builtin_agent */
extern void stop_builtin_agent(void)
{
	slurm_mutex_lock(&term_lock);
	stop_builtin = true;
	slurm_cond_signal(&term_cond);
	slurm_mutex_unlock(&term_lock);
}
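stop_builtin_agent() is the minimal form of the shutdown handshake used throughout these examples: set the flag while holding the mutex, signal, then unlock. A sketch of the matching waiter side, assuming plain pthreads underneath the slurm_* wrappers (wait_for_stop is an illustrative name, not the actual builtin agent loop):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  term_cond = PTHREAD_COND_INITIALIZER;
static bool stop_builtin = false;	/* set by the stop function above */

/* Waiter side of the pattern: sleep until stop_builtin is set.  The
 * predicate is re-checked in a loop to tolerate spurious wakeups and a
 * signal that arrives before the wait starts. */
static void wait_for_stop(void)
{
	pthread_mutex_lock(&term_lock);
	while (!stop_builtin)
		pthread_cond_wait(&term_cond, &term_lock);
	pthread_mutex_unlock(&term_lock);
}

Re-checking the flag under the same mutex is what makes the single signal safe even if it is sent before the waiter reaches pthread_cond_wait().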
Example #4
static void *_create_container_thread(void *args)
{
	stepd_step_rec_t *job = (stepd_step_rec_t *)args;

	job->cont_id = (uint64_t)job_create(0, job->uid, 0);

	/* Signal the container_create we are done */
	slurm_mutex_lock(&notify_mutex);

	/* We need to signal failure or not */
	slurm_cond_signal(&notify);

	/* Don't unlock notify_mutex before the wait below; it is not
	 * needed and doing so can cause a deadlock. */

	if (job->cont_id == (jid_t)-1)
		error("Failed to create job container: %m");
	else
		/* Wait around for something else to be added and then exit
		   when that takes place.
		*/
		slurm_cond_wait(&notify, &notify_mutex);

	slurm_mutex_unlock(&notify_mutex);

	return NULL;
}
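The handshake above only works if the creating code already holds notify_mutex when it spawns this thread and then waits on notify, so the signal cannot fire before anyone is listening; _end_container_thread() (Example #10 below) later signals the same condition to let the thread exit. A hedged sketch of that creator side, assuming plain pthreads (create_and_wait is an illustrative name, not the plugin's real entry point):

#include <pthread.h>

static pthread_mutex_t notify_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  notify       = PTHREAD_COND_INITIALIZER;

/* Hypothetical creator side of the handshake: hold notify_mutex before
 * spawning the worker, then wait.  The worker can only signal after it
 * acquires the same mutex, so the wakeup cannot be lost. */
static int create_and_wait(pthread_t *tid, void *(*worker)(void *), void *arg)
{
	int rc;

	pthread_mutex_lock(&notify_mutex);
	rc = pthread_create(tid, NULL, worker, arg);
	if (rc == 0)
		pthread_cond_wait(&notify, &notify_mutex);
	pthread_mutex_unlock(&notify_mutex);
	return rc;
}

Like the original, this relies on the worker signalling exactly once; adding a predicate flag checked in a while loop would also guard against spurious wakeups.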
void step_terminate_monitor_stop(void)
{
	slurm_mutex_lock(&lock);

	if (!running_flag) {
		slurm_mutex_unlock(&lock);
		return;
	}
	if (stop_flag) {
		error("step_terminate_monitor_stop: already stopped");
		slurm_mutex_unlock(&lock);
		return;
	}

	stop_flag = 1;
	debug("step_terminate_monitor_stop signalling condition");
	slurm_cond_signal(&cond);
	slurm_mutex_unlock(&lock);

	if (pthread_join(tid, NULL) != 0) {
		error("step_terminate_monitor_stop: pthread_join: %m");
	}

	xfree(program_name);
	return;
}
extern void acct_gather_profile_endpoll(void)
{
	int i;

	if (!acct_gather_profile_running) {
		debug2("acct_gather_profile_startpoll: poll already ended!");
		return;
	}

	acct_gather_profile_running = false;

	for (i=0; i < PROFILE_CNT; i++) {
		/* end remote threads */
		slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex);
		slurm_cond_signal(&acct_gather_profile_timer[i].notify);
		slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex);
		slurm_cond_destroy(&acct_gather_profile_timer[i].notify);
		acct_gather_profile_timer[i].freq = 0;
		switch (i) {
		case PROFILE_ENERGY:
			break;
		case PROFILE_TASK:
			jobacct_gather_endpoll();
			break;
		case PROFILE_FILESYSTEM:
			break;
		case PROFILE_NETWORK:
			break;
		default:
			fatal("Unhandled profile option %d please update "
			      "slurm_acct_gather_profile.c "
			      "(acct_gather_profile_endpoll)", i);
		}
	}
}
Example #7
static msg_aggr_t *_handle_msg_aggr_ret(uint32_t msg_index, bool locked)
{
	msg_aggr_t *msg_aggr;
	ListIterator itr;

	if (!locked)
		slurm_mutex_lock(&msg_collection.aggr_mutex);

	itr = list_iterator_create(msg_collection.msg_aggr_list);

	while ((msg_aggr = list_next(itr))) {
		/* just remove them all */
		if (!msg_index) {
			/* make sure we don't wait any longer */
			slurm_cond_signal(&msg_aggr->wait_cond);
			list_remove(itr);
		} else if (msg_aggr->msg_index == msg_index) {
			list_remove(itr);
			break;
		}

	}
	list_iterator_destroy(itr);

	if (!locked)
		slurm_mutex_unlock(&msg_collection.aggr_mutex);

	return msg_aggr;
}
Example #8
void heartbeat_stop(void)
{
	slurm_mutex_lock(&heartbeat_mutex);
	if (heart_beating) {
		heart_beating = false;
		slurm_cond_signal(&heartbeat_cond);
	}
	slurm_mutex_unlock(&heartbeat_mutex);
}
Example #9
/*
 * config_power_mgr - Read power management configuration
 */
extern void config_power_mgr(void)
{
	slurm_mutex_lock(&power_mutex);
	if (!power_save_config) {
		if (_init_power_config() == 0)
			power_save_enabled = true;
		power_save_config = true;
	}
	slurm_cond_signal(&power_cond);
	slurm_mutex_unlock(&power_mutex);
}
Example #10
static void _end_container_thread(void)
{
	if (threadid) {
		/* This will end the thread and remove it from the container */
		slurm_mutex_lock(&thread_mutex);
		slurm_mutex_lock(&notify_mutex);
		slurm_cond_signal(&notify);
		slurm_mutex_unlock(&notify_mutex);

		pthread_join(threadid, NULL);
		threadid = 0;
		slurm_mutex_unlock(&thread_mutex);
	}
}
Example #11
void _destroy_tree_fwd(fwd_tree_t *fwd_tree)
{
	if (fwd_tree) {
		if (fwd_tree->tree_hl)
			hostlist_destroy(fwd_tree->tree_hl);

		/*
		 * Lock and decrease thread counter, start_msg_tree is waiting
		 * for a null thread count to exit its main loop
		 */
		slurm_mutex_lock(fwd_tree->tree_mutex);
		(*(fwd_tree->p_thr_count))--;
		slurm_cond_signal(fwd_tree->notify);
		slurm_mutex_unlock(fwd_tree->tree_mutex);

		xfree(fwd_tree);
	}
}
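The comment above states the contract: each forwarding thread decrements the shared counter and signals, and start_msg_tree() waits until the count reaches zero. A sketch of what that waiter looks like under this contract, assuming plain pthreads (wait_for_tree is illustrative and not the actual start_msg_tree() implementation):

#include <pthread.h>

/* Hypothetical waiter matching _destroy_tree_fwd(): block until every
 * forwarding thread has decremented the shared counter and signalled. */
static void wait_for_tree(pthread_mutex_t *tree_mutex, pthread_cond_t *notify,
			  int *p_thr_count)
{
	pthread_mutex_lock(tree_mutex);
	while (*p_thr_count > 0)
		pthread_cond_wait(notify, tree_mutex);
	pthread_mutex_unlock(tree_mutex);
}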
Example #12
extern void msg_aggr_sender_fini(void)
{
	if (!msg_collection.running)
		return;
	msg_collection.running = 0;
	slurm_mutex_lock(&msg_collection.mutex);

	slurm_cond_signal(&msg_collection.cond);
	slurm_mutex_unlock(&msg_collection.mutex);

	pthread_join(msg_collection.thread_id, NULL);
	msg_collection.thread_id = (pthread_t) 0;

	slurm_cond_destroy(&msg_collection.cond);
	/* signal and clear the waiting list */
	slurm_mutex_lock(&msg_collection.aggr_mutex);
	_handle_msg_aggr_ret(0, 1);
	FREE_NULL_LIST(msg_collection.msg_aggr_list);
	slurm_mutex_unlock(&msg_collection.aggr_mutex);
	FREE_NULL_LIST(msg_collection.msg_list);
	slurm_mutex_destroy(&msg_collection.mutex);
}
extern int acct_gather_interconnect_fini(void)
{
	int rc2, rc = SLURM_SUCCESS;
	int i;

	slurm_mutex_lock(&g_context_lock);
	init_run = false;

	if (watch_node_thread_id) {
		slurm_mutex_unlock(&g_context_lock);
		slurm_mutex_lock(&profile_timer->notify_mutex);
		slurm_cond_signal(&profile_timer->notify);
		slurm_mutex_unlock(&profile_timer->notify_mutex);
		pthread_join(watch_node_thread_id, NULL);
		slurm_mutex_lock(&g_context_lock);
	}

	for (i = 0; i < g_context_num; i++) {
		if (!g_context[i])
			continue;

		rc2 = plugin_context_destroy(g_context[i]);
		if (rc2 != SLURM_SUCCESS) {
			debug("%s: %s: %s", __func__,
			      g_context[i]->type,
			      slurm_strerror(rc2));
			rc = SLURM_ERROR;
		}
	}

	xfree(ops);
	xfree(g_context);
	g_context_num = -1;

	slurm_mutex_unlock(&g_context_lock);

	return rc;
}
Example #14
static void *
_handle_accept(void *arg)
{
	/*struct request_params *param = (struct request_params *)arg;*/
	int fd = ((struct request_params *)arg)->fd;
	stepd_step_rec_t *job = ((struct request_params *)arg)->job;
	int req;
	int len;
	Buf buffer;
	void *auth_cred;
	int rc;
	uid_t uid;
	gid_t gid;
	char *auth_info;

	debug3("Entering _handle_accept (new thread)");
	xfree(arg);

	safe_read(fd, &req, sizeof(int));
	if (req != REQUEST_CONNECT) {
		error("First message must be REQUEST_CONNECT");
		goto fail;
	}

	safe_read(fd, &len, sizeof(int));
	buffer = init_buf(len);
	safe_read(fd, get_buf_data(buffer), len);

	/* Unpack and verify the auth credential */
	auth_cred = g_slurm_auth_unpack(buffer);
	if (auth_cred == NULL) {
		error("Unpacking authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		free_buf(buffer);
		goto fail;
	}
	auth_info = slurm_get_auth_info();
	rc = g_slurm_auth_verify(auth_cred, NULL, 2, auth_info);
	if (rc != SLURM_SUCCESS) {
		error("Verifying authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		xfree(auth_info);
		(void) g_slurm_auth_destroy(auth_cred);
		free_buf(buffer);
		goto fail;
	}

	/* Get the uid & gid from the credential, then destroy it. */
	uid = g_slurm_auth_get_uid(auth_cred, auth_info);
	gid = g_slurm_auth_get_gid(auth_cred, auth_info);
	xfree(auth_info);
	debug3("  Identity: uid=%d, gid=%d", uid, gid);
	g_slurm_auth_destroy(auth_cred);
	free_buf(buffer);

	rc = SLURM_PROTOCOL_VERSION;
	safe_write(fd, &rc, sizeof(int));

	while (1) {
		rc = _handle_request(fd, job, uid, gid);
		if (rc != SLURM_SUCCESS)
			break;
	}

	if (close(fd) == -1)
		error("Closing accepted fd: %m");

	slurm_mutex_lock(&message_lock);
	message_connections--;
	slurm_cond_signal(&message_cond);
	slurm_mutex_unlock(&message_lock);

	debug3("Leaving  _handle_accept");
	return NULL;

fail:
	rc = SLURM_FAILURE;
	safe_write(fd, &rc, sizeof(int));
rwfail:
	if (close(fd) == -1)
		error("Closing accepted fd after error: %m");
	debug("Leaving  _handle_accept on an error");
	return NULL;
}
Example #15
extern void msg_aggr_resp(slurm_msg_t *msg)
{
	slurm_msg_t *next_msg;
	composite_msg_t *comp_msg;
	msg_aggr_t *msg_aggr;
	ListIterator itr;

	comp_msg = (composite_msg_t *)msg->data;
	itr = list_iterator_create(comp_msg->msg_list);
	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
		info("msg_aggr_resp: processing composite msg_list...");
	while ((next_msg = list_next(itr))) {
		switch (next_msg->msg_type) {
		case REQUEST_BATCH_JOB_LAUNCH:
		case RESPONSE_SLURM_RC:
			/* signal sending thread that slurmctld received this
			 * msg */
			if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
				info("msg_aggr_resp: response found for "
				     "index %u signaling sending thread",
				     next_msg->msg_index);
			slurm_mutex_lock(&msg_collection.aggr_mutex);
			if (!(msg_aggr = _handle_msg_aggr_ret(
				      next_msg->msg_index, 1))) {
				debug2("msg_aggr_resp: error: unable to "
				       "locate aggr message struct for job %u",
					next_msg->msg_index);
				slurm_mutex_unlock(&msg_collection.aggr_mutex);
				continue;
			}
			if (msg_aggr->resp_callback &&
			    (next_msg->msg_type != RESPONSE_SLURM_RC))
				(*(msg_aggr->resp_callback))(next_msg);
			slurm_cond_signal(&msg_aggr->wait_cond);
			slurm_mutex_unlock(&msg_collection.aggr_mutex);
			break;
		case RESPONSE_MESSAGE_COMPOSITE:
			comp_msg = (composite_msg_t *)next_msg->data;
			/* set up the address here for the next node */
			memcpy(&next_msg->address, &comp_msg->sender,
			       sizeof(slurm_addr_t));

			if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE) {
				char addrbuf[100];
				slurm_print_slurm_addr(&next_msg->address,
						       addrbuf, 32);
				info("msg_aggr_resp: composite response msg "
				     "found for %s", addrbuf);
			}

			slurm_send_only_node_msg(next_msg);

			break;
		default:
			error("_rpc_composite_resp: invalid msg type in "
			      "composite msg_list");
			break;
		}
	}
	list_iterator_destroy(itr);
	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
		info("msg aggr: _rpc_composite_resp: finished processing "
		     "composite msg_list...");
}
Example #16
extern void msg_aggr_add_msg(slurm_msg_t *msg, bool wait,
			     void (*resp_callback) (slurm_msg_t *msg))
{
	int count;
	static uint16_t msg_index = 1;
	static uint32_t wait_count = 0;

	if (!msg_collection.running)
		return;

	slurm_mutex_lock(&msg_collection.mutex);
	if (msg_collection.max_msgs == true) {
		slurm_cond_wait(&msg_collection.cond, &msg_collection.mutex);
	}

	msg->msg_index = msg_index++;

	/* Add msg to message collection */
	list_append(msg_collection.msg_list, msg);

	count = list_count(msg_collection.msg_list);


	/* First msg in collection; initiate new window */
	if (count == 1)
		slurm_cond_signal(&msg_collection.cond);

	/* Max msgs reached; terminate window */
	if (count >= msg_collection.max_msg_cnt) {
		msg_collection.max_msgs = true;
		slurm_cond_signal(&msg_collection.cond);
	}
	slurm_mutex_unlock(&msg_collection.mutex);

	if (wait) {
		msg_aggr_t *msg_aggr = xmalloc(sizeof(msg_aggr_t));
		uint16_t        msg_timeout;
		struct timeval  now;
		struct timespec timeout;

		msg_aggr->msg_index = msg->msg_index;
		msg_aggr->resp_callback = resp_callback;
		slurm_cond_init(&msg_aggr->wait_cond, NULL);

		slurm_mutex_lock(&msg_collection.aggr_mutex);
		list_append(msg_collection.msg_aggr_list, msg_aggr);

		msg_timeout = slurm_get_msg_timeout();
		gettimeofday(&now, NULL);
		timeout.tv_sec = now.tv_sec + msg_timeout;
		timeout.tv_nsec = now.tv_usec * 1000;

		wait_count++;
		if (pthread_cond_timedwait(&msg_aggr->wait_cond,
					   &msg_collection.aggr_mutex,
					   &timeout) == ETIMEDOUT)
			_handle_msg_aggr_ret(msg_aggr->msg_index, 1);
		wait_count--;
		slurm_mutex_unlock(&msg_collection.aggr_mutex);
		if (!msg_collection.running && !wait_count)
			slurm_mutex_destroy(&msg_collection.aggr_mutex);

		_msg_aggr_free(msg_aggr);
	}
}
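msg_aggr_add_msg() converts a relative message timeout into the absolute timespec that pthread_cond_timedwait() expects, and distinguishes a timeout from a signal by comparing the return value against ETIMEDOUT (pthread_cond_timedwait() reports errors directly rather than through errno). A small helper capturing just that deadline construction, assuming nothing beyond POSIX (timed_wait is an illustrative name):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <sys/time.h>

/* Wait on cond/mutex (already locked by the caller) for at most timeout_sec
 * seconds; returns false on ETIMEDOUT, true otherwise. */
static bool timed_wait(pthread_cond_t *cond, pthread_mutex_t *mutex,
		       int timeout_sec)
{
	struct timeval  now;
	struct timespec deadline;

	gettimeofday(&now, NULL);
	deadline.tv_sec  = now.tv_sec + timeout_sec;
	deadline.tv_nsec = now.tv_usec * 1000;

	return pthread_cond_timedwait(cond, mutex, &deadline) != ETIMEDOUT;
}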
Example #17
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
 			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				continue;
			}
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}
		buf = hostlist_ranged_string_xmalloc(hl);

		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed + fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS ) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list, ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			steps = (fwd_msg->header.forward.cnt+1) /
					fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now  + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0
				  && list_count(ret_list) <= 1)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1)
			  != list_count(ret_list)) {
			/* this should never be called since the above
			   should catch the failed forwards and pipe
			   them back down, but this is here so we
			   never have to worry about a locked
			   mutex */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here.  We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						   ret_data_info->node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	if ((fd >= 0) && slurm_close(fd) < 0)
		error ("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);

	return (NULL);
}
Example #18
void *_fwd_tree_thread(void *arg)
{
	fwd_tree_t *fwd_tree = (fwd_tree_t *)arg;
	List ret_list = NULL;
	char *name = NULL;
	char *buf = NULL;
	slurm_msg_t send_msg;

	slurm_msg_t_init(&send_msg);
	send_msg.msg_type = fwd_tree->orig_msg->msg_type;
	send_msg.data = fwd_tree->orig_msg->data;
	send_msg.protocol_version = fwd_tree->orig_msg->protocol_version;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(fwd_tree->tree_hl))) {
		if (slurm_conf_get_addr(name, &send_msg.address)
		    == SLURM_ERROR) {
			error("fwd_tree_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(fwd_tree->tree_mutex);
			mark_as_failed_forward(&fwd_tree->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
 			slurm_cond_signal(fwd_tree->notify);
			slurm_mutex_unlock(fwd_tree->tree_mutex);
			free(name);

			continue;
		}

		send_msg.forward.timeout = fwd_tree->timeout;
		if ((send_msg.forward.cnt = hostlist_count(fwd_tree->tree_hl))){
			buf = hostlist_ranged_string_xmalloc(
					fwd_tree->tree_hl);
			send_msg.forward.nodelist = buf;
		} else
			send_msg.forward.nodelist = NULL;

		if (send_msg.forward.nodelist && send_msg.forward.nodelist[0]) {
			debug3("Tree sending to %s along with %s",
			       name, send_msg.forward.nodelist);
		} else
			debug3("Tree sending to %s", name);

		ret_list = slurm_send_addr_recv_msgs(&send_msg, name,
						     fwd_tree->timeout);

		xfree(send_msg.forward.nodelist);

		if (ret_list) {
			int ret_cnt = list_count(ret_list);
			/* This is most common if a slurmd is running
			   an older version of Slurm than the
			   originator of the message.
			*/
			if ((ret_cnt <= send_msg.forward.cnt) &&
			    (errno != SLURM_COMMUNICATIONS_CONNECTION_ERROR)) {
				error("fwd_tree_thread: %s failed to forward "
				      "the message, expecting %d ret got only "
				      "%d",
				      name, send_msg.forward.cnt + 1, ret_cnt);
				if (ret_cnt > 1) { /* not likely */
					ret_data_info_t *ret_data_info = NULL;
					ListIterator itr =
						list_iterator_create(ret_list);
					while ((ret_data_info =
						list_next(itr))) {
						if (xstrcmp(ret_data_info->
							    node_name, name))
							hostlist_delete_host(
								fwd_tree->
								tree_hl,
								ret_data_info->
								node_name);
					}
					list_iterator_destroy(itr);
				}
			}

			slurm_mutex_lock(fwd_tree->tree_mutex);
			list_transfer(fwd_tree->ret_list, ret_list);
			slurm_cond_signal(fwd_tree->notify);
			slurm_mutex_unlock(fwd_tree->tree_mutex);
			FREE_NULL_LIST(ret_list);
			/* try next node */
			if (ret_cnt <= send_msg.forward.cnt) {
				free(name);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_start_msg_tree_internal(
					fwd_tree->tree_hl, NULL,
					fwd_tree,
					hostlist_count(fwd_tree->tree_hl));
				continue;
			}
		} else {
			/* This should never happen (when this was written,
			 * slurm_send_addr_recv_msgs always returned a
			 * list). */
			error("fwd_tree_thread: no return list given from "
			      "slurm_send_addr_recv_msgs spawned for %s",
			      name);
			slurm_mutex_lock(fwd_tree->tree_mutex);
			mark_as_failed_forward(
				&fwd_tree->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
 			slurm_cond_signal(fwd_tree->notify);
			slurm_mutex_unlock(fwd_tree->tree_mutex);
			free(name);

			continue;
		}

		free(name);

		/* check for error and try again */
		if (errno == SLURM_COMMUNICATIONS_CONNECTION_ERROR)
 			continue;

		break;
	}

	_destroy_tree_fwd(fwd_tree);

	return NULL;
}
Example #19
static void *
_cancel_job_id (void *ci)
{
	int error_code = SLURM_SUCCESS, i;
	job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci;
	bool sig_set = true;
	uint16_t flags = 0;
	char *job_type = "";
	DEF_TIMERS;

	if (cancel_info->sig == (uint16_t) NO_VAL) {
		cancel_info->sig = SIGKILL;
		sig_set = false;
	}
	if (opt.batch) {
		flags |= KILL_JOB_BATCH;
		job_type = "batch ";
	}
	if (opt.full) {
		flags |= KILL_FULL_JOB;
		job_type = "full ";
	}
	if (cancel_info->array_flag)
		flags |= KILL_JOB_ARRAY;

	if (!cancel_info->job_id_str) {
		if (cancel_info->array_job_id &&
		    (cancel_info->array_task_id == INFINITE)) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else if (cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		}
	}

	if (!sig_set) {
		verbose("Terminating %sjob %s", job_type,
			cancel_info->job_id_str);
	} else {
		verbose("Signal %u to %sjob %s", cancel_info->sig, job_type,
			cancel_info->job_id_str);
	}

	for (i = 0; i < MAX_CANCEL_RETRY; i++) {
		_add_delay();
		START_TIMER;
		error_code = slurm_kill_job2(cancel_info->job_id_str,
					     cancel_info->sig, flags);
		END_TIMER;
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		if ((error_code == 0) ||
		    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
			break;
		verbose("Job is in transistional state, retrying");
		sleep(5 + i);
	}
	if (error_code) {
		error_code = slurm_get_errno();
		if ((opt.verbose > 0) ||
		    ((error_code != ESLURM_ALREADY_DONE) &&
		     (error_code != ESLURM_INVALID_JOB_ID))) {
			error("Kill job error on job id %s: %s",
			      cancel_info->job_id_str,
			      slurm_strerror(slurm_get_errno()));
		}
		if (((error_code == ESLURM_ALREADY_DONE) ||
		     (error_code == ESLURM_INVALID_JOB_ID)) &&
		    (cancel_info->sig == SIGKILL)) {
			error_code = 0;	/* Ignore error if job done */
		}	
	}

	/* Purposely free the struct passed in here, so the caller doesn't have
	 * to keep track of it, but don't destroy the mutex and condition
	 * variables contained. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
Example #20
static void *
_cancel_step_id (void *ci)
{
	int error_code = SLURM_SUCCESS, i;
	job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci;
	uint32_t job_id  = cancel_info->job_id;
	uint32_t step_id = cancel_info->step_id;
	bool sig_set = true;
	DEF_TIMERS;

	if (cancel_info->sig == (uint16_t) NO_VAL) {
		cancel_info->sig = SIGKILL;
		sig_set = false;
	}

	if (!cancel_info->job_id_str) {
		if (cancel_info->array_job_id &&
		    (cancel_info->array_task_id == INFINITE)) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else if (cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		}
	}

	for (i = 0; i < MAX_CANCEL_RETRY; i++) {
		if (cancel_info->sig == SIGKILL) {
			verbose("Terminating step %s.%u",
				cancel_info->job_id_str, step_id);
		} else {
			verbose("Signal %u to step %s.%u",
				cancel_info->sig,
				cancel_info->job_id_str, step_id);
		}

		_add_delay();
		START_TIMER;
		if ((!sig_set) || opt.ctld)
			error_code = slurm_kill_job_step(job_id, step_id,
							 cancel_info->sig);
		else if (cancel_info->sig == SIGKILL)
			error_code = slurm_terminate_job_step(job_id, step_id);
		else
			error_code = slurm_signal_job_step(job_id, step_id,
							   cancel_info->sig);
		END_TIMER;
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		if ((error_code == 0) ||
		    ((errno != ESLURM_TRANSITION_STATE_NO_UPDATE) &&
		     (errno != ESLURM_JOB_PENDING)))
			break;
		verbose("Job is in transistional state, retrying");
		sleep(5 + i);
	}
	if (error_code) {
		error_code = slurm_get_errno();
		if ((opt.verbose > 0) || (error_code != ESLURM_ALREADY_DONE))
			error("Kill job error on job step id %s: %s",
		 	      cancel_info->job_id_str,
			      slurm_strerror(slurm_get_errno()));

		if ((error_code == ESLURM_ALREADY_DONE) &&
		    (cancel_info->sig == SIGKILL)) {
			error_code = 0;	/* Ignore error if job done */
		}
	}

	/* Purposely free the struct passed in here, so the caller doesn't have
	 * to keep track of it, but don't destroy the mutex and condition
	 * variables contained. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
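Both cancel threads finish with the same bookkeeping: decrement *num_active_threads under its lock and signal num_active_threads_cond so the dispatcher can start another worker. A sketch of a dispatcher that throttles on that counter, assuming plain pthreads (throttle_spawn and MAX_ACTIVE_THREADS are illustrative, not scancel's actual code):

#include <pthread.h>

#define MAX_ACTIVE_THREADS 10	/* illustrative limit */

/* Hypothetical dispatcher: wait until the number of active cancel threads,
 * decremented by each worker before it signals, drops below the limit, then
 * reserve a slot for the next worker. */
static void throttle_spawn(pthread_mutex_t *lock, pthread_cond_t *cond,
			   int *num_active_threads)
{
	pthread_mutex_lock(lock);
	while (*num_active_threads >= MAX_ACTIVE_THREADS)
		pthread_cond_wait(cond, lock);
	(*num_active_threads)++;
	pthread_mutex_unlock(lock);
	/* ...pthread_create() the worker thread here... */
}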
static void *_timer_thread(void *args)
{
	int i, now, diff;

#if HAVE_SYS_PRCTL_H
	if (prctl(PR_SET_NAME, "acctg_prof", NULL, NULL, NULL) < 0) {
		error("%s: cannot set my name to %s %m",
		      __func__, "acctg_prof");
	}
#endif

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	DEF_TIMERS;
	while (init_run && acct_gather_profile_running) {
		slurm_mutex_lock(&g_context_lock);
		START_TIMER;
		now = time(NULL);

		for (i=0; i<PROFILE_CNT; i++) {
			if (acct_gather_suspended) {
				/* Handle suspended time as if it
				 * didn't happen */
				if (!acct_gather_profile_timer[i].freq)
					continue;
				if (acct_gather_profile_timer[i].last_notify)
					acct_gather_profile_timer[i].
						last_notify += SLEEP_TIME;
				else
					acct_gather_profile_timer[i].
						last_notify = now;
				continue;
			}

			diff = now - acct_gather_profile_timer[i].last_notify;
			/* info ("%d is %d and %d", i, */
			/*       acct_gather_profile_timer[i].freq, */
			/*       diff); */
			if (!acct_gather_profile_timer[i].freq
			    || (diff < acct_gather_profile_timer[i].freq))
				continue;
			debug2("profile signalling type %s",
			       acct_gather_profile_type_t_name(i));

			/* signal poller to start */
			slurm_mutex_lock(&acct_gather_profile_timer[i].
					 notify_mutex);
			slurm_cond_signal(
				&acct_gather_profile_timer[i].notify);
			slurm_mutex_unlock(&acct_gather_profile_timer[i].
					   notify_mutex);
			acct_gather_profile_timer[i].last_notify = now;
		}
		END_TIMER;
		slurm_mutex_unlock(&g_context_lock);

		usleep(USLEEP_TIME - DELTA_TIMER);
	}

	return NULL;
}
Example #22
/*
 * init_power_save - Initialize the power save module. Started as a
 *	pthread. Terminates automatically at slurmctld shutdown time.
 *	Input and output are unused.
 */
static void *_init_power_save(void *arg)
{
        /* Locks: Read nodes */
        slurmctld_lock_t node_read_lock = {
                NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
        /* Locks: Write nodes */
        slurmctld_lock_t node_write_lock = {
                NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	time_t now, boot_time = 0, last_power_scan = 0;

	if (power_save_config && !power_save_enabled) {
		debug("power_save mode not enabled");
		return NULL;
	}

	suspend_node_bitmap = bit_alloc(node_record_count);
	resume_node_bitmap  = bit_alloc(node_record_count);

	while (slurmctld_config.shutdown_time == 0) {
		sleep(1);

		if (_reap_procs() < 2) {
			debug("power_save programs getting backlogged");
			continue;
		}

		if ((last_config != slurmctld_conf.last_update) &&
		    (_init_power_config())) {
			info("power_save mode has been disabled due to "
			     "configuration changes");
			goto fini;
		}

		now = time(NULL);
		if (boot_time == 0)
			boot_time = now;

		/* Only run every 60 seconds or after a node state change,
		 *  whichever happens first */
		if ((last_node_update >= last_power_scan) ||
		    (now >= (last_power_scan + 60))) {
			lock_slurmctld(node_write_lock);
			_do_power_work(now);
			unlock_slurmctld(node_write_lock);
			last_power_scan = now;
		}

		if (slurmd_timeout &&
		    (now > (boot_time + (slurmd_timeout / 2)))) {
			lock_slurmctld(node_read_lock);
			_re_wake();
			unlock_slurmctld(node_read_lock);
			/* prevent additional executions */
			boot_time += (365 * 24 * 60 * 60);
			slurmd_timeout = 0;
		}
	}

fini:	_clear_power_config();
	FREE_NULL_BITMAP(suspend_node_bitmap);
	FREE_NULL_BITMAP(resume_node_bitmap);
	_shutdown_power();
	slurm_mutex_lock(&power_mutex);
	power_save_enabled = false;
	slurm_cond_signal(&power_cond);
	slurm_mutex_unlock(&power_mutex);
	pthread_exit(NULL);
	return NULL;
}
Example #23
static int
_handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char* buf;
	int len;
	Buf buffer;
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here.
	 * slurmstepd makes heavy use of setinfo over a pipe with slurmd,
	 * and doing the reverse can deadlock:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Pack/unpack instead to keep slurmd and slurmstepd independent.
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	slurm_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	slurm_cond_signal(&step_complete.cond);
	slurm_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;


rwfail:	if (lock_set) {
		slurm_cond_signal(&step_complete.cond);
		slurm_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
Example #24
/* Send an RPC to the SlurmDBD and wait for an arbitrary reply message.
 * The RPC will not be queued if an error occurs.
 * The "resp" message must be freed by the caller.
 * Returns SLURM_SUCCESS or an error code */
extern int send_recv_slurmdbd_msg(uint16_t rpc_version,
				  slurmdbd_msg_t *req,
				  slurmdbd_msg_t *resp)
{
	int rc = SLURM_SUCCESS;
	Buf buffer;

	xassert(req);
	xassert(resp);

	/* Set halt_agent so this message is sent ahead of anything the
	   agent might send at any time; clear it again once we hold the
	   mutex.
	*/
	halt_agent = 1;
	slurm_mutex_lock(&slurmdbd_lock);
	halt_agent = 0;
	if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
		/* Either slurm_open_slurmdbd_conn() was not executed or
		 * the connection to Slurm DBD has been closed */
		if (req->msg_type == DBD_GET_CONFIG)
			_open_slurmdbd_conn(0);
		else
			_open_slurmdbd_conn(1);
		if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	if (!(buffer = pack_slurmdbd_msg(req, rpc_version))) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = slurm_persist_send_msg(slurmdbd_conn, buffer);
	free_buf(buffer);
	if (rc != SLURM_SUCCESS) {
		error("slurmdbd: Sending message type %s: %d: %m",
		      rpc_num2string(req->msg_type), rc);
		goto end_it;
	}

	buffer = slurm_persist_recv_msg(slurmdbd_conn);
	if (buffer == NULL) {
		error("slurmdbd: Getting response to message type %u",
		      req->msg_type);
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = unpack_slurmdbd_msg(resp, rpc_version, buffer);
	/* check for the rc of the start job message */
	if (rc == SLURM_SUCCESS && resp->msg_type == DBD_ID_RC)
		rc = ((dbd_id_rc_msg_t *)resp->data)->return_code;

	free_buf(buffer);
end_it:
	slurm_cond_signal(&slurmdbd_cond);
	slurm_mutex_unlock(&slurmdbd_lock);

	return rc;
}
Example #25
static void *_agent(void *x)
{
	int cnt, rc;
	Buf buffer;
	struct timespec abs_time;
	static time_t fail_time = 0;
	int sigarray[] = {SIGUSR1, 0};
	slurmdbd_msg_t list_req;
	dbd_list_msg_t list_msg;

	list_req.msg_type = DBD_SEND_MULT_MSG;
	list_req.data = &list_msg;
	memset(&list_msg, 0, sizeof(dbd_list_msg_t));
	/* DEF_TIMERS; */

	/* Prepare to catch SIGUSR1 to interrupt pending
	 * I/O and terminate in a timely fashion. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	while (*slurmdbd_conn->shutdown == 0) {
		/* START_TIMER; */
		slurm_mutex_lock(&slurmdbd_lock);
		if (halt_agent)
			slurm_cond_wait(&slurmdbd_cond, &slurmdbd_lock);

		if ((slurmdbd_conn->fd < 0) &&
		    (difftime(time(NULL), fail_time) >= 10)) {
			/* The connection to Slurm DBD is not open */
			_open_slurmdbd_conn(1);
			if (slurmdbd_conn->fd < 0)
				fail_time = time(NULL);
		}

		slurm_mutex_lock(&agent_lock);
		if (agent_list && slurmdbd_conn->fd)
			cnt = list_count(agent_list);
		else
			cnt = 0;
		if ((cnt == 0) || (slurmdbd_conn->fd < 0) ||
		    (fail_time && (difftime(time(NULL), fail_time) < 10))) {
			slurm_mutex_unlock(&slurmdbd_lock);
			abs_time.tv_sec  = time(NULL) + 10;
			abs_time.tv_nsec = 0;
			slurm_cond_timedwait(&agent_cond, &agent_lock,
					     &abs_time);
			slurm_mutex_unlock(&agent_lock);
			continue;
		} else if ((cnt > 0) && ((cnt % 100) == 0))
			info("slurmdbd: agent queue size %u", cnt);
		/* Leave item on the queue until processing complete */
		if (agent_list) {
			int handle_agent_count = 1000;
			if (cnt > handle_agent_count) {
				int agent_count = 0;
				ListIterator agent_itr =
					list_iterator_create(agent_list);
				list_msg.my_list = list_create(NULL);
				while ((buffer = list_next(agent_itr))) {
					list_enqueue(list_msg.my_list, buffer);
					agent_count++;
					if (agent_count > handle_agent_count)
						break;
				}
				list_iterator_destroy(agent_itr);
				buffer = pack_slurmdbd_msg(
					&list_req, SLURM_PROTOCOL_VERSION);
			} else if (cnt > 1) {
				list_msg.my_list = agent_list;
				buffer = pack_slurmdbd_msg(
					&list_req, SLURM_PROTOCOL_VERSION);
			} else
				buffer = (Buf) list_peek(agent_list);
		} else
			buffer = NULL;
		slurm_mutex_unlock(&agent_lock);
		if (buffer == NULL) {
			slurm_mutex_unlock(&slurmdbd_lock);

			slurm_mutex_lock(&assoc_cache_mutex);
			if (slurmdbd_conn->fd >= 0 && running_cache)
				slurm_cond_signal(&assoc_cache_cond);
			slurm_mutex_unlock(&assoc_cache_mutex);

			continue;
		}

		/* NOTE: agent_lock is clear here, so we can add more
		 * requests to the queue while waiting for this RPC to
		 * complete. */
		rc = slurm_persist_send_msg(slurmdbd_conn, buffer);
		if (rc != SLURM_SUCCESS) {
			if (*slurmdbd_conn->shutdown) {
				slurm_mutex_unlock(&slurmdbd_lock);
				break;
			}
			error("slurmdbd: Failure sending message: %d: %m", rc);
		} else if (list_msg.my_list) {
			rc = _handle_mult_rc_ret();
		} else {
			rc = _get_return_code();
			if (rc == EAGAIN) {
				if (*slurmdbd_conn->shutdown) {
					slurm_mutex_unlock(&slurmdbd_lock);
					break;
				}
				error("slurmdbd: Failure with "
				      "message need to resend: %d: %m", rc);
			}
		}
		slurm_mutex_unlock(&slurmdbd_lock);
		slurm_mutex_lock(&assoc_cache_mutex);
		if (slurmdbd_conn->fd >= 0 && running_cache)
			slurm_cond_signal(&assoc_cache_cond);
		slurm_mutex_unlock(&assoc_cache_mutex);

		slurm_mutex_lock(&agent_lock);
		if (agent_list && (rc == SLURM_SUCCESS)) {
			/*
			 * If we sent a mult_msg we just need to free buffer,
			 * we don't need to requeue, just mark list_msg.my_list
			 * as NULL as that is the sign we sent a mult_msg.
			 */
			if (list_msg.my_list) {
				if (list_msg.my_list != agent_list)
					FREE_NULL_LIST(list_msg.my_list);
				list_msg.my_list = NULL;
			} else
				buffer = (Buf) list_dequeue(agent_list);

			free_buf(buffer);
			fail_time = 0;
		} else {
			/* We need to free a mult_msg even on failure */
			if (list_msg.my_list) {
				if (list_msg.my_list != agent_list)
					FREE_NULL_LIST(list_msg.my_list);
				list_msg.my_list = NULL;
				free_buf(buffer);
			}

			fail_time = time(NULL);
		}
		slurm_mutex_unlock(&agent_lock);
		/* END_TIMER; */
		/* info("at the end with %s", TIME_STR); */
	}

	slurm_mutex_lock(&agent_lock);
	_save_dbd_state();
	FREE_NULL_LIST(agent_list);
	slurm_mutex_unlock(&agent_lock);
	return NULL;
}