Example #1
/* Send a signal RPC to a specific node */
static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
		char *node_name, slurm_addr_t node_addr)
{
	agent_arg_t *agent_args;
	kill_tasks_msg_t *kill_tasks_msg;
	struct node_record *node_ptr;

	kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t));
	kill_tasks_msg->job_id		= job_id;
	kill_tasks_msg->job_step_id	= step_id;
	kill_tasks_msg->signal		= signal;

	agent_args = xmalloc(sizeof(agent_arg_t));
	agent_args->msg_type		= REQUEST_SIGNAL_TASKS;
	agent_args->retry		= 1;
	agent_args->msg_args		= kill_tasks_msg;
	agent_args->hostlist = hostlist_create(node_name);
	agent_args->node_count		= 1;

	if ((node_ptr = find_node_record(node_name)))
		agent_args->protocol_version = node_ptr->protocol_version;

	agent_queue_request(agent_args);
}
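
A hypothetical call site for the per-node variant above, assuming the usual slurmctld context (`node_record_table_ptr`, `node_record_count` and `bit_test()` from the common headers): a caller walks a step's node bitmap and issues one RPC per allocated node. The wrapper name `_signal_step_nodes` is invented for illustration and is not part of the Slurm source.

/* Illustrative sketch only -- not taken from the Slurm source */
static void _signal_step_nodes(struct step_record *step_ptr, uint16_t signal)
{
	int i;
	struct node_record *node_ptr;

	for (i = 0; i < node_record_count; i++) {
		/* Skip nodes outside this step's allocation */
		if (!bit_test(step_ptr->step_node_bitmap, i))
			continue;
		node_ptr = node_record_table_ptr + i;
		_send_sig(step_ptr->job_ptr->job_id, step_ptr->step_id,
			  signal, node_ptr->name, node_ptr->slurm_addr);
	}
}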
Example #2
/* Send a signal RPC to a list of nodes */
static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
		      char *nodelist)
{
	agent_arg_t *agent_args;
	signal_tasks_msg_t *signal_tasks_msg;
	hostlist_iterator_t hi;
	char *host;
	struct node_record *node_ptr;

	signal_tasks_msg = xmalloc(sizeof(signal_tasks_msg_t));
	signal_tasks_msg->job_id		= job_id;
	signal_tasks_msg->job_step_id	= step_id;
	signal_tasks_msg->signal		= signal;

	agent_args = xmalloc(sizeof(agent_arg_t));
	agent_args->msg_type		= REQUEST_SIGNAL_TASKS;
	agent_args->retry		= 1;
	agent_args->msg_args		= signal_tasks_msg;
	agent_args->hostlist		= hostlist_create(nodelist);
	agent_args->node_count		= hostlist_count(agent_args->hostlist);
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
	hi = hostlist_iterator_create(agent_args->hostlist);
	while ((host = hostlist_next(hi))) {
		if ((node_ptr = find_node_record(host)) &&
		    (agent_args->protocol_version > node_ptr->protocol_version))
			agent_args->protocol_version =
				node_ptr->protocol_version;
		free(host);
	}
	hostlist_iterator_destroy(hi);

	agent_queue_request(agent_args);
}
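
Design note on the loop above: `agent_args->protocol_version` starts at `SLURM_PROTOCOL_VERSION` and is only ever lowered, so after one pass over the hostlist it holds the minimum protocol version of any target node. Packing the message once at that version ensures the oldest slurmd in the set can still understand it.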
Example #3
/* Update acct_gather data for every node that is not DOWN */
extern void update_nodes_acct_gather_data(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *agent_args = NULL;

	agent_args = xmalloc (sizeof (agent_arg_t));
	agent_args->msg_type = REQUEST_ACCT_GATHER_UPDATE;
	agent_args->retry = 0;
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
	agent_args->hostlist = hostlist_create(NULL);

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		if (agent_args->protocol_version >
		    front_end_ptr->protocol_version)
			agent_args->protocol_version =
				front_end_ptr->protocol_version;

		hostlist_push_host(agent_args->hostlist, front_end_ptr->name);
		agent_args->node_count++;
	}
#else
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (agent_args->protocol_version > node_ptr->protocol_version)
			agent_args->protocol_version =
				node_ptr->protocol_version;
		hostlist_push_host(agent_args->hostlist, node_ptr->name);
		agent_args->node_count++;
	}
#endif

	if (agent_args->node_count == 0) {
		hostlist_destroy(agent_args->hostlist);
		xfree (agent_args);
	} else {
		hostlist_uniq(agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(agent_args->hostlist);
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_ENERGY)
			info("Updating acct_gather data for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(agent_args);
	}
}
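
Note the asymmetric cleanup: the hostlist and the `agent_arg_t` are freed by this function only when `node_count` is zero. On the non-empty path, `agent_queue_request()` evidently takes ownership of the whole argument structure, a convention every example on this page relies on.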
Example #4
/*
 * srun_user_message - Send arbitrary message to an srun job (no job steps)
 */
extern int srun_user_message(struct job_record *job_ptr, char *msg)
{
	slurm_addr_t * addr;
	srun_user_msg_t *msg_arg;

	xassert(job_ptr);
	if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr))
		return ESLURM_ALREADY_DONE;

	if (job_ptr->other_port &&
	    job_ptr->resp_host && job_ptr->resp_host[0]) {
		addr = xmalloc(sizeof(slurm_addr_t));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_user_msg_t));
		msg_arg->job_id = job_ptr->job_id;
		msg_arg->msg    = xstrdup(msg);
		_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
				   msg_arg);
		return SLURM_SUCCESS;
	} else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) {
#ifndef HAVE_FRONT_END
		struct node_record *node_ptr;
#endif
		job_notify_msg_t *notify_msg_ptr;
		agent_arg_t *agent_arg_ptr;
#ifdef HAVE_FRONT_END
		if (job_ptr->batch_host == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist = hostlist_create(job_ptr->batch_host);
#else
		node_ptr = find_first_node_record(job_ptr->node_bitmap);
		if (node_ptr == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist = hostlist_create(node_ptr->name);
#endif
		if (agent_arg_ptr->hostlist == NULL)
			fatal("hostlist_create: malloc failure");
		notify_msg_ptr = (job_notify_msg_t *)
				 xmalloc(sizeof(job_notify_msg_t));
		notify_msg_ptr->job_id = job_ptr->job_id;
		notify_msg_ptr->message = xstrdup(msg);
		agent_arg_ptr->node_count = 1;
		agent_arg_ptr->retry = 0;
		agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
		agent_arg_ptr->msg_args = (void *) notify_msg_ptr;
		/* Launch the RPC via agent */
		agent_queue_request(agent_arg_ptr);
		return SLURM_SUCCESS;
	}
	return ESLURM_DISABLED;
}
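
Two delivery paths are visible here: when the allocating srun registered a response endpoint (`other_port` plus `resp_host`), the message goes straight to that srun as an SRUN_USER_MSG; failing that, for a running batch job, it is wrapped in a REQUEST_JOB_NOTIFY RPC and routed through the slurmd on the batch host (or the first allocated node).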
Example #5
/* Launch the srun request. Note that retry is always zero since
 * we don't want to clog the system up with messages destined for
 * defunct srun processes
 */
static void _srun_agent_launch(slurm_addr_t *addr, char *host,
		slurm_msg_type_t type, void *msg_args)
{
	agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t));

	agent_args->node_count = 1;
	agent_args->retry      = 0;
	agent_args->addr       = addr;
	agent_args->hostlist   = hostlist_create(host);
	agent_args->msg_type   = type;
	agent_args->msg_args   = msg_args;
	agent_queue_request(agent_args);
}
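
Example #4 above shows a live call site for this helper: `srun_user_message()` builds the `slurm_addr_t` from `job_ptr->other_port` and `job_ptr->resp_host`, then hands the xmalloc'd address and message to `_srun_agent_launch()`, which in turn passes ownership of both to the agent.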
Example #6
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	check_agent_args = xmalloc (sizeof (agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create("");
	if (check_agent_args->hostlist == NULL)
		fatal("hostlist_create: malloc failure");

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push(check_agent_args->hostlist, front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		hostlist_push(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
#endif

	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree (check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}
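
Compare Example #9 below, a later revision of the same function that adds cyclic testing (spreading the checks across calls) and filtering by node state.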
Example #7
/* Send a signal RPC to a list of nodes */
static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
		      char *nodelist)
{
	agent_arg_t *agent_args;
	kill_tasks_msg_t *kill_tasks_msg;

	kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t));
	kill_tasks_msg->job_id		= job_id;
	kill_tasks_msg->job_step_id	= step_id;
	kill_tasks_msg->signal		= signal;

	agent_args = xmalloc(sizeof(agent_arg_t));
	agent_args->msg_type		= REQUEST_SIGNAL_TASKS;
	agent_args->retry		= 1;
	agent_args->msg_args		= kill_tasks_msg;
	agent_args->hostlist		= hostlist_create(nodelist);
	agent_args->node_count		= hostlist_count(agent_args->hostlist);

	agent_queue_request(agent_args);
}
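
Unlike Example #2, this variant performs no per-node protocol-version scan; `agent_args->protocol_version` is simply left unset, presumably leaving the agent to apply its default handling.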
Example #8
/*
 * ping_nodes - check that all nodes and daemons are alive,
 *	get nodes in UNKNOWN state to register
 */
void ping_nodes (void)
{
	static bool restart_flag = true;	/* system just restarted */
	static int offset = 0;	/* mutex via node table write lock on entry */
	static int max_reg_threads = 0;	/* max node registration threads
					 * this can include DOWN nodes, so
					 * limit the number to avoid huge
					 * communication delays */
	int i;
	time_t now, still_live_time, node_dead_time;
	static time_t last_ping_time = (time_t) 0;
	hostlist_t down_hostlist = NULL;
	char *host_str = NULL;
	agent_arg_t *ping_agent_args = NULL;
	agent_arg_t *reg_agent_args = NULL;
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr = NULL;
#else
	struct node_record *node_ptr = NULL;
#endif

	now = time (NULL);

	ping_agent_args = xmalloc (sizeof (agent_arg_t));
	ping_agent_args->msg_type = REQUEST_PING;
	ping_agent_args->retry = 0;
	ping_agent_args->hostlist = hostlist_create("");

	reg_agent_args = xmalloc (sizeof (agent_arg_t));
	reg_agent_args->msg_type = REQUEST_NODE_REGISTRATION_STATUS;
	reg_agent_args->retry = 0;
	reg_agent_args->hostlist = hostlist_create("");

	/*
	 * If there are a large number of down nodes, the node ping
	 * can take a long time to complete:
	 *  ping_time = down_nodes * agent_timeout / agent_parallelism
	 *  ping_time = down_nodes * 10_seconds / 10
	 *  ping_time = down_nodes (seconds)
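	 *  e.g., 300 DOWN nodes -> 300 * 10 sec / 10 = ~300 extra seconds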
	 * Because of this, we extend the SlurmdTimeout by the
	 * time needed to complete a ping of all nodes.
	 */
	if ((slurmctld_conf.slurmd_timeout == 0) ||
	    (last_ping_time == (time_t) 0)) {
		node_dead_time = (time_t) 0;
	} else {
		node_dead_time = last_ping_time -
				 slurmctld_conf.slurmd_timeout;
	}
	still_live_time = now - (slurmctld_conf.slurmd_timeout / 3);
	last_ping_time  = now;

	if (max_reg_threads == 0) {
		max_reg_threads = MAX(slurm_get_tree_width(), 1);
	}
	offset += max_reg_threads;
	if ((offset > node_record_count) &&
	    (offset >= (max_reg_threads * MAX_REG_FREQUENCY)))
		offset = 0;
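	/* Illustrative numbers: with a tree width of 50, the registration
	 * window [offset, offset + 50) slides forward by 50 nodes on each
	 * pass, wrapping to zero only after it has moved past every node
	 * and at least MAX_REG_FREQUENCY passes have elapsed. */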

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if ((slurmctld_conf.slurmd_timeout == 0)	&&
		    (!restart_flag)				&&
		    (!IS_NODE_UNKNOWN(front_end_ptr))		&&
		    (!IS_NODE_NO_RESPOND(front_end_ptr)))
			continue;

		if ((front_end_ptr->last_response != (time_t) 0)     &&
		    (front_end_ptr->last_response <= node_dead_time) &&
		    (!IS_NODE_DOWN(front_end_ptr))) {
			if (down_hostlist)
				(void) hostlist_push_host(down_hostlist,
					front_end_ptr->name);
			else {
				down_hostlist =
					hostlist_create(front_end_ptr->name);
				if (down_hostlist == NULL)
					fatal("hostlist_create: malloc error");
			}
			set_front_end_down(front_end_ptr, "Not responding");
			front_end_ptr->not_responding = false;
			continue;
		}

		if (restart_flag) {
			front_end_ptr->last_response =
				slurmctld_conf.last_update;
		}

		/* Request a node registration if its state is UNKNOWN or
		 * on a periodic basis (about every MAX_REG_FREQUENCY ping,
		 * this mechanism avoids an additional (per node) timer or
		 * counter and gets updated configuration information
		 * once in a while). We limit these requests since they
		 * can generate a flood of incoming RPCs. */
		if (IS_NODE_UNKNOWN(front_end_ptr) || restart_flag ||
		    ((i >= offset) && (i < (offset + max_reg_threads)))) {
			hostlist_push(reg_agent_args->hostlist,
				      front_end_ptr->name);
			reg_agent_args->node_count++;
			continue;
		}

		if ((!IS_NODE_NO_RESPOND(front_end_ptr)) &&
		    (front_end_ptr->last_response >= still_live_time))
			continue;

		/* Do not keep pinging down nodes since this can induce
		 * huge delays in hierarchical communication fail-over */
		if (IS_NODE_NO_RESPOND(front_end_ptr) &&
		    IS_NODE_DOWN(front_end_ptr))
			continue;

		hostlist_push(ping_agent_args->hostlist, front_end_ptr->name);
		ping_agent_args->node_count++;
	}
#else
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (IS_NODE_FUTURE(node_ptr) || IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if ((slurmctld_conf.slurmd_timeout == 0) &&
		    (!restart_flag)			 &&
		    (!IS_NODE_UNKNOWN(node_ptr))         &&
		    (!IS_NODE_NO_RESPOND(node_ptr)))
			continue;

		if ((node_ptr->last_response != (time_t) 0)     &&
		    (node_ptr->last_response <= node_dead_time) &&
		    (!IS_NODE_DOWN(node_ptr))) {
			if (down_hostlist)
				(void) hostlist_push_host(down_hostlist,
					node_ptr->name);
			else {
				down_hostlist =
					hostlist_create(node_ptr->name);
				if (down_hostlist == NULL)
					fatal("hostlist_create: malloc error");
			}
			set_node_down_ptr(node_ptr, "Not responding");
			node_ptr->not_responding = false;  /* logged below */
			continue;
		}

		if (restart_flag)
			node_ptr->last_response = slurmctld_conf.last_update;

		/* Request a node registration if its state is UNKNOWN or
		 * on a periodic basis (about every MAX_REG_FREQUENCY ping,
		 * this mechanism avoids an additional (per node) timer or
		 * counter and gets updated configuration information
		 * once in a while). We limit these requests since they
		 * can generate a flood of incoming RPCs. */
		if (IS_NODE_UNKNOWN(node_ptr) || restart_flag ||
		    ((i >= offset) && (i < (offset + max_reg_threads)))) {
			hostlist_push(reg_agent_args->hostlist,
				      node_ptr->name);
			reg_agent_args->node_count++;
			continue;
		}

		if ((!IS_NODE_NO_RESPOND(node_ptr)) &&
		    (node_ptr->last_response >= still_live_time))
			continue;

		/* Do not keep pinging down nodes since this can induce
		 * huge delays in hierarchical communication fail-over */
		if (IS_NODE_NO_RESPOND(node_ptr) && IS_NODE_DOWN(node_ptr))
			continue;

		hostlist_push(ping_agent_args->hostlist, node_ptr->name);
		ping_agent_args->node_count++;
	}
#endif

	restart_flag = false;
	if (ping_agent_args->node_count == 0) {
		hostlist_destroy(ping_agent_args->hostlist);
		xfree (ping_agent_args);
	} else {
		hostlist_uniq(ping_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				ping_agent_args->hostlist);
		debug("Spawning ping agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(ping_agent_args);
	}

	if (reg_agent_args->node_count == 0) {
		hostlist_destroy(reg_agent_args->hostlist);
		xfree (reg_agent_args);
	} else {
		hostlist_uniq(reg_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				reg_agent_args->hostlist);
		debug("Spawning registration agent for %s %d hosts",
		      host_str, reg_agent_args->node_count);
		xfree(host_str);
		ping_begin();
		agent_queue_request(reg_agent_args);
	}

	if (down_hostlist) {
		hostlist_uniq(down_hostlist);
		host_str = hostlist_ranged_string_xmalloc(down_hostlist);
		error("Nodes %s not responding, setting DOWN", host_str);
		xfree(host_str);
		hostlist_destroy(down_hostlist);
	}
}
Example #9
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
	int node_test_cnt = 0, node_limit, node_states, run_cyclic;
	static int base_node_loc = -1;
	static time_t cycle_start_time = (time_t) 0;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	/* Sync plugin internal data with node select_nodeinfo. This is
	 * important after a reconfig; otherwise select_nodeinfo will not
	 * return the correct number of allocated cpus.
	 */
	select_g_select_nodeinfo_set_all();

	check_agent_args = xmalloc (sizeof (agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create(NULL);
#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push_host(check_agent_args->hostlist,
				   front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
	run_cyclic = slurmctld_conf.health_check_node_state &
		     HEALTH_CHECK_CYCLE;
	node_states = slurmctld_conf.health_check_node_state &
		      (~HEALTH_CHECK_CYCLE);
	if (run_cyclic) {
		time_t now = time(NULL);
		if (cycle_start_time == (time_t) 0)
			cycle_start_time = now;
		else if (base_node_loc >= 0)
			;	/* mid-cycle */
		else if (difftime(now, cycle_start_time) <
			 slurmctld_conf.health_check_interval) {
			return;	/* Wait to start next cycle */
		}
		cycle_start_time = now;
		/* Determine how many nodes we want to test on each call of
		 * run_health_check() to spread out the work. */
		node_limit = (node_record_count * 2) /
			     slurmctld_conf.health_check_interval;
		node_limit = MAX(node_limit, 10);
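		/* Worked example (hypothetical sizes): 10000 nodes with
		 * HealthCheckInterval=60 gives (10000 * 2) / 60 = 333
		 * nodes tested per call, never fewer than 10. */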
	}
	if ((node_states != HEALTH_CHECK_NODE_ANY) &&
	    (node_states != HEALTH_CHECK_NODE_IDLE)) {
		/* Update each node's alloc_cpus count */
		select_g_select_nodeinfo_set_all();
	}

	for (i = 0; i < node_record_count; i++) {
		if (run_cyclic) {
			if (node_test_cnt++ >= node_limit)
				break;
			base_node_loc++;
			if (base_node_loc >= node_record_count) {
				base_node_loc = -1;
				break;
			}
			node_ptr = node_record_table_ptr + base_node_loc;
		} else {
			node_ptr = node_record_table_ptr + i;
		}
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (node_states != HEALTH_CHECK_NODE_ANY) {
			uint16_t cpus_total, cpus_used = 0;
			if (slurmctld_conf.fast_schedule) {
				cpus_total = node_ptr->config_ptr->cpus;
			} else {
				cpus_total = node_ptr->cpus;
			}
			if (!IS_NODE_IDLE(node_ptr)) {
				select_g_select_nodeinfo_get(
						node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						NODE_STATE_ALLOCATED,
						&cpus_used);
			}
			/* Here the node state is inferred from
			 * the cpus allocated on it.
			 * - cpus_used == 0
			 *       means node is idle
			 * - cpus_used < cpus_total
			 *       means the node is in mixed state
			 * else cpus_used == cpus_total
			 *       means the node is allocated
			 */
			if (cpus_used == 0) {
				if (!(node_states & HEALTH_CHECK_NODE_IDLE))
					continue;
				if (!IS_NODE_IDLE(node_ptr))
					continue;
			} else if (cpus_used < cpus_total) {
				if (!(node_states & HEALTH_CHECK_NODE_MIXED))
					continue;
			} else {
				if (!(node_states & HEALTH_CHECK_NODE_ALLOC))
					continue;
			}
		}
		hostlist_push_host(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
	if (run_cyclic && (i >= node_record_count))
		base_node_loc = -1;
#endif

	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree (check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}
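
All nine examples follow the same skeleton, so as a closing summary here is a minimal sketch of that pattern. The helper name `_queue_rpc_to_nodes` is invented for illustration; the types and calls (`agent_arg_t`, `xmalloc()`, `hostlist_create()`, `hostlist_count()`, `agent_queue_request()`) are the same slurmctld and common facilities used throughout this page.

/* Illustrative distillation of the common pattern -- not from the
 * Slurm source */
static void _queue_rpc_to_nodes(slurm_msg_type_t msg_type, void *msg_args,
				char *nodelist, int retry)
{
	agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t));

	agent_args->msg_type   = msg_type;
	agent_args->retry      = retry;
	agent_args->msg_args   = msg_args;	/* agent takes ownership */
	agent_args->hostlist   = hostlist_create(nodelist);
	agent_args->node_count = hostlist_count(agent_args->hostlist);
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;

	/* From here on the agent owns agent_args and everything in it */
	agent_queue_request(agent_args);
}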