Example #1
/* run_backup - backup controller; it runs in standby
 *	mode, assuming control when the primary controller stops responding */
void run_backup(slurm_trigger_callbacks_t *callbacks)
{
	int i;
	uint32_t trigger_type;
	time_t last_ping = 0;
	pthread_attr_t thread_attr_sig, thread_attr_rpc;
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	slurmctld_lock_t config_write_lock = {
		WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

	info("slurmctld running in background mode");
	takeover = false;
	last_controller_response = time(NULL);

	/* default: don't resume if shutdown */
	slurmctld_config.resume_backup = false;
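	/* Block signals here; the dedicated signal-handling thread
	 * created below will manage them */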
	if (xsignal_block(backup_sigarray) < 0)
		error("Unable to block signals");

	/*
	 * create attached thread to process RPCs
	 */
	slurm_attr_init(&thread_attr_rpc);
	while (pthread_create(&slurmctld_config.thread_id_rpc,
			      &thread_attr_rpc, _background_rpc_mgr, NULL)) {
		error("pthread_create error %m");
		sleep(1);
	}
	slurm_attr_destroy(&thread_attr_rpc);

	/*
	 * create attached thread for signal handling
	 */
	slurm_attr_init(&thread_attr_sig);
	while (pthread_create(&slurmctld_config.thread_id_sig,
			      &thread_attr_sig, _background_signal_hand,
			      NULL)) {
		error("pthread_create %m");
		sleep(1);
	}
	slurm_attr_destroy(&thread_attr_sig);
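
	/* Note through the trigger framework that this backup
	 * controller has resumed operation */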
	trigger_type = TRIGGER_TYPE_BU_CTLD_RES_OP;
	_trigger_slurmctld_event(trigger_type);

	for (i = 0; ((i < 5) && (slurmctld_config.shutdown_time == 0)); i++) {
		sleep(1);       /* Give the primary slurmctld set-up time */
	}

	/* repeatedly ping ControlMachine */
	while (slurmctld_config.shutdown_time == 0) {
		sleep(1);
		/* The slurmctld_conf lock below is not critical.
		 * Unless a takeover was requested, ping at most once per
		 * (slurmctld_timeout / 3) seconds. */
		if (slurmctld_conf.slurmctld_timeout &&
		    (takeover == false) &&
		    (difftime(time(NULL), last_ping) <
		     (slurmctld_conf.slurmctld_timeout / 3)))
			continue;

		last_ping = time(NULL);
		if (_ping_controller() == 0)
			last_controller_response = time(NULL);
		else if (takeover) {
			/* In takeover mode, take control as soon as
			 * the primary no longer responds */
			break;
		} else {
			uint32_t timeout;
			lock_slurmctld(config_read_lock);
			timeout = slurmctld_conf.slurmctld_timeout;
			unlock_slurmctld(config_read_lock);

			if (difftime(time(NULL), last_controller_response) >
			    timeout) {
				break;
			}
		}
	}

	/* A requested shutdown ends standby mode without a takeover */
	if (slurmctld_config.shutdown_time != 0) {
		/* Since the pidfile is created as user root (its owner is
		 *   then changed to SlurmUser), SlurmUser may not be able
		 *   to remove it, so this is not necessarily an error.
		 * No longer need the slurmctld_conf lock at this point. */
		if (unlink(slurmctld_conf.slurmctld_pidfile) < 0)
			verbose("Unable to remove pidfile '%s': %m",
				slurmctld_conf.slurmctld_pidfile);

		info("BackupController terminating");
		pthread_join(slurmctld_config.thread_id_sig, NULL);
		log_fini();
		if (dump_core)
			abort();
		else
			exit(0);
	}

	lock_slurmctld(config_read_lock);
	error("ControlMachine %s not responding, "
		"BackupController %s taking over",
		slurmctld_conf.control_machine,
		slurmctld_conf.backup_controller);
	unlock_slurmctld(config_read_lock);

	/* Record the takeover and fire the triggers for the primary's
	 * failure and for the backup assuming control */
	backup_slurmctld_restart();
	trigger_primary_ctld_fail();
	trigger_backup_ctld_as_ctrl();

	/* Stop the background signal and RPC threads before assuming
	 * the primary controller's duties */
	pthread_kill(slurmctld_config.thread_id_sig, SIGTERM);
	pthread_join(slurmctld_config.thread_id_sig, NULL);
	pthread_join(slurmctld_config.thread_id_rpc, NULL);

	/* The job list needs to be freed before we run
	 * ctld_assoc_mgr_init(); it should already be empty at this point.
	 */
	lock_slurmctld(config_write_lock);
	job_fini();
	init_job_conf();
	unlock_slurmctld(config_write_lock);

	/* Initialize the association manager with our callbacks */
	ctld_assoc_mgr_init(callbacks);

	/* clear old state and read new state */
	lock_slurmctld(config_write_lock);
	if (switch_g_restore(slurmctld_conf.state_save_location, true)) {
		error("failed to restore switch state");
		abort();
	}
	if (read_slurm_conf(2, false)) {	/* Recover all state */
		error("Unable to recover slurm state");
		abort();
	}
	slurmctld_config.shutdown_time = (time_t) 0;
	unlock_slurmctld(config_write_lock);
	/* Update the select plugin's per-node allocation data */
	select_g_select_nodeinfo_set_all();

}
Example #2
/* Spawn a health check request on every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
	int node_test_cnt = 0, node_limit, node_states, run_cyclic;
	static int base_node_loc = -1;
	static time_t cycle_start_time = (time_t) 0;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	/* Sync the plugin's internal data with the node select_nodeinfo.
	 * This is important after a reconfig; otherwise select_nodeinfo
	 * will not return the correct number of allocated cpus.
	 */
	select_g_select_nodeinfo_set_all();

	/* Build the agent request used to fan out the health check RPCs */
	check_agent_args = xmalloc(sizeof(agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create(NULL);
#ifdef HAVE_FRONT_END
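	/* On front-end architectures, health checks target responding
	 * front-end nodes rather than individual compute nodes */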
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push_host(check_agent_args->hostlist,
				   front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
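	/* Split the configured health_check_node_state into the CYCLE
	 * flag and the set of node states eligible for checking */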
	run_cyclic = slurmctld_conf.health_check_node_state &
		     HEALTH_CHECK_CYCLE;
	node_states = slurmctld_conf.health_check_node_state &
		      (~HEALTH_CHECK_CYCLE);
	if (run_cyclic) {
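		/* In cyclic mode, test a limited batch of nodes per call,
		 * walking the node table across successive calls */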
		time_t now = time(NULL);
		if (cycle_start_time == (time_t) 0)
			cycle_start_time = now;
		else if (base_node_loc >= 0)
			;	/* mid-cycle */
		else if (difftime(now, cycle_start_time) <
			 slurmctld_conf.health_check_interval) {
			return;	/* Wait to start next cycle */
		}
		cycle_start_time = now;
		/* Determine how many nodes we want to test on each call of
		 * run_health_check() to spread out the work. */
		node_limit = (node_record_count * 2) /
			     slurmctld_conf.health_check_interval;
		node_limit = MAX(node_limit, 10);
	}
	if ((node_states != HEALTH_CHECK_NODE_ANY) &&
	    (node_states != HEALTH_CHECK_NODE_IDLE)) {
		/* Update each node's alloc_cpus count */
		select_g_select_nodeinfo_set_all();
	}

	/* Walk the node table, selecting nodes eligible for a check */
	for (i = 0; i < node_record_count; i++) {
		if (run_cyclic) {
			if (node_test_cnt++ >= node_limit)
				break;
			base_node_loc++;
			if (base_node_loc >= node_record_count) {
				base_node_loc = -1;
				break;
			}
			node_ptr = node_record_table_ptr + base_node_loc;
		} else {
			node_ptr = node_record_table_ptr + i;
		}
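		/* Skip nodes that cannot be checked right now */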
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (node_states != HEALTH_CHECK_NODE_ANY) {
			uint16_t cpus_total, cpus_used = 0;
			if (slurmctld_conf.fast_schedule) {
				cpus_total = node_ptr->config_ptr->cpus;
			} else {
				cpus_total = node_ptr->cpus;
			}
			if (!IS_NODE_IDLE(node_ptr)) {
				select_g_select_nodeinfo_get(
						node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						NODE_STATE_ALLOCATED,
						&cpus_used);
			}
			/* The node state is inferred from the cpus
			 * allocated on it:
			 * - cpus_used == 0:
			 *       the node is idle
			 * - cpus_used < cpus_total:
			 *       the node is in mixed state
			 * - cpus_used == cpus_total:
			 *       the node is allocated
			 */
			if (cpus_used == 0) {
				if (!(node_states & HEALTH_CHECK_NODE_IDLE))
					continue;
				if (!IS_NODE_IDLE(node_ptr))
					continue;
			} else if (cpus_used < cpus_total) {
				if (!(node_states & HEALTH_CHECK_NODE_MIXED))
					continue;
			} else {
				if (!(node_states & HEALTH_CHECK_NODE_ALLOC))
					continue;
			}
		}
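		/* The node passed all filters; queue it for a health check */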
		hostlist_push_host(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
	/* Completing a full pass over the node table ends the cycle */
	if (run_cyclic && (i >= node_record_count))
		base_node_loc = -1;
#endif

	/* If no node was selected, free the request; otherwise dispatch
	 * it through the agent mechanism */
	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree(check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}