Example #1
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64], local_hostname[64];
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";
	char *cluster_name = NULL;
	void *ptr = NULL;

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(local_hostname, sizeof(local_hostname)) == 0)) {
		req->alloc_node = local_hostname;
	}

	if (working_cluster_rec)
		cluster_name = working_cluster_rec->name;
	else
		cluster_name = slurmctld_conf.cluster_name;
	if (!slurm_load_federation(&ptr) &&
	    cluster_in_federation(ptr, cluster_name))
		rc = _fed_job_will_run(req, &will_run_resp, ptr);
	else
		rc = slurm_job_will_run2(req, &will_run_resp);

	if ((rc == 0) && will_run_resp) {
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info("  Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (req->alloc_node == local_hostname)
		req->alloc_node = NULL;
	if (ptr)
		slurm_destroy_federation_rec(ptr);

	return rc;
}
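Every example on this page hands gethostname_short() a stack buffer and expects the unqualified local host name back, with 0 returned on success and an errno-style code on failure (compare Example #7). The following is only a minimal sketch of such a helper, written for illustration and assuming the name is simply truncated at the first dot; it is not necessarily the exact Slurm implementation.

#include <errno.h>
#include <string.h>
#include <unistd.h>

/* Illustrative sketch of a gethostname_short-style helper: copy the
 * local host name into "name" with any domain suffix removed.
 * Returns 0 on success or an errno value on failure. */
static int gethostname_short_sketch(char *name, size_t len)
{
	char full[1024];
	char *dot;

	if (gethostname(full, sizeof(full)) != 0)
		return errno;
	full[sizeof(full) - 1] = '\0';

	dot = strchr(full, '.');
	if (dot)
		*dot = '\0';

	if (strlen(full) >= len)
		return ENAMETOOLONG;

	strcpy(name, full);
	return 0;
}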
Example #2
/* Initialize slurmd configuration table.
 * Everything is already NULL/zero filled when called */
static void
_init_conf(void)
{
	char  host[MAXHOSTNAMELEN];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	if (gethostname_short(host, MAXHOSTNAMELEN) < 0) {
		error("Unable to get my hostname: %m");
		exit(1);
	}
	conf->hostname    = xstrdup(host);
	conf->daemonize   =  1;
	conf->lfd         = -1;
	conf->log_opts    = lopts;
	conf->debug_level = LOG_LEVEL_INFO;
	conf->pidfile     = xstrdup(DEFAULT_SLURMD_PIDFILE);
	conf->spooldir	  = xstrdup(DEFAULT_SPOOLDIR);

	slurm_mutex_init(&conf->config_mutex);

	conf->starting_steps = list_create(destroy_starting_step);
	slurm_mutex_init(&conf->starting_steps_lock);
	pthread_cond_init(&conf->starting_steps_cond, NULL);
	conf->prolog_running_jobs = list_create(slurm_destroy_uint32_ptr);
	slurm_mutex_init(&conf->prolog_running_lock);
	pthread_cond_init(&conf->prolog_running_cond, NULL);
	return;
}
Example #3
static void
_print_config(void)
{
	int days, hours, mins, secs;
	char name[128];

	gethostname_short(name, sizeof(name));
	printf("NodeName=%s ", name);

	get_cpuinfo(&conf->actual_cpus,
		    &conf->actual_boards,
	            &conf->actual_sockets,
	            &conf->actual_cores,
	            &conf->actual_threads,
	            &conf->block_map_size,
	            &conf->block_map, &conf->block_map_inv);
	printf("CPUs=%u Boards=%u SocketsPerBoard=%u CoresPerSocket=%u "
	       "ThreadsPerCore=%u ",
	       conf->actual_cpus, conf->actual_boards, conf->actual_sockets,
	       conf->actual_cores, conf->actual_threads);

	get_memory(&conf->real_memory_size);
	get_tmp_disk(&conf->tmp_disk_space, "/tmp");
	printf("RealMemory=%u TmpDisk=%u\n",
	       conf->real_memory_size, conf->tmp_disk_space);

	get_up_time(&conf->up_time);
	secs  =  conf->up_time % 60;
	mins  = (conf->up_time / 60) % 60;
	hours = (conf->up_time / 3600) % 24;
	days  = (conf->up_time / 86400);
	printf("UpTime=%u-%2.2u:%2.2u:%2.2u\n", days, hours, mins, secs);
}
Example #4
/*
 * slurm_submit_batch_job - issue RPC to submit a job for later execution
 * NOTE: free the response using slurm_free_submit_response_response_msg
 * IN job_desc_msg - description of batch job request
 * OUT slurm_alloc_msg - response to request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int
slurm_submit_batch_job (job_desc_msg_t *req,
		        submit_response_msg_t **resp)
{
        int rc;
        slurm_msg_t req_msg;
        slurm_msg_t resp_msg;
	bool host_set = false;
	char host[64];

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);


	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if ( (req->alloc_node == NULL)
	    && (gethostname_short(host, sizeof(host)) == 0) ) {
		req->alloc_node = host;
		host_set  = true;
	}

	req_msg.msg_type = REQUEST_SUBMIT_BATCH_JOB ;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	/*
	 *  Clear this hostname if set internally to this function
	 *    (memory is on the stack)
	 */
	if (host_set)
		req->alloc_node = NULL;

	if (rc == SLURM_SOCKET_ERROR)
		return SLURM_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno_ret(rc);
		*resp = NULL;
		break;
	case RESPONSE_SUBMIT_BATCH_JOB:
		*resp = (submit_response_msg_t *) resp_msg.data;
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
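For context, a caller of slurm_submit_batch_job() could look roughly like the sketch below. Only the calls themselves (slurm_init_job_desc_msg(), slurm_submit_batch_job() and slurm_free_submit_response_response_msg(), the latter named in the comment above) are taken from the example; the job description fields set here are illustrative assumptions and a real submission usually sets many more.

#include <stdio.h>
#include <unistd.h>
#include <slurm/slurm.h>

/* Hypothetical caller: submit a one-node batch script and print the
 * resulting job id. Returns 0 on success, -1 on error. */
static int submit_sleep_job(void)
{
	job_desc_msg_t job_desc;
	submit_response_msg_t *resp = NULL;

	slurm_init_job_desc_msg(&job_desc);
	job_desc.name      = "example";
	job_desc.script    = "#!/bin/sh\nsleep 30\n";
	job_desc.min_nodes = 1;
	job_desc.user_id   = getuid();
	job_desc.group_id  = getgid();

	if (slurm_submit_batch_job(&job_desc, &resp) != 0) {
		slurm_perror("slurm_submit_batch_job");
		return -1;
	}
	printf("Submitted batch job %u\n", resp->job_id);
	slurm_free_submit_response_response_msg(resp);
	return 0;
}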
Example #5
extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
{
   //printf("cao: src/sbatch/multi_cluster.c:  sbatch_set_first_avail_cluster(job_desc_msg_t *req)\n");
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	local_cluster_rec_t *local_cluster = NULL;
	char buf[64];
	bool host_set = false;
	List ret_list = NULL;

	/* return if we only have 1 or less clusters here */
	if (!opt.clusters || !list_count(opt.clusters)) {
		return rc;
	} else if (list_count(opt.clusters) == 1) {
		working_cluster_rec = list_peek(opt.clusters);
		return rc;
	}

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}

	ret_list = list_create(_destroy_local_cluster_rec);
	itr = list_iterator_create(opt.clusters);
	while ((working_cluster_rec = list_next(itr))) {
		if ((local_cluster = _job_will_run(req)))
			list_append(ret_list, local_cluster);
		else
			error("Problem with submit to cluster %s: %m",
			      working_cluster_rec->name);
	}
	list_iterator_destroy(itr);

	if (host_set)
		req->alloc_node = NULL;

	if (!list_count(ret_list)) {
		error("Can't run on any of the clusters given");
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* sort the list so the first spot is on top */
	local_cluster_name = slurm_get_cluster_name();
	list_sort(ret_list, (ListCmpF)_sort_local_cluster);
	xfree(local_cluster_name);
	local_cluster = list_peek(ret_list);

	/* set up the working cluster and be done */
	working_cluster_rec = local_cluster->cluster_rec;
end_it:
	list_destroy(ret_list);

	return rc;
}
Example #6
/*
 * slurm_allocate_resources - allocate resources for a job request
 * IN job_desc_msg - description of resource allocation request
 * OUT slurm_alloc_msg - response to request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
int
slurm_allocate_resources (job_desc_msg_t *req,
			  resource_allocation_response_msg_t **resp)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	bool host_set = false;
	char host[64];

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);
	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if ( (req->alloc_node == NULL)
	    && (gethostname_short(host, sizeof(host)) == 0) ) {
		req->alloc_node = host;
		host_set  = true;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	/*
	 *  Clear this hostname if set internally to this function
	 *    (memory is on the stack)
	 */
	if (host_set)
		req->alloc_node = NULL;

	if (rc == SLURM_SOCKET_ERROR)
		return SLURM_SOCKET_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0)
			return SLURM_PROTOCOL_ERROR;
		*resp = NULL;
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		*resp = (resource_allocation_response_msg_t *) resp_msg.data;
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
Example #7
/*
 * get_mach_name - Return the name of this node
 * Input: node_name - buffer for the node name, must be at least MAX_SLURM_NAME characters
 * Output: node_name - filled in with node name
 *         return code - 0 if no error, otherwise errno
 */
extern int
get_mach_name(char *node_name)
{
    int error_code;

    error_code = gethostname_short(node_name, MAX_SLURM_NAME);
    if (error_code != 0)
	error ("get_mach_name: gethostname_short error %d", error_code);

    return error_code;
}
Example #8
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run (job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64];
	bool host_set = false;
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}

	rc = slurm_job_will_run2(req, &will_run_resp);

	if ((rc == 0) && will_run_resp) {
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s"
		     " on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info("  Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (host_set)
		req->alloc_node = NULL;

	return rc;
}
Example #9
static char *
_guess_nodename()
{
	char host[256];
	char *nodename = NULL;

	if (gethostname_short(host, 256) != 0)
		return NULL;

	nodename = slurm_conf_get_nodename(host);
	if (nodename == NULL)
		nodename = slurm_conf_get_aliased_nodename();
	if (nodename == NULL) /* if no match, try localhost */
		nodename = slurm_conf_get_nodename("localhost");

	return nodename;
}
Example #10
extern void create_daemon_popup(GtkAction *action, gpointer user_data)
{
	GtkWidget *popup = gtk_dialog_new_with_buttons(
		"SLURM Daemons running",
		GTK_WINDOW(user_data),
		GTK_DIALOG_DESTROY_WITH_PARENT,
		GTK_STOCK_CLOSE,
		GTK_RESPONSE_OK,
		NULL);

	int update = 0;
	slurm_ctl_conf_info_msg_t *conf;
	char me[MAX_SLURM_NAME], *b, *c, *n;
	int actld = 0, ctld = 0, d = 0;
	GtkTreeStore *treestore =
		_local_create_treestore_2cols(popup, 300, 100);
	GtkTreeIter iter;
	g_signal_connect(G_OBJECT(popup), "delete_event",
			 G_CALLBACK(_delete_popup), NULL);
	g_signal_connect(G_OBJECT(popup), "response",
			 G_CALLBACK(_delete_popup), NULL);

	slurm_conf_init(NULL);
	conf = slurm_conf_lock();

	gethostname_short(me, MAX_SLURM_NAME);
	if ((b = conf->backup_controller)) {
		if ((strcmp(b, me) == 0) ||
		    (strcasecmp(b, "localhost") == 0))
			ctld = 1;
	}
	if ((c = conf->control_machine)) {
		actld = 1;
		if ((strcmp(c, me) == 0) ||
		    (strcasecmp(c, "localhost") == 0))
			ctld = 1;
	}
	slurm_conf_unlock();

	if ((n = slurm_conf_get_nodename(me))) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_aliased_nodename())) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_nodename("localhost"))) {
		d = 1;
		xfree(n);
	}
	if (actld && ctld)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmctld", "1");
	if (actld && d)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmd", "1");


	gtk_widget_show_all(popup);

	return;
}
Example #11
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run (job_desc_msg_t *req)
{
	slurm_msg_t req_msg, resp_msg;
	will_run_response_msg_t *will_run_resp;
	char buf[64];
	bool host_set = false;
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";
	/* req.immediate = true;    implicit */
	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}
	slurm_msg_t_init(&req_msg);
	req_msg.msg_type = REQUEST_JOB_WILL_RUN;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	if (host_set)
		req->alloc_node = NULL;

	if (rc < 0)
		return SLURM_SOCKET_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0)
			return SLURM_PROTOCOL_ERROR;
		break;
	case RESPONSE_JOB_WILL_RUN:
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		will_run_resp = (will_run_response_msg_t *) resp_msg.data;
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s"
		     " on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info("  Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
Example #12
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64], local_hostname[64];
	int rc;
	char *cluster_name = NULL;
	void *ptr = NULL;

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(local_hostname, sizeof(local_hostname)) == 0)) {
		req->alloc_node = local_hostname;
	}

	if (working_cluster_rec)
		cluster_name = working_cluster_rec->name;
	else
		cluster_name = slurmctld_conf.cluster_name;
	if (!slurm_load_federation(&ptr) &&
	    cluster_in_federation(ptr, cluster_name))
		rc = _fed_job_will_run(req, &will_run_resp, ptr);
	else
		rc = slurm_job_will_run2(req, &will_run_resp);

	if (will_run_resp)
		print_multi_line_string(
			will_run_resp->job_submit_user_msg, -1);

	if ((rc == 0) && will_run_resp) {
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		if (will_run_resp->part_name) {
			info("Job %u to start at %s using %u processors on nodes %s in partition %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list,
			     will_run_resp->part_name);
		} else {
			/*
			 * Partition name not provided from slurmctld v17.11
			 * or earlier. Remove this in the future.
			 */
			info("Job %u to start at %s using %u processors on nodes %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list);
		}
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info("  Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (req->alloc_node == local_hostname)
		req->alloc_node = NULL;
	if (ptr)
		slurm_destroy_federation_rec(ptr);

	return rc;
}
Example #13
/* main - slurmctld main function, start various threads and process RPCs */
int main(int argc, char *argv[])
{
	pthread_attr_t thread_attr;
	char node_name[128];
	void *db_conn = NULL;
	assoc_init_args_t assoc_init_arg;

	_init_config();
	log_init(argv[0], log_opts, LOG_DAEMON, NULL);
	if (read_slurmdbd_conf())
		exit(1);
	_parse_commandline(argc, argv);
	_update_logging(true);
	_update_nice();

	if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s authentication plugin",
		      slurmdbd_conf->auth_type);
	}
	if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s accounting storage plugin",
		      slurmdbd_conf->storage_type);
	}
	_kill_old_slurmdbd();
	if (foreground == 0)
		_daemonize();

	/*
	 * Need to create pidfile here in case we setuid() below
	 * (init_pidfile() exits if it can't initialize pid file).
	 * On Linux we also need to make this setuid job explicitly
	 * able to write a core dump.
	 * This also has to happen after daemon(), which closes all fd's,
	 * so we keep the write lock of the pidfile.
	 */
	_init_pidfile();
	_become_slurm_user();
	if (foreground == 0)
		_set_work_dir();
	log_config();

#ifdef PR_SET_DUMPABLE
	if (prctl(PR_SET_DUMPABLE, 1) < 0)
		debug ("Unable to set dumpable to 1");
#endif /* PR_SET_DUMPABLE */

	if (xsignal_block(dbd_sigarray) < 0)
		error("Unable to block signals");

	/* Create attached thread for signal handling */
	slurm_attr_init(&thread_attr);
	if (pthread_create(&signal_handler_thread, &thread_attr,
			   _signal_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	registered_clusters = list_create(NULL);

	slurm_attr_init(&thread_attr);
	if (pthread_create(&commit_handler_thread, &thread_attr,
			   _commit_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));

	/* If we are tracking wckeys we need to cache them;
	   if we aren't, only cache the users and QOS */
	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER | ASSOC_MGR_CACHE_QOS;
	if (slurmdbd_conf->track_wckey)
		assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;

	db_conn = acct_storage_g_get_connection(NULL, 0, true, NULL);
	if (assoc_mgr_init(db_conn, &assoc_init_arg, errno) == SLURM_ERROR) {
		error("Problem getting cache of data");
		acct_storage_g_close_connection(&db_conn);
		goto end_it;
	}

	if (gethostname_short(node_name, sizeof(node_name)))
		fatal("getnodename: %m");

	while (1) {
		if (slurmdbd_conf->dbd_backup &&
		    (!strcmp(node_name, slurmdbd_conf->dbd_backup) ||
		     !strcmp(slurmdbd_conf->dbd_backup, "localhost"))) {
			info("slurmdbd running in background mode");
			have_control = false;
			backup = true;
			/* make sure any locks are released */
			acct_storage_g_commit(db_conn, 1);
			run_dbd_backup();
			if (!shutdown_time)
				assoc_mgr_refresh_lists(db_conn);
		} else if (slurmdbd_conf->dbd_host &&
			   (!strcmp(slurmdbd_conf->dbd_host, node_name) ||
			    !strcmp(slurmdbd_conf->dbd_host, "localhost"))) {
			backup = false;
			have_control = true;
		} else {
			fatal("This host not configured to run SlurmDBD "
			      "(%s != %s | (backup) %s)",
			      node_name, slurmdbd_conf->dbd_host,
			      slurmdbd_conf->dbd_backup);
		}

		if (!shutdown_time) {
			/* Create attached thread to process incoming RPCs */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rpc_handler_thread, &thread_attr,
					   rpc_mgr, NULL))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		if (!shutdown_time) {
			/* Create attached thread to do usage rollup */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rollup_handler_thread,
					   &thread_attr,
					   _rollup_handler, db_conn))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		/* Daemon is fully operational here */
		if (!shutdown_time || primary_resumed) {
			shutdown_time = 0;
			info("slurmdbd version %s started",
			     SLURM_VERSION_STRING);
			if (backup)
				run_dbd_backup();
		}

		_request_registrations(db_conn);
		acct_storage_g_commit(db_conn, 1);

		/* this is only run if we are not the backup */
		if (rollup_handler_thread)
			pthread_join(rollup_handler_thread, NULL);
		if (rpc_handler_thread)
			pthread_join(rpc_handler_thread, NULL);

		if (backup && primary_resumed) {
			shutdown_time = 0;
			info("Backup has given up control");
		}

		if (shutdown_time)
			break;
	}
	/* Daemon termination handled here */

end_it:

	if (signal_handler_thread)
		pthread_join(signal_handler_thread, NULL);
	if (commit_handler_thread)
		pthread_join(commit_handler_thread, NULL);

	acct_storage_g_commit(db_conn, 1);
	acct_storage_g_close_connection(&db_conn);

	if (slurmdbd_conf->pid_file &&
	    (unlink(slurmdbd_conf->pid_file) < 0)) {
		verbose("Unable to remove pidfile '%s': %m",
			slurmdbd_conf->pid_file);
	}

	FREE_NULL_LIST(registered_clusters);

	assoc_mgr_fini(NULL);
	slurm_acct_storage_fini();
	slurm_auth_fini();
	log_fini();
	free_slurmdbd_conf();
	exit(0);
}
Example #14
/*
 * slurm_load_slurmd_status - issue RPC to get the status of slurmd
 *	daemon on this machine
 * OUT slurmd_status_ptr - place to store slurmd status information
 * RET 0 on success or -1 on error
 * NOTE: free the response using slurm_free_slurmd_status()
 */
extern int
slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *this_addr;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	if (cluster_flags & CLUSTER_FLAG_MULTSD) {
		if ((this_addr = getenv("SLURMD_NODENAME"))) {
			slurm_conf_get_addr(this_addr, &req_msg.address);
		} else {
			this_addr = "localhost";
			slurm_set_addr(&req_msg.address,
				       (uint16_t)slurm_get_slurmd_port(),
				       this_addr);
		}
	} else {
		char this_host[256];

		/*
		 *  Set request message address to slurmd on localhost
		 */
		gethostname_short(this_host, sizeof(this_host));
		this_addr = slurm_conf_get_nodeaddr(this_host);
		if (this_addr == NULL)
			this_addr = xstrdup("localhost");
		slurm_set_addr(&req_msg.address,
			       (uint16_t)slurm_get_slurmd_port(),
			       this_addr);
		xfree(this_addr);
	}
	req_msg.msg_type = REQUEST_DAEMON_STATUS;
	req_msg.data     = NULL;

	rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);

	if ((rc != 0) || !resp_msg.auth_cred) {
		error("slurm_slurmd_info: %m");
		if (resp_msg.auth_cred)
			g_slurm_auth_destroy(resp_msg.auth_cred);
		return SLURM_ERROR;
	}
	if (resp_msg.auth_cred)
		g_slurm_auth_destroy(resp_msg.auth_cred);

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURMD_STATUS:
		*slurmd_status_ptr = (slurmd_status_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		slurm_free_return_code_msg(resp_msg.data);
		if (rc)
			slurm_seterrno_ret(rc);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
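A hedged usage sketch for the function above; only slurm_load_slurmd_status() and slurm_free_slurmd_status() (the latter named in the NOTE) come from the example, the rest is an assumption about how a caller might be written.

#include <slurm/slurm.h>

/* Hypothetical caller: ask the slurmd on this machine for its status
 * and release the response afterwards. Returns 0 on success. */
static int check_local_slurmd(void)
{
	slurmd_status_t *status = NULL;

	if (slurm_load_slurmd_status(&status) != 0) {
		slurm_perror("slurm_load_slurmd_status");
		return -1;
	}

	/* ... examine the fields of *status here ... */

	slurm_free_slurmd_status(status);
	return 0;
}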
Example #15
/*
 * slurm_pack_job_will_run - determine if a heterogeneous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], local_hostname[64] = "", *sep = "";
	int rc = SLURM_SUCCESS;
	char *type = "processors";
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	(void) gethostname_short(local_hostname, sizeof(local_hostname));
	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		if ((req->alloc_node == NULL) && local_hostname[0])
			req->alloc_node = local_hostname;

		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);
		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(will_run_resp->
							   preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}

			slurm_free_will_run_response_msg(will_run_resp);
		}
		if (req->alloc_node == local_hostname)
			req->alloc_node = NULL;
		if (rc != SLURM_SUCCESS)
			break;
	}
	list_iterator_destroy(iter);


	if (rc == SLURM_SUCCESS) {
		uint32_t cluster_flags = slurmdb_setup_cluster_flags();
		char node_list[1028] = "";

		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     first_job_id, buf, tot_proc_count, type, node_list);
		if (job_list)
			info("  Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
Example #16
/*
 * parse_command_line
 */
extern void
parse_command_line( int argc, char* argv[] )
{
	char *env_val = NULL;
	bool override_format_env = false;
	int opt_char;
	int option_index;
	static struct option long_options[] = {
		{"accounts",   required_argument, 0, 'A'},
		{"all",        no_argument,       0, 'a'},
		{"format",     required_argument, 0, 'o'},
		{"help",       no_argument,       0, OPT_LONG_HELP},
		{"hide",       no_argument,       0, OPT_LONG_HIDE},
		{"iterate",    required_argument, 0, 'i'},
		{"jobs",       optional_argument, 0, 'j'},
		{"long",       no_argument,       0, 'l'},
		{"cluster",    required_argument, 0, 'M'},
		{"clusters",   required_argument, 0, 'M'},
		{"node",       required_argument, 0, 'n'},
		{"nodes",      required_argument, 0, 'n'},
		{"noheader",   no_argument,       0, 'h'},
		{"partitions", required_argument, 0, 'p'},
		{"qos",        required_argument, 0, 'q'},
		{"reservation",required_argument, 0, 'R'},
		{"sort",       required_argument, 0, 'S'},
		{"start",      no_argument,       0, OPT_LONG_START},
		{"steps",      optional_argument, 0, 's'},
		{"states",     required_argument, 0, 't'},
		{"usage",      no_argument,       0, OPT_LONG_USAGE},
		{"user",       required_argument, 0, 'u'},
		{"users",      required_argument, 0, 'u'},
		{"verbose",    no_argument,       0, 'v'},
		{"version",    no_argument,       0, 'V'},
		{NULL,         0,                 0, 0}
	};

	if (getenv("SQUEUE_ALL"))
		params.all_flag = true;
	if ( ( env_val = getenv("SQUEUE_SORT") ) )
		params.sort = xstrdup(env_val);
	if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) {
		if (!(params.clusters = slurmdb_get_info_cluster(env_val))) {
			error("'%s' can't be reached now, "
			      "or it is an invalid entry for "
			      "SLURM_CLUSTERS.  Use 'sacctmgr --list "
			      "cluster' to see avaliable clusters.",
			      env_val);
			exit(1);
		}
		working_cluster_rec = list_peek(params.clusters);
	}

	while ((opt_char = getopt_long(argc, argv,
				       "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV",
				       long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr, "Try \"squeue --help\" "
				"for more information\n");
			exit(1);
		case (int) 'A':
		case (int) 'U':	/* backwards compatibility */
			xfree(params.accounts);
			params.accounts = xstrdup(optarg);
			params.account_list =
				_build_str_list( params.accounts );
			break;
		case (int)'a':
			params.all_flag = true;
			break;
		case (int)'h':
			params.no_header = true;
			break;
		case (int) 'i':
			params.iterate = atoi(optarg);
			if (params.iterate <= 0) {
				error ("--iterate=%s\n", optarg);
				exit(1);
			}
			break;
		case (int) 'j':
			if (optarg) {
				params.jobs = xstrdup(optarg);
				params.job_list =
					_build_job_list(params.jobs);
			}
			params.job_flag = true;
			break;
		case (int) 'l':
			params.long_list = true;
			override_format_env = true;
			break;
		case (int) 'M':
			if (params.clusters)
				list_destroy(params.clusters);
			if (!(params.clusters =
			    slurmdb_get_info_cluster(optarg))) {
				error("'%s' can't be reached now, "
				      "or it is an invalid entry for "
				      "--cluster.  Use 'sacctmgr --list "
				      "cluster' to see avaliable clusters.",
				      optarg);
				exit(1);
			}
			working_cluster_rec = list_peek(params.clusters);
			break;
		case (int) 'n':
			if (params.nodes)
				hostset_destroy(params.nodes);

			params.nodes = hostset_create(optarg);
			if (params.nodes == NULL) {
				error("'%s' invalid entry for --nodes",
				      optarg);
				exit(1);
			}
			break;
		case (int) 'o':
			xfree(params.format);
			params.format = xstrdup(optarg);
			override_format_env = true;

			break;
		case (int) 'p':
			xfree(params.partitions);
			params.partitions = xstrdup(optarg);
			params.part_list =
				_build_str_list( params.partitions );
			params.all_flag = true;
			break;
		case (int) 'q':
			xfree(params.qoss);
			params.qoss = xstrdup(optarg);
			params.qos_list =
				_build_str_list( params.qoss );
			break;
		case (int) 'R':
			xfree(params.reservation);
			params.reservation = xstrdup(optarg);
			break;
		case (int) 's':
			if (optarg) {
				params.steps = xstrdup(optarg);
				params.step_list =
					_build_step_list(params.steps);
			}
			params.step_flag = true;
			override_format_env = true;
			break;
		case (int) 'S':
			xfree(params.sort);
			params.sort = xstrdup(optarg);
			break;
		case (int) 't':
			xfree(params.states);
			params.states = xstrdup(optarg);
			params.state_list =
				_build_state_list( params.states );
			break;
		case (int) 'u':
			xfree(params.users);
			params.users = xstrdup(optarg);
			params.user_list =
				_build_user_list( params.users );
			break;
		case (int) 'v':
			params.verbose++;
			break;
		case (int) 'V':
			print_slurm_version();
			exit(0);
		case OPT_LONG_HELP:
			_help();
			exit(0);
		case OPT_LONG_HIDE:
			params.all_flag = false;
			break;
		case OPT_LONG_START:
			params.start_flag = true;
			break;
		case OPT_LONG_USAGE:
			_usage();
			exit(0);
		}
	}

	if ( override_format_env == false ) {
		if ( ( env_val = getenv("SQUEUE_FORMAT") ) )
			params.format = xstrdup(env_val);
	}

	params.cluster_flags = slurmdb_setup_cluster_flags();
	if (optind < argc) {
		if (params.job_flag) {
			params.jobs = xstrdup(argv[optind++]);
			params.job_list = _build_job_list(params.jobs);
		} else if (params.step_flag) {
			params.steps = xstrdup(argv[optind++]);
			params.step_list = _build_step_list(params.steps);
		}
		if (optind < argc) {
			error("Unrecognized option: %s",argv[optind]);
			_usage();
			exit(1);
		}
	}

	if ( params.job_flag && params.step_flag) {
		if (params.job_list) {
			verbose("Printing job steps with job filter");
			params.job_flag = false;
		} else {
			error("Incompatible options --jobs and --steps");
			exit(1);
		}
	}

	if ( params.nodes ) {
		char *name1 = NULL;
		char *name2 = NULL;
		hostset_t nodenames = hostset_create(NULL);
		if (nodenames == NULL)
			fatal("malloc failure");

		while ( hostset_count(params.nodes) > 0 ) {
			name1 = hostset_pop(params.nodes);

			/* localhost = use current host name */
			if ( strcasecmp("localhost", name1) == 0 ) {
				name2 = xmalloc(128);
				gethostname_short(name2, 128);
			} else {
				/* translate NodeHostName to NodeName */
				name2 = slurm_conf_get_nodename(name1);

				/* use NodeName if translation failed */
				if ( name2 == NULL )
					name2 = xstrdup(name1);
			}
			hostset_insert(nodenames, name2);
			free(name1);
			xfree(name2);
		}

		/* Replace params.nodename with the new one */
		hostset_destroy(params.nodes);
		params.nodes = nodenames;
	}

	if ( ( params.accounts == NULL ) &&
	     ( env_val = getenv("SQUEUE_ACCOUNT") ) ) {
		params.accounts = xstrdup(env_val);
		params.account_list = _build_str_list( params.accounts );
	}

	if ( ( params.partitions == NULL ) &&
	     ( env_val = getenv("SQUEUE_PARTITION") ) ) {
		params.partitions = xstrdup(env_val);
		params.part_list = _build_str_list( params.partitions );
		params.all_flag = true;
	}

	if ( ( params.qoss == NULL ) &&
	     ( env_val = getenv("SQUEUE_QOS") ) ) {
		params.qoss = xstrdup(env_val);
		params.qos_list = _build_str_list( params.qoss );
	}

	if ( ( params.states == NULL ) &&
	     ( env_val = getenv("SQUEUE_STATES") ) ) {
		params.states = xstrdup(env_val);
		params.state_list = _build_state_list( params.states );
	}

	if ( ( params.users == NULL ) &&
	     ( env_val = getenv("SQUEUE_USERS") ) ) {
		params.users = xstrdup(env_val);
		params.user_list = _build_user_list( params.users );
	}

	if ( params.start_flag && !params.step_flag ) {
		/* Set more defaults */
		if (params.format == NULL)
			params.format = xstrdup("%.7i %.9P %.8j %.8u  %.2t  %.19S %.6D %R");
		if (params.sort == NULL)
			params.sort = xstrdup("S");
		if (params.states == NULL) {
			params.states = xstrdup("PD");
			params.state_list = _build_state_list( params.states );
		}
	}

	params.max_cpus = _max_cpus_per_node();

	if ( params.verbose )
		_print_options();
}
Example #17
/* Wait for barrier and get full PMI Keyval space data */
int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
		int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id = pmi_rank;
	data.size = pmi_size;
	data.port = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data = &data;

	/* Send the RPC to the local srun communication manager.
	 * Since srun can be sent thousands of messages at the
	 * same time and may refuse some connections, retry as
	 * needed. Wait until all key-value pairs have been sent
	 * by all tasks, then spread out the messages by task
	 * rank. Also increase the message timeout if there are
	 * many tasks, since the srun command can get very
	 * overloaded (the default timeout is 10 secs).
	 */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		error("slurm_accept_msg_conn: %m");
		return errno;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return errno;
	}
	if (msg_rcv.auth_cred)
		(void)g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");

	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}
Example #18
/*
 * slurm_get_node_energy - issue RPC to get the energy data of all
 * configured sensors on the target machine
 * IN  host  - name of node to query, NULL if localhost
 * IN  delta - Use cache if data is newer than this in seconds
 * OUT sensor_cnt - number of sensors
 * OUT energy - array of acct_gather_energy_t structures on success or
 *              NULL otherwise
 * RET 0 on success or a slurm error code
 * NOTE: free the response using xfree
 */
extern int slurm_get_node_energy(char *host, uint16_t delta,
				 uint16_t *sensor_cnt,
				 acct_gather_energy_t **energy)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	acct_gather_energy_req_msg_t req;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *this_addr;

	xassert(sensor_cnt);
	xassert(energy);

	*sensor_cnt = 0;
	*energy = NULL;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	if (host)
		slurm_conf_get_addr(host, &req_msg.address);
	else if (cluster_flags & CLUSTER_FLAG_MULTSD) {
		if ((this_addr = getenv("SLURMD_NODENAME"))) {
			slurm_conf_get_addr(this_addr, &req_msg.address);
		} else {
			this_addr = "localhost";
			slurm_set_addr(&req_msg.address,
				       (uint16_t)slurm_get_slurmd_port(),
				       this_addr);
		}
	} else {
		char this_host[256];
		/*
		 *  Set request message address to slurmd on localhost
		 */
		gethostname_short(this_host, sizeof(this_host));
		this_addr = slurm_conf_get_nodeaddr(this_host);
		if (this_addr == NULL)
			this_addr = xstrdup("localhost");
		slurm_set_addr(&req_msg.address,
			       (uint16_t)slurm_get_slurmd_port(),
			       this_addr);
		xfree(this_addr);
	}

	req.delta        = delta;
	req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY;
	req_msg.data     = &req;

	rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);

	if (rc != 0 || !resp_msg.auth_cred) {
		error("slurm_get_node_energy: %m");
		if (resp_msg.auth_cred)
			g_slurm_auth_destroy(resp_msg.auth_cred);
		return SLURM_ERROR;
	}
	if (resp_msg.auth_cred)
		g_slurm_auth_destroy(resp_msg.auth_cred);
	switch (resp_msg.msg_type) {
	case RESPONSE_ACCT_GATHER_ENERGY:
		*sensor_cnt = ((acct_gather_node_resp_msg_t *)
			       resp_msg.data)->sensor_cnt;
		*energy = ((acct_gather_node_resp_msg_t *)
			   resp_msg.data)->energy;
		((acct_gather_node_resp_msg_t *) resp_msg.data)->energy = NULL;
		slurm_free_acct_gather_node_resp_msg(resp_msg.data);
		break;
	case RESPONSE_SLURM_RC:
	        rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		slurm_free_return_code_msg(resp_msg.data);
		if (rc)
			slurm_seterrno_ret(rc);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
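Finally, a usage sketch for slurm_get_node_energy(). Treat it as an assumption-laden illustration: passing NULL to query the local node follows the comment above, but the acct_gather_energy_t field names used here (consumed_energy, current_watts) and the inclusion of src/common/xmalloc.h for xfree() are assumptions that may differ between Slurm versions.

#include <stdio.h>
#include <slurm/slurm.h>
#include "src/common/xmalloc.h"	/* for xfree(), per the NOTE above */

/* Hypothetical caller: read the energy sensors of the local node and
 * print one line per sensor, then free the array with xfree(). */
static int print_local_energy(void)
{
	uint16_t sensor_cnt = 0, i;
	acct_gather_energy_t *energy = NULL;

	if (slurm_get_node_energy(NULL, 0, &sensor_cnt, &energy) != 0) {
		slurm_perror("slurm_get_node_energy");
		return -1;
	}
	for (i = 0; i < sensor_cnt; i++) {
		printf("sensor %u: consumed %llu J, %u W now\n", i,
		       (unsigned long long) energy[i].consumed_energy,
		       energy[i].current_watts);
	}
	xfree(energy);
	return 0;
}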