Ejemplo n.º 1
0
static void _connection_fini_callback(void *arg)
{
	slurmdbd_conn_t *conn = (slurmdbd_conn_t *) arg;

	if (conn->conn->rem_port) {
		if (!shutdown_time) {
			slurmdb_cluster_rec_t cluster_rec;
			ListIterator itr;
			slurmdbd_conn_t *slurmdbd_conn;
			memset(&cluster_rec, 0, sizeof(slurmdb_cluster_rec_t));
			cluster_rec.name = conn->conn->cluster_name;
			cluster_rec.control_host = conn->conn->rem_host;
			cluster_rec.control_port = conn->conn->rem_port;
			cluster_rec.rpc_version = conn->conn->version;
			cluster_rec.tres_str = conn->tres_str;
			debug("cluster %s has disconnected",
			      conn->conn->cluster_name);

			clusteracct_storage_g_fini_ctld(
				conn->db_conn, &cluster_rec);

			slurm_mutex_lock(&registered_lock);
			itr = list_iterator_create(registered_clusters);
			while ((slurmdbd_conn = list_next(itr))) {
				if (conn == slurmdbd_conn) {
					list_delete_item(itr);
					break;
				}
			}
			list_iterator_destroy(itr);
			slurm_mutex_unlock(&registered_lock);
		}
		/* needs to be the last thing done */
		acct_storage_g_commit(conn->db_conn, 1);
	}

	acct_storage_g_close_connection(&conn->db_conn);
	/* handled directly in the internal persist_conn code */
	//slurm_persist_conn_members_destroy(&conn->conn);
	xfree(conn->tres_str);
	xfree(conn);
}
Ejemplo n.º 2
0
int main(int argc, char **argv)
{
	int error_code = SLURM_SUCCESS, i, opt_char, input_field_count;
	char **input_fields;
	log_options_t opts = LOG_OPTS_STDERR_ONLY ;
	int local_exit_code = 0;
	char *temp = NULL;
	int option_index;
	static struct option long_options[] = {
		{"help",     0, 0, 'h'},
		{"usage",    0, 0, 'h'},
		{"immediate",0, 0, 'i'},
		{"noheader",0, 0, 'n'},
		{"oneliner", 0, 0, 'o'},
		{"parsable", 0, 0, 'p'},
		{"parsable2", 0, 0, 'P'},
		{"quiet",    0, 0, 'Q'},
		{"readonly", 0, 0, 'r'},
		{"associations", 0, 0, 's'},
		{"verbose",  0, 0, 'v'},
		{"version",  0, 0, 'V'},
		{NULL,       0, 0, 0}
	};

	command_name      = argv[0];
	rollback_flag     = 1;
	exit_code         = 0;
	exit_flag         = 0;
	input_field_count = 0;
	quiet_flag        = 0;
	readonly_flag     = 0;
	verbosity         = 0;
	slurm_conf_init(NULL);
	log_init("sacctmgr", opts, SYSLOG_FACILITY_DAEMON, NULL);

	while((opt_char = getopt_long(argc, argv, "hionpPQrsvV",
			long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr, "Try \"sacctmgr --help\" "
				"for more information\n");
			exit(1);
			break;
		case (int)'h':
			_usage ();
			exit(exit_code);
			break;
		case (int)'i':
			rollback_flag = 0;
			break;
		case (int)'o':
			one_liner = 1;
			break;
		case (int)'n':
			print_fields_have_header = 0;
			break;
		case (int)'p':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_ENDING;
			break;
		case (int)'P':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_NO_ENDING;
			break;
		case (int)'Q':
			quiet_flag = 1;
			break;
		case (int)'r':
			readonly_flag = 1;
			break;
		case (int)'s':
			with_assoc_flag = 1;
			break;
		case (int)'v':
			quiet_flag = -1;
			verbosity++;
			break;
		case (int)'V':
			_print_version();
			exit(exit_code);
			break;
		default:
			exit_code = 1;
			fprintf(stderr, "getopt error, returned %c\n",
				opt_char);
			exit(exit_code);
		}
	}

	if (argc > MAX_INPUT_FIELDS)	/* bogus input, but continue anyway */
		input_words = argc;
	else
		input_words = 128;
	input_fields = (char **) xmalloc (sizeof (char *) * input_words);
	if (optind < argc) {
		for (i = optind; i < argc; i++) {
			input_fields[input_field_count++] = argv[i];
		}
	}

	if (verbosity) {
		opts.stderr_level += verbosity;
		opts.prefix_level = 1;
		log_alter(opts, 0, NULL);
	}

	/* Check to see if we are running a supported accounting plugin */
	temp = slurm_get_accounting_storage_type();
	if (xstrcasecmp(temp, "accounting_storage/slurmdbd")
	   && xstrcasecmp(temp, "accounting_storage/mysql")) {
		fprintf (stderr, "You are not running a supported "
			 "accounting_storage plugin\n(%s).\n"
			 "Only 'accounting_storage/slurmdbd' "
			 "and 'accounting_storage/mysql' are supported.\n",
			temp);
		xfree(temp);
		exit(1);
	}
	xfree(temp);

	errno = 0;
	db_conn = slurmdb_connection_get();
	if (errno != SLURM_SUCCESS) {
		int tmp_errno = errno;
		if ((input_field_count == 2) &&
		   (!strncasecmp(argv[2], "Configuration", strlen(argv[1]))) &&
		   ((!strncasecmp(argv[1], "list", strlen(argv[0]))) ||
		    (!strncasecmp(argv[1], "show", strlen(argv[0]))))) {
			if (tmp_errno == ESLURM_DB_CONNECTION) {
				tmp_errno = 0;
				sacctmgr_list_config(true);
			} else
				sacctmgr_list_config(false);
		}
		errno = tmp_errno;
		if (errno)
			error("Problem talking to the database: %m");
		exit(1);
	}
	my_uid = getuid();

	if (input_field_count)
		exit_flag = 1;
	else
		error_code = _get_command (&input_field_count, input_fields);
	while (error_code == SLURM_SUCCESS) {
		error_code = _process_command (input_field_count,
					       input_fields);
		if (error_code || exit_flag)
			break;
		error_code = _get_command (&input_field_count, input_fields);
		/* This is here so if someone made a mistake we allow
		 * them to fix it and let the process happen since there
		 * are checks for global exit_code we need to reset it.
		 */
		if (exit_code) {
			local_exit_code = exit_code;
			exit_code = 0;
		}
	}
	/* readline library writes \n when echoes the input string, it does
	 * not when it sees the EOF, so in that case we have to print it to
	 * align the terminal prompt.
	 */
	if (exit_flag == 2)
		putchar('\n');
	if (local_exit_code)
		exit_code = local_exit_code;
	acct_storage_g_close_connection(&db_conn);
	slurm_acct_storage_fini();
	FREE_NULL_LIST(g_qos_list);
	FREE_NULL_LIST(g_res_list);
	FREE_NULL_LIST(g_tres_list);

	exit(exit_code);
}
Ejemplo n.º 3
0
/*
 * main - slurmdbd main function, start various threads and process RPCs
 * IN argc - number of command-line words
 * IN argv - command-line words, handed to _parse_commandline()
 *
 * Start-up order: configuration and logging, authentication and accounting
 * storage plugins, optional daemonizing, pid file, user switch, then the
 * signal and commit handler threads and the association manager cache.
 * The service loop then runs either as backup or as primary, creating the
 * RPC and rollup threads for each pass and joining them before the next.
 *
 * Never returns: the process ends through exit(), or through fatal() on
 * the unrecoverable errors below.
 */
int main(int argc, char *argv[])
{
	pthread_attr_t thread_attr;
	char node_name[128];
	void *db_conn = NULL;
	assoc_init_args_t assoc_init_arg;

	_init_config();
	log_init(argv[0], log_opts, LOG_DAEMON, NULL);
	if (read_slurmdbd_conf())
		exit(1);
	_parse_commandline(argc, argv);
	_update_logging(true);
	_update_nice();

	if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s authentication plugin",
		      slurmdbd_conf->auth_type);
	}
	if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s accounting storage plugin",
		      slurmdbd_conf->storage_type);
	}
	_kill_old_slurmdbd();
	if (foreground == 0)
		_daemonize();

	/*
	 * Need to create pidfile here in case we setuid() below
	 * (init_pidfile() exits if it can't initialize pid file).
	 * On Linux we also need to make this setuid job explicitly
	 * able to write a core dump.
	 * This also has to happen after daemon(), which closes all fd's,
	 * so we keep the write lock of the pidfile.
	 */
	_init_pidfile();
	_become_slurm_user();
	if (foreground == 0)
		_set_work_dir();
	log_config();

#ifdef PR_SET_DUMPABLE
	if (prctl(PR_SET_DUMPABLE, 1) < 0)
		debug ("Unable to set dumpable to 1");
#endif /* PR_SET_DUMPABLE */

	if (xsignal_block(dbd_sigarray) < 0)
		error("Unable to block signals");

	/* Create attached thread for signal handling */
	slurm_attr_init(&thread_attr);
	if (pthread_create(&signal_handler_thread, &thread_attr,
			   _signal_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	registered_clusters = list_create(NULL);

	/* Create attached thread for the commit handler */
	slurm_attr_init(&thread_attr);
	if (pthread_create(&commit_handler_thread, &thread_attr,
			   _commit_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));

	/* If we are tracking wckey we need to cache
	   wckeys, if we aren't only cache the users, qos */
	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER | ASSOC_MGR_CACHE_QOS;
	if (slurmdbd_conf->track_wckey)
		assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;

	/* errno as left by the connection call is handed to assoc_mgr_init */
	db_conn = acct_storage_g_get_connection(NULL, 0, true, NULL);
	if (assoc_mgr_init(db_conn, &assoc_init_arg, errno) == SLURM_ERROR) {
		error("Problem getting cache of data");
		acct_storage_g_close_connection(&db_conn);
		goto end_it;
	}

	if (gethostname_short(node_name, sizeof(node_name)))
		fatal("getnodename: %m");

	while (1) {
		/*
		 * Pick the role from the configuration: backup if this host
		 * (or "localhost") is dbd_backup, primary if it is dbd_host,
		 * otherwise this host may not run the daemon at all.
		 */
		if (slurmdbd_conf->dbd_backup &&
		    (!strcmp(node_name, slurmdbd_conf->dbd_backup) ||
		     !strcmp(slurmdbd_conf->dbd_backup, "localhost"))) {
			info("slurmdbd running in background mode");
			have_control = false;
			backup = true;
			/* make sure any locks are released */
			acct_storage_g_commit(db_conn, 1);
			run_dbd_backup();
			if (!shutdown_time)
				assoc_mgr_refresh_lists(db_conn);
		} else if (slurmdbd_conf->dbd_host &&
			   (!strcmp(slurmdbd_conf->dbd_host, node_name) ||
			    !strcmp(slurmdbd_conf->dbd_host, "localhost"))) {
			backup = false;
			have_control = true;
		} else {
			fatal("This host not configured to run SlurmDBD "
			      "(%s != %s | (backup) %s)",
			      node_name, slurmdbd_conf->dbd_host,
			      slurmdbd_conf->dbd_backup);
		}

		if (!shutdown_time) {
			/* Create attached thread to process incoming RPCs */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rpc_handler_thread, &thread_attr,
					   rpc_mgr, NULL))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		if (!shutdown_time) {
			/* Create attached thread to do usage rollup */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rollup_handler_thread,
					   &thread_attr,
					   _rollup_handler, db_conn))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		/* Daemon is fully operational here */
		if (!shutdown_time || primary_resumed) {
			shutdown_time = 0;
			info("slurmdbd version %s started",
			     SLURM_VERSION_STRING);
			if (backup)
				run_dbd_backup();
		}

		_request_registrations(db_conn);
		acct_storage_g_commit(db_conn, 1);

		/*
		 * Wait for this pass's worker threads.  The handles are
		 * cleared once joined: a later pass that creates no threads
		 * (shutdown_time already set) would otherwise join the same,
		 * already-joined handles again, which pthread_join() does
		 * not define.
		 */
		if (rollup_handler_thread) {
			pthread_join(rollup_handler_thread, NULL);
			rollup_handler_thread = (pthread_t) 0;
		}
		if (rpc_handler_thread) {
			pthread_join(rpc_handler_thread, NULL);
			rpc_handler_thread = (pthread_t) 0;
		}

		/* Backup handing control back: clear shutdown, loop again */
		if (backup && primary_resumed) {
			shutdown_time = 0;
			info("Backup has given up control");
		}

		if (shutdown_time)
			break;
	}
	/* Daemon termination handled here */

end_it:

	if (signal_handler_thread)
		pthread_join(signal_handler_thread, NULL);
	if (commit_handler_thread)
		pthread_join(commit_handler_thread, NULL);

	acct_storage_g_commit(db_conn, 1);
	acct_storage_g_close_connection(&db_conn);

	if (slurmdbd_conf->pid_file &&
	    (unlink(slurmdbd_conf->pid_file) < 0)) {
		verbose("Unable to remove pidfile '%s': %m",
			slurmdbd_conf->pid_file);
	}

	FREE_NULL_LIST(registered_clusters);

	assoc_mgr_fini(NULL);
	slurm_acct_storage_fini();
	slurm_auth_fini();
	log_fini();
	free_slurmdbd_conf();
	exit(0);
}
Ejemplo n.º 4
0
static void * _service_connection(void *arg)
{
	slurmdbd_conn_t *conn = (slurmdbd_conn_t *) arg;
	uint32_t nw_size = 0, msg_size = 0, uid = NO_VAL;
	char *msg = NULL;
	ssize_t msg_read = 0, offset = 0;
	bool fini = false, first = true;
	Buf buffer = NULL;
	int rc = SLURM_SUCCESS;

	debug2("Opened connection %d from %s", conn->newsockfd, conn->ip);

	while (!fini) {
		if (!_fd_readable(conn->newsockfd))
			break;		/* problem with this socket */
		msg_read = read(conn->newsockfd, &nw_size, sizeof(nw_size));
		if (msg_read == 0)	/* EOF */
			break;
		if (msg_read != sizeof(nw_size)) {
			error("Could not read msg_size from "
			      "connection %d(%s) uid(%d)",
			      conn->newsockfd, conn->ip, uid);
			break;
		}
		msg_size = ntohl(nw_size);
		if ((msg_size < 2) || (msg_size > MAX_MSG_SIZE)) {
			error("Invalid msg_size (%u) from "
			      "connection %d(%s) uid(%d)",
			      msg_size, conn->newsockfd, conn->ip, uid);
			break;
		}

		msg = xmalloc(msg_size);
		offset = 0;
		while (msg_size > offset) {
			if (!_fd_readable(conn->newsockfd))
				break;		/* problem with this socket */
			msg_read = read(conn->newsockfd, (msg + offset),
					(msg_size - offset));
			if (msg_read <= 0) {
				error("read(%d): %m", conn->newsockfd);
				break;
			}
			offset += msg_read;
		}
		if (msg_size == offset) {
			rc = proc_req(
				conn, msg, msg_size, first, &buffer, &uid);
			first = false;
			if (rc != SLURM_SUCCESS && rc != ACCOUNTING_FIRST_REG) {
				error("Processing last message from "
				      "connection %d(%s) uid(%d)",
				      conn->newsockfd, conn->ip, uid);
				if (rc == ESLURM_ACCESS_DENIED
				    || rc == SLURM_PROTOCOL_VERSION_ERROR)
					fini = true;
			}
		} else {
			buffer = make_dbd_rc_msg(conn->rpc_version,
						 SLURM_ERROR, "Bad offset", 0);
			fini = true;
		}

		(void) _send_resp(conn->newsockfd, buffer);
		xfree(msg);
	}

	if (conn->ctld_port) {
		if (!shutdown_time) {
			slurmdb_cluster_rec_t cluster_rec;
			ListIterator itr;
			slurmdbd_conn_t *slurmdbd_conn;
			memset(&cluster_rec, 0, sizeof(slurmdb_cluster_rec_t));
			cluster_rec.name = conn->cluster_name;
			cluster_rec.control_host = conn->ip;
			cluster_rec.control_port = conn->ctld_port;
			cluster_rec.cpu_count = conn->cluster_cpus;
			debug("cluster %s has disconnected",
			      conn->cluster_name);

			clusteracct_storage_g_fini_ctld(
				conn->db_conn, &cluster_rec);

			slurm_mutex_lock(&registered_lock);
			itr = list_iterator_create(registered_clusters);
			while ((slurmdbd_conn = list_next(itr))) {
				if (conn == slurmdbd_conn) {
					list_delete_item(itr);
					break;
				}
			}
			list_iterator_destroy(itr);
			slurm_mutex_unlock(&registered_lock);
		}
		/* needs to be the last thing done */
		acct_storage_g_commit(conn->db_conn, 1);
	}

	acct_storage_g_close_connection(&conn->db_conn);
	if (slurm_close(conn->newsockfd) < 0)
		error("close(%d): %m(%s)",  conn->newsockfd, conn->ip);
	else
		debug2("Closed connection %d uid(%d)", conn->newsockfd, uid);

	xfree(conn->cluster_name);
	xfree(conn);
	_free_server_thread(pthread_self());
	return NULL;
}