extern int fini(void)
{
	_end_container_thread();

	/* free up some memory */
	slurm_mutex_destroy(&notify_mutex);
	pthread_cond_destroy(&notify);
	slurm_mutex_destroy(&thread_mutex);

	return SLURM_SUCCESS;
}
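For symmetry, a hedged sketch of the matching init side (assumed shape; the plugin's init() is not part of this listing):

extern int init(void)
{
	/* counterparts of the destroys in fini() above; names assumed */
	slurm_mutex_init(&notify_mutex);
	pthread_cond_init(&notify, NULL);
	slurm_mutex_init(&thread_mutex);

	return SLURM_SUCCESS;
}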
Example #2
/* list_destroy()
 */
void
list_destroy (List l)
{
	ListIterator i, iTmp;
	ListNode p, pTmp;

	assert(l != NULL);
	slurm_mutex_lock(&l->mutex);
	assert(l->magic == LIST_MAGIC);

	i = l->iNext;
	while (i) {
		assert(i->magic == LIST_MAGIC);
		iTmp = i->iNext;
		assert(i->magic = ~LIST_MAGIC); /* deliberate assignment, not a compare: clears the magic in debug builds only */
		list_iterator_free(i);
		i = iTmp;
	}
	p = l->head;
	while (p) {
		pTmp = p->next;
		if (p->data && l->fDel)
			l->fDel(p->data);
		list_node_free(p);
		p = pTmp;
	}
	assert(l->magic = ~LIST_MAGIC);     /* deliberate assignment, not a compare: clears the magic in debug builds only */
	slurm_mutex_unlock(&l->mutex);
	slurm_mutex_destroy(&l->mutex);
	list_free(l);
}
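A minimal usage sketch of the life cycle above (hypothetical caller; free() serves as the fDel delete function passed to list_create()):

static void _list_demo(void)
{
	List l = list_create(free);	/* fDel, run on each item at destroy */

	list_append(l, strdup("alpha"));
	list_append(l, strdup("beta"));

	list_destroy(l);	/* frees iterators, nodes, items, and the mutex */
}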
Example #3
void pmixp_dconn_fini(void)
{
	int i;
#ifdef HAVE_UCX
	if (pmixp_info_srv_direct_conn_ucx()) {
		pmixp_dconn_ucx_stop();
	}
#endif
	for (i=0; i<_pmixp_dconn_conn_cnt; i++) {
		slurm_mutex_destroy(&_pmixp_dconn_conns[i].lock);
		_pmixp_dconn_h.fini(_pmixp_dconn_conns[i].priv);
	}

#ifdef HAVE_UCX
	if (pmixp_info_srv_direct_conn_ucx()) {
		pmixp_dconn_ucx_finalize();
	} else
#endif
	{
		pmixp_dconn_tcp_finalize();
	}

	xfree(_pmixp_dconn_conns);
	_pmixp_dconn_conn_cnt = 0;
}
Example #4
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	fwd_tree_t fwd_tree;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int count = 0;
	List ret_list = NULL;
	int thr_count = 0;
	int host_count = 0;
	hostlist_t* sp_hl;
	int hl_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	if (route_g_split_hostlist(hl, &sp_hl, &hl_count,
				   msg->forward.tree_width)) {
		error("unable to split forward hostlist");
		return NULL;
	}
	slurm_mutex_init(&tree_mutex);
	slurm_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	memset(&fwd_tree, 0, sizeof(fwd_tree));
	fwd_tree.orig_msg = msg;
	fwd_tree.ret_list = ret_list;
	fwd_tree.timeout = timeout;
	fwd_tree.notify = &notify;
	fwd_tree.p_thr_count = &thr_count;
	fwd_tree.tree_mutex = &tree_mutex;

	_start_msg_tree_internal(NULL, sp_hl, &fwd_tree, hl_count);

	xfree(sp_hl);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while (thr_count > 0) {
		slurm_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	xassert(count >= host_count);	/* Tree head did not get all responses,
					 * but no more active fwd threads!*/
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	slurm_cond_destroy(&notify);

	return ret_list;
}
Example #5
void destroy_forward_struct(forward_struct_t *forward_struct)
{
	if (forward_struct) {
		xfree(forward_struct->buf);
		slurm_mutex_destroy(&forward_struct->forward_mutex);
		slurm_cond_destroy(&forward_struct->notify);
		xfree(forward_struct);
	}
}
Example #6
static void
_destroy_conf(void)
{
	if (conf) {
		xfree(conf->acct_gather_energy_type);
		xfree(conf->acct_gather_filesystem_type);
		xfree(conf->acct_gather_infiniband_type);
		xfree(conf->acct_gather_profile_type);
		xfree(conf->block_map);
		xfree(conf->block_map_inv);
		xfree(conf->conffile);
		xfree(conf->epilog);
		xfree(conf->health_check_program);
		xfree(conf->hostname);
		xfree(conf->job_acct_gather_freq);
		xfree(conf->job_acct_gather_type);
		xfree(conf->logfile);
		xfree(conf->node_name);
		xfree(conf->node_addr);
		xfree(conf->node_topo_addr);
		xfree(conf->node_topo_pattern);
		xfree(conf->pidfile);
		xfree(conf->prolog);
		xfree(conf->pubkey);
		xfree(conf->select_type);
		xfree(conf->spooldir);
		xfree(conf->stepd_loc);
		xfree(conf->task_prolog);
		xfree(conf->task_epilog);
		xfree(conf->tmpfs);
		slurm_mutex_destroy(&conf->config_mutex);
		list_destroy(conf->starting_steps);
		slurm_mutex_destroy(&conf->starting_steps_lock);
		pthread_cond_destroy(&conf->starting_steps_cond);
		list_destroy(conf->prolog_running_jobs);
		slurm_mutex_destroy(&conf->prolog_running_lock);
		pthread_cond_destroy(&conf->prolog_running_cond);
		slurm_cred_ctx_destroy(conf->vctx);
		xfree(conf);
	}
	return;
}
Example #7
static void
_task_info_destroy(stepd_step_task_info_t *t, uint16_t multi_prog)
{
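	/* take and release the mutex so no other thread can still be
	 * holding it when it is destroyed below */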
	slurm_mutex_lock(&t->mutex);
	slurm_mutex_unlock(&t->mutex);
	slurm_mutex_destroy(&t->mutex);
	if (multi_prog) {
		xfree(t->argv);
	} /* otherwise, t->argv is a pointer to job->argv */
	xfree(t);
}
Example #8
extern int destroy_mysql_conn(mysql_conn_t *mysql_conn)
{
	if (mysql_conn) {
		mysql_db_close_db_connection(mysql_conn);
		xfree(mysql_conn->pre_commit_query);
		xfree(mysql_conn->cluster_name);
		slurm_mutex_destroy(&mysql_conn->lock);
		list_destroy(mysql_conn->update_list);
		xfree(mysql_conn);
	}

	return SLURM_SUCCESS;
}
Example #9
void eio_handle_destroy(eio_handle_t *eio)
{
	xassert(eio != NULL);
	xassert(eio->magic == EIO_MAGIC);
	close(eio->fds[0]);
	close(eio->fds[1]);
	FREE_NULL_LIST(eio->obj_list);
	FREE_NULL_LIST(eio->new_objs);
	slurm_mutex_destroy(&eio->shutdown_mutex);

	xassert(eio->magic = ~EIO_MAGIC);	/* deliberate assignment: clears the magic in debug builds only */
	xfree(eio);
}
Example #10
extern void msg_aggr_sender_fini(void)
{
	if (!msg_collection.running)
		return;
	msg_collection.running = 0;
	slurm_mutex_lock(&msg_collection.mutex);

	pthread_cond_signal(&msg_collection.cond);
	slurm_mutex_unlock(&msg_collection.mutex);

	pthread_join(msg_collection.thread_id, NULL);
	msg_collection.thread_id = (pthread_t) 0;

	pthread_cond_destroy(&msg_collection.cond);
	/* signal and clear the waiting list */
	slurm_mutex_lock(&msg_collection.aggr_mutex);
	_handle_msg_aggr_ret(0, 1);
	FREE_NULL_LIST(msg_collection.msg_aggr_list);
	slurm_mutex_unlock(&msg_collection.aggr_mutex);
	FREE_NULL_LIST(msg_collection.msg_list);
	slurm_mutex_destroy(&msg_collection.aggr_mutex);
	slurm_mutex_destroy(&msg_collection.mutex);
}
Example #11
/* _cancel_jobs - filter then cancel jobs or job steps per request */
static int _cancel_jobs(int filter_cnt)
{
	int rc = 0;

	slurm_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate error %m");

	slurm_mutex_init(&num_active_threads_lock);

	if (pthread_cond_init(&num_active_threads_cond, NULL))
		error("pthread_cond_init error %m");

	_cancel_jobs_by_state(JOB_PENDING, filter_cnt, &rc);
	/* Wait for any cancel of pending jobs to complete before starting
	 * cancellation of running jobs so that we don't have a race condition
	 * with pending jobs getting scheduled while running jobs are also
	 * being cancelled. */
	pthread_mutex_lock(&num_active_threads_lock);
	while (num_active_threads > 0) {
		pthread_cond_wait(&num_active_threads_cond,
				  &num_active_threads_lock);
	}
	pthread_mutex_unlock(&num_active_threads_lock);

	_cancel_jobs_by_state(JOB_END, filter_cnt, &rc);
	/* Wait for any spawned threads that have not finished */
	pthread_mutex_lock(&num_active_threads_lock);
	while (num_active_threads > 0) {
		pthread_cond_wait(&num_active_threads_cond,
				  &num_active_threads_lock);
	}
	pthread_mutex_unlock(&num_active_threads_lock);

	slurm_attr_destroy(&attr);
	slurm_mutex_destroy(&num_active_threads_lock);
	if (pthread_cond_destroy(&num_active_threads_cond))
		error("pthread_cond_destroy error %m");

	return rc;
}
Example #12
int p_mpi_hook_client_fini(mpi_plugin_client_state_t *state)
{
	if (p4_tid != (pthread_t)-1) {
		char tmp = 1;
		int n;

		/*
		 * Write to the pipe to break the mpi thread out of a poll
		 * (or leave the poll immediately after it is called) and exit.
		 * Do a timed wait for the mpi thread to shut down, or just
		 * exit if the mpi thread cannot respond.
		 */
		n = write(shutdown_pipe[1], &tmp, 1);
		if (n == 1) {
			struct timespec ts = {0, 0};

			slurm_mutex_lock(&shutdown_lock);
			ts.tv_sec = time(NULL) + shutdown_timeout;

			while (!shutdown_complete) {
				if (time(NULL) >= ts.tv_sec) {
					break;
				}
				pthread_cond_timedwait(
					&shutdown_cond,
					&shutdown_lock, &ts);
			}
			slurm_mutex_unlock(&shutdown_lock);
		}
		if (shutdown_complete) {
			close(shutdown_pipe[0]);
			close(shutdown_pipe[1]);

			slurm_mutex_destroy(&shutdown_lock);
			pthread_cond_destroy(&shutdown_cond);
		}
		p4_tid = (pthread_t) -1;
	}
	return SLURM_SUCCESS;
}
Example #13
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->p_thr_count = &thr_count;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout  = slurm_get_msg_timeout() * 1000;
		}

		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		/*
		 * Lock and increase thread counter, we need that to protect
		 * the start_msg_tree waiting loop that was originally designed
		 * around a "while ((count < host_count))" loop. In case where a
		 * fwd thread was not able to get all the return codes from
		 * children, the waiting loop was deadlocked.
		 */
		slurm_mutex_lock(&tree_mutex);
		thr_count++;
		slurm_mutex_unlock(&tree_mutex);

		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread, (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);

	}
	xfree(span);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while (thr_count > 0) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	xassert(count >= host_count);	/* Tree head did not get all responses,
					 * but no more active fwd threads!*/
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
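A hedged sketch of the worker-side epilog implied by the fields above (_fwd_tree_thread is not shown in this listing, so its exact body is an assumption):

static void _fwd_tree_thread_epilog(fwd_tree_t *fwd_tree)
{
	/* decrement the active-thread counter and wake the tree head;
	 * this is what lets the "while (thr_count > 0)" loop exit even
	 * when some children never respond */
	slurm_mutex_lock(fwd_tree->tree_mutex);
	(*fwd_tree->p_thr_count)--;
	pthread_cond_signal(fwd_tree->notify);
	slurm_mutex_unlock(fwd_tree->tree_mutex);
}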
Example #14
static void *_cluster_rollup_usage(void *arg)
{
	local_rollup_t *local_rollup = (local_rollup_t *)arg;
	int rc = SLURM_SUCCESS;
	char timer_str[128];
	mysql_conn_t mysql_conn;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *query = NULL;
	struct tm start_tm;
	struct tm end_tm;
	time_t my_time = local_rollup->sent_end;
	time_t last_hour = local_rollup->sent_start;
	time_t last_day = local_rollup->sent_start;
	time_t last_month = local_rollup->sent_start;
	time_t hour_start;
	time_t hour_end;
	time_t day_start;
	time_t day_end;
	time_t month_start;
	time_t month_end;
	DEF_TIMERS;

	char *update_req_inx[] = {
		"hourly_rollup",
		"daily_rollup",
		"monthly_rollup"
	};

	enum {
		UPDATE_HOUR,
		UPDATE_DAY,
		UPDATE_MONTH,
		UPDATE_COUNT
	};


	memset(&mysql_conn, 0, sizeof(mysql_conn_t));
	mysql_conn.rollback = 1;
	mysql_conn.conn = local_rollup->mysql_conn->conn;
	slurm_mutex_init(&mysql_conn.lock);

	/* Each thread needs its own connection; we can't use the one
	 * sent from the parent thread. */
	rc = check_connection(&mysql_conn);

	if (rc != SLURM_SUCCESS)
		goto end_it;

	if (!local_rollup->sent_start) {
		char *tmp = NULL;
		int i=0;
		xstrfmtcat(tmp, "%s", update_req_inx[i]);
		for(i=1; i<UPDATE_COUNT; i++) {
			xstrfmtcat(tmp, ", %s", update_req_inx[i]);
		}
		query = xstrdup_printf("select %s from \"%s_%s\"",
				       tmp, local_rollup->cluster_name,
				       last_ran_table);
		xfree(tmp);

		debug4("%d(%s:%d) query\n%s", mysql_conn.conn,
		       THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(&mysql_conn, query, 0))) {
			xfree(query);
			rc = SLURM_ERROR;
			goto end_it;
		}

		xfree(query);
		row = mysql_fetch_row(result);
		if (row) {
			last_hour = slurm_atoul(row[UPDATE_HOUR]);
			last_day = slurm_atoul(row[UPDATE_DAY]);
			last_month = slurm_atoul(row[UPDATE_MONTH]);
			mysql_free_result(result);
		} else {
			time_t now = time(NULL);
			time_t lowest = now;

			mysql_free_result(result);

			query = xstrdup_printf(
				"select time_start from \"%s_%s\" "
				"where node_name='' order by "
				"time_start asc limit 1;",
				local_rollup->cluster_name, event_table);
			debug3("%d(%s:%d) query\n%s", mysql_conn.conn,
			       THIS_FILE, __LINE__, query);
			if (!(result = mysql_db_query_ret(
				      &mysql_conn, query, 0))) {
				xfree(query);
				rc = SLURM_ERROR;
				goto end_it;
			}
			xfree(query);
			if ((row = mysql_fetch_row(result))) {
				time_t check = slurm_atoul(row[0]);
				if (check < lowest)
					lowest = check;
			}
			mysql_free_result(result);

			/* If we don't have any events (like adding a
			 * cluster) this will not work correctly, so
			 * insert the current time as a starting point.
			 */

			query = xstrdup_printf(
				"insert into \"%s_%s\" "
				"(hourly_rollup, daily_rollup, monthly_rollup) "
				"values (%ld, %ld, %ld);",
				local_rollup->cluster_name, last_ran_table,
				lowest, lowest, lowest);

			debug3("%d(%s:%d) query\n%s", mysql_conn.conn,
			       THIS_FILE, __LINE__, query);
			rc = mysql_db_query(&mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				rc = SLURM_ERROR;
				goto end_it;
			}

			if (lowest == now) {
				debug("Cluster %s not registered, "
				      "not doing rollup",
				      local_rollup->cluster_name);
				rc = SLURM_SUCCESS;
				goto end_it;
			}

			last_hour = last_day = last_month = lowest;
		}
	}

	if (!my_time)
		my_time = time(NULL);

	/* test month gap */
/* 	last_hour = 1212299999; */
/* 	last_day = 1212217200; */
/* 	last_month = 1212217200; */
/* 	my_time = 1212307200; */

/* 	last_hour = 1211475599; */
/* 	last_day = 1211475599; */
/* 	last_month = 1211475599; */

//	last_hour = 1211403599;
	//	last_hour = 1206946800;
//	last_day = 1207033199;
//	last_day = 1197033199;
//	last_month = 1204358399;

	if (!localtime_r(&last_hour, &start_tm)) {
		error("Couldn't get localtime from hour start %ld", last_hour);
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (!localtime_r(&my_time, &end_tm)) {
		error("Couldn't get localtime from hour end %ld", my_time);
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* Below and anywhere in a rollup plugin when dealing with
	 * epoch times we need to set the tm_isdst = -1 so we don't
	 * have to worry about the time changes.  Not setting it to -1
	 * will cause problems in the day and month with the date change.
	 */

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_isdst = -1;
	hour_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_isdst = -1;
	hour_end = mktime(&end_tm);

/* 	info("hour start %s", slurm_ctime(&hour_start)); */
/* 	info("hour end %s", slurm_ctime(&hour_end)); */
/* 	info("diff is %d", hour_end-hour_start); */

	slurm_mutex_lock(&rollup_lock);
	global_last_rollup = hour_end;
	slurm_mutex_unlock(&rollup_lock);

	/* set up the day period */
	if (!localtime_r(&last_day, &start_tm)) {
		error("Couldn't get localtime from day %ld", last_day);
		rc = SLURM_ERROR;
		goto end_it;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;
	day_start = mktime(&start_tm);

	end_tm.tm_hour = 0;
	end_tm.tm_isdst = -1;
	day_end = mktime(&end_tm);

/* 	info("day start %s", slurm_ctime(&day_start)); */
/* 	info("day end %s", slurm_ctime(&day_end)); */
/* 	info("diff is %d", day_end-day_start); */

	/* set up the month period */
	if (!localtime_r(&last_month, &start_tm)) {
		error("Couldn't get localtime from month %ld", last_month);
		rc = SLURM_ERROR;
		goto end_it;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_isdst = -1;
	month_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_hour = 0;
	end_tm.tm_mday = 1;
	end_tm.tm_isdst = -1;
	month_end = mktime(&end_tm);

/* 	info("month start %s", slurm_ctime(&month_start)); */
/* 	info("month end %s", slurm_ctime(&month_end)); */
/* 	info("diff is %d", month_end-month_start); */

	if ((hour_end - hour_start) > 0) {
		START_TIMER;
		rc = as_mysql_hourly_rollup(&mysql_conn,
					    local_rollup->cluster_name,
					    hour_start,
					    hour_end,
					    local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "hourly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((day_end - day_start) > 0) {
		START_TIMER;
		rc = as_mysql_daily_rollup(&mysql_conn,
					   local_rollup->cluster_name,
					   day_start,
					   day_end,
					   local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "daily_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((month_end - month_start) > 0) {
		START_TIMER;
		rc = as_mysql_monthly_rollup(&mysql_conn,
					     local_rollup->cluster_name,
					     month_start,
					     month_end,
					     local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "monthly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((hour_end - hour_start) > 0) {
		/* If we have a sent_end do not update the last_run_table */
		if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set hourly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, hour_end);
	} else
		debug2("No need to roll cluster %s this hour %ld <= %ld",
		       local_rollup->cluster_name, hour_end, hour_start);

	if ((day_end - day_start) > 0) {
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", daily_rollup=%ld", day_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set daily_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, day_end);
	} else
		debug2("No need to roll cluster %s this day %ld <= %ld",
		       local_rollup->cluster_name, day_end, day_start);

	if ((month_end - month_start) > 0) {
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", monthly_rollup=%ld", month_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set monthly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, month_end);
	} else
		debug2("No need to roll cluster %s this month %ld <= %ld",
		       local_rollup->cluster_name, month_end, month_start);

	if (query) {
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn.conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(&mysql_conn, query);
		xfree(query);
	}
end_it:
	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(&mysql_conn)) {
			error("Couldn't commit rollup of cluster %s",
			      local_rollup->cluster_name);
			rc = SLURM_ERROR;
		}
	} else {
		error("Cluster %s rollup failed", local_rollup->cluster_name);
		if (mysql_db_rollback(&mysql_conn))
			error("rollback failed");
	}

	mysql_db_close_db_connection(&mysql_conn);
	slurm_mutex_destroy(&mysql_conn.lock);

	slurm_mutex_lock(local_rollup->rolledup_lock);
	(*local_rollup->rolledup)++;
	if ((rc != SLURM_SUCCESS) && ((*local_rollup->rc) == SLURM_SUCCESS))
		(*local_rollup->rc) = rc;
	pthread_cond_signal(local_rollup->rolledup_cond);
	slurm_mutex_unlock(local_rollup->rolledup_lock);
	xfree(local_rollup);

	return NULL;
}
Example #15
extern int as_mysql_roll_usage(mysql_conn_t *mysql_conn,
			       time_t sent_start, time_t sent_end,
			       uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int rolledup = 0;
	char *cluster_name = NULL;
	ListIterator itr;
	pthread_mutex_t rolledup_lock = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t rolledup_cond;
	//DEF_TIMERS;

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	slurm_mutex_lock(&usage_rollup_lock);

	slurm_mutex_init(&rolledup_lock);
	pthread_cond_init(&rolledup_cond, NULL);

	//START_TIMER;
	slurm_mutex_lock(&as_mysql_cluster_list_lock);
	itr = list_iterator_create(as_mysql_cluster_list);
	while ((cluster_name = list_next(itr))) {
		/* pthread_t rollup_tid; */
		/* pthread_attr_t rollup_attr; */
		local_rollup_t *local_rollup = xmalloc(sizeof(local_rollup_t));

		local_rollup->archive_data = archive_data;
		local_rollup->cluster_name = cluster_name;

		local_rollup->mysql_conn = mysql_conn;
		local_rollup->rc = &rc;
		local_rollup->rolledup = &rolledup;
		local_rollup->rolledup_lock = &rolledup_lock;
		local_rollup->rolledup_cond = &rolledup_cond;

		local_rollup->sent_end = sent_end;
		local_rollup->sent_start = sent_start;

		/* _cluster_rollup_usage is responsible for freeing
		   this local_rollup */
		_cluster_rollup_usage(local_rollup);
		/* It turns out doing this with threads only buys a
		   very small victory, and can skew the timings.  So
		   doing them one after the other isn't too bad.  If
		   you really want to do this in threads you can
		   uncomment this block and comment out the call above.
		*/
		/* slurm_attr_init(&rollup_attr); */
		/* if (pthread_create(&rollup_tid, &rollup_attr, */
		/* 		   _cluster_rollup_usage, */
		/* 		   (void *)local_rollup)) */
		/* 	fatal("pthread_create: %m"); */
		/* slurm_attr_destroy(&rollup_attr); */
	}
	slurm_mutex_lock(&rolledup_lock);
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&as_mysql_cluster_list_lock);

	while (rolledup < list_count(as_mysql_cluster_list)) {
		pthread_cond_wait(&rolledup_cond, &rolledup_lock);
		debug2("Got %d rolled up", rolledup);
	}
	slurm_mutex_unlock(&rolledup_lock);
	debug2("Everything rolled up");
	slurm_mutex_destroy(&rolledup_lock);
	pthread_cond_destroy(&rolledup_cond);
	/* END_TIMER; */
	/* info("total time was %s", TIME_STR); */

	slurm_mutex_unlock(&usage_rollup_lock);

	return rc;
}
Example #16
extern int as_mysql_roll_usage(mysql_conn_t *mysql_conn,
			       time_t sent_start, time_t sent_end,
			       uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int rolledup = 0;
	char *cluster_name = NULL;
	ListIterator itr;
	pthread_mutex_t rolledup_lock = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t rolledup_cond;
	//DEF_TIMERS;

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	slurm_mutex_lock(&usage_rollup_lock);

	slurm_mutex_init(&rolledup_lock);
	pthread_cond_init(&rolledup_cond, NULL);

	//START_TIMER;
	slurm_mutex_lock(&as_mysql_cluster_list_lock);
	itr = list_iterator_create(as_mysql_cluster_list);
	while ((cluster_name = list_next(itr))) {
		pthread_t rollup_tid;
		pthread_attr_t rollup_attr;
		local_rollup_t *local_rollup = xmalloc(sizeof(local_rollup_t));

		local_rollup->archive_data = archive_data;
		local_rollup->cluster_name = cluster_name;

		local_rollup->mysql_conn = mysql_conn;
		local_rollup->rc = &rc;
		local_rollup->rolledup = &rolledup;
		local_rollup->rolledup_lock = &rolledup_lock;
		local_rollup->rolledup_cond = &rolledup_cond;

		local_rollup->sent_end = sent_end;
		local_rollup->sent_start = sent_start;

		/* _cluster_rollup_usage is responsible for freeing
		   this local_rollup */
		/* If you have many jobs in your system the
		 * _cluster_rollup_usage call takes up a bunch of time
		 * and all the while the as_mysql_cluster_list_lock is
		 * locked.  If a slurmctld is starting up while this
		 * is locked it will hang waiting to get information
		 * from the DBD.  So threading this makes a lot of
		 * sense.  While it only buys a very small victory in
		 * terms of speed, having the
		 * as_mysql_cluster_list_lock lock unlock in a timely
		 * fashion buys a bunch on systems with lots
		 * (millions) of jobs.
		 */
		slurm_attr_init(&rollup_attr);
		if (pthread_create(&rollup_tid, &rollup_attr,
				   _cluster_rollup_usage,
				   (void *)local_rollup))
			fatal("pthread_create: %m");
		slurm_attr_destroy(&rollup_attr);
	}
	slurm_mutex_lock(&rolledup_lock);
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&as_mysql_cluster_list_lock);

	while (rolledup < list_count(as_mysql_cluster_list)) {
		pthread_cond_wait(&rolledup_cond, &rolledup_lock);
		debug2("Got %d rolled up", rolledup);
	}
	slurm_mutex_unlock(&rolledup_lock);
	debug2("Everything rolled up");
	slurm_mutex_destroy(&rolledup_lock);
	pthread_cond_destroy(&rolledup_cond);
	/* END_TIMER; */
	/* info("total time was %s", TIME_STR); */

	slurm_mutex_unlock(&usage_rollup_lock);

	return rc;
}
Example #17
extern void msg_aggr_add_msg(slurm_msg_t *msg, bool wait,
			     void (*resp_callback) (slurm_msg_t *msg))
{
	int count;
	static uint16_t msg_index = 1;
	static uint32_t wait_count = 0;

	if (!msg_collection.running)
		return;

	slurm_mutex_lock(&msg_collection.mutex);
	if (msg_collection.max_msgs == true) {
		slurm_cond_wait(&msg_collection.cond, &msg_collection.mutex);
	}

	msg->msg_index = msg_index++;

	/* Add msg to message collection */
	list_append(msg_collection.msg_list, msg);

	count = list_count(msg_collection.msg_list);


	/* First msg in collection; initiate new window */
	if (count == 1)
		slurm_cond_signal(&msg_collection.cond);

	/* Max msgs reached; terminate window */
	if (count >= msg_collection.max_msg_cnt) {
		msg_collection.max_msgs = true;
		slurm_cond_signal(&msg_collection.cond);
	}
	slurm_mutex_unlock(&msg_collection.mutex);

	if (wait) {
		msg_aggr_t *msg_aggr = xmalloc(sizeof(msg_aggr_t));
		uint16_t        msg_timeout;
		struct timeval  now;
		struct timespec timeout;

		msg_aggr->msg_index = msg->msg_index;
		msg_aggr->resp_callback = resp_callback;
		slurm_cond_init(&msg_aggr->wait_cond, NULL);

		slurm_mutex_lock(&msg_collection.aggr_mutex);
		list_append(msg_collection.msg_aggr_list, msg_aggr);

		msg_timeout = slurm_get_msg_timeout();
		gettimeofday(&now, NULL);
		timeout.tv_sec = now.tv_sec + msg_timeout;
		timeout.tv_nsec = now.tv_usec * 1000;

		wait_count++;
		if (pthread_cond_timedwait(&msg_aggr->wait_cond,
					   &msg_collection.aggr_mutex,
					   &timeout) == ETIMEDOUT)
			_handle_msg_aggr_ret(msg_aggr->msg_index, 1);
		wait_count--;
		slurm_mutex_unlock(&msg_collection.aggr_mutex);
		if (!msg_collection.running && !wait_count)
			slurm_mutex_destroy(&msg_collection.aggr_mutex);

		_msg_aggr_free(msg_aggr);
	}
}
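A hypothetical caller sketch (the callback and wrapper names are assumptions):

static void _resp_cb(slurm_msg_t *resp)
{
	debug2("aggregated response for msg_index %u", resp->msg_index);
}

static void _send_via_aggregator(slurm_msg_t *msg)
{
	/* wait == true blocks until _resp_cb has run or the message
	 * times out after slurm_get_msg_timeout() seconds */
	msg_aggr_add_msg(msg, true, _resp_cb);
}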
Example #18
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout  = slurm_get_msg_timeout() * 1000;
		}

		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread, (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	xfree(span);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while ((count < host_count)) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	debug2("Tree head got them all");
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}