Пример #1
0
/*
 * setup_cluster_nodes - get cluster record list within requested
 *   time period with used nodes. Used for deciding whether a nodelist is
 *   overlapping with the required nodes.
 */
extern cluster_nodes_t *
setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond)
{
	DEF_VARS;
	cluster_nodes_t *cnodes = NULL;
	time_t now = time(NULL);
	hostlist_t temp_hl = NULL;
	hostlist_iterator_t h_itr = NULL;

	if (!job_cond || !job_cond->used_nodes)
		return NULL;

	if (!job_cond->cluster_list || list_count(job_cond->cluster_list) != 1) {
		error("If you are doing a query against nodes "
		      "you must only have 1 cluster "
		      "you are asking for.");
		return NULL;
	}

	temp_hl = hostlist_create(job_cond->used_nodes);
	if (!hostlist_count(temp_hl)) {
		error("we didn't get any real hosts to look for.");
		hostlist_destroy(temp_hl);
		return NULL;
	}

	query = xstrdup_printf("SELECT cluster_nodes, time_start, "
			       "time_end FROM %s.%s WHERE node_name='' "
			       "AND cluster_nodes !=''",
			       (char *)list_peek(job_cond->cluster_list),
			       event_table);

	if (job_cond->usage_start) {
		if (!job_cond->usage_end)
			job_cond->usage_end = now;

		xstrfmtcat(query, " AND ((time_start<%ld) "
			   "AND (time_end>=%ld OR time_end=0))",
			   job_cond->usage_end, job_cond->usage_start);
	}

	result = DEF_QUERY_RET;
	if (!result) {
		hostlist_destroy(temp_hl);
		return NULL;
	}

	h_itr = hostlist_iterator_create(temp_hl);
	cnodes = xmalloc(sizeof(cluster_nodes_t));
	cnodes->cluster_list = list_create(_destroy_local_cluster);
	FOR_EACH_ROW {
		char *host = NULL;
		int loc = 0;
		local_cluster_t *local_cluster =
			xmalloc(sizeof(local_cluster_t));
		local_cluster->hl = hostlist_create(ROW(0));
		local_cluster->start = atoi(ROW(1));
		local_cluster->end   = atoi(ROW(2));
		local_cluster->asked_bitmap =
			bit_alloc(hostlist_count(local_cluster->hl));
		while((host = hostlist_next(h_itr))) {
			if ((loc = hostlist_find(
				    local_cluster->hl, host)) != -1)
				bit_set(local_cluster->asked_bitmap, loc);
			free(host);
		}
		hostlist_iterator_reset(h_itr);
		if (bit_ffs(local_cluster->asked_bitmap) != -1) {
			list_append(cnodes->cluster_list, local_cluster);
			if (local_cluster->end == 0) {
				local_cluster->end = now;
				cnodes->curr_cluster = local_cluster;
			}
		} else
			_destroy_local_cluster(local_cluster);
	} END_EACH_ROW;
	PQclear(result);
	hostlist_iterator_destroy(h_itr);
	if (!list_count(cnodes->cluster_list)) {
		destroy_cluster_nodes(cnodes);
		cnodes = NULL;
	}

	hostlist_destroy(temp_hl);
	return cnodes;
}
Пример #2
0
extern List setup_cluster_list_with_inx(mysql_conn_t *mysql_conn,
					slurmdb_job_cond_t *job_cond,
					void **curr_cluster)
{
	List local_cluster_list = NULL;
	time_t now = time(NULL);
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	hostlist_t temp_hl = NULL;
	hostlist_iterator_t h_itr = NULL;
	char *query = NULL;
	int dims = 0;

	if (!job_cond || !job_cond->used_nodes)
		return NULL;

	if (!job_cond->cluster_list
	    || list_count(job_cond->cluster_list) != 1) {
		error("If you are doing a query against nodes "
		      "you must only have 1 cluster "
		      "you are asking for.");
		return NULL;
	}

	/* get the dimensions of this cluster so we know how to deal
	   with the hostlists */
	query = xstrdup_printf("select dimensions, flags from %s where "
			       "name='%s'",
			       cluster_table,
			       (char *)list_peek(job_cond->cluster_list));

	debug4("%d(%s:%d) query\n%s",
	       mysql_conn->conn, THIS_FILE, __LINE__, query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	if (!(row = mysql_fetch_row(result))) {
		error("Couldn't get the dimensions of cluster '%s'.",
		      (char *)list_peek(job_cond->cluster_list));
		mysql_free_result(result);
		return NULL;
	}

	/* On a Cray System when dealing with hostlists as we are here
	   this always needs to be 1.
	*/
	if (slurm_atoul(row[1]) & CLUSTER_FLAG_CRAY_A)
		dims = 1;
	else
		dims = atoi(row[0]);

	mysql_free_result(result);

	temp_hl = hostlist_create_dims(job_cond->used_nodes, dims);
	if (hostlist_count(temp_hl) <= 0) {
		error("we didn't get any real hosts to look for.");
		goto no_hosts;
	}
	h_itr = hostlist_iterator_create(temp_hl);

	query = xstrdup_printf("select cluster_nodes, time_start, "
			       "time_end from \"%s_%s\" where node_name='' "
			       "&& cluster_nodes !=''",
			       (char *)list_peek(job_cond->cluster_list),
			       event_table);

	if (job_cond->usage_start) {
		if (!job_cond->usage_end)
			job_cond->usage_end = now;

		xstrfmtcat(query,
			   " && ((time_start < %ld) "
			   "&& (time_end >= %ld || time_end = 0))",
			   job_cond->usage_end, job_cond->usage_start);
	}

	if (debug_flags & DEBUG_FLAG_DB_JOB)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		goto no_hosts;
	}
	xfree(query);

	local_cluster_list = list_create(_destroy_local_cluster);
	while ((row = mysql_fetch_row(result))) {
		char *host = NULL;
		int loc = 0;
		local_cluster_t *local_cluster =
			xmalloc(sizeof(local_cluster_t));
		local_cluster->hl = hostlist_create_dims(row[0], dims);
		local_cluster->start = slurm_atoul(row[1]);
		local_cluster->end   = slurm_atoul(row[2]);
		local_cluster->asked_bitmap =
			bit_alloc(hostlist_count(local_cluster->hl));
		while ((host = hostlist_next_dims(h_itr, dims))) {
			if ((loc = hostlist_find(
				     local_cluster->hl, host)) != -1)
				bit_set(local_cluster->asked_bitmap, loc);
			free(host);
		}
		hostlist_iterator_reset(h_itr);
		if (bit_ffs(local_cluster->asked_bitmap) != -1) {
			list_append(local_cluster_list, local_cluster);
			if (local_cluster->end == 0) {
				local_cluster->end = now;
				(*curr_cluster) = local_cluster;
			} else if (!(*curr_cluster)
				   || (((local_cluster_t *)(*curr_cluster))->end
				       < local_cluster->end)) {
				(*curr_cluster) = local_cluster;
			}
		} else
			_destroy_local_cluster(local_cluster);
	}
	mysql_free_result(result);

	if (!list_count(local_cluster_list)) {
		FREE_NULL_LIST(local_cluster_list);
		local_cluster_list = NULL;
		goto no_hosts;
	}

no_hosts:

	hostlist_iterator_destroy(h_itr);
	hostlist_destroy(temp_hl);

	return local_cluster_list;
}
Пример #3
0
/* use specific set run tasks on each host listed in hostfile
 * XXX: Need to handle over-subscribe.
 */
static int _task_layout_hostfile(slurm_step_layout_t *step_layout,
				 const char *arbitrary_nodes)
{
	int i=0, j, taskid = 0, task_cnt=0;
	hostlist_iterator_t itr = NULL, itr_task = NULL;
	char *host = NULL;
	char *host_task = NULL;
	hostlist_t job_alloc_hosts = NULL;
	hostlist_t step_alloc_hosts = NULL;

	debug2("job list is %s", step_layout->node_list);
	job_alloc_hosts = hostlist_create(step_layout->node_list);
	itr = hostlist_iterator_create(job_alloc_hosts);
	if (!arbitrary_nodes) {
		error("no hostlist given for arbitrary dist");
		return SLURM_ERROR;
	}

	debug2("list is %s", arbitrary_nodes);
	step_alloc_hosts = hostlist_create(arbitrary_nodes);
	if (hostlist_count(step_alloc_hosts) != step_layout->task_cnt) {
		error("Asked for %u tasks have %d in the nodelist.  "
		      "Check your nodelist, or set the -n option to be %d",
		      step_layout->task_cnt,
		      hostlist_count(step_alloc_hosts),
		      hostlist_count(step_alloc_hosts));
		return SLURM_ERROR;
	}
	itr_task = hostlist_iterator_create(step_alloc_hosts);
	while((host = hostlist_next(itr))) {
		step_layout->tasks[i] = 0;
		while((host_task = hostlist_next(itr_task))) {
			if (!strcmp(host, host_task)) {
				step_layout->tasks[i]++;
				task_cnt++;
			}
			free(host_task);
			if (task_cnt >= step_layout->task_cnt)
				break;
		}
		debug3("%s got %u tasks", host, step_layout->tasks[i]);
		if (step_layout->tasks[i] == 0)
			goto reset_hosts;
		step_layout->tids[i] = xmalloc(sizeof(uint32_t)
					       * step_layout->tasks[i]);
		taskid = 0;
		j = 0;
		hostlist_iterator_reset(itr_task);
		while((host_task = hostlist_next(itr_task))) {
			if (!strcmp(host, host_task)) {
				step_layout->tids[i][j] = taskid;
				j++;
			}
			taskid++;
			free(host_task);
			if (j >= step_layout->tasks[i])
				break;
		}
		i++;
	reset_hosts:
		hostlist_iterator_reset(itr_task);
		free(host);
		if (i > step_layout->task_cnt)
			break;
	}
	hostlist_iterator_destroy(itr);
	hostlist_iterator_destroy(itr_task);
	hostlist_destroy(job_alloc_hosts);
	hostlist_destroy(step_alloc_hosts);
	if (task_cnt != step_layout->task_cnt) {
		error("Asked for %u tasks but placed %d. Check your nodelist",
		      step_layout->task_cnt, task_cnt);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}