示例#1
0
/*
 * _find_child_switches - fill in the child-index array of switch sw.
 *
 * Expands the "switches" expression of switch_record_table[sw], stores the
 * number of names it holds in num_switches and allocates switch_index with
 * that many uint16_t slots. For every child name that matches the name of
 * an entry in switch_record_table, the index of the first matching entry is
 * appended to switch_index and that entry's parent is set to sw. Names
 * without a match are skipped.
 *
 * IN sw - index of the parent switch in switch_record_table
 */
static void _find_child_switches (int sw)
{
	hostlist_t children;
	hostlist_iterator_t itr;
	char *child_name;
	int next_slot = 0;	/* next free position in switch_index */
	int idx;

	children = hostlist_create(switch_record_table[sw].switches);
	switch_record_table[sw].num_switches = hostlist_count(children);
	switch_record_table[sw].switch_index =
		xmalloc(switch_record_table[sw].num_switches *
			sizeof(uint16_t));

	itr = hostlist_iterator_create(children);
	while ((child_name = hostlist_next(itr)) != NULL) {
		/* Scan for the first record carrying this child's name */
		idx = 0;
		while ((idx < switch_record_cnt) &&
		       (strcmp(child_name,
			       switch_record_table[idx].name) != 0))
			idx++;

		if (idx < switch_record_cnt) {
			switch_record_table[sw].switch_index[next_slot] = idx;
			switch_record_table[idx].parent = sw;
			next_slot++;
		}
		free(child_name);
	}
	hostlist_iterator_destroy(itr);
	hostlist_destroy(children);
}
示例#2
0
/*
 * _change_state_bps - change the state of the midplanes named in a command.
 *
 * IN com   - command text; leading characters are skipped up to the first
 *            '[', digit or upper-case letter and the remainder is parsed
 *            as a hostlist expression
 * IN state - NODE_STATE_DOWN marks the nodes down (letter '#', used);
 *            any other value marks them up (letter '.', not used)
 * RET 1 on success, 0 on failure with a message left in error_string
 */
static int _change_state_bps(char *com, int state)
{
	/* error_string is cleared over 255 bytes everywhere in this
	 * function, so never format more than that into it */
	const size_t err_len = 255;
	char *host;
	int i = 0;
	uint16_t pos[params.cluster_dims];
	char letter = '.';
	bool used = false;
	char *c_state = "up";
	hostlist_t hl = NULL;
	int rc = 1;

	if (state == NODE_STATE_DOWN) {
		letter = '#';
		used = true;
		c_state = "down";
	}

	/* Skip whatever precedes the node specification */
	while (com[i] && (com[i] != '[') &&
	       ((com[i] < '0') || (com[i] > '9')) &&
	       ((com[i] < 'A') || (com[i] > 'Z')))
		i++;
	if (com[i] == '\0') {
		memset(error_string, 0, err_len);
		snprintf(error_string, err_len,
			 "You didn't specify any nodes to make %s. "
			 "in statement '%s'",
			 c_state, com);
		return 0;
	}

	if (!(hl = hostlist_create(com+i))) {
		memset(error_string, 0, err_len);
		snprintf(error_string, err_len,
			 "Bad hostlist given '%s'", com+i);
		return 0;
	}

	while ((host = hostlist_shift(hl))) {
		ba_mp_t *ba_mp = NULL;
		smap_node_t *smap_node;

		/* One character is read per dimension, so a shorter name
		 * can not describe a coordinate and must not be indexed
		 * past its terminator */
		if (strlen(host) >= (size_t) params.cluster_dims) {
			for (i = 0; i < params.cluster_dims; i++)
				pos[i] = select_char2coord(host[i]);
			ba_mp = bg_configure_coord2ba_mp(pos);
		}
		if (!ba_mp) {
			memset(error_string, 0, err_len);
			snprintf(error_string, err_len,
				 "Bad host given '%s'", host);
			free(host);	/* shifted names are ours to free */
			rc = 0;
			break;
		}
		bg_configure_ba_update_mp_state(ba_mp, state);
		smap_node = smap_system_ptr->grid[ba_mp->index];
		smap_node->color = 0;
		smap_node->letter = letter;
		smap_node->used = used;
		free(host);
	}
	hostlist_destroy(hl);

	return rc;
}
示例#3
0
/*
 * Build the hostlist selected by a list of genders attributes.
 *
 * A NULL attrs list is the special "all nodes" case and is answered with
 * _read_genders_attr (ALL_NODES, iopt). An empty list yields NULL.
 * Otherwise the hostlist returned by _read_genders_attr() for each
 * attribute is merged into a single list, which is passed through
 * hostlist_uniq() before being returned.
 */
static hostlist_t 
_read_genders (List attrs, int iopt)
{
    ListIterator itr;
    hostlist_t   result = NULL;
    char *       name;

    if (attrs == NULL) /* Special "all nodes" case */
        return _read_genders_attr (ALL_NODES, iopt);

    if (list_count (attrs) == 0)
        return NULL;

    itr = list_iterator_create (attrs);
    if (itr == NULL)
        errx ("genders: unable to create list iterator: %m\n");

    for (name = list_next (itr); name != NULL; name = list_next (itr)) {
        hostlist_t found = _read_genders_attr (name, iopt);

        if (result != NULL) {
            /* Fold this attribute's hosts into the running result */
            hostlist_push_list (result, found);
            hostlist_destroy (found);
        } else {
            /* First attribute: adopt its list as the result */
            result = found;
        }
    }

    list_iterator_destroy (itr);

    hostlist_uniq (result);

    return result;
}
示例#4
0
/*
 * Tear down an HTTP configuration provider and free it.
 *
 * In order: resets the stream state, releases the chunk/input/header
 * stream buffers, closes and cleans up the connection, drops the reference
 * on the current config, destroys each timer that exists, destroys the
 * node list and finally frees the provider structure itself.
 */
static void shutdown_http(clconfig_provider *provider)
{
    http_provider *self = (http_provider *)provider;

    reset_stream_state(self);

    /* Stream buffers */
    lcb_string_release(&self->stream.chunk);
    lcb_string_release(&self->stream.input);
    lcb_string_release(&self->stream.header);

    /* Network connection */
    lcb_connection_close(&self->connection);
    lcb_connection_cleanup(&self->connection);

    if (self->current_config)
        lcb_clconfig_decref(self->current_config);

    /* Timers are only destroyed when they were actually created */
    if (self->disconn_timer)
        lcb_timer_destroy(NULL, self->disconn_timer);
    if (self->io_timer)
        lcb_timer_destroy(NULL, self->io_timer);
    if (self->as_schederr)
        lcb_timer_destroy(NULL, self->as_schederr);

    if (self->nodes)
        hostlist_destroy(self->nodes);

    free(self);
}
示例#5
0
/*
 * Convert a SLURM hostlist expression into the equivalent node index
 * value expression.
 *
 * Duplicate hosts are removed first. opt.min_nodes is then forced to the
 * number of distinct hosts and opt.nodes_set is set to 1; an error is
 * logged when opt.nodes_set_opt is set and the previous opt.min_nodes
 * differs from that count.
 *
 * Returns the result of cray_nodelist2nids(), or NULL when nodelist is
 * NULL or can not be parsed.
 */
static char *_get_nids(char *nodelist)
{
	hostlist_t hosts;
	char *nid_str;
	int host_cnt;

	if (nodelist == NULL)
		return NULL;

	hosts = hostlist_create(nodelist);
	if (hosts == NULL) {
		error("Invalid hostlist: %s", nodelist);
		return NULL;
	}
	hostlist_uniq(hosts);

	/* aprun needs the node count to be exactly the size of the
	 * hostlist, so override the request when the two disagree. */
	host_cnt = hostlist_count(hosts);
	if (opt.nodes_set_opt && (opt.min_nodes != host_cnt)) {
		error("You requested %d nodes and %d hosts.  These numbers "
		      "must be the same, so setting number of nodes to %d",
		      opt.min_nodes, host_cnt, host_cnt);
	}
	opt.min_nodes = host_cnt;
	opt.nodes_set = 1;

	nid_str = cray_nodelist2nids(hosts, NULL);
	hostlist_destroy(hosts);

	return nid_str;
}
示例#6
0
/*
 * Print one "<node>: <status>" line on stdout for every host in the nodes
 * expression. The status is looked up in hstatus by node name; hosts that
 * are not found are reported as "invalid hostname". Exits with status 1 if
 * the hostlist or its iterator can not be created.
 */
static void
_stat(hash_t hstatus, const char *nodes)
{
    hostlist_t hosts;
    hostlist_iterator_t itr;
    char *name;

    assert(hstatus);

    hosts = hostlist_create(nodes);
    if (hosts == NULL) {
        perror("hostlist_create");
        exit(1);
    }

    itr = hostlist_iterator_create(hosts);
    if (itr == NULL) {
        perror("hostlist_iterator_create");
        exit(1);
    }

    for (name = hostlist_next(itr); name != NULL; name = hostlist_next(itr)) {
        char *status = hash_find(hstatus, name);

        printf("%s: %s\n", name, status ? status : "invalid hostname");
        free(name);
    }

    hostlist_iterator_destroy(itr);
    hostlist_destroy(hosts);
}
示例#7
0
/*
 * Return the number of hosts in the node list of job job_id.
 *
 * All job records are loaded with slurm_load_jobs(); the first record whose
 * job_id matches determines the result. Returns 1 when the jobs can not be
 * loaded, when no record matches, or when the matching record's node list
 * can not be turned into a hostlist.
 */
static int _get_job_size(uint32_t job_id)
{
	job_info_msg_t *jobs;
	job_info_t *job;
	hostlist_t hosts;
	int size = 1;
	int i;

	if (slurm_load_jobs((time_t) 0, &jobs, SHOW_ALL) != 0) {
		slurm_perror("slurm_load_jobs");
		return 1;
	}

	for (i = 0; i < jobs->record_count; i++) {
		job = &jobs->job_array[i];
		if (job->job_id == job_id) {
			hosts = hostlist_create(job->nodes);
			if (hosts != NULL) {
				size = hostlist_count(hosts);
				hostlist_destroy(hosts);
			}
			break;	/* only the first match is considered */
		}
	}
	slurm_free_job_info_msg (jobs);

#if _DEBUG
	printf("Size is %d\n", size);
#endif
	return size;
}
示例#8
0
文件: port_mgr.c 项目: diorsman/slurm
/* Builds the job step's resv_port_array based upon resv_ports (a string) */
static void _rebuild_port_array(struct step_record *step_ptr)
{
    int i;
    char *tmp_char;
    hostlist_t hl;

    i = strlen(step_ptr->resv_ports);
    tmp_char = xmalloc(i+3);
    sprintf(tmp_char, "[%s]", step_ptr->resv_ports);
    hl = hostlist_create(tmp_char);
    if (!hl)
        fatal("Invalid reserved ports: %s", step_ptr->resv_ports);
    xfree(tmp_char);

    step_ptr->resv_port_array = xmalloc(sizeof(int) *
                                        step_ptr->resv_port_cnt);
    step_ptr->resv_port_cnt = 0;
    while ((tmp_char = hostlist_shift(hl))) {
        i = atoi(tmp_char);
        if (i > 0)
            step_ptr->resv_port_array[step_ptr->resv_port_cnt++]=i;
        free(tmp_char);
    }
    hostlist_destroy(hl);
    if (step_ptr->resv_port_cnt == 0) {
        error("Problem recovering resv_port_array for step %u.%u: %s",
              step_ptr->job_ptr->job_id, step_ptr->step_id,
              step_ptr->resv_ports);
        xfree(step_ptr->resv_ports);
    }
}
示例#9
0
/*
 * Release the resources held by a collective descriptor: the procs array,
 * the parent host name, both child hostlists, the per-child contribution
 * array and the buffer. The coll structure itself is not freed here.
 */
void pmixp_coll_free(pmixp_coll_t *coll)
{
	if (coll->procs) {
		xfree(coll->procs);
	}
	if (coll->parent_host) {
		xfree(coll->parent_host);
	}

	hostlist_destroy(coll->all_children);
	hostlist_destroy(coll->ch_hosts);

	if (coll->ch_contribs) {
		xfree(coll->ch_contribs);
	}

	free_buf(coll->buf);
}
示例#10
0
文件: node_conf.c 项目: perryh/slurm
/*
 * bitmap2node_name_sortable - given a bitmap, build a ranged string of the
 *	names of the nodes whose bits are set (e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer; NULL or an empty bitmap yields ""
 * IN sort   - true to sort the names before building the string
 * RET pointer to the node list string
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the returned string when no longer required
 */
char * bitmap2node_name_sortable (bitstr_t *bitmap, bool sort)
{
	hostlist_t names;
	char *node_list;
	int inx, first_set, last_set;

	if (!bitmap)
		return xstrdup("");

	first_set = bit_ffs(bitmap);
	if (first_set == -1)	/* no bit set at all */
		return xstrdup("");
	last_set = bit_fls(bitmap);

	names = hostlist_create("");
	for (inx = first_set; inx <= last_set; inx++) {
		if (bit_test(bitmap, inx))
			hostlist_push(names, node_record_table_ptr[inx].name);
	}
	if (sort)
		hostlist_sort(names);

	node_list = hostlist_ranged_string_xmalloc(names);
	hostlist_destroy(names);

	return node_list;
}
示例#11
0
/*
 * Count the hosts described by the hostnames expression.
 *
 * Returns the (non-zero) host count on success. Returns -1 and sets
 * pstdout_errnum when hostnames is NULL (PSTDOUT_ERR_PARAMETERS), when the
 * hostlist can not be created (PSTDOUT_ERR_OUTMEM) or when the list is
 * empty (PSTDOUT_ERR_INTERNAL).
 */
int 
pstdout_hostnames_count(const char *hostnames)
{
  hostlist_t hl;
  int nhosts;
  int rv = -1;

  if (!hostnames)
    {
      pstdout_errnum = PSTDOUT_ERR_PARAMETERS;
      return -1;
    }

  hl = hostlist_create(hostnames);
  if (!hl)
    {
      pstdout_errnum = PSTDOUT_ERR_OUTMEM;
      return rv;
    }

  nhosts = hostlist_count(hl);
  if (nhosts)
    rv = nhosts;
  else
    {
      if (pstdout_debug_flags & PSTDOUT_DEBUG_STANDARD)
        fprintf(stderr, "hostnames count == 0\n");
      pstdout_errnum = PSTDOUT_ERR_INTERNAL;
    }

  hostlist_destroy(hl);
  return rv;
}
示例#12
0
/*
 * Convert a step record read from the filetxt accounting log into a newly
 * created slurmdb step record.
 *
 * IN filetxt_step - source record
 * RET record obtained from slurmdb_create_step_rec() with the fields below
 *	filled in; nodes and stepname are xstrdup() copies of the source
 */
static slurmdb_step_rec_t *_slurmdb_create_step_rec(
	filetxt_step_rec_t *filetxt_step)
{
	slurmdb_step_rec_t *step = slurmdb_create_step_rec();

	/* Identity and final state */
	step->stepid = filetxt_step->stepnum;
	step->stepname = xstrdup(filetxt_step->stepname);
	step->state = filetxt_step->status;
	step->exitcode = filetxt_step->exitcode;
	step->requid = filetxt_step->requid;

	/* Timing: the header timestamp is the end time, the start time is
	 * derived from it and the elapsed time */
	step->elapsed = filetxt_step->elapsed;
	step->end = filetxt_step->header.timestamp;
	step->start = filetxt_step->header.timestamp - step->elapsed;

	/* Allocated resources; nnodes is only set when a node list exists */
	step->ncpus = filetxt_step->ncpus;
	if (filetxt_step->nodes) {
		hostlist_t hosts = hostlist_create(filetxt_step->nodes);
		step->nnodes = hostlist_count(hosts);
		hostlist_destroy(hosts);
	}
	step->nodes = xstrdup(filetxt_step->nodes);

	/* CPU time and statistics */
	step->sys_cpu_sec = filetxt_step->rusage.ru_stime.tv_sec;
	step->sys_cpu_usec = filetxt_step->rusage.ru_stime.tv_usec;
	step->user_cpu_sec = filetxt_step->rusage.ru_utime.tv_sec;
	step->user_cpu_usec = filetxt_step->rusage.ru_utime.tv_usec;
	step->tot_cpu_sec = filetxt_step->tot_cpu_sec;
	step->tot_cpu_usec = filetxt_step->tot_cpu_usec;
	memcpy(&step->stats, &filetxt_step->stats,
	       sizeof(slurmdb_stats_t));

	return step;
}
示例#13
0
/*
 * Sum the consolidated energy of every node set in bitmap_of_nodes over
 * the interval [step_starttime, step_endtime].
 *
 * For each node the RRD path is looked up with _get_node_rrd_path() and
 * consolidated with _rrd_consolidate_one() using the configured energy RRA
 * name. Processing stops at the first node whose path can not be found or
 * whose consolidation yields NO_VAL.
 *
 * RET the accumulated energy, or NO_VAL if any node failed
 */
extern uint32_t RRD_consolidate(time_t step_starttime, time_t step_endtime,
                                bitstr_t* bitmap_of_nodes)
{
    uint32_t consumed_energy = 0;
    uint32_t tmp = 0;
    char *node_name = NULL;
    hostlist_t hl;
    char* path;

    node_name = bitmap2node_name(bitmap_of_nodes);
    hl = hostlist_create(node_name);
    xfree(node_name);
    while ((node_name = hostlist_shift(hl))) {
        path = _get_node_rrd_path(node_name, EXT_SENSORS_VALUE_ENERGY);
        free(node_name);
        if (path) {
            tmp = _rrd_consolidate_one(
                      step_starttime, step_endtime, path,
                      ext_sensors_cnf->energy_rra_name, true);
            if (tmp == NO_VAL)
                consumed_energy = NO_VAL;
        } else {
            /* No RRD file for this node: report failure without
             * handing a NULL path to the consolidation routine */
            consumed_energy = NO_VAL;
        }
        xfree(path);
        if (consumed_energy == NO_VAL)
            break;
        consumed_energy += tmp;
    }
    hostlist_destroy(hl);

    return consumed_energy;
}
示例#14
0
/*
 * Run the front-end admin action named by display_data->name on the rows
 * currently selected in treeview. The selected rows are gathered by
 * _process_each_front_end() into a node list, which is de-duplicated,
 * sorted and collapsed into a ranged string before being handed to
 * _admin_front_end(). Does nothing when treeview is NULL.
 */
extern void select_admin_front_end(GtkTreeModel *model, GtkTreeIter *iter,
				   display_data_t *display_data,
				   GtkTreeView *treeview)
{
	front_end_user_data_t selection;
	hostlist_t hosts;
	char *ranged;

	if (!treeview)
		return;

	memset(&selection, 0, sizeof(selection));
	gtk_tree_selection_selected_foreach(
		gtk_tree_view_get_selection(treeview),
		_process_each_front_end, &selection);

	hosts = hostlist_create(selection.node_list);
	hostlist_uniq(hosts);
	hostlist_sort(hosts);
	xfree(selection.node_list);

	ranged = hostlist_ranged_string_xmalloc(hosts);
	hostlist_destroy(hosts);

	_admin_front_end(model, iter, display_data->name, ranged);
	xfree(ranged);
}
示例#15
0
文件: info_node.c 项目: BYUHPC/slurm
/*
 * scontrol_print_node_list - print information about the supplied node list
 *	(or regular expression)
 * IN node_list - hostlist expression naming the nodes to print, or NULL to
 *	print every node
 *
 * On a load or parse failure exit_code is set to 1 and, unless quiet_flag
 * is 1, a message is written to stderr.
 */
extern void
scontrol_print_node_list (char *node_list)
{
	node_info_msg_t *node_info_ptr = NULL;
	hostlist_t hosts;
	uint16_t show_flags = 0;
	char *node_name;

	if (all_flag)
		show_flags |= SHOW_ALL;
	if (detail_flag)
		show_flags |= SHOW_DETAIL;

	if (scontrol_load_nodes(&node_info_ptr, show_flags)) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_node error");
		return;
	}

	if (quiet_flag == -1) {
		char time_str[32];
		slurm_make_time_str ((time_t *)&node_info_ptr->last_update,
			             time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, node_info_ptr->record_count);
	}

	/* No list given: print every node */
	if (node_list == NULL) {
		scontrol_print_node (NULL, node_info_ptr);
		return;
	}

	hosts = hostlist_create (node_list);
	if (hosts) {
		while ((node_name = hostlist_shift (hosts))) {
			scontrol_print_node(node_name, node_info_ptr);
			free(node_name);
		}
		hostlist_destroy(hosts);
		return;
	}

	/* The node list could not be parsed; errno tells why */
	exit_code = 1;
	if (quiet_flag == 1)
		return;

	switch (errno) {
	case EINVAL:
		fprintf(stderr, "unable to parse node list %s\n", node_list);
		break;
	case ERANGE:
		fprintf(stderr, "too many nodes in supplied range %s\n",
			node_list);
		break;
	default:
		perror("error parsing node list");
		break;
	}
}
示例#16
0
文件: forward.c 项目: Poshi/slurm
/*
 * Free a forwarding-tree descriptor together with its hostlist.
 * A NULL fwd_tree is ignored.
 */
void _destroy_tree_fwd(fwd_tree_t *fwd_tree)
{
	if (!fwd_tree)
		return;

	if (fwd_tree->tree_hl)
		hostlist_destroy(fwd_tree->tree_hl);
	xfree(fwd_tree);
}
示例#17
0
文件: info_job.c 项目: cread/slurm
/*
 * Print one line for a completing job: its job id followed by the ranged
 * lists of its nodes that are still COMPLETING and of its nodes that are
 * DOWN. A list is only printed when it is not empty.
 *
 * The job's nodes are taken from job_ptr->node_inx, read as pairs of
 * first/last indexes terminated by -1. When the job names a cluster and
 * federation output is active (federation_flag set, local_flag clear) the
 * indexes are shifted by that cluster's node offset.
 */
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	hostlist_t completing, down;
	node_info_t *node;
	char *ranged;
	int pair, offset = 0;

	completing = hostlist_create(NULL);
	down = hostlist_create(NULL);

	if (job_ptr->cluster && federation_flag && !local_flag)
		offset = get_cluster_node_offset(job_ptr->cluster,
						 node_info_msg);

	for (pair = 0; job_ptr->node_inx[pair] != -1; pair += 2) {
		int first = job_ptr->node_inx[pair];
		int last = job_ptr->node_inx[pair + 1];
		int j;

		for (j = first; j <= last; j++) {
			int node_inx = j + offset;

			/* Stop this range once it leaves the node table */
			if (node_inx >= node_info_msg->record_count)
				break;
			node = &(node_info_msg->node_array[node_inx]);
			if (IS_NODE_COMPLETING(node))
				hostlist_push_host(completing, node->name);
			else if (IS_NODE_DOWN(node))
				hostlist_push_host(down, node->name);
		}
	}

	printf("JobId=%u ", job_ptr->job_id);

	ranged = hostlist_ranged_string_xmalloc(completing);
	if (ranged && ranged[0])
		printf("Nodes(COMPLETING)=%s ", ranged);
	xfree(ranged);

	ranged = hostlist_ranged_string_xmalloc(down);
	if (ranged && ranged[0])
		printf("Nodes(DOWN)=%s ", ranged);
	xfree(ranged);

	printf("\n");

	hostlist_destroy(completing);
	hostlist_destroy(down);
}
示例#18
0
/*
 * Replace the node list of a raw-memcached provider.
 *
 * nodes is parsed into a temporary hostlist using
 * LCB_CONFIG_MCCOMPAT_PORT as the port argument. On a parse error that
 * error is returned and nothing else happens; otherwise the provider is
 * reconfigured with the parsed list, its async handle is signalled and
 * LCB_SUCCESS is returned. The temporary hostlist is destroyed either way.
 */
lcb_error_t
lcb_clconfig_mcraw_update(clconfig_provider *pb, const char *nodes)
{
    bc_MCRAW *self = (bc_MCRAW *)pb;
    hostlist_t hosts = hostlist_create();
    lcb_error_t rc;

    rc = hostlist_add_stringz(hosts, nodes, LCB_CONFIG_MCCOMPAT_PORT);
    if (rc == LCB_SUCCESS) {
        configure_nodes(pb, hosts);
    }
    hostlist_destroy(hosts);

    if (rc != LCB_SUCCESS) {
        return rc;
    }

    lcbio_async_signal(self->async);
    return LCB_SUCCESS;
}
示例#19
0
文件: mpich1_p4.c 项目: VURM/slurm
/*
 * Add the MPICH-specific variables to a task's environment.
 *
 * SLURM_MPICH_NODELIST is set to the comma separated expansion of
 * SLURM_NODELIST, and SLURM_MPICH_TASKS to SLURM_TASKS_PER_NODE with every
 * "<count>...x<reps>" entry expanded into <reps> repetitions of <count>.
 * Each variable is only written when its source variable is present.
 *
 * IN job      - unused
 * IN/OUT env  - environment array to read from and update
 * RET SLURM_SUCCESS always
 */
int p_mpi_hook_slurmstepd_task (const mpi_plugin_client_info_t *job,
				char ***env)
{
	char *nodelist, *task_cnt;

	nodelist = getenvp(*env, "SLURM_NODELIST");
	if (nodelist) {
		char *host_str = NULL, *tmp;
		hostlist_t hl = hostlist_create(nodelist);
		while ((tmp = hostlist_shift(hl))) {
			if (host_str)
				xstrcat(host_str, ",");
			xstrcat(host_str, tmp);
			free(tmp);
		}
		hostlist_destroy(hl);
		/* NOTE(review): host_str is still NULL here if the list
		 * expanded to nothing -- confirm the "%s" formatting below
		 * tolerates that */
		env_array_overwrite_fmt(env, "SLURM_MPICH_NODELIST", "%s",
			host_str);
		xfree(host_str);
	}

	task_cnt = getenvp(*env, "SLURM_TASKS_PER_NODE");
	if (task_cnt) {
		char *task_str = NULL, tmp_str[32];
		int i=0, val, reps;
		while (task_cnt[i]) {
			if ((task_cnt[i] >= '0') && (task_cnt[i] <= '9'))
				val = atoi(&task_cnt[i]);
			else
				break;	/* bad parse */
			i++;
			/* Skip the rest of the count up to "x" or "," */
			while (task_cnt[i]
			&&     (task_cnt[i] != 'x') && (task_cnt[i] != ','))
				i++;
			if (task_cnt[i] == 'x') {
				i++;
				reps = atoi(&task_cnt[i]);
				while (task_cnt[i] && (task_cnt[i] != ','))
					i++;
			} else
				reps = 1;
			if (task_cnt[i] == ',')
				i++;
			/* The text is the same for every repetition */
			snprintf(tmp_str, sizeof(tmp_str), "%d", val);
			/* "> 0" so that a negative repetition count can
			 * not turn this into a runaway loop */
			while (reps > 0) {
				if (task_str)
					xstrcat(task_str, ",");
				xstrcat(task_str, tmp_str);
				reps--;
			}
		}
		/* NOTE(review): task_str is still NULL here if nothing
		 * could be parsed -- same "%s" concern as above */
		env_array_overwrite_fmt(env, "SLURM_MPICH_TASKS", "%s",
			task_str);
		xfree(task_str);
	}

	return SLURM_SUCCESS;
}
示例#20
0
文件: ping_nodes.c 项目: A1ve5/slurm
/* Update acct_gather data for every node that is not DOWN */
extern void update_nodes_acct_gather_data(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *agent_args = NULL;

	agent_args = xmalloc (sizeof (agent_arg_t));
	agent_args->msg_type = REQUEST_ACCT_GATHER_UPDATE;
	agent_args->retry = 0;
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
	agent_args->hostlist = hostlist_create(NULL);

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		if (agent_args->protocol_version >
		    front_end_ptr->protocol_version)
			agent_args->protocol_version =
				front_end_ptr->protocol_version;

		hostlist_push_host(agent_args->hostlist, front_end_ptr->name);
		agent_args->node_count++;
	}
#else
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (agent_args->protocol_version > node_ptr->protocol_version)
			agent_args->protocol_version =
				node_ptr->protocol_version;
		hostlist_push_host(agent_args->hostlist, node_ptr->name);
		agent_args->node_count++;
	}
#endif

	if (agent_args->node_count == 0) {
		hostlist_destroy(agent_args->hostlist);
		xfree (agent_args);
	} else {
		hostlist_uniq(agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(agent_args->hostlist);
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_ENERGY)
			info("Updating acct_gather data for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(agent_args);
	}
}
示例#21
0
/*
 * basil_node_ranking - assign a rank to each node from the BASIL inventory.
 *
 * IN/OUT node_array - node records whose node_rank is reset to NO_VAL
 * IN node_cnt       - number of entries in node_array
 * RET SLURM_SUCCESS (failure to obtain a usable inventory is fatal)
 *
 * Every inventory node that _find_node_by_basil_id() can map to a node
 * record receives a rank that decreases with its position in the
 * inventory. Inventory nodes without a matching record are logged, and in
 * that case the full list of nodes known to ALPS is logged as well.
 */
extern int basil_node_ranking(struct node_record *node_array, int node_cnt)
{
	enum basil_version version = get_basil_version();
	struct basil_inventory *inv;
	struct basil_node *node;
	int rank_count = 0, i;
	hostlist_t hl = hostlist_create(NULL);
	bool bad_node = 0;

	inv = get_full_inventory(version);
	if (inv == NULL)
		/* FIXME: should retry here if the condition is transient */
		fatal("failed to get BASIL %s ranking", bv_names_long[version]);
	else if (!inv->batch_total)
		fatal("system has no usable batch compute nodes");

	debug("BASIL %s RANKING INVENTORY: %d/%d batch nodes",
	      bv_names_long[version], inv->batch_avail, inv->batch_total);

	/*
	 * Node ranking is based on a subset of the inventory: only nodes in
	 * batch allocation mode which are up and not allocated. Assign a
	 * 'NO_VAL' rank to all other nodes, which will translate as a very
	 * high value, (unsigned)-2, to put those nodes last in the ranking.
	 * The rest of the code must ensure that those nodes are never chosen.
	 */
	for (i = 0; i < node_cnt; i++)
		node_array[i].node_rank = NO_VAL;

	for (node = inv->f->node_head; node; node = node->next) {
		struct node_record *node_ptr;
		char tmp[50];

		node_ptr = _find_node_by_basil_id(node->node_id);
		if (node_ptr == NULL) {
			error("nid%05u (%s node in state %s) not in slurm.conf",
			      node->node_id, nam_noderole[node->role],
			      nam_nodestate[node->state]);
			bad_node = 1;
		} else
			node_ptr->node_rank = inv->nodes_total - rank_count++;
		/* Remember every ALPS node name for the report below */
		snprintf(tmp, sizeof(tmp), "nid%05u", node->node_id);
		hostlist_push(hl, tmp);
	}
	free_inv(inv);
	if (bad_node) {
		char *name;

		hostlist_sort(hl);
		name = hostlist_ranged_string_xmalloc(hl);
		info("It appears your slurm.conf nodelist doesn't "
		     "match the alps system.  Here are the nodes alps knows "
		     "about\n%s", name);
		xfree(name);	/* the ranged string is xmalloc'ed for us */
	}
	hostlist_destroy(hl);

	return SLURM_SUCCESS;
}
示例#22
0
/*
 * slurm_step_layout_create - determine how many tasks of a job will be
 *                    run on each node. Distribution is influenced
 *                    by number of cpus on each host.
 * IN tlist - hostlist corresponding to task layout
 * IN cpus_per_node - cpus per node
 * IN cpu_count_reps - how many nodes have same cpu count
 * IN num_hosts - number of hosts we have (replaced by the number of
 *                distinct hosts in tlist for the arbitrary distribution)
 * IN num_tasks - number of tasks to distribute across these cpus
 * IN cpus_per_task - number of cpus per task
 * IN task_dist - type of distribution we are using
 * IN plane_size - plane size (only needed for the plane distribution)
 * RET a pointer to an slurm_step_layout_t structure, or NULL if the task
 *     layout could not be initialized
 * NOTE: allocates memory that should be xfreed by caller
 */
slurm_step_layout_t *slurm_step_layout_create(
	const char *tlist,
	uint16_t *cpus_per_node, uint32_t *cpu_count_reps,
	uint32_t num_hosts,
	uint32_t num_tasks,
	uint16_t cpus_per_task,
	uint16_t task_dist,
	uint16_t plane_size)
{
	slurm_step_layout_t *layout = xmalloc(sizeof(slurm_step_layout_t));
	uint32_t flags = slurmdb_setup_cluster_flags();
	char *arb_nodes = NULL;
	int rc;

	layout->task_dist = task_dist;
	if (task_dist != SLURM_DIST_ARBITRARY) {
		layout->node_list = xstrdup(tlist);
	} else {
		hostlist_t uniq_hosts;

		/* Keep the caller's list as given for the task layout; it
		 * may differ from the job allocation. The layout's own
		 * node list holds each host only once. */
		arb_nodes = xstrdup(tlist);
		uniq_hosts = hostlist_create(tlist);
		hostlist_uniq(uniq_hosts);
		layout->node_list = hostlist_ranged_string_xmalloc(uniq_hosts);
		num_hosts = hostlist_count(uniq_hosts);
		hostlist_destroy(uniq_hosts);
	}

	layout->task_cnt = num_tasks;

	/* Limited job step support on front-end systems.
	 * All jobs execute through front-end on Blue Gene.
	 * Normally we would not permit execution of job steps,
	 * but can fake it by just allocating all tasks to
	 * one of the allocated nodes. */
	if ((flags & CLUSTER_FLAG_FE) &&
	    !(flags & (CLUSTER_FLAG_BG | CLUSTER_FLAG_CRAY_A)))
		layout->node_cnt = 1;
	else
		layout->node_cnt = num_hosts;

	rc = _init_task_layout(layout, arb_nodes,
			       cpus_per_node, cpu_count_reps,
			       cpus_per_task,
			       task_dist, plane_size);
	if (rc != SLURM_SUCCESS) {
		slurm_step_layout_destroy(layout);
		layout = NULL;
	}
	xfree(arb_nodes);

	return layout;
}
示例#23
0
/*
 * Run _node_update() for every node named in the hostlist expression
 * given as argv[1], one detached thread per node, then wait for all of
 * them to finish.
 *
 * Exits with 2 when the hostlist is invalid and with 0 otherwise.
 */
int main(int argc, char *argv[])
{
	log_options_t log_opts = LOG_OPTS_INITIALIZER;
	hostlist_t hl = NULL;
	char *node_name;
	pthread_attr_t attr_work;
	pthread_t thread_work = 0;

	xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid());
	_read_config();
	log_opts.stderr_level = LOG_LEVEL_QUIET;
	log_opts.syslog_level = LOG_LEVEL_QUIET;
	/* Bit test: only raise the log level when the NODE_FEATURES debug
	 * flag itself is set, not whenever any debug flag is set */
	if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
		log_opts.logfile_level += 3;
	(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);

	/* NOTE(review): argv[1] is used without checking argc */
	if ((hl = hostlist_create(argv[1])) == NULL) {
		error("%s: Invalid hostlist (%s)", prog_name, argv[1]);
		exit(2);
	}
	while ((node_name = hostlist_pop(hl))) {
		/* Throttle: wait until a thread slot is available */
		slurm_mutex_lock(&thread_cnt_mutex);
		while (1) {
			if (thread_cnt <= MAX_THREADS) {
				thread_cnt++;
				break;
			} else {	/* wait for state change and retry */
				pthread_cond_wait(&thread_cnt_cond,
						  &thread_cnt_mutex);
			}
		}
		slurm_mutex_unlock(&thread_cnt_mutex);

		slurm_attr_init(&attr_work);
		(void) pthread_attr_setdetachstate
			(&attr_work, PTHREAD_CREATE_DETACHED);
		/* If no thread can be created, do the work inline */
		if (pthread_create(&thread_work, &attr_work, _node_update,
				   (void *) node_name)) {
			_node_update((void *) node_name);
		}
		slurm_attr_destroy(&attr_work);
	}

	/* Wait for work threads to complete */
	slurm_mutex_lock(&thread_cnt_mutex);
	while (1) {
		if (thread_cnt == 0)
			break;
		else	/* wait for state change and retry */
			pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex);
	}
	slurm_mutex_unlock(&thread_cnt_mutex);
	hostlist_destroy(hl);

	exit(0);
}
示例#24
0
static void _destroy_local_cluster(void *object)
{
	local_cluster_t *local_cluster = (local_cluster_t *)object;
	if (local_cluster) {
		if (local_cluster->hl)
			hostlist_destroy(local_cluster->hl);
		FREE_NULL_BITMAP(local_cluster->asked_bitmap);
		xfree(local_cluster);
	}
}
示例#25
0
文件: gres_mps.c 项目: SchedMD/slurm
/*
 * Convert all GPU records to new entries in a list where each File is a
 * unique device (i.e. convert a record with "File=nvidia[0-3]" into 4
 * separate records).
 *
 * Only records named "gpu" are considered. A GPU record without a File
 * specification is skipped (the error is logged once only). Every GPU
 * record that is expanded is removed from gres_list.
 *
 * RET the new list, or NULL if gres_list is NULL
 */
static List _build_gpu_list(List gres_list)
{
	ListIterator itr;
	gres_slurmd_conf_t *src, *gpu;
	List gpu_list;
	hostlist_t files;
	char *f_name;
	bool missing_file_logged = false;

	if (!gres_list)
		return NULL;

	gpu_list = list_create(_delete_gres_list);
	itr = list_iterator_create(gres_list);
	while ((src = list_next(itr))) {
		if (xstrcmp(src->name, "gpu"))
			continue;
		if (!src->file) {
			if (!missing_file_logged) {
				error("%s: GPU configuration lacks \"File\" specification",
				      plugin_name);
				missing_file_logged = true;
			}
			continue;
		}

		/* One new record per device file in the expression */
		files = hostlist_create(src->file);
		while ((f_name = hostlist_shift(files))) {
			gpu = xmalloc(sizeof(gres_slurmd_conf_t));
			gpu->name = xstrdup(src->name);
			gpu->plugin_id = src->plugin_id;
			gpu->type_name = xstrdup(src->type_name);
			gpu->config_flags = src->config_flags;
			if (src->type_name)
				gpu->config_flags |= GRES_CONF_HAS_TYPE;
			gpu->file = xstrdup(f_name);
			gpu->count = 1;
			gpu->cpu_cnt = src->cpu_cnt;
			gpu->cpus = xstrdup(src->cpus);
			if (src->cpus_bitmap)
				gpu->cpus_bitmap = bit_copy(src->cpus_bitmap);
			gpu->links = xstrdup(src->links);
			list_append(gpu_list, gpu);
			free(f_name);
		}
		hostlist_destroy(files);

		/* The multi-device record has been replaced by its copies */
		(void) list_delete_item(itr);
	}
	list_iterator_destroy(itr);

	return gpu_list;
}
示例#26
0
/*
 * Checked wrapper around hostlist_destroy(): a NULL hostlist is reported
 * through WRAPPERS_ERR_INVALID_PARAMETERS before the call is made.
 * file and function are presumably supplied by WRAPPERS_ARGS and identify
 * the call site -- confirm against the macro definition.
 */
void
wrap_hostlist_destroy(WRAPPERS_ARGS, hostlist_t hl)
{
  assert(file && function);

  if (hl == NULL)
    WRAPPERS_ERR_INVALID_PARAMETERS("hostlist_destroy");

  hostlist_destroy(hl);
}
示例#27
0
文件: print.c 项目: Cray/slurm
/*
 * Print a node list in ranged form through _print_str().
 *
 * nodes is parsed into a hostlist and converted back to a ranged string
 * before printing. Note that cut is not forwarded: _print_str() is always
 * called with false as its last argument.
 *
 * Returns whatever _print_str() returns.
 */
int _print_nodes(char *nodes, int width, bool right, bool cut)
{
	hostlist_t hosts = hostlist_create(nodes);
	char *ranged = hostlist_ranged_string_xmalloc(hosts);
	int rc = _print_str(ranged, width, right, false);

	xfree(ranged);
	hostlist_destroy(hosts);

	return rc;
}
示例#28
0
/*
 * Release everything held by the global _pmixp_job_info: the task count
 * and global task id arrays, the packed task map, the job and step
 * hostlists and the hostname. Always returns SLURM_SUCCESS.
 */
int pmixp_info_free(void)
{
	if (_pmixp_job_info.task_cnts) {
		xfree(_pmixp_job_info.task_cnts);
	}
	if (_pmixp_job_info.gtids) {
		xfree(_pmixp_job_info.gtids);
	}
	if (_pmixp_job_info.task_map_packed) {
		xfree(_pmixp_job_info.task_map_packed);
	}

	hostlist_destroy(_pmixp_job_info.job_hl);
	hostlist_destroy(_pmixp_job_info.step_hl);

	if (_pmixp_job_info.hostname) {
		xfree(_pmixp_job_info.hostname);
	}

	return SLURM_SUCCESS;
}
示例#29
0
/*
 * Call nrt_job_step_complete() for jobinfo with nodelist expanded into a
 * temporary hostlist, and return its result.
 */
static inline int _make_step_comp(switch_jobinfo_t *jobinfo, char *nodelist)
{
	hostlist_t hosts = hostlist_create(nodelist);
	int rc = nrt_job_step_complete((slurm_nrt_jobinfo_t *)jobinfo, hosts);

	hostlist_destroy(hosts);
	return rc;
}
示例#30
0
/*
 * Print one line for a completing job: its job id followed by the ranged
 * lists of its nodes that are still COMPLETING and of its nodes that are
 * DOWN. A list is only printed when it is not empty.
 *
 * A COMPLETING node counts when its index passes _in_node_bit_list()
 * against job_ptr->node_inx; a DOWN node counts when its name is found in
 * job_ptr->nodes.
 */
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	hostlist_t job_nodes, completing, down;
	node_info_t *node;
	char *ranged;
	int inx;

	job_nodes  = hostlist_create(job_ptr->nodes);
	completing = hostlist_create("");
	down       = hostlist_create("");

	for (inx = 0; inx < node_info_msg->record_count; inx++) {
		node = &(node_info_msg->node_array[inx]);
		if (IS_NODE_COMPLETING(node) &&
		    (_in_node_bit_list(inx, job_ptr->node_inx))) {
			hostlist_push_host(completing, node->name);
		} else if (IS_NODE_DOWN(node) &&
			   (hostlist_find(job_nodes, node->name) != -1)) {
			hostlist_push_host(down, node->name);
		}
	}

	printf("JobId=%u ", job_ptr->job_id);

	ranged = hostlist_ranged_string_xmalloc(completing);
	if (ranged && ranged[0])
		printf("Nodes(COMPLETING)=%s ", ranged);
	xfree(ranged);

	ranged = hostlist_ranged_string_xmalloc(down);
	if (ranged && ranged[0])
		printf("Nodes(DOWN)=%s ", ranged);
	xfree(ranged);

	printf("\n");

	hostlist_destroy(job_nodes);
	hostlist_destroy(completing);
	hostlist_destroy(down);
}