Example #1
extern void select_admin_front_end(GtkTreeModel *model, GtkTreeIter *iter,
				   display_data_t *display_data,
				   GtkTreeView *treeview)
{
	if (treeview) {
		char *node_list;
		hostlist_t hl = NULL;
		front_end_user_data_t user_data;

		memset(&user_data, 0, sizeof(front_end_user_data_t));
		gtk_tree_selection_selected_foreach(
			gtk_tree_view_get_selection(treeview),
			_process_each_front_end, &user_data);

		hl = hostlist_create(user_data.node_list);
		hostlist_uniq(hl);
		hostlist_sort(hl);
		xfree(user_data.node_list);
		node_list = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

		_admin_front_end(model, iter, display_data->name, node_list);
		xfree(node_list);
	}
}
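The examples on this page all follow the same basic pattern: build a hostlist, call hostlist_uniq() to drop duplicate entries, optionally sort, then render a ranged string or iterate over the hosts. A minimal, self-contained sketch of that pattern follows, using only the hostlist calls that already appear in these examples; the header path and the simplified error handling are assumptions, not taken from any example on this page.

#include "src/common/hostlist.h"	/* assumed location of SLURM's hostlist API */

/* Hypothetical helper: collapse a node list such as "tux2,tux0,tux2,tux1"
 * into a deduplicated, sorted ranged string such as "tux[0-2]". */
static char *dedup_nodes(const char *nodelist)
{
	hostlist_t hl = hostlist_create(nodelist);
	char *ranged;

	if (!hl)
		return NULL;		/* invalid hostlist expression */
	hostlist_uniq(hl);		/* remove duplicate host names */
	hostlist_sort(hl);		/* sort so ranges collapse cleanly */
	ranged = hostlist_ranged_string_xmalloc(hl);
	hostlist_destroy(hl);
	return ranged;			/* caller must xfree() the result */
}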
Example #2
/* Convert a SLURM hostlist expression into the equivalent node index
 * value expression.
 */
static char *_get_nids(opt_t *opt_local)
{
	hostlist_t hl;
	char *nids = NULL;
	int node_cnt;

	if (!opt_local->nodelist)
		return NULL;
	hl = hostlist_create(opt_local->nodelist);
	if (!hl) {
		error("Invalid hostlist: %s", opt_local->nodelist);
		return NULL;
	}
	//info("input hostlist: %s", nodelist);
	hostlist_uniq(hl);

	/* aprun needs the hostlist to be the exact size requested,
	 * so if it doesn't match, set it here. */
	node_cnt = hostlist_count(hl);
	if (opt_local->nodes_set_opt && (node_cnt != opt_local->min_nodes)) {
		error("You requested %d nodes and %d hosts.  These numbers "
		      "must be the same, so setting number of nodes to %d",
		      opt_local->min_nodes, node_cnt, node_cnt);
	}
	opt_local->min_nodes = node_cnt;
	opt_local->nodes_set = 1;

	nids = cray_nodelist2nids(hl, NULL);

	hostlist_destroy(hl);
	//info("output node IDs: %s", nids);

	return nids;
}
Example #3
static int _hostset_from_ranges(const pmix_proc_t *procs, size_t nprocs,
				hostlist_t *hl_out)
{
	int i;
	hostlist_t hl = hostlist_create("");
	pmixp_namespace_t *nsptr = NULL;
	for (i = 0; i < nprocs; i++) {
		char *node = NULL;
		hostlist_t tmp;
		nsptr = pmixp_nspaces_find(procs[i].nspace);
		if (NULL == nsptr) {
			goto err_exit;
		}
		if (procs[i].rank == PMIX_RANK_WILDCARD) {
			tmp = hostlist_copy(nsptr->hl);
		} else {
			tmp = pmixp_nspace_rankhosts(nsptr, &procs[i].rank, 1);
		}
		while (NULL != (node = hostlist_pop(tmp))) {
			hostlist_push(hl, node);
			free(node);
		}
		hostlist_destroy(tmp);
	}
	hostlist_uniq(hl);
	*hl_out = hl;
	return SLURM_SUCCESS;
err_exit:
	hostlist_destroy(hl);
	return SLURM_ERROR;
}
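Example #3 merges several per-rank hostlists into one by draining each temporary list with hostlist_pop()/hostlist_push() and deduplicating once at the end. A stripped-down sketch of just that merge step is shown below, assuming the same hostlist API used above (Example #5 uses hostlist_push_list() as a shorter alternative when the source list can be appended wholesale).

/* Illustrative helper: move every host from src into dst, then remove any
 * duplicates introduced by the merge. */
static void merge_hosts(hostlist_t dst, hostlist_t src)
{
	char *node;

	while ((node = hostlist_pop(src))) {
		hostlist_push(dst, node);	/* append the host name to dst */
		free(node);			/* hostlist_pop() returns an allocated string */
	}
	hostlist_uniq(dst);
}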
Example #4
File: forward.c  Project: artpol84/slurm
/*
 * forward_msg        - logic to forward a message which has been received and
 *                      accumulate the return codes from processes getting
 *                      the forwarded message
 *
 * IN: forward_struct - forward_struct_t *   - holds information about message
 *                                             that needs to be forwarded to
 *                                             children processes
 * IN: header         - header_t             - header from message that came in
 *                                             needing to be forwarded.
 * RET: SLURM_SUCCESS - int
 */
extern int forward_msg(forward_struct_t *forward_struct, header_t *header)
{
	hostlist_t hl = NULL;
	hostlist_t* sp_hl;
	int hl_count = 0;

	if (!forward_struct->ret_list) {
		error("didn't get a ret_list from forward_struct");
		return SLURM_ERROR;
	}
	hl = hostlist_create(header->forward.nodelist);
	hostlist_uniq(hl);

	if (route_g_split_hostlist(
		    hl, &sp_hl, &hl_count, header->forward.tree_width)) {
		error("unable to split forward hostlist");
		hostlist_destroy(hl);
		return SLURM_ERROR;
	}

	_forward_msg_internal(NULL, sp_hl, forward_struct, header,
			      forward_struct->timeout, hl_count);

	xfree(sp_hl);
	hostlist_destroy(hl);
	return SLURM_SUCCESS;
}
Example #5
static hostlist_t 
_read_genders (List attrs, int iopt)
{
    ListIterator i  = NULL;
    hostlist_t   hl = NULL;
    char *     attr = NULL;

    if (attrs == NULL) /* Special "all nodes" case */
        return _read_genders_attr (ALL_NODES, iopt);

    if (list_count (attrs) == 0)
        return NULL;

    if ((i = list_iterator_create (attrs)) == NULL)
        errx ("genders: unable to create list iterator: %m\n");

    while ((attr = list_next (i))) {
        hostlist_t l = _read_genders_attr (attr, iopt);

        if (hl == NULL) {
            hl = l;
        } else {
            hostlist_push_list (hl, l);
            hostlist_destroy (l);
        }
    }

    list_iterator_destroy (i);

    hostlist_uniq (hl);

    return (hl);
}
Example #6
File: forward.c  Project: artpol84/slurm
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	fwd_tree_t fwd_tree;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int count = 0;
	List ret_list = NULL;
	int thr_count = 0;
	int host_count = 0;
	hostlist_t* sp_hl;
	int hl_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	if (route_g_split_hostlist(hl, &sp_hl, &hl_count,
				   msg->forward.tree_width)) {
		error("unable to split forward hostlist");
		return NULL;
	}
	slurm_mutex_init(&tree_mutex);
	slurm_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	memset(&fwd_tree, 0, sizeof(fwd_tree));
	fwd_tree.orig_msg = msg;
	fwd_tree.ret_list = ret_list;
	fwd_tree.timeout = timeout;
	fwd_tree.notify = &notify;
	fwd_tree.p_thr_count = &thr_count;
	fwd_tree.tree_mutex = &tree_mutex;

	_start_msg_tree_internal(NULL, sp_hl, &fwd_tree, hl_count);

	xfree(sp_hl);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while (thr_count > 0) {
		slurm_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	xassert(count >= host_count);	/* Tree head did not get all responses,
					 * but no more active fwd threads!*/
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	slurm_cond_destroy(&notify);

	return ret_list;
}
Example #7
File: ping_nodes.c  Project: A1ve5/slurm
/* Update acct_gather data for every node that is not DOWN */
extern void update_nodes_acct_gather_data(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *agent_args = NULL;

	agent_args = xmalloc (sizeof (agent_arg_t));
	agent_args->msg_type = REQUEST_ACCT_GATHER_UPDATE;
	agent_args->retry = 0;
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
	agent_args->hostlist = hostlist_create(NULL);

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		if (agent_args->protocol_version >
		    front_end_ptr->protocol_version)
			agent_args->protocol_version =
				front_end_ptr->protocol_version;

		hostlist_push_host(agent_args->hostlist, front_end_ptr->name);
		agent_args->node_count++;
	}
#else
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (agent_args->protocol_version > node_ptr->protocol_version)
			agent_args->protocol_version =
				node_ptr->protocol_version;
		hostlist_push_host(agent_args->hostlist, node_ptr->name);
		agent_args->node_count++;
	}
#endif

	if (agent_args->node_count == 0) {
		hostlist_destroy(agent_args->hostlist);
		xfree (agent_args);
	} else {
		hostlist_uniq(agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(agent_args->hostlist);
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_ENERGY)
			info("Updating acct_gather data for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(agent_args);
	}
}
Example #8
static error_t
cmdline_parse (int key, char *arg, struct argp_state *state)
{
  struct ipmidetect_arguments *cmd_args;
  char *endptr;

  assert (state);
  
  cmd_args = state->input;

  switch (key)
    {
    case IPMIDETECT_HOSTNAME_KEY:
    case IPMIDETECT_LEGACY_HOSTNAME_KEY:
      if (!(cmd_args->hostname = strdup (arg)))
	err_exit ("strdup: %s", strerror (errno));
      break;
    case IPMIDETECT_PORT_KEY:
      errno = 0;
      cmd_args->port = strtol (arg, &endptr, 10);
      if (errno
	  || endptr[0] != '\0')
	err_exit ("invalid port specified");
      break;
    case IPMIDETECT_DETECTED_KEY:
      cmd_args->output_type = IPMIDETECT_DETECTED_NODES;
      break;
    case IPMIDETECT_UNDETECTED_KEY:
      cmd_args->output_type = IPMIDETECT_UNDETECTED_NODES;
      break;
    case IPMIDETECT_HOSTRANGE_KEY:
      cmd_args->output_format = 0;
      break;
    case IPMIDETECT_COMMA_KEY:
      cmd_args->output_format = ',';
      break;
    case IPMIDETECT_NEWLINE_KEY:
      cmd_args->output_format = '\n';
      break;
    case IPMIDETECT_SPACE_KEY:
      cmd_args->output_format = ' ';
      break;
    case ARGP_KEY_ARG:
      if (!strcmp (arg, "-"))
        _read_nodes_from_stdin (cmd_args);
      else
	_push_inputted_nodes (cmd_args, arg);
      hostlist_uniq (cmd_args->inputted_nodes);
      break;
    case ARGP_KEY_END:
      break;
    default:
      return (ARGP_ERR_UNKNOWN);
    }

  return (0);
}
Example #9
/*
 * slurm_step_layout_create - determine how many tasks of a job will be
 *                    run on each node. Distribution is influenced
 *                    by number of cpus on each host.
 * IN tlist - hostlist corresponding to task layout
 * IN cpus_per_node - cpus per node
 * IN cpu_count_reps - how many nodes have same cpu count
 * IN num_hosts - number of hosts we have
 * IN num_tasks - number of tasks to distribute across these cpus
 * IN cpus_per_task - number of cpus per task
 * IN task_dist - type of distribution we are using
 * IN plane_size - plane size (only needed for the plane distribution)
 * RET a pointer to an slurm_step_layout_t structure
 * NOTE: allocates memory that should be xfreed by caller
 */
slurm_step_layout_t *slurm_step_layout_create(
	const char *tlist,
	uint16_t *cpus_per_node, uint32_t *cpu_count_reps,
	uint32_t num_hosts,
	uint32_t num_tasks,
	uint16_t cpus_per_task,
	uint16_t task_dist,
	uint16_t plane_size)
{
	char *arbitrary_nodes = NULL;
	slurm_step_layout_t *step_layout =
		xmalloc(sizeof(slurm_step_layout_t));
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	step_layout->task_dist = task_dist;
	if (task_dist == SLURM_DIST_ARBITRARY) {
		hostlist_t hl = NULL;
		char *buf = NULL;
		/* Set the node list for the task layout later; if user
		 * supplied, it could be different than the job allocation */
		arbitrary_nodes = xstrdup(tlist);
		hl = hostlist_create(tlist);
		hostlist_uniq(hl);
		buf = hostlist_ranged_string_xmalloc(hl);
		num_hosts = hostlist_count(hl);
		hostlist_destroy(hl);
		step_layout->node_list = buf;
	} else {
		step_layout->node_list = xstrdup(tlist);
	}

	step_layout->task_cnt  = num_tasks;
	if (cluster_flags & CLUSTER_FLAG_FE) {
		/* Limited job step support on front-end systems.
		 * All jobs execute through front-end on Blue Gene.
		 * Normally we would not permit execution of job steps,
		 * but can fake it by just allocating all tasks to
		 * one of the allocated nodes. */
		if ((cluster_flags & CLUSTER_FLAG_BG)
		    || (cluster_flags & CLUSTER_FLAG_CRAY_A))
			step_layout->node_cnt  = num_hosts;
		else
			step_layout->node_cnt  = 1;
	} else
		step_layout->node_cnt  = num_hosts;

	if (_init_task_layout(step_layout, arbitrary_nodes,
			      cpus_per_node, cpu_count_reps,
			      cpus_per_task,
			      task_dist, plane_size) != SLURM_SUCCESS) {
		slurm_step_layout_destroy(step_layout);
		step_layout = NULL;
	}
	xfree(arbitrary_nodes);
	return step_layout;
}
Example #10
void 
wrap_hostlist_uniq(WRAPPERS_ARGS, hostlist_t hl)
{
 assert(file && function);

  if (!hl)
    WRAPPERS_ERR_INVALID_PARAMETERS("hostlist_uniq");

  hostlist_uniq(hl);
  return;
}
Example #11
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	check_agent_args = xmalloc (sizeof (agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create("");
	if (check_agent_args->hostlist == NULL)
		fatal("hostlist_create: malloc failure");

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push(check_agent_args->hostlist, front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
	for (i=0, node_ptr=node_record_table_ptr;
	     i<node_record_count; i++, node_ptr++) {
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		hostlist_push(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
#endif

	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree (check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}
Example #12
File: slurm.c  Project: AdityaSarang/pdsh
static hostlist_t _slurm_wcoll (List joblist)
{
    int i;
    hostlist_t hl = NULL;
    job_info_msg_t * msg;
    int32_t envjobid = 0;
    int alljobids = 0;

    if ((joblist == NULL) && (envjobid = _slurm_jobid()) < 0)
        return (NULL);

    if (slurm_load_jobs((time_t) NULL, &msg, 1) < 0) 
        errx ("Unable to contact slurm controller: %s\n", 
              slurm_strerror (errno));

    /*
     *  Check for "all" in joblist
     */
    alljobids = _alljobids_requested (joblist);

    for (i = 0; i < msg->record_count; i++) {
        job_info_t *j = &msg->job_array[i];

        if (alljobids && j->job_state == JOB_RUNNING)
            hl = _hl_append (hl, j->nodes);
        else if (!joblist && (j->job_id == envjobid)) {
            /*
             *  Only use SLURM_JOBID environment variable if user
             *   didn't override with -j option
             */
            hl = hostlist_create (j->nodes);
            break;
        }
        else if (_jobid_requested (joblist, j->job_id)) {
            hl = _hl_append (hl, j->nodes);
            /* 
             * Exit when there is no more jobids to search
             */
            if (list_count (joblist) == 0)
                break;
        }
    }
    
    slurm_free_job_info_msg (msg);

    if (hl)
        hostlist_uniq (hl);

    return (hl);
}
Example #13
/* Append to buf a compact tasklist expression (e.g. "tux[0-1]*2")
 * Prepend ":" to expression as needed */
static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
{
	char *host_str;
	char *tok, *sep;
	int i, in_bracket = 0, fini = 0;

	hostlist_uniq(*hl_tmp);
	host_str = hostlist_ranged_string_xmalloc(*hl_tmp);

	/* Note that host_str may be of this form "alpha,beta". We want
	 * to record this as "alpha*#:beta*#" and NOT "alpha,beta*#".
	 * NOTE: Do not break up command within brackets (e.g. "tux[1,2-4]") */
	if (*buf)
		sep = ":";
	else
		sep = "";
	tok = host_str;
	for (i=0; fini == 0; i++) {
		switch (tok[i]) {
			case '[':
				in_bracket = 1;
				break;
			case ']':
				in_bracket = 0;
				break;
			case '\0':
				fini = 1;
				if (in_bracket)
					error("badly formed hostlist %s", tok);
				/* fall through to emit the final token */
			case ',':
				if (in_bracket)
					break;
				tok[i] = '\0';
				xstrfmtcat(*buf, "%s%s*%d", sep, tok, *reps);
				sep = ":";
				tok += (i + 1);
				i = -1;
				break;
		}
	}
	xfree(host_str);
	hostlist_destroy(*hl_tmp);
	*hl_tmp = (hostlist_t) NULL;
	*reps = 0;
}
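Because _append_hl_buf() is static, the following caller is purely illustrative; it only demonstrates the input/output shape described in the comment at the top of this example. No leading ":" is prepended on the first call because *buf starts out NULL.

	char *buf = NULL;
	int reps = 2;
	hostlist_t hl = hostlist_create("tux0,tux1");

	_append_hl_buf(&buf, &hl, &reps);
	/* buf now holds "tux[0-1]*2"; hl has been destroyed and reset to NULL,
	 * and reps has been reset to 0. */
	xfree(buf);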
Example #14
File: slurm.c  Project: AdityaSarang/pdsh
static hostlist_t _slurm_wcoll_partition (List partitionlist)
{
    int i;
    char * str;
    hostlist_t hl = NULL;
    partition_info_msg_t * msg;
    partition_info_t * p;
    ListIterator li;

    if (slurm_load_partitions((time_t) NULL, &msg, 1) < 0)
        errx ("Unable to contact slurm controller: %s\n",
              slurm_strerror (errno));

    for (i = 0; i < msg->record_count; i++){
        p = &msg->partition_array[i];

        if (_partition_requested (partitionlist, p->name)) {
            hl = _hl_append (hl, p->nodes);
            /*
             * Exit when there is no more partitions to search
             */
            if (list_count (partitionlist) == 0)
                break;
        }
    }

    /*
     *  Anything left in partitionlist wasn't found, emit a warning
     */
    li = list_iterator_create(partitionlist);
    while ((str = list_next(li))) {
        err("%p: Warning - partition %s not found\n", str);
    }
    list_iterator_destroy(li);

    slurm_free_partition_info_msg (msg);

    if (hl)
        hostlist_uniq (hl);

    return (hl);
}
Example #15
File: start_job.c  Project: VURM/slurm
/* Start a job:
 *	CMD=STARTJOB ARG=<jobid> TASKLIST=<node_list> [COMMENT=<whatever>]
 * RET 0 on success, -1 on failure */
extern int	start_job(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *comment_ptr, *task_ptr, *tasklist, *tmp_char;
	int i, rc, task_cnt;
	uint32_t jobid;
	hostlist_t hl = (hostlist_t) NULL;
	char *host_string;
	static char reply_msg[128];

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "STARTJOB lacks ARG";
		error("wiki: STARTJOB lacks ARG");
		return -1;
	}
	jobid = strtoul(arg_ptr+4, &tmp_char, 10);
	if (!isspace(tmp_char[0])) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: STARTJOB has invalid jobid");
		return -1;
	}

	comment_ptr = strstr(cmd_ptr, "COMMENT=");
	task_ptr    = strstr(cmd_ptr, "TASKLIST=");

	if (comment_ptr) {
		comment_ptr[7] = ':';
		comment_ptr += 8;
		if (comment_ptr[0] == '\"') {
			comment_ptr++;
			for (i=0; i<MAX_COMMENT_LEN; i++) {
				if (comment_ptr[i] == '\0')
					break;
				if (comment_ptr[i] == '\"') {
					comment_ptr[i] = '\0';
					break;
				}
			}
			if (i == MAX_COMMENT_LEN)
				comment_ptr[i-1] = '\0';
		} else if (comment_ptr[0] == '\'') {
			comment_ptr++;
			for (i=0; i<MAX_COMMENT_LEN; i++) {
				if (comment_ptr[i] == '\0')
					break;
				if (comment_ptr[i] == '\'') {
					comment_ptr[i] = '\0';
					break;
				}
			}
			if (i == MAX_COMMENT_LEN)
				comment_ptr[i-1] = '\0';
		} else
			null_term(comment_ptr);
	}

	if (task_ptr == NULL) {
		*err_code = -300;
		*err_msg = "STARTJOB lacks TASKLIST";
		error("wiki: STARTJOB lacks TASKLIST");
		return -1;
	}
	task_ptr += 9;	/* skip over "TASKLIST=" */
	if ((task_ptr[0] == '\0') || isspace(task_ptr[0])) {
		/* No TASKLIST specification, useful for testing */
		host_string = xstrdup("");
		task_cnt = 0;
		tasklist = NULL;
	} else {
		null_term(task_ptr);
		tasklist = moab2slurm_task_list(task_ptr, &task_cnt);
		if (tasklist)
			hl = hostlist_create(tasklist);
		if ((tasklist == NULL) || (hl == NULL)) {
			*err_code = -300;
			*err_msg = "STARTJOB TASKLIST is invalid";
			error("wiki: STARTJOB TASKLIST is invalid: %s",
			      task_ptr);
			xfree(tasklist);
			return -1;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		host_string = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (host_string == NULL) {
			*err_code = -300;
			*err_msg = "STARTJOB has invalid TASKLIST";
			error("wiki: STARTJOB has invalid TASKLIST: %s",
			      tasklist);
			xfree(tasklist);
			return -1;
		}
	}

	rc = _start_job(jobid, task_cnt, host_string, tasklist, comment_ptr,
			err_code, err_msg);
	xfree(host_string);
	xfree(tasklist);
	if (rc == 0) {
		snprintf(reply_msg, sizeof(reply_msg),
			"job %u started successfully", jobid);
		*err_msg = reply_msg;
	}
	return rc;
}
Example #16
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit, char *name_ptr,
			char *start_ptr, char *feature_ptr, char *env_ptr,
			char *comment_ptr, char *gres_ptr, char *wckey_ptr)
{
	struct job_record *job_ptr;
	time_t now = time(NULL);
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
		info("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (comment_ptr) {
		info("wiki: change job %u comment %s", jobid, comment_ptr);
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
		last_job_update = now;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (env_ptr) {
		bool have_equal = false;
		char old_sep[1];
		int begin = 0, i;

		if (job_ptr->batch_flag == 0) {
			error("wiki: attempt to set environment variables "
			      "for non-batch job %u", jobid);
			return ESLURM_DISABLED;
		}
		for (i=0; ; i++) {
			if (env_ptr[i] == '=') {
				if (have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				have_equal = true;
				if (env_ptr[i+1] == '\"') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
							      jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\"') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				} else if (env_ptr[i+1] == '\'') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
							      jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\'') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				}
			}
			if (isspace(env_ptr[i]) || (env_ptr[i] == ',')) {
				if (!have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				old_sep[0] = env_ptr[i];
				env_ptr[i] = '\0';
				xrealloc(job_ptr->details->env_sup,
					 sizeof(char *) *
					 (job_ptr->details->env_cnt+1));
				job_ptr->details->env_sup
						[job_ptr->details->env_cnt++] =
						xstrdup(&env_ptr[begin]);
				info("wiki: for job %u add env: %s",
				     jobid, &env_ptr[begin]);
				env_ptr[i] = old_sep[0];
				if (isspace(old_sep[0]))
					break;
				begin = i + 1;
				have_equal = false;
			}
		}
	}

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = now;
	}

	if (bank_ptr &&
	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
		return EINVAL;
	}

	if (feature_ptr) {
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u features to %s",
				jobid, feature_ptr);
			job_ptr->details->features = xstrdup(feature_ptr);
			last_job_update = now;
		} else {
			error("wiki: MODIFYJOB features of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (start_ptr) {
		char *end_ptr;
		uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u begin time to %u",
				jobid, begin_time);
			job_ptr->details->begin_time = begin_time;
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB begin_time of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (name_ptr) {
		if (IS_JOB_PENDING(job_ptr)) {
			info("wiki: change job %u name %s", jobid, name_ptr);
			xfree(job_ptr->name);
			job_ptr->name = xstrdup(name_ptr);
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB name of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB hostlist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
				jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
				jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}

		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = now;
		update_accounting = true;
	}

	if (new_node_cnt) {
		job_desc_msg_t job_desc;
#ifdef HAVE_BG
		uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
		static uint16_t cpus_per_node = 0;
		if (!cpus_per_node) {
			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpus_per_node);
		}
#endif
		if(!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			error("wiki: MODIFYJOB node count of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}
		memset(&job_desc, 0, sizeof(job_desc_msg_t));

		job_desc.min_nodes = new_node_cnt;
		job_desc.max_nodes = NO_VAL;
		job_desc.select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);

		select_g_select_jobinfo_free(job_desc.select_jobinfo);

		job_ptr->details->min_nodes = job_desc.min_nodes;
		if (job_ptr->details->max_nodes &&
		    (job_ptr->details->max_nodes < job_desc.min_nodes))
			job_ptr->details->max_nodes = job_desc.min_nodes;
		info("wiki: change job %u min_nodes to %u",
		     jobid, new_node_cnt);
#ifdef HAVE_BG
		job_ptr->details->min_cpus = job_desc.min_cpus;
		job_ptr->details->max_cpus = job_desc.max_cpus;
		job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;

		new_node_cnt = job_ptr->details->min_cpus;
		if (cpus_per_node)
			new_node_cnt /= cpus_per_node;

		/* This is only set up so accounting is set up correctly */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &new_node_cnt);
		/* reset geo since changing this makes any geo
		   potentially invalid */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_GEOMETRY,
					    geometry);
#endif
		last_job_update = now;
		update_accounting = true;
	}

	if (gres_ptr) {
		char *orig_gres;

		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB GRES of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}

		orig_gres = job_ptr->gres;
		job_ptr->gres = NULL;
		if (gres_ptr[0])
			job_ptr->gres = xstrdup(gres_ptr);
		if (gres_plugin_job_state_validate(job_ptr->gres,
						   &job_ptr->gres_list)) {
			error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
			xfree(job_ptr->gres);
			job_ptr->gres = orig_gres;
			return ESLURM_INVALID_GRES;
		}
		xfree(orig_gres);
	}

	if (wckey_ptr) {
		int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
		if (rc != SLURM_SUCCESS) {
			error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
			return rc;
		}
	}

	if (update_accounting) {
		if (job_ptr->details && job_ptr->details->begin_time) {
			/* Update job record in accounting to reflect
			 * the changes */
			jobacct_storage_g_job_start(acct_db_conn, job_ptr);
		}
	}

	return SLURM_SUCCESS;
}
Example #17
File: allocate.c  Project: VURM/slurm
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *
job_desc_msg_create_from_opts (void)
{
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;

	slurm_init_job_desc_msg(j);

	j->contiguous     = opt.contiguous;
	j->features       = opt.constraints;
	j->gres           = opt.gres;
	if (opt.immediate == 1)
		j->immediate = opt.immediate;
	if (opt.job_name)
		j->name   = xstrdup(opt.job_name);
	else
		j->name   = xstrdup(opt.cmd_name);
	if (opt.argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(opt.argv[0]);
	}
	if (opt.acctg_freq >= 0)
		j->acctg_freq     = opt.acctg_freq;
	j->reservation    = xstrdup(opt.reservation);
	j->wckey          = xstrdup(opt.wckey);

	j->req_nodes      = xstrdup(opt.nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if(j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt.nodelist);
		opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if(opt.distribution == SLURM_DIST_ARBITRARY
	   && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->exc_nodes      = opt.exc_nodes;
	j->partition      = opt.partition;
	j->min_nodes      = opt.min_nodes;
	if (opt.sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		j->threads_per_core    = opt.threads_per_core;
	j->user_id        = opt.uid;
	j->dependency     = opt.dependency;
	if (opt.nice)
		j->nice   = NICE_OFFSET + opt.nice;

	if (opt.cpu_bind)
		j->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		j->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		j->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		j->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		j->plane_size     = opt.plane_size;
	j->task_dist      = opt.distribution;

	j->group_id       = opt.gid;
	j->mail_type      = opt.mail_type;

	if (opt.ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt.ntasks_per_node;
	if (opt.ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt.ntasks_per_core;

	if (opt.mail_user)
		j->mail_user = xstrdup(opt.mail_user);
	if (opt.begin)
		j->begin_time = opt.begin;
	if (opt.licenses)
		j->licenses = xstrdup(opt.licenses);
	if (opt.network)
		j->network = xstrdup(opt.network);
	if (opt.account)
		j->account = xstrdup(opt.account);
	if (opt.comment)
		j->comment = xstrdup(opt.comment);
	if (opt.qos)
		j->qos = xstrdup(opt.qos);
	if (opt.cwd)
		j->work_dir = xstrdup(opt.cwd);

	if (opt.hold)
		j->priority     = 0;
	if (opt.jobid != NO_VAL)
		j->job_id	= opt.jobid;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i=0; i<SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt.geometry[i];
	}
#endif

	if (opt.conn_type != (uint16_t) NO_VAL)
		j->conn_type[0] = opt.conn_type;

	if (opt.reboot)
		j->reboot = 1;
	if (opt.no_rotate)
		j->rotate = 0;

	if (opt.blrtsimage)
		j->blrtsimage = xstrdup(opt.blrtsimage);
	if (opt.linuximage)
		j->linuximage = xstrdup(opt.linuximage);
	if (opt.mloaderimage)
		j->mloaderimage = xstrdup(opt.mloaderimage);
	if (opt.ramdiskimage)
		j->ramdiskimage = xstrdup(opt.ramdiskimage);

	if (opt.max_nodes)
		j->max_nodes    = opt.max_nodes;
	else if (opt.nodes_set) {
		/* On an allocation, if max_nodes isn't set, set it so the
		 * behavior matches salloc or sbatch.
		 */
		j->max_nodes    = opt.min_nodes;
	}
	if (opt.pn_min_cpus != NO_VAL)
		j->pn_min_cpus    = opt.pn_min_cpus;
	if (opt.pn_min_memory != NO_VAL)
		j->pn_min_memory = opt.pn_min_memory;
	else if (opt.mem_per_cpu != NO_VAL)
		j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
	if (opt.overcommit) {
		j->min_cpus    = opt.min_nodes;
		j->overcommit  = opt.overcommit;
	} else
		j->min_cpus    = opt.ntasks * opt.cpus_per_task;
	if (opt.ntasks_set)
		j->num_tasks   = opt.ntasks;

	if (opt.cpus_set)
		j->cpus_per_task = opt.cpus_per_task;

	if (opt.no_kill)
		j->kill_on_node_fail   = 0;
	if (opt.time_limit != NO_VAL)
		j->time_limit          = opt.time_limit;
	if (opt.time_min != NO_VAL)
		j->time_min            = opt.time_min;
	j->shared = opt.shared;

	if (opt.warn_signal)
		j->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		j->warn_time = opt.warn_time;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt.spank_job_env_size) {
		j->spank_job_env      = opt.spank_job_env;
		j->spank_job_env_size = opt.spank_job_env_size;
	}

	return (j);
}
Example #18
File: allocate.c  Project: RPI-HPC/slurm
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *
job_desc_msg_create_from_opts (void)
{
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id == (uint64_t)-1) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
	} else {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	}
#endif

	j->contiguous     = opt.contiguous;
	if (opt.core_spec)
		j->core_spec = opt.core_spec;
	j->features       = opt.constraints;
	j->gres           = opt.gres;
	if (opt.immediate == 1)
		j->immediate = opt.immediate;
	if (opt.job_name)
		j->name   = opt.job_name;
	else
		j->name   = opt.cmd_name;
	if (opt.argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(opt.argv[0]);
	}
	if (opt.acctg_freq)
		j->acctg_freq     = xstrdup(opt.acctg_freq);
	j->reservation    = opt.reservation;
	j->wckey          = opt.wckey;

	j->req_nodes      = xstrdup(opt.nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt.nodelist);
		opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if (opt.distribution == SLURM_DIST_ARBITRARY
	   && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->exc_nodes      = opt.exc_nodes;
	j->partition      = opt.partition;
	j->min_nodes      = opt.min_nodes;
	if (opt.sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		j->threads_per_core    = opt.threads_per_core;
	j->user_id        = opt.uid;
	j->dependency     = opt.dependency;
	if (opt.nice)
		j->nice   = NICE_OFFSET + opt.nice;
	if (opt.priority)
		j->priority = opt.priority;

	if (opt.cpu_bind)
		j->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		j->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		j->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		j->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		j->plane_size     = opt.plane_size;
	j->task_dist      = opt.distribution;

	j->group_id       = opt.gid;
	j->mail_type      = opt.mail_type;

	if (opt.ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt.ntasks_per_node;
	if (opt.ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt.ntasks_per_core;

	if (opt.mail_user)
		j->mail_user = opt.mail_user;
	if (opt.begin)
		j->begin_time = opt.begin;
	if (opt.licenses)
		j->licenses = opt.licenses;
	if (opt.network)
		j->network = opt.network;
	if (opt.profile)
		j->profile = opt.profile;
	if (opt.account)
		j->account = opt.account;
	if (opt.comment)
		j->comment = opt.comment;
	if (opt.qos)
		j->qos = opt.qos;
	if (opt.cwd)
		j->work_dir = opt.cwd;

	if (opt.hold)
		j->priority     = 0;
	if (opt.jobid != NO_VAL)
		j->job_id	= opt.jobid;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt.geometry[i];
	}
#endif

	memcpy(j->conn_type, opt.conn_type, sizeof(j->conn_type));

	if (opt.reboot)
		j->reboot = 1;
	if (opt.no_rotate)
		j->rotate = 0;

	if (opt.blrtsimage)
		j->blrtsimage = opt.blrtsimage;
	if (opt.linuximage)
		j->linuximage = opt.linuximage;
	if (opt.mloaderimage)
		j->mloaderimage = opt.mloaderimage;
	if (opt.ramdiskimage)
		j->ramdiskimage = opt.ramdiskimage;

	if (opt.max_nodes)
		j->max_nodes    = opt.max_nodes;
	else if (opt.nodes_set) {
		/* On an allocation, if max_nodes isn't set, set it so the
		 * behavior matches salloc or sbatch.
		 */
		j->max_nodes    = opt.min_nodes;
	}
	if (opt.pn_min_cpus != NO_VAL)
		j->pn_min_cpus    = opt.pn_min_cpus;
	if (opt.pn_min_memory != NO_VAL)
		j->pn_min_memory = opt.pn_min_memory;
	else if (opt.mem_per_cpu != NO_VAL)
		j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
	if (opt.overcommit) {
		j->min_cpus    = opt.min_nodes;
		j->overcommit  = opt.overcommit;
	} else if (opt.cpus_set)
		j->min_cpus    = opt.ntasks * opt.cpus_per_task;
	else
		j->min_cpus    = opt.ntasks;
	if (opt.ntasks_set)
		j->num_tasks   = opt.ntasks;

	if (opt.cpus_set)
		j->cpus_per_task = opt.cpus_per_task;

	if (opt.no_kill)
		j->kill_on_node_fail   = 0;
	if (opt.time_limit != NO_VAL)
		j->time_limit          = opt.time_limit;
	if (opt.time_min != NO_VAL)
		j->time_min            = opt.time_min;
	j->shared = opt.shared;

	if (opt.warn_signal)
		j->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		j->warn_time = opt.warn_time;

	if (opt.req_switch >= 0)
		j->req_switch = opt.req_switch;
	if (opt.wait4switch >= 0)
		j->wait4switch = opt.wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt.spank_job_env_size) {
		j->spank_job_env      = opt.spank_job_env;
		j->spank_job_env_size = opt.spank_job_env_size;
	}

	return (j);
}
Example #19
File: start_job.c  Project: IFCA/slurm
/* Start a job:
 *	CMD=STARTJOB ARG=<jobid> TASKLIST=<node_list>
 * RET 0 on success, -1 on failure */
extern int	start_job(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *task_ptr, *tasklist, *tmp_char;
	int rc, task_cnt;
	uint32_t jobid;
	hostlist_t hl = (hostlist_t) NULL;
	char *host_string;
	static char reply_msg[128];

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "STARTJOB lacks ARG";
		error("wiki: STARTJOB lacks ARG");
		return -1;
	}
	jobid = strtoul(arg_ptr+4, &tmp_char, 10);
	if (!isspace(tmp_char[0])) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: STARTJOB has invalid jobid");
		return -1;
	}

	task_ptr = strstr(cmd_ptr, "TASKLIST=");
	if (task_ptr == NULL) {
		*err_code = -300;
		*err_msg = "STARTJOB lacks TASKLIST";
		error("wiki: STARTJOB lacks TASKLIST");
		return -1;
	}
	task_ptr += 9;	/* skip over "TASKLIST=" */
	null_term(task_ptr);
	tasklist = moab2slurm_task_list(task_ptr, &task_cnt);
	if (tasklist)
		hl = hostlist_create(tasklist);
	if ((tasklist == NULL) || (hl == NULL)) {
		*err_code = -300;
		*err_msg = "STARTJOB TASKLIST is invalid";
		error("wiki: STARTJOB TASKLIST is invalid: %s",
			task_ptr);
		xfree(tasklist);
		return -1;
	}
	hostlist_uniq(hl);
	hostlist_sort(hl);
	host_string = hostlist_ranged_string_xmalloc(hl);
	hostlist_destroy(hl);
	if (host_string == NULL) {
		*err_code = -300;
		*err_msg = "STARTJOB has invalid TASKLIST";
		error("wiki: STARTJOB has invalid TASKLIST: %s", tasklist);
		xfree(tasklist);
		return -1;
	}

	rc = _start_job(jobid, task_cnt, host_string, tasklist,
			err_code, err_msg);
	xfree(host_string);
	xfree(tasklist);
	if (rc == 0) {
		snprintf(reply_msg, sizeof(reply_msg),
			"job %u started successfully", jobid);
		*err_msg = reply_msg;
	}
	return rc;
}
Example #20
static char *	_dump_node(struct node_record *node_ptr, hostlist_t hl,
			   time_t update_time)
{
	char tmp[16*1024], *buf = NULL;
	int i;
	uint32_t cpu_cnt;

	if (!node_ptr)
		return NULL;

	if (hl) {
		char *node_list;
		hostlist_sort(hl);
		hostlist_uniq(hl);
		node_list = hostlist_ranged_string_xmalloc(hl);
		xstrcat(buf, node_list);
		xfree(node_list);
	} else {
		snprintf(tmp, sizeof(tmp), "%s", node_ptr->name);
		xstrcat(buf, tmp);
	}

	snprintf(tmp, sizeof(tmp), ":STATE=%s;", _get_node_state(node_ptr));
	xstrcat(buf, tmp);

	if (node_ptr->cpu_load != NO_VAL) {
		snprintf(tmp, sizeof(tmp), "CPULOAD=%f;",
			 (node_ptr->cpu_load / 100.0));
		xstrcat(buf, tmp);
	}

	if (node_ptr->reason) {
		/* Strip out any quotes, they confuse Moab */
		char *reason, *bad_char;
		reason = xstrdup(node_ptr->reason);
		while ((bad_char = strchr(reason, '\'')))
			bad_char[0] = ' ';
		while ((bad_char = strchr(reason, '\"')))
			bad_char[0] = ' ';
		snprintf(tmp, sizeof(tmp), "CAT=\"%s\";", reason);
		xstrcat(buf, tmp);
		xfree(reason);
	}

	if (update_time > last_node_update)
		return buf;

	if (slurmctld_conf.fast_schedule) {
		/* config from slurm.conf */
		cpu_cnt = node_ptr->config_ptr->cpus;
	} else {
		/* config as reported by slurmd */
		cpu_cnt = node_ptr->cpus;
	}
	for (i=0; i<node_ptr->part_cnt; i++) {
		if (i == 0)
			xstrcat(buf, "CCLASS=");
		snprintf(tmp, sizeof(tmp), "[%s:%u]",
			node_ptr->part_pptr[i]->name,
			cpu_cnt);
		xstrcat(buf, tmp);
	}
	if (i > 0)
		xstrcat(buf, ";");

	if (node_ptr->arch) {
		snprintf(tmp, sizeof(tmp), "ARCH=%s;", node_ptr->arch);
		xstrcat(buf, tmp);
	}

	if (node_ptr->os) {
		snprintf(tmp, sizeof(tmp), "OS=%s;", node_ptr->os);
		xstrcat(buf, tmp);
	}

	if (node_ptr->config_ptr && node_ptr->config_ptr->feature) {
		snprintf(tmp, sizeof(tmp), "FEATURE=%s;",
			node_ptr->config_ptr->feature);
		/* comma separator to colon */
		for (i=0; (tmp[i] != '\0'); i++) {
			if (tmp[i] == ',')
				tmp[i] = ':';
		}
		xstrcat(buf, tmp);
	}

	if (node_ptr->config_ptr && node_ptr->config_ptr->gres) {
		snprintf(tmp, sizeof(tmp), "GRES=%s;",
			node_ptr->config_ptr->gres);
		xstrcat(buf, tmp);
	}

	if (update_time > 0)
		return buf;

	if (slurmctld_conf.fast_schedule) {
		/* config from slurm.conf */
		snprintf(tmp, sizeof(tmp),
			"CMEMORY=%u;CDISK=%u;CPROC=%u;",
			node_ptr->config_ptr->real_memory,
			node_ptr->config_ptr->tmp_disk,
			node_ptr->config_ptr->cpus);
	} else {
		/* config as reported by slurmd */
		snprintf(tmp, sizeof(tmp),
			"CMEMORY=%u;CDISK=%u;CPROC=%u;",
			node_ptr->real_memory,
			node_ptr->tmp_disk,
			node_ptr->cpus);
	}
	xstrcat(buf, tmp);

	return buf;
}
Example #21
File: srun_job.c  Project: VURM/slurm
/*
 * Create an srun job structure for a step w/out an allocation response msg.
 * (i.e. inside an allocation)
 */
srun_job_t *
job_step_create_allocation(resource_allocation_response_msg_t *resp)
{
	uint32_t job_id = resp->job_id;
	srun_job_t *job = NULL;
	allocation_info_t *ai = xmalloc(sizeof(*ai));
	hostlist_t hl = NULL;
	char *buf = NULL;
	int count = 0;
	uint32_t alloc_count = 0;

	ai->jobid          = job_id;
	ai->stepid         = NO_VAL;
	ai->nodelist = opt.alloc_nodelist;
	hl = hostlist_create(ai->nodelist);
	hostlist_uniq(hl);
	alloc_count = hostlist_count(hl);
	ai->nnodes = alloc_count;
	hostlist_destroy(hl);

	if (opt.exc_nodes) {
		hostlist_t exc_hl = hostlist_create(opt.exc_nodes);
		hostlist_t inc_hl = NULL;
		char *node_name = NULL;

		hl = hostlist_create(ai->nodelist);
		if(opt.nodelist) {
			inc_hl = hostlist_create(opt.nodelist);
		}
		hostlist_uniq(hl);
		//info("using %s or %s", opt.nodelist, ai->nodelist);
		while ((node_name = hostlist_shift(exc_hl))) {
			int inx = hostlist_find(hl, node_name);
			if (inx >= 0) {
				debug("excluding node %s", node_name);
				hostlist_delete_nth(hl, inx);
				ai->nnodes--;	/* decrement node count */
			}
			if(inc_hl) {
				inx = hostlist_find(inc_hl, node_name);
				if (inx >= 0) {
					error("Requested node %s is also "
					      "in the excluded list.",
					      node_name);
					error("Job not submitted.");
					hostlist_destroy(exc_hl);
					hostlist_destroy(inc_hl);
					goto error;
				}
			}
			free(node_name);
		}
		hostlist_destroy(exc_hl);

		/* We need to set this here so that if more nodes are
		 * available than were requested we can correct it.  If
		 * there is no exclude list, these variables are set in
		 * the else branch below.
		 */
		if (!opt.nodes_set) {
			/* We don't want to set the number of nodes equal
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation.
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;

		count = hostlist_count(hl);
		if(!count) {
			error("Hostlist is now nothing!  Can't run job.");
			hostlist_destroy(hl);
			goto error;
		}
		if(inc_hl) {
			count = hostlist_count(inc_hl);
			if(count < ai->nnodes) {
				/* add more nodes to get correct number for
				   allocation */
				hostlist_t tmp_hl = hostlist_copy(hl);
				int i=0;
				int diff = ai->nnodes - count;
				buf = hostlist_ranged_string_xmalloc(inc_hl);
				hostlist_delete(tmp_hl, buf);
				xfree(buf);
				while ((node_name = hostlist_shift(tmp_hl)) &&
				       (i < diff)) {
					hostlist_push(inc_hl, node_name);
					i++;
				}
				hostlist_destroy(tmp_hl);
			}
			buf = hostlist_ranged_string_xmalloc(inc_hl);
			hostlist_destroy(inc_hl);
			xfree(opt.nodelist);
			opt.nodelist = buf;
		} else {
			if (count > ai->nnodes) {
				/* remove more nodes than needed for
				   allocation */
				int i=0;
				for (i=count; i>ai->nnodes; i--)
					hostlist_delete_nth(hl, i);
			}
			xfree(opt.nodelist);
			opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		}

		hostlist_destroy(hl);
	} else {
		if (!opt.nodes_set) {
			/* We don't want to set the number of nodes equal
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation.
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
	}

	/* get the correct number of hosts to run tasks on */
	if (opt.nodelist) {
		hl = hostlist_create(opt.nodelist);
		if (opt.distribution != SLURM_DIST_ARBITRARY)
			hostlist_uniq(hl);
		if (!hostlist_count(hl)) {
			error("Hostlist is now nothing!  Can not run job.");
			hostlist_destroy(hl);
			goto error;
		}

		buf = hostlist_ranged_string_xmalloc(hl);
		count = hostlist_count(hl);
		hostlist_destroy(hl);
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
		xfree(opt.nodelist);
		opt.nodelist = buf;
	}

	if (opt.distribution == SLURM_DIST_ARBITRARY) {
		if (count != opt.ntasks) {
			error("You asked for %d tasks but specified %d nodes",
			      opt.ntasks, count);
			goto error;
		}
	}

	if (ai->nnodes == 0) {
		error("No nodes in allocation, can't run job");
		goto error;
	}

	ai->num_cpu_groups = resp->num_cpu_groups;
	ai->cpus_per_node  = resp->cpus_per_node;
	ai->cpu_count_reps = resp->cpu_count_reps;

/* 	info("looking for %d nodes out of %s with a must list of %s", */
/* 	     ai->nnodes, ai->nodelist, opt.nodelist); */
	/*
	 * Create job
	 */
	job = _job_create_structure(ai);
error:
   	xfree(ai);
	return (job);

}
Example #22
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
static job_desc_msg_t *_job_desc_msg_create_from_opts(slurm_opt_t *opt_local)
{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;
	xassert(srun_opt);

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	static bool sgi_err_logged = false;
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id != (uint64_t) -1) {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	} else if (!sgi_err_logged) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
		sgi_err_logged = true;
	}
#endif

	j->contiguous     = opt_local->contiguous;
	if (opt_local->core_spec != NO_VAL16)
		j->core_spec      = opt_local->core_spec;
	j->features       = opt_local->constraints;
	j->cluster_features = opt_local->c_constraints;
	if (opt_local->gres && xstrcasecmp(opt_local->gres, "NONE"))
		j->gres   = opt_local->gres;
	if (opt_local->immediate == 1)
		j->immediate = opt_local->immediate;
	if (opt_local->job_name)
		j->name   = opt_local->job_name;
	else
		j->name = srun_opt->cmd_name;
	if (srun_opt->argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(srun_opt->argv[0]);
	}
	if (opt_local->acctg_freq)
		j->acctg_freq     = xstrdup(opt_local->acctg_freq);
	j->reservation    = opt_local->reservation;
	j->wckey          = opt_local->wckey;
	j->x11 = opt.x11;
	if (j->x11) {
		j->x11_magic_cookie = xstrdup(opt.x11_magic_cookie);
		j->x11_target_port = opt.x11_target_port;
	}

	j->req_nodes      = xstrdup(opt_local->nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt_local->nodelist);
		opt_local->nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if (((opt_local->distribution & SLURM_DIST_STATE_BASE) ==
	     SLURM_DIST_ARBITRARY) && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->extra = opt_local->extra;
	j->exc_nodes      = opt_local->exc_nodes;
	j->partition      = opt_local->partition;
	j->min_nodes      = opt_local->min_nodes;
	if (opt_local->sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt_local->sockets_per_node;
	if (opt_local->cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt_local->cores_per_socket;
	if (opt_local->threads_per_core != NO_VAL) {
		j->threads_per_core    = opt_local->threads_per_core;
		/* if 1 always make sure affinity knows about it */
		if (j->threads_per_core == 1)
			srun_opt->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;
	}
	j->user_id        = opt_local->uid;
	j->dependency     = opt_local->dependency;
	if (opt_local->nice != NO_VAL)
		j->nice   = NICE_OFFSET + opt_local->nice;
	if (opt_local->priority)
		j->priority = opt_local->priority;
	if (srun_opt->cpu_bind)
		j->cpu_bind = srun_opt->cpu_bind;
	if (srun_opt->cpu_bind_type)
		j->cpu_bind_type = srun_opt->cpu_bind_type;
	if (opt_local->delay_boot != NO_VAL)
		j->delay_boot = opt_local->delay_boot;
	if (opt_local->mem_bind)
		j->mem_bind       = opt_local->mem_bind;
	if (opt_local->mem_bind_type)
		j->mem_bind_type  = opt_local->mem_bind_type;
	if (opt_local->plane_size != NO_VAL)
		j->plane_size     = opt_local->plane_size;
	j->task_dist      = opt_local->distribution;

	j->group_id       = opt_local->gid;
	j->mail_type      = opt_local->mail_type;

	if (opt_local->ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt_local->ntasks_per_node;
	if (opt_local->ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt_local->ntasks_per_socket;
	if (opt_local->ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt_local->ntasks_per_core;

	if (opt_local->mail_user)
		j->mail_user = opt_local->mail_user;
	if (opt_local->burst_buffer)
		j->burst_buffer = opt_local->burst_buffer;
	if (opt_local->begin)
		j->begin_time = opt_local->begin;
	if (opt_local->deadline)
		j->deadline = opt_local->deadline;
	if (opt_local->licenses)
		j->licenses = opt_local->licenses;
	if (opt_local->network)
		j->network = opt_local->network;
	if (opt_local->profile)
		j->profile = opt_local->profile;
	if (opt_local->account)
		j->account = opt_local->account;
	if (opt_local->comment)
		j->comment = opt_local->comment;
	if (opt_local->qos)
		j->qos = opt_local->qos;
	if (opt_local->cwd)
		j->work_dir = opt_local->cwd;

	if (opt_local->hold)
		j->priority     = 0;
	if (opt_local->jobid != NO_VAL)
		j->job_id	= opt_local->jobid;
#ifdef HAVE_BG
	if (opt_local->geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt_local->geometry[i];
	}
#endif

	memcpy(j->conn_type, opt_local->conn_type, sizeof(j->conn_type));

	if (opt_local->reboot)
		j->reboot = 1;
	if (opt_local->no_rotate)
		j->rotate = 0;

	if (opt_local->blrtsimage)
		j->blrtsimage = opt_local->blrtsimage;
	if (opt_local->linuximage)
		j->linuximage = opt_local->linuximage;
	if (opt_local->mloaderimage)
		j->mloaderimage = opt_local->mloaderimage;
	if (opt_local->ramdiskimage)
		j->ramdiskimage = opt_local->ramdiskimage;

	if (opt_local->max_nodes)
		j->max_nodes    = opt_local->max_nodes;
	else if (opt_local->nodes_set) {
		/* On an allocation, if max_nodes isn't set, set it so the
		 * behavior matches salloc or sbatch.
		 */
		j->max_nodes    = opt_local->min_nodes;
	}
	if (opt_local->pn_min_cpus != NO_VAL)
		j->pn_min_cpus = opt_local->pn_min_cpus;
	if (opt_local->pn_min_memory != NO_VAL64)
		j->pn_min_memory = opt_local->pn_min_memory;
	else if (opt_local->mem_per_cpu != NO_VAL64)
		j->pn_min_memory = opt_local->mem_per_cpu | MEM_PER_CPU;
	if (opt_local->pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt_local->pn_min_tmp_disk;
	if (opt_local->overcommit) {
		j->min_cpus    = opt_local->min_nodes;
		j->overcommit  = opt_local->overcommit;
	} else if (opt_local->cpus_set)
		j->min_cpus    = opt_local->ntasks * opt_local->cpus_per_task;
	else
		j->min_cpus    = opt_local->ntasks;
	if (opt_local->ntasks_set)
		j->num_tasks   = opt_local->ntasks;

	if (opt_local->cpus_set)
		j->cpus_per_task = opt_local->cpus_per_task;

	if (opt_local->no_kill)
		j->kill_on_node_fail   = 0;
	if (opt_local->time_limit != NO_VAL)
		j->time_limit          = opt_local->time_limit;
	if (opt_local->time_min != NO_VAL)
		j->time_min            = opt_local->time_min;
	if (opt_local->shared != NO_VAL16)
		j->shared = opt_local->shared;

	if (opt_local->warn_signal)
		j->warn_signal = opt_local->warn_signal;
	if (opt_local->warn_time)
		j->warn_time = opt_local->warn_time;
	if (opt_local->job_flags)
		j->bitflags = opt_local->job_flags;

	if (opt_local->cpu_freq_min != NO_VAL)
		j->cpu_freq_min = opt_local->cpu_freq_min;
	if (opt_local->cpu_freq_max != NO_VAL)
		j->cpu_freq_max = opt_local->cpu_freq_max;
	if (opt_local->cpu_freq_gov != NO_VAL)
		j->cpu_freq_gov = opt_local->cpu_freq_gov;

	if (opt_local->req_switch >= 0)
		j->req_switch = opt_local->req_switch;
	if (opt_local->wait4switch >= 0)
		j->wait4switch = opt_local->wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt_local->spank_job_env_size) {
		j->spank_job_env      = opt_local->spank_job_env;
		j->spank_job_env_size = opt_local->spank_job_env_size;
	}

	if (opt_local->power_flags)
		j->power_flags = opt_local->power_flags;
	if (opt_local->mcs_label)
		j->mcs_label = opt_local->mcs_label;
	j->wait_all_nodes = 1;

	/* If the job can run on multiple clusters, find the earliest
	 * start time and run it there */
	j->clusters = xstrdup(opt_local->clusters);

	return j;
}
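The helper above only fills a job_desc_msg_t from the parsed srun options; the allocation request itself is issued elsewhere. As a rough, self-contained sketch of how such a descriptor is normally consumed through the public libslurm calls (slurm_init_job_desc_msg, slurm_allocate_resources) — the field values here are made up for illustration, not taken from the code above:

#include <stdio.h>
#include <unistd.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

/* Illustrative only: build a tiny descriptor by hand and request an
 * allocation, mirroring what the option-driven helper above produces. */
static int request_two_nodes(void)
{
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *resp = NULL;

	slurm_init_job_desc_msg(&desc);	/* every field starts as NO_VAL/NULL */
	desc.min_nodes = 2;
	desc.max_nodes = 2;	/* mirror the salloc/sbatch-style default above */
	desc.user_id   = getuid();
	desc.group_id  = getgid();

	if (slurm_allocate_resources(&desc, &resp) != SLURM_SUCCESS) {
		slurm_perror("slurm_allocate_resources");
		return -1;
	}
	printf("allocated nodes: %s\n", resp->node_list);
	slurm_free_resource_allocation_response_msg(resp);
	return 0;
}

int main(void)
{
	return request_two_nodes() ? 1 : 0;
}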
Example #23
/*
 * ping_nodes - check that all nodes and daemons are alive,
 *	get nodes in UNKNOWN state to register
 */
void ping_nodes (void)
{
	static bool restart_flag = true;	/* system just restarted */
	static int offset = 0;	/* mutex via node table write lock on entry */
	static int max_reg_threads = 0;	/* max node registration threads;
					 * the set can include DOWN nodes, so
					 * limit the number to avoid huge
					 * communication delays */
	int i;
	time_t now, still_live_time, node_dead_time;
	static time_t last_ping_time = (time_t) 0;
	hostlist_t down_hostlist = NULL;
	char *host_str = NULL;
	agent_arg_t *ping_agent_args = NULL;
	agent_arg_t *reg_agent_args = NULL;
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr = NULL;
#else
	struct node_record *node_ptr = NULL;
#endif

	now = time (NULL);

	ping_agent_args = xmalloc (sizeof (agent_arg_t));
	ping_agent_args->msg_type = REQUEST_PING;
	ping_agent_args->retry = 0;
	ping_agent_args->hostlist = hostlist_create("");

	reg_agent_args = xmalloc (sizeof (agent_arg_t));
	reg_agent_args->msg_type = REQUEST_NODE_REGISTRATION_STATUS;
	reg_agent_args->retry = 0;
	reg_agent_args->hostlist = hostlist_create("");

	/*
	 * If there are a large number of down nodes, the node ping
	 * can take a long time to complete:
	 *  ping_time = down_nodes * agent_timeout / agent_parallelism
	 *  ping_time = down_nodes * 10_seconds / 10
	 *  ping_time = down_nodes (seconds)
	 * Because of this, we extend the SlurmdTimeout by the
	 * time needed to complete a ping of all nodes.
	 */
	if ((slurmctld_conf.slurmd_timeout == 0) ||
	    (last_ping_time == (time_t) 0)) {
		node_dead_time = (time_t) 0;
	} else {
		node_dead_time = last_ping_time -
				 slurmctld_conf.slurmd_timeout;
	}
	still_live_time = now - (slurmctld_conf.slurmd_timeout / 3);
	last_ping_time  = now;

	if (max_reg_threads == 0) {
		max_reg_threads = MAX(slurm_get_tree_width(), 1);
	}
	offset += max_reg_threads;
	if ((offset > node_record_count) &&
	    (offset >= (max_reg_threads * MAX_REG_FREQUENCY)))
		offset = 0;

#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if ((slurmctld_conf.slurmd_timeout == 0)	&&
		    (!restart_flag)				&&
		    (!IS_NODE_UNKNOWN(front_end_ptr))		&&
		    (!IS_NODE_NO_RESPOND(front_end_ptr)))
			continue;

		if ((front_end_ptr->last_response != (time_t) 0)     &&
		    (front_end_ptr->last_response <= node_dead_time) &&
		    (!IS_NODE_DOWN(front_end_ptr))) {
			if (down_hostlist)
				(void) hostlist_push_host(down_hostlist,
					front_end_ptr->name);
			else {
				down_hostlist =
					hostlist_create(front_end_ptr->name);
				if (down_hostlist == NULL)
					fatal("hostlist_create: malloc error");
			}
			set_front_end_down(front_end_ptr, "Not responding");
			front_end_ptr->not_responding = false;
			continue;
		}

		if (restart_flag) {
			front_end_ptr->last_response =
				slurmctld_conf.last_update;
		}

		/* Request a node registration if its state is UNKNOWN or
		 * on a periodic basis (about once every MAX_REG_FREQUENCY
		 * pings; this mechanism avoids an additional per-node timer
		 * or counter and refreshes configuration information once
		 * in a while). We limit these requests since they can
		 * generate a flood of incoming RPCs. */
		if (IS_NODE_UNKNOWN(front_end_ptr) || restart_flag ||
		    ((i >= offset) && (i < (offset + max_reg_threads)))) {
			hostlist_push(reg_agent_args->hostlist,
				      front_end_ptr->name);
			reg_agent_args->node_count++;
			continue;
		}

		if ((!IS_NODE_NO_RESPOND(front_end_ptr)) &&
		    (front_end_ptr->last_response >= still_live_time))
			continue;

		/* Do not keep pinging down nodes since this can induce
		 * huge delays in hierarchical communication fail-over */
		if (IS_NODE_NO_RESPOND(front_end_ptr) &&
		    IS_NODE_DOWN(front_end_ptr))
			continue;

		hostlist_push(ping_agent_args->hostlist, front_end_ptr->name);
		ping_agent_args->node_count++;
	}
#else
	for (i=0, node_ptr=node_record_table_ptr;
	     i<node_record_count; i++, node_ptr++) {
		if (IS_NODE_FUTURE(node_ptr) || IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if ((slurmctld_conf.slurmd_timeout == 0) &&
		    (!restart_flag)			 &&
		    (!IS_NODE_UNKNOWN(node_ptr))         &&
		    (!IS_NODE_NO_RESPOND(node_ptr)))
			continue;

		if ((node_ptr->last_response != (time_t) 0)     &&
		    (node_ptr->last_response <= node_dead_time) &&
		    (!IS_NODE_DOWN(node_ptr))) {
			if (down_hostlist)
				(void) hostlist_push_host(down_hostlist,
					node_ptr->name);
			else {
				down_hostlist =
					hostlist_create(node_ptr->name);
				if (down_hostlist == NULL)
					fatal("hostlist_create: malloc error");
			}
			set_node_down_ptr(node_ptr, "Not responding");
			node_ptr->not_responding = false;  /* logged below */
			continue;
		}

		if (restart_flag)
			node_ptr->last_response = slurmctld_conf.last_update;

		/* Request a node registration if its state is UNKNOWN or
		 * on a periodic basis (about once every MAX_REG_FREQUENCY
		 * pings; this mechanism avoids an additional per-node timer
		 * or counter and refreshes configuration information once
		 * in a while). We limit these requests since they can
		 * generate a flood of incoming RPCs. */
		if (IS_NODE_UNKNOWN(node_ptr) || restart_flag ||
		    ((i >= offset) && (i < (offset + max_reg_threads)))) {
			hostlist_push(reg_agent_args->hostlist,
				      node_ptr->name);
			reg_agent_args->node_count++;
			continue;
		}

		if ((!IS_NODE_NO_RESPOND(node_ptr)) &&
		    (node_ptr->last_response >= still_live_time))
			continue;

		/* Do not keep pinging down nodes since this can induce
		 * huge delays in hierarchical communication fail-over */
		if (IS_NODE_NO_RESPOND(node_ptr) && IS_NODE_DOWN(node_ptr))
			continue;

		hostlist_push(ping_agent_args->hostlist, node_ptr->name);
		ping_agent_args->node_count++;
	}
#endif

	restart_flag = false;
	if (ping_agent_args->node_count == 0) {
		hostlist_destroy(ping_agent_args->hostlist);
		xfree (ping_agent_args);
	} else {
		hostlist_uniq(ping_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				ping_agent_args->hostlist);
		debug("Spawning ping agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(ping_agent_args);
	}

	if (reg_agent_args->node_count == 0) {
		hostlist_destroy(reg_agent_args->hostlist);
		xfree (reg_agent_args);
	} else {
		hostlist_uniq(reg_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				reg_agent_args->hostlist);
		debug("Spawning registration agent for %s %d hosts",
		      host_str, reg_agent_args->node_count);
		xfree(host_str);
		ping_begin();
		agent_queue_request(reg_agent_args);
	}

	if (down_hostlist) {
		hostlist_uniq(down_hostlist);
		host_str = hostlist_ranged_string_xmalloc(down_hostlist);
		error("Nodes %s not responding, setting DOWN", host_str);
		xfree(host_str);
		hostlist_destroy(down_hostlist);
	}
}
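The offset/max_reg_threads bookkeeping above rotates a small registration window across the node table, so only a slice of nodes is asked to re-register on any given ping pass, and a pass only hits real node indices once in a while. A toy standalone model of that rotation (MAX_REG_FREQUENCY and the counts are assumed values, purely illustrative):

#include <stdio.h>

#define MAX_REG_FREQUENCY 20	/* assumed value for this sketch */

int main(void)
{
	int node_cnt = 12;	/* pretend cluster size */
	int width = 5;		/* stands in for max_reg_threads */
	int offset = 0, pass;

	for (pass = 0; pass < 8; pass++) {
		offset += width;
		if ((offset > node_cnt) &&
		    (offset >= (width * MAX_REG_FREQUENCY)))
			offset = 0;
		if (offset < node_cnt)
			printf("pass %d: re-register nodes [%d, %d)\n",
			       pass, offset, offset + width);
		else
			printf("pass %d: no periodic re-registration\n",
			       pass);
	}
	return 0;
}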
Example #24
File: forward.c Project: IFCA/slurm
/*
 * forward_msg        - logic to forward a message which has been received and
 *                      accumulate the return codes from processes getting
 *                      the forwarded message
 *
 * IN: forward_struct - forward_struct_t *   - holds information about message
 *                                             that needs to be forwarded to
 *                                             children processes
 * IN: header         - header_t             - header from message that came in
 *                                             needing to be forwarded.
 * RET: SLURM_SUCCESS - int
 */
extern int forward_msg(forward_struct_t *forward_struct,
		       header_t *header)
{
	int j = 0;
	int retries = 0;
	forward_msg_t *forward_msg = NULL;
	int thr_count = 0;
	int *span = set_span(header->forward.cnt, 0);
	hostlist_t hl = NULL;
	hostlist_t forward_hl = NULL;
	char *name = NULL;

	if (!forward_struct->ret_list) {
		error("didn't get a ret_list from forward_struct");
		xfree(span);
		return SLURM_ERROR;
	}
	hl = hostlist_create(header->forward.nodelist);
	hostlist_uniq(hl);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		char *buf = NULL;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		forward_msg = &forward_struct->forward_msg[thr_count];
		forward_msg->ret_list = forward_struct->ret_list;

		forward_msg->timeout = forward_struct->timeout;

		if (forward_msg->timeout <= 0) {
			/* convert secs to msec */
			forward_msg->timeout  = slurm_get_msg_timeout() * 1000;
		}

		forward_msg->notify = &forward_struct->notify;
		forward_msg->forward_mutex = &forward_struct->forward_mutex;
		forward_msg->buf_len = forward_struct->buf_len;
		forward_msg->buf = forward_struct->buf;

		memcpy(&forward_msg->header.orig_addr,
		       &header->orig_addr,
		       sizeof(slurm_addr_t));

		forward_msg->header.version = header->version;
		forward_msg->header.flags = header->flags;
		forward_msg->header.msg_type = header->msg_type;
		forward_msg->header.body_length = header->body_length;
		forward_msg->header.ret_list = NULL;
		forward_msg->header.ret_cnt = 0;

		forward_hl = hostlist_create(name);
		free(name);
		for(j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(forward_hl, name);
			free(name);
		}

		buf = hostlist_ranged_string_xmalloc(forward_hl);
		hostlist_destroy(forward_hl);
		forward_init(&forward_msg->header.forward, NULL);
		forward_msg->header.forward.nodelist = buf;
		while (pthread_create(&thread_agent, &attr_agent,
				     _forward_thread,
				     (void *)forward_msg)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	hostlist_destroy(hl);
	xfree(span);
	return SLURM_SUCCESS;
}
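Each pass of the loop above gives one forwarding thread a direct target plus a block of additional hosts (sized by set_span()) that the target will forward to in turn. set_span() is internal to SLURM, so the sketch below simply assumes a fixed block size to show the grouping:

#include <stdio.h>

/* Rough sketch of the grouping done above: the first host popped
 * becomes a thread's direct target, and the next "span" hosts become
 * its forward list. An even split is assumed here for illustration. */
int main(void)
{
	const char *hosts[] = { "n1", "n2", "n3", "n4", "n5", "n6", "n7" };
	int nhosts = 7, span = 2, i = 0, thr = 0;

	while (i < nhosts) {
		int j;
		printf("thread %d -> direct target %s, forwards to:",
		       thr, hosts[i++]);
		for (j = 0; (j < span) && (i < nhosts); j++, i++)
			printf(" %s", hosts[i]);
		printf("\n");
		thr++;
	}
	return 0;
}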
Example #25
File: forward.c Project: IFCA/slurm
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->p_thr_count = &thr_count;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout  = slurm_get_msg_timeout() * 1000;
		}

		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		/*
		 * Lock and increase the thread counter; we need that to
		 * protect the start_msg_tree waiting loop, which was
		 * originally designed around a "while (count < host_count)"
		 * loop. In cases where a fwd thread could not get all the
		 * return codes from its children, that loop would deadlock.
		 */
		slurm_mutex_lock(&tree_mutex);
		thr_count++;
		slurm_mutex_unlock(&tree_mutex);

		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread, (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);

	}
	xfree(span);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while (thr_count > 0) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	xassert(count >= host_count);	/* Tree head did not get all responses,
					 * but no more active fwd threads!*/
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
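The thr_count/notify pairing above is what keeps the tree head from waiting forever when a forwarding thread cannot collect every response: the head waits for threads to finish, not for a fixed response count. A minimal standalone model of that pattern using plain POSIX threads (the thread count and the sleep are placeholders):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t tree_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  notify     = PTHREAD_COND_INITIALIZER;
static int thr_count;

/* Each worker decrements the counter under the mutex and signals the
 * head, standing in for one forwarding thread completing. */
static void *worker(void *arg)
{
	sleep(1);	/* stand-in for forwarding work */
	pthread_mutex_lock(&tree_mutex);
	thr_count--;
	pthread_cond_signal(&notify);
	pthread_mutex_unlock(&tree_mutex);
	return NULL;
}

int main(void)
{
	pthread_t tid[4];
	int i;

	for (i = 0; i < 4; i++) {
		/* count the thread before it starts, as above */
		pthread_mutex_lock(&tree_mutex);
		thr_count++;
		pthread_mutex_unlock(&tree_mutex);
		pthread_create(&tid[i], NULL, worker, NULL);
	}

	pthread_mutex_lock(&tree_mutex);
	while (thr_count > 0)
		pthread_cond_wait(&notify, &tree_mutex);
	pthread_mutex_unlock(&tree_mutex);
	printf("all forwarding threads done\n");

	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	return 0;
}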
Example #26
int
main (int argc, char *argv[])
{
    char *server = NULL;
    int msize = 65536;
    uid_t uid = geteuid ();
    int topt = 0;
    Npcfsys *fs = NULL;
    Npcfid *fid, *afid, *root;
    int c, fd;
    char buf[80], *host, *p;
    hostlist_t hl;
    hostlist_iterator_t itr;
    int lopt = 0;

    diod_log_init (argv[0]);

    opterr = 0;
    while ((c = GETOPT (argc, argv, OPTIONS, longopts)) != -1) {
        switch (c) {
        case 's':   /* --server HOST[:PORT] or /path/to/socket */
            server = optarg;
            break;
        case 'm':   /* --msize SIZE */
            msize = strtoul (optarg, NULL, 10);
            break;
        case 'u':   /* --uid UID */
            uid = strtoul (optarg, NULL, 10);
            break;
        case 't':   /* --timeout SECS */
            topt = strtoul (optarg, NULL, 10);
            break;
        case 'l':   /* --long */
            lopt = 1;
            break;
        default:
            usage ();
        }
    }

    if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
        err_exit ("signal");
    if (signal (SIGALRM, sigalarm) == SIG_ERR)
        err_exit ("signal");

    if (topt > 0)
        alarm (topt);

    if ((fd = diod_sock_connect (server, 0)) < 0)
        exit (1);

    if (!(fs = npc_start (fd, fd, msize, 0)))
        errn_exit (np_rerror (), "error negotiating protocol with server");
    if (!(afid = npc_auth (fs, "ctl", uid, diod_auth)) && np_rerror () != 0)
        errn_exit (np_rerror (), "error authenticating to server");
    if (!(root = npc_attach (fs, afid, "ctl", uid)))
        errn_exit (np_rerror (), "error attaching to aname=ctl");
    if (!(fid = npc_open_bypath (root, "connections", O_RDONLY)))
        errn_exit (np_rerror (), "open connections");

    if (!(hl = hostlist_create (NULL)))
        err_exit ("hostlist_create");
    while (npc_gets (fid, buf, sizeof(buf))) {
        if ((p = strchr (buf, ' ')))
            *p = '\0';
        if (!lopt && (p = strchr (buf, '.')))
            *p = '\0';
        if (!hostlist_push_host (hl, buf))
            err_exit ("hostlist_push_host");
    }
    hostlist_uniq (hl);
    if (lopt) {
        if (!(itr = hostlist_iterator_create (hl)))
            err_exit ("hostlist_iterator_create");
        while ((host = hostlist_next (itr)))
            printf ("%s\n", host);
        hostlist_iterator_destroy (itr);
    } else {
        char s[1024];

        if (hostlist_ranged_string (hl, sizeof (s), s) < 0)
            msg_exit ("hostlist output would be too long (use -l)");
        printf ("%s\n", s);
    }
    hostlist_destroy (hl);

    if (npc_clunk (fid) < 0)
        errn_exit (np_rerror (), "clunk connections");
    if (npc_clunk (root) < 0)
        errn_exit (np_rerror (), "error clunking ctl");
    if (npc_clunk (afid) < 0)
        errn_exit (np_rerror (), "error clunking afid");
    npc_finish (fs);

    exit(0);
}
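Like most of these examples, the tool above reduces a stream of host names to a compact range with hostlist_uniq() plus a ranged-string call. A minimal standalone illustration of that compaction, assuming the same LLNL hostlist API these programs link against (the header name and exact prototypes can differ between projects):

#include <stdio.h>
#include "hostlist.h"   /* assumed header name; varies by project */

int main (void)
{
    const char *names[] = { "node3", "node1", "node2", "node1" };
    char s[1024];
    hostlist_t hl;
    int i;

    if (!(hl = hostlist_create (NULL)))
        return 1;
    for (i = 0; i < 4; i++)
        hostlist_push_host (hl, names[i]);
    hostlist_uniq (hl);     /* drop the duplicate node1 */
    hostlist_sort (hl);
    if (hostlist_ranged_string (hl, sizeof (s), s) < 0)
        fprintf (stderr, "hostlist output would be too long\n");
    else
        printf ("%s\n", s); /* prints node[1-3] */
    hostlist_destroy (hl);
    return 0;
}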
Example #27
struct ipmipower_connection *
ipmipower_connection_array_create(const char *hostname, unsigned int *len) 
{
  char *str = NULL;
  int index = 0;
  hostlist_t hl = NULL;
  hostlist_iterator_t itr = NULL;
  struct ipmipower_connection *ics;
  int size = sizeof(struct ipmipower_connection);
  int hl_count;
  int errcount = 0;
  int emfilecount = 0;

  assert(hostname && len); 

  *len = 0;
  
  if (!(hl = hostlist_create(hostname)))
    {
      ipmipower_output(MSG_TYPE_HOSTNAME_INVALID, hostname);
      return NULL;
    }
  
  if (!(itr = hostlist_iterator_create(hl)))
    ierr_exit("hostlist_iterator_create() error"); 
  
  hostlist_uniq(hl);

  hl_count = hostlist_count(hl);

  ics = (struct ipmipower_connection *)Malloc(size * hl_count);
  
  memset(ics, '\0', (size * hl_count));
  
  while ((str = hostlist_next(itr))) 
    {
      ics[index].ipmi_fd = -1;
      ics[index].ping_fd = -1;
      
      /* cleanup only at the end, gather all error outputs for
       * later 
       */
      if (_connection_setup(&ics[index], str) < 0) 
        {
          if (errno == EMFILE && !emfilecount)
            {
              cbuf_printf(ttyout, "file descriptor limit reached\n");
              emfilecount++;
            }
          errcount++;
        }
       
      free(str);
      index++;
    }

  hostlist_iterator_destroy(itr);
  hostlist_destroy(hl);

  if (errcount)
    {
      int i;
      for (i = 0; i < hl_count; i++) 
        {
          close(ics[i].ipmi_fd);
          close(ics[i].ping_fd);
          if (ics[i].ipmi_in)
            cbuf_destroy(ics[i].ipmi_in);
          if (ics[i].ipmi_out)
            cbuf_destroy(ics[i].ipmi_out);
          if (ics[i].ping_in)
            cbuf_destroy(ics[i].ping_in);
          if (ics[i].ping_out)
            cbuf_destroy(ics[i].ping_out);
        }
      Free(ics);
      return NULL;
    }

  *len = hl_count;
  return ics;
}
Example #28
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit)
{
	struct job_record *job_ptr;
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr)) {
		error("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = time(NULL);
	}

	if (bank_ptr) {
		if (update_job_account("wiki", job_ptr, bank_ptr)
		   != SLURM_SUCCESS)
			return EINVAL;
		else
			update_accounting = true;
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB tasklist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
			     jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
			     jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}
		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = time(NULL);
		update_accounting = true;
	}
	if (new_node_cnt) {
		if (IS_JOB_PENDING(job_ptr) && job_ptr->details) {
			job_ptr->details->min_nodes = new_node_cnt;
			if (job_ptr->details->max_nodes
			&&  (job_ptr->details->max_nodes < new_node_cnt))
				job_ptr->details->max_nodes = new_node_cnt;
			info("wiki: change job %u min_nodes to %u",
				jobid, new_node_cnt);
			last_job_update = time(NULL);
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB node count of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (update_accounting) {
		/* Update job record in accounting to reflect changes */
		jobacct_storage_job_start_direct(acct_db_conn, job_ptr);
	}

	return SLURM_SUCCESS;
}
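When MODIFYJOB changes the time limit above, end_time is shifted by the limit delta converted from minutes to seconds, which preserves any accumulated suspend time. A small worked example of that arithmetic with made-up numbers:

#include <stdio.h>
#include <time.h>

int main(void)
{
	time_t end_time    = 1700000000;	/* hypothetical current end */
	unsigned old_limit = 30;		/* minutes */
	unsigned new_limit = 45;		/* minutes */

	/* same adjustment as the MODIFYJOB handler above */
	end_time += ((time_t) new_limit - (time_t) old_limit) * 60;
	printf("time limit +%u min => end_time moves +%u sec (now %ld)\n",
	       new_limit - old_limit, (new_limit - old_limit) * 60,
	       (long) end_time);
	return 0;
}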
Example #29
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
	int node_test_cnt = 0, node_limit, node_states, run_cyclic;
	static int base_node_loc = -1;
	static time_t cycle_start_time = (time_t) 0;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	/* Sync the plugin's internal data with the
	 * node select_nodeinfo. This is important
	 * after a reconfig, since otherwise
	 * select_nodeinfo will not return the correct
	 * number of allocated CPUs.
	 */
	select_g_select_nodeinfo_set_all();

	check_agent_args = xmalloc (sizeof (agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create(NULL);
#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push_host(check_agent_args->hostlist,
				   front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
	run_cyclic = slurmctld_conf.health_check_node_state &
		     HEALTH_CHECK_CYCLE;
	node_states = slurmctld_conf.health_check_node_state &
		      (~HEALTH_CHECK_CYCLE);
	if (run_cyclic) {
		time_t now = time(NULL);
		if (cycle_start_time == (time_t) 0)
			cycle_start_time = now;
		else if (base_node_loc >= 0)
			;	/* mid-cycle */
		else if (difftime(now, cycle_start_time) <
			 slurmctld_conf.health_check_interval) {
			return;	/* Wait to start next cycle */
		}
		cycle_start_time = now;
		/* Determine how many nodes we want to test on each call of
		 * run_health_check() to spread out the work. */
		node_limit = (node_record_count * 2) /
			     slurmctld_conf.health_check_interval;
		node_limit = MAX(node_limit, 10);
	}
	if ((node_states != HEALTH_CHECK_NODE_ANY) &&
	    (node_states != HEALTH_CHECK_NODE_IDLE)) {
		/* Update each node's alloc_cpus count */
		select_g_select_nodeinfo_set_all();
	}

	for (i = 0; i < node_record_count; i++) {
		if (run_cyclic) {
			if (node_test_cnt++ >= node_limit)
				break;
			base_node_loc++;
			if (base_node_loc >= node_record_count) {
				base_node_loc = -1;
				break;
			}
			node_ptr = node_record_table_ptr + base_node_loc;
		} else {
			node_ptr = node_record_table_ptr + i;
		}
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (node_states != HEALTH_CHECK_NODE_ANY) {
			uint16_t cpus_total, cpus_used = 0;
			if (slurmctld_conf.fast_schedule) {
				cpus_total = node_ptr->config_ptr->cpus;
			} else {
				cpus_total = node_ptr->cpus;
			}
			if (!IS_NODE_IDLE(node_ptr)) {
				select_g_select_nodeinfo_get(
						node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						NODE_STATE_ALLOCATED,
						&cpus_used);
			}
			/* Here the node state is inferred from
			 * the cpus allocated on it.
			 * - cpus_used == 0
			 *       means node is idle
			 * - cpus_used < cpus_total
			 *       means the node is in mixed state
			 * else cpus_used == cpus_total
			 *       means the node is allocated
			 */
			if (cpus_used == 0) {
				if (!(node_states & HEALTH_CHECK_NODE_IDLE))
					continue;
				if (!IS_NODE_IDLE(node_ptr))
					continue;
			} else if (cpus_used < cpus_total) {
				if (!(node_states & HEALTH_CHECK_NODE_MIXED))
					continue;
			} else {
				if (!(node_states & HEALTH_CHECK_NODE_ALLOC))
					continue;
			}
		}
		hostlist_push_host(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
	if (run_cyclic && (i >= node_record_count))
		base_node_loc = -1;
#endif

	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree (check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}
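run_health_check() above decides which nodes qualify purely from the allocated-CPU count reported by the select plugin: zero used CPUs means idle, some means mixed, all means allocated. The sketch below restates that classification as a standalone function (the enum names are illustrative, not SLURM's):

#include <stdio.h>
#include <stdint.h>

enum node_use { USE_IDLE, USE_MIXED, USE_ALLOC };

/* Same inference as above: 0 used CPUs => idle, some but not all
 * => mixed, all => allocated. */
static enum node_use classify(uint16_t cpus_used, uint16_t cpus_total)
{
	if (cpus_used == 0)
		return USE_IDLE;
	if (cpus_used < cpus_total)
		return USE_MIXED;
	return USE_ALLOC;
}

int main(void)
{
	printf("%d %d %d\n",
	       classify(0, 16), classify(4, 16), classify(16, 16));
	return 0;
}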
Example #30
static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr)
{
	int i = 0, j = 0;
	agent_info_t *agent_info_ptr = NULL;
	thd_t *thread_ptr = NULL;
	int *span = NULL;
	int thr_count = 0;
	hostlist_t hl = NULL;
	char *name = NULL;

	agent_info_ptr = xmalloc(sizeof(agent_info_t));
	slurm_mutex_init(&agent_info_ptr->thread_mutex);
	if (pthread_cond_init(&agent_info_ptr->thread_cond, NULL))
		fatal("pthread_cond_init error %m");
	agent_info_ptr->thread_count   = agent_arg_ptr->node_count;
	agent_info_ptr->retry          = agent_arg_ptr->retry;
	agent_info_ptr->threads_active = 0;
	thread_ptr = xmalloc(agent_info_ptr->thread_count * sizeof(thd_t));
	memset(thread_ptr, 0, (agent_info_ptr->thread_count * sizeof(thd_t)));
	agent_info_ptr->thread_struct  = thread_ptr;
	agent_info_ptr->msg_type       = agent_arg_ptr->msg_type;
	agent_info_ptr->msg_args_pptr  = &agent_arg_ptr->msg_args;

	if ((agent_arg_ptr->msg_type != REQUEST_JOB_NOTIFY)	&&
	    (agent_arg_ptr->msg_type != REQUEST_SHUTDOWN)	&&
	    (agent_arg_ptr->msg_type != REQUEST_RECONFIGURE)	&&
	    (agent_arg_ptr->msg_type != SRUN_EXEC)		&&
	    (agent_arg_ptr->msg_type != SRUN_TIMEOUT)		&&
	    (agent_arg_ptr->msg_type != SRUN_NODE_FAIL)		&&
	    (agent_arg_ptr->msg_type != SRUN_REQUEST_SUSPEND)	&&
	    (agent_arg_ptr->msg_type != SRUN_USER_MSG)		&&
	    (agent_arg_ptr->msg_type != SRUN_STEP_MISSING)	&&
	    (agent_arg_ptr->msg_type != SRUN_JOB_COMPLETE)) {
#ifdef HAVE_FRONT_END
		span = set_span(agent_arg_ptr->node_count,
				agent_arg_ptr->node_count);
#else
		/* Sending message to a possibly large number of slurmd.
		 * Push all message forwarding to slurmd in order to
		 * offload as much work from slurmctld as possible. */
		span = set_span(agent_arg_ptr->node_count, 1);
#endif
		agent_info_ptr->get_reply = true;
	} else {
		/* Message is going to one node (for srun) or we want
		 * it to get processed ASAP (SHUTDOWN or RECONFIGURE).
		 * Send the message directly to each node. */
		span = set_span(agent_arg_ptr->node_count,
				agent_arg_ptr->node_count);
	}
	i = 0;
	while(i < agent_info_ptr->thread_count) {
		thread_ptr[thr_count].state      = DSH_NEW;
		thread_ptr[thr_count].addr = agent_arg_ptr->addr;
		name = hostlist_shift(agent_arg_ptr->hostlist);
		if(!name) {
			debug3("no more nodes to send to");
			break;
		}
		hl = hostlist_create(name);
		if(thread_ptr[thr_count].addr && span[thr_count]) {
			debug("warning: you will only be sending this to %s",
			      name);
			span[thr_count] = 0;
		}
		free(name);
		i++;
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(agent_arg_ptr->hostlist);
			if (!name)
				break;
			hostlist_push(hl, name);
			free(name);
			i++;
		}
		hostlist_uniq(hl);
		thread_ptr[thr_count].nodelist = 
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
#if 0
		info("sending msg_type %u to nodes %s",
		     agent_arg_ptr->msg_type, thread_ptr[thr_count].nodelist);
#endif
		thr_count++;
	}
	xfree(span);
	agent_info_ptr->thread_count = thr_count;
	return agent_info_ptr;
}