Пример #1
0
/*
 * Checked wrapper around hostlist_ranged_string().
 *
 * hl  - hostlist to render; must be non-NULL
 * n   - size of buf in bytes; must lie in 1..INT_MAX
 * buf - destination buffer; must be non-NULL
 *
 * Invalid arguments are handed to WRAPPERS_ERR_INVALID_PARAMETERS and a
 * negative result from hostlist_ranged_string() to WRAPPERS_ERR_ERRNO
 * (both macros are defined elsewhere).  Otherwise the value returned by
 * hostlist_ranged_string() is passed through unchanged.
 */
ssize_t 
wrap_hostlist_ranged_string(WRAPPERS_ARGS, hostlist_t hl, size_t n, char *buf)
{
  ssize_t rv;

  assert(file && function);

  /*
   * Both bounds must hold.  The previous "n > 0 || n <= INT_MAX" was true
   * for every n, so the length was never actually validated.
   */
  if (!hl || !buf || !(n > 0 && n <= INT_MAX))
    WRAPPERS_ERR_INVALID_PARAMETERS("hostlist_ranged_string");

  if ((rv = hostlist_ranged_string(hl, n, buf)) < 0)
    WRAPPERS_ERR_ERRNO("hostlist_ranged_string");

  return rv;
}
Пример #2
0
/*
 * Read a SLURM hostfile specified by "filename".  "filename" must contain
 * a list of SLURM NodeNames, one per line.  Reads up to "n" number of hostnames
 * from the file. Returns a string representing a hostlist ranged string of
 * the contents of the file.  This is a helper function, it does not
 * contact any SLURM daemons.
 *
 * Within a line, an unescaped '#' starts a comment that runs to the end of
 * the line; "\#" yields a literal '#'.
 *
 * Returns a string representing the hostlist.  Returns NULL if there are fewer
 * than "n" hostnames in the file, or if an error occurs.  If "n" ==
 * NO_VAL then the entire file is read in
 *
 * Returned string must be freed with free().
 */
char *slurm_read_hostfile(char *filename, int n)
{
	FILE *fp = NULL;
	char in_line[BUFFER_SIZE];	/* input line */
	int i, j;
	int line_size;
	int line_num = 0;
	hostlist_t hostlist = NULL;
	char *nodelist = NULL;

	if (filename == NULL || strlen(filename) == 0)
		return NULL;

	if ((fp = fopen(filename, "r")) == NULL) {
		error("slurm_allocate_resources error opening file %s, %m",
		      filename);
		return NULL;
	}

	hostlist = hostlist_create(NULL);
	if (hostlist == NULL) {
		fclose(fp);
		return NULL;
	}

	while (fgets(in_line, BUFFER_SIZE, fp) != NULL) {
		line_num++;
		line_size = strlen(in_line);
		/* a completely filled buffer means the line did not fit */
		if (line_size == (BUFFER_SIZE - 1)) {
			error ("Line %d, of hostfile %s too long",
			       line_num, filename);
			fclose (fp);
			hostlist_destroy(hostlist);
			return NULL;
		}

		/* strip the newline and any trailing comment */
		for (i = 0; i < line_size; i++) {
			if (in_line[i] == '\n') {
				in_line[i] = '\0';
				break;
			}
			if (in_line[i] == '\0')
				break;
			if (in_line[i] != '#')
				continue;
			if ((i > 0) && (in_line[i - 1] == '\\')) {
				/*
				 * Escaped '#': drop the backslash by shifting
				 * the tail one byte left.  The terminator is
				 * moved too (j <= line_size), otherwise a stale
				 * copy of the last byte stays in the string.
				 */
				for (j = i; j <= line_size; j++) {
					in_line[j - 1] = in_line[j];
				}
				line_size--;
				/*
				 * The '#' now sits at i - 1; step back so the
				 * loop increment resumes at the byte that
				 * followed it instead of skipping it.
				 */
				i--;
				continue;
			}
			in_line[i] = '\0';
			break;
		}

		hostlist_push(hostlist, in_line);
		/* stop early once the requested number of hosts is reached */
		if (n != (int)NO_VAL && hostlist_count(hostlist) == n)
			break;
	}
	fclose(fp);

	if (hostlist_count(hostlist) <= 0) {
		error("Hostlist is empty!");
		goto cleanup_hostfile;
	}
	if (hostlist_count(hostlist) < n) {
		error("Too few NodeNames in SLURM Hostfile");
		goto cleanup_hostfile;
	}

	/* plain malloc(): the caller releases the result with free() */
	nodelist = (char *)malloc(0xffff);
	if (!nodelist) {
		error("Nodelist xmalloc failed");
		goto cleanup_hostfile;
	}

	if (hostlist_ranged_string(hostlist, 0xffff, nodelist) == -1) {
		error("Hostlist is too long for the allocate RPC!");
		free(nodelist);
		nodelist = NULL;
		goto cleanup_hostfile;
	}

	debug2("Hostlist from SLURM_HOSTFILE = %s", nodelist);

cleanup_hostfile:
	hostlist_destroy(hostlist);

	return nodelist;
}
Пример #3
0
/*
 * Connect to a server, attach to its "ctl" aname, read the "connections"
 * file and print the hosts found there.
 *
 * Each line read is cut at the first space; unless --long is given it is
 * also cut at the first '.'.  The collected names are de-duplicated and
 * printed either one per line (--long) or as a single ranged hostlist
 * string.  Failures terminate the process through the *_exit helpers.
 */
int
main (int argc, char *argv[])
{
    char *server = NULL;
    int msize = 65536;
    uid_t uid = geteuid ();
    int topt = 0;
    Npcfsys *fs = NULL;
    Npcfid *fid, *afid, *root;
    int c, fd;
    char buf[80], *host, *p;
    hostlist_t hl;
    hostlist_iterator_t itr;
    int lopt = 0;

    diod_log_init (argv[0]);

    opterr = 0;
    while ((c = GETOPT (argc, argv, OPTIONS, longopts)) != -1) {
        switch (c) {
        case 's':   /* --server HOST[:PORT] or /path/to/socket */
            server = optarg;
            break;
        case 'm':   /* --msize SIZE */
            msize = strtoul (optarg, NULL, 10);
            break;
        case 'u':   /* --uid UID */
            uid = strtoul (optarg, NULL, 10);
            break;
        case 't':   /* --timeout SECS */
            topt = strtoul (optarg, NULL, 10);
            break;
        case 'l':   /* --long */
            lopt = 1;
            break;
        default:
            usage ();
        }
    }

    if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
        err_exit ("signal");
    if (signal (SIGALRM, sigalarm) == SIG_ERR)
        err_exit ("signal");

    /* optional overall timeout, delivered as SIGALRM */
    if (topt > 0)
        alarm (topt);

    if ((fd = diod_sock_connect (server, 0)) < 0)
        exit (1);

    if (!(fs = npc_start (fd, fd, msize, 0)))
        errn_exit (np_rerror (), "error negotiating protocol with server");
    /* a NULL afid with np_rerror () == 0 is accepted and carried forward */
    if (!(afid = npc_auth (fs, "ctl", uid, diod_auth)) && np_rerror () != 0)
        errn_exit (np_rerror (), "error authenticating to server");
    if (!(root = npc_attach (fs, afid, "ctl", uid)))
        errn_exit (np_rerror (), "error attaching to aname=ctl");
    if (!(fid = npc_open_bypath (root, "connections", O_RDONLY)))
        errn_exit (np_rerror (), "open connections");

    if (!(hl = hostlist_create (NULL)))
        err_exit ("hostlist_create");
    while (npc_gets (fid, buf, sizeof(buf))) {
        /* keep only the first space-delimited field */
        if ((p = strchr (buf, ' ')))
            *p = '\0';
        /* short form: drop everything from the first '.' */
        if (!lopt && (p = strchr (buf, '.')))
            *p = '\0';
        if (!hostlist_push_host (hl, buf))
            err_exit ("hostlist_push_host");
    }
    hostlist_uniq (hl);
    if (lopt) {
        if (!(itr = hostlist_iterator_create (hl)))
            err_exit ("hostlist_iterator_create");
        while ((host = hostlist_next (itr))) {
            printf ("%s\n", host);
            /* the string returned by hostlist_next () is caller-owned */
            free (host);
        }
        hostlist_iterator_destroy (itr);
    } else {
        char s[1024];

        if (hostlist_ranged_string (hl, sizeof (s), s) < 0)
            msg_exit ("hostlist output would be too long (use -l)");
        printf ("%s\n", s);
    }
    hostlist_destroy (hl);

    if (npc_clunk (fid) < 0)
        errn_exit (np_rerror (), "clunk connections");
    if (npc_clunk (root) < 0)
        errn_exit (np_rerror (), "error clunking ctl");
    /* afid may be NULL (see npc_auth above); only clunk a real fid */
    if (afid && npc_clunk (afid) < 0)
        errn_exit (np_rerror (), "error clunking afid");
    npc_finish (fs);

    exit(0);
}
Пример #4
0
Файл: sstat.c Проект: Cray/slurm
/*
 * Fetch statistics for job step jobid.stepid from the given nodelist and
 * print them with print_fields().
 *
 * With params.pid_format set, one record is printed per responding node
 * (node name plus its comma-separated pid list).  Otherwise the per-node
 * statistics are aggregated, the node names are collapsed into a ranged
 * hostlist string and a single record is printed.
 *
 * Fills the file-level "job" and "step" records as working storage.
 * Returns SLURM_SUCCESS or the error code from slurm_job_step_stat().
 */
int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist,
	     uint32_t req_cpufreq)
{
	job_step_stat_response_msg_t *step_stat_response = NULL;
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	slurmdb_stats_t temp_stats;
	job_step_stat_t *step_stat = NULL;
	int ntasks = 0;
	int tot_tasks = 0;
	hostlist_t hl = NULL;

	debug("requesting info for job %u.%u", jobid, stepid);
	if ((rc = slurm_job_step_stat(jobid, stepid, nodelist,
				      &step_stat_response)) != SLURM_SUCCESS) {
		if (rc == ESLURM_INVALID_JOB_ID) {
			debug("job step %u.%u has already completed",
			      jobid, stepid);
		} else {
			error("problem getting step_layout for %u.%u: %s",
			      jobid, stepid, slurm_strerror(rc));
		}
		return rc;
	}

	memset(&job, 0, sizeof(slurmdb_job_rec_t));
	job.jobid = jobid;

	memset(&step, 0, sizeof(slurmdb_step_rec_t));

	memset(&temp_stats, 0, sizeof(slurmdb_stats_t));
	temp_stats.cpu_min = NO_VAL;
	memset(&step.stats, 0, sizeof(slurmdb_stats_t));
	step.stats.cpu_min = NO_VAL;

	/*
	 * step.nodes stays NULL (from the memset) for now: pid-format mode
	 * points it at response-owned names, and the aggregate path
	 * allocates its own buffer further down.
	 */
	step.job_ptr = &job;
	step.stepid = stepid;
	step.req_cpufreq = req_cpufreq;
	step.stepname = NULL;
	step.state = JOB_RUNNING;

	hl = hostlist_create(NULL);
	itr = list_iterator_create(step_stat_response->stats_list);
	while ((step_stat = list_next(itr))) {
		if (!step_stat->step_pids || !step_stat->step_pids->node_name)
			continue;
		if (step_stat->step_pids->pid_cnt > 0 ) {
			int i;
			/* append this node's pids as a comma-separated list */
			for(i=0; i<step_stat->step_pids->pid_cnt; i++) {
				if (step.pid_str)
					xstrcat(step.pid_str, ",");
				xstrfmtcat(step.pid_str, "%u",
					   step_stat->step_pids->pid[i]);
			}
		}

		if (params.pid_format) {
			/* borrowed pointer; the response still owns the name */
			step.nodes = step_stat->step_pids->node_name;
			print_fields(&step);
			xfree(step.pid_str);
		} else {
			hostlist_push(hl, step_stat->step_pids->node_name);
			jobacctinfo_2_stats(&temp_stats, step_stat->jobacct);
			ntasks += step_stat->num_tasks;
			aggregate_stats(&step.stats, &temp_stats);
		}
	}
	list_iterator_destroy(itr);
	slurm_job_step_pids_response_msg_free(step_stat_response);
	/* we printed it out already */
	if (params.pid_format) {
		/* the name it pointed at went away with the response */
		step.nodes = NULL;
		hostlist_destroy(hl);
		return rc;
	}

	hostlist_sort(hl);
	step.nodes = xmalloc(BUF_SIZE);
	hostlist_ranged_string(hl, BUF_SIZE, step.nodes);
	hostlist_destroy(hl);
	tot_tasks += ntasks;

	/* turn the accumulated sums into per-task averages */
	if (tot_tasks) {
		step.stats.cpu_ave /= (double)tot_tasks;
		step.stats.rss_ave /= (double)tot_tasks;
		step.stats.vsize_ave /= (double)tot_tasks;
		step.stats.pages_ave /= (double)tot_tasks;
		step.stats.disk_read_ave /= (double)tot_tasks;
		step.stats.disk_write_ave /= (double)tot_tasks;
		step.stats.act_cpufreq /= (double)tot_tasks;
		step.ntasks = tot_tasks;
	}

	print_fields(&step);

	/* release the strings built for this record */
	xfree(step.nodes);
	xfree(step.pid_str);

	return rc;
}
Пример #5
0
/*
 * Read a SLURM hostfile specified by "filename".  "filename" must contain
 * a list of SLURM NodeNames, one per line.  Reads up to "n" number of hostnames
 * from the file. Returns a string representing a hostlist ranged string of
 * the contents of the file.  This is a helper function, it does not
 * contact any SLURM daemons.
 *
 * A line may hold several comma-separated names, and "name*count" adds
 * "name" count times.  An unescaped '#' starts a comment that runs to the
 * end of the line; "\#" yields a literal '#'.  Every line must begin with
 * a letter or a digit.
 *
 * Returns a string representing the hostlist.  Returns NULL if there are fewer
 * than "n" hostnames in the file, or if an error occurs.  If "n" ==
 * NO_VAL then the entire file is read in
 *
 * Returned string must be freed with free().
 */
char *slurm_read_hostfile(char *filename, int n)
{
	FILE *fp = NULL;
	char in_line[BUFFER_SIZE];	/* input line */
	int i, j;
	int line_size;
	int line_num = 0;
	hostlist_t hostlist = NULL;
	char *nodelist = NULL;
	char *asterisk, *tmp_text, *save_ptr = NULL, *host_name;
	int total_file_len = 0;

	if (filename == NULL || strlen(filename) == 0)
		return NULL;

	if ((fp = fopen(filename, "r")) == NULL) {
		error("slurm_allocate_resources error opening file %s, %m",
		      filename);
		return NULL;
	}

	hostlist = hostlist_create(NULL);
	if (hostlist == NULL) {
		fclose(fp);
		return NULL;
	}

	while (fgets(in_line, BUFFER_SIZE, fp) != NULL) {
		line_num++;
		/* <ctype.h> functions need an unsigned char value */
		if (!isalpha((unsigned char)in_line[0]) &&
		    !isdigit((unsigned char)in_line[0])) {
			error ("Invalid hostfile %s contents on line %d",
			       filename, line_num);
			fclose (fp);
			hostlist_destroy(hostlist);
			return NULL;
		}

		line_size = strlen(in_line);
		total_file_len += line_size;
		/* a completely filled buffer means the line did not fit */
		if (line_size == (BUFFER_SIZE - 1)) {
			error ("Line %d, of hostfile %s too long",
			       line_num, filename);
			fclose (fp);
			hostlist_destroy(hostlist);
			return NULL;
		}

		/* strip the newline and any trailing comment */
		for (i = 0; i < line_size; i++) {
			if (in_line[i] == '\n') {
				in_line[i] = '\0';
				break;
			}
			if (in_line[i] == '\0')
				break;
			if (in_line[i] != '#')
				continue;
			if ((i > 0) && (in_line[i - 1] == '\\')) {
				/*
				 * Escaped '#': drop the backslash by shifting
				 * the tail one byte left.  The terminator is
				 * moved too (j <= line_size), otherwise a stale
				 * copy of the last byte stays in the string.
				 */
				for (j = i; j <= line_size; j++) {
					in_line[j - 1] = in_line[j];
				}
				line_size--;
				/*
				 * The '#' now sits at i - 1; step back so the
				 * loop increment resumes at the byte that
				 * followed it instead of skipping it.
				 */
				i--;
				continue;
			}
			in_line[i] = '\0';
			break;
		}

		/* strtok_r() writes into its input, so work on a copy */
		tmp_text = xstrdup(in_line);
		host_name = strtok_r(tmp_text, ",", &save_ptr);
		while (host_name) {
			if ((asterisk = strchr(host_name, '*')) &&
			    (i = atoi(asterisk + 1))) {
				asterisk[0] = '\0';

				/*
				 * The line length only counted "name*count"
				 * once; reserve room for every repeated name
				 * plus its separator so the output buffer is
				 * not undersized.
				 */
				if (i > 0)
					total_file_len +=
						(strlen(host_name) + 1) * i;

				for (j = 0; j < i; j++)
					hostlist_push_host(hostlist, host_name);
			} else {
				hostlist_push_host(hostlist, host_name);
			}
			host_name = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(tmp_text);

		/* stop early once the requested number of hosts is reached */
		if ((n != (int)NO_VAL) && (hostlist_count(hostlist) == n))
			break;
	}
	fclose(fp);

	if (hostlist_count(hostlist) <= 0) {
		error("Hostlist is empty!");
		goto cleanup_hostfile;
	}
	if (hostlist_count(hostlist) < n) {
		error("Too few NodeNames in SLURM Hostfile");
		goto cleanup_hostfile;
	}

	/* slack on top of the text actually read */
	total_file_len += 1024;
	/* plain malloc(): the caller releases the result with free() */
	nodelist = (char *)malloc(total_file_len);
	if (!nodelist) {
		error("Nodelist xmalloc failed");
		goto cleanup_hostfile;
	}

	if (hostlist_ranged_string(hostlist, total_file_len, nodelist) == -1) {
		error("Hostlist is too long for the allocate RPC!");
		free(nodelist);
		nodelist = NULL;
		goto cleanup_hostfile;
	}

	debug2("Hostlist from SLURM_HOSTFILE = %s", nodelist);

cleanup_hostfile:
	hostlist_destroy(hostlist);

	return nodelist;
}
Пример #6
0
/*
 * Read a Slurm hostfile specified by "filename".  "filename" must contain
 * a list of Slurm NodeNames, one per line.  Reads up to "n" number of hostnames
 * from the file. Returns a string representing a hostlist ranged string of
 * the contents of the file.  This is a helper function, it does not
 * contact any Slurm daemons.
 *
 * A line may hold several comma-separated names, and "name*count" adds
 * "name" count times.  An unescaped '#' starts a comment that runs to the
 * end of the line; "\#" yields a literal '#'.  A line that fills the read
 * buffer is split at its last comma and the remainder is prepended to the
 * next chunk read.
 *
 * Returns a string representing the hostlist.  Returns NULL if there are fewer
 * than "n" hostnames in the file, or if an error occurs.  If "n" ==
 * NO_VAL then the entire file is read in
 *
 * Returned string must be freed with free().
 */
char *slurm_read_hostfile(const char *filename, int n)
{
	FILE *fp = NULL;
	char in_line[BUFFER_SIZE];	/* input line */
	int i, j;
	int line_size;
	int line_num = 0;
	hostlist_t hostlist = NULL;
	char *nodelist = NULL, *end_part = NULL;
	char *asterisk, *tmp_text = NULL, *save_ptr = NULL, *host_name;
	int total_file_len = 0;

	if (filename == NULL || strlen(filename) == 0)
		return NULL;

	if ((fp = fopen(filename, "r")) == NULL) {
		error("slurm_allocate_resources error opening file %s, %m",
		      filename);
		return NULL;
	}

	hostlist = hostlist_create(NULL);
	if (hostlist == NULL) {
		fclose(fp);
		return NULL;
	}

	while (fgets(in_line, BUFFER_SIZE, fp) != NULL) {

		line_size = strlen(in_line);
		/* strip the newline and any trailing comment */
		for (i = 0; i < line_size; i++) {
			if (in_line[i] == '\n') {
				in_line[i] = '\0';
				break;
			}
			if (in_line[i] == '\0')
				break;
			if (in_line[i] != '#')
				continue;
			if ((i > 0) && (in_line[i - 1] == '\\')) {
				/*
				 * Escaped '#': drop the backslash by shifting
				 * the tail one byte left.  The terminator is
				 * moved too (j <= line_size), otherwise a stale
				 * copy of the last byte stays in the string.
				 */
				for (j = i; j <= line_size; j++) {
					in_line[j - 1] = in_line[j];
				}
				line_size--;
				/*
				 * The '#' now sits at i - 1; step back so the
				 * loop increment resumes at the byte that
				 * followed it instead of skipping it.
				 */
				i--;
				continue;
			}
			in_line[i] = '\0';
			break;
		}

		/*
		 * Get the string length again just in case it changed from
		 * the above loop
		 */
		line_size = strlen(in_line);
		total_file_len += line_size;

		/*
		 * If there was an end section from before set it up to be on
		 * the front of this next chunk.
		 */
		if (end_part) {
			tmp_text = end_part;
			end_part = NULL;
		}

		if (line_size == (BUFFER_SIZE - 1)) {
			/*
			 * If we filled up the buffer get the end past the last
			 * comma.  We will tack it on the next pass through.
			 */
			char *last_comma = strrchr(in_line, ',');
			if (!last_comma) {
				error("Line %d, of hostfile %s too long",
				      line_num, filename);
				fclose(fp);
				hostlist_destroy(hostlist);
				/* may hold the tail carried over from before */
				xfree(tmp_text);
				return NULL;
			}
			end_part = xstrdup(last_comma + 1);
			*last_comma = '\0';
		} else
			line_num++;

		xstrcat(tmp_text, in_line);

		/* Skip this line */
		if (tmp_text[0] == '\0') {
			/* drop the empty string so it cannot be orphaned */
			xfree(tmp_text);
			continue;
		}

		/* <ctype.h> functions need an unsigned char value */
		if (!isalpha((unsigned char)tmp_text[0]) &&
		    !isdigit((unsigned char)tmp_text[0])) {
			error("Invalid hostfile %s contents on line %d",
			      filename, line_num);
			fclose(fp);
			hostlist_destroy(hostlist);
			xfree(end_part);
			xfree(tmp_text);
			return NULL;
		}

		host_name = strtok_r(tmp_text, ",", &save_ptr);
		while (host_name) {
			if ((asterisk = strchr(host_name, '*')) &&
			    (i = atoi(asterisk + 1))) {
				asterisk[0] = '\0';

				/*
				 * Don't forget the extra space potentially
				 * needed: one name plus separator per repeat.
				 * A negative count adds no hosts and must not
				 * disturb the running total.
				 */
				if (i > 0)
					total_file_len +=
						(strlen(host_name) + 1) * i;

				for (j = 0; j < i; j++)
					hostlist_push_host(hostlist, host_name);
			} else {
				hostlist_push_host(hostlist, host_name);
			}
			host_name = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(tmp_text);

		/* stop early once the requested number of hosts is reached */
		if ((n != (int)NO_VAL) && (hostlist_count(hostlist) == n))
			break;
	}
	fclose(fp);

	if (hostlist_count(hostlist) <= 0) {
		error("Hostlist is empty!");
		goto cleanup_hostfile;
	}
	if (hostlist_count(hostlist) < n) {
		error("Too few NodeNames in Slurm Hostfile");
		goto cleanup_hostfile;
	}

	/* slack on top of the text actually read */
	total_file_len += 1024;
	/* plain malloc(): the caller releases the result with free() */
	nodelist = (char *)malloc(total_file_len);
	if (!nodelist) {
		error("Nodelist xmalloc failed");
		goto cleanup_hostfile;
	}

	if (hostlist_ranged_string(hostlist, total_file_len, nodelist) == -1) {
		error("Hostlist is too long for the allocate RPC!");
		free(nodelist);
		nodelist = NULL;
		goto cleanup_hostfile;
	}

	debug2("Hostlist from SLURM_HOSTFILE = %s", nodelist);

cleanup_hostfile:
	hostlist_destroy(hostlist);
	xfree(end_part);
	xfree(tmp_text);

	return nodelist;
}
Пример #7
0
/*
 * Write every entry of a consolidated-output list to "stream".
 *
 * The list is sorted with _pstdout_consolidated_data_compare while the
 * supplied mutex is held.  For each entry the host set is sorted and
 * rendered as a ranged string, printed between two separator lines, and
 * followed by the entry's stored output.
 *
 * Returns 0 on success.  Returns -1 on failure; pstdout_errnum is set
 * for a lock, iterator or host-string failure.
 */
static int
_pstdout_output_consolidated(FILE *stream,
                             List whichconsolidatedlist,
                             pthread_mutex_t *whichconsolidatedmutex)
{
    struct pstdout_consolidated_data *entry;
    ListIterator iter = NULL;
    int have_lock = 0;
    int err;
    int result = -1;

    assert(stream);
    assert(stream == stdout || stream == stderr);
    assert(whichconsolidatedlist);
    assert(whichconsolidatedmutex);

    err = pthread_mutex_lock(whichconsolidatedmutex);
    if (err)
    {
        if (pstdout_debug_flags & PSTDOUT_DEBUG_STANDARD)
            fprintf(stderr, "pthread_mutex_lock: %s\n", strerror(err));
        pstdout_errnum = PSTDOUT_ERR_INTERNAL;
        goto cleanup;
    }
    have_lock = 1;

    list_sort(whichconsolidatedlist, _pstdout_consolidated_data_compare);

    iter = list_iterator_create (whichconsolidatedlist);
    if (!iter)
    {
        pstdout_errnum = PSTDOUT_ERR_OUTMEM;
        goto cleanup;
    }

    for (entry = list_next(iter); entry; entry = list_next(iter))
    {
        char hosts[PSTDOUT_BUFLEN];

        memset(hosts, '\0', sizeof(hosts));
        hostlist_sort(entry->h);
        if (hostlist_ranged_string(entry->h, sizeof(hosts), hosts) < 0)
        {
            if (pstdout_debug_flags & PSTDOUT_DEBUG_STANDARD)
                fprintf(stderr, "hostlist_ranged_string: %s\n", strerror(errno));
            pstdout_errnum = PSTDOUT_ERR_INTERNAL;
            goto cleanup;
        }

        /* host header framed by separator lines, then the output itself */
        fprintf(stream, "----------------\n");
        fprintf(stream, "%s\n", hosts);
        fprintf(stream, "----------------\n");
        fprintf(stream, "%s", entry->output);
    }

    result = 0;

cleanup:
    if (have_lock)
    {
        err = pthread_mutex_unlock(whichconsolidatedmutex);
        /* An unlock failure is only reported; the result stays as it is */
        if (err && (pstdout_debug_flags & PSTDOUT_DEBUG_STANDARD))
            fprintf(stderr, "pthread_mutex_unlock: %s\n", strerror(err));
    }
    if (iter)
        list_iterator_destroy(iter);
    return result;
}
Пример #8
0
/*
 * Remove from the host string *hosts every node that ipmidetect does not
 * report as detected.
 *
 * hosts - in/out: address of a heap-allocated hostlist string.  When at
 *         least one node remains, *hosts is replaced by a newly allocated
 *         ranged string and the old string is freed.
 *
 * Returns the number of remaining hosts, 0 if none remain (*hosts is then
 * left untouched), or -1 on error (*hosts is left untouched as well).
 */
static int
eliminate_nodes (char **hosts)
{
  hostlist_t hl = NULL;
  hostlist_t hlnew = NULL;
  hostlist_iterator_t hitr = NULL;
  ipmidetect_t id = NULL;
  char *host = NULL;
  char *newhosts = NULL;
  char hostbuf[HOSTLIST_BUFLEN + 1];
  int rv = -1;

  assert (hosts);
  assert (*hosts);

  if (!(id = ipmidetect_handle_create ()))
    {
      fprintf (stderr,
               "ipmidetect_handle_create\n");
      goto cleanup;
    }

  if (ipmidetect_load_data (id,
                            NULL,
                            0,
                            0) < 0)
    {
      if (ipmidetect_errnum (id) == IPMIDETECT_ERR_CONNECT
          || ipmidetect_errnum (id) == IPMIDETECT_ERR_CONNECT_TIMEOUT)
        fprintf (stderr,
                 "Error connecting to ipmidetect daemon\n");
      else
        fprintf (stderr,
                 "ipmidetect_load_data: %s\n", ipmidetect_errormsg (id));
      goto cleanup;
    }

  /* hl is iterated; hlnew is a second copy that hosts get deleted from */
  if (!(hl = hostlist_create (*hosts)))
    {
      fprintf (stderr,
               "hostlist_create: %s\n",
               strerror (errno));
      goto cleanup;
    }

  if (!(hlnew = hostlist_create (*hosts)))
    {
      fprintf (stderr,
               "hostlist_create: %s\n",
               strerror (errno));
      goto cleanup;
    }

  if (!(hitr = hostlist_iterator_create (hl)))
    {
      fprintf (stderr,
               "hostlist_iterator_create: %s\n",
               strerror (errno));
      goto cleanup;
    }

  while ((host = hostlist_next (hitr)))
    {
      int ret;

      if ((ret = ipmidetect_is_node_detected (id, host)) < 0)
        {
          if (ipmidetect_errnum (id) == IPMIDETECT_ERR_NOTFOUND)
            fprintf (stderr,
                     "Node '%s' unrecognized by ipmidetect\n", host);
          else
            fprintf (stderr,
                     "ipmidetect_is_node_detected: %s\n", ipmidetect_errormsg (id));
          /* host is released at cleanup */
          goto cleanup;
        }

      if (!ret)
        hostlist_delete (hlnew, host);

      free (host);
    }
  host = NULL;

  if (!hostlist_count (hlnew))
    {
      rv = 0;
      goto cleanup;
    }
 
  memset (hostbuf, '\0', HOSTLIST_BUFLEN + 1);
 
  if (hostlist_ranged_string (hlnew, HOSTLIST_BUFLEN, hostbuf) < 0)
    {
      fprintf (stderr,
               "hostlist_ranged_string: truncation\n");
      goto cleanup;
    }

  /*
   * Duplicate first and only then release the old string, so that a
   * failed allocation leaves the caller's *hosts valid.
   */
  if (!(newhosts = strdup (hostbuf)))
    {
      fprintf (stderr, "strdup: %s\n", strerror (errno));
      goto cleanup;
    }
  free (*hosts);
  *hosts = newhosts;

  rv = hostlist_count (hlnew);
 cleanup:
  if (id)
    ipmidetect_handle_destroy (id);
  if (hitr)
    hostlist_iterator_destroy (hitr);
  if (hl)
    hostlist_destroy (hl);
  if (hlnew)
    hostlist_destroy (hlnew);
  free (host);
  return (rv);
}
Пример #9
0
/*
 * slurm_-prefixed entry point for hostlist_ranged_string(): the arguments
 * are forwarded unchanged and the callee's result is returned as is.
 */
extern ssize_t slurm_hostlist_ranged_string(hostlist_t hl, size_t n, char *buf)
{
	ssize_t rc = hostlist_ranged_string(hl, n, buf);

	return rc;
}