示例#1
0
/*
 * schd_bits2mask - convert a string of '0' and '1' characters to a Bitfield.
 *
 * The last character of 'string' is treated as bit 0, the character before
 * it as bit 1, and so on.  The result is assembled in a local Bitfield and
 * copied into 'mask' only once the whole string has been validated, so a
 * failed conversion leaves the caller's mask untouched.
 *
 * An empty string yields an all-clear mask and is reported as success.
 *
 * Returns 0 on success, or -1 if 'string' is NULL or contains a character
 * other than '0' or '1'.
 */
int
schd_bits2mask(char *string, Bitfield *mask)
  {
  int    i, bit, len;
  Bitfield new_bit;

  /* strlen() on a NULL pointer is undefined; treat it as a parse failure. */
  if (string == NULL)
    return (-1);

  BITFIELD_CLRALL(&new_bit);

  len = (int)strlen(string);

  /* Start with the low bit, which is the rightmost character. */
  bit = 0;

  for (i = 0; i < len; i++)
    {
    if (string[len - i - 1] == '1')
      {
      BITFIELD_SETB(&new_bit, bit);
      }
    else if (string[len - i - 1] != '0')
      {
      /* Anything other than a binary digit invalidates the string. */
      return (-1);
      }

    ++ bit;
    }

  /* Done.  Copy the new nodemask and return success. */

  BITFIELD_CPY(mask, &new_bit);

  return (0);
  }
示例#2
0
文件: mapnodes.c 项目: altair4/pbspro
/**
 * @brief
 * 	availnodes() - Build a bitfield with one bit set for every node in the
 * 	node map that is configured with at least the minimum memory and the
 * 	minimum number of cpus (c.f. mapnodes.h).  These are the nodes that are
 * 	physically available for allocation; policy, reservations, etc. may
 * 	still reduce the set that jobs can actually use.
 *
 * 	Negative minimums are replaced with defaults before the scan
 * 	(minnodemem <- minmem, minnodecpus <- mincpus, memreserved <- 0), and
 * 	the effective values are logged.  Each rejected node is logged too.
 *
 * NOTE: Must be called after mapnodes().
 *
 * @param[out] maskp - pointer to Bitfield structure that receives the result
 *
 * @return	int
 * @retval	0	success
 * @retval	!0	failure (no node map has been built)
 *
 */
int
availnodes(Bitfield *maskp)
{
	Bitfield		usable;
	int			nd;

	/* Without a node map there is nothing to examine. */
	if (nodemap == NULL)
		return 1;

	/* Replace any unset (negative) minimums with their defaults. */
	if (minnodemem < 0)
		minnodemem = minmem;
	if (minnodecpus < 0)
		minnodecpus = mincpus;
	if (memreserved < 0)
		memreserved = 0;

	(void)sprintf(log_buffer,
		"Minimum node resources: %d cpus, %d MB, %d MB rsvd",
		minnodecpus, minnodemem, memreserved);
	log_err(-1, __func__, log_buffer);

	BITFIELD_CLRALL(&usable);

	/* Examine every node id from 0 through maxnodeid, inclusive. */
	for (nd = 0; nd <= maxnodeid; nd++) {
		int	ncpus = 0;
		int	slot;

		/* A cpu slot holding a non-negative value counts as a cpu. */
		for (slot = 0; slot < MAX_CPUS_PER_NODE; slot++) {
			if (nodemap[nd].cpu[slot] >= 0)
				ncpus++;
		}

		/* Sufficiently endowed: mark the node and move on. */
		if (nodemap[nd].memory >= minnodemem && ncpus >= minnodecpus) {
			BITFIELD_SETB(&usable, nd);
			continue;
		}

		/* Too little memory or too few cpus: report and skip. */
		(void)sprintf(log_buffer,
			"node %d has only %luMB and %d cpus - cannot use",
			nd, nodemap[nd].memory, ncpus);
		log_err(-1, __func__, log_buffer);
	}

	/* Hand the finished mask back to the caller. */
	BITFIELD_CPY(maskp, &usable);

	return 0;
}
示例#3
0
/*
 * schd_alloc_nodes - pick 'nnodes' nodes for a job from a queue.
 *
 * Bit positions are scanned from BITFIELD_SIZE - 1 down to 0.  A position is
 * taken when it is set in both the queue's queuemask and its availmask.  The
 * scan stops as soon as 'nnodes' positions have been taken.
 *
 * On success the chosen bits are copied into 'job_mask' and 'nnodes' is
 * returned.  If fewer than 'nnodes' positions qualify, 'job_mask' is left
 * untouched and 0 is returned.
 */
int schd_alloc_nodes(int nnodes, Queue *queue, Bitfield *job_mask)
  {
  Bitfield *assigned = &(queue->queuemask);
  Bitfield *free_now = &(queue->availmask);
  Bitfield picked;
  int bit;
  int taken = 0;

  BITFIELD_CLRALL(&picked);

  for (bit = (BITFIELD_SIZE - 1); (bit >= 0) && (taken < nnodes); bit--)
    {
    /* Not one of this queue's nodes. */
    if (!(BITFIELD_TSTB(assigned, bit)))
      continue;

    /* Belongs to the queue, but is not currently available. */
    if (!(BITFIELD_TSTB(free_now, bit)))
      continue;

    BITFIELD_SETB(&picked, bit);
    taken++;
    }

  /* Didn't get enough nodes.  Return 0 as an error. */
  if (taken != nnodes)
    return 0;

  /* Enough nodes were found; publish the resulting nodemask. */
  BITFIELD_CPY(job_mask, &picked);

  return nnodes;
  }
示例#4
0
/*
 * find_nodemasks - recompute a queue's available-node mask.
 *
 * The queue's availmask becomes the intersection of its queuemask with the
 * availmask of queue->rsrcs, minus the nodemask of every job on the queue
 * whose state is 'R'.  Those in-use bits are also OR'd into
 * rsrcs->nodes_used.
 *
 * NOTE(review): the available bits are read through queue->rsrcs while the
 * used bits are written through the 'rsrcs' parameter; presumably callers
 * pass the same Resources for both -- confirm against the call sites.
 *
 * Always returns 0.
 */
static int
find_nodemasks(Queue *queue, Resources *rsrcs)
  {
  Bitfield busy;
  Job *jp;

  /* Nodes that are both assigned to this queue and physically available. */
  BITFIELD_CPY(&queue->availmask, &(queue->queuemask));
  BITFIELD_ANDM(&queue->availmask, &(queue->rsrcs->availmask));

  /*
   * Collect the nodes held by running jobs.  The job list is only walked
   * when the queue reports something running.
   */
  BITFIELD_CLRALL(&busy);

  for (jp = (queue->running ? queue->jobs : NULL); jp != NULL; jp = jp->next)
    {
    if (jp->state != 'R')
      continue;

    BITFIELD_SETM(&busy, &(jp->nodemask));
    }

  /* Busy nodes are not available to the queue ... */
  BITFIELD_CLRM(&queue->availmask, &busy);

  /* ... and are accounted as used in the resources. */
  BITFIELD_SETM(&rsrcs->nodes_used, &busy);

  return (0);
  }
示例#5
0
文件: getrsrcs.c 项目: CESNET/torque
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 * On a cache miss the resource monitor on 'exechost' is queried, with a
 * SIGALRM handler installed and alarm(GETRSRCS_CONNECT_TIME) armed, for
 * loadave, availmem, physmem and ncpus, plus availmask when NODEMASK is
 * defined and the HPM control state when schd_MANAGE_HPM is set.  Any
 * previously pending alarm is re-armed afterwards with the elapsed time
 * deducted.  The new entry is appended to the tail of schd_RsrcsList.
 *
 * Returns the cached or newly created Resources, or NULL if memory could
 * not be allocated, the resource monitor could not be contacted, or one of
 * its replies was missing or unparseable.
 */
Resources *
schd_get_resources(char *exechost)
{
    char   *id = "schd_get_resources";
    Resources *rptr, *new_rsrcs;
    int     rm;

    char   *response = NULL;
    int     badreply   = 0;
    int     cpus_avail = 0;
    size_t  pmem_avail = 0;

    char    hpm_ctl[64];

    struct sigaction act, oact;
    int     handler_set = 0; /* Non-zero once our SIGALRM handler is in. */

    unsigned int remain; /* Time remaining in any old alarm(). */
    time_t then;  /* When this alarm() was started. */
    time_t elapsed; /* Seconds spent in this function since 'then'. */

#ifdef NODEMASK
    Bitfield cpy;
    int     i, j;
#endif /* NODEMASK */

    /*
     * Check for a local copy of the resources being available already.
     * If so, just return a reference to that Resources structure.
     */

    if (schd_RsrcsList != NULL)
    {
        for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
            if (strcmp(rptr->exechost, exechost) == 0)
                return (rptr);
    }

    schd_timestamp("get_rsrcs");

    /*
     * No cached resource information for 'exechost'.  Need to query the
     * host for its information.
     */

    if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
        (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        return (NULL); /* Can't get the information - nowhere to store it. */
    }

    memset((void *)new_rsrcs, 0, sizeof(Resources));

    act.sa_flags = 0;
    act.sa_handler = connect_interrupt;
    sigemptyset(&act.sa_mask);
    remain = 0;
    then = 0;

    /*
     * Set the alarm, and maintain some idea of how long was left on any
     * previously set alarm.  If the handler cannot be installed, no alarm
     * is armed and any existing alarm and handler are left alone.
     */

    if (sigaction(SIGALRM, &act, &oact) == 0)
    {
        handler_set = 1;
        remain = alarm(GETRSRCS_CONNECT_TIME);
        then = time(NULL);
    }

    if ((rm = openrm(exechost, 0)) == -1)
    {
        (void)sprintf(log_buffer,
                      "Unable to contact resmom@%s (%d)", exechost, pbs_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

        badreply = 1;
        goto bail;
    }

    /*
     * Turn off full response.  Responses will be received in the order in
     * which they are sent.
     */
    fullresp(0);

    /* Build a list of all the resources about which we want information. */

    addreq(rm, "loadave");

    addreq(rm, "availmem");

    addreq(rm, "physmem");

    addreq(rm, "ncpus");

#ifdef NODEMASK
    addreq(rm, "availmask");

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_QUERY_STR);
        addreq(rm, hpm_ctl);
    }

    /*
     * Get the values back from the resource monitor, and round up.
     *
     * Every reply buffer is released as soon as it has been consumed and
     * 'response' is reset to NULL, so that the common cleanup at 'bail'
     * never frees the same buffer a second time.
     */

    /* Receive LOADAVE response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->loadave = atof(response) * schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive AVAILMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->freemem = schd_val2byte(response);
        new_rsrcs->freemem *= schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive PHYSMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        pmem_avail = schd_val2byte(response);
        pmem_avail *= schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive NCPUS response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        cpus_avail = atoi(response) * schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

#ifdef NODEMASK
    /* Receive available nodes from resource monitor. */
    response = getreq(rm);

    if (response == NULL)
    {
        (void)sprintf(log_buffer, "bad return from getreq(availmask), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }
    else
    {
        /* Try the binary-digit form first, then fall back to hex/decimal. */
        if (schd_bits2mask(response, &new_rsrcs->availmask) != 0)
        {
            if (schd_str2mask(response, &new_rsrcs->availmask) != 0)
            {
                (void)sprintf(log_buffer, "can't parse availmask '%s'", response);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
                badreply = 1;
                goto bail; /* 'response' is released at bail. */
            }
        }

        (void)free(response);
        response = NULL;
    }

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        /* Receive HPM_CTL response from resource monitor. */
        response = getreq(rm);

        if (response != NULL)
        {
            if (strcmp(response, HPM_CTL_USERMODE_STR) == 0)
                new_rsrcs->flags |= RSRCS_FLAGS_HPM_USER;
            else if (strcmp(response, HPM_CTL_GLOBALMODE_STR) == 0)
                new_rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER;
            else
            {
                (void)sprintf(log_buffer, "bad response '%s' for '%s@%s'",
                              response, hpm_ctl, exechost);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
                badreply = 1;
                goto bail;
            }
        }
        else
        {
            (void)sprintf(log_buffer, "bad return from getreq(%s), %d, %d",
                          hpm_ctl, pbs_errno, errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            badreply = 1;
            goto bail;
        }
    }

    /*
     * NOTE: a still-held response (the HPM reply, or a reply that failed to
     * parse) is free()'d in bail.  Replies consumed above have already been
     * released and 'response' reset to NULL.
     */

bail:
    if (response != NULL)
    {
        (void)free(response);
        response = NULL;
    }

    /* Disconnect from the resource monitor. */
    if (rm >= 0)  /* resmom handle "0" is valid in RPP. */
        closerm(rm);

    /*
     * And unset the alarm and handler -- but only if they were actually
     * installed above.  Otherwise 'oact' was never filled in, and the
     * pending alarm (if any) is not ours to cancel.
     */
    if (handler_set)
    {
        alarm(0);

        sigaction(SIGALRM, &oact, NULL);
    }

    /* Reset the old alarm, taking into account how much time has passed. */
    if (remain)
    {
        elapsed = time(NULL) - then;

        if (elapsed < 0)
            elapsed = 0; /* Clock moved backwards; assume no time passed. */

        DBPRT(("%s: old alarm had %d secs remaining, %d elapsed, ", id,
               (int)remain, (int)elapsed));

        /*
         * How much time remains even after the time spent above?  Would
         * the previous time have already expired?  If so, schedule an
         * alarm call in 1 second (close enough, hopefully).  'remain' is
         * unsigned, so compare before subtracting rather than after.
         */
        if ((time_t)remain > elapsed)
            remain -= (unsigned int)elapsed;
        else
            remain = 1;

        DBPRT(("reset to %d secs\n", (int)remain));

        alarm(remain);
    }

    /*
     * Verify all the data came back as expected; if not, abort this
     * iteration of the scheduler.
     */
    if (badreply)
    {
        (void)sprintf(log_buffer,
                      "Got bad info from mom@%s - aborting sched run", exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    /* Make a copy of the hostname for the resources struct. */
    new_rsrcs->exechost = schd_strdup(exechost);

    if (new_rsrcs->exechost == NULL)
    {
        (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                      exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    new_rsrcs->nodes_total = NODES_REQD(cpus_avail, pmem_avail);

#ifdef NODEMASK
    /* Copy the availmask schd_FAKE_MACH_MULT times to match avail cpus. */
    BITFIELD_CPY(&cpy, &(new_rsrcs->availmask));

    for (i = 2; i <= schd_FAKE_MACH_MULT; i++)
    {
        for (j = 0; j < (cpus_avail / schd_FAKE_MACH_MULT / 2); j++)
            BITFIELD_SHIFTL(&cpy);

        BITFIELD_SETM(&(new_rsrcs->availmask), &cpy);
    }

#endif /* NODEMASK */

    if (schd_RsrcsList == NULL)
    {
        schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
    else
    {
        for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
            /* Find the last element in the list. */ ;

        rptr->next = new_rsrcs;
    }

    /* Next pointer for the tail of the list points to nothing. */
    new_rsrcs->next = NULL;

    return (new_rsrcs);
}
示例#6
0
/*
 * Compare the nodemask of the queue in 'subject' with the queue of every
 * entry on the list starting at 'others'.
 *
 * Entries whose exechost differs from the subject's are skipped (queues on
 * different hosts may appear to overlap - no problem), as are entries
 * without QFLAGS_NODEMASK set.  The first pair whose queuemasks share a bit
 * is logged under 'id' and ends the scan.
 *
 * Returns 1 if an overlap was found, 0 otherwise.
 */
static int
nodemask_hits_list(char *id, QueueList *subject, QueueList *others)
  {
  QueueList *peer;
  Bitfield shared;

  for (peer = others; peer != NULL; peer = peer->next)
    {
    /* Different hosts: the masks describe different machines. */
    if (strcmp(subject->queue->exechost, peer->queue->exechost) != 0)
      continue;

    /* Nothing to compare against if the peer has no nodemask. */
    if (!(peer->queue->flags & QFLAGS_NODEMASK))
      continue;

    DBPRT(("comparing nodemask for queue %s with queue %s\n",
           subject->queue->qname, peer->queue->qname));

    /* shared = subject's mask AND peer's mask */
    BITFIELD_CPY(&shared, &(subject->queue->queuemask));
    BITFIELD_ANDM(&shared, &(peer->queue->queuemask));

    if (BITFIELD_IS_ZERO(&shared))
      continue;

    (void)sprintf(log_buffer,
                  "Queues %s and %s have overlapping nodemasks!",
                  subject->queue->qname, peer->queue->qname);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (1);
    }

  return (0);
  }

/*
 * nodemask_overlaps - look for queues on the same host whose nodemasks
 * share at least one bit.
 *
 * Every batch queue that has QFLAGS_NODEMASK set is checked first against
 * the batch queues that follow it on schd_BatchQueues, and then, in a
 * second pass, against every queue on schd_ExternQueues.
 *
 * Returns 1 (after logging the offending pair) on the first overlap found,
 * or 0 if there is none.
 */
static int
nodemask_overlaps(void)
  {
  char    *id = "nodemask_overlaps";
  QueueList *batch;

  /* Check for overlapping queue nodemasks in batch queues. */
  for (batch = schd_BatchQueues; batch != NULL; batch = batch->next)
    {
    if (!(batch->queue->flags & QFLAGS_NODEMASK))
      continue;

    if (nodemask_hits_list(id, batch, batch->next))
      return (1);
    }

  /* Make sure batch queues and external queues have differing nodemasks. */
  for (batch = schd_BatchQueues; batch != NULL; batch = batch->next)
    {
    if (!(batch->queue->flags & QFLAGS_NODEMASK))
      continue;

    if (nodemask_hits_list(id, batch, schd_ExternQueues))
      return (1);
    }

  return (0);
  }
示例#7
0
/*
 * schd_str2mask - convert a textual nodemask into a Bitfield.
 *
 * 'maskstr' is normally a hexadecimal string of the form "0x...", whose
 * last digit supplies bits 0-3, the digit before it bits 4-7, and so on.
 * For backwards compatibility a string that does not start with "0x" is
 * parsed as a decimal unsigned long long, rewritten as hex, and a log
 * record of the conversion is written.
 *
 * The mask is assembled locally and copied to 'maskp' only on success, so
 * a failed conversion leaves the caller's Bitfield untouched.  A bare "0x"
 * yields an all-clear mask.
 *
 * NOTE(review): an empty string takes the decimal path, converts to 0 and
 * is therefore accepted as an all-clear mask -- confirm that is intended.
 *
 * Returns 0 on success, or -1 if 'maskstr' is NULL, is not a valid hex or
 * decimal number, or has more hex digits than fit in a Bitfield.
 */
int
schd_str2mask(char *maskstr, Bitfield *maskp)
  {
  char *id = "str2mask";
  Bitfield nodemask;
  unsigned char hex;
  int ndbit = 0, len, dec, hxbit;
  int maxbits = (int)(sizeof(Bitfield) * 8);

  unsigned long long compat;
  char *ptr;
  char     buf[32];

  if (maskstr == NULL)
    return (-1);

  /*
   * Nodemask string must be in the format '0x...'.  The <ctype.h>
   * functions require a value representable as unsigned char, so plain
   * chars are converted before being passed in.
   */
  if ((maskstr[0] != '0') || (tolower((unsigned char)maskstr[1]) != 'x'))
    {

    /* XXX
     * For backwards compatibility, if the string will convert
     * into an unsigned long long, then assume this is an old-
     * style decimal nodemask (on 8-128P machines, nodemask is
     * an unsigned long long, and was treated as such).
     *
     * strtoull() is used so that the full unsigned long long
     * range converts even where unsigned long is narrower.
     */
    compat = strtoull(maskstr, &ptr, 10);

    if (*ptr != '\0')
      return (-1);

    /*
     * String converted to an unsigned long long.  Print it as
     * a hex back into a buffer, and point at that buffer instead.
     * This will cause the newly created hex string to be used.
     * ("0x" plus at most 16 hex digits fits easily in buf.)
     */
    (void)sprintf(buf, "0x%llx", compat);

    maskstr = buf;

    /* %llu matches the unsigned long long argument. */
    (void)sprintf(log_buffer,
                  "converted old-style nodemask %llu to %s", compat, maskstr);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               log_buffer);
    }

  /* Zero out the new nodemask struct, and fill it from the maskstr. */
  BITFIELD_CLRALL(&nodemask);

  /* Walk backwards through the hex digits in the string 0x... */
  for (len = (int)strlen(maskstr); len > 2 /* 0x... */; len --)
    {
    hex = (unsigned char)maskstr[len - 1];

    /* Make sure it's a hex digit. */
    if (!isxdigit(hex))
      return (-1);

    /*
     * Make sure this digit's four bits still lie inside the nodemask
     * before any of them is set, so that nothing is ever written past
     * the end of the Bitfield.
     */
    if (ndbit + 4 > maxbits)
      return (-1);

    /* If this digit is a '0', it contributes nothing. */
    if (hex != '0')
      {
      /* Convert the hex digit to a decimal int. */
      dec = isdigit(hex) ? (hex - '0') : (tolower(hex) - 'a' + 10);

      for (hxbit = 0; hxbit < 4; hxbit ++)
        {
        if (dec & (1 << hxbit))
          {
          BITFIELD_SETB(&nodemask, ndbit + hxbit);
          }
        }
      }

    /* Every hex digit accounts for four bit positions. */
    ndbit += 4;
    }

  /* Copy the newly created nodemask into the handed-in one. */
  BITFIELD_CPY(maskp, &nodemask);

  return (0);
  }
示例#8
0
/*
 * schd_alloc_nodes - choose 'request' nodes from a queue's available nodes,
 * preferring contiguous runs of bits.
 *
 * Working on a copy of queue->availmask, the function repeatedly looks for
 * the largest run of contiguous available bits that is no longer than the
 * number of nodes still needed, searching from the most significant
 * available bit downwards.  Each run found is added to the result and
 * removed from the working copy, until the request is satisfied.
 *
 * On success the resulting mask is copied into 'maskp' and the number of
 * nodes allocated (equal to 'request') is returned.  Returns 0, leaving
 * 'maskp' untouched, if 'request' is not positive, exceeds the number of
 * available nodes, or cannot be satisfied.
 */
int schd_alloc_nodes(int request, Queue *queue, Bitfield *maskp)
  {
  char   *id = "schd_alloc_nodes";
  Bitfield avail;
  Bitfield mask;
  Bitfield contig;
  int remain;
  int qmsb;
  int qlsb;
  int span;
  int i, n;
  int count;
  int found;

  /* Make certain the nodecount request can be fulfilled. */

  if (request <= 0 || request > BITFIELD_NUM_ONES(&(queue->availmask)))
    return 0;

  /*
   * Make a copy of the queue's available bit mask to play with, and clear
   * the allocated nodes mask.
   */
  BITFIELD_CPY(&avail, &(queue->availmask));

  BITFIELD_CLRALL(&mask);

  /* How many have been found, and how many remain. */
  found  = 0;

  remain = request;

  while (remain > 0)
    {
    /*
     * Find first and last available bit positions in the
     * queue's available node mask, and the width of the window
     * between them (inclusive).
     */
    qmsb = BITFIELD_MS_ONE(&avail);
    qlsb = BITFIELD_LS_ONE(&avail);
    span = qmsb + 1 - qlsb;

    /*
     * Starting with the size of the remaining nodes needed to satisfy
     * this request, look for a set of 'n' contiguous bits in the
     * available node mask.  If that is not found, try the next smallest
     * contiguous vector, etc.
     *
     * A run wider than the msb..lsb window can never match (its shift
     * count below would be zero or negative), and building it would ask
     * for bit positions below zero, so start no wider than the window.
     */

    for (n = (remain < span) ? remain : span; n > 0; n--)
      {
      /*
       * Create a contiguous bitmask of 'n' bits, starting at the
       * position of the highest bit in the avail mask.
       */
      BITFIELD_CLRALL(&contig);

      for (i = 0; i < n; i++)
        BITFIELD_SETB(&contig, qmsb - i);

      /*
       * Calculate how many times this contiguous bitmask needs to be
       * shifted to the right to cover every set of 'n' bits between
       * the qmsb and qlsb, inclusive.  Count the initial configuration
       * as well (the trailing '+ 1').
       */
      count = span - n + 1;

      /*
       * Shift the contiguous mask right one bit at a time, checking
       * if all the bits in the mask are set in the available mask.
       */
      for (i = 0; i < count; i++)
        {

        /* Are all bits in contig also set in the avail mask? */
        if (BITFIELD_TSTALLM(&avail, &contig))
          {
          break;
          }

        BITFIELD_SHIFTR(&contig);
        }

      /*
       * If the contiguous bits are available, add them to the new job
       * nodemask, and remove them from the avail mask.  Adjust the
       * remaining node count, and start the next hunt for the remaining
       * nodes.
       */
      if (i < count)
        {
        BITFIELD_SETM(&mask,  &contig);
        BITFIELD_CLRM(&avail, &contig);

        found  += n;
        remain -= n;

        break; /* for(n) loop */
        }
      }

    /*
     * Check for something going wrong.  'n' is positive only when the
     * loop above was left through its break; a non-positive value means
     * no run of any size was found (or the window itself was empty).
     */
    if (n <= 0)
      {
      DBPRT(("%s: couldn't find any contiguous bits (even one!)\n", id));
      break; /* while(remain) loop */
      }
    }

  /*
   * If no bits remain to be allocated, copy the new mask into the provided
   * space, and return the number of bits requested.
   */
  if (!remain && (found == request))
    {
    BITFIELD_CPY(maskp, &mask);
    DBPRT(("%s: mask %s\n", id,
           schd_format_nodemask(&queue->queuemask, maskp)));
    return found;
    }

  return 0;
  }