Ejemplo n.º 1
0
/*
 * PBS has been known to provide some bizarre information about the state
 * of the queue (i.e. no jobs are running in it, but some of its resources
 * are consumed).  Perform some consistency checks on the queue information
 * before agreeing that it is correct.  Every inconsistency found is written
 * to the log (and to the debug output), so one call may produce several
 * messages.
 *
 * Checks performed:
 *   - (NODEMASK builds, queues flagged QFLAGS_NODEMASK only) the number of
 *     bits set in the queue's nodemask is compared to queue->nodes_max.  A
 *     mismatch is logged but does not, by itself, mark the queue insane.
 *   - (same condition) queue->nodes_assn is compared to the nodes summed
 *     over running jobs.  More assigned than in use marks the queue insane;
 *     fewer assigned than in use is logged and queue->nodes_assn is
 *     overwritten with the computed total.
 *   - queue->running and queue->queued must equal the number of jobs in
 *     state 'R' and 'Q' respectively on the queue's job list.
 *   - a queue with running jobs must not have an empty_by time that is
 *     earlier than schd_TimeNow.
 *
 * Returns 1 if the queue passed the checks, 0 if any check marked it insane.
 */
static int
queue_sanity(Queue *queue)
  {
  char   *id = "queue_sanity";
  Job    *job;
  int     queued, running;
  int     is_sane;
#ifdef NODEMASK
  int     nodes_used;
#endif /* NODEMASK */

  is_sane = 1; /* Assume the queue is sane for now. */

#ifdef NODEMASK

  if (queue->flags & QFLAGS_NODEMASK)
    {
    /* Logged only: this mismatch does not change the return value. */
    if (queue->nodes_max != BITFIELD_NUM_ONES(&(queue->queuemask)))
      {
      sprintf(log_buffer, "Queue '%s' nodemask does not contain "
              "expected %d nodes (has %d)", queue->qname, queue->nodes_max,
              BITFIELD_NUM_ONES(&(queue->queuemask)));
      log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      }

    /* Total the nodes held by jobs that are currently running. */
    nodes_used = 0;

    for (job = queue->jobs; job != NULL; job = job->next)
      {
      if (job->state == 'R')
        nodes_used += job->nodes;
      }

    if (queue->nodes_assn != nodes_used)
      {
      if (queue->nodes_assn > nodes_used)
        {
        /* Nodes are consumed that no running job accounts for. */
        sprintf(log_buffer,
                "Queue '%s' has %d nodes assigned, only %d in running jobs",
                queue->qname, queue->nodes_assn, nodes_used);
        is_sane = 0;
        }
      else
        {
        /* The queue under-reports; trust the per-job total instead. */
        sprintf(log_buffer, "Queue '%s' has %d nodes assigned, "
                "%d in running jobs - using %d",
                queue->qname, queue->nodes_assn, nodes_used, nodes_used);
        queue->nodes_assn = nodes_used;
        }

      log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);

      DBPRT(("%s: %s\n", id, log_buffer));
      }
    }

#endif /* NODEMASK */

  /*
   * Count running and queued jobs and make sure the numbers match up.
   */
  queued = running = 0;

  for (job = queue->jobs; job != NULL; job = job->next)
    {
    switch (job->state)
      {

      case 'R':
        running ++;
        break;

      case 'Q':
        queued ++;
        break;

      default:
        /* Jobs in any other state are not counted. */
        break;
      }
    }

  if (queue->running != running)
    {
    sprintf(log_buffer,
            "Queue '%s' expected %d running jobs, but found %d",
            queue->qname, queue->running, running);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    is_sane = 0;
    }

  if (queue->queued != queued)
    {
    sprintf(log_buffer,
            "Queue '%s' expected %d queued jobs, but found %d",
            queue->qname, queue->queued, queued);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    is_sane = 0;
    }

  if (queue->running && (queue->empty_by < schd_TimeNow))
    {
    /*
     * The difference is cast to long and printed with %ld so that the
     * conversion specifier always matches the argument, even if these
     * timestamps are wider than int.
     * NOTE(review): they look like time_t values - confirm against the
     * declarations of schd_TimeNow and Queue.empty_by.
     */
    sprintf(log_buffer,
            "Queue '%s' was expected to be empty %ld seconds ago",
            queue->qname, (long)(schd_TimeNow - queue->empty_by));
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    is_sane = 0;
    }

  return (is_sane);
  }
Ejemplo n.º 2
0
/*
 * Pick 'request' nodes out of the queue's available node mask, preferring
 * contiguous runs that are as long as possible.
 *
 * The search works on a private copy of queue->availmask.  On each pass it
 * looks for the widest run of set bits (at most the number of nodes still
 * needed) lying between the highest and lowest set bits of that copy,
 * trying candidate positions from the high end downward.  The run found is
 * added to the result mask and removed from the copy, and the search
 * repeats for whatever is still outstanding.
 *
 * On success the chosen bits are copied into *maskp and 'request' is
 * returned.  If the request is not positive, exceeds the number of
 * available bits, or cannot be met, 0 is returned and *maskp is left
 * untouched.  queue->availmask itself is never modified here.
 */
int schd_alloc_nodes(int request, Queue *queue, Bitfield *maskp)
  {
  char   *id = "schd_alloc_nodes";
  Bitfield avail;      /* working copy of the queue's available nodes */
  Bitfield mask;       /* nodes selected so far */
  Bitfield window;     /* candidate run of contiguous bits */
  int remain;          /* nodes still to be allocated */
  int found;           /* nodes allocated so far */
  int hi, lo;          /* highest / lowest set bit in 'avail' */
  int span;            /* number of bit positions from lo to hi, inclusive */
  int width;           /* length of the run currently being tried */
  int positions;       /* distinct placements of 'window' within the span */
  int shift;           /* how far 'window' has been shifted right */
  int bit;
  int placed;

  /* Reject requests that can never be satisfied. */
  if (request <= 0)
    return 0;

  if (request > BITFIELD_NUM_ONES(&(queue->availmask)))
    return 0;

  /* Work on a scratch copy of the availability mask; start with no picks. */
  BITFIELD_CPY(&avail, &(queue->availmask));

  BITFIELD_CLRALL(&mask);

  found  = 0;

  remain = request;

  while (remain > 0)
    {
    /* Bounds of the region that still has free nodes in it. */
    hi = BITFIELD_MS_ONE(&avail);
    lo = BITFIELD_LS_ONE(&avail);

    span = hi + 1 - lo;

    placed = 0;

    /*
     * Try the widest useful run first and fall back to progressively
     * narrower ones until something fits.
     */
    width = remain;

    while (width > 0)
      {
      /* Build a run of 'width' bits anchored at the highest free bit. */
      BITFIELD_CLRALL(&window);

      for (bit = hi; bit > hi - width; bit--)
        {
        BITFIELD_SETB(&window, bit);
        }

      /*
       * Number of placements of the run between hi and lo, counting the
       * starting placement itself.
       */
      positions = span - width + 1;

      /*
       * Slide the run toward the low end one bit at a time until every
       * bit it covers is also set in the available mask, or there are
       * no placements left.
       */
      shift = 0;

      while ((shift < positions) && !(BITFIELD_TSTALLM(&avail, &window)))
        {
        BITFIELD_SHIFTR(&window);
        shift++;
        }

      if (shift < positions)
        {
        /* The run fits: claim it and take it out of circulation. */
        BITFIELD_SETM(&mask,  &window);
        BITFIELD_CLRM(&avail, &window);

        found  += width;
        remain -= width;

        placed = 1;
        break;
        }

      width--;
      }

    /* Not even a single free bit could be placed; give up. */
    if (!placed)
      {
      DBPRT(("%s: couldn't find any contiguous bits (even one!)\n", id));
      break;
      }
    }

  /* Anything short of a complete allocation is reported as failure. */
  if (remain != 0 || found != request)
    return 0;

  /* Hand the finished mask back to the caller. */
  BITFIELD_CPY(maskp, &mask);
  DBPRT(("%s: mask %s\n", id,
         schd_format_nodemask(&queue->queuemask, maskp)));

  return found;
  }