예제 #1
0
/*
 * Populate the queue struct with the information needed for scheduling;
 * querying the resource monitor for queue->exechost's information.
 */
int
schd_get_queue_info(Queue *queue)
  {
  char *id = "get_queue_info";

  if (queue->ncpus_assn == UNSPECIFIED)
    queue->ncpus_assn = 0;

  if (queue->mem_assn   == UNSPECIFIED)
    queue->mem_assn   = 0;

  if (queue->running    == UNSPECIFIED)
    queue->running    = 0;

  /*
   * Get the resources for this queue from the resource monitor (if
   * available).  If the resmom is not accessible, disable the queue.
   * Don't bother checking if the queue is Stopped.
   */

  if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0 &&
      (queue->flags & QFLAGS_STOPPED) == 0)
    {
    queue->rsrcs = schd_get_resources(queue->exechost);

    if (queue->rsrcs != NULL)
      {

      /* Account for this queue's resources. */
      queue->rsrcs->ncpus_alloc += queue->ncpus_assn;
      queue->rsrcs->mem_alloc   += queue->mem_assn;
      queue->rsrcs->njobs       += queue->running;
      queue->ncpus_max =
        (queue->ncpus_max <= queue->rsrcs->ncpus_total ?
         queue->ncpus_max  : queue->rsrcs->ncpus_total);
      }
    else
      {
      (void)sprintf(log_buffer,
                    "Can't get resources for %s@%s - marking unavailable.",
                    queue->qname, queue->exechost);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      queue->flags |= QFLAGS_NODEDOWN;
      }
    }

  return (0);
  }
예제 #2
0
파일: getqueues.c 프로젝트: Johnlihj/torque
/*
 * schd_get_queue_limits - query queue information from the server.
 *
 * Returns 0 on success, -1 for "fatal errors", and 1 for a transient
 * error (i.e., the queue failed the sanity checks imposed by the
 * queue_sanity() function).
 */
int
schd_get_queue_limits(Queue *queue)
  {
  char   *id = "schd_get_queue_limits";
  int     moved = 0, istrue;
  Batch_Status *bs;
  AttrList *attr;
  static AttrList alist[] =
    {
      {&alist[1],  ATTR_start, "", ""},
    {&alist[2],  ATTR_enable, "", ""},
    {&alist[3],  ATTR_count, "", ""},
    {&alist[4],  ATTR_maxuserrun, "", ""},
    {&alist[5],  ATTR_rescavail, "", ""},
    {&alist[6],  ATTR_rescassn, "", ""},
    {&alist[7],  ATTR_rescdflt, "", ""},
    {&alist[8],  ATTR_rescmax, "", ""},
    {&alist[9],  ATTR_rescmin, "", ""},
    {&alist[10], ATTR_acluren, "", ""},
    {&alist[11], ATTR_acluser, "", ""},
    {NULL,       ATTR_maxrun, "", ""}
    };
  size_t  mem_default = UNSPECIFIED;
  size_t  mem_assn    = UNSPECIFIED;
  size_t  mem_max     = UNSPECIFIED;
  size_t  mem_min     = UNSPECIFIED;
  int     cpu_default = UNSPECIFIED;
  int     cpu_assn    = UNSPECIFIED;
  int     cpu_max     = UNSPECIFIED;
  int     cpu_min     = UNSPECIFIED;
  int     nodes_from_cpu, nodes_from_mem;

  queue->running = UNSPECIFIED;
  queue->queued = UNSPECIFIED;
  queue->maxrun = UNSPECIFIED;
  queue->userrun      = UNSPECIFIED;

  queue->nodes_max = UNSPECIFIED;
  queue->nodes_min = UNSPECIFIED;
  queue->nodes_default = UNSPECIFIED;
  queue->nodes_assn = UNSPECIFIED;
  queue->nodes_rsvd = UNSPECIFIED;
  queue->wallt_max = UNSPECIFIED;
  queue->wallt_min = UNSPECIFIED;
  queue->wallt_default = UNSPECIFIED;

  queue->flags = 0;
#ifdef NODEMASK
  BITFIELD_CLRALL(&queue->queuemask);
  BITFIELD_CLRALL(&queue->availmask);
#endif /* NODEMASK */
  queue->rsrcs = NULL;

  if (queue->jobs)
    {
    DBPRT(("%s: found jobs on queue '%s'!  Freeing them...\n", id,
           queue->qname));
    schd_free_jobs(queue->jobs);
    }

  if (queue->useracl)
    {
    DBPRT(("%s: found user ACL list on queue '%s'!  Freeing it...\n", id,
           queue->qname));
    schd_free_useracl(queue->useracl);
    }

  queue->jobs         = NULL;

  queue->useracl = NULL;

  /* Ask the server for information about the specified queue. */

  if ((bs = pbs_statque(connector, queue->qname, alist, NULL)) == NULL)
    {
    sprintf(log_buffer, "pbs_statque failed, \"%s\" %d",
            queue->qname, pbs_errno);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    return (-1);
    }

  /* Process the list of attributes returned by the server. */

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /* Is queue started? */
    if (!strcmp(attr->name, ATTR_start))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue is not stopped. */
          queue->flags &= ~QFLAGS_STOPPED;
        else
          queue->flags |= QFLAGS_STOPPED;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    /* Is queue enabled? */
    if (!strcmp(attr->name, ATTR_enable))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue is not disabled. */
          queue->flags &= ~QFLAGS_DISABLED;
        else
          queue->flags |= QFLAGS_DISABLED;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    /* How many jobs are queued and running? */
    if (!strcmp(attr->name, ATTR_count))
      {
      queue->queued = schd_how_many(attr->value, SC_QUEUED);
      queue->running = schd_how_many(attr->value, SC_RUNNING);
      continue;
      }

    /* Queue-wide maximum number of jobs running. */
    if (!strcmp(attr->name, ATTR_maxrun))
      {
      queue->maxrun = atoi(attr->value);
      continue;
      }

    /* Per-user maximum number of jobs running. */
    if (!strcmp(attr->name, ATTR_maxuserrun))
      {
      queue->userrun = atoi(attr->value);
      continue;
      }

    /* Is there an enabled user access control list on this queue? */
    if (!strcmp(attr->name, ATTR_acluren))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue has an ACL */
          queue->flags |= QFLAGS_USER_ACL;
        else
          queue->flags &= ~QFLAGS_USER_ACL;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_acluser))
      {
      if (queue->useracl)
        {
        DBPRT(("queue %s acluser already set!\n", queue->qname));
        schd_free_useracl(queue->useracl);
        }

      queue->useracl = schd_create_useracl(attr->value);

      continue;
      }

    /* Queue maximum resource usage. */
    if (!strcmp(attr->name, ATTR_rescmax))
      {
      if (!strcmp("mem", attr->resource))
        {
        mem_max = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        cpu_max = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        {
        queue->wallt_max = schd_val2sec(attr->value);
        continue;
        }

#ifdef NODEMASK
      if (!strcmp("nodemask", attr->resource))
        {
        if (schd_str2mask(attr->value, &queue->queuemask))
          {
          (void)sprintf(log_buffer, "couldn't convert nodemask %s",
                        attr->value);
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     log_buffer);
          }
        else
          queue->flags |= QFLAGS_NODEMASK; /* Valid nodemask. */
        }

#endif /* NODEMASK */

      continue;
      }

    /* Queue minimum resource usage. */
    if (!strcmp(attr->name, ATTR_rescmin))
      {
      if (!strcmp("mem", attr->resource))
        {
        mem_min = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        cpu_min = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        {
        queue->wallt_min = schd_val2sec(attr->value);
        continue;
        }

      continue;
      }

    /* Queue assigned (in use) resource usage. */
    if (!strcmp(attr->name, ATTR_rescassn))
      {
      if (!strcmp("mem", attr->resource))
        {
        mem_assn = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        cpu_assn = atoi(attr->value);
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_rescdflt))
      {
      if (!strcmp("mem", attr->resource))
        {
        mem_default = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        cpu_default = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        queue->wallt_default = schd_val2sec(attr->value);
      }

    /* Ignore anything else */
    }

  pbs_statfree(bs);

  /*
   * Calculate values for queue node limits, given memory and cpu values.
   * Note any discrepancies.
   */
  nodes_from_cpu = NODES_FROM_CPU(cpu_default);
  nodes_from_mem = NODES_FROM_MEM(mem_default);

  if (nodes_from_cpu != nodes_from_mem)
    {
    sprintf(log_buffer,
            "%s: Queue '%s' default cpu/mem (%d/%s) convert to %d != %d nodes",
            id, queue->qname, cpu_default, schd_byte2val(mem_default),
            nodes_from_cpu, nodes_from_mem);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  nodes_from_cpu = NODES_FROM_CPU(cpu_max);

  nodes_from_mem = NODES_FROM_MEM(mem_max);

  if (nodes_from_cpu != nodes_from_mem)
    {
    sprintf(log_buffer,
            "%s: Queue '%s' maximum cpu/mem (%d/%s) convert to %d != %d nodes",
            id, queue->qname, cpu_max, schd_byte2val(mem_max),
            nodes_from_cpu, nodes_from_mem);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  nodes_from_cpu = NODES_FROM_CPU(cpu_min);

  nodes_from_mem = NODES_FROM_MEM(mem_min);

  if (nodes_from_cpu != nodes_from_mem)
    {
    sprintf(log_buffer,
            "%s: Queue '%s' minimum cpu/mem (%d/%s) convert to %d != %d nodes",
            id, queue->qname, cpu_min, schd_byte2val(mem_min),
            nodes_from_cpu, nodes_from_mem);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  /*
   * Note: The assigned cpus and memory need not be exactly the same
   * node equivalency.
   */
  if ((cpu_default != UNSPECIFIED) && (mem_default != UNSPECIFIED))
    queue->nodes_default = NODES_REQD(cpu_default, mem_default);

  if ((cpu_max != UNSPECIFIED) && (mem_max != UNSPECIFIED))
    queue->nodes_max     = NODES_REQD(cpu_max, mem_max);

  if ((cpu_min != UNSPECIFIED) && (mem_min != UNSPECIFIED))
    queue->nodes_min     = NODES_REQD(cpu_min, mem_min);

  if ((cpu_assn != UNSPECIFIED) && (mem_assn != UNSPECIFIED))
    queue->nodes_assn    = NODES_REQD(cpu_assn, mem_assn);

  /*
   * Move any jobs on this queue from the global list onto the queue's
   * list.  Keep track of when the longest-running job will end, and set
   * the 'empty_by' field to that value.  Maintain the ordering as it was
   * in "schd_AllJobs".
   */

  if (schd_AllJobs)
    moved = queue_claim_jobs(queue, &schd_AllJobs);

  if (moved < 0)
    {
    sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.",
            id, queue->qname);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  if (queue->nodes_assn == UNSPECIFIED)
    queue->nodes_assn = 0;

  if (queue->running    == UNSPECIFIED)
    queue->running    = 0;

  /*
   * Find out if the queue is idle, and if it was not before, set the idle
   * time to now.  If there are running jobs, the queue is not idle at the
   * start of this iteration - set idle_since to 0.
   */
  if (queue->running)
    {
    queue->idle_since = 0;
    }
  else
    {
    if (queue->idle_since == 0)
      queue->idle_since = schd_TimeNow;
    }

  /*
   * Get the resources for this queue from the resource monitor (if
   * available).  If the resmom is not accessible, disable the queue.
   * If the resources were received okay, compute the available node
   * masks from the resources and jobs.
   * Don't bother with resources for the special or submit queues.
   */
  if ((strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0) ||
      ((schd_SpecialQueue != NULL) &&
       (!strcmp(queue->qname, schd_SpecialQueue->queue->qname))))
    {

    queue->rsrcs = schd_get_resources(queue->exechost);

    if (queue->rsrcs != NULL)
      {
      /* Account for this queue's resources. */
      queue->rsrcs->nodes_alloc += queue->nodes_assn;
      queue->rsrcs->njobs       += queue->running;

      /*
       * If the HPM counters do not appear to be in use on this host,
       * check for jobs on the queue that are using hpm.  If so, set
       * the 'HPM_IN_USE' flag on the resources.  This will prevent the
       * HPM counters from being released to global mode at the end
       * of the scheduling run (c.f. cleanup.c).
       * The 'HPM_IN_USE' flag will also be asserted if a job is run
       * that uses the HPM counters.
       */

      if (schd_MANAGE_HPM &&
          !(queue->rsrcs->flags & RSRCS_FLAGS_HPM_IN_USE))
        {
        if (schd_hpm_job_count(queue->jobs))
          queue->rsrcs->flags |= RSRCS_FLAGS_HPM_IN_USE;
        }

#ifdef NODEMASK
      /* And find the nodemasks for the queue and resources. */
      find_nodemasks(queue, queue->rsrcs);

#endif /* NODEMASK */

      }
    else
      {
      (void)sprintf(log_buffer,
                    "Can't get resources for %s@%s - marking unavailable.",
                    queue->qname, queue->exechost);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      queue->flags |= QFLAGS_DISABLED;
      }
    }

#ifdef DEBUG
  schd_dump_queue(queue, QUEUE_DUMP_JOBS);

#endif /* DEBUG */

  /*
   * It would probably be better to wait for the world to stabilize
   * than to try to impose some artificial order upon it.  Do not do
   * the sanity check if the queue is stopped.
   */
  if ((queue->flags & QFLAGS_STOPPED) == 0)
    {
    if (!queue_sanity(queue))
      {
      sprintf(log_buffer, "WARNING! Queue '%s' failed sanity checks.",
              queue->qname);
      log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      return (1);
      }
    }

  return (0);
  }
예제 #3
0
/*
 * Populate the queue struct with the information needed for scheduling;
 * this includes "claiming" jobs that "belong" to this queue; and querying
 * the resource monitor for queue->exechost's information.
 */
int
schd_get_queue_info(Queue *queue)
  {
  char *id = "get_queue_info";
  int   moved = 0;

  /*
   * Move any jobs on this queue from the global list onto the queue's
   * list.  Keep track of when the longest-running job will end, and set
   * the 'empty_by' field to that value.  Maintain the ordering as it was
   * in "schd_AllJobs".
   */

  if (schd_AllJobs)
    moved = queue_claim_jobs(queue, &schd_AllJobs);

  if (moved < 0)
    {
    sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.",
            id, queue->qname);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  if (queue->ncpus_assn == UNSPECIFIED)
    queue->ncpus_assn = 0;

  if (queue->mem_assn   == UNSPECIFIED)
    queue->mem_assn   = 0;

  if (queue->running    == UNSPECIFIED)
    queue->running    = 0;

  /*
   * Get the resources for this queue from the resource monitor (if
   * available).  If the resmom is not accessible, disable the queue.
   * Don't bother checking if the queue is Stopped.
   */
  if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0 &&
      (queue->flags & QFLAGS_STOPPED) == 0)
    {
    queue->rsrcs = schd_get_resources(queue->exechost);

    if (queue->rsrcs != NULL)
      {
      /* Account for this queue's resources. */
      queue->rsrcs->ncpus_alloc += queue->ncpus_assn;
      queue->rsrcs->mem_alloc   += queue->mem_assn;
      queue->rsrcs->njobs       += queue->running;

      }
    else
      {
      (void)sprintf(log_buffer,
                    "Can't get resources for %s@%s - marking unavailable.",
                    queue->qname, queue->exechost);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      queue->flags |= QFLAGS_NODEDOWN;
      }
    }

  return (0);
  }