Ejemplo n.º 1
0
/*
 * reply_send - pass a batch request to the sender selected at compile time.
 *
 * When PBS_MOM is defined the request goes to reply_send_mom(), otherwise
 * to reply_send_svr().  The value produced by that call is returned as is.
 */
int reply_send(

  struct batch_request *request)  /* I (freed) */

  {
  int send_rc;

#ifdef PBS_MOM
  send_rc = reply_send_mom(request);
#else
  send_rc = reply_send_svr(request);
#endif

  return(send_rc);
  }
Ejemplo n.º 2
0
/*
 * req_stat_job_step2 - build and send the reply to a job status request.
 *
 * cntl->sc_type selects which jobs are reported:
 *   tjstJob                      the job named in the request
 *   tjstQueue                    the jobs of cntl->sc_pque
 *   tjstSummarizeArraysQueue     cntl->sc_pque->qu_jobs_array_sum
 *   tjstSummarizeArraysServer    the array_summary list
 *   tjstArray                    the jobs of the array named in the request
 *   tjstTruncatedServer /
 *   tjstTruncatedQueue           every queue in svr_queues, with the number
 *                                of queued jobs reported per queue capped by
 *                                the queue's MaxReport attribute (TMAX_JOB
 *                                when that attribute is unset or negative)
 *   any other value              the alljobs list
 *
 * The request extension string may contain EXECQUEONLY (skip jobs that are
 * not in an execution queue) and "DELTA:<epoch>"; in the truncated branch a
 * job whose mtime is below <epoch> is reported with the short 'delta'
 * attribute list instead of the list carried by the request.
 *
 * The request is answered through reply_send_svr() or req_reject().  The
 * only exit that answers nothing is an attrlist_create() failure while the
 * delta list is being built.
 *
 * Every iterator obtained here (job list, queue list, per-queue job list)
 * is deleted before the function returns.
 */
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  svrattrl              *pal;
  job                   *pjob = NULL;

  struct batch_request  *preq;
  struct batch_reply    *preply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type;
  pbs_queue             *pque = NULL;
  int                    exec_only = 0;

  int                    bad = 0;
  long                   DTime;  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  static svrattrl       *dpal = NULL;
  int                    job_array_index = 0;
  job_array             *pa = NULL;
  char                   log_buf[LOCAL_LOG_BUF_SIZE];
  all_jobs_iterator      *iter;

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  if (dpal == NULL)
    {
    /* build 'delta' pbs_attribute list (done once; dpal is static) */

    svrattrl *tpal;

    tlist_head dalist;

    int aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        /* NOTE(review): this exit sends no reply and may leave dpal
         * pointing at a partly built list - confirm that is intended */
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      /* NOTE(review): dalist is a local list head while dpal outlives this
       * call - confirm the list is never walked back to its head */
      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    pa = get_array(preq->rq_ind.rq_status.rq_id);

    if (pa == NULL)
      {
      req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
      return;
      }
    }

  /* obtain an iterator over the job list that matches the request type */
  {
  all_jobs *ajptr = NULL;

  if (type == tjstQueue)
    ajptr = cntl->sc_pque->qu_jobs;

  else if (type == tjstSummarizeArraysQueue)
    ajptr = cntl->sc_pque->qu_jobs_array_sum;

  else if (type == tjstSummarizeArraysServer)
    ajptr = &array_summary;

  else
    ajptr = &alljobs;

  ajptr->lock();
  iter = ajptr->get_iterator();
  ajptr->unlock();
  }

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if ((type == tjstSummarizeArraysQueue) || 
      (type == tjstSummarizeArraysServer))
    {
    /* No array can be owned for these options */
    update_array_statuses();
    }


  /* fetch the first job to report */
  if (type == tjstJob)
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

  else if (type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs,iter);

  else if (type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,iter);

  else if (type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary,iter);

  else if (type == tjstArray)
    {
    job_array_index = -1;
    pjob = NULL;
    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs,iter);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT:  { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      /* step over the tag itself */
      ptr += strlen("DELTA:");

      DTime = strtol(ptr, NULL, 10);
      }
    }

  if ((type == tjstTruncatedServer) || 
      (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;

    /* named differently from the job iterator above so that both can be
     * released on the way out */
    all_queues_iterator *queue_iter = NULL;

    /* the job fetched above is not reported by this branch; release it
     * before any queue is touched so it cannot stay locked when no queue
     * is visited */
    if (pjob != NULL)
      {
      unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);
      pjob = NULL;
      }

    svr_queues.lock();
    queue_iter = svr_queues.get_iterator();
    svr_queues.unlock();

    /* loop through all queues */
    while ((pque = next_queue(&svr_queues,queue_iter)) != NULL)
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */
        unlock_queue(pque, __func__, "ignore queue", LOGLEVEL);
        continue;
        }

      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        snprintf(log_buf, sizeof(log_buf),
          "giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */
      all_jobs_iterator *jobiter = NULL;
      pque->qu_jobs->lock();
      jobiter = pque->qu_jobs->get_iterator();
      pque->qu_jobs->unlock();

      while ((pjob = next_job(pque->qu_jobs,jobiter)) != NULL)
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */
          unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);

          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        /* jobs not modified since DTime get the short delta list */
        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL);
          unlock_queue(pque, __func__, "perm", LOGLEVEL);

          delete jobiter;
          delete queue_iter;
          delete iter;

          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);
        }    /* END foreach (pjob from pque) */

      /* one iterator is allocated per queue; release it per queue */
      delete jobiter;

      if (LOGLEVEL >= 5)
        {
        snprintf(log_buf, sizeof(log_buf),
          "sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }
    
      unlock_queue(pque, __func__, "end while", LOGLEVEL);
      }      /* END for (pque) */

    reply_send_svr(preq);

    delete queue_iter;
    delete iter;

    return;
    } /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      if (cntl->sc_pque != NULL)
        {
        if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
          goto nextjob;
        }
      else
        {
        /* the array mutex is dropped around the queue lookup and taken
         * again afterwards */
        if (pa != NULL)
          pthread_mutex_unlock(pa->ai_mutex);
        pque = get_jobs_queue(&pjob);
        if (pa != NULL)
          pthread_mutex_lock(pa->ai_mutex);

        if ((pjob == NULL) ||
            (pque == NULL))
          goto nextjob;
        
        /* the queue mutex is released when pque_mutex goes out of scope */
        mutex_mgr pque_mutex = mutex_mgr(pque->qu_mutex, true);
        if (pque->qu_qs.qu_type != QTYPE_Execution)
          {
          goto nextjob;
          }
        }
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && 
        (rc != PBSE_PERM))
      {
      if (pa != NULL)
        {
        unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
        }

      unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL);

      req_reject(rc, bad, preq, NULL, NULL);

      delete iter;

      return;
      }

    /* get next job */

nextjob:

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL);

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs,iter);
    else if (type == tjstSummarizeArraysQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,iter);
    else if (type == tjstSummarizeArraysServer)
      pjob = next_job(&array_summary,iter);
    else if (type == tjstArray)
      {
      pjob = NULL;
      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size)
        {
        if (pa->job_ids[job_array_index] != NULL)
          {
          if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
            {
            break;
            }
          }
        }
      }
    else
      pjob = next_job(&alljobs,iter);

    rc = 0;
    }  /* END while (pjob != NULL) */

  delete iter;

  if (pa != NULL)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }
 
  reply_send_svr(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Ejemplo n.º 3
0
int req_stat_svr(

  struct batch_request *preq) /* ptr to the decoded request */

  {
  svrattrl             *pal;

  struct batch_reply   *preply;

  struct brp_status    *pstat;
  int                   bad = 0;
  char                  nc_buf[128];
  int                   numjobs;
  int                   netrates[3];

  memset(netrates, 0, sizeof(netrates));

  /* update count and state counts from sv_numjobs and sv_jobstates */
  lock_sv_qs_mutex(server.sv_qs_mutex, __func__);
  numjobs = server.sv_qs.sv_numjobs;
  unlock_sv_qs_mutex(server.sv_qs_mutex, __func__);
  
  pthread_mutex_lock(server.sv_attr_mutex);
  server.sv_attr[SRV_ATR_TotalJobs].at_val.at_long = numjobs;
  server.sv_attr[SRV_ATR_TotalJobs].at_flags |= ATR_VFLAG_SET;

  pthread_mutex_lock(server.sv_jobstates_mutex);

  update_state_ct(
    &server.sv_attr[SRV_ATR_JobsByState],
    server.sv_jobstates,
    server.sv_jobstbuf);
  
  pthread_mutex_unlock(server.sv_jobstates_mutex);

  netcounter_get(netrates);
  snprintf(nc_buf, 127, "%d %d %d", netrates[0], netrates[1], netrates[2]);

  if (server.sv_attr[SRV_ATR_NetCounter].at_val.at_str != NULL)
    free(server.sv_attr[SRV_ATR_NetCounter].at_val.at_str);
  server.sv_attr[SRV_ATR_NetCounter].at_val.at_str = strdup(nc_buf);
  if (server.sv_attr[SRV_ATR_NetCounter].at_val.at_str != NULL)
    server.sv_attr[SRV_ATR_NetCounter].at_flags |= ATR_VFLAG_SET;
  pthread_mutex_unlock(server.sv_attr_mutex);

  /* allocate a reply structure and a status sub-structure */

  preply = &preq->rq_reply;
  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  pstat = (struct brp_status *)calloc(1, sizeof(struct brp_status));

  if (pstat == NULL)
    {
    reply_free(preply);

    req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);
    pthread_mutex_unlock(server.sv_attr_mutex);

    return(PBSE_SYSTEM);
    }

  CLEAR_LINK(pstat->brp_stlink);

  strcpy(pstat->brp_objname, server_name);

  pstat->brp_objtype = MGR_OBJ_SERVER;

  CLEAR_HEAD(pstat->brp_attr);

  append_link(&preply->brp_un.brp_status, &pstat->brp_stlink, pstat);

  /* add attributes to the status reply */

  pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

  if (status_attrib(
        pal,
        svr_attr_def,
        server.sv_attr,
        SRV_ATR_LAST,
        preq->rq_perm,
        &pstat->brp_attr,
        &bad,
        1))    /* IsOwner == TRUE */
    {
    reply_badattr(PBSE_NOATTR, bad, pal, preq);
    }
  else
    {
    reply_send_svr(preq);
    }
    

  return(PBSE_NONE);
  }  /* END req_stat_svr() */
Ejemplo n.º 4
0
int req_stat_node(

  struct batch_request *preq)

  {
  char                 *name;

  int                   rc   = PBSE_NONE;
  int                   type = 0;
  int                   bad  = 0;

  struct pbsnode       *pnode = NULL;
  struct batch_reply   *preply;
  struct prop props;
  svrattrl             *pal;

  /*
   * first, check that the server indeed has a list of nodes
   * and if it does, validate the name of the requested object--
   * either name is that of a specific node, or name[0] is null/@
   * meaning request is for all nodes in the server's jurisdiction
   */

  if (LOGLEVEL >= 6)
    {
    log_record( PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, "entered");
    }

  if (svr_totnodes <= 0)
    {
    rc = PBSE_NONODES;
    req_reject(rc, 0, preq, NULL, "node list is empty - check 'server_priv/nodes' file");

    return rc;
    }

  name = preq->rq_ind.rq_status.rq_id;

  if ((*name == '\0') || (*name == '@'))
    {
    type = 1;
    }
  else if ((*name == ':') && (*(name + 1) != '\0'))
    {
    if (!strcmp(name + 1, "ALL"))
      {
      type = 1;  /* psuedo-group for all nodes */
      }
    else
      {
      type = 2;
      props.name = name + 1;
      props.mark = 1;
      props.next = NULL;
      }
    }

  preply = &preq->rq_reply;

  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  if (type == 0)
    {
    /* get status of the named node */
    pnode = find_nodebyname(name);
    if (pnode == NULL)
      {
      rc = PBSE_UNKNODE;
      req_reject(rc, 0, preq, NULL, "cannot locate specified node");
      return(rc);
      }

    /* get the status on all of the numa nodes */
    if (pnode->nd_is_alps_reporter == TRUE)
      rc = get_alps_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
    else
      rc = get_numa_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);

    unlock_node(pnode, __func__, "type == 0", LOGLEVEL);
    }
  else
    {
    /* get status of all or several nodes */
    all_nodes_iterator *iter = NULL;

    while ((pnode = next_host(&allnodes,&iter,NULL)) != NULL)
      {
      if ((type == 2) && 
          (!hasprop(pnode, &props)))
        {
        unlock_node(pnode, __func__, "type != 0, next_host", LOGLEVEL);
        continue;
        }

      /* get the status on all of the numa nodes */
      if (pnode->nd_is_alps_reporter == TRUE)
        rc = get_alps_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
      else
        rc = get_numa_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
      
      if (rc != PBSE_NONE)
        {
        unlock_node(pnode, __func__, "type != 0, rc != 0, get_numa_statuses", LOGLEVEL);
        break;
        }

      unlock_node(pnode, __func__, "type != 0, rc == 0, get_numa_statuses", LOGLEVEL);
      }

    if (iter != NULL)
      delete iter;
    }

  if (rc == PBSE_NONE)
    {
    /* SUCCESS */

    reply_send_svr(preq);
    }
  else
    {
    if (rc != PBSE_UNKNODEATR)
      {
      req_reject(rc, 0, preq, NULL, NULL);
      }
    else
      {
      pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

      reply_badattr(rc, bad, pal, preq);
      }
    }

  return(rc);
  }  /* END req_stat_node() */
Ejemplo n.º 5
0
/*
 * req_stat_que - service a "status queue" batch request.
 *
 * An empty object name, or one starting with '@', asks for every queue in
 * svr_queues; any other name asks for the single queue of that name.
 *
 * Returns PBSE_UNKQUE when a named queue does not exist, otherwise the
 * last code produced by status_que() (a PBSE_PERM result for one queue of
 * the full walk is treated as success).  The request is answered with
 * reply_send_svr() on success and req_reject() on failure.
 */
int req_stat_que(
    
  batch_request *preq)

  {
  char                  reject_msg[LOCAL_LOG_BUF_SIZE+1];
  pbs_queue            *queue  = NULL;
  struct batch_reply   *reply;
  int                   status = 0;
  char                 *queue_name = preq->rq_ind.rq_status.rq_id;
  int                   every_queue;

  every_queue = ((queue_name[0] == '\0') || (queue_name[0] == '@'));

  if (!every_queue)
    {
    /* a specific queue was asked for; it has to exist */
    queue = find_queuebyname(queue_name);

    if (queue == NULL)
      {
      snprintf(reject_msg, LOCAL_LOG_BUF_SIZE, "cannot locate queue %s", queue_name);
      req_reject(PBSE_UNKQUE, 0, preq, NULL, reject_msg);
      return(PBSE_UNKQUE);
      }
    }

  reply = &preq->rq_reply;
  reply->brp_choice = BATCH_REPLY_CHOICE_Status;
  CLEAR_HEAD(reply->brp_un.brp_status);

  if (every_queue)
    {
    svr_queues.lock();
    all_queues_iterator *queue_iter = svr_queues.get_iterator();
    svr_queues.unlock();

    for (queue = next_queue(&svr_queues,queue_iter);
         queue != NULL;
         queue = next_queue(&svr_queues,queue_iter))
      {
      /* the queue mutex is released at the end of every pass, including
       * the pass that leaves the loop early */
      mutex_mgr pque_mutex = mutex_mgr(queue->qu_mutex, true);

      status = status_que(queue, preq, &reply->brp_un.brp_status);

      if (status == PBSE_PERM)
        {
        /* a permission failure on one queue does not fail the request */
        status = 0;
        }
      else if (status != 0)
        {
        break;
        }
      }

    delete queue_iter;
    }
  else
    {
    /* the queue mutex is released when pque_mutex leaves this scope */
    mutex_mgr pque_mutex = mutex_mgr(queue->qu_mutex, true);

    status = status_que(queue, preq, &reply->brp_un.brp_status);
    }

  if (status == PBSE_NONE)
    {
    reply_send_svr(preq);
    }
  else
    {
    /* drop what was collected so far before rejecting */
    reply_free(reply);

    req_reject(PBSE_NOATTR, status, preq, NULL, "status_queue failed");
    }

  return(status);
  }  /* END req_stat_que() */
Ejemplo n.º 6
0
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  svrattrl              *pal;
  job                   *pjob = NULL;

  struct batch_request  *preq;
  struct batch_reply    *preply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type;
  pbs_queue             *pque = NULL;
  int                    exec_only = 0;

  int                    bad = 0;
  long                   DTime;  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  static svrattrl       *dpal = NULL;
  int                    job_array_index = 0;
  job_array             *pa = NULL;
  char                   log_buf[LOCAL_LOG_BUF_SIZE];
  int                    iter;
  time_t                 time_now = time(NULL);
  long                   poll_jobs = 0;
  char                   job_id[PBS_MAXSVRJOBID+1];
  int                    job_substate = -1;
  time_t                 job_momstattime = -1;

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  if (dpal == NULL)
    {
    /* build 'delta' pbs_attribute list */

    svrattrl *tpal;

    tlist_head dalist;

    int aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    pa = get_array(preq->rq_ind.rq_status.rq_id);

    if (pa == NULL)
      {
      req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
      return;
      }
    }

  iter = -1;

  get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);
  if (!poll_jobs)
    {
    /* polljobs not set - indicates we may need to obtain fresh data from
       MOM */

    if (cntl->sc_jobid[0] == '\0')
      pjob = NULL;
    else
      pjob = svr_find_job(cntl->sc_jobid, FALSE);

    while (1)
      {
      if (pjob == NULL)
        {
        /* start from the first job */

        if (type == tjstJob)
          {
          pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);
          }
        else if (type == tjstQueue)
          {
          pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
          }
        else if (type == tjstArray)
          {
          job_array_index = 0;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
                break;
                }
              }

            job_array_index++;
            }
          }
        else
          {
          pjob = next_job(&alljobs,&iter);
          }

        }    /* END if (pjob == NULL) */
      else
        {
        strcpy(job_id, pjob->ji_qs.ji_jobid);
        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);

        if (type == tjstJob)
          break;

        if (type == tjstQueue)
          pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
        else if (type == tjstArray)
          {
          pjob = NULL;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (++job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
                break;
                }
              }
            }
          }
        else
          pjob = next_job(&alljobs,&iter);
          
        }

      if (pjob == NULL)
        break;

      strcpy(job_id, pjob->ji_qs.ji_jobid);
      job_substate = pjob->ji_qs.ji_substate;
      job_momstattime = pjob->ji_momstat;
      strcpy(cntl->sc_jobid, job_id);
      unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL);
      pjob = NULL;

      /* PBS_RESTAT_JOB defaults to 30 seconds */
      if ((job_substate == JOB_SUBSTATE_RUNNING) &&
          ((time_now - job_momstattime) > JobStatRate))
        {
        /* go to MOM for status */
        if ((rc = stat_to_mom(job_id, cntl)) == PBSE_MEM_MALLOC)
          break;

        if (rc != 0)
          {
          pjob = svr_find_job(job_id, FALSE);

          rc = 0;

          continue;
          }
        
        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        return; /* will pick up after mom replies */
        }
      }    /* END while(1) */

    if (rc != 0)
      {
      if (pa != NULL)
        unlock_ai_mutex(pa, __func__, "2", LOGLEVEL);

      reply_free(preply);

      req_reject(rc, 0, preq, NULL, "cannot get update from mom");

      return;
      }
    }    /* END if (!server.sv_attr[SRV_ATR_PollJobs].at_val.at_long) */

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if ((type == tjstSummarizeArraysQueue) || 
      (type == tjstSummarizeArraysServer))
    {
    /* No array can be owned for these options */
    update_array_statuses();
    }

  if (type == tjstJob)
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

  else if (type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs,&iter);

  else if (type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter);

  else if (type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary,&iter);

  else if (type == tjstArray)
    {
    job_array_index = -1;
    pjob = NULL;
    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs,&iter);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT:  { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      ptr += strlen("delta:");

      DTime = strtol(ptr, NULL, 10);
      }
    }


  if ((type == tjstTruncatedServer) || 
      (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;
    int  iter = -1;

    /* loop through all queues */
    while ((pque = next_queue(&svr_queues,&iter)) != NULL)
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */
        unlock_queue(pque, __func__, "ignore queue", LOGLEVEL);
        continue;
        }

      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf,"giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */
      if (pjob != NULL)
        unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);

      iter = -1;

      while ((pjob = next_job(pque->qu_jobs,&iter)) != NULL)
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */
          unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);

          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          if (pa != NULL)
            {
            unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
            }
          unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL);
          unlock_queue(pque, __func__, "perm", LOGLEVEL);
          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);
        }    /* END foreach (pjob from pque) */

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf,"sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }
    
      unlock_queue(pque, __func__, "end while", LOGLEVEL);
      }      /* END for (pque) */
      
    if (pa != NULL)
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    reply_send_svr(preq);

    return;
    }        /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      if (cntl->sc_pque != NULL)
        {
        if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
          goto nextjob;
        }
      else
        {
        if (pa != NULL)
          pthread_mutex_unlock(pa->ai_mutex);
        pque = get_jobs_queue(&pjob);
        if (pa != NULL)
          pthread_mutex_lock(pa->ai_mutex);

        if ((pjob == NULL) ||
            (pque == NULL))
          goto nextjob;
        
        if (pque->qu_qs.qu_type != QTYPE_Execution)
          {
          unlock_queue(pque, __func__, "not exec", LOGLEVEL);
        
          goto nextjob;
          }

        unlock_queue(pque, __func__, "exec", LOGLEVEL);
        }
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && 
        (rc != PBSE_PERM))
      {
      if (pa != NULL)
        {
        unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
        }
      unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL);

      req_reject(rc, bad, preq, NULL, NULL);

      return;
      }

    /* get next job */

nextjob:

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL);

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
    else if (type == tjstSummarizeArraysQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter);
    else if (type == tjstSummarizeArraysServer)
      pjob = next_job(&array_summary,&iter);
    else if (type == tjstArray)
      {
      pjob = NULL;
      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size)
        {
        if (pa->job_ids[job_array_index] != NULL)
          {
          if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
            {
            break;
            }
          }
        }
      }
    else
      pjob = next_job(&alljobs,&iter);

    rc = 0;
    }  /* END while (pjob != NULL) */

  if (pa != NULL)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }
 
  reply_send_svr(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Ejemplo n.º 7
0
int req_stat_que(
    struct batch_request *preq)
  {
  char                 *name;
  pbs_queue            *pque = NULL;

  struct batch_reply   *preply;
  int                   rc   = 0;
  int                   type = 0;
  char log_buf[LOCAL_LOG_BUF_SIZE+1];

  /*
   * first, validate the name of the requested object, either
   * a queue, or null for all queues
   */

  name = preq->rq_ind.rq_status.rq_id;

  if ((*name == '\0') || (*name == '@'))
    {
    type = 1;
    }
  else
    {
    pque = find_queuebyname(name);

    if (pque == NULL)
      {
      rc = PBSE_UNKQUE;
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "can not locate queue %s", name);
      req_reject(rc, 0, preq, NULL, log_buf);
      return rc;
      }
    }

  preply = &preq->rq_reply;

  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  if (type == 0)
    {
    /* get status of the named queue */

    rc = status_que(pque, preq, &preply->brp_un.brp_status);
    unlock_queue(pque, "req_stat_que", "type == 0", LOGLEVEL);
    }
  else
    {
    /* pque == NULL before next_queue */
    int iter = -1;

    /* get status of all queues */
    while ((pque = next_queue(&svr_queues,&iter)) != NULL)
      {
      rc = status_que(pque, preq, &preply->brp_un.brp_status);

      if (rc != 0)
        {
        if (rc != PBSE_PERM)
          {
          unlock_queue(pque, "req_stat_que", "break", LOGLEVEL);
          break;
          }

        rc = 0;
        }

      unlock_queue(pque, "req_stat_que", "end while", LOGLEVEL);
      }
    }

  if (rc != PBSE_NONE)
    {
    reply_free(preply);

    req_reject(PBSE_NOATTR, rc, preq, NULL, "status_queue failed");
    }
  else
    {
    reply_send_svr(preq);
    }

  return rc;
  }  /* END req_stat_que() */
Ejemplo n.º 8
0
int  req_locatejob(

  struct batch_request *preq)

  {
  int     rc = PBSE_NONE;
  char   *at;
  int     i;
  job    *pjob;
  char   *location = (char *)0;

  if ((at = strchr(preq->rq_ind.rq_locate, (int)'@')))
    * at = '\0';  /* strip off @server_name */

  pjob = svr_find_job(preq->rq_ind.rq_locate, FALSE);

  if (pjob)
    {
    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

    location = server_name;
    }
  else
    {
    for (i = 0; i < server.sv_tracksize; i++)
      {
      if ((server.sv_track + i)->tk_mtime &&
          !strcmp((server.sv_track + i)->tk_jobid, preq->rq_ind.rq_locate))
        {
        location = (server.sv_track + i)->tk_location;
        break;
        }
      }
    }

  if (location != NULL)
    {
    preq->rq_reply.brp_code = 0;
    preq->rq_reply.brp_auxcode = 0;
    preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_Locate;
    snprintf(preq->rq_reply.brp_un.brp_locate, sizeof(preq->rq_reply.brp_un.brp_locate), "%s", location);

    reply_send_svr(preq);
    }
  else
    {
    if (LOGLEVEL >= 7)
      {
      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        preq->rq_ind.rq_locate,
        "cannot find job in server tracking list");
      }

    rc = PBSE_UNKJOBID;
    req_reject(rc, 0, preq, NULL, NULL);
    }

  return rc;
  }  /* END req_locatejob() */
Ejemplo n.º 9
0
void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  batch_request         *preq = cntl->sc_origrq;
  svrattrl              *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  job                   *pjob = NULL;

  struct batch_reply    *preply = &preq->rq_reply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type = (enum TJobStatTypeEnum)cntl->sc_type;
  bool                   exec_only = false;

  int                    bad = 0;
  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  int                    job_array_index = -1;
  job_array             *pa = NULL;
  all_jobs_iterator     *iter;

  if (preq->rq_extend != NULL)
    {
    /* FORMAT:  { EXECQONLY } */
    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = true;
    }

  if ((type == tjstTruncatedServer) || 
      (type == tjstTruncatedQueue))
    {
    handle_truncated_qstat(exec_only, cntl->sc_condensed, preq);

    return;
    } /* END if ((type == tjstTruncatedServer) || ...) */
  else if (type == tjstJob)
    {
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

    if (pjob != NULL)
      {
      if ((rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad)))
        req_reject(rc, bad, preq, NULL, NULL);
      else
        reply_send_svr(preq);

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    else
      {
      req_reject(PBSE_JOBNOTFOUND, bad, preq, NULL, NULL);
      }
    }
  else
    {
    if (type == tjstArray)
      {
      pa = get_array(preq->rq_ind.rq_status.rq_id);

      if (pa == NULL)
        {
        req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
        return;
        }
      }
    else if ((type == tjstSummarizeArraysQueue) || 
             (type == tjstSummarizeArraysServer))
      update_array_statuses();

    iter = get_correct_status_iterator(cntl);

    for (pjob = get_next_status_job(cntl, job_array_index, pa, iter);
         pjob != NULL;
         pjob = get_next_status_job(cntl, job_array_index, pa, iter))
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      /* go ahead and build the status reply for this job */
      if (pjob->ji_being_recycled == true)
        continue;

      if (exec_only)
        {
        if (cntl->sc_pque != NULL)
          {
          if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
            continue;
          }
        else if (in_execution_queue(pjob, pa) == false)
          continue;
        }

      rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad);

      if ((rc != PBSE_NONE) && 
          (rc != PBSE_PERM))
        {
        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        req_reject(rc, bad, preq, NULL, NULL);

        delete iter;

        return;
        }
      }  /* END for (pjob != NULL) */

    delete iter;

    if (pa != NULL)
      {
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
      }
   
    reply_send_svr(preq);
    }

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Ejemplo n.º 10
0
/*
 * handle_truncated_qstat - answer a "truncated" job status request by
 * walking every queue and reporting a bounded number of queued jobs from
 * each one.
 *
 * exec_only - when true, queues whose type is not QTYPE_Execution are
 *             skipped.
 * condensed - passed through unchanged to status_job().
 * preq      - the request; job statuses are appended to its reply, which is
 *             then handed to reply_send_svr(), or to req_reject() if
 *             status_job() fails with anything other than PBSE_PERM.
 *
 * Per-queue limit: the queue's QA_ATR_MaxReport value when that attribute
 * is set and non-negative, otherwise TMAX_JOB. The limit counts only jobs
 * in JOB_STATE_QUEUED; jobs in any other state are always reported.
 *
 * Both iterators obtained from get_iterator() are deleted here on every
 * exit path.
 */
void handle_truncated_qstat(
    
  bool           exec_only,
  bool           condensed,
  batch_request *preq)

  {
  long                 sentJobCounter = 0;  /* accumulates across all queues */
  long                 qmaxreport;
  all_queues_iterator *queue_iter = NULL;
  pbs_queue           *pque;
  char                 log_buf[LOCAL_LOG_BUF_SIZE];
  job                 *pjob;
  svrattrl            *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  batch_reply         *preply = &preq->rq_reply;
  int                  bad = 0;

  svr_queues.lock();
  queue_iter = svr_queues.get_iterator();
  svr_queues.unlock();

  /* loop through all queues */
  while ((pque = next_queue(&svr_queues, queue_iter)) != NULL)
    {
    long      qjcounter = 0;  /* queued-state jobs reported from this queue */

    /* scoped to one queue; presumably releases pque->qu_mutex whenever this
     * iteration is left - confirm against mutex_mgr */
    mutex_mgr queue_mutex(pque->qu_mutex, true);

    if ((exec_only == true) &&
        (pque->qu_qs.qu_type != QTYPE_Execution))
      {
      /* ignore routing queues */
      continue;
      }

    if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
        (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
      {
      qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
      }
    else
      {
      qmaxreport = TMAX_JOB;
      }

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reporting up to %ld idle jobs in queue %s\n",
        qmaxreport,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
      }

    /* loop through jobs in queue */
    all_jobs_iterator *jobiter = NULL;
    pque->qu_jobs->lock();
    jobiter = pque->qu_jobs->get_iterator();
    pque->qu_jobs->unlock();

    while ((pjob = next_job(pque->qu_jobs, jobiter)) != NULL)
      {
      mutex_mgr job_mgr(pjob->ji_mutex, true);

      if ((qjcounter >= qmaxreport) &&
          (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
        {
        /* max_report of queued jobs reached for queue */
        continue;
        }

      int rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, condensed, &bad);

      if ((rc != 0) &&
          (rc != PBSE_PERM))
        {
        req_reject(rc, bad, preq, NULL, NULL);

        /* release both iterators before bailing out */
        delete jobiter;
        delete queue_iter;

        return;
        }

      sentJobCounter++;

      if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
        qjcounter++;
      } /* END foreach (pjob from pque) */

    /* done with this queue's jobs; a new iterator is created per queue */
    delete jobiter;

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reported %ld total jobs for queue %s\n",
        sentJobCounter,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
      }
    } /* END for (pque) */

  reply_send_svr(preq);

  delete queue_iter;

  return;
  } // END handle_truncated_qstat()