Пример #1
0
/*
 * req_stat_que - service a "status queue" batch request.
 *
 * An empty request id, or one starting with '@', asks for every queue
 * linked on svr_queues.  Any other id must name a queue that
 * find_queuebyname() can locate; if it cannot, the request is rejected
 * with PBSE_UNKQUE.
 *
 * While walking all queues a PBSE_PERM result from status_que() is
 * ignored and the walk continues; any other non-zero result stops the
 * walk, frees the partial reply and rejects the request.
 */
void
req_stat_que(struct batch_request *preq)
{
	struct batch_reply *reply = &preq->rq_reply;
	char		   *qname = preq->rq_ind.rq_status.rq_id;
	pbs_queue	   *que;
	int		    want_all;
	int		    err = 0;

	/* decide what is being asked for before the reply is touched */
	want_all = ((*qname == '\0') || (*qname == '@'));

	if (!want_all) {
		que = find_queuebyname(qname);
#ifdef NAS /* localmod 075 */
		if (que == NULL)
			que = find_resvqueuebyname(qname);
#endif /* localmod 075 */
		if (que == NULL) {
			req_reject(PBSE_UNKQUE, 0, preq);
			return;
		}
	}

	reply->brp_choice = BATCH_REPLY_CHOICE_Status;
	CLEAR_HEAD(reply->brp_un.brp_status);

	if (want_all) {
		/* status of every queue known to the server */
		for (que = (pbs_queue *)GET_NEXT(svr_queues);
			que != NULL;
			que = (pbs_queue *)GET_NEXT(que->qu_link)) {
			err = status_que(que, preq, &reply->brp_un.brp_status);
			if (err == PBSE_PERM)
				err = 0;	/* not permitted for this queue: skip it */
			else if (err != 0)
				break;
		}
	} else {
		/* status of the single named queue */
		err = status_que(que, preq, &reply->brp_un.brp_status);
	}

	if (err == 0) {
		(void)reply_send(preq);
		return;
	}

	(void)reply_free(reply);
	req_reject(err, bad, preq);
}
Пример #2
0
/*
 * get_dfltque - look up the server's default queue.
 *
 * When the SRV_ATR_dflt_que server attribute is not flagged as set, a null
 * pointer is returned without any lookup.  Otherwise the attribute's string
 * value is handed to find_queuebyname() and its result is returned as is.
 */
pbs_queue *
get_dfltque(void)
{
	if ((server.sv_attr[SRV_ATR_dflt_que].at_flags & ATR_VFLAG_SET) == 0)
		return ((pbs_queue *)0);

	return (find_queuebyname(server.sv_attr[SRV_ATR_dflt_que].at_val.at_str));
}
Пример #3
0
/*
 * get_dfltque() - return the server's default queue
 *
 * Returns NULL when the SRV_ATR_dflt_que server attribute is not set;
 * otherwise returns whatever find_queuebyname() yields for the attribute's
 * string value.
 */
pbs_queue *get_dfltque(void)

  {
  /* no default queue configured: nothing to look up */
  if ((server.sv_attr[SRV_ATR_dflt_que].at_flags & ATR_VFLAG_SET) == 0)
    {
    return(NULL);
    }

  return(find_queuebyname(server.sv_attr[SRV_ATR_dflt_que].at_val.at_str));
  }  /* END get_dfltque() */
Пример #4
0
/*
 * get_dfltque() - return the server's default queue
 *
 * The queue name is fetched with get_svr_attr_str(SRV_ATR_dflt_que).  When
 * that call does not report PBSE_NONE no lookup is attempted and NULL is
 * returned; otherwise the result of find_queuebyname() is returned as is.
 */
pbs_queue *get_dfltque(void)

  {
  char *dflt_name = NULL;

  if (get_svr_attr_str(SRV_ATR_dflt_que, &dflt_name) != PBSE_NONE)
    {
    return(NULL);
    }

  return(find_queuebyname(dflt_name));
  }  /* END get_dfltque() */
Пример #5
0
/*
 * local_move() - move a job into another queue on this server
 *
 * The destination queue is named by jobp->ji_qs.ji_destin.  A NULL req
 * means the move is a route; otherwise the request's permission bits decide
 * between a privileged (manager) move and an ordinary move, and that move
 * type is handed to svr_chkque().
 *
 * Returns ROUTE_SUCCESS after the job has been re-queued and saved,
 * ROUTE_PERM_FAILURE when the destination does not exist or the enqueue
 * fails, or the value of should_retry_route() when svr_chkque() refuses the
 * job.  pbs_errno carries the underlying error code.
 */
static int local_move(

  job                  *jobp,
  struct batch_request *req)

  {
  char      *routine = "local_move";
  char      *dest_name = jobp->ji_qs.ji_destin;
  pbs_queue *dest_que;
  int        move_kind;

  /* the destination queue has to exist */

  dest_que = find_queuebyname(dest_name);

  if (dest_que == NULL)
    {
    sprintf(log_buffer, "queue %s does not exist\n",
            dest_name);

    log_err(-1, routine, log_buffer);

    pbs_errno = PBSE_UNKQUE;

    return(ROUTE_PERM_FAILURE);
    }

  /* pick the move type that svr_chkque() will be told about */

  if (req == NULL)
    {
    move_kind = MOVE_TYPE_Route; /* route */
    }
  else if ((req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) != 0)
    {
    move_kind = MOVE_TYPE_MgrMv; /* privileged move */
    }
  else
    {
    move_kind = MOVE_TYPE_Move; /* non-privileged move */
    }

  pbs_errno = svr_chkque(
                jobp,
                dest_que,
                get_variable(jobp, pbs_o_host),
                move_kind,
                NULL);

  if (pbs_errno != 0)
    {
    /* the queue refused the job - let the caller know whether to retry */

    return(should_retry_route(pbs_errno));
    }

  /* leave the current queue, record the new queue name and rank, */
  /* then enter the destination queue                             */

  svr_dequejob(jobp);

  strcpy(jobp->ji_qs.ji_queue, dest_name);

  jobp->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank;

  pbs_errno = svr_enquejob(jobp);

  if (pbs_errno != 0)
    {
    return(ROUTE_PERM_FAILURE); /* should never ever get here */
    }

  jobp->ji_lastdest = 0; /* reset in case of another route */

  job_save(jobp, SAVEJOB_FULL);

  return(ROUTE_SUCCESS);
  }  /* END local_move() */
Пример #6
0
/*
 * req_stat_job() - service a "status job" batch request
 *
 * The request id selects what is reported: an id starting with a digit is
 * a job (or job array) id, an id starting with a letter is a queue name,
 * and an empty id or one starting with '@' means all jobs at the server.
 * An extension string starting with "truncated" or "summarize_arrays"
 * (case-insensitive) refines the report type.
 *
 * Returns PBSE_NONE after req_stat_job_step2() has been run, the rejection
 * code when the id is invalid or unknown, or PBSE_SYSTEM when the stat_cntl
 * block cannot be allocated.  The request is rejected on every return other
 * than PBSE_NONE.
 */
int req_stat_job(

  struct batch_request *preq)  /* ptr to the decoded request */

  {
  struct stat_cntl     *cntl; /* see svrfunc.h  */
  char                 *name;
  job                  *pjob = NULL;
  pbs_queue            *pque = NULL;
  int                   rc = PBSE_NONE;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  enum TJobStatTypeEnum type = tjstNONE;

  /*
   * first, validate the name of the requested object, either
   * a job, a queue, or the whole server.
   */
  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "note");
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }


  /* FORMAT:  name = { <JOBID> | <QUEUEID> | '' } */

  name = preq->rq_ind.rq_status.rq_id;

  if (preq->rq_extend != NULL)
    {
    /* evaluate pbs_job_stat() 'extension' field */

    if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated")))
      {
      /* truncate response by 'max_report' */

      type = tjstTruncatedServer;
      }
    else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays")))
      {
      type = tjstSummarizeArraysServer;
      }

    }    /* END if (preq->rq_extend != NULL) */

  /* the <ctype.h> classifiers are only defined for EOF and values that fit
   * in an unsigned char, so the client-supplied byte is converted first */
  if (isdigit((unsigned char)*name))
    {
    /* status a single job */

    if (is_array(name))
      {
      if (type != tjstSummarizeArraysServer)
        {
        type = tjstArray;
        }
      }
    else
      {
      type = tjstJob;

      /* only existence is checked here; the job is unlocked again at once */
      if ((pjob = svr_find_job(name, FALSE)) == NULL)
        {
        rc = PBSE_UNKJOBID;
        }
      else
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    }
  else if (isalpha((unsigned char)name[0]))
    {
    if (type == tjstNONE)
      type = tjstQueue;
    else if (type == tjstSummarizeArraysServer)
      type = tjstSummarizeArraysQueue;
    else
      type = tjstTruncatedQueue;

    /* if found, this mutex is released later */
    if ((pque = find_queuebyname(name)) == NULL)
      {
      rc = PBSE_UNKQUE;
      }
    }
  else if ((*name == '\0') || (*name == '@'))
    {
    /* status all jobs at server */

    if (type == tjstNONE)
      type = tjstServer;
    }
  else
    {
    rc = PBSE_IVALREQ;
    }

  if (rc != 0)
    {
    /* is invalid - an error */
    req_reject(rc, 0, preq, NULL, NULL);

    return(rc);
    }

  preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preq->rq_reply.brp_un.brp_status);

  cntl = (struct stat_cntl *)calloc(1, sizeof(struct stat_cntl));

  if (cntl == NULL)
    {
    /* do not leave the queue locked on the failure path */
    if (pque != NULL) 
      unlock_queue(pque, "req_stat_job", (char *)"no memory cntl", LOGLEVEL);
    req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

    return(PBSE_SYSTEM);
    }

  /* truncated reports do not carry the queue into step 2, so it is
   * released here and cleared to avoid a second unlock below */
  if ((type == tjstTruncatedQueue) ||
      (type == tjstTruncatedServer))
    {
    if (pque != NULL)
      {
      unlock_queue(pque, __func__, "", LOGLEVEL);
      pque = NULL;
      }
    }

  cntl->sc_type   = (int)type;
  cntl->sc_conn   = -1;
  cntl->sc_pque   = pque;
  cntl->sc_origrq = preq;
  cntl->sc_post   = req_stat_job_step2;
  cntl->sc_jobid[0] = '\0'; /* cause "start from beginning" */

  req_stat_job_step2(cntl); /* go to step 2, see if running is current */

  if (pque != NULL)
    unlock_queue(pque, "req_stat_job", (char *)"success", LOGLEVEL);

  free(cntl);
  return(PBSE_NONE);
  }  /* END req_stat_job() */
Пример #7
0
/*
 * req_stat_que() - service a "status queue" batch request
 *
 * An empty request id, or one starting with '@', reports every queue held
 * in svr_queues; any other id must name a queue that find_queuebyname() can
 * locate, otherwise the request is rejected with PBSE_UNKQUE.
 *
 * While walking all queues a PBSE_PERM result from status_que() is ignored;
 * any other non-zero result ends the walk.  Returns the final status code:
 * PBSE_NONE when the reply was sent, otherwise the failing code (the
 * request has then been rejected).
 */
int req_stat_que(
    
  batch_request *preq)

  {
  char                 *que_name = preq->rq_ind.rq_status.rq_id;
  struct batch_reply   *reply = &preq->rq_reply;
  pbs_queue            *que = NULL;
  int                   rc = 0;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];

  /* empty or '@...' id: every queue; anything else: one named queue */
  const bool            all_queues = ((*que_name == '\0') || (*que_name == '@'));

  if (all_queues == false)
    {
    que = find_queuebyname(que_name);

    if (que == NULL)
      {
      rc = PBSE_UNKQUE;
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "cannot locate queue %s", que_name);
      req_reject(rc, 0, preq, NULL, log_buf);
      return rc;
      }
    }

  reply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(reply->brp_un.brp_status);

  if (all_queues == false)
    {
    /* the guard's destructor releases the queue mutex at the end of this scope */
    mutex_mgr que_guard = mutex_mgr(que->qu_mutex, true);

    rc = status_que(que, preq, &reply->brp_un.brp_status);
    }
  else
    {
    /* the container is only held while the iterator is obtained */
    svr_queues.lock();
    all_queues_iterator *iter = svr_queues.get_iterator();
    svr_queues.unlock();

    while ((que = next_queue(&svr_queues,iter)) != NULL)
      {
      /* one guard per queue; it is destroyed on every pass, break included */
      mutex_mgr que_guard = mutex_mgr(que->qu_mutex, true);

      rc = status_que(que, preq, &reply->brp_un.brp_status);

      if (rc == PBSE_PERM)
        {
        /* not permitted for this queue: leave it out and carry on */
        rc = 0;
        }
      else if (rc != 0)
        {
        break;
        }
      }

    delete iter;
    }

  if (rc == PBSE_NONE)
    {
    reply_send_svr(preq);

    return rc;
    }

  reply_free(reply);

  req_reject(PBSE_NOATTR, rc, preq, NULL, "status_queue failed");

  return rc;
  }  /* END req_stat_que() */
Пример #8
0
int req_stat_que(
    struct batch_request *preq)
  {
  char                 *name;
  pbs_queue            *pque = NULL;

  struct batch_reply   *preply;
  int                   rc   = 0;
  int                   type = 0;
  char log_buf[LOCAL_LOG_BUF_SIZE+1];

  /*
   * first, validate the name of the requested object, either
   * a queue, or null for all queues
   */

  name = preq->rq_ind.rq_status.rq_id;

  if ((*name == '\0') || (*name == '@'))
    {
    type = 1;
    }
  else
    {
    pque = find_queuebyname(name);

    if (pque == NULL)
      {
      rc = PBSE_UNKQUE;
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "can not locate queue %s", name);
      req_reject(rc, 0, preq, NULL, log_buf);
      return rc;
      }
    }

  preply = &preq->rq_reply;

  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  if (type == 0)
    {
    /* get status of the named queue */

    rc = status_que(pque, preq, &preply->brp_un.brp_status);
    unlock_queue(pque, "req_stat_que", "type == 0", LOGLEVEL);
    }
  else
    {
    /* pque == NULL before next_queue */
    int iter = -1;

    /* get status of all queues */
    while ((pque = next_queue(&svr_queues,&iter)) != NULL)
      {
      rc = status_que(pque, preq, &preply->brp_un.brp_status);

      if (rc != 0)
        {
        if (rc != PBSE_PERM)
          {
          unlock_queue(pque, "req_stat_que", "break", LOGLEVEL);
          break;
          }

        rc = 0;
        }

      unlock_queue(pque, "req_stat_que", "end while", LOGLEVEL);
      }
    }

  if (rc != PBSE_NONE)
    {
    reply_free(preply);

    req_reject(PBSE_NOATTR, rc, preq, NULL, "status_queue failed");
    }
  else
    {
    reply_send_svr(preq);
    }

  return rc;
  }  /* END req_stat_que() */
Пример #9
0
/*
 * queue_route() - thread entry point that retries routing for one queue
 *
 * vp is a heap-allocated queue name; this function frees it on every path
 * on which it is non-NULL.  The whole pass runs with reroute_job_mutex
 * held.  Each job returned by next_job() is handed to reroute_job() only
 * if routing has already been attempted once (ji_rteretry != 0) and the
 * last attempt is at least ROUTE_RETRY_TIME old, measured against the time
 * sampled at entry.
 *
 * Always returns NULL.
 */
void *queue_route(

  void *vp)

  {
  pbs_queue *pque;
  job       *pjob = NULL;
  char      *queue_name;
  char      log_buf[LOCAL_LOG_BUF_SIZE];

  int       iter = -1;
  time_t    time_now = time(NULL);

  queue_name = (char *)vp;

  if (queue_name == NULL)
    {
    sprintf(log_buf, "NULL queue name");
    log_err(-1, __func__, log_buf);
    return(NULL);
    }

  if (LOGLEVEL >= 7)
    {
    snprintf(log_buf, sizeof(log_buf), "queue name: %s", queue_name);
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf);
    }
  
  pthread_mutex_lock(reroute_job_mutex);

  pque = find_queuebyname(queue_name);
  if (pque == NULL)
    {
    /* the name comes from the caller, so the write into log_buf is bounded */
    snprintf(log_buf, sizeof(log_buf), "Could not find queue %s", queue_name);
    log_err(-1, __func__, log_buf);
    free(queue_name);
    pthread_mutex_unlock(reroute_job_mutex);
    return(NULL);
    }

  while ((pjob = next_job(pque->qu_jobs,&iter)) != NULL)
    {
    /* the second condition says we only want to try if routing
     * has been tried once - this is to let req_commit have the 
     * first crack at routing always */
    /* NOTE(review): the queue is unlocked on every pass and once more after
     * the loop; presumably next_job() or reroute_job() re-locks it - confirm */
    unlock_queue(pque, __func__, (char *)NULL, 0);
    if ((pjob->ji_qs.ji_un.ji_routet.ji_rteretry <= time_now - ROUTE_RETRY_TIME) &&
        (pjob->ji_qs.ji_un.ji_routet.ji_rteretry != 0))
      {
      reroute_job(pjob, pque);
      }

    /* the job is released whether or not it was rerouted */
    unlock_ji_mutex(pjob, __func__, (char *)"1", LOGLEVEL);
    }

  free(queue_name);
  unlock_queue(pque, __func__, (char *)NULL, 0);
  pthread_mutex_unlock(reroute_job_mutex);
  return(NULL);
  } /* END queue_route() */
Пример #10
0
/*
 * queue_route() - thread entry point that periodically reroutes the jobs
 * of one queue
 *
 * vp is a heap-allocated queue name; this function frees it before it
 * returns on the paths where it is non-NULL.  Each pass takes
 * reroute_job_mutex, looks the queue up again, walks its jobs and calls
 * reroute_job() on every job whose ji_commit_done flag is set, then
 * releases the mutex and sleeps for route_retry_interval seconds.
 *
 * The function only returns (always NULL) when vp is NULL or the queue can
 * no longer be found.  reroute_job_mutex is released on every one of those
 * return paths so that other threads using it are not blocked forever.
 */
void *queue_route(

    void *vp)

{
    pbs_queue *pque;
    job       *pjob = NULL;
    char      *queue_name;
    char       log_buf[LOCAL_LOG_BUF_SIZE];

    all_jobs_iterator   *iter = NULL;

    queue_name = (char *)vp;

    if (queue_name == NULL)
    {
        sprintf(log_buf, "NULL queue name");
        log_err(-1, __func__, log_buf);
        return(NULL);
    }

    while (1)
    {
        pthread_mutex_lock(reroute_job_mutex);
        /* Before we attempt to service this queue, make sure we can find it. */
        pque = find_queuebyname(queue_name);
        if (pque == NULL)
        {
            snprintf(log_buf, sizeof(log_buf), "Could not find queue %s", queue_name);
            log_err(-1, __func__, log_buf);
            free(queue_name);
            /* do not leave the thread with reroute_job_mutex still held */
            pthread_mutex_unlock(reroute_job_mutex);
            return(NULL);
        }

        mutex_mgr que_mutex(pque->qu_mutex, true);

        /* the job container is only held while the iterator is obtained */
        pque->qu_jobs->lock();
        iter = pque->qu_jobs->get_iterator();
        pque->qu_jobs->unlock();

        if (LOGLEVEL >= 7)
        {
            snprintf(log_buf, sizeof(log_buf), "routing any ready jobs in queue: %s", queue_name);
            log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf);
        }

        while ((pjob = next_job(pque->qu_jobs,iter)) != NULL)
        {
            /* We only want to try if routing has been tried at least once - this is to let
             * req_commit have the first crack at routing always. */

            if (pjob->ji_commit_done == 0) /* when req_commit is done it will set ji_commit_done to 1 */
            {
                unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
                continue;
            }
            /* queue must be unlocked when calling reroute_job */
            que_mutex.unlock();
            reroute_job(pjob);
            unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
            /* need to relock queue when we go to call next_job */
            pque = find_queuebyname(queue_name);
            if (pque == NULL)
            {
                snprintf(log_buf, sizeof(log_buf), "Could not find queue %s", queue_name);
                log_err(-1, __func__, log_buf);
                free(queue_name);
                delete iter;
                /* the queue mutex was released above; only the reroute
                 * mutex is still held at this point */
                pthread_mutex_unlock(reroute_job_mutex);
                return(NULL);
            }
            que_mutex.mark_as_locked();
        }

        /* we come out of the while loop with the queue locked.
           We don't want it locked while we sleep */
        que_mutex.unlock();
        pthread_mutex_unlock(reroute_job_mutex);
        delete iter;
        sleep(route_retry_interval);
    }

    /* not reached: the loop above only exits through its return statements */
    free(queue_name);
    return(NULL);
} /* END queue_route() */
Пример #11
0
/*
 * local_move() - move a job into another queue on this server
 *
 * The destination queue is named by pjob->ji_qs.ji_destin.  A NULL req
 * means the move is a route; otherwise the request's permission bits decide
 * between a privileged (manager) move and an ordinary move.
 *
 * The job's mutex is released while the destination queue is looked up and
 * locked, and the job is then found again by id.
 *
 * Returns
 *   PBSE_NONE          - job moved, or it already sits in the destination
 *   PBSE_JOB_RECYCLED  - the job could not be found again by its id
 *   -1                 - destination queue unknown or enqueue failed;
 *                        *my_err holds the error code
 *   should_retry_route(*my_err) when svr_chkque() refuses the job, or the
 *   non-zero result of svr_dequejob().
 */
int local_move(

  job                  *pjob,
  int                  *my_err,
  struct batch_request *req)

  {
  pbs_queue *dest_que = NULL;
  char      *destination = pjob->ji_qs.ji_destin;
  int        mtype;
  char       log_buf[LOCAL_LOG_BUF_SIZE];
  char       job_id[PBS_MAXSVRJOBID+1];
  int        rc;
  bool       reservation = false;

  /* Sometimes multiple threads are trying to route the same job. Protect against this
   * by making sure that the destination queue and the current queue are different. 
   * If they are the same then consider it done correctly */
  if (!strcmp(pjob->ji_qs.ji_queue, pjob->ji_qs.ji_destin))
    return(PBSE_NONE);

  if (LOGLEVEL >= 8)
    {
    snprintf(log_buf, sizeof(log_buf), "%s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  /*
   * if being moved at specific request of administrator, then
   * checks on queue availability, etc. are skipped;
   * otherwise all checks are enforced.
   */
  if (req == 0)
    {
    mtype = MOVE_TYPE_Route; /* route */
    }
  else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR))
    {
    mtype = MOVE_TYPE_MgrMv; /* privileged move */
    }
  else
    {
    mtype = MOVE_TYPE_Move; /* non-privileged move */
    }

  /* remember the id so the job can be found again after its mutex is dropped */
  snprintf(job_id, sizeof(job_id), "%s", pjob->ji_qs.ji_jobid);
  unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

  dest_que = find_queuebyname(destination);
  if (dest_que == NULL)
    {
    /* this should never happen */
    /* report the queue that was actually looked up, not the job's current one */
    snprintf(log_buf, sizeof(log_buf), "queue %s does not exist\n", destination);
    log_err(-1, __func__, log_buf);

    *my_err = PBSE_UNKQUE;
    return(-1);
    }

  /* from here on the guard's destructor covers the early returns */
  mutex_mgr dest_que_mutex = mutex_mgr(dest_que->qu_mutex, true);
  if ((pjob = svr_find_job(job_id, TRUE)) == NULL)
    {
    /* job disappeared while locking queue */
    return(PBSE_JOB_RECYCLED);
    }

  /* check the destination */
  if ((*my_err = svr_chkque(pjob, dest_que, get_variable(pjob, pbs_o_host), mtype, NULL)))
    {
    /* should this queue be retried? */
    return(should_retry_route(*my_err));
    }

  reservation = have_reservation(pjob, dest_que);
  /* dequeue job from present queue, update destination and */
  /* queue_rank for new queue and enqueue into destination  */
  dest_que_mutex.unlock();
  rc = svr_dequejob(pjob, FALSE); 
  if (rc)
    return(rc);

  snprintf(pjob->ji_qs.ji_queue, sizeof(pjob->ji_qs.ji_queue), "%s", destination);

  pjob->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank;
    
  if ((*my_err = svr_enquejob(pjob, FALSE, NULL, reservation, false)) == PBSE_JOB_RECYCLED)
    return(-1);

  if (*my_err != PBSE_NONE)
    {
    return(-1); /* should never ever get here */
    }

  if (pjob != NULL)
    {
    pjob->ji_lastdest = 0; /* reset in case of another route */
    
    job_save(pjob, SAVEJOB_FULL, 0);
    }

  return(PBSE_NONE);
  }  /* END local_move() */
Пример #12
0
/*
 * req_stat_job - service a "status job" batch request.
 *
 * The request id selects what is reported: an id starting with a digit is
 * taken as one or more comma separated job ids, an id starting with a
 * letter as a queue name, and an empty id or one starting with '@' as
 * "all jobs at the server".  Anything else is rejected with PBSE_IVALREQ,
 * and an unknown queue with PBSE_UNKQUE.
 *
 * Extension flags: 't' also reports subjobs, 'x' also reports history jobs
 * (rejected with PBSE_JOBHISTNOTSET when svr_history_enable is 0).
 *
 * The reply is sent, or the request rejected, before this function returns.
 */
void req_stat_job(struct batch_request *preq)
{
	int		    at_least_one_success = 0;
	int		    dosubjobs = 0;
	int		    dohistjobs = 0;
	char		   *name;
	job		   *pjob = NULL;
	pbs_queue	   *pque = NULL;
	struct batch_reply *preply;
	int		    rc   = 0;
	int		    type = 0;
	char		   *pnxtjid = NULL;

	/* check for any extended flag in the batch request. 't' for
	 * the sub jobs. If 'x' is there, then check if the server is
	 * configured for history job info. If not set or set to FALSE,
	 * return with PBSE_JOBHISTNOTSET error. Otherwise select history
	 * jobs.
	 */
	if (preq->rq_extend) {
		if (strchr(preq->rq_extend, (int)'t'))
			dosubjobs = 1;	/* status sub jobs of an Array Job */
		if (strchr(preq->rq_extend, (int)'x')) {
			if (svr_history_enable == 0) {
				req_reject(PBSE_JOBHISTNOTSET, 0, preq);
				return;
			}
			dohistjobs = 1;	/* status history jobs */
		}
	}

	/*
	 * first, validate the name of the requested object, either
	 * a job, a queue, or the whole server.
	 * type = 1 for a job, Array job, subjob or range of subjobs, or
	 *          a comma separated list of  the above.
	 *        2 for jobs in a queue,
	 *        3 for jobs in the server, or
	 *        0 (unchanged) when the id is invalid or names no queue.
	 */

	name = preq->rq_ind.rq_status.rq_id;

	/*
	 * The <ctype.h> classifiers are only defined for EOF and values that
	 * fit in an unsigned char, so the client-supplied byte is converted
	 * before it is classified.
	 */
	if ( isdigit((unsigned char)*name) ) {
		/* a single job id */
		type = 1;
		rc = PBSE_UNKJOBID;

	} else if ( isalpha((unsigned char)*name) ) {
		pque = find_queuebyname(name);	/* status jobs in a queue */
#ifdef NAS /* localmod 075 */
		if (pque == NULL)
			pque = find_resvqueuebyname(name);
#endif /* localmod 075 */
		if (pque)
			type = 2;
		else
			rc = PBSE_UNKQUE;

	} else if ((*name == '\0') || (*name == '@')) {
		type = 3;	/* status all jobs at server */
	} else
		rc = PBSE_IVALREQ;

	if (type == 0) {		/* is invalid - an error */
		req_reject(rc, 0, preq);
		return;
	}
	preply = &preq->rq_reply;
	preply->brp_choice = BATCH_REPLY_CHOICE_Status;
	CLEAR_HEAD(preply->brp_un.brp_status);

	rc = PBSE_NONE;

	if (type == 1) {
		/*
		 * If there is more than one job id, any status for any
		 * one job is returned, then no error is given.
		 * If a single job id is requested and there is an error
		 * the error is returned.
		 */
		pnxtjid = name;
		while ((name = parse_comma_string_r(&pnxtjid)) != NULL) {
			if ((rc = stat_a_jobidname(preq, name, dohistjobs, dosubjobs)) == PBSE_NONE)
				at_least_one_success = 1;
		}
		/* when nothing succeeded, rc is the result for the last id parsed */
		if (at_least_one_success == 1)
			reply_send(preq);
		else
			req_reject(rc, 0, preq);
		return;

	} else if (type == 2) {
		/* every job linked on the queue; stops at the first non-zero result */
		pjob = (job *)GET_NEXT(pque->qu_jobs);
		while (pjob && (rc == PBSE_NONE)) {
			rc = do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs);
			pjob = (job *)GET_NEXT(pjob->ji_jobque);
		}
	} else {
		/* every job linked on the server; stops at the first non-zero result */
		pjob = (job *)GET_NEXT(svr_alljobs);
		while (pjob && (rc == PBSE_NONE)) {
			rc = do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs);
			pjob = (job *)GET_NEXT(pjob->ji_alljobs);
		}

	}

	/* PBSE_PERM is not reported as a failure: what was gathered is sent */
	if (rc && (rc != PBSE_PERM))
		req_reject(rc, bad, preq);
	else
		reply_send(preq);
}
Пример #13
0
/**
 * @brief
 * 		Move a job to another queue in this Server.
 *
 * @par
 * 		Check the destination to see if it can accept the job.
 * 		If the job can enter the new queue, dequeue from the existing queue and
 * 		enqueue into the new queue.
 *
 * @par
 * 		Note - the destination is specified by the queue's name in the
 *		ji_qs.ji_destin element of the job structure.
 *
 * @par
 * 		On the way the job's queue name, queue rank and reservation ID
 * 		attribute are updated, and the job is saved with SAVEJOB_FULL.
 *
 * @param[in]	jobp	-	pointer to job to move
 * @param[in]	req	-	client request from a qmove client, null if a route
 *
 * @return	int
 * @retval  0	: success
 * @retval -1	: permanent failure or rejection, see pbs_errno
 * @retval  other : value of should_retry_route() when svr_chkque() refuses
 *		  the job (documented above as 1 : failed but try again later)
 */
int
local_move(job *jobp, struct batch_request *req)
{
	pbs_queue *qp;
	char	  *destination = jobp->ji_qs.ji_destin;
	int	   mtype;
	attribute *pattr;
	long	newtype = -1;


	/* search for destination queue */
	if ((qp = find_queuebyname(destination)) == NULL) {
		sprintf(log_buffer,
			"queue %s does not exist",
			destination);
		log_err(-1, __func__, log_buffer);
		pbs_errno = PBSE_UNKQUE;
		return -1;
	}

	/*
	 * if being moved at specific request of administrator, then
	 * checks on queue availability, etc. are skipped;
	 * otherwise all checks are enforced.
	 */

	if (req == NULL) {
		mtype = MOVE_TYPE_Route;	/* route */
	} else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) {
		mtype =	MOVE_TYPE_MgrMv;	/* privileged move */
	} else {
		mtype = MOVE_TYPE_Move;		/* non-privileged move */
	}

	/* the host part of the job owner is what the queue check is given */
	pbs_errno = svr_chkque(jobp, qp,
		get_hostPart(jobp->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str),
		mtype);

	if (pbs_errno) {
		/* should this queue be retried? */
		return (should_retry_route(pbs_errno));
	}

	/* dequeue job from present queue, update destination and	*/
	/* queue_rank for new queue and enqueue into destination	*/

	svr_dequejob(jobp);
	jobp->ji_myResv = NULL;
	/* bounded copy, explicitly terminated on the next line */
	strncpy(jobp->ji_qs.ji_queue, qp->qu_qs.qu_name, PBS_MAXQUEUENAME);
	jobp->ji_qs.ji_queue[PBS_MAXQUEUENAME] = '\0';

	jobp->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = ++queue_rank;
	jobp->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_MODCACHE;

	/*
	 * Keep the job's reservation ID attribute in step with the new queue:
	 * decode the reservation's ID when the queue has one, otherwise decode
	 * a null value.
	 */
	pattr = &jobp->ji_wattr[(int)JOB_ATR_reserve_ID];
	if (qp->qu_resvp) {

		job_attr_def[(int)JOB_ATR_reserve_ID].at_decode(pattr,
			(char *)0, (char *)0, qp->qu_resvp->ri_qs.ri_resvID);
		jobp->ji_myResv = qp->qu_resvp;
	} else {

		job_attr_def[(int)JOB_ATR_reserve_ID].at_decode(pattr,
			(char *)0, (char *)0, (char*)0);
	}

	/* only when the server's EligibleTimeEnable attribute is 1 */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) {

		newtype = determine_accruetype(jobp);
		if (newtype == -1)
			/* unable to determine accruetype, set it to NEW */
			(void)update_eligible_time(JOB_INITIAL, jobp);
		else
			/* found suiting accruetype, update to this */
			(void)update_eligible_time(newtype, jobp);

	}


	if ((pbs_errno = svr_enquejob(jobp)) != 0)
		return -1;		/* should never ever get here */

	jobp->ji_lastdest = 0;	/* reset in case of another route */

	(void)job_save(jobp, SAVEJOB_FULL);

	/* If a scheduling cycle is in progress, then this moved job may have
	 * had changes resulting from the move that would impact scheduling or
	 * placement, add job to list of jobs which cannot be run in this cycle.
	 */
	if ((req == NULL || (req->rq_conn != scheduler_sock)) && (scheduler_jobs_stat))
		am_jobs_add(jobp);

	return 0;
}
Пример #14
0
/*
 * req_stat_job_step2() - second and third stage of a "status job" request
 *
 * Stage 2 (only when the PollJobs server attribute is zero): walk the jobs
 * selected by cntl->sc_type and, for a running job whose MOM status is
 * older than JobStatRate, ask the MOM for fresh data with stat_to_mom().
 * When that call succeeds the function returns without freeing cntl; the
 * walk resumes from cntl->sc_jobid on the next entry.
 *
 * Stage 3: build the status reply for the selected jobs and send it.  For
 * the truncated report types every queue is walked and at most max_report
 * queued jobs per queue are included.
 *
 * cntl is freed before stage 3 builds the reply and on the stage 2 error
 * path.  It is not freed when the function returns to wait for a MOM reply
 * or when building the delta attribute list fails.
 */
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (freed on return) */

  {
  svrattrl        *pal;
  job         *pjob = NULL;

  struct batch_request *preq;

  struct batch_reply   *preply;
  int          rc = 0;

  enum TJobStatTypeEnum type;

  pbs_queue            *pque = NULL;
  int                   exec_only = 0;

  int                   IsTruncated = 0;

  long                  DTime;  /* delta time - only report full attribute list if J->MTime > DTime */

  static svrattrl      *dpal = NULL;
  
  int job_array_index = 0;
  job_array *pa = NULL;
  

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  /* NOTE:  If IsTruncated is true, should walk all queues and walk jobs in each queue
            until max_reported is reached (NYI) */

  if (dpal == NULL)
    {
    /* build 'delta' attribute list */

    svrattrl *tpal;

    /* The nodes appended below stay reachable through the static dpal for
     * later calls, so the list head they are linked to has to outlive this
     * block as well: it is static rather than automatic. */
    static tlist_head dalist;

    int aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    /* NOTE(review): pa is dereferenced below without a check - confirm
     * that get_array() cannot return NULL for an id accepted upstream */
    pa = get_array(preq->rq_ind.rq_status.rq_id);
    }

  if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long)
    {
    /* polljobs not set - indicates we may need to obtain fresh data from
       MOM */

    if (cntl->sc_jobid[0] == '\0')
      pjob = NULL;
    else
      pjob = find_job(cntl->sc_jobid);

    while (1)
      {
      if (pjob == NULL)
        {
        /* start from the first job */

        if (type == tjstJob)
          {
          pjob = find_job(preq->rq_ind.rq_status.rq_id);
          }
        else if (type == tjstQueue)
          {
          pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs);
          }
        else if (type == tjstArray)
          {
          job_array_index = 0;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL)
             job_array_index++;
         
          }
        else
          {
          if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue))
            IsTruncated = TRUE;

          pjob = (job *)GET_NEXT(svr_alljobs);
          }
        }    /* END if (pjob == NULL) */
      else
        {
        /* get next job */

        if (type == tjstJob)
          break;

        if (type == tjstQueue)
          pjob = (job *)GET_NEXT(pjob->ji_jobque);
        else
          pjob = (job *)GET_NEXT(pjob->ji_alljobs);
          
        if (type == tjstArray)
          {
          pjob = NULL;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL)
            ;
          }
        }

      if (pjob == NULL)
        break;

      /* PBS_RESTAT_JOB defaults to 30 seconds */

      if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING) &&
          ((time_now - pjob->ji_momstat) > JobStatRate))
        {
        /* go to MOM for status */

        /* remember where to resume once the MOM has answered */
        strcpy(cntl->sc_jobid, pjob->ji_qs.ji_jobid);

        if ((rc = stat_to_mom(pjob, cntl)) == PBSE_SYSTEM)
          {
          break;
          }

        if (rc != 0)
          {
          /* any other failure is ignored and the walk moves on */
          rc = 0;

          continue;
          }

        return; /* will pick up after mom replies */
        }
      }    /* END while(1) */

    if (cntl->sc_conn >= 0)
      svr_disconnect(cntl->sc_conn);  /* close connection to MOM */

    if (rc != 0)
      {
      free(cntl);

      reply_free(preply);

      req_reject(rc, 0, preq, NULL, "cannot get update from mom");

      return;
      }
    }    /* END if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) */

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if (type == tjstSummarizeArraysQueue || type == tjstSummarizeArraysServer)
    {
    update_array_statuses();
    }

  /* pick the first job of the selection */
  if (type == tjstJob)
    pjob = find_job(preq->rq_ind.rq_status.rq_id);
  else if (type == tjstQueue)
    pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs);
  else if (type == tjstSummarizeArraysQueue)
    pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs_array_sum);
  else if (type == tjstSummarizeArraysServer)
    pjob = (job *)GET_NEXT(svr_jobs_array_sum);
  else if (type == tjstArray)
    {
    job_array_index = 0;
    pjob = NULL;
    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL)
        job_array_index++;
    }
  else
    pjob = (job *)GET_NEXT(svr_alljobs);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT:  { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      ptr += strlen("delta:");

      DTime = strtol(ptr, NULL, 10);
      }
    }

  /* everything needed from cntl has been read by now */
  free(cntl);

  if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;

    /* loop through all queues */

    for (pque = (pbs_queue *)GET_NEXT(svr_queues);
         pque != NULL;
         pque = (pbs_queue *)GET_NEXT(pque->qu_link))
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */

        continue;
        }

      /* per-queue cap on reported queued jobs; TMAX_JOB when unset or negative */
      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buffer,"giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(
          PBSEVENT_SYSTEM,
          PBS_EVENTCLASS_QUEUE,
          pque->qu_qs.qu_name,
          log_buffer);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */

      for (pjob = (job *)GET_NEXT(pque->qu_jobs);
           pjob != NULL;
           pjob = (job *)GET_NEXT(pjob->ji_jobque))
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */

          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        /* jobs not modified since DTime get the short 'delta' attribute list */
        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;
        }    /* END for (pjob) */

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buffer,"sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(
          PBSEVENT_SYSTEM,
          PBS_EVENTCLASS_QUEUE,
          pque->qu_qs.qu_name,
          log_buffer);
        }
      }      /* END for (pque) */

    reply_send(preq);

    return;
    }        /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      pque = find_queuebyname(pjob->ji_qs.ji_queue);

      /* a job whose queue cannot be found cannot be shown to sit in an
       * execution queue, so it is skipped instead of dereferencing NULL */
      if ((pque == NULL) || (pque->qu_qs.qu_type != QTYPE_Execution))
        goto nextjob;
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && (rc != PBSE_PERM))
      {
      req_reject(rc, bad, preq, NULL, NULL);

      return;
      }

    /* get next job */

nextjob:

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = (job *)GET_NEXT(pjob->ji_jobque);
    else if (type == tjstSummarizeArraysQueue)
      pjob = (job *)GET_NEXT(pjob->ji_jobque_array_sum);
    else if (type == tjstSummarizeArraysServer)
      pjob = (job *)GET_NEXT(pjob->ji_jobs_array_sum);
    else if (type == tjstArray)
      {
      pjob = NULL;
      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL)
        ;
      }
    else
      pjob = (job *)GET_NEXT(pjob->ji_alljobs);

    rc = 0;
    }  /* END while (pjob != NULL) */

 
  reply_send(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Пример #15
0
/*
 * req_stat_job - service a "status job" batch request
 *
 * Classifies the request by its object id and optional extension string,
 * then hands the work to req_stat_job_step2() through a freshly allocated
 * stat_cntl structure.
 *
 * Object id (preq->rq_ind.rq_status.rq_id):
 *   leading digit   a single job, or a job array when is_array() says so
 *   leading letter  all jobs in the named queue
 *   "" or "@..."    all jobs at the server
 *   anything else   rejected with PBSE_IVALREQ
 *
 * Extension (preq->rq_extend), compared case-insensitively by prefix:
 *   "truncated"         selects the truncated server/queue variants
 *   "summarize_arrays"  selects the summarize-arrays server/queue variants
 *
 * Rejections issued here: PBSE_UNKJOBID (job not found), PBSE_UNKQUE
 * (queue not found), PBSE_IVALREQ (malformed id), PBSE_SYSTEM (malloc
 * failure).
 */
void req_stat_job(

  struct batch_request *preq) /* ptr to the decoded request   */

  {

  struct stat_cntl *cntl; /* see svrfunc.h  */
  char     *name;
  job     *pjob = NULL;
  pbs_queue    *pque = NULL;
  int      rc = 0;


  enum TJobStatTypeEnum type = tjstNONE;

  /*
   * first, validate the name of the requested object, either
   * a job, a queue, or the whole server.
   */

  /* FORMAT:  name = { <JOBID> | <QUEUEID> | '' } */

  name = preq->rq_ind.rq_status.rq_id;

  if (preq->rq_extend != NULL)
    {
    /* evaluate pbs_job_stat() 'extension' field */

    if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated")))
      {
      /* truncate response by 'max_report' */

      type = tjstTruncatedServer;
      }
    else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays")))
      {
      type = tjstSummarizeArraysServer;
      }

    }    /* END if (preq->rq_extend != NULL) */

  /*
   * The <ctype.h> classifiers require an argument representable as
   * unsigned char (or EOF).  The id comes from the request, so convert
   * through unsigned char: a plain char may be signed, and a byte >= 0x80
   * would otherwise be passed as a negative value (undefined behavior).
   */

  if (isdigit((unsigned char)*name))
    {
    /* status a single job */

    if (is_array(name))
      {
      /* an explicit "summarize_arrays" request keeps its type */

      if (type != tjstSummarizeArraysServer)
        {
        type = tjstArray;
        }

      /*
       * NOTE(review): the result is not checked here, so a missing array
       * is not rejected by this function - presumably step 2 deals with
       * it; confirm.
       */

      pjob = find_array_template(name);
      }
    else
      {
      type = tjstJob;

      if ((pjob = find_job(name)) == NULL)
        {
        rc = PBSE_UNKJOBID;
        }
      }
    }
  else if (isalpha((unsigned char)name[0]))
    {
    /* status the jobs of one queue; map the server-wide type to its queue form */

    if (type == tjstNONE)
      type = tjstQueue;
    else if (type == tjstSummarizeArraysServer)
      type = tjstSummarizeArraysQueue;
    else
      type = tjstTruncatedQueue;

    if ((pque = find_queuebyname(name)) == NULL)
      {
      rc = PBSE_UNKQUE;
      }
    }
  else if ((*name == '\0') || (*name == '@'))
    {
    /* status all jobs at server */

    if (type == tjstNONE)
      type = tjstServer;
    }
  else
    {
    rc = PBSE_IVALREQ;
    }

  if (rc != 0)
    {
    /* is invalid - an error */

    req_reject(rc, 0, preq, NULL, NULL);

    return;
    }

  /* start with an empty status reply */

  preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preq->rq_reply.brp_un.brp_status);

  cntl = (struct stat_cntl *)malloc(sizeof(*cntl));

  if (cntl == NULL)
    {
    req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

    return;
    }

  cntl->sc_type   = (int)type;

  cntl->sc_conn   = -1;
  cntl->sc_pque   = pque;
  cntl->sc_origrq = preq;
  cntl->sc_post   = req_stat_job_step2;
  cntl->sc_jobid[0] = '\0'; /* cause "start from beginning" */

  if (server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long)
    cntl->sc_post = NULL; /* we're not going to make clients wait */

  /*
   * NOTE(review): cntl is never freed in this function; presumably
   * req_stat_job_step2() takes ownership of it - confirm.
   */

  req_stat_job_step2(cntl); /* go to step 2, see if running is current */

  return;
  }  /* END req_stat_job() */
Пример #16
0
/*
 * req_stat_que - service a "status queue" batch request
 *
 * The object id in the request selects what is reported:
 *   ""  or "@..."   every queue on the svr_queues list
 *   anything else   the single queue with that name
 *
 * On success the accumulated status reply is sent with reply_send().
 * An unknown queue name is rejected with PBSE_UNKQUE.  Any other non-zero
 * result from status_que() frees the partial reply and rejects the request
 * with that code, except that PBSE_PERM while walking all queues is
 * cleared and the walk moves on to the next queue.
 */
void req_stat_que(

  struct batch_request *preq) /* ptr to the decoded request   */

  {
  char               *target = preq->rq_ind.rq_status.rq_id;
  int                 all_queues;
  pbs_queue          *cur = NULL;

  struct batch_reply *reply;
  int                 status = 0;

  /* an empty id, or one starting with '@', means "all queues" */

  all_queues = ((target[0] == '\0') || (target[0] == '@'));

  if (!all_queues)
    {
    /* a specific queue was requested - it has to exist */

    cur = find_queuebyname(target);

    if (cur == NULL)
      {
      req_reject(PBSE_UNKQUE, 0, preq, NULL, "cannot locate queue");

      return;
      }
    }

  /* prepare an empty status reply */

  reply = &preq->rq_reply;

  reply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(reply->brp_un.brp_status);

  if (all_queues)
    {
    /* walk the server's queue list, appending each queue's status */

    for (cur = (pbs_queue *)GET_NEXT(svr_queues);
         cur != NULL;
         cur = (pbs_queue *)GET_NEXT(cur->qu_link))
      {
      status = status_que(cur, preq, &reply->brp_un.brp_status);

      /* a permission failure on one queue does not abort the walk */

      if (status == PBSE_PERM)
        status = 0;

      if (status != 0)
        break;
      }
    }
  else
    {
    /* only the named queue */

    status = status_que(cur, preq, &reply->brp_un.brp_status);
    }

  if (status == 0)
    {
    reply_send(preq);

    return;
    }

  /*
   * failure: discard whatever was built and reject the request.
   * 'bad' is not declared in this function, it comes from file scope;
   * presumably status_que() records the offending attribute index
   * there - confirm.
   */

  reply_free(reply);

  req_reject(status, bad, preq, NULL, "status_queue failed");

  return;
  }  /* END req_stat_que() */