Example #1
0
/*
 * reroute_job - attempt to route a job, but only out of a push-routing queue.
 *
 * When LOGLEVEL >= 7 the job id is logged first.  job_route() is called only
 * if pque is non-NULL and its type is QTYPE_RoutePush.  If job_route()
 * returns PBSE_ROUTEREJ, PBSE_ROUTEEXPD or PBSE_QUENOEN, the job is aborted
 * with the matching message; any other result is simply passed back.
 *
 * @param pjob  job to route; dereferenced for logging, so must not be NULL
 * @param pque  queue to check; may be NULL, in which case nothing is routed
 * @return PBSE_NONE when job_route() was not called, otherwise the value
 *         job_route() returned
 */
int reroute_job(

  job *pjob,
  pbs_queue *pque)

  {
  int        rc = PBSE_NONE;
  char       log_buf[LOCAL_LOG_BUF_SIZE];

  if (LOGLEVEL >= 7)
    {
    /* bounded copy: never write past the end of the local buffer */
    snprintf(log_buf, sizeof(log_buf), "%s", pjob->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  if ((pque != NULL) &&
      (pque->qu_qs.qu_type == QTYPE_RoutePush))
    {
    rc = job_route(pjob);

    /* job_abt() is handed &pjob; pjob is not touched again afterwards */
    if (rc == PBSE_ROUTEREJ)
      job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    else if (rc == PBSE_ROUTEEXPD)
      job_abt(&pjob, msg_routexceed);
    else if (rc == PBSE_QUENOEN)
      job_abt(&pjob, msg_err_noqueue);
    }

  return(rc);
  } /* END reroute_job() */
Example #2
0
/*
 * reroute_job - attempt to route a job and abort it on a terminal failure.
 *
 * When LOGLEVEL >= 8 the job id is logged first.  job_route() is then called
 * unconditionally.  If it returns PBSE_ROUTEREJ, PBSE_ROUTEEXPD or
 * PBSE_QUENOEN, the job is aborted with the matching message; any other
 * result is simply passed back.
 *
 * @param pjob  job to route; dereferenced for logging, so must not be NULL
 * @return the value job_route() returned
 */
int reroute_job(

    job *pjob)

{
    int        rc = PBSE_NONE;
    char       log_buf[LOCAL_LOG_BUF_SIZE];

    if (LOGLEVEL >= 8)
    {
        /* bounded copy: never write past the end of the local buffer */
        snprintf(log_buf, sizeof(log_buf), "%s", pjob->ji_qs.ji_jobid);
        LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

    rc = job_route(pjob);

    /* job_abt() is handed &pjob; pjob is not touched again afterwards */
    if (rc == PBSE_ROUTEREJ)
        job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    else if (rc == PBSE_ROUTEEXPD)
        job_abt(&pjob, msg_routexceed);
    else if (rc == PBSE_QUENOEN)
        job_abt(&pjob, msg_err_noqueue);

    return(rc);
} /* END reroute_job() */
Example #3
0
/*
 * queue_route - walk the jobs of a queue and try to route those that are due.
 *
 * A job is passed to job_route() once its ji_rteretry time is not later than
 * time_now.  The job is aborted when job_route() returns PBSE_ROUTEREJ or
 * PBSE_ROUTEEXPD; every other result leaves the job untouched here.
 *
 * @param pque  queue whose job list (qu_jobs) is traversed
 */
void queue_route(

  pbs_queue *pque)

  {
  job *cur;
  job *following;
  int  result;

  for (cur = (job *)GET_NEXT(pque->qu_jobs); cur != NULL; cur = following)
    {
    /* grab the successor up front; presumably job_abt() below can
     * invalidate cur, so the link must not be read afterwards */
    following = (job *)GET_NEXT(cur->ji_jobque);

    /* retry time still in the future: skip this job for now */
    if (cur->ji_qs.ji_un.ji_routet.ji_rteretry > time_now)
      continue;

    result = job_route(cur);

    if (result == PBSE_ROUTEREJ)
      job_abt(&cur, pbse_to_txt(PBSE_ROUTEREJ));
    else if (result == PBSE_ROUTEEXPD)
      job_abt(&cur, msg_routexceed);
    }
  }
Example #4
0
static void post_routejob(

  struct work_task *pwt)

  {
  int  newstate;
  int  newsub;
  int  r;
  int  stat = pwt->wt_aux;
  char *id = "post_routejob";
  job *jobp = (job *)pwt->wt_parm1;

  if (WIFEXITED(stat))
    {
    r = WEXITSTATUS(stat);
    }
  else
    {
    r = 2;

    sprintf(log_buffer, msg_badexit,
            stat);

    strcat(log_buffer, id);

    log_event(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      log_buffer);
    }

  switch (r)
    {
    case 0:  /* normal return, job was routed */

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
        remove_stagein(jobp);

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED)
        remove_checkpoint(jobp);

      job_purge(jobp); /* need to remove server job struct */

      return;

      /*NOTREACHED*/

      break;

    case 1:  /* permanent rejection (or signal) */

      if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT)
        {
        /* job delete in progress, just set to queued status */

        svr_setjobstate(jobp, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT);

        return;
        }

      add_dest(jobp);  /* else mark destination as bad */

      /* fall through */

    default : /* try routing again */

      /* force re-eval of job state out of Transit */

      svr_evaljobstate(jobp, &newstate, &newsub, 1);
      svr_setjobstate(jobp, newstate, newsub);

      if ((r = job_route(jobp)) == PBSE_ROUTEREJ)
        job_abt(&jobp, pbse_to_txt(PBSE_ROUTEREJ));
      else if (r != 0)
        job_abt(&jobp, msg_routexceed);

      break;
    }  /* END switch (r) */

  return;
  }  /* END post_routejob() */
Example #5
0
/*
 * finish_routing_processing - act on the outcome of a routing attempt.
 *
 * Does nothing when pjob is NULL.  Otherwise, depending on status:
 *   LOCUTION_SUCCESS - remove stage-in / checkpoint files when the matching
 *                      server flags are set, then purge the job.  Both
 *                      removal helpers receive &pjob and pjob is re-checked
 *                      for NULL after each one.
 *   LOCUTION_FAIL    - if a delete is in progress (substate ABORT) set the
 *                      job to queued, mail the owner, unlock and return;
 *                      otherwise mark the destination bad and retry.
 *   anything else    - mail the owner, re-evaluate the job state and call
 *                      job_route() again; abort the job on a non-zero
 *                      result, unlock it on success.
 *
 * @param pjob    job that was being routed; may be NULL
 * @param status  result of the routing attempt
 */
void finish_routing_processing(

  job *pjob,
  int  status)

  {
  int          state;
  int          substate;
  int          route_rc;

  if (pjob == NULL)
    return;

  if (LOGLEVEL >= 10)
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, pjob->ji_qs.ji_jobid);

  if (status == LOCUTION_SUCCESS)
    {
    /* normal return, job was routed */

    if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
      remove_stagein(&pjob);

    if (pjob == NULL)
      return;

    if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED)
      remove_checkpoint(&pjob);

    if (pjob == NULL)
      return;

    svr_job_purge(pjob); /* the server-side job struct is no longer needed */

    return;
    }

  if (status == LOCUTION_FAIL)
    {
    /* permanent rejection (or signal) */

    if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_ABORT)
      {
      /* job delete in progress, just set to queued status */
      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE);

      svr_mailowner(pjob, 'a', TRUE, "Couldn't route job to remote server");

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      return;
      }

    add_dest(pjob);  /* mark destination as bad, then retry below */
    }

  /* retry path: reached for LOCUTION_FAIL (non-abort) and all other codes */

  svr_mailowner(pjob, 'a', TRUE, "Couldn't route job to remote server");

  /* force re-eval of job state out of Transit */
  svr_evaljobstate(*pjob, state, substate, 1);
  svr_setjobstate(pjob, state, substate, FALSE);

  route_rc = job_route(pjob);

  if (route_rc == PBSE_ROUTEREJ)
    {
    job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    }
  else if (route_rc != 0)
    {
    job_abt(&pjob, msg_routexceed);
    }
  else
    {
    /* routed again successfully: release the job here */
    unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
    }
  } /* END finish_routing_processing() */
Example #6
0
/**
 * @brief
 * 		post_routejob - clean up action for child started in net_move/send_job
 *		   to "route" a job to another server
 * @par
 * 		If route was successfull, delete job.
 * @par
 * 		If route didn't work, mark destination not to be tried again for this
 * 		job and call route again.
 *
 * @param[in]	pwt	-	work task structure
 *
 * @return	none.
 */
static void
post_routejob(struct work_task *pwt)
{
	int	 newstate;
	int	 newsub;
	int	 r;
	int	 stat = pwt->wt_aux;
	job	*jobp = (job *)pwt->wt_parm2;

	if (jobp == NULL) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, "", "post_routejob failed, jobp NULL");
		return;
	}

	if (WIFEXITED(stat)) {
		r = WEXITSTATUS(stat);
	} else {
		r = SEND_JOB_FATAL;
		(void)sprintf(log_buffer, msg_badexit, stat);
		(void)strcat(log_buffer, __func__);
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_NOTICE,
			jobp->ji_qs.ji_jobid, log_buffer);
	}

	switch (r) {
		case SEND_JOB_OK:		/* normal return, job was routed */

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
				remove_stagein(jobp);
			/*
			 * If the server is configured to keep job history and the job
			 * is created here, do not purge the job structure but save
			 * it for history purpose. No need to check for sub-jobs as
			 * sub jobs can not be routed.
			 */
			if (svr_chk_history_conf())
				svr_setjob_histinfo(jobp, T_MOV_JOB);
			else
				job_purge(jobp); /* need to remove server job struct */
			return;
		case SEND_JOB_FATAL:		/* permanent rejection (or signal) */
			if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT) {

				/* Job Delete in progress, just set to queued status */

				(void)svr_setjobstate(jobp, JOB_STATE_QUEUED,
					JOB_SUBSTATE_ABORT);
				return;
			}
			add_dest(jobp);		/* else mark destination as bad */
			/* fall through */
		default :	/* try routing again */
			/* force re-eval of job state out of Transit */
			svr_evaljobstate(jobp, &newstate, &newsub, 1);
			(void)svr_setjobstate(jobp, newstate, newsub);
			jobp->ji_retryok = 1;
			if ((r = job_route(jobp)) == PBSE_ROUTEREJ)
				(void)job_abt(jobp, msg_routebad);
			else if (r != 0)
				(void)job_abt(jobp, msg_routexceed);
			break;
	}
	return;
}