Beispiel #1
0
/*
 * ensure_deleted - work task handler that purges the job named in a
 * delete request without consulting MOM.
 *
 * The batch request is taken from ptask->wt_parm1.  If the job named in the
 * request no longer exists nothing is done.  Otherwise the purge is logged,
 * the job's nodes are freed, assigned resources are decremented when the
 * job sits in an execution queue, and the job is purged.
 */
void ensure_deleted(

  struct work_task *ptask)  /* I */

  {
  struct batch_request *delete_req = ptask->wt_parm1;
  job                  *victim     = find_job(delete_req->rq_ind.rq_delete.rq_objname);

  /* only act when the job still exists */

  if (victim != NULL)
    {
    sprintf(log_buffer, "purging job without checking MOM");

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, victim->ji_qs.ji_jobid, log_buffer);

    free_nodes(victim);

    /* resource accounting is only adjusted for execution queues */

    if (victim->ji_qhdr->qu_qs.qu_type == QTYPE_Execution)
      set_resc_assigned(victim, DECR);

    job_purge(victim);
    }

  } /* END ensure_deleted() */
Beispiel #2
0
static void close_quejob(

  int sfds)

  {
  job *pjob;
  job *npjob;

  pjob = (job *)GET_NEXT(svr_newjobs);

  while (pjob != NULL)
    {
    npjob = GET_NEXT(pjob->ji_alljobs);

    if (pjob->ji_qs.ji_un.ji_newt.ji_fromsock == sfds)
      {
      if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_TRANSICM)
        {

#ifndef PBS_MOM

        if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE)
          {
          /*
           * the job was being created here for the first time
           * go ahead and enqueue it as QUEUED; otherwise, hold
           * it here as TRANSICM until we hear from the sending
           * server again to commit.
           */

          delete_link(&pjob->ji_alljobs);

          pjob->ji_qs.ji_state = JOB_STATE_QUEUED;
          pjob->ji_qs.ji_substate = JOB_SUBSTATE_QUEUED;

          if (svr_enquejob(pjob))
            job_abt(&pjob, msg_err_noqueue);
          }

#endif /* PBS_MOM */

        }
      else
        {
        /* else delete the job */

        delete_link(&pjob->ji_alljobs);

        job_purge(pjob);
        }

      break;
      }  /* END if (..) */

    pjob = npjob;
    }

  return;
  }  /* END close_quejob() */
Beispiel #3
0
/*
 * array_delete - remove a job array structure from memory and from disk.
 *
 * Used when the number of jobs that belong to the array becomes zero.
 * The array is unlinked from the server's list of all arrays, its on-disk
 * file is removed, its request tokens and job pointer table are freed, the
 * template job (if any) is purged and finally the struct itself is freed.
 * pa must not be used after this call.
 *
 * A failure to remove the on-disk file is logged but not reported to the
 * caller: the function currently always returns 0.
 */
int array_delete(job_array *pa)
  {

  char path[MAXPATHLEN + 1];
  array_request_node *rn;
  int path_len;


  /* first thing to do is take this out of the servers list of all arrays */
  delete_link(&pa->all_arrays);


  /* delete the on disk copy of the struct */

  /* build the file name with a bounded write so an over-long prefix
     cannot overflow path */
  path_len = snprintf(path, sizeof(path), "%s%s%s",
                      path_arrays,
                      pa->ai_qs.fileprefix,
                      ARRAY_FILE_SUFFIX);

  if ((path_len < 0) || ((size_t)path_len >= sizeof(path)))
    {
    snprintf(log_buffer, sizeof(log_buffer),
             "array file name for %s is too long, on-disk copy not deleted",
             pa->ai_qs.fileprefix);
    log_err(-1, "array_delete", log_buffer);
    }
  else if (unlink(path))
    {
    /* capture errno before any other library call can change it */
    int unlink_errno = errno;

    snprintf(log_buffer, sizeof(log_buffer), "unable to delete %s", path);
    log_err(unlink_errno, "array_delete", log_buffer);
    }


  /* clear array request linked list: always take the current head, since
     each iteration unlinks and frees the node it just fetched */

  for (rn = (array_request_node*)GET_NEXT(pa->request_tokens);
       rn != NULL;
       rn = (array_request_node*)GET_NEXT(pa->request_tokens))
    {
    delete_link(&rn->request_tokens_link);
    free(rn);
    }

  /* free the memory for the job pointers */
  free(pa->jobs);

  /* purge the "template" job, 
     this also deletes the shared script file for the array*/
  if (pa->template_job)
    {
    job_purge(pa->template_job);
    }
    
  /* free the memory allocated for the struct */
  free(pa);

  return 0;
  }
Beispiel #4
0
/**
 * @brief
 * 		force_reque - requeue (rerun) a job
 *
 * @par
 * 		Releases the job's resources, writes a rerun record to the
 * 		accounting file and clears the attributes that describe where the
 * 		job was executing.  A subjob is instead put in substate RERUN3 and
 * 		purged; any other job has its state re-evaluated and set.
 *
 * @param[in,out]	pjob	-	job which needs to be rerun
 */
void
force_reque(job *pjob)
{
	/* attributes describing the previous execution; freed in this order */
	const int run_attrs[] = {
		(int)JOB_ATR_exec_host,
		(int)JOB_ATR_exec_host2,
		(int)JOB_ATR_exec_vnode,
		(int)JOB_ATR_pset,
		(int)JOB_ATR_jobdir	/* job dir has no meaning for re-queued jobs */
	};
	const int n_run_attrs = (int)(sizeof(run_attrs) / sizeof(run_attrs[0]));
	int  k;
	int  state_out;
	int  substate_out;

	pjob->ji_modified = 1;
	pjob->ji_momhandle = -1;
	pjob->ji_mom_prot = PROT_INVALID;

	/* simulate rerun: release the resources held by the job */
	rel_resc(pjob);

	/* note in accounting file */
	account_jobend(pjob, pjob->ji_acctrec, PBS_ACCT_RERUN);

	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SubJob) {
		/*
		 * A subjob: substate RERUN3 causes the tracking table entry to
		 * be reset to Queued; the job struct itself is discarded.
		 */
		pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN3;
		job_purge(pjob);
		return;
	}

	/*
	 * Clear any JOB_SVFLG_Actsuspd flag too, as the job is no longer
	 * suspended (User busy).  A suspended job is rerun in case of a
	 * MOM failure after the workstation becomes active(busy).
	 */
	pjob->ji_qs.ji_svrflags &= ~(JOB_SVFLG_Actsuspd | JOB_SVFLG_StagedIn | JOB_SVFLG_CHKPT);

	for (k = 0; k < n_run_attrs; k++)
		job_attr_def[run_attrs[k]].at_free(&pjob->ji_wattr[run_attrs[k]]);

	/* let the server work out the proper state and apply it */
	svr_evaljobstate(pjob, &state_out, &substate_out, 1);
	(void)svr_setjobstate(pjob, state_out, substate_out);
}
/*
 * req_rdytocommit - handle a "ready to commit" request for a job that is
 * being received on this connection.
 *
 * The job is looked up with locate_new_job() using the connection socket
 * and the job id in the request.  It must be in substate TRANSIN; it is
 * then moved to state TRANSIT / substate TRANSICM, saved with job_save()
 * and the request is acknowledged with the job id.
 *
 * Failure handling:
 *   - unknown job            -> request rejected with PBSE_UNKJOBID
 *   - job not in TRANSIN     -> request rejected with PBSE_IVALREQ
 *   - job_save() fails       -> state changes rolled back, request rejected
 *                               with PBSE_SYSTEM
 *   - reply cannot be sent   -> connection closed and job purged
 */
void req_rdytocommit(

  struct batch_request *preq)  /* I */

  {
  job *pj;
  int  sock = preq->rq_conn;

  int  OrigState;
  int  OrigSState;
  char OrigSChar;
  long OrigFlags;

  int  saved_errno;

  pj = locate_new_job(sock, preq->rq_ind.rq_rdytocommit);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "ready to commit job");
    }

  if (pj == NULL)
    {
    log_err(errno, "req_rdytocommit", "unknown job id");

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    /* FAILURE */

    return;
    }

  if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSIN)
    {
    log_err(errno, "req_rdytocommit", "cannot commit job in unexpected state");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

    /* FAILURE */

    return;
    }

  /* remember the current state so it can be restored if the save fails */

  OrigState  = pj->ji_qs.ji_state;

  OrigSState = pj->ji_qs.ji_substate;
  OrigSChar  = pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char;
  OrigFlags  = pj->ji_wattr[(int)JOB_ATR_state].at_flags;

  pj->ji_qs.ji_state    = JOB_STATE_TRANSIT;
  pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSICM;
  pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char = 'T';
  pj->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_SET;

  if (job_save(pj, SAVEJOB_NEW) == -1)
    {
    char tmpLine[1024];

    /* capture errno right away: the formatting calls below may change it */

    saved_errno = errno;

    snprintf(tmpLine, sizeof(tmpLine), "cannot save job - errno=%d - %s",
             saved_errno,
             strerror(saved_errno));

    log_err(saved_errno, "req_rdytocommit", tmpLine);

    /* commit failed, backoff state changes */

    pj->ji_qs.ji_state    = OrigState;
    pj->ji_qs.ji_substate = OrigSState;
    pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char = OrigSChar;
    pj->ji_wattr[(int)JOB_ATR_state].at_flags = OrigFlags;

    req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine);

    /* FAILURE */

    return;
    }

  /* acknowledge the request with the job id */

  if (reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_RdytoCom) != 0)
    {
    /* reply failed, purge the job and close the connection */

    saved_errno = errno;

    snprintf(log_buffer, sizeof(log_buffer), "cannot report jobid - errno=%d - %s",
             saved_errno,
             strerror(saved_errno));

    log_err(saved_errno, "req_rdytocommit", log_buffer);

    close_conn(sock);

    job_purge(pj);

    /* FAILURE */

    return;
    }

  if (LOGLEVEL >= 6)
    {
    /* pj is known to be non-NULL here */

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pj->ji_qs.ji_jobid,
      "ready to commit job completed");
    }

  return;
  }  /* END req_rdytocommit() */
/*
 * req_quejob - handle a queue-job request sent by a server.
 *
 * Requests that do not come from a server are rejected.  The job id from
 * the request is looked up among existing jobs and among the jobs on
 * svr_newjobs:
 *   - an existing job in substate RUNNING causes the request to be rejected;
 *   - an existing job flagged JOB_SVFLG_CHECKPOINT_FILE is kept and only the
 *     ATTR_checkpoint_name and ATTR_v attributes from the request are applied;
 *   - any other existing job is unlinked and re-initialised;
 *   - otherwise a new job structure is allocated.
 * The file prefix of the job is taken from the ATTR_hashname attribute of
 * the request.  After the attributes are decoded the request is
 * acknowledged with the job id and the job is linked onto svr_newjobs in
 * substate TRANSIN.
 *
 * Any attribute that is unknown, not writable, or fails to decode causes
 * the job to be purged and a bad-attribute reply to be sent.
 */
void req_quejob(

  struct batch_request *preq) /* ptr to the decoded request   */

  {
  char  *id = "req_quejob";

  char   basename[PBS_JOBBASE + 1];
  int    created_here = 0;
  int    index;
  char  *jid;
  attribute_def *pdef;
  job   *pj;
  svrattrl *psatl;
  int    rc;
  int    sock = preq->rq_conn;

  int    IsCheckpoint = 0;

  /* start from an empty prefix so that a request without ATTR_hashname
     never leads to reading an uninitialized buffer below */

  basename[0] = '\0';

  /* set basic (user) level access permission */

  resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_Creat;

  if (PBSNodeCheckProlog)
    {
    check_state(1);

    mom_server_all_update_stat();

    if (internal_state & INUSE_DOWN)
      {
      req_reject(PBSE_MOMREJECT,0,preq,NULL,NULL);

      return;
      }
    }

  if (preq->rq_fromsvr)
    {
    /* from another server - accept the extra attributes */

    resc_access_perm |= ATR_DFLAG_MGWR | ATR_DFLAG_SvWR | ATR_DFLAG_MOM;

    jid = preq->rq_ind.rq_queuejob.rq_jid;
    }
  else
    {
    /* request must be from server */

    log_err(errno, id, "request not from server");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, "request not received from server");

    return;
    }

  /* does job already exist, check both old and new jobs */

  if ((pj = find_job(jid)) == NULL)
    {
    pj = (job *)GET_NEXT(svr_newjobs);

    while (pj != NULL)
      {
      if (!strcmp(pj->ji_qs.ji_jobid, jid))
        break;

      pj = (job *)GET_NEXT(pj->ji_alljobs);
      }
    }

  /*
   * New job ...
   *
   * for MOM - rather than make up a hashname, we use the name sent
   * to us by the server as an attribute.
   */

  psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_queuejob.rq_attr);

  while (psatl != NULL)
    {
    if (!strcmp(psatl->al_name,ATTR_hashname))
      {
      /* the value comes from the request: refuse a name that does not
         fit rather than overflow the fixed-size buffer; nothing has been
         modified yet, so a plain reject is sufficient */

      if (strlen(psatl->al_value) >= sizeof(basename))
        {
        log_err(-1, id, "job file prefix in request is too long");

        req_reject(PBSE_IVALREQ, 0, preq, NULL, "job file prefix is too long");

        return;
        }

      strcpy(basename,psatl->al_value);

      break;
      }

    psatl = (svrattrl *)GET_NEXT(psatl->al_link);
    }

  if (pj != NULL)
    {
    /* newly queued job already exists */

    if (pj->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING)
      {
      /* FAILURE - job exists and is running */

      log_err(errno,id,"cannot queue new job, job exists and is running");

      req_reject(PBSE_JOBEXIST,0,preq,NULL,"job is running");

      return;
      }

    /* if checkpointed, then keep old and skip rest of process */

    if (pj->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE)
      {
      IsCheckpoint = 1;
      }  /* END if (pj->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) */
    else
      {
      /* unlink job from svr_alljobs since it will be placed on newjobs */

      delete_link(&pj->ji_alljobs);
      }
    }  /* END if (pj != NULL) */
  else
    {
    /* if not already here, allocate job struct */

    if ((pj = job_alloc()) == NULL)
      {
      /* FAILURE */

      req_reject(PBSE_SYSTEM, 0, preq, NULL, "cannot allocate new job structure");

      return;
      }
    }    /* END else (pj != NULL) */

  if (IsCheckpoint == 0)
    {
    strcpy(pj->ji_qs.ji_jobid,jid);

    /* basename is empty when the request carried no ATTR_hashname */

    strcpy(pj->ji_qs.ji_fileprefix,basename);

    pj->ji_modified       = 1;

    pj->ji_qs.ji_svrflags = created_here;

    pj->ji_qs.ji_un_type  = JOB_UNION_TYPE_NEW;
    }

  /* decode attributes from request into job structure */

  psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_queuejob.rq_attr);

  while (psatl != NULL)
    {
    if (IsCheckpoint == 1)
      {
      /* for a checkpointed job only the checkpoint name and the
         variable list are taken from the request */

      if (strcmp(psatl->al_name,ATTR_checkpoint_name) &&
          strcmp(psatl->al_name,ATTR_v))
        {
        psatl = (svrattrl *)GET_NEXT(psatl->al_link);

        continue;
        }
      }

    /* identify the attribute by name */

    index = find_attr(job_attr_def,psatl->al_name,JOB_ATR_LAST);

    if (index < 0)
      {
      /* FAILURE */

      /* didn`t recognize the name */

      job_purge(pj);   /* CRI - 12/20/2004 */

      reply_badattr(PBSE_NOATTR,1,psatl,preq);

      return;
      }

    pdef = &job_attr_def[index];

    /* Is attribute not writeable by manager or by a server? */

    if ((pdef->at_flags & resc_access_perm) == 0)
      {
      /* FAILURE */

      job_purge(pj);

      reply_badattr(PBSE_ATTRRO,1,psatl,preq);

      return;
      }

    /* decode attribute */

    if (!strcmp(psatl->al_name,ATTR_v))
      {
      /* the variable list is merged rather than decoded by at_decode */

      rc = decode_arst_merge(
             &pj->ji_wattr[index],
             psatl->al_name,
             psatl->al_resc,
             psatl->al_value);
      }
    else
      {
      rc = pdef->at_decode(
             &pj->ji_wattr[index],
             psatl->al_name,
             psatl->al_resc,
             psatl->al_value);
      }

    if (rc != 0)
      {
      /* FAILURE */

      /* all errors are fatal for MOM */

      job_purge(pj);

      reply_badattr(rc,1,psatl,preq);

      return;
      }

    if (psatl->al_op == DFLT)
      {
      if (psatl->al_resc)
        {
        resource     *presc;
        resource_def *prdef;

        prdef = find_resc_def(svr_resc_def,psatl->al_resc,svr_resc_size);

        if (prdef == NULL)
          {
          job_purge(pj);

          /* NOTE(review): rc is always 0 at this point (non-zero was
             handled above), so this reply carries error code 0 -
             confirm whether an "unknown resource" code was intended */

          reply_badattr(rc,1,psatl, preq);

          return;
          }

        presc = find_resc_entry(&pj->ji_wattr[index],prdef);

        if (presc != NULL)
          presc->rs_value.at_flags |= ATR_VFLAG_DEFLT;
        }
      else
        {
        pj->ji_wattr[index].at_flags |= ATR_VFLAG_DEFLT;
        }
      }    /* END if (psatl->al_op == DFLT) */

    psatl = (svrattrl *)GET_NEXT(psatl->al_link);
    }      /* END while (psatl != NULL) */

  if (IsCheckpoint == 1)
    {
    pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSIN;

    if (reply_jobid(preq,pj->ji_qs.ji_jobid,BATCH_REPLY_CHOICE_Queue) == 0)
      {
      /* move the existing job over to the new jobs list */

      delete_link(&pj->ji_alljobs);

      append_link(&svr_newjobs,&pj->ji_alljobs,pj);

      pj->ji_qs.ji_un_type = JOB_UNION_TYPE_NEW;
      pj->ji_qs.ji_un.ji_newt.ji_fromsock = sock;
      pj->ji_qs.ji_un.ji_newt.ji_fromaddr = get_connectaddr(sock);
      pj->ji_qs.ji_un.ji_newt.ji_scriptsz = 0;

      /* Per Eric R., req_mvjobfile was giving error in open_std_file, 
         showed up as fishy error message */

      if (pj->ji_grpcache != NULL)
        {
        free(pj->ji_grpcache);
        pj->ji_grpcache = NULL;
        }
      }
    else
      {
      close_conn(sock);
      }

    /* SUCCESS */

    return;
    }

  /* set remaining job structure elements */

  pj->ji_qs.ji_state =    JOB_STATE_TRANSIT;

  pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSIN;

  pj->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long = (long)time_now;

  pj->ji_wattr[(int)JOB_ATR_mtime].at_flags |= ATR_VFLAG_SET;

  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_NEW;

  pj->ji_qs.ji_un.ji_newt.ji_fromsock = sock;

  pj->ji_qs.ji_un.ji_newt.ji_fromaddr = get_connectaddr(sock);

  pj->ji_qs.ji_un.ji_newt.ji_scriptsz = 0;

  /* acknowledge the request with the job id */

  if (reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Queue) != 0)
    {
    /* reply failed, purge the job and close the connection */

    close_conn(sock);

    job_purge(pj);

    return;
    }

  /* link job into server's new jobs list request  */

  append_link(&svr_newjobs, &pj->ji_alljobs, pj);

  return;
  }  /* END req_quejob() */
Beispiel #7
0
/*
 * setup_array_struct - create the job_array bookkeeping structure for an
 * array job, using pjob as the template job.
 *
 * The new structure is linked into svr_jobarrays, the template job is
 * saved, the slot limit and the array request string are parsed, the table
 * of job pointers is allocated and the array is saved with array_save().
 *
 * Returns:
 *   0                   success
 *   1                   allocation failure or the template job could not
 *                       be saved
 *   INVALID_SLOT_LIMIT  set_slot_limit() rejected the request
 *   ARRAY_TOO_LARGE     more jobs requested than SRV_ATR_MaxArraySize allows
 *   2                   the array request string contained bad tokens
 *
 * On every non-zero return pjob has been purged (directly, or through
 * array_delete() which purges the template job) and no job_array for it
 * remains allocated, so the caller must not touch pjob again.
 */
int setup_array_struct(job *pjob)
  {
  job_array *pa;

  /* struct work_task *wt; */
  array_request_node *rn;
  int bad_token_count;
  int array_size;
  int rc;
  char *submit_host;

  /* setup a link to this job array in the servers all_arrays list */
  pa = (job_array *)calloc(1,sizeof(job_array));

  if (pa == NULL)
    {
    log_err(errno, "setup_array_struct", "cannot allocate job array structure");

    /* keep the contract that a failed setup leaves no template job behind */
    job_purge(pjob);

    return 1;
    }

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
  
  pa->template_job = pjob;

  /*pa->ai_qs.array_size  = pjob->ji_wattr[(int)JOB_ATR_job_array_size].at_val.at_long;*/

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);

  /* bounded copies that always leave the destination NUL-terminated */
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s",
    pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);

  submit_host = get_variable(pjob, pbs_o_host);

  if (submit_host != NULL)
    {
    snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s",
      submit_host);
    }

  pa->ai_qs.num_cloned = 0;
  CLEAR_LINK(pa->all_arrays);
  CLEAR_HEAD(pa->request_tokens);
  append_link(&svr_jobarrays, &pa->all_arrays, (void*)pa);

 if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    /* log while pjob is still valid */
    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
      }

    /* do not leave the half-built array on the server list pointing at a
       job that is about to be purged; nothing else hangs off pa yet */
    delete_link(&pa->all_arrays);
    free(pa);

    job_purge(pjob);

    return 1;
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    /* build and emit the message before array_delete() frees pa */
    snprintf(log_buffer,sizeof(log_buffer),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      server.sv_attr[SRV_ATR_MaxSlotLimit].at_val.at_long);
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      pa->ai_qs.parent_id,
      log_buffer);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;
  
  bad_token_count =

    parse_array_request(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
                        &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;
  while (rn != NULL) 
    {
    if (rn->end > array_size)
      array_size = rn->end;
    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */
  array_size++; 

  if (server.sv_attr[SRV_ATR_MaxArraySize].at_flags & ATR_VFLAG_SET)
    {
    int max_array_size = server.sv_attr[SRV_ATR_MaxArraySize].at_val.at_long;
    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array: zero-filled table of job pointers */
  pa->jobs = calloc((size_t)array_size, sizeof(job *));

  if (pa->jobs == NULL)
    {
    log_err(errno, "setup_array_struct", "cannot allocate job pointer table");

    array_delete(pa);

    return 1;
    }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);
    return 2;
    }

  return 0;

  }
Beispiel #8
0
/**
 * @brief
 * 		create_subjob - create a Subjob from the parent Array Job
 * 		Certain attributes are changed or left out
 * @par
 * 		The fixed save area (ji_qs) is copied from the parent, the
 * 		attributes listed in attrs_to_copy are transferred by encoding and
 * 		decoding them, the subjob specific attributes are set and the new
 * 		job is enqueued with svr_enquejob().
 *
 * @param[in]	parent - pointer to parent Job
 * @param[in]	newjid -  new job id
 * @param[out]	rc -  return code; PBSE_NONE on success, otherwise the
 * 			reason no subjob was created
 * @return	pointer to new job
 * @retval  NULL	- error, *rc holds the error code
 */
job *
create_subjob(job *parent, char *newjid, int *rc)
{
	pbs_list_head  attrl;
	int	   i;
	int	   j;
	int	   indx;
	char	  *index;
	attribute_def *pdef;
	attribute *ppar;
	attribute *psub;
	svrattrl  *psatl;
	job 	  *subj;
	long	   eligibletime;
	long	    time_msec;
#ifdef	WIN32
	struct	_timeb	    tval;
#else
	struct timeval	    tval;
#endif


	if ((parent->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0) {
		*rc = PBSE_IVALREQ;
		return NULL;	/* parent not an array job */
	}

	/* find and copy the index */

	if ((index = get_index_from_jid(newjid)) == NULL) {
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	if ((indx = subjob_index_to_offset(parent, index)) == -1) {
		*rc = PBSE_UNKJOBID;
		return NULL;
	}
	/* only a subjob still recorded as queued in the tracking table may be created */
	if (parent->ji_ajtrk->tkm_tbl[indx].trk_status != JOB_STATE_QUEUED) {
		*rc = PBSE_BADSTATE;
		return NULL;
	}

	/*
	 * allocate and clear basic structure
	 * cannot copy job attributes because cannot share strings and other
	 * malloc-ed data,  so copy ji_qs as a whole and then copy the
	 * non-saved items before ji_qs.
	 */

	subj = job_alloc();
	if (subj == NULL) {
		/* allocation failed: report it instead of dereferencing NULL */
		*rc = PBSE_SYSTEM;
		return NULL;
	}
	subj->ji_qs = parent->ji_qs;	/* copy the fixed save area */

#ifdef PBS_CRED_GRIDPROXY
	subj->ji_gsscontext  = parent->ji_gsscontext;
#endif
	subj->ji_qhdr     = parent->ji_qhdr;
	subj->ji_resvp    = parent->ji_resvp;
	subj->ji_myResv   = parent->ji_myResv;
	subj->ji_parentaj = parent;
	strcpy(subj->ji_qs.ji_jobid, newjid);	/* replace job id */
	*subj->ji_qs.ji_fileprefix = '\0';
	subj->ji_subjindx = indx;

	/*
	 * now that is all done, copy the required attributes by
	 * encoding and then decoding into the new array.  Then add the
	 * subjob specific attributes.
	 */

	resc_access_perm = ATR_DFLAG_ACCESS;
	CLEAR_HEAD(attrl);
	for (i = 0; attrs_to_copy[i] != JOB_ATR_LAST; i++) {
		j    = (int)attrs_to_copy[i];
		ppar = &parent->ji_wattr[j];
		psub = &subj->ji_wattr[j];
		pdef = &job_attr_def[j];

		if (pdef->at_encode(ppar, &attrl, pdef->at_name, NULL,
			ATR_ENCODE_MOM, &psatl) > 0) {
			/* decode every encoded entry into the subjob's attribute */
			for (psatl = (svrattrl *)GET_NEXT(attrl); psatl;
				psatl = ((svrattrl *)GET_NEXT(psatl->al_link))) {
				pdef->at_decode(psub, psatl->al_name, psatl->al_resc,
					psatl->al_value);
			}
			/* carry forward the default bit if set */
			psub->at_flags |= (ppar->at_flags & ATR_VFLAG_DEFLT);
			free_attrlist(&attrl);
		}
	}

	psub = &subj->ji_wattr[(int)JOB_ATR_array_id];
	job_attr_def[(int)JOB_ATR_array_id].at_decode(psub, NULL, NULL,
		parent->ji_qs.ji_jobid);

	psub = &subj->ji_wattr[(int)JOB_ATR_array_index];
	job_attr_def[(int)JOB_ATR_array_index].at_decode(psub, NULL, NULL, index);

	/* Lastly, set or clear a few flags and link in the structure */

	subj->ji_qs.ji_svrflags &= ~JOB_SVFLG_ArrayJob;
	subj->ji_qs.ji_svrflags |=  JOB_SVFLG_SubJob;
	subj->ji_modified = 1;	/* ** will likely take this out ** */

	subj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSICM;
	(void)svr_setjobstate(subj, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED);
	subj->ji_wattr[(int)JOB_ATR_state].at_flags    |= ATR_VFLAG_SET;
	subj->ji_wattr[(int)JOB_ATR_substate].at_flags |= ATR_VFLAG_SET;

	/* subjob needs to borrow eligible time from parent job array.
	 * expecting only to accrue eligible_time and nothing else.
	 */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) {

		eligibletime = parent->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long;

		if (parent->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE)
			eligibletime += subj->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long - parent->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long;

		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long = eligibletime;
		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;

	}
	/* current wall-clock time in milliseconds, used as the queue rank */
#ifdef WIN32
	_ftime_s(&tval);
	time_msec = (tval.time * 1000L) + tval.millitm;
#else
	gettimeofday(&tval, NULL);
	time_msec = (tval.tv_sec * 1000L) + (tval.tv_usec/1000L);
#endif
	/* set the queue rank attribute */
	subj->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = time_msec;
	subj->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_SET|ATR_VFLAG_MODCACHE;
	if (svr_enquejob(subj) != 0) {
		job_purge(subj);
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	*rc = PBSE_NONE;
	return subj;
}
Beispiel #9
0
/*
 * forced_jobpurge - purge a job immediately when a delete request asks for it.
 *
 * The request's extension string must start with delpurgestr for a purge to
 * be attempted.  The purge is allowed when the requester holds operator or
 * manager privilege, or owns the job while the server attribute
 * SRV_ATR_OwnerPurge is set.
 *
 * Returns:
 *   -1  the request was rejected (unknown job or insufficient permission)
 *    0  the request is not a purge request; nothing was done
 *    1  the request was acknowledged and the job was purged
 */
static int forced_jobpurge(

  struct batch_request *preq)

  {
  job *pjob = find_job(preq->rq_ind.rq_delete.rq_objname);
  int  may_purge;

  if (pjob == NULL)
    {
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_delete.rq_objname,
      pbse_to_txt(PBSE_UNKJOBID));

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return(-1);
    }

  /* anything other than an explicit purge request is left to the caller */

  if ((preq->rq_extend == NULL) ||
      (strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)) != 0))
    {
    return(0);
    }

  /* privileged requesters may always purge; owners only if the server allows it */

  may_purge =
    ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) != 0) ||
    ((svr_chk_owner(preq, pjob) == 0) && (server.sv_attr[SRV_ATR_OwnerPurge].at_val.at_long));

  if (!may_purge)
    {
    /* FAILURE */

    req_reject(PBSE_PERM, 0, preq, NULL, NULL);

    return(-1);
    }

  sprintf(log_buffer, "purging job without checking MOM");

  log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);

  reply_ack(preq);

  free_nodes(pjob);

  /* resource accounting is only adjusted for execution queues */

  if (pjob->ji_qhdr->qu_qs.qu_type == QTYPE_Execution)
    set_resc_assigned(pjob, DECR);

  job_purge(pjob);

  return(1);
  }  /* END forced_jobpurge() */
Beispiel #10
0
/**
 * @brief
 * 		post_movejob - clean up action for child started in net_move/send_job
 *		   to "move" a job to another server
 * @par
 * 		If move was successful, delete server's copy of the job structure
 * 		(or keep it as history when job history is configured),
 * 		and reply to request.
 * @par
 * 		If route didn't work, reject the request.
 * @par
 * 		If the job can no longer be found, that is logged and the steps
 * 		that need the job structure are skipped; the request is still
 * 		answered according to the child's exit status.
 *
 * @param[in]	pwt	-	work task structure; wt_parm1 is the move
 * 				request, wt_parm2 the job, wt_aux the
 * 				child's wait status
 *
 * @return	none.
 */
static void
post_movejob(struct work_task *pwt)
{
	struct batch_request *req;
	int	newstate;
	int	newsub;
	int	stat;
	int	r;
	job	*jobp;

	req  = (struct batch_request *)pwt->wt_parm1;
	stat = pwt->wt_aux;
	pbs_errno = PBSE_NONE;
	if (req->rq_type != PBS_BATCH_MoveJob) {
		sprintf(log_buffer, "bad request type %d", req->rq_type);
		log_err(-1, __func__, log_buffer);
		return;
	}

	jobp = find_job(req->rq_ind.rq_move.rq_jid);
	if ((jobp == NULL) || (jobp != (job *)pwt->wt_parm2)) {
		sprintf(log_buffer,
			"job %s not found",
			req->rq_ind.rq_move.rq_jid);
		log_err(-1, __func__, log_buffer);

	}

	if (WIFEXITED(stat)) {
		r = WEXITSTATUS(stat);
		if (r == SEND_JOB_OK) {	/* purge server's job structure */
			/* jobp may be NULL here (see above): guard every use */
			if ((jobp != NULL) &&
				(jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn))
				remove_stagein(jobp);
			(void)strcpy(log_buffer, msg_movejob);
			(void)sprintf(log_buffer+strlen(log_buffer),
				msg_manager,
				req->rq_ind.rq_move.rq_destin,
				req->rq_user, req->rq_host);
			/*
			 * If server is configured to keep job history info and
			 * the job is created here, then keep the job structure
			 * for history purpose without purging. No need to check
			 * for sub-jobs as sub jobs can't be moved.
			 */
			if (jobp != NULL) {
				if (svr_chk_history_conf())
					svr_setjob_histinfo(jobp, T_MOV_JOB);
				else
					job_purge(jobp);
			}
		} else
			r = PBSE_ROUTEREJ;
	} else {
		r = PBSE_SYSTEM;
		(void)sprintf(log_buffer, msg_badexit, stat);
		(void)strcat(log_buffer, __func__);
		/* fall back to the id from the request when the job is gone */
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_NOTICE,
			(jobp != NULL) ? jobp->ji_qs.ji_jobid :
			req->rq_ind.rq_move.rq_jid,
			log_buffer);
	}

	if (r) {
		if (jobp) {
			/* force re-eval of job state out of Transit */
			svr_evaljobstate(jobp, &newstate, &newsub, 1);
			svr_setjobstate(jobp, newstate, newsub);
		}
		req_reject(r, 0, req);
	} else
		reply_ack(req);

	return;
}
Beispiel #11
0
/*
 * post_job_delete_nanny - work task handler run when the job delete
 * nanny's signal request to MOM has completed.
 *
 * The signal request is taken from pwt->wt_parm1.  When the server
 * attribute SRV_ATR_JobNanny is still enabled:
 *   - if the job can no longer be found an error is logged;
 *   - if the reply code is PBSE_UNKJOBID the job's nodes are freed, its
 *     assigned resources decremented and the job purged.
 * In every case the request is released before returning.
 */
static void post_job_delete_nanny(

  struct work_task *pwt)

  {
  struct batch_request *sig_req = pwt->wt_parm1;   /* signal request to MOM */
  int                   mom_rc  = sig_req->rq_reply.brp_code;
  job                  *target;

  /* nothing to examine if the admin disabled nanny within the last minute or so */

  if (server.sv_attr[SRV_ATR_JobNanny].at_val.at_long != 0)
    {
    /* extract job id from task */

    target = find_job(sig_req->rq_ind.rq_signal.rq_jid);

    if (target == NULL)
      {
      sprintf(log_buffer, "job delete nanny: the job disappeared (this is a BUG!)");

      LOG_EVENT(
        PBSEVENT_ERROR,
        PBS_EVENTCLASS_JOB,
        sig_req->rq_ind.rq_signal.rq_jid,
        log_buffer);
      }
    else if (mom_rc == PBSE_UNKJOBID)
      {
      /* MOM does not know the job: clean up the server's copy */

      sprintf(log_buffer, "job delete nanny returned, but does not exist on mom");

      LOG_EVENT(
        PBSEVENT_ERROR,
        PBS_EVENTCLASS_JOB,
        sig_req->rq_ind.rq_signal.rq_jid,
        log_buffer);

      free_nodes(target);

      set_resc_assigned(target, DECR);

      job_purge(target);
      }
    }

  /* free task */

  release_req(pwt);

  return;
  } /* END post_job_delete_nanny() */
Beispiel #12
0
job *
chk_job_request(char *jobid, struct batch_request *preq, int *rc)
{
	int	 t;
	int	 histerr = 0;
	job	*pjob;
	int deletehist = 0;
	char	*p1;
	char	*p2;

	if (preq->rq_extend && strstr(preq->rq_extend, DELETEHISTORY))
		deletehist = 1;
	t = is_job_array(jobid);
	if ((t == IS_ARRAY_NO) || (t == IS_ARRAY_ArrayJob))
		pjob = find_job(jobid);		/* regular or ArrayJob itself */
	else
		pjob = find_arrayparent(jobid); /* subjob(s) */

	*rc = t;

	if (pjob == NULL) {
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			jobid, msg_unkjobid);
		req_reject(PBSE_UNKJOBID, 0, preq);
		return NULL;
	} else {
		histerr = svr_chk_histjob(pjob);
		if (histerr && deletehist == 0) {
			req_reject(histerr, 0, preq);
			return NULL;
		}
		if (deletehist ==1&& pjob->ji_qs.ji_state == JOB_STATE_MOVED &&
			pjob->ji_qs.ji_substate != JOB_SUBSTATE_FINISHED) {
			job_purge(pjob);
			req_reject(PBSE_UNKJOBID, 0, preq);
			return NULL;
		}
	}

	/*
	 * The job was found using the job ID in the request, but it may not
	 * match exactly (i.e. FQDN vs. unqualified hostname). Overwrite the
	 * host portion of the job ID in the request with the host portion of
	 * the one from the server job structure. Do not modify anything
	 * before the first dot in the job ID because it may be an array job.
	 * This will allow find_job() to look for an exact match when the
	 * request is serviced by MoM.
	 */
	p1 = strchr(pjob->ji_qs.ji_jobid, '.');
	if (p1) {
		p2 = strchr(jobid, '.');
		if (p2)
			*p2 = '\0';
		strncat(jobid, p1, PBS_MAXSVRJOBID-1);
	}

	if (svr_authorize_jobreq(preq, pjob) == -1) {
		(void)sprintf(log_buffer, msg_permlog, preq->rq_type,
			"Job", pjob->ji_qs.ji_jobid,
			preq->rq_user, preq->rq_host);
		log_event(PBSEVENT_SECURITY, PBS_EVENTCLASS_JOB, LOG_INFO,
			pjob->ji_qs.ji_jobid, log_buffer);
		req_reject(PBSE_PERM, 0, preq);
		return NULL;
	}

	if ((t == IS_ARRAY_NO) && (pjob->ji_qs.ji_state == JOB_STATE_EXITING)) {

		/* special case Deletejob with "force" */
		if ((preq->rq_type == PBS_BATCH_DeleteJob) &&
			(preq->rq_extend != NULL) &&
			(strcmp(preq->rq_extend, "force") == 0)) {
			return (pjob);
		}

		(void)sprintf(log_buffer, "%s, state=%d", msg_badstate,
			pjob->ji_qs.ji_state);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			pjob->ji_qs.ji_jobid, log_buffer);
		req_reject(PBSE_BADSTATE, 0, preq);
		return NULL;
	}

	return (pjob);
}
Beispiel #13
0
/*
 * job_route - decide whether a job in a routing queue can be routed now
 * and, if so, hand it to the router.
 *
 * Jobs in state TRANSIT are ignored, QUEUED jobs are eligible, HELD and
 * WAITING jobs are eligible only when the queue's RouteHeld / RouteWaiting
 * attribute is non-zero, COMPLETE jobs are purged, and any other state is
 * logged and ignored.  Routing is also skipped when the queue is not
 * started or already has its maximum number of jobs in transit.
 *
 * Returns 0 when nothing (more) is to be done, PBSE_ROUTEEXPD when the job
 * has exceeded the queue's route lifetime, otherwise whatever
 * default_router() or site_alt_router() returns.
 */
int job_route(

  job *jobp) /* job to route */

  {
  char             *id = "job_route";
  int               not_routable = 0;
  long              retry_delay;
  long              retry_time;
  time_t            expires;
  struct pbs_queue *que;

  /* see if the job is able to be routed */

  switch (jobp->ji_qs.ji_state)
    {
    case JOB_STATE_TRANSIT:

      /* already going, ignore it */

      return(0);

    case JOB_STATE_QUEUED:

      /* ok to try */

      break;

    case JOB_STATE_HELD:

      /* acceptable only if the queue routes held jobs */

      not_routable = !jobp->ji_qhdr->qu_attr[QR_ATR_RouteHeld].at_val.at_long;

      break;

    case JOB_STATE_WAITING:

      /* acceptable only if the queue routes waiting jobs */

      not_routable = !jobp->ji_qhdr->qu_attr[QR_ATR_RouteWaiting].at_val.at_long;

      break;

    case JOB_STATE_COMPLETE:

      /* job has been deleted */

      job_purge(jobp);

      return(0);

    default:

      sprintf(log_buffer, "%s %d", pbse_to_txt(PBSE_BADSTATE),
              jobp->ji_qs.ji_state);

      strcat(log_buffer, id);

      log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer);

      return(0);
    }

  /* check the queue limits, can we route any (more) */

  que = jobp->ji_qhdr;

  if (que->qu_attr[(int)QA_ATR_Started].at_val.at_long == 0)
    return(0);   /* queue not started - no routing */

  if ((que->qu_attr[(int)QA_ATR_MaxRun].at_flags & ATR_VFLAG_SET) &&
      (que->qu_attr[(int)QA_ATR_MaxRun].at_val.at_long <= que->qu_njstate[JOB_STATE_TRANSIT]))
    return(0);   /* max number of jobs being routed */

  /* retry time: queue setting if present, else the network default */

  retry_delay = PBS_NET_RETRY_TIME;

  if (que->qu_attr[(int)QR_ATR_RouteRetryTime].at_flags & ATR_VFLAG_SET)
    retry_delay = que->qu_attr[(int)QR_ATR_RouteRetryTime].at_val.at_long;

  retry_time = (long)time_now + retry_delay;

  /* life time of a job in this queue; zero means forever */

  expires = 0;

  if (que->qu_attr[(int)QR_ATR_RouteLifeTime].at_flags & ATR_VFLAG_SET)
    {
    expires =
      jobp->ji_qs.ji_un.ji_routet.ji_quetime +
      que->qu_attr[(int)QR_ATR_RouteLifeTime].at_val.at_long;
    }

  if (expires && (expires < time_now))
    {
    /* job too long in queue */

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, msg_routexceed);

    return(PBSE_ROUTEEXPD);
    }

  /* expiry is checked first, so a held or waiting job can still expire */

  if (not_routable)
    return(0);   /* not currently routing this job */

  if (que->qu_attr[(int)QR_ATR_AltRouter].at_val.at_long != 0)
    return(site_alt_router(jobp, que, retry_time));

  return(default_router(jobp, que, retry_time));
  }  /* END job_route() */
Beispiel #14
0
static void post_routejob(

  struct work_task *pwt)

  {
  int  newstate;
  int  newsub;
  int  r;
  int  stat = pwt->wt_aux;
  char *id = "post_routejob";
  job *jobp = (job *)pwt->wt_parm1;

  if (WIFEXITED(stat))
    {
    r = WEXITSTATUS(stat);
    }
  else
    {
    r = 2;

    sprintf(log_buffer, msg_badexit,
            stat);

    strcat(log_buffer, id);

    log_event(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      log_buffer);
    }

  switch (r)
    {
    case 0:  /* normal return, job was routed */

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
        remove_stagein(jobp);

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED)
        remove_checkpoint(jobp);

      job_purge(jobp); /* need to remove server job struct */

      return;

      /*NOTREACHED*/

      break;

    case 1:  /* permanent rejection (or signal) */

      if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT)
        {
        /* job delete in progress, just set to queued status */

        svr_setjobstate(jobp, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT);

        return;
        }

      add_dest(jobp);  /* else mark destination as bad */

      /* fall through */

    default : /* try routing again */

      /* force re-eval of job state out of Transit */

      svr_evaljobstate(jobp, &newstate, &newsub, 1);
      svr_setjobstate(jobp, newstate, newsub);

      if ((r = job_route(jobp)) == PBSE_ROUTEREJ)
        job_abt(&jobp, pbse_to_txt(PBSE_ROUTEREJ));
      else if (r != 0)
        job_abt(&jobp, msg_routexceed);

      break;
    }  /* END switch (r) */

  return;
  }  /* END post_routejob() */
Beispiel #15
0
/*
 * post_delete_mom1 - process MOM's reply to the first signal sent while
 * deleting a job.
 *
 * pwt->wt_parm1 is the signal request that was sent to MOM; its rq_extra
 * field carries the original client delete request.
 *
 *  - job no longer exists:       reject the client request (PBSE_UNKJOBID)
 *  - MOM replied PBSE_UNKJOBID:  release the job's resources, purge it and
 *                                acknowledge the client
 *  - MOM replied any other error: pass that error back to the client
 *  - MOM accepted the signal:    acknowledge the client, schedule
 *                                post_delete_mom2 after the kill delay and
 *                                move the delete nanny to 60 seconds later
 */
static void post_delete_mom1(

  struct work_task *pwt)  /* I */

  {
  struct batch_request *sig_req;     /* signal request to MOM */
  struct batch_request *client_req;  /* original client request */
  struct work_task     *followup;
  job                  *pjob;
  int                   mom_rc;
  int                   kill_delay = 0;
  int                   prefix_len = strlen(deldelaystr);

  /* pull everything needed out of the signal request before releasing it */

  sig_req    = pwt->wt_parm1;
  mom_rc     = sig_req->rq_reply.brp_code;
  client_req = sig_req->rq_extra;

  release_req(pwt);

  pjob = find_job(client_req->rq_ind.rq_delete.rq_objname);

  if (pjob == NULL)
    {
    /* job has gone away */

    req_reject(PBSE_UNKJOBID, 0, client_req, NULL, NULL);

    return;
    }

  if (mom_rc != 0)
    {
    /* mom rejected request */

    if (mom_rc != PBSE_UNKJOBID)
      {
      req_reject(mom_rc, 0, client_req, NULL, NULL);

      return;
      }

    /* MOM claims no knowledge of the job, so just purge it */

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      "MOM rejected signal during delete");

    /* give back the resources assigned to the job */

    free_nodes(pjob);

    set_resc_assigned(pjob, DECR);

    job_purge(pjob);

    reply_ack(client_req);

    return;
    }

  /* an explicit delay may be carried in the request's extend string */

  if ((client_req->rq_extend != NULL) &&
      (strncmp(client_req->rq_extend, deldelaystr, prefix_len) == 0))
    {
    kill_delay = atoi(client_req->rq_extend + prefix_len);
    }

  reply_ack(client_req);  /* request no longer needed, reply now */

  if (kill_delay == 0)
    {
    /*
     * no delay given in the original request: consult the queue's
     * kill_delay attribute, then the server's (the trailing 2 is
     * presumably the fallback value - confirm in attr_ifelse_long)
     */

    kill_delay = attr_ifelse_long(
                   &pjob->ji_qhdr->qu_attr[QE_ATR_KillDelay],
                   &server.sv_attr[SRV_ATR_KillDelay],
                   2);
    }

  followup = set_task(WORK_Timed, kill_delay + time_now, post_delete_mom2, pjob);

  if (followup != NULL)
    {
    /* link the task to the job so it is removed if the job goes away */

    append_link(&pjob->ji_svrtask, &followup->wt_linkobj, followup);
    }

  /*
   * The first signal succeeded, so push the nanny out to one minute
   * after the second phase.
   */

  apply_job_delete_nanny(pjob, time_now + kill_delay + 60);

  return;
  }  /* END post_delete_mom1() */
Beispiel #16
0
/*
 * post_movejob - finish a client move-job request once the child that was
 * sending the job has terminated.
 *
 * pwt->wt_parm2 is the client's batch request, pwt->wt_parm1 the job that
 * was being moved and pwt->wt_aux the child's wait status.
 *
 *  - child exited 0:         the job was moved; the local copy is cleaned
 *                            up and purged and the request acknowledged
 *  - child exited non-zero:  request rejected with PBSE_ROUTEREJ
 *  - child did not exit:     request rejected with PBSE_SYSTEM
 *
 * On rejection the job (if it still exists) has its state re-evaluated so
 * it leaves Transit.
 *
 * If the job cannot be found, or the job found is not the one recorded in
 * the work task, the condition is logged and the job is treated as absent:
 * no job field is touched, but the client still receives a reply.
 */
static void post_movejob(

  struct work_task *pwt)  /* I */

  {
  char *id = "post_movejob";

  struct batch_request *req;
  int newstate;
  int newsub;
  int stat;
  int r;
  job *jobp;

  req  = (struct batch_request *)pwt->wt_parm2;

  stat = pwt->wt_aux;

  pbs_errno = PBSE_NONE;

  if (req->rq_type != PBS_BATCH_MoveJob)
    {
    sprintf(log_buffer, "bad request type %d\n",
            req->rq_type);

    log_err(-1, id, log_buffer);

    return;
    }

  jobp = find_job(req->rq_ind.rq_move.rq_jid);

  if ((jobp == NULL) || (jobp != (job *)pwt->wt_parm1))
    {
    sprintf(log_buffer, "job %s not found\n",
            req->rq_ind.rq_move.rq_jid);

    log_err(-1, id, log_buffer);

    /*
     * Either there is no job, or the one found is not the job this task
     * was created for.  Clear the pointer so the code below neither
     * dereferences NULL nor purges an unrelated job.
     */

    jobp = NULL;
    }

  if (WIFEXITED(stat))
    {
    r = WEXITSTATUS(stat);

    if (r == 0)
      {
      if (jobp != NULL)
        {
        /* purge server's job structure */

        if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
          remove_stagein(jobp);

        if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED)
          remove_checkpoint(jobp);

        /*
         * NOTE(review): this message is composed but never handed to a
         * logging call in this function - confirm whether a log_event()
         * was intended here.
         */

        strcpy(log_buffer, msg_movejob);

        sprintf(log_buffer + strlen(log_buffer), msg_manager,
                req->rq_ind.rq_move.rq_destin,
                req->rq_user,
                req->rq_host);

        job_purge(jobp);
        }
      }
    else
      {
      r = PBSE_ROUTEREJ;
      }
    }
  else
    {
    r = PBSE_SYSTEM;

    sprintf(log_buffer, msg_badexit, stat);

    strcat(log_buffer, id);

    /* fall back to the id from the request when the job is not available */

    log_event(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      (jobp != NULL) ? jobp->ji_qs.ji_jobid : req->rq_ind.rq_move.rq_jid,
      log_buffer);
    }

  if (r)
    {
    if (jobp != NULL)
      {
      /* force re-eval of job state out of Transit */

      svr_evaljobstate(jobp, &newstate, &newsub, 1);
      svr_setjobstate(jobp, newstate, newsub);
      }

    req_reject(r, 0, req, NULL, NULL);
    }
  else
    {
    reply_ack(req);
    }

  return;
  }  /* END post_movejob() */
Beispiel #17
0
/**
 * @brief
 * 		post_routejob - clean up action for child started in net_move/send_job
 *		   to "route" a job to another server
 * @par
 * 		If route was successfull, delete job.
 * @par
 * 		If route didn't work, mark destination not to be tried again for this
 * 		job and call route again.
 *
 * @param[in]	pwt	-	work task structure
 *
 * @return	none.
 */
static void
post_routejob(struct work_task *pwt)
{
	int	 newstate;
	int	 newsub;
	int	 r;
	int	 stat = pwt->wt_aux;
	job	*jobp = (job *)pwt->wt_parm2;

	if (jobp == NULL) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, "", "post_routejob failed, jobp NULL");
		return;
	}

	if (WIFEXITED(stat)) {
		r = WEXITSTATUS(stat);
	} else {
		r = SEND_JOB_FATAL;
		(void)sprintf(log_buffer, msg_badexit, stat);
		(void)strcat(log_buffer, __func__);
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_NOTICE,
			jobp->ji_qs.ji_jobid, log_buffer);
	}

	switch (r) {
		case SEND_JOB_OK:		/* normal return, job was routed */

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
				remove_stagein(jobp);
			/*
			 * If the server is configured to keep job history and the job
			 * is created here, do not purge the job structure but save
			 * it for history purpose. No need to check for sub-jobs as
			 * sub jobs can not be routed.
			 */
			if (svr_chk_history_conf())
				svr_setjob_histinfo(jobp, T_MOV_JOB);
			else
				job_purge(jobp); /* need to remove server job struct */
			return;
		case SEND_JOB_FATAL:		/* permanent rejection (or signal) */
			if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT) {

				/* Job Delete in progress, just set to queued status */

				(void)svr_setjobstate(jobp, JOB_STATE_QUEUED,
					JOB_SUBSTATE_ABORT);
				return;
			}
			add_dest(jobp);		/* else mark destination as bad */
			/* fall through */
		default :	/* try routing again */
			/* force re-eval of job state out of Transit */
			svr_evaljobstate(jobp, &newstate, &newsub, 1);
			(void)svr_setjobstate(jobp, newstate, newsub);
			jobp->ji_retryok = 1;
			if ((r = job_route(jobp)) == PBSE_ROUTEREJ)
				(void)job_abt(jobp, msg_routebad);
			else if (r != 0)
				(void)job_abt(jobp, msg_routexceed);
			break;
	}
	return;
}