Ejemplo n.º 1
0
/*
 * release_job - releases the hold(s) named in the request from job j
 *
 * Decodes the hold types carried by the request, verifies the requester is
 * allowed to release them, removes them from the job's hold attribute and,
 * when the hold value actually changed, re-evaluates and stores the job
 * state.  The release is then logged against the job id.
 *
 * @param preq - the request; supplies the hold attribute list, the
 *               requester's permissions and the user/host used in the log
 * @param j - the job to modify (passed as void *, used as job *)
 * @return 0 if successful, a PBS error on failure
 */
int release_job(

  struct batch_request *preq, /* I */
  void                 *j)    /* I/O */

  {
  long           old_hold;
  int            rc = 0;
  int            newstate;
  int            newsub;
  char          *pset;
  job           *pjob = (job *)j;
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  pbs_attribute  temphold;

  /* cannot do anything until we decode the holds to be released */

  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset, &temphold)) != 0)
    {
    return(rc);
    }

  /* if other than HOLD_u is being released, must have privilege */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    return(rc);
    }

  /* unset the hold, remembering the previous value so a change can be detected */

  old_hold = pjob->ji_wattr[JOB_ATR_hold].at_val.at_long;

  if ((rc = job_attr_def[JOB_ATR_hold].at_set(&pjob->ji_wattr[JOB_ATR_hold], &temphold, DECR)))
    {
    return(rc);
    }

  /* everything went well, if holds changed, update the job state */

  if (old_hold != pjob->ji_wattr[JOB_ATR_hold].at_val.at_long)
    {
    pjob->ji_modified = 1; /* indicates attributes changed */

    svr_evaljobstate(pjob, &newstate, &newsub, 0);

    svr_setjobstate(pjob, newstate, newsub, FALSE); /* saves job */
    }

  /* user and host come from the request, so bound the write to log_buf */
  snprintf(log_buf, sizeof(log_buf), msg_jobholdrel,
    pset,
    preq->rq_user,
    preq->rq_host);

  log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

  return(rc);
  } /* END release_job() */
Ejemplo n.º 2
0
/*
 * process_hold_reply - finish a hold request once MOM's reply is known
 *
 * Restores the client connection on the request, looks the job up again and
 * then either records the checkpoint (reply code 0) or backs the hold out
 * (non-zero reply code) before answering the client.
 *
 * @param preq - the relayed hold request carrying MOM's reply; NULL means
 *               the request was already handled and nothing is done
 */
void process_hold_reply(

  batch_request *preq)

  {
  /* preq was handled previously */
  if (preq == NULL)
    return;

  /* restore client socket */
  preq->rq_conn = preq->rq_orgconn;

  job *held_job = svr_find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname, FALSE);

  if (held_job == NULL)
    {
    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
              preq->rq_ind.rq_hold.rq_orig.rq_objname,
              msg_postmomnojob);
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, msg_postmomnojob);
    return;
    }

  mutex_mgr job_mutex(held_job->ji_mutex, true);

  int next_state;
  int next_substate;

  if (preq->rq_reply.brp_code == 0)
    {
    /* record that MOM has a checkpoint file */

    /* PBS_CHECKPOINT_MIGRATEABLE is defined as zero therefore this code will never fire.
     * And if these flags are not set, start_exec will not try to run the job from
     * the checkpoint image file.
     */
    held_job->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

    if (preq->rq_reply.brp_auxcode)  /* checkpoint can be moved */
      {
      held_job->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHECKPOINT_FILE;
      held_job->ji_qs.ji_svrflags |=  JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_MIGRATEABLE;
      }

    /* indicate attributes changed */
    held_job->ji_modified = 1;

    svr_evaljobstate(*held_job, next_state, next_substate, 0);
    svr_setjobstate(held_job, next_state, next_substate, FALSE); /* saves job */

    /* note in accounting file */
    account_record(PBS_ACCT_CHKPNT, held_job, "Checkpointed and held");
    reply_ack(preq);
    return;
    }

  /* non-zero reply code: take the requested holds back off the job */
  pbs_attribute  requested_hold;
  char          *hold_text;
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  if (get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr,
               (const char **)&hold_text,
               &requested_hold) == 0)
    {
    /* the result of the decrement is not acted upon */
    job_attr_def[JOB_ATR_hold].at_set(&held_job->ji_wattr[JOB_ATR_hold],
        &requested_hold, DECR);
    }

  /* reset the substate and flag the attributes as changed */
  held_job->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;
  held_job->ji_modified = 1;

  svr_evaljobstate(*held_job, next_state, next_substate, 0);
  svr_setjobstate(held_job, next_state, next_substate, FALSE); /* saves job */

  if (preq->rq_reply.brp_code == PBSE_NOSUP)
    {
    reply_ack(preq);
    return;
    }

  sprintf(log_buf, msg_mombadhold, preq->rq_reply.brp_code);
  log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, held_job->ji_qs.ji_jobid, log_buf);
  req_reject(preq->rq_reply.brp_code, 0, preq, NULL, log_buf);

  } /* END process_hold_reply() */
Ejemplo n.º 3
0
/*
 * release_job - releases the hold(s) named in the request from job j
 *
 * Jobs whose state is beyond running are left untouched and PBSE_NONE is
 * returned.  Otherwise the requested hold types are decoded, the
 * requester's privilege is checked, the holds are removed from the job and,
 * when the hold value actually changed, the job state is re-evaluated and
 * stored.  The release is logged against the job id.
 *
 * @param preq - the request; supplies the hold attribute list, the
 *               requester's permissions and the user/host used in the log
 * @param j - the job to modify (passed as void *, used as job *)
 * @param pa - a pointer to an array whose mutex we hold - always this job's array
 * @return 0 if successful, a PBS error on failure
 */
int release_job(

  struct batch_request *preq, /* I */
  void                 *j,    /* I/O */
  job_array            *pa)   /* I */

  {
  long           old_hold;
  int            rc = PBSE_NONE;
  int            newstate;
  int            newsub;
  char          *pset;
  job           *pjob = (job *)j;
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  pbs_attribute  temphold;

  // this function is meaningless for jobs in exiting or completed
  if (pjob->ji_qs.ji_state > JOB_STATE_RUNNING)
    return(PBSE_NONE);

  /* cannot do anything until we decode the holds to be released */

  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, (const char **)&pset, &temphold)) != 0)
    {
    return(rc);
    }

  /* if other than HOLD_u is being released, must have privilege */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    return(rc);
    }

  /* unset the hold, remembering the previous value so a change can be detected */

  old_hold = pjob->ji_wattr[JOB_ATR_hold].at_val.at_long;

  if ((rc = job_attr_def[JOB_ATR_hold].at_set(&pjob->ji_wattr[JOB_ATR_hold], &temphold, DECR)))
    {
    return(rc);
    }

  if (pjob->ji_arraystructid[0] != '\0')
    {
    // Make sure our slot limit counts are correct
    check_array_slot_limits(pjob, pa);
    }

  /* everything went well, if holds changed, update the job state */

  if (old_hold != pjob->ji_wattr[JOB_ATR_hold].at_val.at_long)
    {
    pjob->ji_modified = 1; /* indicates attributes changed */

    svr_evaljobstate(*pjob, newstate, newsub, 0);

    svr_setjobstate(pjob, newstate, newsub, FALSE); /* saves job */
    }

  // user and host come from the request, so bound the write to log_buf
  snprintf(log_buf, sizeof(log_buf), msg_jobholdrel,
    pset,
    preq->rq_user,
    preq->rq_host);

  log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

  return(rc);
  } /* END release_job() */
Ejemplo n.º 4
0
/*
 * req_holdjob - service a Hold Job request
 *
 * Validates the job and the requested hold types, ORs the holds into the
 * job's hold attribute and then either
 *   - relays a copy of the request to MOM when the job is running and its
 *     checkpoint attribute contains "s", "c" or "enabled", or
 *   - (ENABLE_BLCR builds) rejects the request for any other running job, or
 *   - updates the job state locally and acknowledges the request.
 *
 * Every failure is reported to the client through req_reject(); the
 * function itself always returns PBSE_NONE.
 *
 * @param vp - the hold request (I)
 * @return PBSE_NONE
 */
int req_holdjob(

  batch_request *vp) /* I */

  {
  long          *hold_val;
  int            newstate;
  int            newsub;
  long           old_hold;
  job           *pjob;
  char          *pset;
  int            rc;
  pbs_attribute  temphold;
  pbs_attribute *pattr;
  batch_request *preq = (struct batch_request *)vp;
  char           log_buf[LOCAL_LOG_BUF_SIZE];
  batch_request *dup_req = NULL;

  pjob = chk_job_request(preq->rq_ind.rq_hold.rq_orig.rq_objname, preq);

  if (pjob == NULL)
    {
    return(PBSE_NONE);
    }

  /* releases the job mutex on exit unless told otherwise below */
  mutex_mgr job_mutex(pjob->ji_mutex, true);

  /* cannot do anything until we decode the holds to be set */
  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, (const char **)&pset,
                     &temphold)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);

    return(PBSE_NONE);
    }

  /* if other than HOLD_u is being set, must have privilege */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);

    return(PBSE_NONE);
    }

  hold_val = &pjob->ji_wattr[JOB_ATR_hold].at_val.at_long;

  /* remember the previous holds so they can be restored on failure */
  old_hold = *hold_val;
  *hold_val |= temphold.at_val.at_long;
  pjob->ji_wattr[JOB_ATR_hold].at_flags |= ATR_VFLAG_SET;

  /* user and host come from the request, so bound the write to log_buf */
  snprintf(log_buf, sizeof(log_buf), msg_jobholdset, pset, preq->rq_user, preq->rq_host);

  pattr = &pjob->ji_wattr[JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {

    /* have MOM attempt checkpointing */

    /*
    ** The jobid in the request always has the server suffix attached,
    ** which is dropped when the server attribute
    ** 'display_job_server_suffix' is FALSE, and so it will be in the MOM's.
    ** Therefore, the job id as the server stores it must be passed to the
    ** MOM so she can find the job to hold.
    */
    if (strncmp(pjob->ji_qs.ji_jobid, 
          preq->rq_ind.rq_hold.rq_orig.rq_objname, PBS_MAXSVRJOBID))
       snprintf(preq->rq_ind.rq_hold.rq_orig.rq_objname, 
          sizeof(preq->rq_ind.rq_hold.rq_orig.rq_objname), "%s", 
          pjob->ji_qs.ji_jobid);
    if ((dup_req = duplicate_request(preq)) == NULL)
      {
      /* rc is 0 here (both checks above passed), so it cannot be used as
       * the reject code; report a system error and undo the hold, as the
       * relay failure path below does */
      *hold_val = old_hold;
      req_reject(PBSE_SYSTEM, 0, preq, NULL, "memory allocation failure");
      }
    /* The dup_req is freed in relay_to_mom (failure)
     * or in issue_Drequest (success) */
    /* NOTE(review): the comment above and the free_br() call below disagree
     * about who frees dup_req on failure - confirm against relay_to_mom */
    else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE)
      {
      free_br(dup_req);
      *hold_val = old_hold;  /* reset to the old value */
      req_reject(rc, 0, preq, NULL, "relay to mom failed");

      /* relay_to_mom is handed &pjob and may leave it NULL; in that case
       * there is no job mutex left for job_mutex to release */
      if (pjob == NULL)
        job_mutex.set_unlock_on_exit(false);
      }
    else
      {
      if (pjob != NULL)
        {
        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE;
        
        job_save(pjob, SAVEJOB_QUICK, 0);
        
        /* fill in log_buf again, since relay_to_mom changed it */
        snprintf(log_buf, sizeof(log_buf), msg_jobholdset, pset, preq->rq_user, preq->rq_host);
        
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
        /* NOTE(review): job_mutex is still set to unlock on exit after this
         * manual unlock - confirm mutex_mgr tolerates that */
        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
        pjob = NULL;
        reply_ack(preq);
        }
      else
        job_mutex.set_unlock_on_exit(false);

      process_hold_reply(dup_req);
      }
    }
#ifdef ENABLE_BLCR
  else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * This system is configured with BLCR checkpointing to be used,
     * but this Running job does not have checkpointing enabled,
     * so we reject the request
     */
    /* NOTE(review): the hold bits set above are not restored on this path
     * and log_buf still holds the "hold set" message - confirm intent */

    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf);

    req_reject(PBSE_IVALREQ, 0, preq, NULL,
      "job not held since checkpointing is expected but not enabled for job");
    }
#endif
  else
    {
    /* everything went well, may need to update the job state */
    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf);

    if (old_hold != *hold_val)
      {
      /* indicate attributes changed     */
      pjob->ji_modified = 1;

      svr_evaljobstate(*pjob, newstate, newsub, 0);

      svr_setjobstate(pjob, newstate, newsub, FALSE);
      }

    reply_ack(preq);
    }

  return(PBSE_NONE);
  }  /* END req_holdjob() */
Ejemplo n.º 5
0
/*
 * req_holdarray - apply the holds named in a request to a job array
 *
 * Looks the array up by the object name in the request, checks that the
 * requester is authorized and privileged for the requested hold types, and
 * then holds either the sub-range named in the request's extension string
 * or every job present in the array.  Exactly one reply (reject or
 * acknowledge) is sent for the request.
 *
 * @param preq - the hold request (I)
 */
void req_holdarray(struct batch_request *preq)
  {
  int i;

  char *pset;
  char *range_str;
  int rc;
  attribute temphold;
  char owner[PBS_MAXUSER + 1];
  job_array *pa;

  pa = get_array(preq->rq_ind.rq_hold.rq_orig.rq_objname);

  if (pa == NULL)
    {
    /* this shouldn't happen since we verify that this is a valid array
       just prior to calling this function */
    req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");

    /* must stop here: pa is dereferenced immediately below */
    return;
    }

  get_jobowner(pa->ai_qs.owner, owner);

  if (svr_authorize_req(preq, owner, pa->ai_qs.submit_host) == -1)
    {
    sprintf(log_buffer, msg_permlog,
            preq->rq_type,
            "Array",
            preq->rq_ind.rq_delete.rq_objname,
            preq->rq_user,
            preq->rq_host);

    log_event(
      PBSEVENT_SECURITY,
      PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_delete.rq_objname,
      log_buffer);

    req_reject(PBSE_PERM, 0, preq, NULL, "operation not permitted");
    return;
    }


  /* cannot do anything until we decode the holds to be set */

  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset,
                     &temphold)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* if other than HOLD_u is being set, must have privilege */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* get the range of jobs to iterate over */
  range_str = preq->rq_extend;
  if ((range_str != NULL) &&
      (strstr(range_str,ARRAY_RANGE) != NULL))
    {
    if ((rc = hold_array_range(pa,range_str,&temphold)) != 0)
      {
      req_reject(rc,0,preq,NULL,
        "Error in specified array range");

      /* the request has been answered; do not acknowledge it as well */
      return;
      }
    }
  else
    {
    /* do the entire array, skipping empty slots */
    for (i = 0;i < pa->ai_qs.array_size;i++)
      {
      if (pa->jobs[i] == NULL)
        continue;

      hold_job(&temphold,pa->jobs[i]);
      }
    }

  reply_ack(preq);

  }
Ejemplo n.º 6
0
int req_holdarray(
    
  void *vp) /* I */

  {
  int                   i;
  struct batch_request *preq = (struct batch_request *)vp;
  char                 *pset;
  char                 *range_str;
  int                   rc;
  pbs_attribute         temphold;
  char                  owner[PBS_MAXUSER + 1];
  job_array            *pa;
  job                  *pjob;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  pa = get_array(preq->rq_ind.rq_hold.rq_orig.rq_objname);

  if (pa == NULL)
    {
    /* this shouldn't happen since we verify that this is a valid array
       just prior to calling this function */
    req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
    return(PBSE_NONE);
    }

  get_jobowner(pa->ai_qs.owner, owner);

  if (svr_authorize_req(preq, owner, pa->ai_qs.submit_host) == -1)
    {
    sprintf(log_buf, msg_permlog,
      preq->rq_type, "Array", preq->rq_ind.rq_delete.rq_objname, preq->rq_user, preq->rq_host);

    log_event(PBSEVENT_SECURITY, PBS_EVENTCLASS_JOB, preq->rq_ind.rq_delete.rq_objname, log_buf);

    if (LOGLEVEL >= 7)
      {
      sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);
      }

    pthread_mutex_unlock(pa->ai_mutex);

    req_reject(PBSE_PERM, 0, preq, NULL, "operation not permitted");
    return(PBSE_NONE);
    }


  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset,
                     &temphold)) != 0)
    {
    if (LOGLEVEL >= 7)
      {
      sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);
      }
    
    pthread_mutex_unlock(pa->ai_mutex);

    req_reject(rc, 0, preq, NULL, NULL);
    return(PBSE_NONE);
    }

  /* if other than HOLD_u is being set, must have privil */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    if (LOGLEVEL >= 7)
      {
      sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);
      }

    pthread_mutex_unlock(pa->ai_mutex);

    req_reject(rc, 0, preq, NULL, NULL);
    return(PBSE_NONE);
    }

  /* get the range of jobs to iterate over */
  range_str = preq->rq_extend;
  if ((range_str != NULL) &&
      (strstr(range_str,ARRAY_RANGE) != NULL))
    {
    if ((rc = hold_array_range(pa,range_str,&temphold)) != 0)
      {
      pthread_mutex_unlock(pa->ai_mutex);

      req_reject(rc,0,preq,NULL,
        "Error in specified array range");
      return(PBSE_NONE);
      }
    }
  else
    {
    /* do the entire array */
    for (i = 0;i < pa->ai_qs.array_size;i++)
      {
      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
        {
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;
        }
      else
        {
        hold_job(&temphold,pjob);
        if (LOGLEVEL >= 7)
          {
          sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
          log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);
          }

        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        }
      }
    }

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);
    }
  pthread_mutex_unlock(pa->ai_mutex);

  reply_ack(preq);

  return(PBSE_NONE);
  } /* END req_holdarray() */
Ejemplo n.º 7
0
/**
 * @brief
 * 		req_releasejob - service a Release Job request.
 *
 *		Looks up the job named in the request, decodes the hold types to
 *		release, checks the requester's privilege, removes those holds from
 *		the job's hold attribute and updates the job state.  The job comment
 *		is freed once no holds remain.  Every failure is answered with
 *		req_reject(); success is answered with reply_ack().
 *
 * @param[in,out]	preq	- the release request.
 *
 * @return	void
 */
void
req_releasejob(struct batch_request *preq)
{
	int              jt;            /* job type */
	int		 newstate;
	int		 newsub;
	long		 old_hold;
	job		*pjob;
	char		*pset;
	int		 rc;


	/* a null job means there is nothing more to do for this request here */
	pjob = chk_job_request(preq->rq_ind.rq_release.rq_objname, preq, &jt);
	if (pjob == (job *)0)
		return;

	/* only job types IS_ARRAY_NO and IS_ARRAY_ArrayJob are accepted */
	if ((jt != IS_ARRAY_NO) && (jt != IS_ARRAY_ArrayJob)) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* cannot do anything until we decode the holds to be released; */
	/* temphold is not a local: get_hold() fills it in as a side effect */

	if ((rc=get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset)) != 0) {
		req_reject(rc, 0, preq);
		return;
	}

	/* if other than HOLD_u is being released, must have privilege */

	if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0) {
		req_reject(rc, 0, preq);
		return;
	}

	/* all ok so far, unset the hold; keep the old value to detect a change */

	old_hold = pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;
	rc = job_attr_def[(int)JOB_ATR_hold].
		at_set(&pjob->ji_wattr[(int)JOB_ATR_hold],
		&temphold, DECR);
	if (rc) {
		req_reject(rc, 0, preq);
		return;
	}

	/* everything went well, if holds changed, update the job state */

	/*
	 * The two preprocessor branches below each open one brace that is closed
	 * by the single '}' after svr_setjobstate():
	 *   - without NAS, the state update runs only when the hold value changed;
	 *   - with NAS, a plain block is opened instead, so the eligible time is
	 *     reset and the state update runs on every release.
	 */
#ifndef NAS /* localmod 105 Always reset etime on release */
	if (old_hold != pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long) {
#endif /* localmod 105 */
#ifdef NAS /* localmod 105 */
		{
			attribute *etime = &pjob->ji_wattr[(int)JOB_ATR_etime];
			etime->at_val.at_long = time_now;
			etime->at_flags |= ATR_VFLAG_SET|ATR_VFLAG_MODCACHE;
#endif /* localmod 105 */
		pjob->ji_modified = 1;	/* indicates attributes changed    */
		svr_evaljobstate(pjob, &newstate, &newsub, 0);
		(void)svr_setjobstate(pjob, newstate, newsub); /* saves job */
	}
	/* no holds left on the job: free its comment attribute */
	if (pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long == 0)
		job_attr_def[(int)JOB_ATR_Comment].at_free(&pjob->ji_wattr[(int)JOB_ATR_Comment]);
	(void)sprintf(log_buffer, msg_jobholdrel, pset, preq->rq_user,
		preq->rq_host);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
		pjob->ji_qs.ji_jobid, log_buffer);
	reply_ack(preq);
}

/**
 * @brief
 * 		get_hold - scan a list of attributes (svrattrl) for the hold-types
 * 		attribute.  Used by the Hold Job and Release Job requests, so the
 *		list must contain exactly one entry and it must be the hold-types
 *		attribute; anything else is rejected.
 *
 *		On success the hold value is decoded into the temphold attribute,
 *		which is not local to this function.
 *
 * @param[in]	phead	- head of the attribute list to scan.
 * @param[out]	pset	- set to the value string of the hold-types entry.
 *
 * @return	error code
 * @retval	PBSE_IVALREQ	- an entry other than hold-types was found, or
 *				  the hold-types entry did not appear exactly once.
 * @retval	otherwise	- whatever the hold attribute's decode routine
 *				  returns.
 */

static int
get_hold(pbs_list_head *phead, char	 **pset)
{
	int		 matches = 0;
	struct svrattrl *found = (struct svrattrl*)0;
	struct svrattrl *cur;

	for (cur = (struct svrattrl *)GET_NEXT((*phead));
		cur;
		cur = (struct svrattrl *)GET_NEXT(cur->al_link)) {

		/* any attribute other than hold-types makes the request invalid */
		if (strcasecmp(cur->al_name, job_attr_def[(int)JOB_ATR_hold].at_name) != 0)
			return (PBSE_IVALREQ);

		found = cur;
		*pset = cur->al_value;
		matches++;
	}

	/* exactly one hold-types entry is required */
	if (matches != 1)
		return (PBSE_IVALREQ);

	/* decode into the temporary attribute structure */

	clear_attr(&temphold, &job_attr_def[(int)JOB_ATR_hold]);

	return (job_attr_def[(int)JOB_ATR_hold].at_decode(
		&temphold,
		found->al_name,
		(char *)0,
		found->al_value));
}
Ejemplo n.º 8
0
void
req_holdjob(struct batch_request *preq)
{
	long		*hold_val;
	int		 jt;		/* job type */
	int		 newstate;
	int		 newsub;
	long		 old_hold;
	job		*pjob;
	char		*pset;
	int		 rc;
	char             date[32];
	time_t           now;


	pjob = chk_job_request(preq->rq_ind.rq_hold.rq_orig.rq_objname, preq, &jt);
	if (pjob == (job *)0)
		return;
	if ((jt != IS_ARRAY_NO) && (jt != IS_ARRAY_ArrayJob)) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}
	if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
		(pjob->ji_qs.ji_substate == JOB_SUBSTATE_PROVISION)) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}


	/* cannot do anything until we decode the holds to be set */

	if ((rc=get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset)) != 0) {
		req_reject(rc, 0, preq);
		return;
	}

	/* if other than HOLD_u is being set, must have privil */

	if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0) {
		req_reject(rc, 0, preq);
		return;
	}

	/* HOLD_bad_password can only be done by root or admin */
#ifdef WIN32
	if ( (temphold.at_val.at_long & HOLD_bad_password) && \
				!isAdminPrivilege(preq->rq_user) )
#else
	if ( (temphold.at_val.at_long & HOLD_bad_password) && \
		  strcasecmp(preq->rq_user, PBS_DEFAULT_ADMIN) != 0 )
#endif
	{
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	hold_val = &pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;
	old_hold = *hold_val;
	*hold_val |= temphold.at_val.at_long;
	pjob->ji_wattr[(int)JOB_ATR_hold].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODCACHE;

	/* Note the hold time in the job comment. */
	now = time(NULL);
	(void)strncpy(date, (const char *)ctime(&now), 24);
	date[24] = '\0';
	(void)sprintf(log_buffer, "Job held by %s on %s", preq->rq_user, date);
	job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment], (char *)0, (char *)0, log_buffer);

	(void)sprintf(log_buffer, msg_jobholdset, pset, preq->rq_user,
		preq->rq_host);

	if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
		(pjob->ji_qs.ji_substate != JOB_SUBSTATE_PRERUN) &&
		(pjob->ji_wattr[(int)JOB_ATR_chkpnt].at_val.at_str) &&
		(*pjob->ji_wattr[(int)JOB_ATR_chkpnt].at_val.at_str != 'n')) {

		/* have MOM attempt checkpointing */

		if ((rc = relay_to_mom(pjob, preq, post_hold)) != 0) {
			*hold_val = old_hold;	/* reset to the old value */
			req_reject(rc, 0, preq);
		} else {
			pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;
			pjob->ji_qs.ji_svrflags |=
				JOB_SVFLG_HASRUN | JOB_SVFLG_CHKPT;
			(void)job_save(pjob, SAVEJOB_QUICK);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				pjob->ji_qs.ji_jobid, log_buffer);
		}
	} else {

		/* every thing went well, may need to update the job state */

		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
			pjob->ji_qs.ji_jobid, log_buffer);
		if (old_hold != *hold_val) {
			/* indicate attributes changed     */
			pjob->ji_modified = 1;
			svr_evaljobstate(pjob, &newstate, &newsub, 0);
			(void)svr_setjobstate(pjob, newstate, newsub);
		}
		reply_ack(preq);
	}
}
Ejemplo n.º 9
0
static void process_hold_reply(

  struct work_task *pwt)
  {
  job       *pjob;

  struct batch_request *preq;
  int   newstate;
  int   newsub;
  attribute temphold;
  char *pset;
  int rc;

  svr_disconnect(pwt->wt_event); /* close connection to MOM */

  preq = pwt->wt_parm1;
  preq->rq_conn = preq->rq_orgconn;  /* restore client socket */

  if ((pjob = find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname)) == (job *)0)
    {
    LOG_EVENT(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
              preq->rq_ind.rq_hold.rq_orig.rq_objname,
              msg_postmomnojob);
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, msg_postmomnojob);
    }
  else if (preq->rq_reply.brp_code != 0)
    {

    rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset, &temphold);

    if (rc == 0)
      {
      rc = job_attr_def[(int)JOB_ATR_hold].at_set(&pjob->ji_wattr[(int)JOB_ATR_hold],
           &temphold, DECR);
      }

    pjob->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;  /* reset it */

    pjob->ji_modified = 1;    /* indicate attributes changed */
    svr_evaljobstate(pjob, &newstate, &newsub, 0);
    svr_setjobstate(pjob, newstate, newsub); /* saves job */

    if (preq->rq_reply.brp_code != PBSE_NOSUP)
      {
      sprintf(log_buffer, msg_mombadhold, preq->rq_reply.brp_code);
      LOG_EVENT(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
                pjob->ji_qs.ji_jobid, log_buffer);
      req_reject(preq->rq_reply.brp_code, 0, preq, NULL, log_buffer);
      }
    else
      {
      reply_ack(preq);
      }
    }
  else
    {
    /* record that MOM has a checkpoint file */

    /* PBS_CHECKPOINT_MIGRATEABLE is defined as zero therefore this code will never fire.
     * And if these flags are not set, start_exec will not try to run the job from
     * the checkpoint image file.
     */

    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

    if (preq->rq_reply.brp_auxcode)  /* checkpoint can be moved */
      {
      pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHECKPOINT_FILE;
      pjob->ji_qs.ji_svrflags |=  JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_MIGRATEABLE;
      }

    pjob->ji_modified = 1;    /* indicate attributes changed     */

    svr_evaljobstate(pjob, &newstate, &newsub, 0);
    svr_setjobstate(pjob, newstate, newsub); /* saves job */

    account_record(PBS_ACCT_CHKPNT, pjob, "Checkpointed and held"); /* note in accounting file */
    reply_ack(preq);
    }
  }
Ejemplo n.º 10
0
/*
 * req_releasejob - service a Release Job request
 *
 * Looks up the job named in the request, decodes the hold types to
 * release, checks the requester's privilege and removes those holds from
 * the job.  When the hold value actually changed the job state is
 * re-evaluated and stored.  Failures are answered with req_reject();
 * success is logged and acknowledged.
 *
 * @param preq - ptr to the decoded request
 */
void req_releasejob(

  struct batch_request *preq) /* ptr to the decoded request   */

  {
  job       *target;
  attribute  released;
  char      *hold_text;
  long       before;
  long       after;
  int        next_state;
  int        next_substate;
  int        rc;

  target = chk_job_request(preq->rq_ind.rq_release.rq_objname, preq);

  if (target == NULL)
    return;

  /* nothing can be done until the holds to release are decoded */
  rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &hold_text, &released);

  if (rc != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* releasing anything other than HOLD_u needs privilege */
  rc = chk_hold_priv(released.at_val.at_long, preq->rq_perm);

  if (rc != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* all ok so far: remove the holds, keeping the old value for comparison */
  before = target->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;

  rc = job_attr_def[(int)JOB_ATR_hold].at_set(
         &target->ji_wattr[(int)JOB_ATR_hold],
         &released,
         DECR);

  if (rc)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* only a real change in the holds requires a state update */
  after = target->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;

  if (before != after)
    {
    /* indicates attributes changed */
    target->ji_modified = 1;

    svr_evaljobstate(target, &next_state, &next_substate, 0);

    svr_setjobstate(target, next_state, next_substate); /* saves job */
    }

  sprintf(log_buffer, msg_jobholdrel, hold_text, preq->rq_user, preq->rq_host);

  LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, target->ji_qs.ji_jobid,
            log_buffer);

  reply_ack(preq);
  }  /* END req_releasejob() */
Ejemplo n.º 11
0
/*
 * req_holdjob - service a Hold Job request
 *
 * Validates the job and the requested hold types, ORs the holds into the
 * job's hold attribute and then either
 *   - relays the request to MOM (with process_hold_reply as the callback)
 *     when the job is running and its checkpoint attribute contains "s",
 *     "c" or "enabled", or
 *   - (ENABLE_BLCR builds) rejects the request for any other running job, or
 *   - updates the job state locally and acknowledges the request.
 *
 * Cloud jobs are rejected outright.  Each request is answered at most once
 * by this function.
 *
 * @param preq - the hold request
 */
void req_holdjob(

  struct batch_request *preq)

  {
  long  *hold_val;
  int   newstate;
  int   newsub;
  long   old_hold;
  job    *pjob;
  char    *pset;
  int     rc;
  attribute temphold;
  attribute *pattr;

  pjob = chk_job_request(preq->rq_ind.rq_hold.rq_orig.rq_objname, preq);

  if (pjob == NULL)
    {
    return;
    }

  if (is_cloud_job(pjob))
    {
    req_reject(PBSE_CLOUD_REQUEST,0,preq,NULL,NULL);

    /* the request has been answered; do not go on to hold the job */
    return;
    }

  /* cannot do anything until we decode the holds to be set */

  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, &pset,
                     &temphold)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  /* if other than HOLD_u is being set, must have privilege */

  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    return;
    }

  hold_val = &pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;

  /* remember the previous holds so they can be restored on failure */
  old_hold = *hold_val;
  *hold_val |= temphold.at_val.at_long;
  pjob->ji_wattr[(int)JOB_ATR_hold].at_flags |= ATR_VFLAG_SET;
  sprintf(log_buffer, msg_jobholdset, pset, preq->rq_user,
          preq->rq_host);

  pattr = &pjob->ji_wattr[(int)JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {

    /* have MOM attempt checkpointing; no reply is sent from here on success */

    if ((rc = relay_to_mom(pjob->ji_qs.ji_un.ji_exect.ji_momaddr,
                           preq, process_hold_reply)) != 0)
      {
      *hold_val = old_hold;  /* reset to the old value */
      req_reject(rc, 0, preq, NULL, NULL);
      }
    else
      {
      pjob->ji_qs.ji_svrflags |=
        JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE;
      job_save(pjob, SAVEJOB_QUICK);
      
      /* fill in log_buffer again, since relay_to_mom changed it */
      
      sprintf(log_buffer, msg_jobholdset, pset, preq->rq_user,
          preq->rq_host);
          
      LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid, log_buffer);
      }
    }
#ifdef ENABLE_BLCR
  else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * This system is configured with BLCR checkpointing to be used,
     * but this Running job does not have checkpointing enabled,
     * so we reject the request
     */
    /* NOTE(review): the hold bits set above are not restored on this path
     * and log_buffer still holds the "hold set" message - confirm intent */

    LOG_EVENT(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    req_reject(PBSE_IVALREQ, 0, preq, NULL,
        "job not held since checkpointing is expected but not enabled for job");
    }
#endif
  else
    {
    /* everything went well, may need to update the job state */

    LOG_EVENT(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    if (old_hold != *hold_val)
      {
      /* indicate attributes changed     */

      pjob->ji_modified = 1;

      svr_evaljobstate(pjob, &newstate, &newsub, 0);

      svr_setjobstate(pjob, newstate, newsub);
      }

    reply_ack(preq);
    }
  }  /* END req_holdjob() */