示例#1
0
void issue_track(
    
  job *pjob)

  {

  struct batch_request   *preq;
  char         *pc;

  preq = alloc_br(PBS_BATCH_TrackJob);

  if (preq == (struct batch_request *)0)
    return;

  preq->rq_ind.rq_track.rq_hopcount = pjob->ji_wattr[JOB_ATR_hopcount].at_val.at_long;

  strcpy(preq->rq_ind.rq_track.rq_jid, pjob->ji_qs.ji_jobid);
  strcpy(preq->rq_ind.rq_track.rq_location, server_name);

  preq->rq_ind.rq_track.rq_state[0] = pjob->ji_wattr[JOB_ATR_state].at_val.at_char;

  pc = pjob->ji_qs.ji_jobid;

  while (*pc != '.')
    pc++;

  issue_to_svr(++pc, preq, NULL);
  free_br(preq);
  }
示例#2
0
文件: req_delete.c 项目: dhill12/test
struct batch_request *duplicate_request(

  struct batch_request *preq)

  {
  struct batch_request *preq_tmp = alloc_br(preq->rq_type);

  preq_tmp->rq_perm = preq->rq_perm;
  preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
  preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
  preq_tmp->rq_fromsvr = preq->rq_fromsvr;
  preq_tmp->rq_extsz = preq->rq_extsz;
  preq_tmp->rq_conn = preq->rq_conn;

  memcpy(preq_tmp->rq_ind.rq_manager.rq_objname,
    preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1);

  memcpy(preq_tmp->rq_user, preq->rq_user, PBS_MAXUSER + 1);
  memcpy(preq_tmp->rq_host, preq->rq_host, PBS_MAXHOSTNAME + 1);

  if (preq->rq_extend != NULL)
    preq_tmp->rq_extend = strdup(preq->rq_extend);

  if (preq->rq_type == PBS_BATCH_RunJob)
    {
    if (preq->rq_ind.rq_run.rq_destin)
      preq_tmp->rq_ind.rq_run.rq_destin = strdup(preq->rq_ind.rq_run.rq_destin);
    }

  return(preq_tmp);
  } /* END duplicate_request() */
void *check_if_orphaned(

  void *vp)

  {
  char                 *rsv_id = (char *)vp;
  char                  job_id[PBS_MAXSVRJOBID];
  struct batch_request *preq;
  int                   handle = -1;
  int                   retries = 0;
  struct pbsnode       *pnode;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  if (is_orphaned(rsv_id, job_id) == TRUE)
    {
    if((preq = alloc_br(PBS_BATCH_DeleteReservation)) == NULL)
      return NULL;
    preq->rq_extend = rsv_id;

    /* Assume the request will be successful and remove the RSV from the hash table */
    remove_alps_reservation(rsv_id);

    if ((pnode = get_next_login_node(NULL)) != NULL)
      {
      struct in_addr hostaddr;
      int            local_errno;
      pbs_net_t      momaddr;

      memcpy(&hostaddr, &pnode->nd_sock_addr.sin_addr, sizeof(hostaddr));
      momaddr = ntohl(hostaddr.s_addr);

      snprintf(log_buf, sizeof(log_buf),
        "Found orphan ALPS reservation ID %s for job %s; asking %s to remove it",
        rsv_id,
        job_id,
        pnode->nd_name);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, __func__, log_buf);

      while ((handle < 0) &&
             (retries < 3))
        {
        handle = svr_connect(momaddr, pnode->nd_mom_port, &local_errno, pnode, NULL, ToServerDIS);
        retries++;
        }

      /* unlock before the network transaction */
      unlock_node(pnode, __func__, NULL, LOGLEVEL);
      
      if (handle >= 0)
        issue_Drequest(handle, preq, true);
        
      free_br(preq);
      }
    }
  else
    free(rsv_id);

  return(NULL);
  } /* END check_if_orphaned() */
示例#4
0
文件: req_delete.c 项目: dhill12/test
static void job_delete_nanny(

  struct work_task *pwt)

  {
  job                  *pjob;
  char                 *sigk = "SIGKILL";
  char                 *jobid;

  struct batch_request *newreq;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  time_t                time_now = time(NULL);
  long                  nanny = FALSE;

  /* short-circuit if nanny isn't enabled */
  get_svr_attr_l(SRV_ATR_JobNanny, &nanny);
  if (!nanny)
    {
    jobid = (char *)pwt->wt_parm1;
    
    if (jobid != NULL)
      {
      pjob = svr_find_job(jobid, FALSE);
      
      if (pjob != NULL)
        {
        sprintf(log_buf, "exiting job '%s' still exists, sending a SIGKILL", pjob->ji_qs.ji_jobid);
        log_err(-1, "job nanny", log_buf);
        
        /* build up a Signal Job batch request */
        if ((newreq = alloc_br(PBS_BATCH_SignalJob)) != NULL)
          {
          strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);
          snprintf(newreq->rq_ind.rq_signal.rq_signame, sizeof(newreq->rq_ind.rq_signal.rq_signame), "%s", sigk);
          }
        
        issue_signal(&pjob, sigk, post_job_delete_nanny, newreq);
        
        if (pjob != NULL)
          {
          apply_job_delete_nanny(pjob, time_now + 60);
  
          unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
          }
        }
      }
    else
      {
      log_err(ENOMEM, __func__, "Cannot allocate memory");
      }
    }
  
  if (pwt->wt_parm1 != NULL)
    free(pwt->wt_parm1);

  free(pwt->wt_mutex);
  free(pwt);
  } /* END job_delete_nanny() */
示例#5
0
/**
 * @brief
 *	 	Duplicate the existing batch request for a running subjob
 *
 * @param[in]	opreq	- the batch status request structure to duplicate
 * @param[in]	pjob	- the parent job structure of the subjob
 * @param[in]	func	- the function to call after duplicating the batch
 *			   structure.
 * @par
 *		1. duplicate the batch request
 *		2. replace the job id with the one from the running subjob
 *		3. link the new batch request to the original and incr its ref ct
 *		4. call the "func" with the new batch request and job
 * @note
 *		Currently, this is called in PBS_Batch_DeleteJob, PBS_Batch_SignalJob,
 *		PBS_Batch_Rerun, and PBS_Batch_RunJob subjob requests.
 *		For any other request types, be sure to add another switch case below
 *		(matching request type).
 */
void
dup_br_for_subjob(struct batch_request *opreq, job *pjob, void (*func)(struct batch_request *, job *))
{
	struct batch_request  *npreq;

	npreq = alloc_br(opreq->rq_type);
	if (npreq == NULL)
		return;

	npreq->rq_perm    = opreq->rq_perm;
	npreq->rq_fromsvr = opreq->rq_fromsvr;
	npreq->rq_conn = opreq->rq_conn;
	npreq->rq_orgconn = opreq->rq_orgconn;
	npreq->rq_time    = opreq->rq_time;
	strcpy(npreq->rq_user, opreq->rq_user);
	strcpy(npreq->rq_host, opreq->rq_host);
	npreq->rq_extend  = opreq->rq_extend;
	npreq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL;
	npreq->rq_refct   = 0;

	/* for each type, update the job id with the one from the new job */

	switch (opreq->rq_type) {
		case PBS_BATCH_DeleteJob:
			npreq->rq_ind.rq_delete = opreq->rq_ind.rq_delete;
			(void)strcpy(npreq->rq_ind.rq_delete.rq_objname,
				pjob->ji_qs.ji_jobid);
			break;
		case PBS_BATCH_SignalJob:
			npreq->rq_ind.rq_signal = opreq->rq_ind.rq_signal;
			(void)strcpy(npreq->rq_ind.rq_signal.rq_jid,
				pjob->ji_qs.ji_jobid);
			break;
		case PBS_BATCH_Rerun:
			(void)strcpy(npreq->rq_ind.rq_rerun,
				pjob->ji_qs.ji_jobid);
			break;
		case PBS_BATCH_RunJob:
			npreq->rq_ind.rq_run = opreq->rq_ind.rq_run;
			(void)strcpy(npreq->rq_ind.rq_run.rq_jid,
				pjob->ji_qs.ji_jobid);
			break;
		default:
			delete_link(&npreq->rq_link);
			free(npreq);
			return;
	}

	npreq->rq_parentbr = opreq;
	opreq->rq_refct++;

	func(npreq, pjob);
}
示例#6
0
void issue_track(

    job *pjob)

{
    struct batch_request *preq;
    char                 *pc;
    char                 *sname;
    char                  log_buf[LOCAL_LOG_BUF_SIZE];

    if ((pc = strchr(pjob->ji_qs.ji_jobid, '.')) == NULL)
    {
        snprintf(log_buf, sizeof(log_buf),
                 "Remote job routing is not compatible with display_job_server_suffix set to false. Cannot track %s",
                 pjob->ji_qs.ji_jobid);
        log_err(-1, __func__, log_buf);

        return;
    }

    sname = pc + 1;

    /* do not issue track requests to ourselves */
    if (!strcmp(sname, server_name))
    {
        snprintf(log_buf, sizeof(log_buf),
                 "%s erroneously called for local job %s",
                 __func__, pjob->ji_qs.ji_jobid);
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
        return;
    }

    preq = alloc_br(PBS_BATCH_TrackJob);

    if (preq == NULL)
        return;

    preq->rq_ind.rq_track.rq_hopcount = pjob->ji_wattr[JOB_ATR_hopcount].at_val.at_long;

    strcpy(preq->rq_ind.rq_track.rq_jid, pjob->ji_qs.ji_jobid);
    strcpy(preq->rq_ind.rq_track.rq_location, server_name);

    preq->rq_ind.rq_track.rq_state[0] = pjob->ji_wattr[JOB_ATR_state].at_val.at_char;

    pc = pjob->ji_qs.ji_jobid;

    while (*pc != '.')
        pc++;

    issue_to_svr(++pc, preq, NULL);
    free_br(preq);
}
示例#7
0
int issue_signal(

  job  **pjob_ptr,
  char  *signame, /* name of the signal to send */
  void  (*func)(batch_request *),
  void  *extra) /* extra parameter to be stored in sig request */

  {
  int                   rc;
  job                  *pjob = *pjob_ptr;
  struct batch_request *newreq;
  char                  jobid[PBS_MAXSVRJOBID + 1];

  /* build up a Signal Job batch request */

  if ((newreq = alloc_br(PBS_BATCH_SignalJob)) == NULL)
    {
    /* FAILURE */

    return(PBSE_SYSTEM);
    }

  newreq->rq_extra = extra;

  strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);

  snprintf(newreq->rq_ind.rq_signal.rq_signame, sizeof(newreq->rq_ind.rq_signal.rq_signame), "%s", signame);

  /* The newreq is freed in relay_to_mom (failure)
   * or in issue_Drequest (success) */
  rc = relay_to_mom(&pjob, newreq, NULL);

  if ((rc == PBSE_NONE) &&
      (pjob != NULL))
    {
    strcpy(jobid, pjob->ji_qs.ji_jobid);
    unlock_ji_mutex(pjob, __func__, NULL, 0);
    func(newreq);

    *pjob_ptr = svr_find_job((char *)jobid, TRUE);
    }
  else
    {
    free_br(newreq);

    if (pjob == NULL)
      *pjob_ptr = NULL;
    }

  return(rc);
  }  /* END issue_signal() */
示例#8
0
void *check_if_orphaned(

  void *vp)

  {
  char                 *rsv_id = (char *)vp;
  struct batch_request *preq;
  int                   handle = -1;
  int                   retries = 0;
  struct pbsnode       *pnode;

  if (is_orphaned(rsv_id) == TRUE)
    {
    preq = alloc_br(PBS_BATCH_DeleteReservation);
    preq->rq_extend = rsv_id;

    if ((pnode = get_next_login_node(NULL)) != NULL)
      {
      struct in_addr hostaddr;
      int            local_errno;
      pbs_net_t      momaddr;

      memcpy(&hostaddr, &pnode->nd_sock_addr.sin_addr, sizeof(hostaddr));
      momaddr = ntohl(hostaddr.s_addr);

      while ((handle < 0) &&
             (retries < 3))
        {
        handle = svr_connect(momaddr, pnode->nd_mom_port, &local_errno, pnode, NULL, ToServerDIS);
        retries++;
        }

      /* unlock before the network transaction */
      unlock_node(pnode, __func__, NULL, 0);
      
      if (handle >= 0)
        {
        issue_Drequest(handle, preq, release_req, 0);
        }
      else
        free_br(preq);
      }
    }
  else
    free(rsv_id);

  return(NULL);
  } /* END check_if_orphaned() */
示例#9
0
END_TEST

START_TEST(test_alloc_br)
  {
  batch_request *preq = alloc_br(PBS_BATCH_QueueJob);

  fail_unless(preq->rq_type == PBS_BATCH_QueueJob);
  fail_unless(preq->rq_conn == -1);
  fail_unless(preq->rq_orgconn == -1);
  fail_unless(preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_NULL);
  fail_unless(preq->rq_noreply == FALSE);
  fail_unless(preq->rq_time > 0);

  free_br(preq);
  fail_unless(free_attrlist_called > 0);
  }
示例#10
0
文件: req_rescq.c 项目: CESNET/pbspro
/**
 * @brief
 *		qmove a job into a reservation
 *
 * @parm[in]	presv	-	reservation structure
 *
 * @return	int
 *
 * @retval	0	: Success
 * @retval	-1	: Failure
 *
 */
int
cnvrt_qmove(resc_resv  *presv)
{
	int rc;
	struct job *pjob;
	struct work_task wtnew;
	char *q_job_id, *at;
	struct batch_request *reqcnvrt;

	if (gen_task_EndResvWindow(presv)) {
		(void)resv_purge(presv);
		return (-1);
	}

	pjob = find_job(presv->ri_wattr[(int)RESV_ATR_convert].at_val.at_str);
	if (pjob != NULL)
		q_job_id = pjob->ji_qs.ji_jobid;
	else {
		(void)resv_purge(presv);
		return (-1);
	}
	if ((reqcnvrt = alloc_br(PBS_BATCH_MoveJob)) == NULL) {
		(void)resv_purge(presv);
		return (-1);
	}
	reqcnvrt->rq_perm = (presv->ri_brp)->rq_perm;
	strcpy(reqcnvrt->rq_user, (presv->ri_brp)->rq_user);
	strcpy(reqcnvrt->rq_host, (presv->ri_brp)->rq_host);

	snprintf(reqcnvrt->rq_ind.rq_move.rq_jid, sizeof(reqcnvrt->rq_ind.rq_move.rq_jid), "%s", q_job_id);
	at = strchr(presv->ri_qs.ri_resvID, (int)'.');
	if (at)
		*at = '\0';

	snprintf(reqcnvrt->rq_ind.rq_move.rq_destin, sizeof(reqcnvrt->rq_ind.rq_move.rq_destin), "%s", presv->ri_qs.ri_resvID);
	if (at)
		*at = '.';

	snprintf(pjob->ji_qs.ji_destin, PBS_MAXROUTEDEST, "%s", reqcnvrt->rq_ind.rq_move.rq_destin);
	rc = cnvrt_local_move(pjob, reqcnvrt);
	wtnew.wt_parm1 = (void *)presv;
	cnvrt_delete(&wtnew);

	if (rc != 0) return (-1);
	return (0);
}
示例#11
0
int
shutdown_preempt_chkpt(job *pjob)
{
	struct batch_request *phold;
	attribute temp;
	void (*func)(struct work_task *);

	long *hold_val = NULL;
	long old_hold = 0;

	phold = alloc_br(PBS_BATCH_HoldJob);
	if (phold == NULL)
		return (PBSE_SYSTEM);

	temp.at_flags = ATR_VFLAG_SET;
	temp.at_type  = job_attr_def[(int)JOB_ATR_hold].at_type;
	temp.at_user_encoded = NULL;
	temp.at_priv_encoded = NULL;
	temp.at_val.at_long = HOLD_s;

	phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;
	(void)strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);
	CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);
	if (job_attr_def[(int)JOB_ATR_hold].at_encode(&temp,
		&phold->rq_ind.rq_hold.rq_orig.rq_attr,
		job_attr_def[(int)JOB_ATR_hold].at_name,
		NULL,
		ATR_ENCODE_CLIENT, NULL) < 0)
		return (PBSE_SYSTEM);

	phold->rq_extra = pjob;
	func = post_chkpt;

	if (relay_to_mom(pjob, phold, func) == 0) {

		if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
			svr_setjobstate(pjob, JOB_STATE_RUNNING, JOB_SUBSTATE_RUNNING);
		pjob->ji_qs.ji_svrflags |= (JOB_SVFLG_HASRUN | JOB_SVFLG_CHKPT | JOB_SVFLG_HASHOLD);
		pjob->ji_modified = 1;
		(void)job_save(pjob, SAVEJOB_QUICK);
		return (0);
	} else {
		*hold_val = old_hold;	/* reset to the old value */
		return (-1);
	}
}
示例#12
0
END_TEST

START_TEST(cpy_checkpoint_test)
  {
  struct job *test_job = job_alloc();
  struct batch_request *result = cpy_checkpoint(NULL,
                                         test_job,
                                         JOB_ATR_checkpoint_name,
                                         CKPT_DIR_IN);
  struct batch_request *initial = alloc_br(/*PBS_BATCH_CheckpointJob*/0);
  fail_unless(result == NULL, "NULL batch_request input fail");

  result = cpy_checkpoint(initial,
                          NULL,
                          JOB_ATR_checkpoint_name,
                          CKPT_DIR_IN);
  fail_unless(result == NULL, "NULL job input fail");

  /*TODO: add test for valid input, invalid dir value*/
  }
示例#13
0
static void job_delete_nanny(

  struct work_task *pwt)

  {
  job *pjob;
  char *sigk = "SIGKILL";

  struct batch_request *newreq;

  /* short-circuit if nanny isn't enabled */

  if (!server.sv_attr[SRV_ATR_JobNanny].at_val.at_long)
    {
    release_req(pwt);

    return;
    }

  pjob = (job *)pwt->wt_parm1;

  sprintf(log_buffer, "exiting job '%s' still exists, sending a SIGKILL",
          pjob->ji_qs.ji_jobid);

  log_err(-1, "job nanny", log_buffer);

  /* build up a Signal Job batch request */

  if ((newreq = alloc_br(PBS_BATCH_SignalJob)) != NULL)
    {
    strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);
    strncpy(newreq->rq_ind.rq_signal.rq_signame, sigk, PBS_SIGNAMESZ);
    }

  issue_signal(pjob, sigk, post_job_delete_nanny, newreq);


  apply_job_delete_nanny(pjob, time_now + 60);

  return;
  } /* END job_delete_nanny() */
示例#14
0
static int
shutdown_chkpt(job *pjob)
{
	struct batch_request *phold;
	attribute 	      temp;

	phold = alloc_br(PBS_BATCH_HoldJob);
	if (phold == (struct batch_request *)0)
		return (PBSE_SYSTEM);

	temp.at_flags = ATR_VFLAG_SET;
	temp.at_type  = job_attr_def[(int)JOB_ATR_hold].at_type;
	temp.at_user_encoded = NULL;
	temp.at_priv_encoded = NULL;
	temp.at_val.at_long = HOLD_s;

	phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;
	(void)strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);
	CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);
	if (job_attr_def[(int)JOB_ATR_hold].at_encode(&temp,
		&phold->rq_ind.rq_hold.rq_orig.rq_attr,
		job_attr_def[(int)JOB_ATR_hold].at_name,
		(char *)0,
		ATR_ENCODE_CLIENT, NULL) < 0)
		return (PBSE_SYSTEM);

	if (relay_to_mom(pjob, phold, post_chkpt) == 0) {

		if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
			svr_setjobstate(pjob, JOB_STATE_RUNNING, JOB_SUBSTATE_RUNNING);

		pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;
		pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN;
		pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHKPT;
		pjob->ji_modified = 1;
		(void)job_save(pjob, SAVEJOB_QUICK);
		return (0);
	} else
		return (-1);
}
示例#15
0
int issue_signal(

  job  *pjob,
  char *signame, /* name of the signal to send */
  void (*func)(struct work_task *),
  void *extra) /* extra parameter to be stored in sig request */

  {
  int rc;

  struct batch_request *newreq;

  /* build up a Signal Job batch request */

  if ((newreq = alloc_br(PBS_BATCH_SignalJob)) == NULL)
    {
    /* FAILURE */

    return(PBSE_SYSTEM);
    }

  newreq->rq_extra = extra;

  strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);

  strncpy(newreq->rq_ind.rq_signal.rq_signame, signame, PBS_SIGNAMESZ);

  rc = relay_to_mom(
         pjob,
         newreq,
         func);

  /* when MOM replies, we just free the request structure */

  return(rc);
  }  /* END issue_signal() */
示例#16
0
int stat_to_mom(

  char             *job_id,
  struct stat_cntl *cntl)  /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];
  struct pbsnode       *node;
  int                   handle = -1;
  unsigned long         job_momaddr = -1;
  unsigned short        job_momport = -1;
  char                 *job_momname = NULL;
  job                  *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return(PBSE_JOBNOTFOUND);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) || 
      (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str))
    {
    job_mutex.unlock();
    snprintf(log_buf, sizeof(log_buf),
      "Job %s missing MOM's information. Skipping statting on this job", pjob->ji_qs.ji_jobid);
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    return PBSE_BAD_PARAMETER;
    }

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  job_mutex.unlock();

  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return PBSE_MEM_MALLOC;
    }

  if (cntl->sc_type == 1)
    snprintf(newrq->rq_ind.rq_status.rq_id, sizeof(newrq->rq_ind.rq_status.rq_id), "%s", job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr,job_momport,job_momname);
  free(job_momname);

  if (node == NULL)
    return PBSE_UNKNODE;
  if ((node->nd_state & INUSE_DOWN)||(node->nd_power_state != POWER_STATE_RUNNING))
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
          "node '%s' is allocated to job but in state 'down'",
          node->nd_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf);
      }

    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return PBSE_NORELYMOM;
    }

  /* get connection to MOM */
  unlock_node(node, __func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL);

  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq, true)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return(rc);
  }  /* END stat_to_mom() */
示例#17
0
void
process_request(int sfds)
{
	int		      rc;
	struct batch_request *request;
	conn_t		     *conn;


	time_now = time(NULL);

	conn = get_conn(sfds);

	if (!conn) {
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, LOG_ERR,
			"process_request", "did not find socket in connection table");
#ifdef WIN32
		(void)closesocket(sfds);
#else
		(void)close(sfds);
#endif
		return;
	}

	if ((request = alloc_br(0)) == NULL) {
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, LOG_ERR,
			"process_request", "Unable to allocate request structure");
		close_conn(sfds);
		return;
	}
	request->rq_conn = sfds;

	/*
	 * Read in the request and decode it to the internal request structure.
	 */

	if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME)) {

		(void)sprintf(log_buffer, "%s: %lu", msg_reqbadhost,
			get_connectaddr(sfds));
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, LOG_DEBUG,
			"", log_buffer);
		req_reject(PBSE_BADHOST, 0, request);
		return;
	}

#ifndef PBS_MOM

	if (conn->cn_active == FromClientDIS) {
		rc = dis_request_read(sfds, request);
	} else {
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, LOG_ERR,
			"process_req", "request on invalid type of connection");
		close_conn(sfds);
		free_br(request);
		return;
	}
#else	/* PBS_MOM */
	rc = dis_request_read(sfds, request);
#endif	/* PBS_MOM */

	if (rc == -1) {		/* End of file */
		close_client(sfds);
		free_br(request);
		return;

	} else if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL)) {

		/* read error, likely cannot send reply so just disconnect */

		/* ??? not sure about this ??? */

		close_client(sfds);
		free_br(request);
		return;

	} else if (rc > 0) {

		/*
		 * request didn't decode, either garbage or  unknown
		 * request type, in ether case, return reject-reply
		 */

		req_reject(rc, 0, request);
		close_client(sfds);
		return;
	}

#ifndef PBS_MOM
	/* If the request is coming on the socket we opened to the  */
	/* scheduler,  change the "user" from "root" to "Scheduler" */
	if (find_sched_from_sock(request->rq_conn) != NULL) {
		strncpy(request->rq_user, PBS_SCHED_DAEMON_NAME, PBS_MAXUSER);
		request->rq_user[PBS_MAXUSER] = '\0';
	}
#endif	/* PBS_MOM */

	(void)sprintf(log_buffer, msg_request, request->rq_type,
		request->rq_user, request->rq_host, sfds);
	log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, LOG_DEBUG,
		"", log_buffer);

	/* is the request from a host acceptable to the server */
	if (request->rq_type == PBS_BATCH_AuthExternal) {
		rc = authenticate_external(conn, request);
		if (rc == 0)
			reply_ack(request);
		else if (rc == -2)
			req_reject(PBSE_NOSUP, 0, request);
		else
			req_reject(PBSE_BADCRED, 0, request);
		return;
	}

#ifndef PBS_MOM
	if (server.sv_attr[(int)SRV_ATR_acl_host_enable].at_val.at_long) {
		/* acl enabled, check it; always allow myself	*/

		struct pbsnode *isanode = NULL;
		if ((server.sv_attr[SRV_ATR_acl_host_moms_enable].at_flags & ATR_VFLAG_SET) &&
			(server.sv_attr[(int)SRV_ATR_acl_host_moms_enable].at_val.at_long == 1)) {
			isanode = find_nodebyaddr(get_connectaddr(sfds));

			if ((isanode != NULL) && (isanode->nd_state & INUSE_DELETED))
				isanode = NULL;
		}

		if (isanode == NULL) {
			if ((acl_check(&server.sv_attr[(int)SRV_ATR_acl_hosts],
				request->rq_host, ACL_Host) == 0) &&
				(strcasecmp(server_host, request->rq_host) != 0)) {
					req_reject(PBSE_BADHOST, 0, request);
					close_client(sfds);
					return;
			}
                }
	}

	/*
	 * determine source (user client or another server) of request.
	 * set the permissions granted to the client
	 */
	if (conn->cn_authen & PBS_NET_CONN_FROM_PRIVIL) {

		/* request came from another server */

		request->rq_fromsvr = 1;
		request->rq_perm = ATR_DFLAG_USRD | ATR_DFLAG_USWR |
				   ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
				   ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
				   ATR_DFLAG_SvWR;

	} else {

		/* request not from another server */

		request->rq_fromsvr = 0;

		/*
		 * Client must be authenticated by a Authenticate User Request,
		 * if not, reject request and close connection.
		 * -- The following is retained for compat with old cmds --
		 * The exception to this is of course the Connect Request which
		 * cannot have been authenticated, because it contains the
		 * needed ticket; so trap it here.  Of course, there is no
		 * prior authentication on the Authenticate User request either,
		 * but it comes over a reserved port and appears from another
		 * server, hence is automatically granted authorization.

		 */

		if (request->rq_type == PBS_BATCH_Connect) {
			req_connect(request);
			return;
		}

		if ((conn->cn_authen & PBS_NET_CONN_AUTHENTICATED) ==0) {
			rc = PBSE_BADCRED;
		} else {
			rc = authenticate_user(request, conn);
		}
		if (rc != 0) {
			req_reject(rc, 0, request);
			if (rc == PBSE_BADCRED)
				close_client(sfds);
			return;
		}

		request->rq_perm =
			svr_get_privilege(request->rq_user, request->rq_host);
	}

	/* if server shutting down, disallow new jobs and new running */

	if (server.sv_attr[(int)SRV_ATR_State].at_val.at_long > SV_STATE_RUN) {
		switch (request->rq_type) {
			case PBS_BATCH_AsyrunJob:
			case PBS_BATCH_JobCred:
			case PBS_BATCH_UserCred:
			case PBS_BATCH_UserMigrate:
			case PBS_BATCH_MoveJob:
			case PBS_BATCH_QueueJob:
			case PBS_BATCH_RunJob:
			case PBS_BATCH_StageIn:
			case PBS_BATCH_jobscript:
				req_reject(PBSE_SVRDOWN, 0, request);
				return;
		}
	}


#else	/* THIS CODE FOR MOM ONLY */

	/* check connecting host against allowed list of ok clients */
	if (!addrfind(conn->cn_addr)) {
		req_reject(PBSE_BADHOST, 0, request);
		close_client(sfds);
		return;
	}

	request->rq_fromsvr = 1;
	request->rq_perm = ATR_DFLAG_USRD | ATR_DFLAG_USWR |
			   ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
			   ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
			   ATR_DFLAG_SvWR | ATR_DFLAG_MOM;
#endif

	/*
	 * dispatch the request to the correct processing function.
	 * The processing function must call reply_send() to free
	 * the request struture.
	 */

	dispatch_request(sfds, request);
	return;
}
示例#18
0
文件: req_modify.c 项目: dhill12/test
int copy_batchrequest(
    
  struct batch_request **newreq,
  struct batch_request  *preq,
  int                    type,
  int                    jobid)

  {
  struct batch_request *request;
  svrattrl             *pal = NULL;
  svrattrl             *newpal = NULL;
  tlist_head           *phead = NULL;
  char                 *ptr1;
  char                 *ptr2;
  char                  newjobname[PBS_MAXSVRJOBID+1];
  
  request = alloc_br(type);
  if (request)
    {
    request->rq_type = preq->rq_type;
    request->rq_perm = preq->rq_perm;
    request->rq_fromsvr = preq->rq_fromsvr;
    request->rq_conn = preq->rq_conn;
    request->rq_orgconn = preq->rq_orgconn;
    request->rq_extsz = preq->rq_extsz;
    request->rq_time = preq->rq_time;
    strcpy(request->rq_user, preq->rq_user);
    strcpy(request->rq_host, preq->rq_host);
    request->rq_reply.brp_choice = preq->rq_reply.brp_choice;
    request->rq_noreply = preq->rq_noreply;
    /* we need to copy rq_extend if there is any data */
    if (preq->rq_extend)
      {
      request->rq_extend = (char *)calloc(1, strlen(preq->rq_extend) + 1);
      if (request->rq_extend == NULL)
        {
        free_br(request);
        return(PBSE_SYSTEM);
        }
      strcpy(request->rq_extend, preq->rq_extend);
      }
    /* remember the batch_request we copied */
    request->rq_extra = (void *)preq;
    
    switch(preq->rq_type)
      {
      /* This function was created for a modify arracy request (PBS_BATCH_ModifyJob)
         the preq->rq_ind structure was allocated in dis_request_read. If other
         BATCH types are needed refer to that function to see how the rq_ind structure
         was allocated and then copy it here. */
      case PBS_BATCH_DeleteJob:
        
      case PBS_BATCH_HoldJob:
        
      case PBS_BATCH_CheckpointJob:
        
      case PBS_BATCH_ModifyJob:
        
      case PBS_BATCH_AsyModifyJob:
        /* based on how decode_DIS_Manage allocates data */
        CLEAR_HEAD(request->rq_ind.rq_manager.rq_attr);
        
        phead = &request->rq_ind.rq_manager.rq_attr;
        request->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
        request->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
        /* If this is a job array it is possible we only have the array name
           and not the individual job. We need to find out what we have and
           modify the name if needed */
        ptr1 = strstr(preq->rq_ind.rq_manager.rq_objname, "[]");
        if ((ptr1) && (jobid != -1))
          {
          ptr1++;
          strcpy(newjobname, preq->rq_ind.rq_manager.rq_objname);
          ptr2 = strstr(newjobname, "[]");
          ptr2++;
          *ptr2 = 0;
          sprintf(request->rq_ind.rq_manager.rq_objname,"%s%d%s", 
            newjobname,
            jobid,
            ptr1);
          }
        else
          strcpy(request->rq_ind.rq_manager.rq_objname, preq->rq_ind.rq_manager.rq_objname);
        
        /* copy the attribute list */
        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_manager.rq_attr);
        while(pal != NULL)
          {
          newpal = (svrattrl *)calloc(1, pal->al_tsize + 1);
          if (!newpal)
            {
            free_br(request);
            return(PBSE_SYSTEM);
            }
          CLEAR_LINK(newpal->al_link);
          
          newpal->al_atopl.next = 0;
          newpal->al_tsize = pal->al_tsize + 1;
          newpal->al_nameln = pal->al_nameln;
          newpal->al_flags  = pal->al_flags;
          newpal->al_atopl.name = (char *)newpal + sizeof(svrattrl);
          strcpy(newpal->al_atopl.name, pal->al_atopl.name);
          newpal->al_nameln = pal->al_nameln;
          newpal->al_atopl.resource = newpal->al_atopl.name + newpal->al_nameln;
          if (pal->al_atopl.resource != NULL)
            strcpy(newpal->al_atopl.resource, pal->al_atopl.resource);
          newpal->al_rescln = pal->al_rescln;
          newpal->al_atopl.value = newpal->al_atopl.name + newpal->al_nameln + newpal->al_rescln;
          strcpy(newpal->al_atopl.value, pal->al_atopl.value);
          newpal->al_valln = pal->al_valln;
          newpal->al_atopl.op = pal->al_atopl.op;
          
          pal = (struct svrattrl *)GET_NEXT(pal->al_link);
          
          }
        
        break;

      case PBS_BATCH_SignalJob:

        strcpy(request->rq_ind.rq_signal.rq_jid, preq->rq_ind.rq_signal.rq_jid);
        strcpy(request->rq_ind.rq_signal.rq_signame, preq->rq_ind.rq_signal.rq_signame);
        request->rq_extra = strdup(preq->rq_extra);

        break;

      case PBS_BATCH_MessJob:

        strcpy(request->rq_ind.rq_message.rq_jid, preq->rq_ind.rq_message.rq_jid);
        request->rq_ind.rq_message.rq_file = preq->rq_ind.rq_message.rq_file;
        strcpy(request->rq_ind.rq_message.rq_text, preq->rq_ind.rq_message.rq_text);

        break;
        
      default:

        break;
        
      }

    if ((phead != NULL) &&
        (newpal != NULL))
      append_link(phead, &newpal->al_link, newpal);
    
    *newreq = request;
    return(0);
    
    }
  else
    return(PBSE_SYSTEM);
  }
示例#19
0
文件: req_modify.c 项目: dhill12/test
void mom_cleanup_checkpoint_hold(

  struct work_task *ptask)

  {
  int                   rc = 0;
  job                  *pjob;
  char                 *jobid;

  struct batch_request *preq;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  time_t                time_now = time(NULL);

  jobid = (char *)ptask->wt_parm1;
  free(ptask->wt_mutex);
  free(ptask);

  if (jobid == NULL)
    {
    log_err(ENOMEM, __func__, "Cannot allocate memory");
    return;
    }

  pjob = svr_find_job(jobid, FALSE);
  if (pjob == NULL)
    {
    if (LOGLEVEL >= 3)
      {
      sprintf(log_buf,
        "%s:failed to find job\n",
        __func__);

      log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,jobid,log_buf);
      }
    free(jobid);
    return;
    }
  free(jobid);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf,
      "checking mom cleanup job state is %s-%s\n",
      PJobState[pjob->ji_qs.ji_state],
      PJobSubState[pjob->ji_qs.ji_substate]);

    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf);
    }

  /* 
   * if the job is no longer running then we have recieved the job obit
   * and need to request the mom to clean up after the job
   */

  if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING)
    {
    if ((preq = alloc_br(PBS_BATCH_DeleteJob)) == NULL)
      {
      log_err(-1, __func__, "unable to allocate DeleteJob request - big trouble!");
      }
    else
      {
      strcpy(preq->rq_ind.rq_delete.rq_objname, pjob->ji_qs.ji_jobid);
      /* The preq is freed in relay_to_mom (failure)
       * or in issue_Drequest (success) */
      if ((rc = relay_to_mom(&pjob, preq, release_req)) != 0)
        {
        if (pjob != NULL)
          {
          snprintf(log_buf,sizeof(log_buf),
            "Unable to relay information to mom for job '%s'\n",
            pjob->ji_qs.ji_jobid);
          
          log_err(rc, __func__, log_buf);
          
          unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
          }

        return;
        }

      if ((LOGLEVEL >= 7) &&
          (pjob != NULL))
        {
        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "requested mom cleanup");
        }
      }
    }
  else
    {
    set_task(WORK_Timed, time_now + 1, mom_cleanup_checkpoint_hold, strdup(pjob->ji_qs.ji_jobid), FALSE);
    }

  if (pjob != NULL)
    unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
  } /* END mom_cleanup_checkpoint_hold() */
示例#20
0
int stat_to_mom(

  char             *job_id,
  struct stat_cntl *cntl)  /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];
  struct pbsnode       *node;
  int handle = -1;
  unsigned long job_momaddr = -1;
  unsigned short job_momport = -1;
  char *job_momname = NULL;
  job *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return PBSE_JOBNOTFOUND;

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return PBSE_MEM_MALLOC;
    }

  if (cntl->sc_type == 1)
    strcpy(newrq->rq_ind.rq_status.rq_id, job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr,job_momport,job_momname);
  free(job_momname);

  if (node == NULL)
    return PBSE_UNKNODE;
  if (node->nd_state & INUSE_DOWN)
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
          "node '%s' is allocated to job but in state 'down'",
          node->nd_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf);
      }

    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return PBSE_NORELYMOM;
    }

  /* get connection to MOM */
  unlock_node(node, __func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL, ToServerDIS);

  /* Unlock job here */
  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return rc;
  }  /* END stat_to_mom() */
int process_request(

  struct tcp_chan *chan) /* file descriptor (socket) to get request */

  {
  int                   rc = PBSE_NONE;
  struct batch_request *request = NULL;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  long                  acl_enable = FALSE;
  long                  state = SV_STATE_DOWN;

  time_t                time_now = time(NULL);
  int                   free_request = TRUE;
  char                  tmpLine[MAXLINE];
  char                 *auth_err = NULL;
  enum conn_type        conn_active;
  unsigned short        conn_socktype;
  unsigned short        conn_authen;
  unsigned long         conn_addr;
  int                   sfds = chan->sock;

  pthread_mutex_lock(svr_conn[sfds].cn_mutex);
  conn_active = svr_conn[sfds].cn_active;
  conn_socktype = svr_conn[sfds].cn_socktype;
  conn_authen = svr_conn[sfds].cn_authen;
  conn_addr = svr_conn[sfds].cn_addr;
  svr_conn[sfds].cn_lasttime = time_now;
  pthread_mutex_unlock(svr_conn[sfds].cn_mutex);

  if ((request = alloc_br(0)) == NULL)
    {
    snprintf(tmpLine, sizeof(tmpLine),
        "cannot allocate memory for request from %lu",
        conn_addr);
    req_reject(PBSE_MEM_MALLOC, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_SYSTEM;
    goto process_request_cleanup;
    }

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */
  if (conn_active == FromClientDIS || conn_active == ToServerDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((conn_socktype & PBS_SOCK_UNIX) &&
        (conn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      /* get_creds interestingly always returns 0 */
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(chan, request);
    }
  else
    {
    char out[80];

    snprintf(tmpLine, MAXLINE, "request on invalid type of connection: %d, sock type: %d, from address %s", 
                conn_active,conn_socktype, netaddr_long(conn_addr, out));
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
      "process_req", tmpLine);
    snprintf(tmpLine, sizeof(tmpLine),
        "request on invalid type of connection (%d) from %s",
        conn_active,
        netaddr_long(conn_addr, out));
    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (rc == -1)
    {
    /* FAILURE */
    /* premature end of file */
    rc = PBSE_PREMATURE_EOF;
    goto process_request_cleanup;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE))
    {
    /* FAILURE */
    /* read error, likely cannot send reply so just disconnect */
    /* ??? not sure about this ??? */
    goto process_request_cleanup;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");
    free_request = FALSE;
    goto process_request_cleanup;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    sprintf(log_buf, "%s: %lu",
      pbse_to_txt(PBSE_BADHOST),
      conn_addr);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buf);

    snprintf(tmpLine, sizeof(tmpLine),
        "cannot determine hostname for connection from %lu",
        conn_addr);

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(log_buf,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buf);
    }

  /* is the request from a host acceptable to the server */
  if (conn_socktype & PBS_SOCK_UNIX)
    {
    strcpy(request->rq_host, server_name);
    }

  get_svr_attr_l(SRV_ATR_acl_host_enable, &acl_enable);
  if (acl_enable)
    {
    /* acl enabled, check it; always allow myself and nodes */
    struct array_strings *pas = NULL;
    struct pbsnode       *isanode;

    get_svr_attr_arst(SRV_ATR_acl_hosts, &pas);
    isanode = PGetNodeFromAddr(conn_addr);

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check_my_array_string(pas, request->rq_host, ACL_Host) == 0))
      {
      char tmpLine[MAXLINE];
      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
      free_request = FALSE;
      rc = PBSE_BADHOST;
      goto process_request_cleanup;
      }

    if (isanode != NULL)
      unlock_node(isanode, "process_request", NULL, LOGLEVEL);
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (conn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */
    conn_credent[sfds].timestamp = time_now;

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (conn_socktype == PBS_SOCK_INET)
        {
        rc = PBSE_IVALREQ;
        req_reject(rc, 0, request, NULL, NULL);
        free_request = FALSE;
        goto process_request_cleanup;
        }

      }

    if (conn_socktype & PBS_SOCK_UNIX)
      {
      pthread_mutex_lock(svr_conn[sfds].cn_mutex);
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      pthread_mutex_unlock(svr_conn[sfds].cn_mutex);
      }

    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = PBSE_NONE;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if (request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        free_request = FALSE;
        goto process_request_cleanup;
        }
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (conn_authen != PBS_NET_CONN_AUTHENTICATED)
      /* skip checking user if we did not get an authenticated credential */
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      if (auth_err != NULL)
        free(auth_err);
      free_request = FALSE;
      goto process_request_cleanup;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if ((pjob = svr_find_job(request->rq_ind.rq_modify.rq_objname, FALSE)) != (job *)0)
        {
        if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
          {

          if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
              ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
              (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
            {
            request->rq_perm = svr_get_privilege(request->rq_user, server_host);
            skip = TRUE;
            }

          }
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        }
      
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (conn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */
  get_svr_attr_l(SRV_ATR_State, &state);

  if (state > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);
        rc = PBSE_SVRDOWN;
        free_request = FALSE;
        goto process_request_cleanup;
        /*NOTREACHED*/

        break;
      }
    }

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  rc = dispatch_request(sfds, request);

  return(rc);

process_request_cleanup:

  if (free_request == TRUE)
    free_br(request);

  return(rc);
  }  /* END process_request() */
示例#22
0
void req_deletejob(

  struct batch_request *preq)  /* I */

  {
  job              *pjob;

  struct work_task *pwtold;

  struct work_task *pwtnew;
  struct work_task *pwtcheck;

  int               rc;
  char             *sigt = "SIGTERM";

  char             *Msg = NULL;

  /* check if we are getting a purgecomplete from scheduler */
  if ((preq->rq_extend != NULL) && 
        !strncmp(preq->rq_extend,PURGECOMP,strlen(PURGECOMP)))
    {

    /*
     * purge_completed_jobs will respond with either an ack or reject
     */
    purge_completed_jobs(preq);

    return;
    }

  /* The way this is implemented, if the user enters the command "qdel -p <jobid>",
   * they can then delete jobs other than their own since the authorization
   * checks are made below in chk_job_request. This should probably be fixed.
   */

  if (forced_jobpurge(preq) != 0)
    {
    return;
    }

  /* NOTE:  should support rq_objname={<JOBID>|ALL|<name:<JOBNAME>} */

  /* NYI */

  pjob = chk_job_request(preq->rq_ind.rq_delete.rq_objname, preq);

  if (pjob == NULL)
    {
    /* NOTE:  chk_job_request() will issue req_reject() */

    return;
    }

  if (preq->rq_extend != NULL)
    {
    if (strncmp(preq->rq_extend, deldelaystr, strlen(deldelaystr)) &&
        strncmp(preq->rq_extend, delasyncstr, strlen(delasyncstr)) &&
        strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)))
      {
      /* have text message in request extension, add it */

      Msg = preq->rq_extend;

      /*
       * Message capability is only for operators and managers.
       * Check if request is authorized
      */

      if ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
                            ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0)
        {
        req_reject(PBSE_PERM, 0, preq, NULL,
                   "must have operator or manager privilege to use -m parameter");
        return;
        }
      }
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
    {
    /*
     * Find pid of router from existing work task entry,
     * then establish another work task on same child.
     * Next, signal the router and wait for its completion;
     */

    pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask);

    while (pwtold != NULL)
      {
      if ((pwtold->wt_type == WORK_Deferred_Child) ||
          (pwtold->wt_type == WORK_Deferred_Cmp))
        {
        pwtnew = set_task(
                   pwtold->wt_type,
                   pwtold->wt_event,
                   post_delete_route,
                   preq);

        if (pwtnew != NULL)
          {
          /*
           * reset type in case the SIGCHLD came
           * in during the set_task;  it makes
           * sure that next_task() will find the
           * new entry.
           */

          pwtnew->wt_type = pwtold->wt_type;
          pwtnew->wt_aux = pwtold->wt_aux;

          kill((pid_t)pwtold->wt_event, SIGTERM);

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT;

          return; /* all done for now */
          }
        else
          {
          req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

          return;
          }
        }

      pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj);
      }

    /* should never get here ...  */

    log_err(-1, "req_delete", "Did not find work task for router");

    req_reject(PBSE_INTERNAL, 0, preq, NULL, NULL);

    return;
    }

  if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 )
    {
    /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */
    /* retry in one second                            */
    /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the
       job is being requeued. Wait until finished */

    static time_t  cycle_check_when = 0;
    static char    cycle_check_jid[PBS_MAXSVRJOBID + 1];

    if (cycle_check_when != 0)
      {
      if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) &&
          (time_now - cycle_check_when > 10))
        {
        /* state not updated after 10 seconds */

        /* did the mom ever get it? delete it anyways... */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;

        goto jump;
        }

      if (time_now - cycle_check_when > 20)
        {
        /* give up after 20 seconds */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;
        }
      }    /* END if (cycle_check_when != 0) */

    if (cycle_check_when == 0)
      {
      /* new PRERUN job located */

      cycle_check_when = time_now;
      strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid);
      }

    sprintf(log_buffer, "job cannot be deleted, state=PRERUN, requeuing delete request");

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    pwtnew = set_task(
               WORK_Timed,
               time_now + 1,
               post_delete_route,
               preq);

    if (pwtnew == 0)
      req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

    return;
    }  /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */

jump:

  /*
   * Log delete and if requesting client is not job owner, send mail.
   */

  sprintf(log_buffer, "requestor=%s@%s",
          preq->rq_user,
          preq->rq_host);


  /* NOTE:  should annotate accounting record with extend message (NYI) */

  account_record(PBS_ACCT_DEL, pjob, log_buffer);

  sprintf(log_buffer, msg_manager,
          msg_deletejob,
          preq->rq_user,
          preq->rq_host);

  log_event(
    PBSEVENT_JOB,
    PBS_EVENTCLASS_JOB,
    pjob->ji_qs.ji_jobid,
    log_buffer);

  /* NOTE:  should incorporate job delete message */

  if (Msg != NULL)
    {
    /* have text message in request extension, add it */

    strcat(log_buffer, "\n");
    strcat(log_buffer, Msg);
    }

  if ((svr_chk_owner(preq, pjob) != 0) &&
      !has_job_delete_nanny(pjob))
    {
    /* only send email if owner did not delete job and job deleted
       has not been previously attempted */

    svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buffer);
    /*
     * If we sent mail and already sent the extra message
     * then reset message so we don't trigger a redundant email
     * in job_abt()
    */

    if (Msg != NULL)
      {
      Msg = NULL;
      }
    }

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, change restart comment if failed */

    change_restart_comment_if_needed(pjob);
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * setup a nanny task to make sure the job is actually deleted (see the
     * comments at job_delete_nanny()).
     */

    if (has_job_delete_nanny(pjob))
      {
      req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress");

      return;
      }

    apply_job_delete_nanny(pjob, time_now + 60);

    /* check if we are getting a asynchronous delete */

    if ((preq->rq_extend != NULL) &&
          !strncmp(preq->rq_extend,DELASYNC,strlen(DELASYNC)))
      {
      struct batch_request *preq_tmp = NULL;
      /*
       * Respond with an ack now instead of after MOM processing
       * Create a new batch request and fill it in. It will be freed by reply_ack
       */

      snprintf(log_buffer,sizeof(log_buffer), "Deleting job asynchronously");
      log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buffer);

      preq_tmp = alloc_br(PBS_BATCH_DeleteJob);
      preq_tmp->rq_perm = preq->rq_perm;
      preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
      preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
      preq_tmp->rq_fromsvr = preq->rq_fromsvr;
      preq_tmp->rq_extsz = preq->rq_extsz;
      preq_tmp->rq_conn = preq->rq_conn;
      memcpy(preq_tmp->rq_ind.rq_manager.rq_objname,
          preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1);
      memcpy(preq_tmp->rq_user, preq->rq_user, PBS_MAXUSER + 1);
      memcpy(preq_tmp->rq_host, preq->rq_host, PBS_MAXHOSTNAME + 1);

      reply_ack(preq_tmp);
      preq->rq_noreply = TRUE; /* set for no more replies */
      }
  
    /* make a cleanup task if set */
    if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
        (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
      {
      pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
      if (pwtcheck != NULL)
        append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
      }

    /*
     * Send signal request to MOM.  The server will automagically
     * pick up and "finish" off the client request when MOM replies.
     */

    if ((rc = issue_signal(pjob, sigt, post_delete_mom1, preq)))
      {
      /* cant send to MOM */

      req_reject(rc, 0, preq, NULL, NULL);
      }

    /* normally will ack reply when mom responds */

    sprintf(log_buffer, msg_delrunjobsig,
            sigt);

    LOG_EVENT(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    return;
    }  /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */

  /* make a cleanup task if set */
  if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
      (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
    {
    pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
    if (pwtcheck != NULL)
      append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
    }

  /* if configured, and this job didn't have a slot limit hold, free a job
   * held with the slot limit hold */
  if ((server.sv_attr[SRV_ATR_MoabArrayCompatible].at_val.at_long != FALSE) &&
      ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE))
    {
    if ((pjob->ji_arraystruct != NULL) &&
        (pjob->ji_is_array_template == FALSE))
      {
      int        i;
      int        newstate;
      int        newsub;
      job       *tmp;
      job_array *pa = pjob->ji_arraystruct;

      for (i = 0; i < pa->ai_qs.array_size; i++)
        {
        if (pa->jobs[i] == NULL)
          continue;

        tmp = (job *)pa->jobs[i];

        if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l)
          {
          tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l;
              
          if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0)
            {
            tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET;
            }
          
          svr_evaljobstate(tmp, &newstate, &newsub, 1);
          svr_setjobstate(tmp, newstate, newsub);
          job_save(tmp, SAVEJOB_FULL, 0);

          break;
          }
        }
      }
    } /* END MoabArrayCompatible check */

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, do end job processing */
    
    svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

    pjob->ji_momhandle = -1;

    /* force new connection */

    pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

    if (pwtnew)
      {
      append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
      }
    }
  else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
    {
    /* job has staged-in file, should remove them */

    remove_stagein(pjob);

    job_abt(&pjob, Msg);
    }
  else
    {
    /*
     * the job is not transitting (though it may have been) and
     * is not running, so put in into a complete state.
     */

    struct work_task *ptask;
    struct pbs_queue *pque;
    int  KeepSeconds = 0;

    svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE);

    if ((pque = pjob->ji_qhdr) && (pque != NULL))
      {
      pque->qu_numcompleted++;
      }

    KeepSeconds = attr_ifelse_long(
                    &pque->qu_attr[QE_ATR_KeepCompleted],
                    &server.sv_attr[SRV_ATR_KeepCompleted],
                    0);
    ptask = set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, pjob);

    if (ptask != NULL)
      {
      append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask);
      }
    }  /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */

  reply_ack(preq);

  return;
  }  /* END req_deletejob() */
示例#23
0
int set_node_power_state(
    
  struct pbsnode **ppNode,
  unsigned short   newState)

  {
  struct pbsnode *pNode = *ppNode;
  if (pNode->nd_addrs == NULL)
    {
    return PBSE_BAD_PARAMETER;
    }

  if (newState == POWER_STATE_RUNNING)
    {
    static std::string interface;
    static unsigned char mac_addr[6];
    if (interface.length() == 0)
      {
      if (!getMacAddr(interface,mac_addr))
        {
        return PBSE_SYSTEM;
        }
      }

    int sock;
    if ((sock = socket(AF_INET,SOCK_PACKET,SOCK_PACKET)) < 0)
      {
      return PBSE_SYSTEM;
      }

    unsigned char outpack[1000];

    memcpy(outpack+6,mac_addr,6);
    memcpy(outpack,pNode->nd_mac_addr,6);
    outpack[12] = 0x08;
    outpack[13] = 0x42;
    int offset = 14;
    memset(outpack + offset,0xff,6);
    offset += 6;

    for (int i = 0;i < 16;i++)
      {
      memcpy(outpack + offset,pNode->nd_mac_addr,6);
      offset += 6;
      }

    int one = 1;
    if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, (char *)&one, sizeof(one)) < 0)
      {
      close(sock);
      return PBSE_SYSTEM;
      }

    struct sockaddr whereto;
    whereto.sa_family = 0;
    snprintf(whereto.sa_data, sizeof(whereto.sa_data), "%s", interface.c_str());

    if (sendto(sock, outpack, offset, 0, &whereto, sizeof(whereto)) < 0)
      {
      close(sock);
      return PBSE_SYSTEM;
      }

    close(sock);
    return PBSE_NONE;
    }

  if (pNode->nd_job_usages.size() != 0)
    {
    //Can't change the power state on a node with running jobs.
    return PBSE_CANT_CHANGE_POWER_STATE_WITH_JOBS_RUNNING;
    }
  struct batch_request *request = alloc_br(PBS_BATCH_ChangePowerState);
  if (request == NULL)
    {
    return PBSE_SYSTEM;
    }

  request->rq_ind.rq_powerstate = newState;
  pNode->nd_power_state_change_time = time(NULL);

  snprintf(request->rq_host, sizeof(request->rq_host), "%s", pNode->nd_name);
  std::string hostname(request->rq_host);
  int rc = PBSE_NONE;

  {
    int handle = 0;
    int local_errno = 0;
    handle = svr_connect(pNode->nd_addrs[0],pNode->nd_mom_port,&local_errno,pNode,NULL);
    if(handle < 0)
      {
      unlock_node(pNode, __func__, "Error connecting", LOGLEVEL);
      *ppNode = NULL;
      return local_errno;
      }
    unlock_node(pNode, __func__, "Done connecting", LOGLEVEL);
    *ppNode = NULL;
    rc = issue_Drequest(handle, request,true);
    if(rc == PBSE_NONE)
      {
      rc = request->rq_reply.brp_code;
      if(rc < 0) rc = -rc;
      }
  }
  pNode = find_nodebyname(hostname.c_str());
  *ppNode = pNode;
  if ((rc == PBSE_NONE)&&(pNode != NULL))
    {
    pNode->nd_power_state = newState;
    }

  return(rc);
  }
示例#24
0
int issue_signal(

  job        **pjob_ptr,
  const char  *signame, /* name of the signal to send */
  void       (*func)(struct batch_request *),
  void        *extra, /* extra parameter to be stored in sig request */
  char        *extend) /* Parameter to put in extended part of request */

  {
  int                   rc;
  job                  *pjob = *pjob_ptr;
  struct batch_request *newreq;
  char                  jobid[PBS_MAXSVRJOBID + 1];

  /* build up a Signal Job batch request */

  if ((newreq = alloc_br(PBS_BATCH_SignalJob)) == NULL)
    {
    /* FAILURE */

    return(PBSE_SYSTEM);
    }

  newreq->rq_extra = extra;
  newreq->rq_extend = extend;
  if (extend != NULL)
    {
    newreq->rq_extsz = strlen(extend);
    }

  strcpy(jobid, pjob->ji_qs.ji_jobid);
  strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);

  snprintf(newreq->rq_ind.rq_signal.rq_signame, sizeof(newreq->rq_ind.rq_signal.rq_signame), "%s", signame);

  /* The newreq is freed in relay_to_mom (failure)
   * or in issue_Drequest (success) */
  rc = relay_to_mom(&pjob, newreq, NULL);

  if ((rc == PBSE_NONE) &&
      (pjob != NULL))
    {
    strcpy(jobid, pjob->ji_qs.ji_jobid);
    unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);
    func(newreq);

    *pjob_ptr = svr_find_job((char *)jobid, TRUE);
    }
  else if ((extend != NULL) && 
      (!strcmp(extend, RERUNFORCE)))
    {
    if (pjob == NULL)
      {
      *pjob_ptr = svr_find_job((char *)jobid, TRUE);
      pjob = *pjob_ptr;
      }
    /* The job state is normally set when the obit arrives. But since the 
       MOM is not responding we need to set the state here */

    if (pjob != NULL)
      {
      /* Rerunning job, if not checkpointed, clear "resources_used and requeue job */
      if ((pjob->ji_qs.ji_svrflags & (JOB_SVFLG_CHECKPOINT_FILE | JOB_SVFLG_CHECKPOINT_MIGRATEABLE)) == 0)
        {
        job_attr_def[JOB_ATR_resc_used].at_free(&pjob->ji_wattr[JOB_ATR_resc_used]);
        }
      else if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE)
        {
        /* non-migratable checkpoint (cray), leave there */
        /* and just requeue the job         */

        rel_resc(pjob);

        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN;

        svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED, FALSE);

        pjob->ji_momhandle = -1;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);

        return(PBSE_SYSTEM);
        }

      rel_resc(pjob); /* free resc assigned to job */

      /* Now re-queue the job */
      pjob->ji_modified = 1; /* force full job save */

      pjob->ji_momhandle = -1;
      pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_StagedIn;

      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED, FALSE);
      unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);
      func(newreq);

      rc = PBSE_NONE;
      }
    else
      rc = PBSE_JOBNOTFOUND;
    }
  else
    {
    free_br(newreq);

    if (pjob == NULL)
      *pjob_ptr = NULL;
    }

  return(rc);
  }  /* END issue_signal() */
示例#25
0
int set_node_power_state(struct pbsnode *pNode,struct pbsnode *newNode)
  {
  if(pNode->nd_addrs == NULL)
    {
    return PBSE_BAD_PARAMETER;
    }
  if(newNode->nd_power_state == POWER_STATE_RUNNING)
    {
    newNode->nd_power_state = pNode->nd_power_state; //Don't change the power state here.
                                      //Let the mom update change the state
                                      //back to running.
    static std::string interface;
    static unsigned char mac_addr[6];
    if(interface.length() == 0)
      {
      if(!getMacAddr(interface,mac_addr))
        {
        return PBSE_SYSTEM;
        }
      }

    int sock;
    if((sock = socket(AF_INET,SOCK_PACKET,SOCK_PACKET)) < 0)
    {
      return PBSE_SYSTEM;
    }

    unsigned char outpack[1000];

    memcpy(outpack+6,mac_addr,6);
    memcpy(outpack,pNode->nd_mac_addr,6);
    outpack[12] = 0x08;
    outpack[13] = 0x42;
    int offset = 14;
    memset(outpack + offset,0xff,6);
    offset += 6;
    for(int i = 0;i < 16;i++)
    {
        memcpy(outpack + offset,pNode->nd_mac_addr,6);
        offset += 6;
    }

    int one = 1;
    if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, (char *)&one, sizeof(one)) < 0)
    {
      close(sock);
      return PBSE_SYSTEM;
    }

    struct sockaddr whereto;
    whereto.sa_family = 0;
    strcpy(whereto.sa_data,interface.c_str());
    if (sendto(sock, outpack, offset, 0, &whereto, sizeof(whereto)) < 0)
    {
      close(sock);
      return PBSE_SYSTEM;
    }
    close(sock);
    return PBSE_NONE;
    }
  if(pNode->nd_job_usages.size() != 0)
    {
    //Can't change the power state on a node with running jobs.
    return PBSE_CANT_CHANGE_POWER_STATE_WITH_JOBS_RUNNING;
    }
  struct batch_request *request = alloc_br(PBS_BATCH_ChangePowerState);
  if(request == NULL)
    {
    return PBSE_SYSTEM;
    }
  request->rq_ind.rq_powerstate = newNode->nd_power_state;
  newNode->nd_power_state_change_time = time(NULL);
  strncpy(request->rq_host,pNode->nd_name,sizeof(request->rq_host));
  int rc = enqueue_threadpool_request(send_power_state_to_mom,(void *)request,task_pool);
  return(rc);
  }
示例#26
0
static int shutdown_checkpoint(

  job *pjob)

  {

  struct batch_request *phold;
  attribute        temp;

  phold = alloc_br(PBS_BATCH_HoldJob);

  if (phold == NULL)
    {
    return(PBSE_SYSTEM);
    }

  temp.at_flags = ATR_VFLAG_SET;

  temp.at_type  = job_attr_def[(int)JOB_ATR_hold].at_type;
  temp.at_val.at_long = HOLD_s;

  phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;

  strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);

  if (job_attr_def[(int)JOB_ATR_hold].at_encode(
        &temp,
        &phold->rq_ind.rq_hold.rq_orig.rq_attr,
        job_attr_def[(int)JOB_ATR_hold].at_name,
        NULL,
        ATR_ENCODE_CLIENT) < 0)
    {
    return(PBSE_SYSTEM);
    }

  if (relay_to_mom(pjob->ji_qs.ji_un.ji_exect.ji_momaddr, phold, post_checkpoint) != 0)
    {
    /* FAILURE */

    return(-1);
    }

  pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;

  pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE;

  if (LOGLEVEL >= 1)
    {
    log_event(
      PBSEVENT_SYSTEM | PBSEVENT_JOB | PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      "shutting down with active checkpointable job");
    }

  job_save(pjob, SAVEJOB_QUICK);

  return(0);
  }  /* END shutdown_checkpoint() */
示例#27
0
static int shutdown_checkpoint(

  job **pjob_ptr)

  {
  job                  *pjob = *pjob_ptr;
  struct batch_request *phold;
  pbs_attribute         temp;
  char                  jobid[PBS_MAXSVRJOBID + 1];
  int                   rc = PBSE_NONE;

  phold = alloc_br(PBS_BATCH_HoldJob);

  if (phold == NULL)
    {
    return(PBSE_SYSTEM);
    }

  temp.at_flags = ATR_VFLAG_SET;

  temp.at_type  = job_attr_def[JOB_ATR_hold].at_type;
  temp.at_val.at_long = HOLD_s;

  phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;

  strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);

  if (job_attr_def[JOB_ATR_hold].at_encode(
        &temp,
        &phold->rq_ind.rq_hold.rq_orig.rq_attr,
        job_attr_def[JOB_ATR_hold].at_name,
        NULL,
        ATR_ENCODE_CLIENT,
        0) < 0)
    {
    free_br(phold);
    return(PBSE_SYSTEM);
    }

  /* The phold is freed in relay_to_mom (failure)
   * or in issue_Drequest (success) */
  if ((rc = relay_to_mom(&pjob, phold, NULL)) != PBSE_NONE)
    {
    /* FAILURE */
    free_br(phold);

    return(-1);
    }
    
  jobid[0] = '\0';

  if (pjob != NULL)
    {
    pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;
    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE;
    
    if (LOGLEVEL >= 1)
      {
      log_event(
        PBSEVENT_SYSTEM | PBSEVENT_JOB | PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "shutting down with active checkpointable job");
      }
  
    job_save(pjob, SAVEJOB_QUICK, 0);
    strcpy(jobid, pjob->ji_qs.ji_jobid);
    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
    }
  
  if (rc == PBSE_NONE)
    {
    post_checkpoint(phold);

    if (jobid[0] != '\0')
     *pjob_ptr = svr_find_job(jobid, TRUE);
    }

  return(PBSE_NONE);
  }  /* END shutdown_checkpoint() */
void *mom_process_request(

  void *sock_num) /* file descriptor (socket) to get request */

  {
  int                   rc;
  struct batch_request *request = NULL;
  int                   sfds = *(int *)sock_num;
  struct tcp_chan *chan = NULL;

  time_now = time(NULL);

  if ((request = alloc_br(0)) == NULL)
    {
    mom_close_client(sfds);
    return NULL;
    }

  request->rq_conn = sfds;


  if ((chan = DIS_tcp_setup(sfds)) == NULL)
    {
    mom_close_client(sfds);
    free_br(request);
    return NULL;
    }

  /* Read in the request and decode it to the internal request structure.  */
  rc = dis_request_read(chan, request);

  if (rc == -1)
    {
    /* FAILURE */
    /* premature end of file */
    mom_close_client(chan->sock);
    free_br(request);
    DIS_tcp_cleanup(chan);
    return NULL;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE))
    {
    /* FAILURE */
    /* read error, likely cannot send reply so just disconnect */
    /* ??? not sure about this ??? */
    mom_close_client(chan->sock);
    free_br(request);
    DIS_tcp_cleanup(chan);
    return NULL;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");
    mom_close_client(chan->sock);
    DIS_tcp_cleanup(chan);
    return NULL;
    }

  if (get_connecthost(chan->sock, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    char tmpLine[MAXLINE];

    sprintf(log_buffer, "%s: %lu",
            pbse_to_txt(PBSE_BADHOST),
            get_connectaddr(chan->sock,FALSE));

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buffer);

    snprintf(tmpLine, sizeof(tmpLine), "cannot determine hostname for connection from %lu",
             get_connectaddr(chan->sock,FALSE));

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    mom_close_client(chan->sock);
    DIS_tcp_cleanup(chan);
    return NULL;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(
      log_buffer,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      chan->sock);

    log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buffer);
    }

  /* is the request from a host acceptable to the server */

    {
    /*extern tree *okclients; */

    extern void mom_server_update_receive_time_by_ip(u_long ipaddr, const char *cmd);

    /* check connecting host against allowed list of ok clients */

    if (LOGLEVEL >= 6)
      {
      sprintf(log_buffer, "request type %s from host %s received",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
      }

    if (!AVL_is_in_tree_no_port_compare(svr_conn[chan->sock].cn_addr, 0, okclients))
      {
      sprintf(log_buffer, "request type %s from host %s rejected (host not authorized)",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
      req_reject(PBSE_BADHOST, 0, request, NULL, "request not authorized");
      mom_close_client(chan->sock);
      DIS_tcp_cleanup(chan);
      return NULL;
      }

    if (LOGLEVEL >= 3)
      {
      sprintf(log_buffer, "request type %s from host %s allowed",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
      }

    mom_server_update_receive_time_by_ip(svr_conn[chan->sock].cn_addr, reqtype_to_txt(request->rq_type));
    }    /* END BLOCK */

  request->rq_fromsvr = 1;

  request->rq_perm =
    ATR_DFLAG_USRD | ATR_DFLAG_USWR |
    ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
    ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
    ATR_DFLAG_SvWR | ATR_DFLAG_MOM;

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  mom_dispatch_request(chan->sock, request);

  DIS_tcp_cleanup(chan);

  return NULL;
  }  /* END mom_process_request() */
示例#29
0
batch_request *duplicate_request(batch_request *preq, int job_index)
  {
  batch_request *preq_tmp = alloc_br(preq->rq_type);
  char          *ptr1;
  char          *ptr2;
  char           newjobname[PBS_MAXSVRJOBID+1];

  if (preq_tmp == NULL)
    return(NULL);

  preq_tmp->rq_perm = preq->rq_perm;
  preq_tmp->rq_fromsvr = preq->rq_fromsvr;
  preq_tmp->rq_conn = preq->rq_conn;
  preq_tmp->rq_time = preq->rq_time;
  preq_tmp->rq_orgconn = preq->rq_orgconn;

  memcpy(preq_tmp->rq_ind.rq_manager.rq_objname,
  preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1);

  strcpy(preq_tmp->rq_user, preq->rq_user);
  strcpy(preq_tmp->rq_host, preq->rq_host);

  if (preq->rq_extend != NULL)
    preq_tmp->rq_extend = strdup(preq->rq_extend);

  switch (preq->rq_type)
    {
    /* This function was created for a modify array request (PBS_BATCH_ModifyJob)
    the preq->rq_ind structure was allocated in dis_request_read. If other
    BATCH types are needed refer to that function to see how the rq_ind structure
    was allocated and then copy it here. */
    case PBS_BATCH_DeleteJob:
    case PBS_BATCH_HoldJob:
    case PBS_BATCH_CheckpointJob:
    case PBS_BATCH_ModifyJob:
    case PBS_BATCH_AsyModifyJob:

      /* based on how decode_DIS_Manage allocates data */
      CLEAR_HEAD(preq_tmp->rq_ind.rq_manager.rq_attr);

      preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
      preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;

      if (job_index != -1)
        {
        /* If this is a job array it is possible we only have the array name
        and not the individual job. We need to find out what we have and
        modify the name if needed */
        ptr1 = strstr(preq->rq_ind.rq_manager.rq_objname, "[]");
        if (ptr1)
          {
          ptr1++;
          strcpy(newjobname, preq->rq_ind.rq_manager.rq_objname);
          ptr2 = strstr(newjobname, "[]");
          ptr2++;
          *ptr2 = 0;
          sprintf(preq_tmp->rq_ind.rq_manager.rq_objname,"%s%d%s",
                            newjobname, job_index, ptr1);
          }
        else
          strcpy(preq_tmp->rq_ind.rq_manager.rq_objname, preq->rq_ind.rq_manager.rq_objname);
        }
    
      /* copy the attribute list */
      if (copy_attribute_list(preq, preq_tmp) != PBSE_NONE)
        return(NULL);
    
      break;
    
      case PBS_BATCH_SignalJob:
    
        strcpy(preq_tmp->rq_ind.rq_signal.rq_jid, preq->rq_ind.rq_signal.rq_jid);
        strcpy(preq_tmp->rq_ind.rq_signal.rq_signame, preq->rq_ind.rq_signal.rq_signame);
        preq_tmp->rq_extra = strdup((char *)preq->rq_extra);
    
        break;
    
      case PBS_BATCH_MessJob:
    
        strcpy(preq_tmp->rq_ind.rq_message.rq_jid, preq->rq_ind.rq_message.rq_jid);
        preq_tmp->rq_ind.rq_message.rq_file = preq->rq_ind.rq_message.rq_file;
        strcpy(preq_tmp->rq_ind.rq_message.rq_text, preq->rq_ind.rq_message.rq_text);
    
        break;
    
      case PBS_BATCH_RunJob:
    
        if (preq->rq_ind.rq_run.rq_destin)
          preq_tmp->rq_ind.rq_run.rq_destin = strdup(preq->rq_ind.rq_run.rq_destin);
    
        break;
    
      default:
    
        break;
      }
  
    return(preq_tmp);
  }
示例#30
0
void process_request(

  int sfds) /* file descriptor (socket) to get request */

  {
#ifdef PBS_MOM
  char *id = "process_request";
#endif

  int                   rc;

  struct batch_request *request = NULL;

#ifndef PBS_MOM
  char *auth_err = NULL;
#endif

  time_now = time(NULL);

  request = alloc_br(0);

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */

#ifndef PBS_MOM

  if (svr_conn[sfds].cn_active == FromClientDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX) &&
        (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(sfds, request);
    }
  else
    {
    LOG_EVENT(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_REQUEST,
      "process_req",
      "request on invalid type of connection");

    close_conn(sfds);

    free_br(request);

    return;
    }

#else /* PBS_MOM */
  rc = dis_request_read(sfds, request);

#endif /* PBS_MOM */

  if (rc == -1)
    {
    /* FAILURE */

    /* premature end of file */

    close_client(sfds);

    free_br(request);

    return;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL))
    {
    /* FAILURE */

    /* read error, likely cannot send reply so just disconnect */

    /* ??? not sure about this ??? */

    close_client(sfds);

    free_br(request);

    return;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");

    close_client(sfds);

    return;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    char tmpLine[1024];

    sprintf(log_buffer, "%s: %lu",
            pbse_to_txt(PBSE_BADHOST),
            get_connectaddr(sfds));

    LOG_EVENT(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buffer);

    snprintf(tmpLine, sizeof(tmpLine), "cannot determine hostname for connection from %lu",
             get_connectaddr(sfds));

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);

    return;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(
      log_buffer,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    LOG_EVENT(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buffer);
    }

  /* is the request from a host acceptable to the server */

#ifndef PBS_MOM

  if (svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX)
    {
    strcpy(request->rq_host, server_name);
    }

  if (server.sv_attr[SRV_ATR_acl_host_enable].at_val.at_long)
    {
    /* acl enabled, check it; always allow myself and nodes */

    struct pbsnode *isanode;

    isanode = PGetNodeFromAddr(get_connectaddr(sfds));

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check(
           &server.sv_attr[SRV_ATR_acl_hosts],
           request->rq_host,
           ACL_Host) == 0))
      {
      char tmpLine[1024];

      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);

      close_client(sfds);

      return;
      }
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (svr_conn[sfds].cn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (svr_conn[sfds].cn_socktype == PBS_SOCK_INET)
        return;

      }

    if (svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX)
      {
      conn_credent[sfds].timestamp = time_now;
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      }


    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = 0;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if ( request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        if (rc == PBSE_NONE)
          {
          conn_credent[sfds].timestamp = time_now;
          svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
          }
        return;
        }
      else if (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED)
        /* skip checking user if we did not get an authenticated credential */
        rc = PBSE_BADCRED;
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED)
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      if (auth_err != NULL)
        free(auth_err);

      close_client(sfds);

      return;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if (((pjob = find_job(request->rq_ind.rq_modify.rq_objname)) != (job *)0) &&
          (pjob->ji_qs.ji_state == JOB_STATE_RUNNING))
        {

        if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
          ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
          (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
          (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
          (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
          {

          request->rq_perm = svr_get_privilege(request->rq_user, server_host);
          skip = TRUE;

          }
        }
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (svr_conn[sfds].cn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */

  if (server.sv_attr[SRV_ATR_State].at_val.at_long > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);

        return;

        /*NOTREACHED*/

        break;
      }
    }

#else /* THIS CODE FOR MOM ONLY */

    {
    /*extern tree *okclients; */

    extern void mom_server_update_receive_time_by_ip(u_long ipaddr, const char *cmd);

    /* check connecting host against allowed list of ok clients */

    if (LOGLEVEL >= 6)
      {
      sprintf(log_buffer, "request type %s from host %s received",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

/*    if (!tfind(svr_conn[sfds].cn_addr, &okclients)) */
    if (!AVL_is_in_tree(svr_conn[sfds].cn_addr, 0, okclients))
      {
      sprintf(log_buffer, "request type %s from host %s rejected (host not authorized)",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);

      req_reject(PBSE_BADHOST, 0, request, NULL, "request not authorized");

      close_client(sfds);

      return;
      }

    if (LOGLEVEL >= 3)
      {
      sprintf(log_buffer, "request type %s from host %s allowed",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

    mom_server_update_receive_time_by_ip(svr_conn[sfds].cn_addr, reqtype_to_txt(request->rq_type));
    }    /* END BLOCK */

  request->rq_fromsvr = 1;

  request->rq_perm =
    ATR_DFLAG_USRD | ATR_DFLAG_USWR |
    ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
    ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
    ATR_DFLAG_SvWR | ATR_DFLAG_MOM;

#endif /* END else !PBS_MOM */

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  dispatch_request(sfds, request);

  return;
  }  /* END process_request() */