예제 #1
0
/*
 * reroute_job()
 *
 * Tries to route a job again, but only when the supplied queue is a
 * push-routing queue (qu_type == QTYPE_RoutePush). If job_route() answers
 * with PBSE_ROUTEREJ, PBSE_ROUTEEXPD or PBSE_QUENOEN the job is aborted via
 * job_abt() with the matching message text.
 *
 * @param pjob - job to route; its id is logged when LOGLEVEL >= 7
 * @param pque - queue to inspect; may be NULL, in which case nothing is routed
 * @return PBSE_NONE when no routing was attempted, otherwise job_route()'s result
 */
int reroute_job(

  job *pjob,
  pbs_queue *pque)

  {
  int        route_rc = PBSE_NONE;
  char       log_buf[LOCAL_LOG_BUF_SIZE];

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "%s", pjob->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  /* only push-routing queues are handled here */
  if (pque == NULL)
    return(route_rc);

  if (pque->qu_qs.qu_type != QTYPE_RoutePush)
    return(route_rc);

  route_rc = job_route(pjob);

  /* job_abt() receives &pjob, so pjob must not be relied on afterwards */
  if (route_rc == PBSE_ROUTEREJ)
    {
    job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    }
  else if (route_rc == PBSE_ROUTEEXPD)
    {
    job_abt(&pjob, msg_routexceed);
    }
  else if (route_rc == PBSE_QUENOEN)
    {
    job_abt(&pjob, msg_err_noqueue);
    }

  return(route_rc);
  } /* END reroute_job() */
예제 #2
0
파일: job_route.c 프로젝트: triicst/torque
/*
 * queue_route()
 *
 * Walks the job list of a queue and calls job_route() for every job whose
 * routing retry time (ji_rteretry) is not later than time_now. A job is
 * aborted when job_route() returns PBSE_ROUTEREJ or PBSE_ROUTEEXPD; any
 * other result leaves the job alone.
 *
 * @param pque - queue whose jobs are examined
 */
void queue_route(

  pbs_queue *pque)

  {
  job *cur;
  job *following;
  int  route_rc;

  for (cur = (job *)GET_NEXT(pque->qu_jobs); cur != NULL; cur = following)
    {
    /* fetch the successor first: job_abt() is handed &cur and may change it */
    following = (job *)GET_NEXT(cur->ji_jobque);

    /* not yet time to retry this job */
    if (cur->ji_qs.ji_un.ji_routet.ji_rteretry > time_now)
      continue;

    route_rc = job_route(cur);

    if (route_rc == PBSE_ROUTEREJ)
      job_abt(&cur, pbse_to_txt(PBSE_ROUTEREJ));
    else if (route_rc == PBSE_ROUTEEXPD)
      job_abt(&cur, msg_routexceed);
    }

  return;
  }
예제 #3
0
파일: error.c 프로젝트: CESNET/torque
/** Retrieves last PBS error message. */
int
drmaa_get_pbs_error(char *error_diagnosis, size_t error_diag_len)
  {
  snprintf(error_diagnosis, error_diag_len,
           "pbs-drm: %s", pbse_to_txt(pbs_errno));
  return drmaa_map_pbs_error(pbs_errno);
  }
예제 #4
0
파일: job_route.c 프로젝트: spuder/torque
/*
 * reroute_job()
 *
 * Routes the job via job_route() and aborts it through job_abt() when the
 * result is PBSE_ROUTEREJ, PBSE_ROUTEEXPD or PBSE_QUENOEN.
 *
 * @param pjob - job to route; its id is logged when LOGLEVEL >= 8
 * @return the value returned by job_route()
 */
int reroute_job(

    job *pjob)

{
    int        route_rc;
    char       log_buf[LOCAL_LOG_BUF_SIZE];

    if (LOGLEVEL >= 8)
    {
        sprintf(log_buf, "%s", pjob->ji_qs.ji_jobid);
        LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

    route_rc = job_route(pjob);

    /* job_abt() is given &pjob, so pjob is not touched again afterwards */
    if (route_rc == PBSE_ROUTEREJ)
    {
        job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    }
    else if (route_rc == PBSE_ROUTEEXPD)
    {
        job_abt(&pjob, msg_routexceed);
    }
    else if (route_rc == PBSE_QUENOEN)
    {
        job_abt(&pjob, msg_err_noqueue);
    }

    return(route_rc);
} /* END reroute_job() */
예제 #5
0
/*
 * handle_single_delete()
 *
 * Looks up the job named in the delete request. An unknown job is logged
 * and the request rejected with PBSE_UNKJOBID. Otherwise the job mutex is
 * released and the delete is carried out by single_delete_work(): queued
 * on the thread pool when an asynchronous reply request is supplied,
 * called directly when it is not.
 *
 * @param preq     - the delete request
 * @param preq_tmp - request used for the early asynchronous ack, or NULL
 * @param Msg      - not used by this function
 * @return always PBSE_NONE
 */
int handle_single_delete(

  struct batch_request *preq,
  struct batch_request *preq_tmp,
  char                 *Msg)

  {
  char *target_id = preq->rq_ind.rq_delete.rq_objname;
  job  *found = svr_find_job(target_id, FALSE);

  if (found == NULL)
    {
    log_event(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,target_id,pbse_to_txt(PBSE_UNKJOBID));

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, "cannot locate job");

    return(PBSE_NONE);
    }

  /* the job was only needed to prove it exists */
  unlock_ji_mutex(found, __func__, NULL, 0);

  if (preq_tmp == NULL)
    {
    /* synchronous path */
    single_delete_work(preq);

    return(PBSE_NONE);
    }

  /* send the asynchronous reply, then hand the work to the thread pool */
  reply_ack(preq_tmp);
  preq->rq_noreply = TRUE; /* set for no more replies */
  enqueue_threadpool_request(single_delete_work, preq);

  return(PBSE_NONE);
  } /* END handle_single_delete() */
예제 #6
0
/*
 * chk_job_request()
 *
 * Finds the job a request refers to and runs the permission check on it.
 *
 * @param jobid - (I) id of the job to look up
 * @param preq  - (I) the request; rejected with PBSE_UNKJOBID if the job is unknown
 * @return the job, or NULL when it was not found or when
 *         chk_job_req_permissions() cleared the pointer
 */
job *chk_job_request(

  char                 *jobid,  /* I */
  struct batch_request *preq)   /* I */

  {
  job *found = svr_find_job(jobid, FALSE);

  if (found != NULL)
    {
    /* if we aren't authorized, found will be set to NULL in chk_job_req_permissions */
    chk_job_req_permissions(&found,preq);

    return(found);
    }

  log_event(
    PBSEVENT_DEBUG,
    PBS_EVENTCLASS_JOB,
    jobid,
    pbse_to_txt(PBSE_UNKJOBID));

  req_reject(PBSE_UNKJOBID, 0, preq, NULL, "cannot locate job");

  return(NULL);
  }  /* END chk_job_request() */
예제 #7
0
/*
 * set_err_msg()
 *
 * Builds the text that accompanies an error code.
 *
 *  - PBSE_SYSTEM: daemon name + PBS text for PBSE_SYSTEM + strerror(errno)
 *    (or "Unknown error" when strerror() yields nothing).
 *  - code > PBSE_: the text from pbse_to_txt(code).
 *  - otherwise: strerror(code).
 *
 * If no text is available the buffer is left as an empty string.
 *
 * @param code   - error code to describe
 * @param msgbuf - output buffer; must hold at least ERR_MSG_SIZE + 1 bytes
 *                 (the message is truncated to ERR_MSG_SIZE characters)
 */
static void set_err_msg(

  int   code,
  char *msgbuf)

  {
  const char *msg = NULL;

  *msgbuf = '\0';

  if (code == PBSE_SYSTEM)
    {
    /* capture errno before any call below has a chance to disturb it */
    int         saved_errno = errno;
    const char *sys_txt = pbse_to_txt(PBSE_SYSTEM);
    const char *os_txt = strerror(saved_errno);

    /* one bounded write replaces the unbounded strcpy/strcat chain */
    snprintf(msgbuf, ERR_MSG_SIZE + 1, "%s%s%s",
      msg_daemonname,
      (sys_txt != NULL) ? sys_txt : "",
      (os_txt != NULL) ? os_txt : "Unknown error");
    }
  else if (code > PBSE_)
    {
    msg = pbse_to_txt(code);
    }
  else
    {
    msg = strerror(code);
    }

  if (msg != NULL)
    {
    /* snprintf always terminates, unlike strncpy */
    snprintf(msgbuf, ERR_MSG_SIZE + 1, "%s", msg);
    }

  return;
  }  /* END set_err_msg() */
예제 #8
0
파일: reply_send.c 프로젝트: dbeer/torque
/*
 * set_err_msg()
 *
 * Fills msgbuf with the text for an error code, never writing more than
 * len bytes through snprintf()/safe_strncat().
 *
 *  - PBSE_SYSTEM: daemon name + PBS text for PBSE_SYSTEM, followed by
 *    strerror(errno) or "Unknown error".
 *  - code > PBSE_: pbse_to_txt(code).
 *  - otherwise: strerror(code).
 *
 * @param code   - error code to describe
 * @param msgbuf - output buffer, reset to "" on entry
 * @param len    - size of msgbuf in bytes
 */
static void set_err_msg(

  int   code,
  char *msgbuf,
  int   len)

  {
  char *text = NULL;
  char *os_text;

  *msgbuf = '\0';

  if (code != PBSE_SYSTEM)
    {
    /* plain lookup: PBS codes sit above PBSE_, everything else is an errno */
    if (code > PBSE_)
      text = pbse_to_txt(code);
    else
      text = strerror(code);
    }
  else
    {
    snprintf(msgbuf, len, "%s%s", msg_daemonname, pbse_to_txt(PBSE_SYSTEM));

    os_text = strerror(errno);

    if (os_text != NULL)
      safe_strncat(msgbuf, os_text, len - strlen(msgbuf));
    else
      safe_strncat(msgbuf, "Unknown error", len - strlen(msgbuf));
    }

  if (text != NULL)
    snprintf(msgbuf, len, "%s", text);

  return;
  }  /* END set_err_msg() */
예제 #9
0
/*
 * log_commit_error()
 *
 * checks the error status for the connection and logs any error
 * @pre-cond: con must be a valid index into the connection table
 */
void log_commit_error(

  int   con,
  int   mom_err,
  char *job_id,
  bool &timeout)

  {
  char *err_text;
  char  log_buf[LOCAL_LOG_BUF_SIZE];
  int   errno2;

  err_text = pbs_geterrmsg(con);

  /* NOTE:  errno is modified by log_err */
  if (mom_err > PBSE_FLOOR)
    {
    sprintf(log_buf, "send_job commit failed, rc=%d (%s: %s)",
      mom_err, pbse_to_txt(mom_err), (err_text != NULL) ? err_text : "N/A");
    errno2 = mom_err;
    }
  else
    {
    sprintf(log_buf, "send_job commit failed, rc=%d (%s)",
      mom_err, (err_text != NULL) ? err_text : "N/A");
    errno2 = errno;
    }

  if (err_text != NULL)
    free(err_text);

  log_ext(errno2, __func__, log_buf, LOG_WARNING);
    
  /* if failure occurs, pbs_mom should purge job and pbs_server should set *
     job state to idle w/error msg */
  if (errno2 == EINPROGRESS)
    {
    timeout = true;

    sprintf(log_buf, "child commit request timed-out for job %s, increase tcp_timeout?",
      job_id);
    log_ext(errno2, __func__, log_buf, LOG_WARNING);
    }
  else
    {
    sprintf(log_buf, "child failed in commit request for job %s", job_id);
    log_ext(errno2, __func__, log_buf, LOG_CRIT);
    }
  }
예제 #10
0
파일: req_delete.c 프로젝트: dhill12/test
/*
 * handle_single_delete()
 *
 * Deletes the single job named in the request. An unknown job is logged
 * and the request rejected with PBSE_UNKJOBID. For a known job the
 * optional asynchronous ack is sent first, then forced_jobpurge() runs
 * and, if it returns PBSE_NONE, execute_job_delete(). The request is
 * acknowledged when the final code is PBSE_NONE or PURGE_SUCCESS.
 *
 * @param preq     - the delete request
 * @param preq_tmp - request used for the early asynchronous ack, or NULL
 * @param Msg      - passed through to execute_job_delete()
 * @return always PBSE_NONE
 */
int handle_single_delete(

  struct batch_request *preq,
  struct batch_request *preq_tmp,
  char                 *Msg)

  {
  int   result = -1;
  char *target_id = preq->rq_ind.rq_delete.rq_objname;
  job  *found = svr_find_job(target_id, FALSE);

  if (found != NULL)
    {
    if (preq_tmp != NULL)
      {
      reply_ack(preq_tmp);
      preq->rq_noreply = TRUE; /* set for no more replies */
      }

    /* mutex is freed below */
    result = forced_jobpurge(found, preq);

    if (result == PBSE_NONE)
      result = execute_job_delete(found, Msg, preq);
    }
  else
    {
    log_event(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,target_id,pbse_to_txt(PBSE_UNKJOBID));

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, "cannot locate job");
    }

  if ((result == PBSE_NONE) ||
      (result == PURGE_SUCCESS))
    {
    reply_ack(preq);
    }

  return(PBSE_NONE);
  } /* END handle_single_delete() */
예제 #11
0
/*
 * req_movejob()
 *
 * Services a move-job request. The job must be in the QUEUED, HELD or
 * WAITING state and must not already be in the destination queue;
 * otherwise the request is rejected. The actual move is delegated to
 * svr_movejob(), whose result decides whether the request is acked (0),
 * rejected with local_errno (-1 or 1), or left alone (any other value,
 * e.g. 2 for a deferred move).
 *
 * @param req - (I) the move request
 * @return always PBSE_NONE; the outcome is reported through the reply
 */
int req_movejob(

  batch_request *req) /* I */

  {
  char       log_buf[LOCAL_LOG_BUF_SIZE];
  int        local_errno = 0;
  int        move_rc;
  job       *jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req);

  if (jobp == NULL)
    return(PBSE_NONE);

  /* NOTE(review): mutex_mgr presumably manages jobp->ji_mutex for the rest of this scope - confirm */
  mutex_mgr job_mutex(jobp->ji_mutex, true);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "%s", jobp->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  /* only jobs that are not yet running (or beyond) may be moved */
  bool movable = (jobp->ji_qs.ji_state == JOB_STATE_QUEUED) ||
                 (jobp->ji_qs.ji_state == JOB_STATE_HELD) ||
                 (jobp->ji_qs.ji_state == JOB_STATE_WAITING);

  if (!movable)
    {
#ifndef NDEBUG
    sprintf(log_buf, "%s %d %s", pbse_to_txt(PBSE_BADSTATE), jobp->ji_qs.ji_state, __func__);

    log_event(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,jobp->ji_qs.ji_jobid,log_buf);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return(PBSE_NONE);
    }

  /* Source and destination queue are sometimes identical; there is
     nothing to move in that case */
  if (strcmp(jobp->ji_qs.ji_queue, req->rq_ind.rq_move.rq_destin) == 0)
    {
    /* the text is needed for the reject reply even when it is not logged */
    sprintf(log_buf, "Job %s already in queue %s", jobp->ji_qs.ji_jobid, jobp->ji_qs.ji_queue);

    if (LOGLEVEL >= 7)
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    req_reject(PBSE_JOB_ALREADY_IN_QUEUE, 0, req, NULL, log_buf);

    return(PBSE_NONE);
    }

  /* svr_movejob() does the real work, handles both local and network moves */
  move_rc = svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, &local_errno, req);

  if (move_rc == 0)
    {
    /* success */
    snprintf(log_buf, sizeof(log_buf), "%s", msg_movejob);

    size_t used = strlen(log_buf);

    snprintf(log_buf + used, sizeof(log_buf) - used, msg_manager,
      req->rq_ind.rq_move.rq_destin, req->rq_user, req->rq_host);

    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,jobp->ji_qs.ji_jobid,log_buf);

    reply_ack(req);
    }
  else if ((move_rc == -1) || (move_rc == 1))
    {
    /* fail */
    /* NOTE:  can pass detailed response to requestor (NYI) */
    req_reject(local_errno, 0, req, NULL, NULL);
    }

  /* move_rc == 2: deferred, will be handled by post_movejob() when the
     child completes - nothing to do here */

  return(PBSE_NONE);
  }  /* END req_movejob() */
예제 #12
0
파일: pbs_track.c 프로젝트: bmdaw/torque
/*
 * main() - pbs_track
 *
 * USAGE: pbs_track [-j <JOBID>] [-b] -- a.out arg1 arg2 ... argN
 *
 * Parses the options, resolves the job id (through get_server() unless the
 * NO_SERVER_SUFFIX environment variable is set), optionally forks into the
 * background, asks the MOM to adopt this process into the job with
 * tm_adopt(), and finally replaces the process with the requested command
 * via execvp().
 *
 * Exit status: 0 on success (or for the parent of a background run),
 * 2 for usage errors, non-zero for every other failure, including a
 * failed fork() or execvp().
 */
int main(

  int    argc,
  char **argv) /* pbs_track */

  {
  int ArgIndex;
  int NumErrs = 0;

  char *Args[MAXARGS];
  int   aindex = 0;

  int   rc;
  int   pid;
  int   saved_errno;

  const char *err_txt;

  char tmpJobID[PBS_MAXCLTJOBID];        /* from the command line */

  char JobID[PBS_MAXCLTJOBID];  /* modified job ID for MOM/server consumption */
  char ServerName[MAXSERVERNAME];

  int  DoBackground = 0;

  tmpJobID[0] = '\0';

  initialize_network_info();

  /* USAGE: pbs_track [-j <JOBID>] -- a.out arg1 arg2 ... argN */

#define GETOPT_ARGS "bj:"

  while ((ArgIndex = getopt(argc, argv, GETOPT_ARGS)) != EOF)
    {
    switch (ArgIndex)
      {

      case 'b':

        /* background process */

        DoBackground = 1;

        break;

      case 'j':

        /* strncpy zero-pads short input, so a non-NUL last byte means the
           id did not fit */

        strncpy(tmpJobID, optarg, sizeof(tmpJobID));

        if (tmpJobID[PBS_MAXCLTJOBID-1] != '\0')
          {
          /* truncation occurred! */

          fprintf(stderr, "pbs_track: given job ID too large (> %d)\n",
                  PBS_MAXCLTJOBID);

          exit(-1);
          }

        break;

      default:

        NumErrs++;

        break;
      }
    }

  if ((NumErrs > 0) ||
      (optind >= argc) ||
      (tmpJobID[0] == '\0'))
    {
    static char Usage[] = "USAGE: pbs_track [-j <JOBID>] [-b] -- a.out arg1 arg2 ... argN\n";
    fprintf(stderr, "%s", Usage);
    exit(2);
    }

  if (getenv(NO_SERVER_SUFFIX) != NULL)
    {
    snprintf(JobID, sizeof(JobID), "%s", tmpJobID);
    }
  else
    {
    if (get_server(tmpJobID, JobID, sizeof(JobID), ServerName, sizeof(ServerName)))
      {
      /* report what the user typed: JobID may not have been filled in */
      fprintf(stderr, "pbs_track: illegally formed job identifier: '%s'\n", tmpJobID);
      exit(1);
      }
    }

  /* gather a.out and other arguments */

  aindex = 0;

  for (;optind < argc;optind++)
    {
    /* keep one slot free for the NULL terminator execvp() requires */
    if (aindex >= MAXARGS - 1)
      {
      fprintf(stderr, "pbs_track: too many command arguments (max %d)\n",
              MAXARGS - 1);
      exit(2);
      }

    Args[aindex] = strdup(argv[optind]);

    if (Args[aindex] == NULL)
      {
      fprintf(stderr, "pbs_track: out of memory while copying arguments\n");
      exit(1);
      }

    printf("Got arg: %s\n",
           Args[aindex]);

    aindex++;
    }

  Args[aindex] = NULL;

  /* decide if we should fork or not */

  pid = 1;

  if (DoBackground == 1)
    {
    printf("FORKING!\n");

    /* flush so buffered output is neither duplicated nor lost across fork/exec */
    fflush(stdout);

    pid = fork();

    if (pid < 0)
      {
      saved_errno = errno;

      fprintf(stderr, "pbs_track: could not fork (%d:%s)\n",
              saved_errno,
              strerror(saved_errno));

      exit(1);
      }
    }

  if ((DoBackground == 0) || (pid == 0))
    {
    /* either parent or child, depending on the setting */

    /* call tm_adopt() to start tracking this process */

    rc = tm_adopt(JobID, TM_ADOPT_JOBID, getpid());

    /* pbse_to_txt() may have no text for this code; never pass NULL to %s */
    err_txt = pbse_to_txt(rc);

    if (err_txt == NULL)
      err_txt = "unknown error";

    switch (rc)
      {

      case TM_SUCCESS:

        /* success! */

        break;

      case TM_ENOTFOUND:

        fprintf(stderr, "pbs_track: MOM could not find job %s\n",
                JobID);

        break;

      case TM_ESYSTEM:

      case TM_ENOTCONNECTED:

        fprintf(stderr, "pbs_track: error occurred while trying to communication with pbs_mom: %s (%d)\n",
                err_txt,
                rc);

        break;

      default:

        /* Unexpected error occurred */

        fprintf(stderr, "pbs_track: unexpected error %s (%d) occurred\n",
                err_txt,
                rc);

        break;
      }  /* END switch(rc) */

    if (rc != TM_SUCCESS)
      {
      exit(-1);
      }

    /* do the exec; stdio buffers do not survive a successful exec */

    fflush(stdout);

    execvp(Args[0], Args);

    /* execvp() only returns on failure */

    saved_errno = errno;

    fprintf(stderr,"execvp failed with error %d, message:\n%s\n",
      saved_errno,
      strerror(saved_errno));

    exit(1);
    }  /* END if ((DoBackground == 0) || (pid == 0)) */

  /* parent of a background run: detach from the standard streams */

  fclose(stdin);
  fclose(stdout);
  fclose(stderr);

  exit(0);
  }  /* END main() */
예제 #13
0
/*
 * post_routejob()
 *
 * Work-task callback that evaluates the wait status (wt_aux) of the child
 * that tried to route the job stored in wt_parm1.
 *
 *   exit 0     - job was routed: staged-in files and copied checkpoints are
 *                removed when flagged, then the job is purged
 *   exit 1     - permanent rejection: if a delete is in progress the job is
 *                put back to QUEUED/ABORT, otherwise the destination is
 *                recorded via add_dest() and routing is retried
 *   otherwise  - (including abnormal termination, logged and treated as 2)
 *                the job state is re-evaluated and routing is retried
 *
 * On retry the job is aborted whenever job_route() returns non-zero.
 *
 * @param pwt - the completed work task
 */
static void post_routejob(

  struct work_task *pwt)

  {
  int  newstate;
  int  newsub;
  int  outcome;
  int  child_status = pwt->wt_aux;
  job *jobp = (job *)pwt->wt_parm1;

  if (!WIFEXITED(child_status))
    {
    /* child did not exit normally: log it and fall into the retry path */
    outcome = 2;

    sprintf(log_buffer, msg_badexit,
            child_status);

    strcat(log_buffer, "post_routejob");

    log_event(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      log_buffer);
    }
  else
    {
    outcome = WEXITSTATUS(child_status);
    }

  if (outcome == 0)
    {
    /* normal return, job was routed */

    if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
      remove_stagein(jobp);

    if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED)
      remove_checkpoint(jobp);

    job_purge(jobp); /* need to remove server job struct */

    return;
    }

  if (outcome == 1)
    {
    /* permanent rejection (or signal) */

    if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT)
      {
      /* job delete in progress, just set to queued status */

      svr_setjobstate(jobp, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT);

      return;
      }

    add_dest(jobp);  /* else mark destination as bad */
    }

  /* try routing again: force re-eval of job state out of Transit */

  svr_evaljobstate(jobp, &newstate, &newsub, 1);
  svr_setjobstate(jobp, newstate, newsub);

  outcome = job_route(jobp);

  if (outcome == PBSE_ROUTEREJ)
    job_abt(&jobp, pbse_to_txt(PBSE_ROUTEREJ));
  else if (outcome != 0)
    job_abt(&jobp, msg_routexceed);

  return;
  }  /* END post_routejob() */
예제 #14
0
/*
 * svr_format_job()
 *
 * Expands a printf-like template describing a job event and writes the
 * result to fh.
 *
 * Escape sequences:  \n newline, \t tab, \\ \' \" the literal character.
 * Format directives: %d details (text), %h host (cloud mapping for cloud
 *                    jobs, exec_host otherwise), %i job id, %j job name,
 *                    %m standard message, %r reason.
 * Unrecognised sequences are copied through unchanged. Directives whose
 * value is NULL expand to nothing.
 *
 * @param fh        - output file handle; nothing is done when NULL
 * @param pjob      - (I) job being described
 * @param fmt       - template; nothing is done when NULL
 * @param mailpoint - MAIL_* code selecting the standard message and reason
 * @param text      - (optional) additional message text for %d
 */
void svr_format_job(
     
  FILE *fh,         /* output file handle */
  job  *pjob,       /* I */
  char *fmt,        /* printf-like format description */
  int   mailpoint,  /* note, single character  */
  char *text)       /* (optional) additional message text */

  {
    char *p, *stdmessage = NULL, *reason = NULL;
    char *value;

    if ((fh == NULL) || (fmt == NULL))
      return;

    /* First get the "standard" message */

    switch (mailpoint)
      {

      case MAIL_ABORT:

        stdmessage = msg_job_abort;
        reason = msg_job_abort;

        break;

      case MAIL_BEGIN:

        stdmessage = msg_job_start;
        reason = msg_job_start;

        break;

      case MAIL_END:

        stdmessage = msg_job_end;
        reason = msg_job_end;

        break;

      case MAIL_DEL:

        stdmessage = msg_job_del;
        reason = msg_job_del;

        break;

      case MAIL_STAGEIN:

        stdmessage = msg_job_stageinfail;
        reason = pbse_to_txt(PBSE_STAGEIN); /* NB: short version used */

        break;

      case MAIL_CHKPTCOPY:

        stdmessage = msg_job_copychkptfail;
        reason = msg_job_copychkptfail;

        break;

      case MAIL_OTHER:

      default:

        stdmessage = msg_job_otherfail;
        reason = msg_job_otherfail;

        break;
      }  /* END switch (mailpoint) */


    p = fmt;
    while (*p)
      {
      if (*p == '\\')      /* escape sequences */
        {
        switch(p[1])
          {
          case 'n':  /* newline */
            fputc('\n', fh);
            p += 2;
            break;
          case 't':  /* tab */
            fputc('\t', fh);
            p += 2;
            break;
          case '\\': /* backslash */
          case '\'': /* quote */
          case '\"': /* double-quote */
            fputc(p[1], fh);
            p += 2;
            break;
          default:  /* we don't recognise this escape, ignore it (catches p[1]==0) */
            fputc(*p++, fh);
            break;
          } /* END switch(p[1]) */
        }
      else if (*p == '%') /* format statement */
        {
        switch(p[1])
          {
          case 'd':  /* details */
            if (text != NULL)
              {
              fprintf(fh, "%s", text);
              }
            p += 2;
            break;
          case 'h':  /* host */
            if (is_cloud_job(pjob))
              value = pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_val.at_str;
            else
              value = pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str;

            if (value != NULL)
              {
              fprintf(fh, "%s", value);
              }
            p += 2;
            break;
          case 'i':  /* jobId */
            fprintf(fh, "%s", pjob->ji_qs.ji_jobid);
            p += 2;
            break;
          case 'j':  /* jobname */
            /* the name may be unset; guard it like every other optional string */
            value = pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str;

            if (value != NULL)
              {
              fprintf(fh, "%s", value);
              }
            p += 2;
            break;
          case 'm':  /* stdmessage */
            if (stdmessage != NULL)
              {
              fprintf(fh, "%s", stdmessage);
              }
            p += 2;
            break;
          case 'r':  /* reason */
            if (reason != NULL)
              {
              fprintf(fh, "%s", reason);
              }
            p += 2;
            break;
          default:  /* we don't recognise this format, ignore it (catches p[1]==0) */
            fputc(*p++, fh);
            break;
          } /* END switch(p[1]) */
        }
      else               /* ordinary character */
        {
        fputc(*p++, fh);
        }
      } /* END while (*p) */
  } /* END svr_format_job() */
예제 #15
0
/*
 * log_ext()
 *
 * Formats "<severity>::<error text> (<errnum>) in <routine>, <text>" and
 * emits it: to stderr when fd 2 is a tty, to the log through log_record()
 * when the log is open, and to syslog when built with SYSLOG. An errnum of
 * -1 suppresses the "<error text> (<errnum>) in " part.
 *
 * @param errnum   - (I) errno or PBS error number (values >= 15000 are
 *                   looked up with pbse_to_txt(), others with strerror())
 * @param routine  - (I) name of the reporting routine
 * @param text     - (I) message text
 * @param severity - (I) severity, also passed on to syslog
 */
void log_ext(

  int         errnum,   /* I (errno or PBSErrno) */
  const char *routine,  /* I */
  const char *text,     /* I */
  int         severity) /* I */

  {
  char        line[LOG_BUF_SIZE];
  char        fallback[1024];
  char        prefix[2048];
  char       *err_desc = NULL;
  const char *sev_name = NULL;
  int         log_is_open;

  prefix[0] = '\0';
  fallback[0] = '\0';

  if (errnum != -1)
    {
    /* NOTE:  some strerror() routines return "Unknown error X" w/bad errno */

    if (errnum >= 15000)
      err_desc = pbse_to_txt(errnum);
    else
      err_desc = strerror(errnum);

    if (err_desc == NULL)
      {
      sprintf(fallback, "unexpected error %d",
              errnum);

      err_desc = fallback;
      }

    sprintf(prefix,"%s (%d) in ",
            err_desc,
            errnum);
    }

  sev_name = log_get_severity_string(severity);

  snprintf(line,sizeof(line),"%s::%s%s, %s",
    sev_name,
    prefix,
    routine,
    text);

  line[LOG_BUF_SIZE - 1] = '\0';

  pthread_mutex_lock(&log_mutex);

#if !SYSLOG
  if (log_opened == 0)
    {
    log_open("/dev/console", log_directory);
    }
#endif /* not SYSLOG */

  if (isatty(2))
    {
    fprintf(stderr, "%s: %s\n",
            msg_daemonname,
            line);
    }

  /* sample the flag under the lock; log_record() is called without it */
  log_is_open = (log_opened > 0);

  pthread_mutex_unlock(&log_mutex);

  if (log_is_open)
    {
    log_record(
      PBSEVENT_ERROR | PBSEVENT_FORCE,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      line);
    }

#if SYSLOG
  if (syslogopen == 0)
    {
    openlog(msg_daemonname, LOG_NOWAIT, LOG_DAEMON);

    syslogopen = 1;
    }

  syslog(severity|LOG_DAEMON,"%s",line);

#endif /* SYSLOG */

  return;
  }  /* END log_ext() */
예제 #16
0
/**
 * @brief
 *	Verify a single attribute.
 *
 * @par Functionality:
 *	1. Looks the attribute up in the attribute list of the parent object
 *	   (ecl_findattr).\n
 *	2. Accepts NULL/empty values for pbs_manager unset/delete commands and
 *	   for the status requests; rejects a NULL value for everything else.\n
 *	3. For the ATTR_l attribute, rejects a NULL resource name.\n
 *	4. Calls the attribute's at_verify_datatype handler, if present.\n
 *	5. Calls the attribute's at_verify_value handler, if present.\n
 *	According to the original notes this routine is also called by the
 *	hook verification functions "is_job_input_valid" and
 *	"is_resv_input_valid" (lib\Libpython\pbs_python_svr_internal.c).
 *
 * @see verify_attributes
 *
 * @param[in]	batch_request	-	Batch Request Type
 * @param[in]	parent_object	-	Parent Object Type
 * @param[in]	cmd		-	Command Type
 * @param[in]	pattr		-	attribute to verify
 * @param[out]	verified	-	1 if a value handler exists (or the
 *					check ended early), 0 if the attribute
 *					has no at_verify_value handler
 * @param[out]	err_msg		-	Error message for a failed verification
 *
 * @return	int
 * @retval	0   - Passed verification
 * @retval	> 0 - attribute failed verification (pbs error number returned)
 * @retval	-1  - Out of memory while duplicating the error message
 *
 * @par	verified:
 *	Callers such as the hooks code use this flag to find out whether any
 *	real value verification took place; when it is 0 they fall back to the
 *	server decode functions.
 *
 * @par	err_msg:
 *	The pointer is handed to the individual verification handlers so that
 *	they can supply a custom message. If it is still NULL after a failure,
 *	this routine fills it with a strdup'ed copy of pbse_to_txt(err_code);
 *	the caller owns that memory.
 *
 * @par MT-safe: documented as "Yes" by the original authors
 */
int
verify_an_attribute(int batch_request, int parent_object, int cmd,
                    struct attropl *pattr,
                    int *verified,
                    char **err_msg)
{
    ecl_attribute_def *attr_def = NULL;
    int err_code = PBSE_NONE;
    char *txt;

    *verified = 1; /* set to verified */

    /* skip check when dealing with a "resource" parent object */
    if (parent_object == MGR_OBJ_RSC)
        return PBSE_NONE;

    attr_def = ecl_findattr(parent_object, pattr);
    if (attr_def == NULL) {
        err_code = PBSE_NOATTR;
        goto err;
    }

    if (pattr->value == NULL || pattr->value[0] == '\0') {
        /* allow empty/null values for unset/delete of pbs_manager */
        if (batch_request == PBS_BATCH_Manager) {
            if (cmd == MGR_CMD_UNSET || cmd == MGR_CMD_DELETE)
                return PBSE_NONE;
        }

        /* for the following stat calls, the value can be null/empty */
        if (batch_request == PBS_BATCH_StatusJob ||
                batch_request == PBS_BATCH_StatusQue ||
                batch_request == PBS_BATCH_StatusSvr ||
                batch_request == PBS_BATCH_StatusNode ||
                batch_request == PBS_BATCH_StatusRsc ||
                batch_request == PBS_BATCH_StatusHook ||
                batch_request == PBS_BATCH_StatusResv ||
                batch_request == PBS_BATCH_StatusSched)
            return PBSE_NONE;
    }

    /* for others, value shouldn't be null */
    if (pattr->value == NULL) {
        err_code = PBSE_BADATVAL;
        goto err;
    }

    /*
     * Callers that use the ifl library directly may pass a NULL resource;
     * reject that for the resource-list attribute.
     */
    if (strcasecmp(pattr->name, ATTR_l) == 0 && pattr->resource == NULL) {
        err_code = PBSE_UNKRESC;
        goto err;
    }

    if (attr_def->at_verify_datatype) {
        err_code = attr_def->at_verify_datatype(pattr, err_msg);
        if (err_code)
            goto err;
    }

    if (attr_def->at_verify_value == NULL) {
        /* no verify func */
        *verified = 0;
        return PBSE_NONE;
    }

    err_code = attr_def->at_verify_value(batch_request,
            parent_object, cmd, pattr, err_msg);
    if (err_code)
        goto err;

    return PBSE_NONE;

err:
    if ((err_code != 0) && (*err_msg == NULL)) {
        /* no handler supplied a message: use the generic text for the code */
        txt = pbse_to_txt(err_code);
        if (txt) {
            *err_msg = strdup(txt);
            if (*err_msg == NULL)
                return -1;
        }
    }
    return err_code;
}
예제 #17
0
/*
 * handle_adoption_results() - Determine if call to tm_adopt was successful
 *
 * The results of the tm_adopt call are evaluated and the user is informed
 * of its status. If we need to call a new command (i.e. we are not adopting
 * an existing process), the command is also called and this process is replaced.
 *
 * Nothing is done (and 0 is returned) for the parent of a background run,
 * i.e. when DoBackground != 0, this_pid != 0 and no adoptee id was given.
 *
 * @param rc           - result of tm_adopt()
 * @param DoBackground - non-zero when running in background mode
 * @param this_pid     - fork() result; 0 in the child
 * @param JobID        - job id, used in the "not found" message
 * @param tmpAdopteeID - id of an existing process being adopted; when
 *                       non-empty no command is exec'ed
 * @param Args         - NULL-terminated argv for execvp()
 * @return 0 on success, -1 when tm_adopt() failed, the errno of execvp()
 *         when the exec failed
 */
int handle_adoption_results(
  int          rc,
  int          DoBackground,
  int          this_pid,
  char        *JobID,
  std::string  tmpAdopteeID,
  char       **Args)
  {

  if ((DoBackground == 0) || (this_pid == 0) || tmpAdopteeID.size() > 0)
    {
    /* pbse_to_txt() may have no text for this code; never pass NULL to %s */
    const char *err_txt = pbse_to_txt(rc);

    if (err_txt == NULL)
      err_txt = "unknown error";

    switch (rc)
      {

      case TM_SUCCESS:

        /* success! */
        fprintf(stderr, "Success!\n");

        break;

      case TM_ENOTFOUND:

        fprintf(stderr, "pbs_track: MOM could not find job %s\n",
                JobID);

        break;

      case TM_ESYSTEM:

      case TM_ENOTCONNECTED:

        fprintf(stderr, "pbs_track: error occurred while trying to communication with pbs_mom: %s (%d)\n",
                err_txt,
                rc);

        break;

      case TM_EPERM:

        fprintf(stderr, "pbs_track: permission denied: %s (%d)\n",
                err_txt,
                rc);

        break;

      default:

        /* Unexpected error occurred */

        fprintf(stderr, "pbs_track: unexpected error %s (%d) occurred\n",
                err_txt,
                rc);

        break;
      }  /* END switch(rc) */

    if (rc != TM_SUCCESS)
      {
      return -1;
      }

    /* do the exec */

    if (tmpAdopteeID.size() == 0 &&
        execvp(Args[0], Args) == -1)
      {
      /* save errno first: fprintf()/strerror() may overwrite it */
      int exec_errno = errno;

      fprintf(stderr,"execvp failed with error %d, message:\n%s\n",
        exec_errno,
        strerror(exec_errno));
      return exec_errno;
      }
    }

  return 0;
  }
예제 #18
0
/*
 * process_request() - read one batch request from a connection, decide
 * whether the sender may be served, and hand it to dispatch_request().
 *
 * Steps, in order:
 *   1. snapshot the connection's state under its mutex
 *   2. allocate a request and decode the wire data into it
 *   3. resolve the peer host and (optionally) check it against the host ACL
 *   4. work out the permissions: full rights for a privileged (server)
 *      connection, otherwise authenticate the user and look up privileges
 *   5. refuse job-creating/running requests while the server is shutting down
 *   6. dispatch
 *
 * @param chan  channel wrapping the socket the request arrives on
 * @return the value of dispatch_request() on the normal path, otherwise the
 *         PBSE_* code describing why the request was not dispatched
 *
 * Ownership: on every path that calls req_reject() (or req_altauthenuser())
 * free_request is cleared, i.e. this function does not free the request
 * after those calls; all other early exits free it in the cleanup label.
 */
int process_request(

  struct tcp_chan *chan) /* file descriptor (socket) to get request */

  {
  int                   rc = PBSE_NONE;
  struct batch_request *request = NULL;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  long                  acl_enable = FALSE;
  long                  state = SV_STATE_DOWN;

  time_t                time_now = time(NULL);
  int                   free_request = TRUE;
  char                  tmpLine[MAXLINE];
  char                 *auth_err = NULL;
  enum conn_type        conn_active;
  unsigned short        conn_socktype;
  unsigned short        conn_authen;
  unsigned long         conn_addr;
  int                   sfds = chan->sock;

  /* copy the connection fields we need so the mutex is held only briefly */
  pthread_mutex_lock(svr_conn[sfds].cn_mutex);
  conn_active = svr_conn[sfds].cn_active;
  conn_socktype = svr_conn[sfds].cn_socktype;
  conn_authen = svr_conn[sfds].cn_authen;
  conn_addr = svr_conn[sfds].cn_addr;
  svr_conn[sfds].cn_lasttime = time_now;
  pthread_mutex_unlock(svr_conn[sfds].cn_mutex);

  if ((request = alloc_br(0)) == NULL)
    {
    /*
     * There is no request structure to build a reject reply from, so
     * handing the NULL pointer to req_reject() is not an option; record
     * the failure in the log and bail out.
     */
    snprintf(tmpLine, sizeof(tmpLine),
        "cannot allocate memory for request from %lu",
        conn_addr);
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, __func__, tmpLine);
    free_request = FALSE; /* nothing was allocated, nothing to free */
    rc = PBSE_SYSTEM;
    goto process_request_cleanup;
    }

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */
  if (conn_active == FromClientDIS || conn_active == ToServerDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((conn_socktype & PBS_SOCK_UNIX) &&
        (conn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      /* get_creds interestingly always returns 0 */
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(chan, request);
    }
  else
    {
    char out[80];

    snprintf(tmpLine, sizeof(tmpLine), "request on invalid type of connection: %d, sock type: %d, from address %s", 
                conn_active,conn_socktype, netaddr_long(conn_addr, out));
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
      "process_req", tmpLine);
    snprintf(tmpLine, sizeof(tmpLine),
        "request on invalid type of connection (%d) from %s",
        conn_active,
        netaddr_long(conn_addr, out));
    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (rc == -1)
    {
    /* FAILURE */
    /* premature end of file */
    rc = PBSE_PREMATURE_EOF;
    goto process_request_cleanup;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE))
    {
    /* FAILURE */
    /* read error, likely cannot send reply so just disconnect */
    /* ??? not sure about this ??? */
    goto process_request_cleanup;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");
    free_request = FALSE;
    goto process_request_cleanup;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    snprintf(log_buf, sizeof(log_buf), "%s: %lu",
      pbse_to_txt(PBSE_BADHOST),
      conn_addr);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buf);

    snprintf(tmpLine, sizeof(tmpLine),
        "cannot determine hostname for connection from %lu",
        conn_addr);

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (LOGLEVEL >= 1)
    {
    /* bounded write: user and host come from the decoded request */
    snprintf(log_buf, sizeof(log_buf),
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buf);
    }

  /* is the request from a host acceptable to the server */
  if (conn_socktype & PBS_SOCK_UNIX)
    {
    /* a unix-domain peer is local, so it is recorded under the server's name */
    strcpy(request->rq_host, server_name);
    }

  get_svr_attr_l(SRV_ATR_acl_host_enable, &acl_enable);
  if (acl_enable)
    {
    /* acl enabled, check it; always allow myself and nodes */
    struct array_strings *pas = NULL;
    struct pbsnode       *isanode;

    get_svr_attr_arst(SRV_ATR_acl_hosts, &pas);
    isanode = PGetNodeFromAddr(conn_addr);

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check_my_array_string(pas, request->rq_host, ACL_Host) == 0))
      {
      /* reuses the function-level tmpLine (same size as the old inner copy) */
      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
      free_request = FALSE;
      rc = PBSE_BADHOST;
      goto process_request_cleanup;
      }

    /* the node lookup hands back a node that must be unlocked when found */
    if (isanode != NULL)
      unlock_node(isanode, "process_request", NULL, LOGLEVEL);
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (conn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */
    conn_credent[sfds].timestamp = time_now;

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (conn_socktype == PBS_SOCK_INET)
        {
        rc = PBSE_IVALREQ;
        req_reject(rc, 0, request, NULL, NULL);
        free_request = FALSE;
        goto process_request_cleanup;
        }

      }

    if (conn_socktype & PBS_SOCK_UNIX)
      {
      pthread_mutex_lock(svr_conn[sfds].cn_mutex);
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      pthread_mutex_unlock(svr_conn[sfds].cn_mutex);
      }

    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = PBSE_NONE;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if (request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        free_request = FALSE;
        goto process_request_cleanup;
        }
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (conn_authen != PBS_NET_CONN_AUTHENTICATED)
      /* skip checking user if we did not get an authenticated credential */
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      free(auth_err);  /* free(NULL) is a no-op, no guard needed */
      free_request = FALSE;
      goto process_request_cleanup;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if ((pjob = svr_find_job(request->rq_ind.rq_modify.rq_objname, FALSE)) != (job *)0)
        {
        if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
          {

          /*
           * running job with checkpointing ("s", "c" or "enabled") whose
           * exec_host mentions the requesting host: evaluate privileges as
           * if the request came from the server host itself
           */
          if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
              ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
              (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
            {
            request->rq_perm = svr_get_privilege(request->rq_user, server_host);
            skip = TRUE;
            }

          }
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        }
      
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (conn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */
  get_svr_attr_l(SRV_ATR_State, &state);

  if (state > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);
        rc = PBSE_SVRDOWN;
        free_request = FALSE;
        goto process_request_cleanup;
        /*NOTREACHED*/

        break;
      }
    }

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  rc = dispatch_request(sfds, request);

  return(rc);

process_request_cleanup:

  /* only requests that were never handed to a reply routine are freed here */
  if (free_request == TRUE)
    free_br(request);

  return(rc);
  }  /* END process_request() */
예제 #19
0
/*
 * post_signal_req() - finish a signal-job request once the reply for the
 * forwarded signal is available in preq->rq_reply.
 *
 * A non-zero reply code is logged and turned into a reject.  Otherwise the
 * job named by preq->rq_extra is looked up and, for suspend/resume signals,
 * its suspend flag, state character and node allocation are updated before
 * the request is acknowledged.
 *
 * @param preq  the original request; NULL means it was handled elsewhere
 */
void post_signal_req(

  batch_request *preq)

  {
  char                 *jobid;
  job                  *pjob;

  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  /* request has been handled elsewhere */
  if (preq == NULL)
    return;

  preq->rq_conn = preq->rq_orgconn;  /* restore client socket */

  if (preq->rq_reply.brp_code)
    {
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_REQUEST,
      preq->rq_ind.rq_signal.rq_jid,
      pbse_to_txt(PBSE_MOMREJECT));

    errno = 0;

    req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL);
    }
  else
    {
    if ((jobid = preq->rq_extra) == NULL)
      {
      /*
       * NOTE(review): on this path preq is neither answered nor freed
       * here - confirm that the caller deals with it.
       */
      log_err(ENOMEM, __func__, (char *)"Cannot allocate memory! FAILURE");
      return;
      }

    if ((pjob = svr_find_job(jobid, FALSE)) != NULL)
      {
      if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_SUSPEND) == 0)
        {
        /* only act on the first suspend; an already suspended job is left alone */
        if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) == 0)
          {
          pjob->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend;
          
          set_statechar(pjob);
          
          job_save(pjob, SAVEJOB_QUICK, 0);
          
          /* release resources allocated to suspended job - NORWAY */
          
          free_nodes(pjob);
          }
        }
      else if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_RESUME) == 0)
        {
        if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend)
          {
          /* re-allocate assigned node to resumed job - NORWAY */
          
          set_old_nodes(pjob);
          
          pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;
          
          set_statechar(pjob);
          
          job_save(pjob, SAVEJOB_QUICK, 0);
          }
        }
    
      unlock_ji_mutex(pjob, __func__, (char *)"5", LOGLEVEL);
      }
    else
      {
      /* job is gone */
      snprintf(log_buf,sizeof(log_buf),
        "Cannot find job '%s', assuming success",
        jobid);
      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, __func__, log_buf);
      }

    free(jobid);

    /*
     * jobid is the same pointer as preq->rq_extra; clear the field so the
     * request does not carry a dangling pointer into reply_ack().
     */
    preq->rq_extra = NULL;

    reply_ack(preq);
    }

  return;
  }  /* END post_signal_req() */
예제 #20
0
/*
 * finish_routing_processing() - act on the outcome of an attempt to send
 * a job to another destination.
 *
 *   LOCUTION_SUCCESS  the job now lives elsewhere: drop staged-in files and
 *                     copied checkpoint data when flagged, then purge the
 *                     local job structure.
 *   LOCUTION_FAIL     if the job is being aborted, park it as queued, mail
 *                     the owner, unlock and stop; otherwise remember the
 *                     destination as bad and retry as below.
 *   anything else     mail the owner, re-evaluate the job state and call
 *                     job_route() again; abort the job if that fails.
 *
 * @param pjob    job that was being routed (NULL is ignored)
 * @param status  LOCUTION_* result of the routing attempt
 */
void finish_routing_processing(

  job *pjob,
  int  status)

  {
  int          next_state;
  int          next_substate;
  int          route_rc;

  if (pjob == NULL)
    return;

  if (LOGLEVEL >= 10)
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, pjob->ji_qs.ji_jobid);

  if (status == LOCUTION_SUCCESS)
    {
    /* normal return, job was routed; each removal may clear pjob */

    if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
      remove_stagein(&pjob);

    if ((pjob != NULL) &&
        (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_COPIED))
      remove_checkpoint(&pjob);

    if (pjob != NULL)
      svr_job_purge(pjob); /* need to remove server job struct */

    return;
    }

  if (status == LOCUTION_FAIL)
    {
    /* permanent rejection (or signal) */

    if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_ABORT)
      {
      /* job delete in progress, just set to queued status */
      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE);

      svr_mailowner(pjob, 'a', TRUE, "Couldn't route job to remote server");

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      return;
      }

    add_dest(pjob);  /* else mark destination as bad */
    }

  /* failed (not aborting) or unknown status: try routing again */

  svr_mailowner(pjob, 'a', TRUE, "Couldn't route job to remote server");

  /* force re-eval of job state out of Transit */

  svr_evaljobstate(*pjob, next_state, next_substate, 1);
  svr_setjobstate(pjob, next_state, next_substate, FALSE);

  route_rc = job_route(pjob);

  if (route_rc == PBSE_ROUTEREJ)
    {
    job_abt(&pjob, pbse_to_txt(PBSE_ROUTEREJ));
    }
  else if (route_rc != 0)
    {
    job_abt(&pjob, msg_routexceed);
    }
  else
    {
    unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
    }

  return;
  } /* END finish_routing_processing() */
예제 #21
0
/*
 * process_request() - read one batch request from socket sfds, check that
 * the sender may be served, and pass it to dispatch_request().
 *
 * The same source builds two variants:
 *   - server (PBS_MOM undefined): verifies the connection type, applies the
 *     host ACL, authenticates the user, computes rq_perm and refuses
 *     job-creating requests while the server is shutting down;
 *   - MOM (PBS_MOM defined): accepts requests only from addresses in the
 *     okclients tree and grants the full server permission set.
 *
 * @param sfds  socket the request arrives on
 */
void process_request(

  int sfds) /* file descriptor (socket) to get request */

  {
#ifdef PBS_MOM
  char *id = "process_request";
#endif

  int                   rc;

  struct batch_request *request = NULL;

#ifndef PBS_MOM
  char *auth_err = NULL;
#endif

  time_now = time(NULL);

  request = alloc_br(0);

  if (request == NULL)
    {
    /*
     * Without a request structure nothing can be decoded or answered;
     * log the condition and drop the client instead of dereferencing NULL.
     */
    LOG_EVENT(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_REQUEST,
      "process_req",
      "cannot allocate memory for request");

    close_client(sfds);

    return;
    }

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */

#ifndef PBS_MOM

  if (svr_conn[sfds].cn_active == FromClientDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX) &&
        (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(sfds, request);
    }
  else
    {
    LOG_EVENT(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_REQUEST,
      "process_req",
      "request on invalid type of connection");

    close_conn(sfds);

    free_br(request);

    return;
    }

#else /* PBS_MOM */
  rc = dis_request_read(sfds, request);

#endif /* PBS_MOM */

  if (rc == -1)
    {
    /* FAILURE */

    /* premature end of file */

    close_client(sfds);

    free_br(request);

    return;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL))
    {
    /* FAILURE */

    /* read error, likely cannot send reply so just disconnect */

    /* ??? not sure about this ??? */

    close_client(sfds);

    free_br(request);

    return;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");

    close_client(sfds);

    return;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    char tmpLine[1024];

    sprintf(log_buffer, "%s: %lu",
            pbse_to_txt(PBSE_BADHOST),
            get_connectaddr(sfds));

    LOG_EVENT(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buffer);

    snprintf(tmpLine, sizeof(tmpLine), "cannot determine hostname for connection from %lu",
             get_connectaddr(sfds));

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);

    return;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(
      log_buffer,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    LOG_EVENT(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buffer);
    }

  /* is the request from a host acceptable to the server */

#ifndef PBS_MOM

  if (svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX)
    {
    /* a unix-domain peer is local, so it is recorded under the server's name */
    strcpy(request->rq_host, server_name);
    }

  if (server.sv_attr[SRV_ATR_acl_host_enable].at_val.at_long)
    {
    /* acl enabled, check it; always allow myself and nodes */

    struct pbsnode *isanode;

    isanode = PGetNodeFromAddr(get_connectaddr(sfds));

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check(
           &server.sv_attr[SRV_ATR_acl_hosts],
           request->rq_host,
           ACL_Host) == 0))
      {
      char tmpLine[1024];

      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);

      close_client(sfds);

      return;
      }
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (svr_conn[sfds].cn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (svr_conn[sfds].cn_socktype == PBS_SOCK_INET)
        return;

      }

    if (svr_conn[sfds].cn_socktype & PBS_SOCK_UNIX)
      {
      conn_credent[sfds].timestamp = time_now;
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      }


    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = 0;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if ( request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        if (rc == PBSE_NONE)
          {
          conn_credent[sfds].timestamp = time_now;
          svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
          }
        return;
        }
      else if (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED)
        /* skip checking user if we did not get an authenticated credential */
        rc = PBSE_BADCRED;
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (svr_conn[sfds].cn_authen != PBS_NET_CONN_AUTHENTICATED)
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      free(auth_err);  /* free(NULL) is a no-op, no guard needed */

      close_client(sfds);

      return;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if (((pjob = find_job(request->rq_ind.rq_modify.rq_objname)) != (job *)0) &&
          (pjob->ji_qs.ji_state == JOB_STATE_RUNNING))
        {

        /*
         * running job with checkpointing ("s", "c" or "enabled") whose
         * exec_host mentions the requesting host: evaluate privileges as
         * if the request came from the server host itself
         */
        if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
          ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
          (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
          (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
          (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
          {

          request->rq_perm = svr_get_privilege(request->rq_user, server_host);
          skip = TRUE;

          }
        }
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (svr_conn[sfds].cn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */

  if (server.sv_attr[SRV_ATR_State].at_val.at_long > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);

        return;

        /*NOTREACHED*/

        break;
      }
    }

#else /* THIS CODE FOR MOM ONLY */

    {
    /*extern tree *okclients; */

    extern void mom_server_update_receive_time_by_ip(u_long ipaddr, const char *cmd);

    /* check connecting host against allowed list of ok clients */

    if (LOGLEVEL >= 6)
      {
      sprintf(log_buffer, "request type %s from host %s received",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

/*    if (!tfind(svr_conn[sfds].cn_addr, &okclients)) */
    if (!AVL_is_in_tree(svr_conn[sfds].cn_addr, 0, okclients))
      {
      sprintf(log_buffer, "request type %s from host %s rejected (host not authorized)",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);

      req_reject(PBSE_BADHOST, 0, request, NULL, "request not authorized");

      close_client(sfds);

      return;
      }

    if (LOGLEVEL >= 3)
      {
      sprintf(log_buffer, "request type %s from host %s allowed",
        reqtype_to_txt(request->rq_type),
        request->rq_host);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

    mom_server_update_receive_time_by_ip(svr_conn[sfds].cn_addr, reqtype_to_txt(request->rq_type));
    }    /* END BLOCK */

  request->rq_fromsvr = 1;

  request->rq_perm =
    ATR_DFLAG_USRD | ATR_DFLAG_USWR |
    ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
    ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
    ATR_DFLAG_SvWR | ATR_DFLAG_MOM;

#endif /* END else !PBS_MOM */

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  dispatch_request(sfds, request);

  return;
  }  /* END process_request() */
예제 #22
0
/*
 * job_route() - decide whether a job sitting in a routing queue may be
 * routed right now and, if so, hand it to the configured router.
 *
 * Returns PBSE_QUENOEN when the job has no queue, PBSE_ROUTEEXPD when the
 * job has outlived the queue's route lifetime, 0/PBSE_NONE when nothing is
 * to be done at the moment (already in transit, unexpected state, queue not
 * started, transit limit reached, held/waiting jobs not routable), and
 * otherwise whatever default_router() or site_alt_router() returns.
 */
int job_route(

  job *jobp)      /* job to route */

  {
  int               not_routable = 0;
  time_t            expires = 0;          /* 0 means the job never expires */
  time_t            now = time(NULL);
  char              msg[LOCAL_LOG_BUF_SIZE];

  struct pbs_queue *que = jobp->ji_qhdr;
  long              next_retry;

  if (que == NULL)
    return(PBSE_QUENOEN);

  if (LOGLEVEL >= 7)
    {
    sprintf(msg, "%s", jobp->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, msg);
    }

  /* see if the job is able to be routed */
  if (jobp->ji_qs.ji_state == JOB_STATE_TRANSIT)
    {
    /* already going, ignore it */
    return(0);
    }
  else if (jobp->ji_qs.ji_state == JOB_STATE_QUEUED)
    {
    /* ok to try */
    }
  else if (jobp->ji_qs.ji_state == JOB_STATE_HELD)
    {
    /* acceptable only when the queue routes held jobs */
    not_routable = !que->qu_attr[QR_ATR_RouteHeld].at_val.at_long;
    }
  else if (jobp->ji_qs.ji_state == JOB_STATE_WAITING)
    {
    /* acceptable only when the queue routes waiting jobs */
    not_routable = !que->qu_attr[QR_ATR_RouteWaiting].at_val.at_long;
    }
  else
    {
    /* any other state: note it in the log and leave the job alone */
    snprintf(msg, sizeof(msg), "%s %d %s", 
      pbse_to_txt(PBSE_BADSTATE), jobp->ji_qs.ji_state, __func__);

    log_event(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,jobp->ji_qs.ji_jobid,msg);

    return(PBSE_NONE);
    }

  /* check the queue limits, can we route any (more) */

  /* queue not started - no routing */
  if (que->qu_attr[QA_ATR_Started].at_val.at_long == 0)
    return(0);

  /* max number of jobs being routed */
  if ((que->qu_attr[QA_ATR_MaxRun].at_flags & ATR_VFLAG_SET) &&
      (que->qu_attr[QA_ATR_MaxRun].at_val.at_long <= que->qu_njstate[JOB_STATE_TRANSIT]))
    return(0);

  /* what is the retry time and life time of a job in this queue */
  next_retry = (long)now;

  if (que->qu_attr[QR_ATR_RouteRetryTime].at_flags & ATR_VFLAG_SET)
    next_retry += que->qu_attr[QR_ATR_RouteRetryTime].at_val.at_long;
  else
    next_retry += PBS_NET_RETRY_TIME;

  if (que->qu_attr[QR_ATR_RouteLifeTime].at_flags & ATR_VFLAG_SET)
    {
    expires =
      jobp->ji_qs.ji_un.ji_routet.ji_quetime +
      que->qu_attr[QR_ATR_RouteLifeTime].at_val.at_long;
    }

  if ((expires != 0) && (expires < now))
    {
    /* job too long in queue */
    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,jobp->ji_qs.ji_jobid,msg_routexceed);

    return(PBSE_ROUTEEXPD);
    }

  /* not currently routing this job */
  if (not_routable)
    return(PBSE_NONE);

  /* a non-zero alt-router attribute selects the site specific router */
  if (que->qu_attr[QR_ATR_AltRouter].at_val.at_long != 0)
    return(site_alt_router(jobp, que, next_retry));

  return(default_router(jobp, que, next_retry));
  }  /* END job_route() */
예제 #23
0
/*
 * default_router() - walk the queue's route destination list, starting at
 * jobp->ji_lastdest, and try to move the job to each destination in turn.
 *
 * Returns 0 as soon as a move succeeds or is deferred, or when the list is
 * exhausted but at least one destination asked for a retry (the job's retry
 * time is then set to retry_time).  Returns PBSE_ROUTEREJ when the list is
 * exhausted and no destination is worth retrying.
 */
int default_router(

  job              *jobp,
  struct pbs_queue *qp,
  long              retry_time)

  {
  struct array_strings *dest_list = NULL;
  char                 *destination;
  int                   dest_count = 0;
  int                   move_rc;
  int                   local_errno = 0;
  char                  msg[LOCAL_LOG_BUF_SIZE];

  if (LOGLEVEL >= 7)
    {
    sprintf(msg, "%s", jobp->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, msg);
    }

  /* without a destination attribute the list is treated as empty */
  if (qp->qu_attr[QR_ATR_RouteDestin].at_flags & ATR_VFLAG_SET)
    {
    dest_list = qp->qu_attr[QR_ATR_RouteDestin].at_val.at_arst;

    dest_count = dest_list->as_usedptr;
    }

  jobp->ji_retryok = 0;

  /* loop through all possible destinations */
  for (;;)
    {
    if (jobp->ji_lastdest >= dest_count)
      {
      /* have tried all; start from the top next time */
      jobp->ji_lastdest = 0;

      if (jobp->ji_retryok != 0)
        {
        /* set time to retry job */
        jobp->ji_qs.ji_un.ji_routet.ji_rteretry = retry_time;

        return(0);
        }

      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        jobp->ji_qs.ji_jobid,
        pbse_to_txt(PBSE_ROUTEREJ));

      return(PBSE_ROUTEREJ);
      }

    destination = dest_list->as_string[jobp->ji_lastdest++];

    /* destinations already marked bad for this job are skipped */
    if (is_bad_dest(jobp, destination))
      continue;

    move_rc = svr_movejob(jobp, destination, &local_errno, NULL, TRUE);

    if (move_rc == ROUTE_PERM_FAILURE)
      {
      /* permanent failure */
      add_dest(jobp);
      }
    else if ((move_rc == ROUTE_SUCCESS) || (move_rc == ROUTE_DEFERRED))
      {
      /* worked, or deferred */
      return(0);
      }
    else if (move_rc == ROUTE_RETRY)
      {
      /* failed, but try destination again */
      jobp->ji_retryok = 1;
      }
    }

  /*NOTREACHED*/
  return(-1);
  }  /* END default_router() */
예제 #24
0
/*
 * forced_jobpurge() - handle a delete request that carries the purge
 * extension by removing the job without checking the MOM.
 *
 * Returns  1  the job was purged and the request acknowledged
 *          0  no purge was requested; the caller continues normally
 *         -1  the request was rejected (unknown job or no permission)
 */
static int forced_jobpurge(

  struct batch_request *preq)

  {
  job *target = find_job(preq->rq_ind.rq_delete.rq_objname);

  if (target == NULL)
    {
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_delete.rq_objname,
      pbse_to_txt(PBSE_UNKJOBID));

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return(-1);
    }

  /* check about possibly purging the job */

  if (preq->rq_extend == NULL)
    return(0);

  if (strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)) != 0)
    return(0);

  /*
   * purge is allowed for operator/manager permissions, or for the job
   * owner when the server's owner-purge attribute is set
   */
  if (((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0) &&
      !((svr_chk_owner(preq, target) == 0) && (server.sv_attr[SRV_ATR_OwnerPurge].at_val.at_long)))
    {
    /* FAILURE */

    req_reject(PBSE_PERM, 0, preq, NULL, NULL);

    return(-1);
    }

  sprintf(log_buffer, "purging job without checking MOM");

  log_event(
    PBSEVENT_JOB,
    PBS_EVENTCLASS_JOB,
    target->ji_qs.ji_jobid,
    log_buffer);

  reply_ack(preq);

  free_nodes(target);

  /* jobs in an execution queue get their assigned resources decremented */
  if (target->ji_qhdr->qu_qs.qu_type == QTYPE_Execution)
    {
    set_resc_assigned(target, DECR);
    }

  job_purge(target);

  return(1);
  }  /* END forced_jobpurge() */
예제 #25
0
/*
 * req_orderjob() - service an order-job batch request.
 *
 * Exchanges the JOB_ATR_qrank values of the two jobs named in the request.
 * When the jobs are in different queues, each job is first checked against
 * the other job's queue with svr_chkque(); then the queue names are swapped
 * and both jobs are dequeued and re-enqueued. When they share a queue they
 * are swapped in place with swap_jobs().
 *
 * The request is rejected with PBSE_BADSTATE if either job is running or has
 * no queue, and with the svr_chkque() result if either queue check fails.
 *
 * @param vp (I) the batch request; job ids are read from
 *               rq_ind.rq_move.rq_jid and rq_ind.rq_move.rq_destin
 * @return PBSE_NONE on every path; the outcome is reported to the requester
 *         through reply_ack() / req_reject(). When chk_job_request() returns
 *         NULL nothing is sent from here (presumably chk_job_request() has
 *         already answered the request - confirm against its definition).
 */
int req_orderjob(

  struct batch_request *vp) /* I */

  {
  job                  *pjob;
  job                  *pjob1;
  job                  *pjob2;
  int                   rank;
  int                   rc = 0;
  char                  tmpqn[PBS_MAXQUEUENAME+1];
  struct batch_request *req = (struct batch_request *)vp;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  pbs_queue            *pque1;
  pbs_queue            *pque2;

  if ((pjob1 = chk_job_request(req->rq_ind.rq_move.rq_jid, req)) == NULL)
    {
    return(PBSE_NONE);
    }

  /* the mutex managers release the job mutexes on every return path unless
   * set_lock_on_exit(false) is called */
  mutex_mgr job1_mutex(pjob1->ji_mutex, true);

  if ((pjob2 = chk_job_request(req->rq_ind.rq_move.rq_destin, req)) == NULL)
    {
    return(PBSE_NONE);
    }

  mutex_mgr job2_mutex(pjob2->ji_mutex, true);

  /* pjob is left pointing at whichever job was found running so the debug
   * message below can report it */
  if (((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_RUNNING) ||
      ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_RUNNING))
    {
#ifndef NDEBUG
    /* bounded formatting; the text is identical to the former
     * sprintf() + strcat() pair */
    snprintf(log_buf, sizeof(log_buf), "%s %d%s",
             pbse_to_txt(PBSE_BADSTATE),
             pjob->ji_qs.ji_state,
             __func__);

    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buf);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return(PBSE_NONE);
    }
  else if ((pjob1->ji_qhdr == NULL) || (pjob2->ji_qhdr == NULL))
    {
    req_reject(PBSE_BADSTATE, 0, req, NULL, "One of the jobs does not have a queue");
    return(PBSE_NONE);
    }
  else if (pjob1->ji_qhdr != pjob2->ji_qhdr)
    {
    /* jobs are in different queues: each job must be acceptable to the
     * other job's queue before anything is changed */
    int ok = FALSE;

    if ((pque2 = get_jobs_queue(&pjob2)) == NULL)
      {
      rc = PBSE_BADSTATE;
      job2_mutex.set_lock_on_exit(false);
      }
    else
      {
      mutex_mgr pque2_mutex = mutex_mgr(pque2->qu_mutex, true);
      if ((rc = svr_chkque(pjob1, pque2, get_variable(pjob1, pbs_o_host), MOVE_TYPE_Order, NULL)) == PBSE_NONE)
        {
        /* release queue 2 before the queue of job 1 is looked up */
        pque2_mutex.unlock();

        if ((pque1 = get_jobs_queue(&pjob1)) == NULL)
          {
          rc = PBSE_BADSTATE;
          job1_mutex.set_lock_on_exit(false);
          }
        else if (pjob1 != NULL)
          {
          mutex_mgr pque1_mutex = mutex_mgr(pque1->qu_mutex, true);
          if ((rc = svr_chkque(pjob2, pque1, get_variable(pjob2, pbs_o_host), MOVE_TYPE_Order, NULL)) == PBSE_NONE)
            {
            ok = TRUE;
            }
          }
        }
      }

    if (ok == FALSE)
      {
      req_reject(rc, 0, req, NULL, NULL);

      return(PBSE_NONE);
      }
    }

  /* now swap the order of the two jobs in the queue lists */
  rank = pjob1->ji_wattr[JOB_ATR_qrank].at_val.at_long;

  pjob1->ji_wattr[JOB_ATR_qrank].at_val.at_long =
    pjob2->ji_wattr[JOB_ATR_qrank].at_val.at_long;

  pjob2->ji_wattr[JOB_ATR_qrank].at_val.at_long = rank;

  if (pjob1->ji_qhdr != pjob2->ji_qhdr)
    {
    /* different queues: exchange the queue names, then move each job */
    strcpy(tmpqn, pjob1->ji_qs.ji_queue);
    strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue);
    strcpy(pjob2->ji_qs.ji_queue, tmpqn);

    svr_dequejob(pjob1, FALSE);
    svr_dequejob(pjob2, FALSE);

    /* a recycled job must be neither saved nor unlocked by this function */
    if (svr_enquejob(pjob1, FALSE, -1) == PBSE_JOB_RECYCLED)
      {
      pjob1 = NULL;
      job1_mutex.set_lock_on_exit(false);
      }

    if (svr_enquejob(pjob2, FALSE, -1) == PBSE_JOB_RECYCLED)
      {
      pjob2 = NULL;
      job2_mutex.set_lock_on_exit(false);
      }
    }
  else
    {
    if ((pque1 = get_jobs_queue(&pjob1)) != NULL)
      {
      mutex_mgr pque1_mutex = mutex_mgr(pque1->qu_mutex, true);
      swap_jobs(pque1->qu_jobs,pjob1,pjob2);
      swap_jobs(NULL,pjob1,pjob2);
      }
    else if (pjob1 == NULL)
      {
      /* get_jobs_queue() cleared the job pointer; as in the different-queue
       * path above, the mutex manager must not touch the job's mutex on
       * exit (presumably the job is gone and no longer locked by us) */
      job1_mutex.set_lock_on_exit(false);
      }
    }

  /* need to update disk copy of both jobs to save new order */
  if (pjob1 != NULL)
    {
    job_save(pjob1, SAVEJOB_FULL, 0);
    }

  if (pjob2 != NULL)
    {
    job_save(pjob2, SAVEJOB_FULL, 0);
    }

  /* SUCCESS */
  reply_ack(req);

  return(PBSE_NONE);
  }  /* END req_orderjob() */
예제 #26
0
/*
 * req_orderjob - service an order-job batch request.
 *
 * Exchanges the JOB_ATR_qrank values of the two jobs named in
 * rq_ind.rq_move.rq_jid and rq_ind.rq_move.rq_destin. Jobs in the same queue
 * have their queue and all-jobs links swapped; jobs in different queues must
 * each pass svr_chkque() against the other's queue, after which their queue
 * names are exchanged and both are dequeued and enqueued again.
 *
 * The request is rejected with PBSE_BADSTATE when either job is running, or
 * with the svr_chkque() result when a queue check fails; otherwise it is
 * acknowledged after both jobs have been saved.
 */
void req_orderjob(

  struct batch_request *req)  /* I */

  {
#ifndef NDEBUG
  char *id = "req_orderjob";
#endif
  job  *first;
  job  *second;
  job  *running = NULL;
  int   saved_rank;
  int   rc;
  char  queue_name[PBS_MAXQUEUENAME+1];

  first = chk_job_request(req->rq_ind.rq_move.rq_jid, req);

  if (first == NULL)
    return;

  second = chk_job_request(req->rq_ind.rq_move.rq_destin, req);

  if (second == NULL)
    return;

  /* the first job found running (if any) is the one reported */
  if (first->ji_qs.ji_state == JOB_STATE_RUNNING)
    running = first;
  else if (second->ji_qs.ji_state == JOB_STATE_RUNNING)
    running = second;

  if (running != NULL)
    {
#ifndef NDEBUG
    sprintf(log_buffer, "%s %d",
            pbse_to_txt(PBSE_BADSTATE),
            running->ji_qs.ji_state);

    strcat(log_buffer, id);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, running->ji_qs.ji_jobid, log_buffer);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return;
    }

  if (first->ji_qhdr != second->ji_qhdr)
    {
    /* different queues: each job has to be acceptable to the other queue;
     * the second check only runs when the first one passed */
    rc = svr_chkque(first, second->ji_qhdr, get_variable(first, pbs_o_host), MOVE_TYPE_Order, NULL);

    if (rc == 0)
      rc = svr_chkque(second, first->ji_qhdr, get_variable(second, pbs_o_host), MOVE_TYPE_Order, NULL);

    if (rc != 0)
      {
      req_reject(rc, 0, req, NULL, NULL);

      return;
      }
    }

  /* exchange the queue ranks */
  saved_rank = first->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long;

  first->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long =
    second->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long;

  second->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = saved_rank;

  if (first->ji_qhdr == second->ji_qhdr)
    {
    /* same queue: swapping the list links is enough */
    swap_link(&first->ji_jobque,  &second->ji_jobque);
    swap_link(&first->ji_alljobs, &second->ji_alljobs);
    }
  else
    {
    /* different queues: trade queue names, then requeue both jobs */
    strcpy(queue_name, first->ji_qs.ji_queue);
    strcpy(first->ji_qs.ji_queue, second->ji_qs.ji_queue);
    strcpy(second->ji_qs.ji_queue, queue_name);

    svr_dequejob(first);
    svr_dequejob(second);

    svr_enquejob(first);
    svr_enquejob(second);
    }

  /* write both jobs back to disk so the new order is kept */
  job_save(first, SAVEJOB_FULL);
  job_save(second, SAVEJOB_FULL);

  /* SUCCESS */
  reply_ack(req);
  }  /* END req_orderjob() */
예제 #27
0
/*
 * validate_socket() - ask the local trqauthd daemon to authorize the
 * connection associated with psock.
 *
 * Builds a '|'-delimited request holding the server name and port, the
 * calling user's name, this process id and the client socket reported by
 * get_parent_client_socket(), writes it to trqauthd's unix-domain socket and
 * reads back a numeric code plus a response string, which are handed to
 * parse_daemon_response().
 *
 * @param psock            (I) socket passed to get_parent_client_socket()
 * @param external_err_msg (O) set to a human-readable message on failure
 * @return PBSE_NONE when trqauthd accepted the request, otherwise the error
 *         code of the step that failed
 */
int validate_socket(

  int          psock,
  std::string &external_err_msg)

  {
  int            rc = PBSE_NONE;
  char           tmp_buf[LOCAL_LOG_BUF];
  char           write_buf[1024];
  char          *read_buf = NULL;
  long long      read_buf_len = 0;
  uid_t          myrealuid;
  int            local_socket = -1; /* -1: nothing to close yet */
  int            parent_client_socket = 0;
  struct passwd *pwent;
  char          *err_msg = NULL;
  char          *l_server = NULL;
  int            l_server_len = 0;
  unsigned short af_family;
  long long      code = -1;
  int            write_buf_len = 0;
  int            local_errno;
  pid_t          mypid;
  char           unix_sockname[MAXPATHLEN + 1];
  char           err_buf[MAXPATHLEN];

  myrealuid = getuid();

  /* getpwuid() may return NULL without touching errno when there is simply
   * no entry, so clear it to keep the logged value meaningful */
  errno = 0;

  if ((pwent = getpwuid(myrealuid)) == NULL)
    {
    snprintf(tmp_buf, LOCAL_LOG_BUF, "cannot get account info: uid %d, errno %d (%s)\n", (int)myrealuid, errno, strerror(errno));
    log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, tmp_buf);

    /* Without a user name no request can be sent, so this must not be
     * reported as success.
     * NOTE(review): PBSE_SOCKET_FAULT is reused because it is the generic
     * failure code already returned by this function; switch to a dedicated
     * bad-user code if the project defines one. */
    rc = PBSE_SOCKET_FAULT;
    external_err_msg = tmp_buf;
    }
  else if ((rc = get_hostaddr_hostent_af(&local_errno, (char *)AUTH_IP, &af_family, &l_server, &l_server_len)) != PBSE_NONE)
    {
    snprintf(err_buf, sizeof(err_buf), "get_hostaddr_hostend_af failed: %d", rc);
    external_err_msg = err_buf;
    }
  else if ((rc = get_parent_client_socket(psock, &parent_client_socket)) != PBSE_NONE)
    {
    snprintf(err_buf, sizeof(err_buf), "get_parent_client_socket failed: %d", rc);
    external_err_msg = err_buf;
    }
  else
    {
    snprintf(unix_sockname, sizeof(unix_sockname), "%s/%s", TRQAUTHD_SOCK_DIR, TRQAUTHD_SOCK_NAME);
    /* request layout, in the order of the arguments below:
     * request_type|server_name_len|server_name|server_port|auth_type|
     * user_name_len|user_name|pid|client_socket|
     */
    mypid = getpid();
    write_buf_len = snprintf(write_buf, sizeof(write_buf), "%d|%d|%s|%d|%d|%d|%s|%d|%d|", TRQ_AUTH_CONNECTION, (int)strlen(server_name), server_name, server_port, AUTH_TYPE_IFF, (int)strlen(pwent->pw_name), pwent->pw_name, (int)mypid, parent_client_socket);

    if ((write_buf_len < 0) ||
        (write_buf_len >= (int)sizeof(write_buf)))
      {
      /* the request did not fit; never send a truncated request */
      rc = PBSE_SOCKET_WRITE;
      external_err_msg = "qsub couldn't build the authentication request for trqauthd: request too long\n";
      }
    else if ((local_socket = socket_get_unix()) <= 0)
      {
      external_err_msg = "qsub was unable to open a socket\n";
      rc = PBSE_SOCKET_FAULT;
      }
    else if ((rc = socket_connect_unix(local_socket, unix_sockname, &err_msg)) != PBSE_NONE)
      {
      external_err_msg = "qsub couldn't connect its socket to trqauthd: VERIFY THAT trqauthd IS RUNNING\n";
      }
    else if ((rc = socket_write(local_socket, write_buf, write_buf_len)) != write_buf_len)
      {
      /* socket_write() is expected to return the number of bytes written */
      rc = PBSE_SOCKET_WRITE;
      external_err_msg = "qsub couldn't write authentication information to trqauthd";
      }
    else if ((rc = socket_read_num(local_socket, &code)) != PBSE_NONE)
      {
      external_err_msg = "qsub couldn't read the size of information from trqauthd\n";
      }
    else if ((rc = socket_read_str(local_socket, &read_buf, &read_buf_len)) != PBSE_NONE)
      {
      external_err_msg = "qsub couldn't read the response from trqauthd\n";
      }
    else if ((rc = parse_daemon_response(code, read_buf_len, read_buf)) != PBSE_NONE)
      {
      snprintf(err_buf, sizeof(err_buf), "qsub received error code %lld ('%s') from trqauthd\n", code, pbse_to_txt(code));
      external_err_msg = err_buf;
      }
    else
      {
      if (getenv("PBSDEBUG"))
        {
        fprintf(stdout, "%s : Connection authorized (server socket %d)\n", __func__, parent_client_socket);
        }
      }

    if (local_socket >= 0)
      socket_close(local_socket);
    }

  /* a message returned by socket_connect_unix() replaces the generic text */
  if (rc != PBSE_NONE)
    {
    if (err_msg != NULL)
      {
      snprintf(err_buf, sizeof(err_buf), "Error in connection to trqauthd (%d)-[%s]\n", rc, err_msg);
      external_err_msg = err_buf;
      }
    }

  if (err_msg != NULL)
    free(err_msg);

  if (read_buf != NULL)
    free(read_buf);

  if (l_server != NULL)
    free(l_server);

  return(rc);
  }
예제 #28
0
/*
 * req_movejob - service a move-job batch request.
 *
 * Only jobs in the QUEUED, HELD or WAITING state may be moved; any other
 * state is rejected with PBSE_BADSTATE. The move itself is delegated to
 * svr_movejob() and the request is answered according to its result:
 * acknowledged on ROUTE_SUCCESS, rejected with pbs_errno on
 * ROUTE_PERM_FAILURE / ROUTE_RETRY, and left unanswered here on
 * ROUTE_DEFERRED.
 */
void req_movejob(

  struct batch_request *req)

  {
#ifndef NDEBUG
  char *id = "req_movejob";
#endif
  job *pjob;
  int  movable;

  pjob = chk_job_request(req->rq_ind.rq_move.rq_jid, req);

  if (pjob == NULL)
    return;

  movable = ((pjob->ji_qs.ji_state == JOB_STATE_QUEUED) ||
             (pjob->ji_qs.ji_state == JOB_STATE_HELD) ||
             (pjob->ji_qs.ji_state == JOB_STATE_WAITING));

  if (!movable)
    {
#ifndef NDEBUG
    sprintf(log_buffer, "%s %d",
            pbse_to_txt(PBSE_BADSTATE),
            pjob->ji_qs.ji_state);

    strcat(log_buffer, id);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return;
    }

  /* svr_movejob() performs the move, whether local or over the network */
  switch (svr_movejob(pjob, req->rq_ind.rq_move.rq_destin, req))
    {
    case ROUTE_PERM_FAILURE:
    case ROUTE_RETRY:

      /* failed; a detailed response to the requestor is not implemented */
      req_reject(pbs_errno, 0, req, NULL, NULL);

      break;

    case ROUTE_SUCCESS:

      /* record who moved the job and where, then acknowledge */
      strcpy(log_buffer, msg_movejob);

      sprintf(log_buffer + strlen(log_buffer), msg_manager,
              req->rq_ind.rq_move.rq_destin,
              req->rq_user,
              req->rq_host);

      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);

      reply_ack(req);

      break;

    case ROUTE_DEFERRED:
    default:

      /* deferred: to be handled by post_movejob() when the child completes;
       * nothing to do here */
      break;
    }  /* END switch (svr_movejob(pjob,req->rq_ind.rq_move.rq_destin,req)) */
  }  /* END req_movejob() */
예제 #29
0
/**
 * @brief
 *	post_doq - work-task callback that examines the reply to a dependency
 *	register request and releases the request.
 *
 *	A non-zero reply code means the register request was rejected. The
 *	rejection is logged against the child job. If the reply code is
 *	PBSE_JOB_MOVED and the parent job is found locally in the MOVED
 *	state/substate with a destination of the form <queue>@<server>, the
 *	register request is re-sent to that server with this function as the
 *	callback again. In every other rejected case, or when the re-send
 *	fails, the child job (if it is still found) is aborted with the logged
 *	message.
 *
 * @param[in] pwt - work task whose wt_parm1 is the batch request
 */
static void
post_doq(struct work_task *pwt)
{
	struct batch_request *preq = (struct batch_request *)pwt->wt_parm1;
	char *jobid = preq->rq_ind.rq_register.rq_child;
	char *msg;
	job  *pjob;
	job  *ppjob;
	struct depend_job pparent;
	int rc;

	if (preq->rq_reply.brp_code) {
		/* request was rejected */

		(void)strcpy(log_buffer, msg_regrej);
		(void)strcat(log_buffer, preq->rq_ind.rq_register.rq_parent);

		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
			jobid, log_buffer);
		pjob = find_job(jobid);

		/* the error text is appended after logging; it only reaches
		 * check_block() / job_abt() below */
		if ((msg = pbse_to_txt(preq->rq_reply.brp_code)) != NULL) {
			(void)strcat(log_buffer, " ");
			(void)strcat(log_buffer, msg);
		}
		if (pjob) {
			/* abort unless the request is successfully forwarded */
			int abort_job = 1;

			if (preq->rq_reply.brp_code == PBSE_JOB_MOVED) {
				/* Creating a separate log buffer because if we end up aborting the submitted job
				 * we don't want to change what goes into accounting log via job_abt
				 */
				char log_msg[LOG_BUF_SIZE];
				char *destin = NULL;

				snprintf(log_msg, sizeof(log_msg), "%s, %s", msg_job_moved,
					"sending dependency request to remote server");
				log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobid, log_msg);
				ppjob = find_job(preq->rq_ind.rq_register.rq_parent);
				if (ppjob && (ppjob->ji_qs.ji_state == JOB_STATE_MOVED) && (ppjob->ji_qs.ji_substate == JOB_SUBSTATE_MOVED)) {
					/* job destination should be <remote queue>@<remote server>;
					 * ideally a moved job never has an empty destination, and
					 * if it does the job is aborted below */
					destin = strchr(ppjob->ji_qs.ji_destin, (int)'@');
				}
				if (destin != NULL) {
					/* strncpy() does not terminate on truncation, so copy one
					 * byte less and terminate explicitly */
					strncpy(pparent.dc_child, ppjob->ji_qs.ji_jobid, sizeof(pparent.dc_child) - 1);
					pparent.dc_child[sizeof(pparent.dc_child) - 1] = '\0';
					strncpy(pparent.dc_svr, destin + 1, sizeof(pparent.dc_svr) - 1);
					pparent.dc_svr[sizeof(pparent.dc_svr) - 1] = '\0';

					rc = send_depend_req(pjob, &pparent, preq->rq_ind.rq_register.rq_dependtype,
						JOB_DEPEND_OP_REGISTER,
						SYNC_SCHED_HINT_NULL, post_doq);
					if (rc) {
						snprintf(log_msg, sizeof(log_msg), "%s",
							"Failed to send dependency request to remote server, aborting job");
						log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_ERR, jobid, log_msg);
					} else {
						/* forwarded; the outcome arrives in a later callback */
						abort_job = 0;
					}
				}
			}

			if (abort_job) {
				check_block(pjob, log_buffer);
				job_abt(pjob, log_buffer);
			}
		}
	}

	release_req(pwt);
}
예제 #30
0
/*
 * chk_job_req_permissions - decide whether a request may act on a job.
 *
 * The request is rejected, the job mutex is released via unlock_ji_mutex()
 * and *pjob_ptr is set to NULL when either
 *   - svr_authorize_jobreq() returns -1 (rejected with PBSE_PERM), or
 *   - the job state is JOB_STATE_EXITING or later and the request is
 *     anything other than PBS_BATCH_Rerun (rejected with PBSE_BADSTATE).
 * Otherwise nothing is changed and the caller keeps its job pointer.
 */
void chk_job_req_permissions(

  job                  **pjob_ptr, /* M */
  struct batch_request  *preq) /* I */

  {
  job  *pjob = *pjob_ptr;
  char  reject_text[MAXLINE];
  char  log_buf[LOCAL_LOG_BUF_SIZE];

  if (svr_authorize_jobreq(preq, pjob) == -1)
    {
    /* requester is not authorized for this job */
    sprintf(log_buf, msg_permlog,
      preq->rq_type,
      "Job",
      pjob->ji_qs.ji_jobid,
      preq->rq_user,
      preq->rq_host);

    log_event(PBSEVENT_SECURITY, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

    req_reject(PBSE_PERM, 0, preq, NULL, "operation not permitted");

    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

    *pjob_ptr = NULL;

    return;
    }

  /* SUCCESS - job has not reached the exiting state */
  if (pjob->ji_qs.ji_state < JOB_STATE_EXITING)
    return;

  /* job has completed; only a re-run is still allowed */
  if (preq->rq_type == PBS_BATCH_Rerun)
    return;

  sprintf(log_buf, "%s %s",
    pbse_to_txt(PBSE_BADSTATE),
    PJobState[pjob->ji_qs.ji_state]);

  log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

  snprintf(reject_text, sizeof(reject_text),
    "invalid state for job - %s",
    PJobState[pjob->ji_qs.ji_state]);

  req_reject(PBSE_BADSTATE, 0, preq, NULL, reject_text);

  unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);

  *pjob_ptr = NULL;
  } /* END chk_job_req_permissions() */