Example #1
0
/*
 * log_commit_error()
 *
 * Logs the failure of a send_job commit request and tells the caller
 * whether the failure was a timeout.
 *
 * Fetches the error text for the connection with pbs_geterrmsg(), builds a
 * "send_job commit failed" message and logs it at LOG_WARNING, then logs a
 * second, job-specific message: LOG_WARNING (and timeout = true) when the
 * selected error code is EINPROGRESS, LOG_CRIT otherwise.
 *
 * @param con     - index of the connection the commit was sent on
 * @param mom_err - return code of the commit; values above PBSE_FLOOR are
 *                  used as the error code, otherwise errno is used
 * @param job_id  - id of the job, used only in the log text
 * @param timeout - (O) set to true when the failure is EINPROGRESS; it is
 *                  never reset to false here
 *
 * @pre-cond: con must be a valid index into the connection table
 */
void log_commit_error(

  int   con,
  int   mom_err,
  char *job_id,
  bool &timeout)

  {
  char       *err_text;
  char        log_buf[LOCAL_LOG_BUF_SIZE];
  int         errno2;
  const char *err_desc;
  const char *job_name = (job_id != NULL) ? job_id : "N/A";

  err_text = pbs_geterrmsg(con);
  err_desc = (err_text != NULL) ? err_text : "N/A";

  /* NOTE:  errno is modified by log_err */
  if (mom_err > PBSE_FLOOR)
    {
    errno2 = mom_err;

    /* bounded: err_text comes from the connection and has no known limit */
    snprintf(log_buf, sizeof(log_buf), "send_job commit failed, rc=%d (%s: %s)",
      mom_err, pbse_to_txt(mom_err), err_desc);
    }
  else
    {
    /* take errno before formatting so nothing below can disturb it */
    errno2 = errno;

    snprintf(log_buf, sizeof(log_buf), "send_job commit failed, rc=%d (%s)",
      mom_err, err_desc);
    }

  /* the original code frees the message, so it is treated as owned here;
   * free(NULL) is a no-op, no guard needed */
  free(err_text);

  log_ext(errno2, __func__, log_buf, LOG_WARNING);
    
  /* if failure occurs, pbs_mom should purge job and pbs_server should set *
     job state to idle w/error msg */
  if (errno2 == EINPROGRESS)
    {
    timeout = true;

    snprintf(log_buf, sizeof(log_buf),
      "child commit request timed-out for job %s, increase tcp_timeout?",
      job_name);
    log_ext(errno2, __func__, log_buf, LOG_WARNING);
    }
  else
    {
    snprintf(log_buf, sizeof(log_buf),
      "child failed in commit request for job %s", job_name);
    log_ext(errno2, __func__, log_buf, LOG_CRIT);
    }
  }
Example #2
0
/*
 * log_err()
 *
 * Convenience wrapper around log_ext(): forwards the error number, the
 * routine name and the message unchanged, with the severity fixed at
 * LOG_ERR.
 */
void log_err(

  int         errnum,  /* I (errno or PBSErrno) */
  const char *routine, /* I - name of the reporting routine */
  const char *text)    /* I - message to record */

  {
  const int severity = LOG_ERR;

  log_ext(errnum, routine, text, severity);
  }  /* END log_err() */
Example #3
0
/*
 * getpwnam_ext()
 *
 * Looks up a password entry through getpwnam_wrapper(), retrying up to
 * LDAP_RETRIES times when the lookup fails with one of a fixed set of
 * errno values (EINTR, EIO, EMFILE, ENFILE, ENOMEM, ERANGE). Each such
 * failure is logged. Any other failure ends the loop at once.
 *
 * @param user_name - (I) name to look up
 * @return the entry returned by getpwnam_wrapper(), or NULL when
 *         user_name is NULL, the lookup failed with a non-retryable
 *         result, or the retries were used up
 */
struct passwd *getpwnam_ext( 

  char *user_name) /* I */

  {
  struct passwd *pwent = NULL;
  int            retrycnt = 0;
  int            lookup_errno;

  /* bad argument check */
  if (user_name == NULL)
    return NULL;

  while ((pwent == NULL) && (retrycnt != -1) && (retrycnt < LDAP_RETRIES))
    {
    /* clear errno before EVERY attempt: a failed lookup that does not set
     * errno must not be judged by the value left by the previous attempt
     * or by the logging done for it */
    errno = 0;

    pwent = getpwnam_wrapper( user_name );

    /* if the user wasn't found check for any errors to log */
    if (pwent == NULL)
      {
      /* snapshot errno; the calls below may change it */
      lookup_errno = errno;

      switch (lookup_errno)
        {
        case EINTR:
        case EIO:
        case EMFILE:
        case ENFILE:
        case ENOMEM:
        case ERANGE:
          sprintf(log_buffer, "ERROR: getpwnam() error %d (%s)",
                  lookup_errno,
                  strerror(lookup_errno));

          log_ext(-1, __func__, log_buffer, LOG_ERR);
          retrycnt++;
          break;

        default:
          /* not a retryable condition: -1 ends the loop */
          retrycnt = -1;
          break;
        }
      }
    }

  return(pwent);
  } /* END getpwnam_ext() */
Example #4
0
/*
 * process_gpu_request_reply
 *
 * Work-task handler for the reply to a GPU control request relayed to a
 * MOM.  Disconnects from the MOM, puts the original connection back on the
 * request, and then either rejects the request with the MOM's return code
 * or acknowledges it.
 */
static void process_gpu_request_reply(

  struct work_task *pwt)
  {
  char   *routine = "process_gpu_request_reply";

  struct batch_request *request;

  /* close connection to MOM */
  svr_disconnect(pwt->wt_event);

  request = pwt->wt_parm1;

  /* restore client socket */
  request->rq_conn = request->rq_orgconn;

  if (request->rq_reply.brp_code == 0)
    {
    /* record that MOM changed gpu mode */
    if (LOGLEVEL >= 7)
      {
      sprintf(
        log_buffer,
        "GPU control request completed for node %s gpuid %s mode %d reset_perm %d reset_vol %d",
        request->rq_ind.rq_gpuctrl.rq_momnode,
        request->rq_ind.rq_gpuctrl.rq_gpuid,
        request->rq_ind.rq_gpuctrl.rq_gpumode,
        request->rq_ind.rq_gpuctrl.rq_reset_perm,
        request->rq_ind.rq_gpuctrl.rq_reset_vol);

      log_ext(-1, routine, log_buffer, LOG_INFO);
      }

    reply_ack(request);

    return;
    }

  /* non-zero reply code: log it and pass the same text back to the client */
  sprintf(log_buffer,
    "MOM failed on GPU request, rc = %d",
    request->rq_reply.brp_code);
  log_err(errno, routine, log_buffer);

  req_reject(request->rq_reply.brp_code, 0, request, NULL, log_buffer);
  }
Example #5
0
/*
 * process_gpu_request_reply
 *
 * Finishes a gpu change request once the answer from the MOM is stored in
 * the request's reply.  A NULL request is ignored.  Otherwise the original
 * connection is put back on the request and the request is rejected with
 * the MOM's return code (non-zero reply code) or acknowledged (zero).
 */
void process_gpu_request_reply(

  batch_request *preq)

  {
  char msg[LOCAL_LOG_BUF_SIZE];

  if (preq != NULL)
    {
    /* restore client socket */
    preq->rq_conn = preq->rq_orgconn;

    if (preq->rq_reply.brp_code == 0)
      {
      /* record that MOM changed gpu mode */
      if (LOGLEVEL >= 7)
        {
        sprintf(
          msg,
          "GPU control request completed for node %s gpuid %s mode %d reset_perm %d reset_vol %d",
          preq->rq_ind.rq_gpuctrl.rq_momnode,
          preq->rq_ind.rq_gpuctrl.rq_gpuid,
          preq->rq_ind.rq_gpuctrl.rq_gpumode,
          preq->rq_ind.rq_gpuctrl.rq_reset_perm,
          preq->rq_ind.rq_gpuctrl.rq_reset_vol);

        log_ext(-1, __func__, msg, LOG_INFO);
        }

      reply_ack(preq);
      }
    else
      {
      /* log the failure and hand the same text back to the client */
      sprintf(msg,
        "MOM failed on GPU request, rc = %d",
        preq->rq_reply.brp_code);
      log_err(errno, __func__, msg);

      req_reject(preq->rq_reply.brp_code, 0, preq, NULL, msg);
      }
    }
  } /* END process_gpu_request_reply() */
Example #6
0
/*
 * req_gpuctrl_svr
 *
 * Server-side handling of a GPU control request.  Checks the requester's
 * permissions, validates the target node / gpu / requested action, and
 * then relays the request to the node's MOM.
 *
 * Every validation failure rejects the request with req_reject() and
 * returns the same code that was sent to the client.
 *
 * @param preq - the GPU control request; it is answered (rejected or
 *               acknowledged) on every path through this function
 * @return PBSE_NONE when the request was relayed and the reply processed,
 *         PBSE_PERM, PBSE_UNKNODE or PBSE_UNKREQ for validation and
 *         connection failures, or the error from issue_Drequest()
 */
int req_gpuctrl_svr(
    
  struct batch_request *preq)

  {
  int rc = PBSE_NONE;
  char  *nodename = NULL;
  char  *gpuid = NULL;
  int    gpumode = -1;
  int    reset_perm = -1;
  int    reset_vol = -1;
  char   log_buf[LOCAL_LOG_BUF_SIZE+1];
  int    local_errno = 0;
  struct pbsnode *pnode = NULL;
  int    gpuidx = -1;
  int    conn;

  /* only managers and operators may change gpu settings */
  if ((preq->rq_perm &
       (ATR_DFLAG_MGWR | ATR_DFLAG_MGRD | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR)) == 0)
    {
    rc = PBSE_PERM;
    snprintf(log_buf, sizeof(log_buf),
        "invalid permissions (ATR_DFLAG_MGWR | ATR_DFLAG_MGRD | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR)");
    req_reject(rc, 0, preq, NULL, log_buf);
    return rc;
    }

  nodename = preq->rq_ind.rq_gpuctrl.rq_momnode;
  gpuid = preq->rq_ind.rq_gpuctrl.rq_gpuid;
  gpumode = preq->rq_ind.rq_gpuctrl.rq_gpumode;
  reset_perm = preq->rq_ind.rq_gpuctrl.rq_reset_perm;
  reset_vol = preq->rq_ind.rq_gpuctrl.rq_reset_vol;

  if (LOGLEVEL >= 7)
    {
    /* bounded: nodename and gpuid come from the request */
    snprintf(
      log_buf,
      sizeof(log_buf),
      "GPU control request for node %s gpuid %s mode %d reset_perm %d reset_vol %d",
      nodename,
      gpuid,
      gpumode,
      reset_perm,
      reset_vol);

    log_ext(-1, __func__, log_buf, LOG_INFO);
    }

  /* validate mom node exists */

  pnode = find_nodebyname(nodename);

  if (pnode == NULL)
    {
    req_reject(PBSE_UNKNODE, 0, preq, NULL, NULL);
    return PBSE_UNKNODE;
    }

  /* from here on every exit path releases the node with unlock_node() */

  /* validate that the node is up */

  if ((pnode->nd_state & (INUSE_DOWN | INUSE_OFFLINE | INUSE_UNKNOWN))||(pnode->nd_power_state != POWER_STATE_RUNNING))
    {
    rc = PBSE_UNKREQ;
    snprintf(log_buf, sizeof(log_buf), "Node %s is not available",pnode->nd_name);
    req_reject(rc, 0, preq, NULL, log_buf);
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate that the node has real gpus not virtual */

  if (!pnode->nd_gpus_real)
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "Not allowed for virtual gpus");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate the gpuid exists */

  if ((gpuidx = gpu_entry_by_id(pnode, gpuid, FALSE)) == -1)
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "GPU ID does not exist on node");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate that we have a real request */

  if ((gpumode == -1) && (reset_perm == -1) && (reset_vol == -1))
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "No action specified");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* for mode changes validate the mode with the driver_version */

  if ((pnode->nd_gpusn[gpuidx].driver_ver == 260) && (gpumode > 2))
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "GPU driver version does not support mode 3");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* we need to relay request to the mom for processing */
  /* have MOM attempt to change the gpu mode */

  /* save the client socket; process_gpu_request_reply() puts it back */
  preq->rq_orgconn = preq->rq_conn;

  unlock_node(pnode, __func__, NULL, LOGLEVEL);

  /* NOTE(review): pnode->nd_addrs[0] is read after the node was unlocked;
   * consider copying the address into a local before unlock_node() */
  conn = svr_connect(
           pnode->nd_addrs[0],
           pbs_mom_port,
           &local_errno,
           NULL,
           NULL);
    

  if (conn >= 0)
    {
    if ((rc = issue_Drequest(conn, preq)) != PBSE_NONE)
      req_reject(rc, 0, preq, NULL, NULL);
    else
      process_gpu_request_reply(preq);
    }
  else
    {
    /* report the same code to the caller that was sent to the client,
     * as every other reject path in this function does */
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "Failed to get connection to mom");
    }

  return rc;
  }
Example #7
0
#ifdef NUMA_SUPPORT
/*
 * cpuset_list_has_entry
 *
 * Returns 1 when item appears as a complete comma-separated element of
 * list, 0 otherwise.  A plain substring search is not enough: "1" must not
 * be treated as present just because "10" is in the list.
 */
static int cpuset_list_has_entry(

  const char *list,
  const char *item)

  {
  size_t      item_len = strlen(item);
  const char *cur = list;

  while (*cur != '\0')
    {
    const char *end = strchr(cur, ',');
    size_t      tok_len = (end != NULL) ? (size_t)(end - cur) : strlen(cur);

    if ((tok_len == item_len) && (strncmp(cur, item, item_len) == 0))
      return(1);

    if (end == NULL)
      break;

    cur = end + 1;
    }

  return(0);
  }
#endif /* NUMA_SUPPORT */

/*
 * get_cpuset_strings
 *
 * Builds the comma-separated cpu list (and, with NUMA_SUPPORT, the memory
 * node list) for a job from its vnode entries.
 *
 * Jobs whose node_exclusive attribute is set and non-zero are handed to
 * get_exclusive_cpuset_strings() instead.
 *
 * For every vnode the numeric suffix after the last '-' in the host name
 * is taken as the numa node index (0, with an error logged, when the name
 * has no '-').  Without NUMA_SUPPORT the cpu index is the vnode index and
 * MemStr stays empty.
 *
 * @param pjob   - (I) the job whose vnodes are walked
 * @param CpuStr - (O) receives the cpu list; the caller provides the
 *                 storage and no length is checked here
 * @param MemStr - (O) receives the memory node list; same caveat
 * @return FAILURE when any argument is NULL, the result of
 *         get_exclusive_cpuset_strings() for exclusive jobs, else SUCCESS
 */
int get_cpuset_strings(

  job  *pjob,   /* I */
  char *CpuStr, /* O */
  char *MemStr) /* O */

  {
  char   *id = "get_cpuset_strings";

  vnodent *np;
  int     j;
  int     cpu_index;
  int     ratio = 0;
  char    tmpStr[MAXPATHLEN];
  int     numa_index;

#ifdef NUMA_SUPPORT
  numanode *numa_tmp;
  int     mem_index;
#endif

  if ((pjob == NULL) || 
      (CpuStr == NULL) ||
      (MemStr == NULL))
    return(FAILURE);

  /* only touch pjob once it is known to be non-NULL */
  np = pjob->ji_vnods;

  CpuStr[0] = '\0';
  MemStr[0] = '\0';

  if (pjob->ji_wattr[JOB_ATR_node_exclusive].at_flags & ATR_VFLAG_SET)
    {
    if (pjob->ji_wattr[JOB_ATR_node_exclusive].at_val.at_long != 0)
      {
      return(get_exclusive_cpuset_strings(pjob,CpuStr,MemStr,np));
      }
    }

  for (j = 0;j < pjob->ji_numvnod;++j, np++)
    {
    /* the node number follows the last dash in the name */
    char *dash = strrchr(np->vn_host->hn_host,'-');

    if (dash != NULL)
      {
      numa_index = atoi(dash+1);
      }
    else
      {
      log_err(-1,id,"could not parse node number from node name\n");
      numa_index = 0;
      }

    if (CpuStr[0] != '\0')
      strcat(CpuStr, ",");

#ifdef NUMA_SUPPORT
    /* NOTE(review): numa_index is not range-checked against numa_nodes, and
     * num_mems == 0 or num_cpus < num_mems would divide by zero below;
     * confirm the node table rules these out */
    numa_tmp = numa_nodes + numa_index;
    cpu_index = np->vn_index + numa_tmp->cpu_offset;
    ratio = numa_tmp->num_cpus / numa_tmp->num_mems;
    mem_index = (np->vn_index / ratio) + numa_tmp->mem_offset;
#else
    cpu_index = np->vn_index;
#endif /* NUMA_SUPPORT */

    sprintf(tmpStr, "%d", cpu_index);

    strcat(CpuStr, tmpStr);

#ifdef NUMA_SUPPORT
    sprintf(tmpStr,"%d",mem_index);

    /* add each memory node once; compare whole list elements */
    if (cpuset_list_has_entry(MemStr,tmpStr) == 0)
      {
      if (MemStr[0] != '\0')
        strcat(MemStr, ",");

      strcat(MemStr, tmpStr);
      }
#endif
    }

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer,
      "found cpus (%s) mems (%s) ratio = %d",
      CpuStr, MemStr, ratio);
    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  return(SUCCESS);
  }
Example #8
0
/*
 * run_pelog
 *
 * Runs a prologue or epilogue script for a job and waits for it.
 *
 * Steps, in order:
 *   - pick the script path: "<specpelog>.<jobtype>" when the job has a job
 *     type and that file exists, else specpelog itself
 *   - for the user-level variants (PE_PROLOGUSER, PE_EPILOGUSER,
 *     PE_PROLOGUSERJOB, PE_EPILOGUSERJOB) switch groups/egid/euid to the
 *     job's execution user before touching the file
 *   - stat the script and check owner / type / permission bits
 *   - fork; the child wires up stdin/stdout/stderr, builds argv and the
 *     environment and execs the script; the parent waits under an alarm
 *
 * @param which      - (I) one of PE_*
 * @param specpelog  - (I) script path
 * @param pjob       - (I) associated job
 * @param pe_io_type - (I) how the script's stdout/stderr are set up
 * @param deletejob  - (I) non-zero when called before a job is deleted
 *                     (purge -p); the child then continues even when the
 *                     output files could not be opened
 * @return 0 when pjob/specpelog are missing or the script does not exist;
 *         -1 when the group/uid setup fails; the result of pelog_err()
 *         for stat, permission, input-file and fork failures; otherwise
 *         run_exit (0, the script's exit status, or -3/-4/-5 for wait
 *         failure / timeout / unkillable child)
 */
int run_pelog(

  int   which,      /* I (one of PE_*) */
  char *specpelog,  /* I - script path */
  job  *pjob,       /* I - associated job */
  int   pe_io_type, /* I - io type */
  int   deletejob)  /* I - called before a job being deleted (purge -p) */

  {
  struct sigaction  act;
  struct sigaction  oldact;
  char             *arg[12];
  int               fds1 = 0;
  int               fds2 = 0;
  int               fd_input;
  char              resc_list[2048];
  char              resc_used[2048];

  struct stat       sbuf;
  char              sid[20];
  char              exit_stat[11];
  int               waitst;
  int               isjoined;  /* boolean */
  char              buf[MAXPATHLEN + 1024];
  char              pelog[MAXPATHLEN + 1024];

  uid_t             real_uid;
  gid_t            *real_gids = NULL;
  gid_t             real_gid;
  int               num_gids;

  int               jobtypespecified = 0;

  resource         *r;

  char             *EmptyString = (char *)"";

  int               LastArg;
  int               aindex;

  int               rc;

  char             *ptr;

  int               moabenvcnt = 14;  /* # of entries in moabenvs */
  static char      *moabenvs[] = {
      (char *)"MOAB_NODELIST",
      (char *)"MOAB_JOBID",
      (char *)"MOAB_JOBNAME",
      (char *)"MOAB_USER",
      (char *)"MOAB_GROUP",
      (char *)"MOAB_CLASS",
      (char *)"MOAB_TASKMAP",
      (char *)"MOAB_QOS",
      (char *)"MOAB_PARTITION",
      (char *)"MOAB_PROCCOUNT",
      (char *)"MOAB_NODECOUNT",
      (char *)"MOAB_MACHINE",
      (char *)"MOAB_JOBARRAYINDEX",
      (char *)"MOAB_JOBARRAYRANGE"
      };

  if ((pjob == NULL) || (specpelog == NULL) || (specpelog[0] == '\0'))
    {
    return(0);
    }

  ptr = pjob->ji_wattr[JOB_ATR_jobtype].at_val.at_str;

  if (ptr != NULL)
    {
    /* try the job-type specific script first; fall back further down */
    jobtypespecified = 1;

    snprintf(pelog,sizeof(pelog),"%s.%s",
      specpelog,
      ptr);
    }
  else
    {
    snprintf(pelog, sizeof(pelog), "%s", specpelog);
    }
    
  /* remember who we are so the identity can be restored later */
  real_uid = getuid();
  real_gid = getgid();

  /* size 0 only asks for the number of supplementary groups */
  if ((num_gids = getgroups(0, real_gids)) < 0)
    {
    log_err(errno, __func__, (char *)"getgroups failed\n");
    
    return(-1);
    }

  /* to support root squashing, become the user before performing file checks */
  if ((which == PE_PROLOGUSER) || 
      (which == PE_EPILOGUSER) || 
      (which == PE_PROLOGUSERJOB) || 
      (which == PE_EPILOGUSERJOB))
    {

    real_gids = (gid_t *)calloc(num_gids, sizeof(gid_t));
    
    if (real_gids == NULL)
      {
      log_err(ENOMEM, __func__, (char *)"Cannot allocate memory! FAILURE\n");
      
      return(-1);
      }
    
    if (getgroups(num_gids,real_gids) < 0)
      {
      log_err(errno, __func__, (char *)"getgroups failed\n");
      free(real_gids);
      
      return(-1);
      }
    
    /* pjob->ji_grpcache will not be set if using LDAP and LDAP not set */
    /* It is possible that ji_grpcache failed to allocate as well. 
       Make sure ji_grpcache is not NULL */
    if (pjob->ji_grpcache != NULL)
      {
      if (setgroups(
            pjob->ji_grpcache->gc_ngroup,
            (gid_t *)pjob->ji_grpcache->gc_groups) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgroups() for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));
      
        log_err(errno, __func__, log_buffer);
      
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
      
        return(-1);
        }
      }
    else
      {
      sprintf(log_buffer, "pjob->ji_grpcache is null. check_pwd likely failed.");
      log_err(-1, __func__, log_buffer);
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(-1);
      }
    
    if (setegid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "setegid(%lu) for UID = %lu failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      
      return(-1);
      }
    
    if (setuid_ext(pjob->ji_qs.ji_un.ji_momt.ji_exuid, TRUE) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "seteuid(%lu) failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);

      return(-1);
      }
    }

  rc = stat(pelog,&sbuf);

  if ((rc == -1) && (jobtypespecified == 1))
    {
    /* no job-type specific script: fall back to the plain one */
    snprintf(pelog, sizeof(pelog), "%s", specpelog);

    rc = stat(pelog,&sbuf);
    }

  if (rc == -1)
    {
    /* keep stat's errno; the cleanup calls below may overwrite it */
    int stat_errno = errno;

    if (stat_errno == ENOENT || stat_errno == EBADF)
      {
      /* epilog/prolog script does not exist */

      if (LOGLEVEL >= 5)
        {
        static char tmpBuf[1024];

        sprintf(log_buffer, "%s script '%s' for job %s does not exist (cwd: %s,pid: %d)",
          PPEType[which],
          (pelog[0] != '\0') ? pelog : "NULL",
          pjob->ji_qs.ji_jobid,
          getcwd(tmpBuf, sizeof(tmpBuf)),
          getpid());

        log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
        }

#ifdef ENABLE_CSA
      if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
        {
        /*
          * Add a workload management end record
        */
        if (LOGLEVEL >= 8)
          {
          sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - no user epilog",
            pjob->ji_qs.ji_jobid);

          log_err(-1, __func__, log_buffer);
          }

        add_wkm_end(pjob->ji_wattr[JOB_ATR_pagg_id].at_val.at_ll,
            pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
        }

#endif /* ENABLE_CSA */

      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);

      /* a missing script is not an error */
      return(0);
      }
      
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    return(pelog_err(pjob,pelog,stat_errno,(char *)"cannot stat"));
    }

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer,"running %s script '%s' for job %s",
      PPEType[which],
      (pelog[0] != '\0') ? pelog : "NULL",
      pjob->ji_qs.ji_jobid);

    log_ext(-1, __func__, log_buffer, LOG_DEBUG);  /* not actually an error--but informational */
    }

  /* script must be owned by root, be regular file, read and execute by user *
   * and not writeable by group or other */

  if (reduceprologchecks == TRUE)
    {
    /* reduced checks: regular file with any execute bit set */
    if ((!S_ISREG(sbuf.st_mode)) ||
        (!(sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))))
      {
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
      }
    }
  else
    {
    if (which == PE_PROLOGUSERJOB || which == PE_EPILOGUSERJOB)
      {
      /* per-job user scripts must be owned by the job's execution user */
      if ((sbuf.st_uid != pjob->ji_qs.ji_un.ji_momt.ji_exuid) || 
          (!S_ISREG(sbuf.st_mode)) ||
          ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
          (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
        {
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
        return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
        }
      }
    else if ((sbuf.st_uid != 0) ||
        (!S_ISREG(sbuf.st_mode)) ||
        ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
        (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
      {
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
      }
    
    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER))
      {
      /* script must also be read and execute by other */
      
      if ((sbuf.st_mode & (S_IROTH | S_IXOTH)) != (S_IROTH | S_IXOTH))
        {
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
        return(pelog_err(pjob, pelog, -1,  (char *)"permission Error"));
        }
      }
    } /* END !reduceprologchecks */

  fd_input = pe_input(pjob->ji_qs.ji_jobid);

  if (fd_input < 0)
    {
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);
    return(pelog_err(pjob, pelog, -2,  (char *)"no pro/epilogue input file"));
    }

  run_exit = 0;

  child = fork();

  if (child < 0)
    {
    /* fork failed: this is still the calling process.  It must NOT fall
     * into the child branch below, which closes the log and the network
     * and execs the script.  Clean up and report the failure instead. */
    int fork_errno = errno;

    close(fd_input);

    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    return(pelog_err(pjob, pelog, fork_errno, (char *)"cannot fork"));
    }

  if (child > 0)
    {
    int KillSent = FALSE;

    /* parent - watch for prolog/epilog to complete */

    close(fd_input);

    /* switch back to root if necessary */
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    act.sa_handler = pelogalm;

    sigemptyset(&act.sa_mask);

    act.sa_flags = 0;

    sigaction(SIGALRM, &act, &oldact);

    /* it would be nice if the harvest routine could block for 5 seconds,
       and if the prolog is not complete in that time, mark job as prolog
       pending, append prolog child, and continue */

    /* main loop should attempt to harvest prolog in non-blocking mode.
       If unsuccessful after timeout, job should be terminated, and failure
       reported.  If successful, mom should unset prolog pending, and
       continue with job start sequence.  Mom should report job as running
       while prologpending flag is set.  (NOTE:  must track per job prolog
       start time)
    */

    alarm(pe_alarm_time);

    while (waitpid(child, &waitst, 0) < 0)
      {
      if (errno != EINTR)
        {
        /* exit loop. non-alarm based failure occurred */

        run_exit = -3;

        MOMPrologFailureCount++;

        break;
        }

      /* run_exit == -4 is presumably set by the pelogalm handler when the
         alarm fires -- TODO confirm against pelogalm() */
      if (run_exit == -4)
        {
        if (KillSent == FALSE)
          {
          MOMPrologTimeoutCount++;

          /* timeout occurred */

          KillSent = TRUE;

          /* NOTE:  prolog/epilog may be locked in KERNEL space and unkillable */

          alarm(5);
          }
        else
          {
          /* cannot kill prolog/epilog, give up */

          run_exit = -5;

          break;
          }
        }
      }    /* END while (wait(&waitst) < 0) */

    /* epilog/prolog child completed */
#ifdef ENABLE_CSA
    if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
      {
      /*
       * Add a workload management end record
      */
      if (LOGLEVEL >= 8)
        {
        sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - after user epilog",
                pjob->ji_qs.ji_jobid);

        log_err(-1, __func__, log_buffer);
        }

      add_wkm_end(pjob->ji_wattr[JOB_ATR_pagg_id].at_val.at_ll,
          pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
      }

#endif /* ENABLE_CSA */

    alarm(0);

    /* restore the previous handler */

    sigaction(SIGALRM, &oldact, 0);

    if (run_exit == 0)
      {
      if (WIFEXITED(waitst))
        {
        run_exit = WEXITSTATUS(waitst);
        }
      }
    }
  else
    {
    /* child - run script */

    log_close(0);

    if (lockfds >= 0)
      {
      close(lockfds);

      lockfds = -1;
      }

    net_close(-1);

    /* make the input file the script's stdin */
    if (fd_input != 0)
      {
      close(0);

      if (dup(fd_input) == -1) {}

      close(fd_input);
      }

    if (pe_io_type == PE_IO_TYPE_NULL)
      {
      /* no output, force to /dev/null */

      fds1 = open("/dev/null", O_WRONLY, 0600);
      fds2 = open("/dev/null", O_WRONLY, 0600);
      }
    else if (pe_io_type == PE_IO_TYPE_STD)
      {
      /* open job standard out/error */

      /*
       * We need to know if files are joined or not.
       * If they are then open the correct file and duplicate it to the other
      */

      isjoined = is_joined(pjob);

      switch (isjoined)
        {
        case -1:

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds1 = (fds2 < 0)?-1:dup(fds2);

          break;

        case 1:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = (fds1 < 0)?-1:dup(fds1);

          break;

        default:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);
          break;
        }
      }

    /* when deleting a job the script runs even without output files */
    if (!deletejob)
      if ((fds1 < 0) ||
          (fds2 < 0))
        {
        if (fds1 >= 0)
          close(fds1);
        if (fds2 >= 0)
          close(fds2);

        exit(-1);
        }

    if (pe_io_type != PE_IO_TYPE_ASIS)
      {
      /* If PE_IO_TYPE_ASIS, leave as is, already open to job */

      if (fds1 != 1)
        {
        close(1);

        if (dup(fds1) >= 0)
          {
          close(fds1);
          }
        }

      if (fds2 != 2)
        {
        close(2);

        if (dup(fds2) >= 0)
          {
          close(fds2);
          }
        }
      }

    if ((which == PE_PROLOGUSER) || 
        (which == PE_EPILOGUSER) || 
        (which == PE_PROLOGUSERJOB) || 
        (which == PE_EPILOGUSERJOB))
      {
      /* ji_grpcache was verified non-NULL before the fork for these types */
      if (chdir(pjob->ji_grpcache->gc_homedir) != 0)
        {
        /* warn only, no failure */

        sprintf(log_buffer,
          "PBS: chdir to %s failed: %s (running user %s in current directory)",
          pjob->ji_grpcache->gc_homedir,
          strerror(errno),
          ((which == PE_PROLOGUSER) || (which == PE_PROLOGUSERJOB)) ? "prologue" : "epilogue");

        if (write_ac_socket(2, log_buffer, strlen(log_buffer)) == -1) {}

        fsync(2);
        }
      }

    /* for both prolog and epilog */

    if (DEBUGMODE == 1)
      {
      /* one conversion per argument: 6 strings and one long */
      fprintf(stderr, "PELOGINFO:  script:'%s'  jobid:'%s'  euser:'%s'  egroup:'%s'  jobname:'%s' SSID:'%ld'  RESC:'%s'\n",
              pelog,
              pjob->ji_qs.ji_jobid,
              pjob->ji_wattr[JOB_ATR_euser].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_egroup].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_jobname].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long,
              resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list)));
      }

    arg[0] = pelog;

    arg[1] = pjob->ji_qs.ji_jobid;
    arg[2] = pjob->ji_wattr[JOB_ATR_euser].at_val.at_str;
    arg[3] = pjob->ji_wattr[JOB_ATR_egroup].at_val.at_str;
    arg[4] = pjob->ji_wattr[JOB_ATR_jobname].at_val.at_str;

    /* NOTE:  inside child */

    if ((which == PE_EPILOG) || 
        (which == PE_EPILOGUSER) || 
        (which == PE_EPILOGUSERJOB))
      {
      /* for epilog only */

      sprintf(sid, "%ld",
              pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long);
      sprintf(exit_stat,"%d",
              pjob->ji_qs.ji_un.ji_momt.ji_exitstat);

      arg[5] = sid;
      arg[6] = resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[7] = resc_to_string(pjob, JOB_ATR_resc_used, resc_used, sizeof(resc_used));
      arg[8] = pjob->ji_wattr[JOB_ATR_in_queue].at_val.at_str;
      arg[9] = pjob->ji_wattr[JOB_ATR_account].at_val.at_str;
      arg[10] = exit_stat;
      arg[11] = NULL;

      LastArg = 11;
      }
    else
      {
      /* prolog */

      arg[5] = resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[6] = pjob->ji_wattr[JOB_ATR_in_queue].at_val.at_str;
      arg[7] = pjob->ji_wattr[JOB_ATR_account].at_val.at_str;
      arg[8] = NULL;

      LastArg = 8;
      }

    /* a NULL in the middle of argv would cut the list short; use "" */
    for (aindex = 0;aindex < LastArg;aindex++)
      {
      if (arg[aindex] == NULL)
        arg[aindex] = EmptyString;
      }  /* END for (aindex) */

    /*
     * Pass Resource_List.nodes request in environment
     * to allow pro/epi-logue setup/teardown of system
     * settings.  --pw, 2 Jan 02
     * Fixed to use putenv for sysV compatibility.
     *  --troy, 11 jun 03
     *
     */

    r = find_resc_entry(
          &pjob->ji_wattr[JOB_ATR_resource],
          find_resc_def(svr_resc_def, (char *)"nodes", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *ppn_str = "ppn=";
      int num_nodes = 1;
      int num_ppn = 1;

      /* PBS_RESOURCE_NODES */
      put_env_var("PBS_RESOURCE_NODES", r->rs_value.at_val.at_str);

      /* PBS_NUM_NODES */
      num_nodes = strtol(r->rs_value.at_val.at_str, NULL, 10);

      /* 
       * InitUserEnv() also calculates num_nodes and num_ppn the same way
       */
      if (num_nodes != 0)
        {
        char *tmp;
        char *other_reqs;

        /* get the ppn */
        if ((tmp = strstr(r->rs_value.at_val.at_str,ppn_str)) != NULL)
          {
          tmp += strlen(ppn_str);

          num_ppn = strtol(tmp, NULL, 10);
          }

        other_reqs = r->rs_value.at_val.at_str;

        /* add the leading count of every further '+'-separated request */
        while ((other_reqs = strchr(other_reqs, '+')) != NULL)
          {
          other_reqs += 1;
          num_nodes += strtol(other_reqs, &other_reqs, 10);
          }
        }

      sprintf(buf, "%d", num_nodes);
      put_env_var("PBS_NUM_NODES", buf);

      /* PBS_NUM_PPN */
      sprintf(buf, "%d", num_ppn);
      put_env_var("PBS_NUM_PPN", buf);

      /* PBS_NP */
      sprintf(buf, "%d", pjob->ji_numvnod);
      put_env_var("PBS_NP", buf);
      }  /* END if (r != NULL) */

    r = find_resc_entry(
          &pjob->ji_wattr[JOB_ATR_resource],
          find_resc_def(svr_resc_def, (char *)"gres", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */
      put_env_var("PBS_RESOURCE_GRES", r->rs_value.at_val.at_str);
      }

    if (TTmpDirName(pjob, buf, sizeof(buf)))
      {
      put_env_var("TMPDIR", buf);
      }

    /* Set PBS_SCHED_HINT */

    {
    char *envname = (char *)"PBS_SCHED_HINT";
    char *envval;

    if ((envval = get_job_envvar(pjob, envname)) != NULL)
      {
      put_env_var("PBS_SCHED_HINT", envval);
      }
    }

    /* Set PBS_NODENUM */

    sprintf(buf, "%d",
      pjob->ji_nodeid);
    put_env_var("PBS_NODENUM", buf);

    /* Set PBS_MSHOST */

    put_env_var("PBS_MSHOST", pjob->ji_vnods[0].vn_host->hn_host);

    /* Set PBS_NODEFILE */

    if (pjob->ji_flags & MOM_HAS_NODEFILE)
      {
      sprintf(buf, "%s/%s",
        path_aux,
        pjob->ji_qs.ji_jobid);
      put_env_var("PBS_NODEFILE", buf);
      }

    /* Set PBS_O_WORKDIR */
    {
    char *workdir_val;

    workdir_val = get_job_envvar(pjob,"PBS_O_WORKDIR");
    if (workdir_val != NULL)
      {
      put_env_var("PBS_O_WORKDIR", workdir_val);
      }
    }

    /* SET BEOWULF_JOB_MAP */

    {

    struct array_strings *vstrs;

    int VarIsSet = 0;
    int j;

    vstrs = pjob->ji_wattr[JOB_ATR_variables].at_val.at_arst;

    for (j = 0;j < vstrs->as_usedptr;++j)
      {
      if (!strncmp(
            vstrs->as_string[j],
            "BEOWULF_JOB_MAP=",
            strlen("BEOWULF_JOB_MAP=")))
        {
        VarIsSet = 1;

        break;
        }
      }

    if (VarIsSet == 1)
      {
      char *val = strchr(vstrs->as_string[j], '=');

      if (val != NULL)
        put_env_var("BEOWULF_JOB_MAP", val+1);
      }
    }

    /* Set some Moab env variables if they exist */

    if ((which == PE_PROLOG) || (which == PE_EPILOG))
      {
      char *tmp_val;

      for (aindex=0;aindex<moabenvcnt;aindex++)
        {
        tmp_val = get_job_envvar(pjob,moabenvs[aindex]);
        if (tmp_val != NULL)
          {
          put_env_var(moabenvs[aindex], tmp_val);
          }
        }
      }

    /*
     * if we want to run as user then we need to reset real user permissions
     * since it seems that some OSs use real not effective user id when execv'ing
     */

    if ((which == PE_PROLOGUSER) || 
        (which == PE_EPILOGUSER) || 
        (which == PE_PROLOGUSERJOB) || 
        (which == PE_EPILOGUSERJOB))
      {
      setuid_ext(pbsuser, TRUE);
      setegid(pbsgroup);

      if (setgid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgid(%lu) for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));
        
        log_err(errno, __func__, log_buffer);
       
        exit(-1);
        }
      
      if (setuid_ext(pjob->ji_qs.ji_un.ji_momt.ji_exuid, FALSE) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setuid(%lu) failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));
        
        log_err(errno, __func__, log_buffer);
       
        exit(-1);
        }
      }

    execv(pelog,arg);

    /* only reached when execv failed */
    sprintf(log_buffer,"execv of %s failed: %s\n",
      pelog,
      strerror(errno));

    if (write_ac_socket(2, log_buffer, strlen(log_buffer)) == -1)
      {
      /* cannot write message to stderr */

      /* NO-OP */
      }

    fsync(2);

    exit(255);
    }  /* END else () */

  /* parent only from here on: report how the script ended */
  switch (run_exit)
    {
    case 0:

      /* SUCCESS */

      /* NO-OP */

      break;

    case - 3:

      pelog_err(pjob, pelog, run_exit,  (char *)"child wait interrupted");

      break;

    case - 4:

      pelog_err(pjob, pelog, run_exit,  (char *)"prolog/epilog timeout occurred, child cleaned up");

      break;

    case - 5:

      pelog_err(pjob, pelog, run_exit, (char *) "prolog/epilog timeout occurred, cannot kill child");

      break;

    default:

      pelog_err(pjob, pelog, run_exit,  (char *)"nonzero p/e exit status");

      break;
    }  /* END switch (run_exit) */

  return(run_exit);
  }  /* END run_pelog() */
Example #9
0
/**
 * adds the cpus to the jobset
 *
 * Writes the job's cpu list to "<path>/cpus" and, when built with
 * NUMA_SUPPORT, its memory node list to "<path>/mems".
 *
 * The result of fclose() is checked as well as that of fwrite(): stdio
 * buffers the data, so a write error may only be reported when the
 * stream is flushed on close.
 *
 * @param path - the path to the jobset directory
 * @param pjob - the job associated with the jobset
 * @return SUCCESS if the files are correctly written, else FAILURE
 *         (NULL argument, file cannot be opened, or write/close failed)
 */
int add_cpus_to_jobset(

  char *path,
  job  *pjob)

  {
  FILE  *fd;
  char  *id = "add_cpus_to_jobset";
  char   cpusbuf[MAXPATHLEN+1];
  char   tmppath[MAXPATHLEN+1];
  size_t len;
#ifdef NUMA_SUPPORT
  char   memsbuf[MAXPATHLEN+1];
#endif  /* end NUMA_SUPPORT */

  if ((pjob == NULL) ||
      (path == NULL))
    {
    return(FAILURE);
    }

  /* Make the string defining the CPUs to add into the jobset */
#ifdef NUMA_SUPPORT
  get_cpuset_strings(pjob,cpusbuf,memsbuf);
#else
  get_cpu_string(pjob,cpusbuf);
#endif  /* end NUMA_SUPPORT */

  snprintf(tmppath,sizeof(tmppath),"%s/cpus",path);

  sprintf(log_buffer, "CPUSET: %s job %s path %s\n", cpusbuf,
          pjob->ji_qs.ji_jobid, tmppath);
  log_event(PBSEVENT_SYSTEM, 
    PBS_EVENTCLASS_SERVER,
    id,
    log_buffer);

  fd = fopen(tmppath, "w");

  if (fd == NULL)
    return(FAILURE);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer, "adding cpus %s to %s", cpusbuf, tmppath);
    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  len = strlen(cpusbuf);

  if (fwrite(cpusbuf, sizeof(char), len, fd) != len)
    {
    log_err(-1,id,"ERROR:  Unable to write cpus to cpuset\n");
    fclose(fd);
    return(FAILURE);
    }

  /* the data reaches the file on flush; a failure here is a failed write */
  if (fclose(fd) != 0)
    {
    log_err(-1,id,"ERROR:  Unable to write cpus to cpuset\n");
    return(FAILURE);
    }

#ifdef NUMA_SUPPORT
  snprintf(tmppath,sizeof(tmppath),"%s/mems",path);

  fd = fopen(tmppath, "w");

  if (fd == NULL)
    return(FAILURE);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer, "adding mems %s to %s", memsbuf, tmppath);
    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  len = strlen(memsbuf);

  if (fwrite(memsbuf, sizeof(char), len, fd) != len)
    {
    log_err(-1,id,"ERROR:  Unable to write mems to cpuset\n");
    fclose(fd);
    return(FAILURE);
    }

  if (fclose(fd) != 0)
    {
    log_err(-1,id,"ERROR:  Unable to write mems to cpuset\n");
    return(FAILURE);
    }
#endif  /* end NUMA_SUPPORT */

  return(SUCCESS);
  }
Example #10
0
/*
 * remove_boot_set()
 *
 * Rewrites rootStr in place as a comma separated list of the cpu indices
 * that remain after the entries of bootStr have been taken out of it.
 * Does nothing when either argument is NULL.
 *
 * NOTE(review): the result is written with no length limit, so rootStr
 * must be large enough for the expanded list - confirm against callers.
 */
void remove_boot_set(

  char *rootStr, /* I/O */
  char *bootStr) /* I */

  {
  enum { MAP_SLOTS = 1024 };

  static char  id[] = "remove_boot_set";
  int          present[MAP_SLOTS] = {0};  /* starts out with every slot clear */
  const char  *separator = "";
  char        *out;
  int          cpu;

  if ((rootStr == NULL) || (bootStr == NULL))
    return;

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer,
      "removing boot cpuset (%s) from root cpuset (%s)",
      bootStr, rootStr);
    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  /* add the root cpuset to the map, then take the boot cpuset back out */
  adjust_root_map(rootStr, present, MAP_SLOTS, TRUE);
  adjust_root_map(bootStr, present, MAP_SLOTS, FALSE);

  /* convert the map back into the root cpuset string; 'out' always points
   * at the terminating NUL of what has been written so far */
  out = rootStr;
  *out = '\0';

  for (cpu = 0; cpu < MAP_SLOTS; cpu++)
    {
    if (present[cpu] <= 0)
      continue;

    out += sprintf(out, "%s%d", separator, cpu);

    /* every entry after the first one is preceded by a comma */
    separator = ",";
    }

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer,
      "resulting root cpuset (%s)",
      rootStr);
    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  return;
  }
Example #11
0
/*
 * req_gpuctrl()
 *
 * Handles a GPU control batch request. The request is rejected with
 * PBSE_PERM unless the requester holds at least one of the
 * ATR_DFLAG_MGWR/MGRD/OPRD/OPWR permission bits.
 *
 * With NVIDIA_GPUS: validates the node, the gpu id and the requested
 * action, then relays the request to the node's MOM; any validation or
 * relay failure rejects the request.
 *
 * Without NVIDIA_GPUS: the request is always rejected with PBSE_NOSUP.
 */
void req_gpuctrl(

  struct batch_request *preq)

  {
  char   *id = "req_gpuctrl";

  char   *mom_name;
  char   *gpu_name;
  int     mode;
  int     perm_reset;
  int     vol_reset;
#ifdef NVIDIA_GPUS
  struct pbsnode *np;
  int     gpu_slot;
  int     rc;
  int     mom_conn;
#endif  /* NVIDIA_GPUS */

  if (!(preq->rq_perm &
        (ATR_DFLAG_MGWR | ATR_DFLAG_MGRD | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR)))
    {
    req_reject(PBSE_PERM, 0, preq, NULL, NULL);
    return;
    }

  /* pull the request parameters out of the batch request */
  mom_name   = preq->rq_ind.rq_gpuctrl.rq_momnode;
  gpu_name   = preq->rq_ind.rq_gpuctrl.rq_gpuid;
  mode       = preq->rq_ind.rq_gpuctrl.rq_gpumode;
  perm_reset = preq->rq_ind.rq_gpuctrl.rq_reset_perm;
  vol_reset  = preq->rq_ind.rq_gpuctrl.rq_reset_vol;

#ifdef NVIDIA_GPUS

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer,
      "GPU control request for node %s gpuid %s mode %d reset_perm %d reset_vol %d",
      mom_name, gpu_name, mode, perm_reset, vol_reset);

    log_ext(-1, id, log_buffer, LOG_INFO);
    }

  /* the mom node must exist */
  np = find_nodebyname(mom_name);

  if (np == NULL)
    {
    req_reject(PBSE_UNKNODE, 0, preq, NULL, NULL);
    return;
    }

  /* the node must be up */
  if ((np->nd_state & (INUSE_DELETED | INUSE_DOWN | INUSE_OFFLINE | INUSE_UNKNOWN)) != 0)
    {
    sprintf(log_buffer, "Node %s is not available", np->nd_name);
    req_reject(PBSE_UNKREQ, 0, preq, NULL, log_buffer);
    return;
    }

  /* the node must have real gpus, not virtual ones */
  if (!np->nd_gpus_real)
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "Not allowed for virtual gpus");
    return;
    }

  /* the gpu id must exist on that node */
  gpu_slot = gpu_entry_by_id(np, gpu_name, FALSE);

  if (gpu_slot == -1)
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "GPU ID does not exist on node");
    return;
    }

  /* at least one action must have been requested (-1 means "not given") */
  if ((mode == -1) && (perm_reset == -1) && (vol_reset == -1))
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "No action specified");
    return;
    }

  /* for mode changes validate the mode with the driver_version */
  if ((mode > 2) && (np->nd_gpusn[gpu_slot].driver_ver == 260))
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "GPU driver version does not support mode 3");
    return;
    }

  /* relay the request to the mom, which attempts the actual change */
  preq->rq_orgconn = preq->rq_conn;  /* restore client socket */

  mom_conn = svr_connect(np->nd_addrs[0], pbs_mom_port, process_Dreply, ToServerDIS);

  if (mom_conn < 0)
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "Failed to get connection to mom");
    return;
    }

  rc = issue_Drequest(mom_conn, preq, process_gpu_request_reply, NULL);

  if (rc != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    }

#else

  /* built without gpu support: describe the request, then refuse it */
  sprintf(log_buffer,
    "GPU control request not supported: node %s gpuid %s mode %d reset_perm %d reset_vol %d",
    mom_name, gpu_name, mode, perm_reset, vol_reset);

  if (LOGLEVEL >= 3)
    {
    log_ext(-1, id, log_buffer, LOG_INFO);
    }

  req_reject(PBSE_NOSUP, 0, preq, NULL, NULL);

#endif  /* NVIDIA_GPUS */

  return;
  }
Example #12
0
/*
 * mom_job_purge()
 *
 * Removes a job from this MOM: records what is needed to delete the job's
 * files (deleted inline or handed to the thread pool, depending on
 * thread_unlink_calls), drops the job's pids from global_job_sid_set,
 * unlinks the job from the job lists and the exiting list, restores the
 * cpu frequency if the job carried a "cpuclock" resource, and finally
 * frees the job. If the job list is empty afterwards, MOMCheckRestart()
 * is called.
 *
 * pjob must not be used by the caller after this returns, except when the
 * initial allocation fails: in that case the error is logged and the job
 * is left untouched.
 */
void mom_job_purge(

  job *pjob)  /* I (modified) */

  {
  job_file_delete_info *delete_info =
    (job_file_delete_info *)calloc(1, sizeof(job_file_delete_info));

  if (delete_info == NULL)
    {
    log_err(ENOMEM,__func__, (char *)"No space to allocate info for job file deletion");
    return;
    }

#ifdef NVIDIA_GPUS
  /* a job that had gpus assigned triggers a status update */
  if (((pjob->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) != 0) &&
      (pjob->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str != NULL))
    {
    send_update_soon();
    }
#endif  /* NVIDIA_GPUS */

  /* fill in the deletion record; the tmpdir flag moves from the job to it */
  const bool had_tmpdir = ((pjob->ji_flags & MOM_HAS_TMPDIR) != 0);

  delete_info->has_temp_dir = had_tmpdir ? TRUE : FALSE;

  if (had_tmpdir)
    pjob->ji_flags &= ~MOM_HAS_TMPDIR;

  strcpy(delete_info->jobid, pjob->ji_qs.ji_jobid);
  strcpy(delete_info->prefix, pjob->ji_qs.ji_fileprefix);

  /* the checkpoint directory is only recorded when both checkpoint
   * attributes are set */
  if ((pjob->ji_wattr[JOB_ATR_checkpoint_dir].at_flags & ATR_VFLAG_SET) &&
      (pjob->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET))
    {
    delete_info->checkpoint_dir = strdup(pjob->ji_wattr[JOB_ATR_checkpoint_dir].at_val.at_str);
    }

  delete_info->gid = pjob->ji_qs.ji_un.ji_momt.ji_exgid;
  delete_info->uid = pjob->ji_qs.ji_un.ji_momt.ji_exuid;

  /* remove each pid in ji_job_pid_set from the global_job_sid_set */
  job_pid_set_t::const_iterator pid_it = pjob->ji_job_pid_set->begin();

  while (pid_it != pjob->ji_job_pid_set->end())
    {
    job_pid_set_t::const_iterator global_it = global_job_sid_set.find(*pid_it);

    if (global_it != global_job_sid_set.end())
      global_job_sid_set.erase(global_it);

    pid_it++;
    }

  if (thread_unlink_calls != TRUE)
    delete_job_files(delete_info);
  else
    enqueue_threadpool_request(delete_job_files, delete_info, request_pool);

  /* remove this job from the global queue */
  delete_link(&pjob->ji_jobque);
  delete_link(&pjob->ji_alljobs);

  remove_from_exiting_list(pjob);

  if (LOGLEVEL >= 6)
    {
    strcpy(log_buffer, "removing job");

    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
    }

#if IBM_SP2==2        /* IBM SP PSSP 3.1 */
  unload_sp_switch(pjob);

#endif   /* IBM SP */

  /* a job with a cpuclock resource asked for a frequency change; now that
   * the job is done the previous frequency is put back */
  resource *cpuclock = find_resc_entry(&pjob->ji_wattr[JOB_ATR_resource],
            find_resc_def(svr_resc_def, "cpuclock", svr_resc_size));

  if (cpuclock != NULL)
    {
    std::string freq_before;

    nd_frequency.get_frequency_string(freq_before);

    if (nd_frequency.restore_frequency())
      {
      std::string freq_after;

      nd_frequency.get_frequency_string(freq_after);

      std::string msg("Restored frequency from ");
      msg += freq_before;
      msg += " to ";
      msg += freq_after;

      log_ext(PBSE_CHANGED_CPU_FREQUENCY,__func__, msg.c_str(),LOG_NOTICE);
      }
    else
      {
      std::string msg("Failed to restore frequency.");

      log_ext(nd_frequency.get_last_error(),__func__,msg.c_str(),LOG_ERR);
      }
    }

  mom_job_free(pjob);

  /* if no jobs are left, check if MOM should be restarted */
  if (((job *)GET_NEXT(svr_alljobs)) == NULL)
    MOMCheckRestart();

  return;
  }  /* END mom_job_purge() */
Example #13
0
/*
 * remtree()
 *
 * Removes dirname from the file system. If dirname is a directory its
 * contents are removed first (recursing into sub-directories) and then the
 * directory itself; anything else is simply unlinked. Symbolic links are
 * not followed because lstat() is used.
 *
 * @param dirname - path of the file or directory to remove
 * @return 0 when everything was removed, -1 when dirname could not be
 *         examined/opened or when any removal below it failed. A failure
 *         on one entry does not stop the remaining entries from being
 *         processed, and is never hidden by a later success.
 */
int remtree(

  char *dirname)

  {
  DIR           *dir;

  struct dirent *pdir;
  char           namebuf[MAXPATHLEN];
  int            len;
  int            err;
  int            rtnv = 0;
#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)

  struct stat64  sb;
#else

  struct stat    sb;
#endif

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)

  if (lstat64(dirname, &sb) == -1)
#else
  if (lstat(dirname, &sb) == -1)
#endif
    {

    if (errno != ENOENT)
      log_err(errno, __func__, (char *)"stat");

    return(-1);
    }

  if (S_ISDIR(sb.st_mode))
    {
    if ((dir = opendir(dirname)) == NULL)
      {
      if (errno != ENOENT)
        log_err(errno, __func__, (char *)"opendir");

      return(-1);
      }

    while ((pdir = readdir(dir)) != NULL)
      {
      /* skip the "." and ".." entries */
      if (pdir->d_name[0] == '.' && (pdir->d_name[1] == '\0' ||
         (pdir->d_name[1] == '.' && pdir->d_name[2] == '\0')))
        continue;

      len = snprintf(namebuf, sizeof(namebuf), "%s/%s", dirname, pdir->d_name);

      if ((len < 0) || ((size_t)len >= sizeof(namebuf)))
        {
        /* never act on a truncated path: it could name a different file */
        snprintf(log_buffer, sizeof(log_buffer), "path too long, skipping %s/%s",
          dirname,
          pdir->d_name);

        log_err(ENAMETOOLONG, __func__, log_buffer);

        rtnv = -1;

        continue;
        }

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)
      if (lstat64(namebuf, &sb) == -1)
#else
      if (lstat(namebuf, &sb) == -1)
#endif
        {
        log_err(errno, __func__, (char *)"stat");

        rtnv = -1;

        continue;
        }

      if (S_ISDIR(sb.st_mode))
        {
        /* keep an earlier failure: only ever move rtnv towards -1 */
        if (remtree(namebuf) != 0)
          rtnv = -1;
        }
      else if (unlink(namebuf) < 0)
        {
        /* capture errno before any formatting call can disturb it */
        err = errno;

        if (err != ENOENT)
          {
          snprintf(log_buffer, sizeof(log_buffer), "unlink failed on %s", namebuf);
          log_err(err, __func__, log_buffer);
          
          rtnv = -1;
          }
        }
      else if (LOGLEVEL >= 7)
        {
        snprintf(log_buffer, sizeof(log_buffer), "unlink(1) succeeded on %s", namebuf);

        log_ext(-1, __func__, log_buffer, LOG_DEBUG);
        }
      }    /* END while ((pdir = readdir(dir)) != NULL) */

    closedir(dir);

    if (rmdir(dirname) < 0)
      {
      err = errno;

      if ((err != ENOENT) && (err != EINVAL))
        {
        snprintf(log_buffer, sizeof(log_buffer), "rmdir failed on %s",
                dirname);

        log_err(err, __func__, log_buffer);

        rtnv = -1;
        }
      }
    else if (LOGLEVEL >= 7)
      {
      snprintf(log_buffer, sizeof(log_buffer), "rmdir succeeded on %s", dirname);

      log_ext(-1, __func__, log_buffer, LOG_DEBUG);
      }
    }
  else if (unlink(dirname) < 0)
    {
    err = errno;

    snprintf(log_buffer,sizeof(log_buffer),"unlink failed on %s",dirname);
    log_err(err,__func__,log_buffer);

    rtnv = -1;
    }
  else if (LOGLEVEL >= 7)
    {
    snprintf(log_buffer, sizeof(log_buffer), "unlink(2) succeeded on %s", dirname);

    log_ext(-1, __func__, log_buffer, LOG_DEBUG);
    }

  return(rtnv);
  }  /* END remtree() */
Example #14
0
void *contact_sched(

  void *new_cmd)  /* I */

  {
  int   sock;

  char  tmpLine[1024];
  char  EMsg[1024];

  char  log_buf[LOCAL_LOG_BUF_SIZE];
  int cmd = *(int *)new_cmd;

  free(new_cmd);

  /* connect to the Scheduler */
  sock = client_to_svr(pbs_scheduler_addr, pbs_scheduler_port, 1, EMsg);

  if (sock < 0)
    {
    /* Thread exit */

    return(NULL);
    }

  add_scheduler_conn(
    sock,
    FromClientDIS,
    pbs_scheduler_addr,
    pbs_scheduler_port,
    PBS_SOCK_INET,
    NULL);

  pthread_mutex_lock(scheduler_sock_jobct_mutex);
  scheduler_sock = sock;
  pthread_mutex_unlock(scheduler_sock_jobct_mutex);

  pthread_mutex_lock(svr_conn[sock].cn_mutex);
  svr_conn[sock].cn_authen = PBS_NET_CONN_FROM_PRIVIL;
  pthread_mutex_unlock(svr_conn[sock].cn_mutex);

  /* send command to Scheduler */

  if (put_4byte(sock, cmd) < 0)
    {
    sprintf(tmpLine, "%s - port %d",
            msg_sched_nocall,
            pbs_scheduler_port);

    log_ext(errno, __func__, tmpLine, LOG_ALERT);

    close_conn(sock, FALSE);

    /* Thread exit */
    return(NULL);
    }

  /*
   * call process_pbs_server_port_scheduler which will
   * handle 1 or more batch requests that may be received
   * from the scheduler.
   */

  process_pbs_server_port_scheduler(&sock);

  sprintf(log_buf, msg_sched_called, (cmd != SCH_ERROR) ? PSchedCmdType[cmd] : "ERROR");

  log_event(PBSEVENT_SCHED,PBS_EVENTCLASS_SERVER,server_name,log_buf);

  /* Thread exit */
  return(NULL);
  }  /* END contact_sched() */
Example #15
0
/*
 * send_job()
 *
 * Forks a child process that transfers the job to the server or MOM
 * listening at hostaddr:port.
 *
 * Parent (main server): registers a WORK_Deferred_Child work task for the
 * child (post_func is the task function, data is stored in wt_parm2) and
 * returns ROUTE_DEFERRED. ROUTE_PERM_FAILURE is returned when the signal
 * mask cannot be set, the fork fails, or the work task cannot be created.
 * SIGCHLD is blocked while the task is being set up and is unblocked again
 * on every parent path that follows the fork.
 *
 * Child: never returns to the caller; it exits with
 *    0 - the job was committed (or, for MOVE_TYPE_Exec, the destination
 *        reported that the job already exists)
 *    1 - fatal error, do not retry
 *    2 - all RETRY attempts were used up on retryable errors
 *   10 - a request timed out
 */
int send_job(

  job       *jobp,
  pbs_net_t  hostaddr, /* host address, host byte order */
  int        port, /* service port, host byte order */
  int        move_type, /* move, route, or execute */
  void (*post_func)(struct work_task *),     /* after move */
  void      *data)  /* ptr to optional batch_request to be put */
                    /* in the work task structure */

  {
  tlist_head  attrl;
  enum conn_type cntype = ToServerDIS;
  int    con;
  char  *destin = jobp->ji_qs.ji_destin;
  int    encode_type;
  int    i;
  int    NumRetries;

  char  *id = "send_job";

  attribute *pattr;

  pid_t  pid;

  struct attropl *pqjatr;      /* list (single) of attropl for quejob */
  char  *safail = "sigaction failed\n";
  char  *spfail = "sigprocmask failed\n";
  char   script_name[MAXPATHLEN + 1];
  sigset_t  child_set, all_set;

  struct  sigaction child_action;

  struct work_task *ptask;

  mbool_t        Timeout = FALSE;

  char          *pc;

  sigemptyset(&child_set);
  sigaddset(&child_set, SIGCHLD);
  sigfillset(&all_set);

  /* block SIGCHLD until work task is established */

  if (sigprocmask(SIG_BLOCK, &child_set, NULL) == -1)
    {
    log_err(errno,id,spfail);

    pbs_errno = PBSE_SYSTEM;

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      "cannot set signal mask");

    return(ROUTE_PERM_FAILURE);
    }

  if (LOGLEVEL >= 6)
    {
    /* NOTE(review): the message built here is not the one handed to
     * log_event() below - confirm which of the two was intended */
    sprintf(log_buffer,"about to send job - type=%d",
      move_type);
 
    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      "forking in send_job");
    }

  pid = fork();

  if (pid == -1)
    {
    /* error on fork */

    log_err(errno, id, "fork failed\n");

    if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
      log_err(errno, id, spfail);

    pbs_errno = PBSE_SYSTEM;

    return(ROUTE_PERM_FAILURE);
    }

  if (pid != 0)
    {
    /* The parent (main server) */

    /* create task to monitor job startup */

    /* CRI:   need way to report to scheduler job is starting, not started */

    ptask = set_task(WORK_Deferred_Child, pid, post_func, jobp);

    if (ptask == NULL)
      {
      log_err(errno, id, msg_err_malloc);

      /* do not leave SIGCHLD blocked in the server on this error path;
       * the fork-failure path above unblocks it the same way */

      if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
        log_err(errno, id, spfail);

      return(ROUTE_PERM_FAILURE);
      }

    ptask->wt_parm2 = data;

    append_link(
      &((job *)jobp)->ji_svrtask,
      &ptask->wt_linkobj,
      ptask);

    /* now can unblock SIGCHLD */

    if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
      log_err(errno, id, spfail);

    if (LOGLEVEL >= 1)
      {
      extern long   DispatchTime[];
      extern job   *DispatchJob[];
      extern char  *DispatchNode[];

      extern time_t time_now;

      struct pbsnode *NP;

      /* record job dispatch time in the first free dispatch slot */

      int jindex;

      for (jindex = 0;jindex < 20;jindex++)
        {
        if (DispatchJob[jindex] == NULL)
          {
          DispatchTime[jindex] = time_now;

          DispatchJob[jindex] = jobp;

          if ((NP = PGetNodeFromAddr(hostaddr)) != NULL)
            DispatchNode[jindex] = NP->nd_name;
          else
            DispatchNode[jindex] = NULL;

          break;
          }
        }
      }

    /* SUCCESS */

    return(ROUTE_DEFERRED);
    }  /* END if (pid != 0) */

  /*
   * the child process
   *
   * set up signal catcher for error return
   */

  rpp_terminate();

  child_action.sa_handler = net_move_die;

  sigfillset(&child_action.sa_mask);

  child_action.sa_flags = 0;

  if (sigaction(SIGHUP, &child_action, NULL))
    log_err(errno, id, safail);

  if (sigaction(SIGINT, &child_action, NULL))
    log_err(errno, id, safail);

  if (sigaction(SIGQUIT, &child_action, NULL))
    log_err(errno, id, safail);

  /* signal handling is set, now unblock */

  if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
    log_err(errno, id, spfail);

  /* encode job attributes to be moved */

  CLEAR_HEAD(attrl);

  /* select attributes/resources to send based on move type */

  if (move_type == MOVE_TYPE_Exec)
    {
    /* moving job to MOM - ie job start */

    resc_access_perm = ATR_DFLAG_MOM;
    encode_type = ATR_ENCODE_MOM;
    cntype = ToServerDIS;
    }
  else
    {
    /* moving job to alternate server? */

    resc_access_perm =
      ATR_DFLAG_USWR |
      ATR_DFLAG_OPWR |
      ATR_DFLAG_MGWR |
      ATR_DFLAG_SvRD;

    encode_type = ATR_ENCODE_SVR;

    /* clear default resource settings */

    svr_dequejob(jobp);
    }

  pattr = jobp->ji_wattr;

  /* an attribute is sent when its definition flags match the access
   * permission chosen above; "session_id" is also sent whenever the job
   * has a checkpoint name set */

  for (i = 0;i < JOB_ATR_LAST;i++)
    {
    if (((job_attr_def + i)->at_flags & resc_access_perm) ||
      ((strncmp((job_attr_def + i)->at_name,"session_id",10) == 0) &&
      (jobp->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET)))
      {
      (job_attr_def + i)->at_encode(
        pattr + i,
        &attrl,
        (job_attr_def + i)->at_name,
        NULL,
        encode_type);
      }
    }    /* END for (i) */

  attrl_fixlink(&attrl);

  /* put together the job script file name */

  strcpy(script_name, path_jobs);

  if (jobp->ji_wattr[JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET)
    {
    strcat(script_name, jobp->ji_arraystruct->ai_qs.fileprefix);
    }
  else
    {
    strcat(script_name, jobp->ji_qs.ji_fileprefix);
    }

  strcat(script_name, JOB_SCRIPT_SUFFIX);


  pbs_errno = 0;
  con = -1;

  for (NumRetries = 0;NumRetries < RETRY;NumRetries++)
    {
    int rc;

    /* connect to receiving server with retries */

    if (NumRetries > 0)
      {
      /* recycle after an error */

      if (con >= 0)
        svr_disconnect(con);

      /* check pbs_errno from previous attempt */

      if (should_retry_route(pbs_errno) == -1)
        {
        sprintf(log_buffer, "child failed in previous commit request for job %s",
                jobp->ji_qs.ji_jobid);

        log_err(pbs_errno, id, log_buffer);

        exit(1); /* fatal error, don't retry */
        }

      /* back off exponentially: 2, 4, 8, ... seconds */

      sleep(1 << NumRetries);
      }

    /* NOTE:  on node hangs, svr_connect is successful */

    if ((con = svr_connect(hostaddr, port, 0, cntype)) == PBS_NET_RC_FATAL)
      {
      sprintf(log_buffer, "send_job failed to %lx port %d",
        hostaddr,
        port);

      log_err(pbs_errno, id, log_buffer);

      exit(1);
      }

    if (con == PBS_NET_RC_RETRY)
      {
      pbs_errno = 0; /* should retry */

      continue;
      }

    /*
     * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
     * we are recovering after being down or a late failure, we
     * just want to send the "ready-to-commit/commit"
     */

    if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM)
      {
      if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT)
        {
        jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;

        job_save(jobp, SAVEJOB_QUICK);
        }

      pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;

      if ((pc = PBSD_queuejob(
                  con,
                  jobp->ji_qs.ji_jobid,
                  destin,
                  pqjatr,
                  NULL)) == NULL)
        {
        if ((pbs_errno == PBSE_EXPIRED) || (pbs_errno == PBSE_READ_REPLY_TIMEOUT))
          {
          /* queue job timeout based on pbs_tcp_timeout */

          Timeout = TRUE;
          }

        if ((pbs_errno == PBSE_JOBEXIST) && (move_type == MOVE_TYPE_Exec))
          {
          /* already running, mark it so */

          log_event(
            PBSEVENT_ERROR,
            PBS_EVENTCLASS_JOB,
            jobp->ji_qs.ji_jobid,
            "MOM reports job already running");

          exit(0);
          }

        sprintf(log_buffer, "send of job to %s failed error = %d",
          destin,
          pbs_errno);

        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          jobp->ji_qs.ji_jobid,
          log_buffer);

        continue;
        }  /* END if ((pc = PBSD_queuejob() == NULL) */

      free(pc);

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT)
        {
        if (PBSD_jscript(con, script_name, jobp->ji_qs.ji_jobid) != 0)
          continue;
        }

      /* XXX may need to change the logic below, if we are sending the job to
         a mom on the same host and the mom and server are not sharing the same
         spool directory, then we still need to move the file */

      if ((move_type == MOVE_TYPE_Exec) &&
          (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
          (hostaddr != pbs_server_addr))
        {
        /* send files created on prior run */

        if ((move_job_file(con,jobp,StdOut) != 0) ||
            (move_job_file(con,jobp,StdErr) != 0) ||
            (move_job_file(con,jobp,Checkpoint) != 0))
          {
          continue;
          }
        }

      /* ignore signals */

      if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");

      jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;

      job_save(jobp, SAVEJOB_QUICK);
      }
    else
      {
      /* ignore signals */

      if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");
      }

    if (PBSD_rdytocmt(con, jobp->ji_qs.ji_jobid) != 0)
      {
      if (sigprocmask(SIG_UNBLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");

      continue;
      }


    if ((rc = PBSD_commit(con, jobp->ji_qs.ji_jobid)) != 0)
      {
      int errno2;

      /* NOTE:  errno is modified by log_err */

      errno2 = errno;

      sprintf(log_buffer, "send_job commit failed, rc=%d (%s)",
              rc,
              (connection[con].ch_errtxt != NULL) ? connection[con].ch_errtxt : "N/A");

      log_ext(errno2, id, log_buffer, LOG_WARNING);

      /* if failure occurs, pbs_mom should purge job and pbs_server should set *
         job state to idle w/error msg */

      if (errno2 == EINPROGRESS)
        {
        /* request is still being processed */

        /* increase tcp_timeout in qmgr? */

        Timeout = TRUE;

        /* do we need a continue here? */

        sprintf(log_buffer, "child commit request timed-out for job %s, increase tcp_timeout?",
                jobp->ji_qs.ji_jobid);

        log_ext(errno2, id, log_buffer, LOG_WARNING);

        /* don't retry on timeout--break out and report error! */

        break;
        }
      else
        {
        sprintf(log_buffer, "child failed in commit request for job %s",
                jobp->ji_qs.ji_jobid);

        log_ext(errno2, id, log_buffer, LOG_CRIT);

        /* FAILURE */

        exit(1);
        }
      }    /* END if ((rc = PBSD_commit(con,jobp->ji_qs.ji_jobid)) != 0) */

    svr_disconnect(con);

    /* child process is done */

    /* SUCCESS */

    exit(0);
    }  /* END for (NumRetries) */

  /* only reached when no attempt succeeded: retries exhausted or timeout */

  if (con >= 0)
    svr_disconnect(con);

  if (Timeout == TRUE)
    {
    /* 10 indicates that job migrate timed out, server will mark node down *
          and abort the job - see post_sendmom() */

    sprintf(log_buffer, "child timed-out attempting to start job %s",
            jobp->ji_qs.ji_jobid);

    log_ext(pbs_errno, id, log_buffer, LOG_WARNING);

    exit(10);
    }

  if (should_retry_route(pbs_errno) == -1)
    {
    sprintf(log_buffer, "child failed and will not retry job %s",
      jobp->ji_qs.ji_jobid);

    log_err(pbs_errno, id, log_buffer);

    exit(1);
    }

  exit(2);

  /*NOTREACHED*/

  return(ROUTE_SUCCESS);
  }  /* END send_job() */
Example #16
0
/*
 * remtree()
 *
 * Removes dirname from the file system. If dirname is a directory its
 * contents are removed first (recursing into sub-directories) and then the
 * directory itself; anything else is simply unlinked. Symbolic links are
 * not followed because lstat() is used.
 *
 * @param dirname - path of the file or directory to remove
 * @return 0 when everything was removed, -1 when dirname could not be
 *         examined/opened or when any removal below it failed. A failure
 *         on one entry does not stop the remaining entries from being
 *         processed, and is never hidden by a later success.
 */
int remtree(

  char *dirname)

  {
  static char id[] = "remtree";
  DIR  *dir;

  struct dirent *pdir;
  char           namebuf[MAXPATHLEN];
  int            len;
  int            err;
  int            rtnv = 0;
#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)

  struct stat64 sb;
#else

  struct stat sb;
#endif

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)

  if (lstat64(dirname, &sb) == -1)
#else
  if (lstat(dirname, &sb) == -1)
#endif
    {

    if (errno != ENOENT)
      log_err(errno, id, "stat");

    return(-1);
    }

  if (S_ISDIR(sb.st_mode))
    {
    if ((dir = opendir(dirname)) == NULL)
      {
      if (errno != ENOENT)
        log_err(errno, id, "opendir");

      return(-1);
      }

    while ((pdir = readdir(dir)) != NULL)
      {
      /* skip exactly "." and ".."; other names that merely start with
       * dots (e.g. "..data") are ordinary entries and must be removed */
      if ((pdir->d_name[0] == '.') &&
          ((pdir->d_name[1] == '\0') ||
           ((pdir->d_name[1] == '.') && (pdir->d_name[2] == '\0'))))
        continue;

      len = snprintf(namebuf, sizeof(namebuf), "%s/%s", dirname, pdir->d_name);

      if ((len < 0) || ((size_t)len >= sizeof(namebuf)))
        {
        /* never act on a truncated path: it could name a different file */
        snprintf(log_buffer, sizeof(log_buffer), "path too long, skipping %s/%s",
          dirname,
          pdir->d_name);

        log_err(ENAMETOOLONG, id, log_buffer);

        rtnv = -1;

        continue;
        }

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64) && defined(LARGEFILE_WORKS)
      if (lstat64(namebuf, &sb) == -1)
#else
      if (lstat(namebuf, &sb) == -1)
#endif
        {
        log_err(errno, id, "stat");

        rtnv = -1;

        continue;
        }

      if (S_ISDIR(sb.st_mode))
        {
        /* keep an earlier failure: only ever move rtnv towards -1 */
        if (remtree(namebuf) != 0)
          rtnv = -1;
        }
      else if (unlink(namebuf) < 0)
        {
        /* capture errno before any formatting call can disturb it */
        err = errno;

        if (err != ENOENT)
          {
          snprintf(log_buffer, sizeof(log_buffer), "unlink failed on %s", namebuf);
          log_err(err, id, log_buffer);
          
          rtnv = -1;
          }
        }
      else if (LOGLEVEL >= 7)
        {
        snprintf(log_buffer, sizeof(log_buffer), "unlink(1) succeeded on %s", namebuf);

        log_ext(-1, id, log_buffer, LOG_DEBUG);
        }
      }    /* END while ((pdir = readdir(dir)) != NULL) */

    closedir(dir);

    if (rmdir(dirname) < 0)
      {
      err = errno;

      if ((err != ENOENT) && (err != EINVAL))
        {
        snprintf(log_buffer, sizeof(log_buffer), "rmdir failed on %s",
                dirname);

        log_err(err, id, log_buffer);

        rtnv = -1;
        }
      }
    else if (LOGLEVEL >= 7)
      {
      snprintf(log_buffer, sizeof(log_buffer), "rmdir succeeded on %s", dirname);

      log_ext(-1, id, log_buffer, LOG_DEBUG);
      }
    }
  else if (unlink(dirname) < 0)
    {
    err = errno;

    snprintf(log_buffer,sizeof(log_buffer),"unlink failed on %s",dirname);
    log_err(err,id,log_buffer);

    rtnv = -1;
    }
  else if (LOGLEVEL >= 7)
    {
    snprintf(log_buffer, sizeof(log_buffer), "unlink(2) succeeded on %s", dirname);

    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  return(rtnv);
  }  /* END remtree() */
Example #17
0
int run_pelog(

  int   which,      /* I (one of PE_*) */
  char *specpelog,  /* I - script path */
  job  *pjob,       /* I - associated job */
  int   pe_io_type) /* I */

  {
  char *id = "run_pelog";

  struct sigaction act, oldact;
  char *arg[12];
  int   fds1 = 0;
  int   fds2 = 0;
  int   fd_input;
  char  resc_list[2048];
  char  resc_used[2048];

  struct stat sbuf;
  char   sid[20];
  char   exit_stat[11];
  int    waitst;
  int    isjoined;  /* boolean */
  char   buf[MAXPATHLEN + 1024];
  char   pelog[MAXPATHLEN + 1024];

  int    jobtypespecified = 0;

  resource      *r;

  char          *EmptyString = "";

  int            LastArg;
  int            aindex;

  int            rc;

  char          *ptr;

  if ((pjob == NULL) || (specpelog == NULL) || (specpelog[0] == '\0'))
    {
    return(0);
    }

  ptr = pjob->ji_wattr[(int)JOB_ATR_jobtype].at_val.at_str;

  if (ptr != NULL)
    {
    jobtypespecified = 1;

    snprintf(pelog,sizeof(pelog),"%s.%s",
      specpelog,
      ptr);
    }
  else
    {
    strncpy(pelog,specpelog,sizeof(pelog));
    }

  rc = stat(pelog,&sbuf);

  if ((rc == -1) && (jobtypespecified == 1))
    {
    strncpy(pelog,specpelog,sizeof(pelog));

    rc = stat(pelog,&sbuf);
    }

  if (rc == -1)
    {
    if (errno == ENOENT || errno == EBADF)
      {
      /* epilog/prolog script does not exist */

      if (LOGLEVEL >= 5)
        {
        static char tmpBuf[1024];

        sprintf(log_buffer, "%s script '%s' for job %s does not exist (cwd: %s,pid: %d)",
          PPEType[which],
          (pelog != NULL) ? pelog : "NULL",
          (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
          getcwd(tmpBuf, sizeof(tmpBuf)),
          (int)getpid());

        log_record(PBSEVENT_SYSTEM, 0, id, log_buffer);
        }

#ifdef ENABLE_CSA
      if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
        {
        /*
          * Add a workload management end record
        */
        if (LOGLEVEL >= 8)
          {
          sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - no user epilog",
            pjob->ji_qs.ji_jobid);

          log_err(-1, id, log_buffer);
          }

        add_wkm_end(pjob->ji_wattr[(int)JOB_ATR_pagg_id].at_val.at_ll,

                    pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
        }

#endif /* ENABLE_CSA */

      return(0);
      }

    return(pelog_err(pjob,pelog,errno,"cannot stat"));
    }

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer,"running %s script '%s' for job %s",
      PPEType[which],
      (pelog != NULL) ? pelog : "NULL",
      pjob->ji_qs.ji_jobid);

    log_ext(-1,id,log_buffer,LOG_DEBUG);  /* not actually an error--but informational */
    }

  /* script must be owned by root, be regular file, read and execute by user *
   * and not writeable by group or other */

  if(which == PE_PROLOGUSERJOB || which == PE_EPILOGUSERJOB)
    {
    if ((sbuf.st_uid != pjob->ji_qs.ji_un.ji_momt.ji_exuid) ||
        (!S_ISREG(sbuf.st_mode)) ||
        ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
        (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
      {
      return(pelog_err(pjob,pelog,-1,"permission Error"));
      }
    }
  else if ((sbuf.st_uid != 0) ||
      (!S_ISREG(sbuf.st_mode)) ||
      ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
      (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
    {
    return(pelog_err(pjob,pelog,-1,"permission Error"));
    }

  if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER))
    {
    /* script must also be read and execute by other */

    if ((sbuf.st_mode & (S_IROTH | S_IXOTH)) != (S_IROTH | S_IXOTH))
      {
      return(pelog_err(pjob, pelog, -1, "permission Error"));
      }
    }

  fd_input = pe_input(pjob->ji_qs.ji_jobid);

  if (fd_input < 0)
    {
    return(pelog_err(pjob, pelog, -2, "no pro/epilogue input file"));
    }

  run_exit = 0;

  child = fork();

  if (child > 0)
    {
    int KillSent = FALSE;

    /* parent - watch for prolog/epilog to complete */

    close(fd_input);

    act.sa_handler = pelogalm;

    sigemptyset(&act.sa_mask);

    act.sa_flags = 0;

    sigaction(SIGALRM, &act, &oldact);

    /* it would be nice if the harvest routine could block for 5 seconds,
       and if the prolog is not complete in that time, mark job as prolog
       pending, append prolog child, and continue */

    /* main loop should attempt to harvest prolog in non-blocking mode.
       If unsuccessful after timeout, job should be terminated, and failure
       reported.  If successful, mom should unset prolog pending, and
       continue with job start sequence.  Mom should report job as running
       while prologpending flag is set.  (NOTE:  must track per job prolog
       start time)
    */

    alarm(pe_alarm_time);

    while (waitpid(child, &waitst, 0) < 0)
      {
      if (errno != EINTR)
        {
        /* exit loop. non-alarm based failure occurred */

        run_exit = -3;

        MOMPrologFailureCount++;

        break;
        }

      if (run_exit == -4)
        {
        if (KillSent == FALSE)
          {
          MOMPrologTimeoutCount++;

          /* timeout occurred */

          KillSent = TRUE;

          /* NOTE:  prolog/epilog may be locked in KERNEL space and unkillable */

          alarm(5);
          }
        else
          {
          /* cannot kill prolog/epilog, give up */

          run_exit = -5;

          break;
          }
        }
      }    /* END while (wait(&waitst) < 0) */

    /* epilog/prolog child completed */
#ifdef ENABLE_CSA
    if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
      {
      /*
       * Add a workload management end record
      */
      if (LOGLEVEL >= 8)
        {
        sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - after user epilog",
                pjob->ji_qs.ji_jobid);

        log_err(-1, id, log_buffer);
        }

      add_wkm_end(pjob->ji_wattr[(int)JOB_ATR_pagg_id].at_val.at_ll,

                  pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
      }

#endif /* ENABLE_CSA */

    alarm(0);

    /* restore the previous handler */

    sigaction(SIGALRM, &oldact, 0);

    if (run_exit == 0)
      {
      if (WIFEXITED(waitst))
        {
        run_exit = WEXITSTATUS(waitst);
        }
      }
    }
  else
    {
    /* child - run script */

    log_close(0);

    if (lockfds >= 0)
      {
      close(lockfds);

      lockfds = -1;
      }

    net_close(-1);

    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER) || (which == PE_PROLOGUSERJOB) || which == PE_EPILOGUSERJOB)
      {
      if (setgroups(
          pjob->ji_grpcache->gc_ngroup,
          (gid_t *)pjob->ji_grpcache->gc_groups) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgroups() for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }

      if (setgid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgid(%lu) for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }

      if (setuid(pjob->ji_qs.ji_un.ji_momt.ji_exuid) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setuid(%lu) failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }
      }

    if (fd_input != 0)
      {
      close(0);

      if (dup(fd_input) == -1) {}

      close(fd_input);
      }

    if (pe_io_type == PE_IO_TYPE_NULL)
      {
      /* no output, force to /dev/null */

      fds1 = open("/dev/null", O_WRONLY, 0600);
      fds2 = open("/dev/null", O_WRONLY, 0600);
      }
    else if (pe_io_type == PE_IO_TYPE_STD)
      {
      /* open job standard out/error */

      /*
       * We need to know if files are joined or not.
       * If they are then open the correct file and duplicate it to the other
      */

      isjoined = is_joined(pjob);

      switch (isjoined)
        {
        case -1:

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds1 = dup(fds2);

          break;

        case 1:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = dup(fds1);

          break;

        default:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);
          break;
        }
      }

    if (pe_io_type != PE_IO_TYPE_ASIS)
      {
      /* If PE_IO_TYPE_ASIS, leave as is, already open to job */

      if (fds1 != 1)
        {
        close(1);

        if (dup(fds1) == -1) {}

        close(fds1);
        }

      if (fds2 != 2)
        {
        close(2);

        if (dup(fds2) == -1) {}

        close(fds2);
        }
      }

    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER) || (which == PE_PROLOGUSERJOB) || (which == PE_EPILOGUSERJOB))
      {
      if (chdir(pjob->ji_grpcache->gc_homedir) != 0)
        {
        /* warn only, no failure */

        sprintf(log_buffer,
          "PBS: chdir to %s failed: %s (running user %s in current directory)",
          pjob->ji_grpcache->gc_homedir,
          strerror(errno),
          which == PE_PROLOGUSER ? "prologue" : "epilogue");

        if (write(2, log_buffer, strlen(log_buffer)) == -1) {}

        fsync(2);
        }
      }

    /* for both prolog and epilog */

    if (DEBUGMODE == 1)
      {
      fprintf(stderr, "PELOGINFO:  script:'%s'  jobid:'%s'  euser:'******'  egroup:'%s'  jobname:'%s' SSID:'%ld'  RESC:'%s'\n",
              pelog,
              pjob->ji_qs.ji_jobid,
              pjob->ji_wattr[(int)JOB_ATR_euser].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_session_id].at_val.at_long,
              resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list)));
      }

    arg[0] = pelog;

    arg[1] = pjob->ji_qs.ji_jobid;
    arg[2] = pjob->ji_wattr[(int)JOB_ATR_euser].at_val.at_str;
    arg[3] = pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str;
    arg[4] = pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str;

    set_resource_vars(pjob,NULL);

    /* NOTE:  inside child */

    if ( which == PE_EPILOG || which == PE_EPILOGUSER || which == PE_EPILOGUSERJOB )
      {
      /* for epilog only */

      sprintf(sid, "%ld",
              pjob->ji_wattr[(int)JOB_ATR_session_id].at_val.at_long);
      sprintf(exit_stat,"%d",
              pjob->ji_qs.ji_un.ji_exect.ji_exitstat);

      arg[5] = sid;
      arg[6] = resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[7] = resc_to_string(pjob, (int)JOB_ATR_resc_used, resc_used, sizeof(resc_used));
      arg[8] = pjob->ji_wattr[(int)JOB_ATR_in_queue].at_val.at_str;
      arg[9] = pjob->ji_wattr[(int)JOB_ATR_account].at_val.at_str;
      arg[10] = exit_stat;
      arg[11] = NULL;

      LastArg = 11;
      }
    else if (which == PE_MAGRATHEA)
      {
      char *cc = NULL, *c = NULL;

      setenv("MAGRATHEA_CLUSTER",pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str,1);

      if ((pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_flags & ATR_VFLAG_SET) &&
          (pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_val.at_str))
        {
        c = cloud_mom_mapping(pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_val.at_str,mom_host,&cc);
        }

      if (c)
        arg[5]=c;
      else
        arg[5]=mom_host;

      setenv("MAGRATHEA_VIRTUAL_HOST",arg[5],1);

      if (cc)
        {
        setenv("MAGRATHEA_VIRTUAL_ALTERNATIVE",cc,1);
        free(cc);
        }

      if (pjob->ji_wattr[(int)JOB_ATR_vlan_id].at_val.at_str != NULL )
        {
        setenv("MAGRATHEA_VLANID",pjob->ji_wattr[(int)JOB_ATR_vlan_id].at_val.at_str,1);
        }

      switch (is_cloud_job(pjob))
        {
        case 1: setenv("MAGRATHEA_TYPE","create",1); break;
        case 2: setenv("MAGRATHEA_TYPE","internal",1); break;
        default: setenv("MAGRATHEA_TYPE","none",1); break;
        }


      arg[6]=(char *)0;
      LastArg = 6;
      }
    else
      {
      /* prolog */

      arg[5] = resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[6] = pjob->ji_wattr[(int)JOB_ATR_in_queue].at_val.at_str;
      arg[7] = pjob->ji_wattr[(int)JOB_ATR_account].at_val.at_str;
      arg[8] = NULL;

      LastArg = 8;
      }

    for (aindex = 0;aindex < LastArg;aindex++)
      {
      if (arg[aindex] == NULL)
        arg[aindex] = EmptyString;
      }  /* END for (aindex) */

    /*
     * Pass Resource_List.nodes request in environment
     * to allow pro/epi-logue setup/teardown of system
     * settings.  --pw, 2 Jan 02
     * Fixed to use putenv for sysV compatibility.
     *  --troy, 11 jun 03
     *
     */

    r = find_resc_entry(
          &pjob->ji_wattr[(int)JOB_ATR_resource],
          find_resc_def(svr_resc_def, "nodes", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *envname = "PBS_RESOURCE_NODES=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(r->rs_value.at_val.at_str) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,r->rs_value.at_val.at_str);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (r != NULL) */

    r = find_resc_entry(
          &pjob->ji_wattr[(int)JOB_ATR_resource],
          find_resc_def(svr_resc_def, "gres", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *envname = "PBS_RESOURCE_GRES=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(r->rs_value.at_val.at_str) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,r->rs_value.at_val.at_str);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (r != NULL) */

    if (TTmpDirName(pjob, buf))
      {
      const char *envname = "TMPDIR=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(buf) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,buf);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (TTmpDirName(pjob,&buf)) */

    /* Set PBS_SCHED_HINT */

      {
      char *envname = "PBS_SCHED_HINT";
      char *envval;
      char *envstr;

      if ((envval = get_job_envvar(pjob, envname)) != NULL)
        {
        envstr = malloc((strlen(envname) + strlen(envval) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            envval);

          putenv(envstr);
          }
        }
      }

    /* Set PBS_NODENUM */
      {
      char *envname = "PBS_NODENUM";
      char *envstr;

      sprintf(buf, "%d",
        pjob->ji_nodeid);

      envstr = malloc((strlen(envname) + strlen(buf) + 2) * sizeof(char));

      if (envstr != NULL)
        {
        sprintf(envstr,"%s=%d",
          envname,
          pjob->ji_nodeid);

        putenv(envstr);
        }
      }

    /* Set PBS_MSHOST */
      {
      char *envname = "PBS_MSHOST";
      char *envstr;

      if ((pjob->ji_vnods[0].vn_host != NULL) && (pjob->ji_vnods[0].vn_host->hn_host != NULL))
        {
        envstr = malloc((strlen(envname) + strlen(pjob->ji_vnods[0].vn_host->hn_host) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            pjob->ji_vnods[0].vn_host->hn_host);

          putenv(envstr);
          }
        }
      }

    /* Set PBS_NODEFILE */
      {
      char *envname = "PBS_NODEFILE";
      char *envstr;

      if (pjob->ji_flags & MOM_HAS_NODEFILE)
        {
        sprintf(buf, "%s/%s",
          path_aux,
          pjob->ji_qs.ji_jobid);

        envstr = malloc((strlen(envname) + strlen(buf) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            buf);

          putenv(envstr);
          }
        }
      }

    /* Set umask */
    if (pjob->ji_wattr[(int)JOB_ATR_umask].at_flags & ATR_VFLAG_SET)
      {
      char *buf = calloc(strlen("PBS_UMASK=")+16,1);
      if (buf != NULL)
        {
        sprintf(buf,"PBS_UMASK=%#o",pjob->ji_wattr[(int)JOB_ATR_umask].at_val.at_long);
        putenv(buf);
        }
      }

    /* Set PBS_O_Workdir */
      {
      char *envname = "PBS_O_WORKDIR";
      char *workdir_val;
      char *envstr;

      workdir_val = get_job_envvar(pjob,envname);
      if (workdir_val != NULL)
        {
        envstr = malloc((strlen(workdir_val) + strlen(envname) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            workdir_val);

          putenv(envstr);
          }
        }
      }

    /* SET BEOWULF_JOB_MAP */

      {

      struct array_strings *vstrs;

      int VarIsSet = 0;
      int j;

      vstrs = pjob->ji_wattr[(int)JOB_ATR_variables].at_val.at_arst;

      for (j = 0;j < vstrs->as_usedptr;++j)
        {
        if (!strncmp(
              vstrs->as_string[j],
              "BEOWULF_JOB_MAP=",
              strlen("BEOWULF_JOB_MAP=")))
          {
          VarIsSet = 1;

          break;
          }
        }

      if (VarIsSet == 1)
        {
        char *envstr;

        envstr = malloc((strlen(vstrs->as_string[j])) * sizeof(char));

        if (envstr != NULL)
          {
          strcpy(envstr,vstrs->as_string[j]);

          putenv(envstr);
          }
        }
      }

    execv(pelog,arg);

    sprintf(log_buffer,"execv of %s failed: %s\n",
      pelog,
      strerror(errno));

    if (write(2, log_buffer, strlen(log_buffer)) == -1) 
      {
      /* cannot write message to stderr */

      /* NO-OP */
      }

    fsync(2);

    exit(255);
    }  /* END else () */

  switch (run_exit)
    {
    case 0:

      /* SUCCESS */

      /* NO-OP */

      break;

    case - 3:

      pelog_err(pjob, pelog, run_exit, "child wait interrupted");

      break;

    case - 4:

      pelog_err(pjob, pelog, run_exit, "prolog/epilog timeout occurred, child cleaned up");

      break;

    case - 5:

      pelog_err(pjob, pelog, run_exit, "prolog/epilog timeout occurred, cannot kill child");

      break;

    default:

      pelog_err(pjob, pelog, run_exit, "nonzero p/e exit status");

      break;
    }  /* END switch (run_exit) */

  return(run_exit);
  }  /* END run_pelog() */
int is_gpustat_get(

  struct pbsnode           *np,      /* I (modified) */
  unsigned int             &i,
  std::vector<std::string> &status_info)

  {
  pbs_attribute      temp;
  const char        *gpuid = NULL;
  char               log_buf[LOCAL_LOG_BUF_SIZE];
  int                gpuidx = -1;
  std::stringstream  gpuinfo;
  int                need_delimiter = FALSE;
  int                reportedgpucnt = 0;
  int                startgpucnt = 0;
  int                drv_ver = 0;

  if (np == NULL)
    {
    sprintf(log_buf, "Invalid parameter for np  passed to is_gpustat_get");
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, log_buf);
    return(PBSE_BAD_PARAMETER);
    }

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "received gpu status from node %s", np->nd_name);

    log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf);
    }

  /* save current gpu count for node */
  startgpucnt = np->nd_ngpus;

  /*
   *  Before filling the "temp" pbs_attribute, initialize it.
   *  The second and third parameter to decode_arst are never
   *  used, so just leave them empty. (GBS)
   */

  memset(&temp, 0, sizeof(temp));

  if (decode_arst(&temp, NULL, NULL, NULL, 0))
    {
    DBPRT(("is_gpustat_get:  cannot initialize attribute\n"));

    return(DIS_NOCOMMIT);
    }

  i++;

  for (; i < status_info.size(); i++)
    {
    /* add the info to the "temp" attribute */
    const char *str = status_info[i].c_str();

    /* get timestamp */
    if (!strncmp(str, "timestamp=", 10))
      {
      if (decode_arst(&temp, NULL, NULL, str, 0))
        {
        DBPRT(("is_gpustat_get: cannot add attributes\n"));

        free_arst(&temp);
        move_past_gpu_status(i, status_info);

        return(DIS_NOCOMMIT);
        }
      continue;
      }

    /* get driver version, if there is one */
    if (!strncmp(str, "driver_ver=", 11))
      {
      if (decode_arst(&temp, NULL, NULL, str, 0))
        {
        DBPRT(("is_gpustat_get: cannot add attributes\n"));

        free_arst(&temp);
        move_past_gpu_status(i, status_info);

        return(DIS_NOCOMMIT);
        }
      drv_ver = atoi(str + 11);
      continue;
      }
    else if (!strcmp(str, END_GPU_STATUS))
      {
      break;
      }

    /* gpuid must come before the rest or we will be in trouble */

    if (!strncmp(str, "gpuid=", 6))
      {
      if (gpuinfo.str().size() > 0)
        {
        if (decode_arst(&temp, NULL, NULL, gpuinfo.str().c_str(), 0))
          {
          DBPRT(("is_gpustat_get: cannot add attributes\n"));

          free_arst(&temp);
          move_past_gpu_status(i, status_info);

          return(DIS_NOCOMMIT);
          }

        gpuinfo.str("");
        }

      gpuid = &str[6];

      /*
       * Get this gpus index, if it does not yet exist then find an empty entry.
       * We need to allow for the gpu status results being returned in
       * different orders since the nvidia order may change upon mom's reboot
       */

      gpuidx = gpu_entry_by_id(np, gpuid, TRUE);
      if (gpuidx == -1)
        {
        /*
         * Failure - we could not get / create a nd_gpusn entry for this gpu,
         * log an error message.
         */

        if (LOGLEVEL >= 3)
          {
          sprintf(log_buf,
            "Failed to get/create entry for gpu %s on node %s\n",
            gpuid,
            np->nd_name);

          log_ext(-1, __func__, log_buf, LOG_DEBUG);
          }

        free_arst(&temp);
        move_past_gpu_status(i, status_info);

        return(DIS_SUCCESS);
        }

      gpuinfo << "gpu[" << gpuidx << "]=gpu_id=" << gpuid << ";";
      need_delimiter = FALSE;
      reportedgpucnt++;
      np->nd_gpusn[gpuidx].driver_ver = drv_ver;

      /* mark that this gpu node is not virtual */
      np->nd_gpus_real = TRUE;
      
      /*
       * if we have not filled in the gpu_id returned by the mom node
       * then fill it in
       */
      if ((gpuidx >= 0) && (np->nd_gpusn[gpuidx].gpuid == NULL))
        {
        np->nd_gpusn[gpuidx].gpuid = strdup(gpuid);
        }      

      }
    else
      {
      if (need_delimiter)
        {
        gpuinfo << ";";
        }
     
      gpuinfo << str;
      
      need_delimiter = TRUE;
      }

    /* check current gpu mode and determine gpu state */
    
    if (!memcmp(str, "gpu_mode=", 9))
      {
      if ((!memcmp(str + 9, "Normal", 6)) || (!memcmp(str + 9, "Default", 7)))
        {
        np->nd_gpusn[gpuidx].mode = gpu_normal;
        if (gpu_has_job(np, gpuidx))
          {
          np->nd_gpusn[gpuidx].state = gpu_shared;
          }
        else
          {
          np->nd_gpusn[gpuidx].inuse = 0;
          np->nd_gpusn[gpuidx].state = gpu_unallocated;
          }
        }
      else if ((!memcmp(str + 9, "Exclusive", 9)) ||
              (!memcmp(str + 9, "Exclusive_Thread", 16)))
        {
        np->nd_gpusn[gpuidx].mode = gpu_exclusive_thread;
        if (gpu_has_job(np, gpuidx))
          {
          np->nd_gpusn[gpuidx].state = gpu_exclusive;
          }
        else
          {
          np->nd_gpusn[gpuidx].inuse = 0;
          np->nd_gpusn[gpuidx].state = gpu_unallocated;
          }
        }
      else if (!memcmp(str + 9, "Exclusive_Process", 17))
        {
        np->nd_gpusn[gpuidx].mode = gpu_exclusive_process;
        if (gpu_has_job(np, gpuidx))
          {
          np->nd_gpusn[gpuidx].state = gpu_exclusive;
          }
        else
          {
          np->nd_gpusn[gpuidx].inuse = 0;
          np->nd_gpusn[gpuidx].state = gpu_unallocated;
          }
        }
      else if (!memcmp(str + 9, "Prohibited", 10))
        {
        np->nd_gpusn[gpuidx].mode = gpu_prohibited;
        np->nd_gpusn[gpuidx].state = gpu_unavailable;
        }
      else
        {
        /* unknown mode, default to prohibited */
        np->nd_gpusn[gpuidx].mode = gpu_prohibited;
        np->nd_gpusn[gpuidx].state = gpu_unavailable;
        if (LOGLEVEL >= 3)
          {
          sprintf(log_buf,
            "GPU %s has unknown mode on node %s",
            gpuid,
            np->nd_name);

          log_ext(-1, __func__, log_buf, LOG_DEBUG);
          }
        }
 
      /* add gpu_mode so it gets added to the pbs_attribute */

      if (need_delimiter)
        {
        gpuinfo << ";";
        }

      switch (np->nd_gpusn[gpuidx].state)
        {
        case gpu_unallocated:

          gpuinfo << "gpu_state=Unallocated";
          break;

        case gpu_shared:

          gpuinfo << "gpu_state=Shared";
          break;

        case gpu_exclusive:

          gpuinfo << "gpu_state=Exclusive";
          break;

        case gpu_unavailable:

          gpuinfo << "gpu_state=Unavailable";
          break;
        }
      }

    } /* end of while disrst */

  if (gpuinfo.str().size() > 0)
    {
    if (decode_arst(&temp, NULL, NULL, gpuinfo.str().c_str(), 0))
      {
      DBPRT(("is_gpustat_get: cannot add attributes\n"));
      
      free_arst(&temp);
      move_past_gpu_status(i, status_info);

      return(DIS_NOCOMMIT);
      }
    }

  /* maintain the gpu count, if it has changed we need to update the nodes file */

  if (reportedgpucnt != startgpucnt)
    {
    np->nd_ngpus = reportedgpucnt;

    /* update the nodes file */
    update_nodes_file(np);
    }

  node_gpustatus_list(&temp, np, ATR_ACTION_ALTER);
  move_past_gpu_status(i, status_info);

  return(DIS_SUCCESS);
  }  /* END is_gpustat_get() */
Example #19
0
int send_job_work(

  char           *job_id,
  const char     *node_name, /* I */
  int             type,      /* I */
  int            *my_err,    /* O */
  batch_request  *preq)      /* M */

  {
  int                   rc = LOCUTION_FAIL;
  int                   ret = PBSE_NONE;
  int                   local_errno = 0;
  tlist_head            attrl;

  int                   encode_type;
  int                   mom_err = PBSE_NONE;
  int                   resc_access_perm;
  std::string           script_name;
  char                 *pc;
  char                  stdout_path[MAXPATHLEN + 1];
  char                  stderr_path[MAXPATHLEN + 1];
  char                  chkpt_path[MAXPATHLEN + 1];
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  long                  start_time = time(NULL);
  bool                  attempt_to_queue_job = false;
  bool                  change_substate_on_attempt_to_queue = false;
  bool                  need_to_send_job_script = false;
  bool                  job_has_run = false;
  job                  *pjob = NULL;
  char                  job_destin[PBS_MAXROUTEDEST+1];

  bool                  Timeout = false;
  
  unsigned long         job_momaddr = -1;
  unsigned short        job_momport = -1;

  if ((pjob = svr_find_job(job_id, TRUE)) == NULL)
    {
    *my_err = PBSE_JOBNOTFOUND;
    req_reject(-1, 0, preq, NULL, NULL);
    return(PBSE_JOBNOTFOUND);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if (strlen(pjob->ji_qs.ji_destin) != 0)
    strcpy(job_destin, pjob->ji_qs.ji_destin);
  else
    job_destin[0] = '\0';

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;

  if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT)
    need_to_send_job_script = TRUE;

  if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN)
    job_has_run = TRUE;

  if ((job_destin[0] != '\0') && 
      (type != MOVE_TYPE_Exec))
    {
    if ((pc = strchr(job_destin, '@')) != NULL)
      {
      job_momaddr = get_hostaddr(&local_errno, pc + 1);
      job_momport = pbs_server_port_dis;
      }
    }

  /* encode job attributes to be moved */
  CLEAR_HEAD(attrl);

  /* select attributes/resources to send based on move type */
  if (type == MOVE_TYPE_Exec)
    {
    /* moving job to MOM - ie job start */

    resc_access_perm = ATR_DFLAG_MOM;
    encode_type = ATR_ENCODE_MOM;
    }
  else
    {
    /* moving job to alternate server? */
    resc_access_perm =
      ATR_DFLAG_USWR |
      ATR_DFLAG_OPWR |
      ATR_DFLAG_MGWR |
      ATR_DFLAG_SvRD;

    encode_type = ATR_ENCODE_SVR;

    /* clear default resource settings */
    ret = svr_dequejob(pjob, FALSE);
    if (ret)
      {
      job_mutex.set_unlock_on_exit(false);
      return(ret);
      }
    }

  encode_attributes(attrl, pjob, resc_access_perm, encode_type);

  rc = get_job_script_path(pjob, script_name);

  if (rc != PBSE_NONE)
    {
    if (rc == PBSE_JOB_RECYCLED)
      job_mutex.set_unlock_on_exit(false);
  
    free_server_attrs(&attrl);

    return(rc);
    }
  
  if (job_has_run)
    {
    if ((get_job_file_path(pjob, StdOut, stdout_path, sizeof(stdout_path)) != 0) ||
        (get_job_file_path(pjob, StdErr, stderr_path, sizeof(stderr_path)) != 0) ||
        (get_job_file_path(pjob, Checkpoint, chkpt_path, sizeof(chkpt_path)) != 0))
      {
      job_mutex.unlock();
      goto send_job_work_end;
      }
    }

  /* if the job is substate JOB_SUBSTATE_TRNOUTCM it means we are 
   * recovering after being down or a late failure so we just want 
   * to send the "ready-to-commit/commit" */
  if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM)
    {
    attempt_to_queue_job = true;

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT)
      change_substate_on_attempt_to_queue = true;
    }
  
  job_mutex.unlock();
  
  rc = send_job_over_network_with_retries(job_id,
                                          job_destin,
                                          attrl,
                                          attempt_to_queue_job,
                                          change_substate_on_attempt_to_queue,
                                          Timeout,
                                          script_name.c_str(),
                                          need_to_send_job_script,
                                          job_has_run,
                                          job_momaddr,
                                          job_momport,
                                          stdout_path,
                                          stderr_path,
                                          chkpt_path,
                                          type,
                                          my_err,
                                          &mom_err);

  if (Timeout == TRUE)
    {
    /* 10 indicates that job migrate timed out, server will mark node down *
          and abort the job - see post_sendmom() */
    sprintf(log_buf, "child timed-out attempting to start job %s", job_id);
    log_ext(*my_err, __func__, log_buf, LOG_WARNING);
    rc = LOCUTION_REQUEUE;
    }
  else if (rc != LOCUTION_SUCCESS)
    {
    if (should_retry_route(*my_err) == -1)
      {
      sprintf(log_buf, "child failed and will not retry job %s", job_id);
      log_err(*my_err, __func__, log_buf);
      rc = LOCUTION_FAIL;
      }
    else
      rc = LOCUTION_REQUEUE;
    }
  
  if (type == MOVE_TYPE_Exec)
    {
    if (node_name != NULL)
      update_failure_counts(node_name, rc);
    else
      update_failure_counts(job_destin, rc);
    }

send_job_work_end:
  finish_move_process(job_id, preq, start_time, node_name, rc, type, mom_err);
  free_server_attrs(&attrl);

  return(rc);
  } /* END send_job_work() */
Example #20
0
/*
 * contact_sched()
 *
 * Connects to the scheduler with client_to_svr(), registers the new socket
 * through add_conn() / net_add_close_func() and writes cmd to it with
 * put_4byte().
 *
 * @param cmd (I) scheduler command to send
 *
 * @return the connected socket on success; -1 if the connection could not be
 *         established or the command could not be written (in the latter case
 *         the connection is closed again).
 */
static int contact_sched(

  int cmd)  /* I */

  {
  char       *routine = "contact_sched";

  char        conn_err[1024];
  char        msg[1024];

  const char *cmd_name;

  int         sched_sock;

  /* connect to the Scheduler */

#if 0   /* don't check if scheduler runs on same node as server */

  if (!addr_ok(pbs_scheduler_addr))
    {
    pbs_errno = EHOSTDOWN;
    return -1;
    }

#endif

  sched_sock = client_to_svr(pbs_scheduler_addr, pbs_scheduler_port, 1, conn_err);

  if (sched_sock < 0)
    {
    /* FAILURE - could not reach the scheduler */

    bad_node_warning(pbs_scheduler_addr);

#if 0
    sprintf(msg, "%s - port %d %s",
            msg_sched_nocall,
            pbs_scheduler_port,
            conn_err);

    log_ext(errno,routine,msg,LOG_ALERT);
#endif

    return(-1);
    }

  /* register the connection so replies are handled by process_request */

  add_conn(sched_sock,
           FromClientDIS,
           pbs_scheduler_addr,
           pbs_scheduler_port,
           PBS_SOCK_INET,
           process_request);

  svr_conn[sched_sock].cn_authen = PBS_NET_CONN_FROM_PRIVIL;

  net_add_close_func(sched_sock, scheduler_close);

  /* send command to Scheduler */

  if (put_4byte(sched_sock, cmd) >= 0)
    {
    /* SUCCESS - record which command was sent */

    if (cmd != SCH_ERROR)
      cmd_name = PSchedCmdType[cmd];
    else
      cmd_name = "ERROR";

    sprintf(log_buffer, msg_sched_called, cmd_name);

    log_event(PBSEVENT_SCHED, PBS_EVENTCLASS_SERVER, server_name, log_buffer);

    return(sched_sock);
    }

  /* FAILURE - command could not be written, drop the connection */

  sprintf(msg, "%s - port %d", msg_sched_nocall, pbs_scheduler_port);

  log_ext(errno, routine, msg, LOG_ALERT);

  close_conn(sched_sock);

  return(-1);
  }  /* END contact_sched() */
Example #21
0
/*
 * run_pelog()
 *
 * Runs a prologue/epilogue script for a job in a forked child and returns
 * the status collected by get_child_exit_status().
 *
 * For the user-level script types (PE_PROLOGUSER, PE_EPILOGUSER,
 * PE_PROLOGUSERJOB, PE_EPILOGUSERJOB) the supplementary groups, effective
 * gid and effective uid are switched to the job's before the script file is
 * checked. Once that switch has been attempted, every return path calls
 * undo_set_euid_egid() to switch back, including the pipe and fork failure
 * paths.
 *
 * @param which      - (I) one of PE_*
 * @param specpelog  - (I) script path
 * @param pjob       - (I) associated job
 * @param pe_io_type - (I) io type, handed through to the child
 * @param delete_job - (I) called before a job being deleted (purge -p)
 *
 * @return 0 when pjob is NULL or specpelog is NULL/empty, PBSE_NONE when the
 *         script does not exist, -1 on a setup failure (getgroups,
 *         allocation, identity switch, pipe or fork), otherwise the value
 *         produced by check_if_pelog_exists(), check_pelog_permissions(),
 *         pelog_err() or get_child_exit_status().
 */
int run_pelog(

  int   which,      /* I (one of PE_*) */
  char *specpelog,  /* I - script path */
  job  *pjob,       /* I - associated job */
  int   pe_io_type, /* I - io type */
  int   delete_job)  /* I - called before a job being deleted (purge -p) */

  {
  int               fd_input = -1;

  struct stat       sbuf;
  char              pelog[MAXPATHLEN + 1024];

  uid_t             real_uid;
  gid_t            *real_gids = NULL;
  gid_t             real_gid;
  int               num_gids;

  bool              jobtypespecified = false;

  int               rc;

  char             *ptr;

  /* every descriptor starts at -1 so the failure path can tell which ones
   * were actually opened */
  int               pipes[2];
  int               kid_read = -1;
  int               kid_write = -1;
  int               parent_read = -1;
  int               parent_write = -1;

  if ((pjob == NULL) ||
      (specpelog == NULL) ||
      (specpelog[0] == '\0'))
    {
    return(0);
    }

  /* when the job carries a job type the script name is "<specpelog>.<type>" */
  ptr = pjob->ji_wattr[JOB_ATR_jobtype].at_val.at_str;

  if (ptr != NULL)
    {
    jobtypespecified = true;

    snprintf(pelog,sizeof(pelog),"%s.%s",
      specpelog,
      ptr);
    }
  else
    {
    snprintf(pelog, sizeof(pelog), "%s", specpelog);
    }
    
  real_uid = getuid();
  real_gid = getgid();

  /* a size of 0 only asks for the number of supplementary groups */
  if ((num_gids = getgroups(0, real_gids)) < 0)
    {
    log_err(errno, __func__, (char *)"getgroups failed\n");
    
    return(-1);
    }

  /* to support root squashing, become the user before performing file checks */
  if ((which == PE_PROLOGUSER) || 
      (which == PE_EPILOGUSER) || 
      (which == PE_PROLOGUSERJOB) || 
      (which == PE_EPILOGUSERJOB))
    {

    /* remember the current groups so they can be handed to
     * undo_set_euid_egid() later */
    real_gids = (gid_t *)calloc(num_gids, sizeof(gid_t));
    
    if (real_gids == NULL)
      {
      log_err(ENOMEM, __func__, (char *)"Cannot allocate memory! FAILURE\n");
      
      return(-1);
      }
    
    if (getgroups(num_gids,real_gids) < 0)
      {
      log_err(errno, __func__, (char *)"getgroups failed\n");
      free(real_gids);
      
      return(-1);
      }
    
    /* pjob->ji_grpcache will not be set if using LDAP and LDAP not set */
    /* It is possible that ji_grpcache failed to allocate as well. 
       Make sure ji_grpcache is not NULL */
    if (pjob->ji_grpcache != NULL)
      {
      if (setgroups(
            pjob->ji_grpcache->gc_ngroup,
            (gid_t *)pjob->ji_grpcache->gc_groups) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgroups() for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));
      
        log_err(errno, __func__, log_buffer);
      
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
      
        return(-1);
        }
      }
    else
      {
      snprintf(log_buffer, sizeof(log_buffer),
        "pjob->ji_grpcache is null. check_pwd likely failed.");
      log_err(-1, __func__, log_buffer);
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(-1);
      }
    
    if (setegid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "setegid(%lu) for UID = %lu failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      
      return(-1);
      }
    
    if (setuid_ext(pjob->ji_qs.ji_un.ji_momt.ji_exuid, TRUE) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "seteuid(%lu) failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);

      return(-1);
      }
    }

  rc = check_if_pelog_exists(which, pelog, sizeof(pelog), sbuf, specpelog, *pjob, jobtypespecified);

  switch (rc)
    {
      
    case PBSE_NONE:

      // continue
      break;

    case PELOG_DOESNT_EXIST:

      // not an error but we are done
      rc = PBSE_NONE;
      
      // fall through

    default:

      // error, notify caller 
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(rc);
    }

  if (LOGLEVEL >= 5)
    {
    /* pelog may be longer than log_buffer, so the write must be bounded */
    snprintf(log_buffer, sizeof(log_buffer), "running %s script '%s' for job %s",
      PPEType[which],
      (pelog[0] != '\0') ? pelog : "NULL",
      pjob->ji_qs.ji_jobid);

    log_ext(-1, __func__, log_buffer, LOG_DEBUG);
    }

  /* script must be owned by root, be regular file, read and execute by user *
   * and not writeable by group or other */

  if ((rc = check_pelog_permissions(sbuf, reduceprologchecks, pjob, pelog, which)) != PBSE_NONE)
    {
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);
    return(rc);
    }

  fd_input = pe_input(pjob->ji_qs.ji_jobid);

  if (fd_input < 0)
    {
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);
    return(pelog_err(pjob, pelog, -2,  "no pro/epilogue input file"));
    }

  run_exit = 0;

  // Set up communications between parent and child so that
  // child can send back the session id.
  if (pipe(pipes) == -1)
    goto run_pelog_setup_failure;

  parent_read = pipes[0];

  /* keep the child's ends of the pipes off descriptors 0-2 */
  if (pipes[1] < 3)
    {
    kid_write = fcntl(pipes[1], F_DUPFD, 3);

    close(pipes[1]);
    }
  else
    {
    kid_write = pipes[1];
    }

  if (pipe(pipes) == -1)
    goto run_pelog_setup_failure;

  parent_write = pipes[1];

  if (pipes[0] < 3)
    {
    kid_read = fcntl(pipes[0], F_DUPFD, 3);

    close(pipes[0]);
    }
  else
    {
    kid_read = pipes[0];
    }

  /* either F_DUPFD call above may have failed */
  if ((kid_read < 0) ||
      (kid_write < 0))
    goto run_pelog_setup_failure;

  child = fork();

  if (child > 0)
    {
    /* parent - watch for prolog/epilog to complete */

    close(fd_input);

    close(kid_read);
    close(kid_write);
    read(parent_read,(char *)&childSessionID,sizeof(childSessionID));
    close(parent_read);
    close(parent_write);


    /* switch back to root if necessary */
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    rc = get_child_exit_status(pjob, pelog, which);

    return(rc);
    }
  else if (child == 0)
    {
    /* child - run script */
    prepare_and_run_pelog_as_child(pjob, pe_io_type, delete_job, specpelog, pelog, which,
        parent_read, parent_write, kid_read, kid_write, fd_input);
    
    // NOTREACHED: the above function doesn't return
    exit(255);
    }  /* END else () */

  // ERROR - log before the cleanup below can disturb errno
  log_err(errno, __func__, "Fork failed");

run_pelog_setup_failure:

  /* no child is running: release every descriptor opened so far and switch
   * back to the original identity before reporting the failure */
  if (parent_read >= 0)
    close(parent_read);

  if (parent_write >= 0)
    close(parent_write);

  if (kid_read >= 0)
    close(kid_read);

  if (kid_write >= 0)
    close(kid_write);

  if (fd_input >= 0)
    close(fd_input);

  undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
  free(real_gids);

  return(-1);
  }  /* END run_pelog() */