Example #1
0
int run_pelog(

  int   which,      /* I (one of PE_*) */
  char *specpelog,  /* I - script path */
  job  *pjob,       /* I - associated job */
  int   pe_io_type, /* I - io type */
  int   deletejob)  /* I - called before a job being deleted (purge -p) */

  {
  struct sigaction  act;
  struct sigaction  oldact;
  char             *arg[12];
  int               fds1 = 0;
  int               fds2 = 0;
  int               fd_input;
  char              resc_list[2048];
  char              resc_used[2048];

  struct stat       sbuf;
  char              sid[20];
  char              exit_stat[11];
  int               waitst;
  int               isjoined;  /* boolean */
  char              buf[MAXPATHLEN + 1024];
  char              pelog[MAXPATHLEN + 1024];

  uid_t             real_uid;
  gid_t            *real_gids = NULL;
  gid_t             real_gid;
  int               num_gids;

  int               jobtypespecified = 0;

  resource         *r;

  char             *EmptyString = (char *)"";

  int               LastArg;
  int               aindex;

  int               rc;

  char             *ptr;

  int               moabenvcnt = 14;  /* # of entries in moabenvs */
  static char      *moabenvs[] = {
      (char *)"MOAB_NODELIST",
      (char *)"MOAB_JOBID",
      (char *)"MOAB_JOBNAME",
      (char *)"MOAB_USER",
      (char *)"MOAB_GROUP",
      (char *)"MOAB_CLASS",
      (char *)"MOAB_TASKMAP",
      (char *)"MOAB_QOS",
      (char *)"MOAB_PARTITION",
      (char *)"MOAB_PROCCOUNT",
      (char *)"MOAB_NODECOUNT",
      (char *)"MOAB_MACHINE",
      (char *)"MOAB_JOBARRAYINDEX",
      (char *)"MOAB_JOBARRAYRANGE"
      };

  if ((pjob == NULL) || (specpelog == NULL) || (specpelog[0] == '\0'))
    {
    return(0);
    }

  ptr = pjob->ji_wattr[JOB_ATR_jobtype].at_val.at_str;

  if (ptr != NULL)
    {
    jobtypespecified = 1;

    snprintf(pelog,sizeof(pelog),"%s.%s",
      specpelog,
      ptr);
    }
  else
    {
    snprintf(pelog, sizeof(pelog), "%s", specpelog);
    }
    
  real_uid = getuid();
  real_gid = getgid();
  if ((num_gids = getgroups(0, real_gids)) < 0)
    {
    log_err(errno, __func__, (char *)"getgroups failed\n");
    
    return(-1);
    }

  /* to support root squashing, become the user before performing file checks */
  if ((which == PE_PROLOGUSER) || 
      (which == PE_EPILOGUSER) || 
      (which == PE_PROLOGUSERJOB) || 
      (which == PE_EPILOGUSERJOB))
    {

    real_gids = (gid_t *)calloc(num_gids, sizeof(gid_t));
    
    if (real_gids == NULL)
      {
      log_err(ENOMEM, __func__, (char *)"Cannot allocate memory! FAILURE\n");
      
      return(-1);
      }
    
    if (getgroups(num_gids,real_gids) < 0)
      {
      log_err(errno, __func__, (char *)"getgroups failed\n");
      free(real_gids);
      
      return(-1);
      }
    
    /* pjob->ji_grpcache will not be set if using LDAP and LDAP not set */
    /* It is possible that ji_grpcache failed to allocate as well. 
       Make sure ji_grpcache is not NULL */
    if (pjob->ji_grpcache != NULL)
      {
      if (setgroups(
            pjob->ji_grpcache->gc_ngroup,
            (gid_t *)pjob->ji_grpcache->gc_groups) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgroups() for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));
      
        log_err(errno, __func__, log_buffer);
      
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
      
        return(-1);
        }
      }
    else
      {
      sprintf(log_buffer, "pjob->ji_grpcache is null. check_pwd likely failed.");
      log_err(-1, __func__, log_buffer);
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(-1);
      }
    
    if (setegid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "setegid(%lu) for UID = %lu failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      
      return(-1);
      }
    
    if (setuid_ext(pjob->ji_qs.ji_un.ji_momt.ji_exuid, TRUE) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "seteuid(%lu) failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
      
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);

      return(-1);
      }
    }

  rc = stat(pelog,&sbuf);

  if ((rc == -1) && (jobtypespecified == 1))
    {
    snprintf(pelog, sizeof(pelog), "%s", specpelog);

    rc = stat(pelog,&sbuf);
    }

  if (rc == -1)
    {
    if (errno == ENOENT || errno == EBADF)
      {
      /* epilog/prolog script does not exist */

      if (LOGLEVEL >= 5)
        {
        static char tmpBuf[1024];

        sprintf(log_buffer, "%s script '%s' for job %s does not exist (cwd: %s,pid: %d)",
          PPEType[which],
          (pelog[0] != '\0') ? pelog : "NULL",
          pjob->ji_qs.ji_jobid,
          getcwd(tmpBuf, sizeof(tmpBuf)),
          getpid());

        log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
        }

#ifdef ENABLE_CSA
      if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
        {
        /*
          * Add a workload management end record
        */
        if (LOGLEVEL >= 8)
          {
          sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - no user epilog",
            pjob->ji_qs.ji_jobid);

          log_err(-1, __func__, log_buffer);
          }

        add_wkm_end(pjob->ji_wattr[JOB_ATR_pagg_id].at_val.at_ll,
            pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
        }

#endif /* ENABLE_CSA */

      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);

      return(0);
      }
      
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    return(pelog_err(pjob,pelog,errno,(char *)"cannot stat"));
    }

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer,"running %s script '%s' for job %s",
      PPEType[which],
      (pelog[0] != '\0') ? pelog : "NULL",
      pjob->ji_qs.ji_jobid);

    log_ext(-1, __func__, log_buffer, LOG_DEBUG);  /* not actually an error--but informational */
    }

  /* script must be owned by root, be regular file, read and execute by user *
   * and not writeable by group or other */

  if (reduceprologchecks == TRUE)
    {
    if ((!S_ISREG(sbuf.st_mode)) ||
        (!(sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))))
      {
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
      }
    }
  else
    {
    if (which == PE_PROLOGUSERJOB || which == PE_EPILOGUSERJOB)
      {
      if ((sbuf.st_uid != pjob->ji_qs.ji_un.ji_momt.ji_exuid) || 
          (!S_ISREG(sbuf.st_mode)) ||
          ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
          (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
        {
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
        return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
        }
      }
    else if ((sbuf.st_uid != 0) ||
        (!S_ISREG(sbuf.st_mode)) ||
        ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||\
        (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
      {
      undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
      free(real_gids);
      return(pelog_err(pjob,pelog,-1, (char *)"permission Error"));
      }
    
    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER))
      {
      /* script must also be read and execute by other */
      
      if ((sbuf.st_mode & (S_IROTH | S_IXOTH)) != (S_IROTH | S_IXOTH))
        {
        undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
        free(real_gids);
        return(pelog_err(pjob, pelog, -1,  (char *)"permission Error"));
        }
      }
    } /* END !reduceprologchecks */

  fd_input = pe_input(pjob->ji_qs.ji_jobid);

  if (fd_input < 0)
    {
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);
    return(pelog_err(pjob, pelog, -2,  (char *)"no pro/epilogue input file"));
    }

  run_exit = 0;

  child = fork();

  if (child > 0)
    {
    int KillSent = FALSE;

    /* parent - watch for prolog/epilog to complete */

    close(fd_input);

    /* switch back to root if necessary */
    undo_set_euid_egid(which,real_uid,real_gid,num_gids,real_gids,__func__);
    free(real_gids);

    act.sa_handler = pelogalm;

    sigemptyset(&act.sa_mask);

    act.sa_flags = 0;

    sigaction(SIGALRM, &act, &oldact);

    /* it would be nice if the harvest routine could block for 5 seconds,
       and if the prolog is not complete in that time, mark job as prolog
       pending, append prolog child, and continue */

    /* main loop should attempt to harvest prolog in non-blocking mode.
       If unsuccessful after timeout, job should be terminated, and failure
       reported.  If successful, mom should unset prolog pending, and
       continue with job start sequence.  Mom should report job as running
       while prologpending flag is set.  (NOTE:  must track per job prolog
       start time)
    */

    alarm(pe_alarm_time);

    while (waitpid(child, &waitst, 0) < 0)
      {
      if (errno != EINTR)
        {
        /* exit loop. non-alarm based failure occurred */

        run_exit = -3;

        MOMPrologFailureCount++;

        break;
        }

      if (run_exit == -4)
        {
        if (KillSent == FALSE)
          {
          MOMPrologTimeoutCount++;

          /* timeout occurred */

          KillSent = TRUE;

          /* NOTE:  prolog/epilog may be locked in KERNEL space and unkillable */

          alarm(5);
          }
        else
          {
          /* cannot kill prolog/epilog, give up */

          run_exit = -5;

          break;
          }
        }
      }    /* END while (wait(&waitst) < 0) */

    /* epilog/prolog child completed */
#ifdef ENABLE_CSA
    if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
      {
      /*
       * Add a workload management end record
      */
      if (LOGLEVEL >= 8)
        {
        sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - after user epilog",
                pjob->ji_qs.ji_jobid);

        log_err(-1, __func__, log_buffer);
        }

      add_wkm_end(pjob->ji_wattr[JOB_ATR_pagg_id].at_val.at_ll,
          pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
      }

#endif /* ENABLE_CSA */

    alarm(0);

    /* restore the previous handler */

    sigaction(SIGALRM, &oldact, 0);

    if (run_exit == 0)
      {
      if (WIFEXITED(waitst))
        {
        run_exit = WEXITSTATUS(waitst);
        }
      }
    }
  else
    {
    /* child - run script */

    log_close(0);

    if (lockfds >= 0)
      {
      close(lockfds);

      lockfds = -1;
      }

    net_close(-1);

    if (fd_input != 0)
      {
      close(0);

      if (dup(fd_input) == -1) {}

      close(fd_input);
      }

    if (pe_io_type == PE_IO_TYPE_NULL)
      {
      /* no output, force to /dev/null */

      fds1 = open("/dev/null", O_WRONLY, 0600);
      fds2 = open("/dev/null", O_WRONLY, 0600);
      }
    else if (pe_io_type == PE_IO_TYPE_STD)
      {
      /* open job standard out/error */

      /*
       * We need to know if files are joined or not.
       * If they are then open the correct file and duplicate it to the other
      */

      isjoined = is_joined(pjob);

      switch (isjoined)
        {
        case -1:

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds1 = (fds2 < 0)?-1:dup(fds2);

          break;

        case 1:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = (fds1 < 0)?-1:dup(fds1);

          break;

        default:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);
          break;
        }
      }

    if (!deletejob)
      if ((fds1 < 0) ||
          (fds2 < 0))
        {
        if (fds1 >= 0)
          close(fds1);
        if (fds2 >= 0)
          close(fds2);

        exit(-1);
        }

    if (pe_io_type != PE_IO_TYPE_ASIS)
      {
      /* If PE_IO_TYPE_ASIS, leave as is, already open to job */

      if (fds1 != 1)
        {
        close(1);

        if (dup(fds1) >= 0)
          {
          close(fds1);
          }
        }

      if (fds2 != 2)
        {
        close(2);

        if (dup(fds2) >= 0)
          {
          close(fds2);
          }
        }
      }

    if ((which == PE_PROLOGUSER) || 
        (which == PE_EPILOGUSER) || 
        (which == PE_PROLOGUSERJOB) || 
        (which == PE_EPILOGUSERJOB))
      {
      if (chdir(pjob->ji_grpcache->gc_homedir) != 0)
        {
        /* warn only, no failure */

        sprintf(log_buffer,
          "PBS: chdir to %s failed: %s (running user %s in current directory)",
          pjob->ji_grpcache->gc_homedir,
          strerror(errno),
          which == PE_PROLOGUSER ? "prologue" : "epilogue");

        if (write_ac_socket(2, log_buffer, strlen(log_buffer)) == -1) {}

        fsync(2);
        }
      }

    /* for both prolog and epilog */

    if (DEBUGMODE == 1)
      {
      fprintf(stderr, "PELOGINFO:  script:'%s'  jobid:'%s'  euser:'******'  egroup:'%s'  jobname:'%s' SSID:'%ld'  RESC:'%s'\n",
              pelog,
              pjob->ji_qs.ji_jobid,
              pjob->ji_wattr[JOB_ATR_euser].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_egroup].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_jobname].at_val.at_str,
              pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long,
              resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list)));
      }

    arg[0] = pelog;

    arg[1] = pjob->ji_qs.ji_jobid;
    arg[2] = pjob->ji_wattr[JOB_ATR_euser].at_val.at_str;
    arg[3] = pjob->ji_wattr[JOB_ATR_egroup].at_val.at_str;
    arg[4] = pjob->ji_wattr[JOB_ATR_jobname].at_val.at_str;

    /* NOTE:  inside child */

    if ((which == PE_EPILOG) || 
        (which == PE_EPILOGUSER) || 
        (which == PE_EPILOGUSERJOB))
      {
      /* for epilog only */

      sprintf(sid, "%ld",
              pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long);
      sprintf(exit_stat,"%d",
              pjob->ji_qs.ji_un.ji_momt.ji_exitstat);

      arg[5] = sid;
      arg[6] = resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[7] = resc_to_string(pjob, JOB_ATR_resc_used, resc_used, sizeof(resc_used));
      arg[8] = pjob->ji_wattr[JOB_ATR_in_queue].at_val.at_str;
      arg[9] = pjob->ji_wattr[JOB_ATR_account].at_val.at_str;
      arg[10] = exit_stat;
      arg[11] = NULL;

      LastArg = 11;
      }
    else
      {
      /* prolog */

      arg[5] = resc_to_string(pjob, JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[6] = pjob->ji_wattr[JOB_ATR_in_queue].at_val.at_str;
      arg[7] = pjob->ji_wattr[JOB_ATR_account].at_val.at_str;
      arg[8] = NULL;

      LastArg = 8;
      }

    for (aindex = 0;aindex < LastArg;aindex++)
      {
      if (arg[aindex] == NULL)
        arg[aindex] = EmptyString;
      }  /* END for (aindex) */

    /*
     * Pass Resource_List.nodes request in environment
     * to allow pro/epi-logue setup/teardown of system
     * settings.  --pw, 2 Jan 02
     * Fixed to use putenv for sysV compatibility.
     *  --troy, 11 jun 03
     *
     */

    r = find_resc_entry(
          &pjob->ji_wattr[JOB_ATR_resource],
          find_resc_def(svr_resc_def, (char *)"nodes", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *ppn_str = "ppn=";
      int num_nodes = 1;
      int num_ppn = 1;

      /* PBS_RESOURCE_NODES */
      put_env_var("PBS_RESOURCE_NODES", r->rs_value.at_val.at_str);

      /* PBS_NUM_NODES */
      num_nodes = strtol(r->rs_value.at_val.at_str, NULL, 10);

      /* 
       * InitUserEnv() also calculates num_nodes and num_ppn the same way
       */
      if (num_nodes != 0)
        {
        char *tmp;
        char *other_reqs;

        /* get the ppn */
        if ((tmp = strstr(r->rs_value.at_val.at_str,ppn_str)) != NULL)
          {
          tmp += strlen(ppn_str);

          num_ppn = strtol(tmp, NULL, 10);
          }

        other_reqs = r->rs_value.at_val.at_str;

        while ((other_reqs = strchr(other_reqs, '+')) != NULL)
          {
          other_reqs += 1;
          num_nodes += strtol(other_reqs, &other_reqs, 10);
          }
        }

      sprintf(buf, "%d", num_nodes);
      put_env_var("PBS_NUM_NODES", buf);

      /* PBS_NUM_PPN */
      sprintf(buf, "%d", num_ppn);
      put_env_var("PBS_NUM_PPN", buf);

      /* PBS_NP */
      sprintf(buf, "%d", pjob->ji_numvnod);
      put_env_var("PBS_NP", buf);
      }  /* END if (r != NULL) */

    r = find_resc_entry(
          &pjob->ji_wattr[JOB_ATR_resource],
          find_resc_def(svr_resc_def, (char *)"gres", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */
      put_env_var("PBS_RESOURCE_GRES", r->rs_value.at_val.at_str);
      }

    if (TTmpDirName(pjob, buf, sizeof(buf)))
      {
      put_env_var("TMPDIR", buf);
      }

    /* Set PBS_SCHED_HINT */

    {
    char *envname = (char *)"PBS_SCHED_HINT";
    char *envval;

    if ((envval = get_job_envvar(pjob, envname)) != NULL)
      {
      put_env_var("PBS_SCHED_HINT", envval);
      }
    }

    /* Set PBS_NODENUM */

    sprintf(buf, "%d",
      pjob->ji_nodeid);
    put_env_var("PBS_NODENUM", buf);

    /* Set PBS_MSHOST */

    put_env_var("PBS_MSHOST", pjob->ji_vnods[0].vn_host->hn_host);

    /* Set PBS_NODEFILE */

    if (pjob->ji_flags & MOM_HAS_NODEFILE)
      {
      sprintf(buf, "%s/%s",
        path_aux,
        pjob->ji_qs.ji_jobid);
      put_env_var("PBS_NODEFILE", buf);
      }

    /* Set PBS_O_WORKDIR */
    {
    char *workdir_val;

    workdir_val = get_job_envvar(pjob,"PBS_O_WORKDIR");
    if (workdir_val != NULL)
      {
      put_env_var("PBS_O_WORKDIR", workdir_val);
      }
    }

    /* SET BEOWULF_JOB_MAP */

    {

    struct array_strings *vstrs;

    int VarIsSet = 0;
    int j;

    vstrs = pjob->ji_wattr[JOB_ATR_variables].at_val.at_arst;

    for (j = 0;j < vstrs->as_usedptr;++j)
      {
      if (!strncmp(
            vstrs->as_string[j],
            "BEOWULF_JOB_MAP=",
            strlen("BEOWULF_JOB_MAP=")))
        {
        VarIsSet = 1;

        break;
        }
      }

    if (VarIsSet == 1)
      {
      char *val = strchr(vstrs->as_string[j], '=');

      if (val != NULL)
        put_env_var("BEOWULF_JOB_MAP", val+1);
      }
    }

  /* Set some Moab env variables if they exist */

  if ((which == PE_PROLOG) || (which == PE_EPILOG))
    {
    char *tmp_val;

    for (aindex=0;aindex<moabenvcnt;aindex++)
      {
      tmp_val = get_job_envvar(pjob,moabenvs[aindex]);
      if (tmp_val != NULL)
        {
        put_env_var(moabenvs[aindex], tmp_val);
        }
      }
    }

  /*
   * if we want to run as user then we need to reset real user permissions
   * since it seems that some OSs use real not effective user id when execv'ing
   */

  if ((which == PE_PROLOGUSER) || 
      (which == PE_EPILOGUSER) || 
      (which == PE_PROLOGUSERJOB) || 
      (which == PE_EPILOGUSERJOB))
    {
    setuid_ext(pbsuser, TRUE);
    setegid(pbsgroup);

    if (setgid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "setgid(%lu) for UID = %lu failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
     
      exit(-1);
      }
    
    if (setuid_ext(pjob->ji_qs.ji_un.ji_momt.ji_exuid, FALSE) != 0)
      {
      snprintf(log_buffer,sizeof(log_buffer),
        "setuid(%lu) failed: %s\n",
        (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
        strerror(errno));
      
      log_err(errno, __func__, log_buffer);
     
      exit(-1);
      }
    }

    execv(pelog,arg);

    sprintf(log_buffer,"execv of %s failed: %s\n",
      pelog,
      strerror(errno));

    if (write_ac_socket(2, log_buffer, strlen(log_buffer)) == -1)
      {
      /* cannot write message to stderr */

      /* NO-OP */
      }

    fsync(2);

    exit(255);
    }  /* END else () */

  switch (run_exit)
    {
    case 0:

      /* SUCCESS */

      /* NO-OP */

      break;

    case - 3:

      pelog_err(pjob, pelog, run_exit,  (char *)"child wait interrupted");

      break;

    case - 4:

      pelog_err(pjob, pelog, run_exit,  (char *)"prolog/epilog timeout occurred, child cleaned up");

      break;

    case - 5:

      pelog_err(pjob, pelog, run_exit, (char *) "prolog/epilog timeout occurred, cannot kill child");

      break;

    default:

      pelog_err(pjob, pelog, run_exit,  (char *)"nonzero p/e exit status");

      break;
    }  /* END switch (run_exit) */

  return(run_exit);
  }  /* END run_pelog() */
Example #2
0
void
UserProc::execute()
{
	ArgList new_args;
	char    **argv;
	char	**argp;
	char	**envp;
	sigset_t	sigmask;
	MyString	a_out_name;
	MyString	shortname;
	int		user_syscall_fd = -1;
	const	int READ_END = 0;
	const	int WRITE_END = 1;
	int		pipe_fds[2];
	FILE	*cmd_fp;
	char	buf[128];
	ReliSock	*new_reli = NULL;

	pipe_fds[0] = -1;
	pipe_fds[1] = -1;

	shortname.formatstr( "condor_exec.%d.%d", cluster, proc );
	a_out_name.formatstr( "%s/%s/%s", Execute, local_dir, shortname.Value() );

		// Set up arg vector according to class of job
	switch( job_class ) {

	  case CONDOR_UNIVERSE_STANDARD:
		if( pipe(pipe_fds) < 0 ) {
			EXCEPT( "pipe()" );}

			dprintf( D_ALWAYS, "Pipe built\n" );
		
			// The user process should not try to read commands from
			// 0, 1, or 2 since we'll be using the commands to redirect
			// those.
		if( pipe_fds[READ_END] < 14 ) {
			dup2( pipe_fds[READ_END], 14 );
			close( pipe_fds[READ_END] );
			pipe_fds[READ_END] = 14;
		}
		dprintf( D_ALWAYS, "New pipe_fds[%d,%d]\n", pipe_fds[0], pipe_fds[1] );
		sprintf( buf, "%d", pipe_fds[READ_END] );
		dprintf( D_ALWAYS, "cmd_fd = %s\n", buf );

		new_args.AppendArg(shortname);
		new_args.AppendArg("-_condor_cmd_fd");
		new_args.AppendArg(buf);
		break;

	  case CONDOR_UNIVERSE_PVM:
#if 1
		EXCEPT( "Don't know how to deal with PVM jobs" );
#else
		new_args.AppendArg(shortname);
		new_args.AppendArg("-1");
		new_args.AppendArg(in);
		new_args.AppendArg(out);
		new_args.AppendArg(err);
#endif
		break;

	  case CONDOR_UNIVERSE_VANILLA:
	  	if (privsep_enabled()) {
			EXCEPT("Don't know how to deal with Vanilla jobs");
		}
		new_args.AppendArg(shortname.Value());
		break;
	}

	new_args.AppendArgsFromArgList(args);

		// take care of USER_JOB_WRAPPER
	support_job_wrapper(a_out_name,&new_args);

	MyString exec_name;
	exec_name = a_out_name;

	// If privsep is turned on, then we need to use the PrivSep
	// Switchboard to launch the job
	//
	FILE* switchboard_in_fp;
	FILE* switchboard_err_fp;
	int switchboard_child_in_fd;
	int switchboard_child_err_fd;
	if (privsep_enabled()) {

		// create the pipes that we'll use to communicate
		//
		if (!privsep_create_pipes(switchboard_in_fp,
		                          switchboard_child_in_fd,
		                          switchboard_err_fp,
		                          switchboard_child_err_fd)) {
			EXCEPT("can't launch job: privsep_create_pipes failure");
		}
	}

	argv = new_args.GetStringArray();

		// Set an environment variable that tells the job where it may put scratch data
		// even if it moves to a different directory.

		// get the environment vector
	envp = env_obj.getStringArray();

		// We may run more than one of these, so each needs its own
		// remote system call connection to the shadow
	if( job_class == CONDOR_UNIVERSE_PVM ) {
		new_reli = NewConnection( v_pid );
		user_syscall_fd = new_reli->get_file_desc();
	}

		// print out arguments to execve
	dprintf( D_ALWAYS, "Calling execve( \"%s\"", exec_name.Value() );
	for( argp = argv; *argp; argp++ ) {							// argv
		dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp );
	}
	dprintf( D_ALWAYS | D_NOHEADER, ", 0" );
	for( argp = envp; *argp; argp++ ) {							// envp
		dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp );
	}
	dprintf( D_ALWAYS | D_NOHEADER, ", 0 )\n" );


	if( (pid = fork()) < 0 ) {
		EXCEPT( "fork" );
	}

	if( pid == 0 ) {	// the child

			// Block only these 3 signals which have special meaning for
			// checkpoint/restart purposes.  Leave other signals ublocked
			// so that if we get an exception during the restart process,
			// we will get a core file to debug.
		sigemptyset( &sigmask );
		// for some reason if we block these, the user process is unable
		// to unblock some or all of them.
#if 0
		sigaddset( &sigmask, SIGUSR1 );
		sigaddset( &sigmask, SIGUSR2 );
		sigaddset( &sigmask, SIGTSTP );
#endif
		sigprocmask( SIG_SETMASK, &sigmask, 0 );

			// renice
		renice_self( "JOB_RENICE_INCREMENT" );

			// make certain the syscall sockets which are being passed
			// to the user job are setup to be blocking sockets.  this
			// is done by calling timeout(0) CEDAR method.
			// we must do this because the syscall lib does _not_ 
			// expect to see any failures due to errno EAGAIN...
		if ( SyscallStream ) {
			SyscallStream->timeout(0);
		}
		if ( new_reli ) {
			new_reli->timeout(0);
		}

			// If I'm using privledge separation, connect to the procd.
			// we need to register a family with the procd for the newly
			// created process, so that the ProcD will allow us to send
			// signals to it
			//
		if (privsep_enabled() == true) {
			MyString procd_address;
			bool response;
			bool ret;
			ProcFamilyClient pfc;

			procd_address = get_procd_address();
			ret = pfc.initialize(procd_address.Value());
			if (ret == false) {
				EXCEPT("Failure to initialize the ProcFamilyClient object");
			}

			ret = pfc.register_subfamily(getpid(), getppid(), 60, response);

			if (ret == false) {
				EXCEPT("Could not communicate with procd. Aborting.");
			}

			if (response == false) {
				EXCEPT("Procd refused to register job subfamily. Aborting.");
			}
		}

			// If there is a requested coresize for this job, enforce it.
			// Do it before the set_priv_final to ensure root can alter 
			// the coresize to the requested amount. Otherwise, just
			// use whatever the current default is.
		if (coredump_limit_exists == TRUE) {
			limit( RLIMIT_CORE, coredump_limit, CONDOR_HARD_LIMIT, "max core size" );
		}

			// child process should have only it's submitting uid, and cannot
			// switch back to root or some other uid.  
			// It'd be nice to check for errors here, but
			// unfortunately, we can't, since this only returns the
			// previous priv state, not whether it worked or not. 
			//  -Derek Wright 4/30/98
		set_user_priv_final();

		switch( job_class ) {
		  
		  case CONDOR_UNIVERSE_STANDARD:
			// if we're using PrivSep, the chdir here could fail. instead,
			// we pass the job's IWD to the switchboard via pipe
			//
		  	if (!privsep_enabled()) {
				if( chdir(local_dir) < 0 ) {
					EXCEPT( "chdir(%s)", local_dir );
				}
			}
			close( pipe_fds[WRITE_END] );
			break;

		  case CONDOR_UNIVERSE_PVM:
			if( chdir(local_dir) < 0 ) {
				EXCEPT( "chdir(%s)", local_dir );
			}
			close( pipe_fds[WRITE_END] );
			dup2( user_syscall_fd, RSC_SOCK );
			break;

		  case CONDOR_UNIVERSE_VANILLA:
			set_iwd();
			open_std_file( 0 );
			open_std_file( 1 );
			open_std_file( 2 );

			(void)close( RSC_SOCK );
			(void)close( CLIENT_LOG );

			break;
		}

			// Make sure we're not root
		if( getuid() == 0 ) {
				// EXCEPT( "We're about to start as root, aborting." );
				// You can't see this error message at all.  So, just
				// exit(4), which is what EXCEPT normally gives. 
			exit( 4 );
		}

#if defined( LINUX ) && (defined(I386) || defined(X86_64))
		// adjust the execution domain of the child to be suitable for
		// checkpointing.
		patch_personality();
#endif 

			// if we're using privsep, we'll exec the PrivSep Switchboard
			// first, which is setuid; it will then setuid to the user we
			// give it and exec the real job
			//
		if (privsep_enabled()) {
			close(fileno(switchboard_in_fp));
			close(fileno(switchboard_err_fp));
			privsep_get_switchboard_command("exec",
			                                switchboard_child_in_fd,
			                                switchboard_child_err_fd,
			                                exec_name,
			                                new_args);
			deleteStringArray(argv);
			argv = new_args.GetStringArray();
		}

			// Everything's ready, start it up...
		errno = 0;
		execve( exec_name.Value(), argv, envp );

			// A successful call to execve() never returns, so it is an
			// error if we get here.  A number of errors are possible
			// but the most likely is that there is insufficient swap
			// space to start the new process.  We don't try to log
			// anything, since we have the UID/GID of the job's owner
			// and cannot write into the log files...
		exit( JOB_EXEC_FAILED );
	}

		// The parent

		// PrivSep - we have at this point only spawned the switchboard
		// with the "exec" command. we need to use our pipe to it in
		// order to tell it how to execute the user job, and then use
		// the error pipe to make sure everything worked
		//
	if (privsep_enabled()) {

		close(switchboard_child_in_fd);
		close(switchboard_child_err_fd);

		privsep_exec_set_uid(switchboard_in_fp, uid);
		privsep_exec_set_path(switchboard_in_fp, exec_name.Value());
		privsep_exec_set_args(switchboard_in_fp, new_args);
		privsep_exec_set_env(switchboard_in_fp, env_obj);
		privsep_exec_set_iwd(switchboard_in_fp, local_dir);
		privsep_exec_set_inherit_fd(switchboard_in_fp, pipe_fds[0]);
		privsep_exec_set_inherit_fd(switchboard_in_fp, RSC_SOCK);
		privsep_exec_set_inherit_fd(switchboard_in_fp, CLIENT_LOG);
		privsep_exec_set_is_std_univ(switchboard_in_fp);
		fclose(switchboard_in_fp);

		if (!privsep_get_switchboard_response(switchboard_err_fp)) {
			EXCEPT("error starting job: "
			           "privsep get_switchboard_response failure");
		}
	}

	dprintf( D_ALWAYS, "Started user job - PID = %d\n", pid );
	if( job_class != CONDOR_UNIVERSE_VANILLA ) {
			// Send the user process its startup environment conditions
		close( pipe_fds[READ_END] );
		cmd_fp = fdopen( pipe_fds[WRITE_END], "w" );
		dprintf( D_ALWAYS, "cmd_fp = %p\n", cmd_fp );

		if( is_restart() ) {
#if 1
			fprintf( cmd_fp, "restart\n" );
			dprintf( D_ALWAYS, "restart\n" );
#else
			fprintf( cmd_fp, "restart %s\n", target_ckpt );
			dprintf( D_ALWAYS, "restart %s\n", target_ckpt );
#endif
			fprintf( cmd_fp, "end\n" );
			dprintf( D_ALWAYS, "end\n" );
		} else {
			fprintf( cmd_fp, "end\n" );
			dprintf( D_ALWAYS, "end\n" );
		}
		fclose( cmd_fp );
	}

	deleteStringArray(argv);
	deleteStringArray(envp);
	state = EXECUTING;

	if( new_reli ) {
		delete new_reli;
	}

	// removed some vanilla-specific code here
	//
	ASSERT(job_class != CONDOR_UNIVERSE_VANILLA);
}
Example #3
0
void req_mvjobfile(

  struct batch_request *preq)  /* I */

  {
  int          fds;
  enum job_file  jft;
  int          oflag;
  job         *pj;

  struct passwd *pwd;

  jft = (enum job_file)preq->rq_ind.rq_jobfile.rq_type;

  if (preq->rq_ind.rq_jobfile.rq_sequence == 0)
    oflag = O_CREAT | O_WRONLY | O_TRUNC;
  else
    oflag = O_CREAT | O_WRONLY | O_APPEND;

  pj = locate_new_job(preq->rq_conn, NULL);

  if (pj == NULL)
    pj = find_job(preq->rq_ind.rq_jobfile.rq_jobid);

  if (pj == NULL)
    {
    snprintf(log_buffer, 1024, "cannot find job %s for move of %s file",
             preq->rq_ind.rq_jobfile.rq_jobid,
             TJobFileType[jft]);

    log_err(-1, "req_mvjobfile", log_buffer);

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  if ((pj->ji_grpcache == NULL) && (check_pwd(pj) == NULL))
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  if (((pwd = getpwnam(pj->ji_wattr[(int)JOB_ATR_euser].at_val.at_str)) == NULL) ||
      ((fds = open_std_file(pj, jft, oflag, pwd->pw_gid)) < 0))
    {
    /* FAILURE */

    req_reject(PBSE_MOMREJECT, 0, preq, NULL, "password lookup failed");

    return;
    }

  if (write(
        fds,
        preq->rq_ind.rq_jobfile.rq_data,
        preq->rq_ind.rq_jobfile.rq_size) != preq->rq_ind.rq_jobfile.rq_size)
    {
    req_reject(PBSE_SYSTEM, 0, preq, NULL, "cannot create file");
    }
  else
    {
    reply_ack(preq);
    }

  close(fds);

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "successfully moved %s file for job '%s'",
            TJobFileType[jft],
            preq->rq_ind.rq_jobfile.rq_jobid);

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      log_buffer);
    }

  return;
  }  /* END req_mvjobfile() */
Example #4
0
void req_mvjobfile(

  struct batch_request *preq)  /* I */

  {
  int          fds;
  enum job_file  jft;
  int          oflag;
  job         *pj;

  struct passwd *pwd;
  char          *buf = NULL;

  jft = (enum job_file)preq->rq_ind.rq_jobfile.rq_type;

  if (preq->rq_ind.rq_jobfile.rq_sequence == 0)
    oflag = O_CREAT | O_WRONLY | O_TRUNC;
  else
    oflag = O_CREAT | O_WRONLY | O_APPEND;

  pj = locate_new_job(preq->rq_conn, NULL);

  if (pj == NULL)
    pj = mom_find_job(preq->rq_ind.rq_jobfile.rq_jobid);

  if (pj == NULL)
    {
    snprintf(log_buffer, 1024, "cannot find job %s for move of %s file",
      preq->rq_ind.rq_jobfile.rq_jobid,
      TJobFileType[jft]);

    log_err(-1, __func__, log_buffer);

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  bool good;
  good = check_pwd(pj);
  if ((pj->ji_grpcache == NULL) && 
      (good == false))
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  /* check_pwd allocated pwd and getpwnam_ext is going to allocate
     another one. Free pwd first */
  if ((pwd = getpwnam_ext(&buf, pj->ji_wattr[JOB_ATR_euser].at_val.at_str)) == NULL)
    {
    /* FAILURE */
    req_reject(PBSE_MOMREJECT, 0, preq, NULL, "password lookup failed");

    return;
    }

  if ((fds = open_std_file(pj, jft, oflag, pwd->pw_gid)) < 0)
    {
    int   keeping = 1;
    char *path = std_file_name(pj, jft, &keeping);

    snprintf(log_buffer,sizeof(log_buffer),
      "Cannot create file %s",
      path);

    req_reject(PBSE_SYSTEM, 0, preq, NULL, log_buffer);

    if (pwd)
      {
      free_pwnam(pwd, buf);
      }
    return;
    }
  if (pwd)
    {
    free_pwnam(pwd, buf);
    }

  if (write_ac_socket(
        fds,
        preq->rq_ind.rq_jobfile.rq_data,
        preq->rq_ind.rq_jobfile.rq_size) != preq->rq_ind.rq_jobfile.rq_size)
    {
    req_reject(PBSE_SYSTEM, 0, preq, NULL, "cannot create file");
    }
  else
    {
    if (LOGLEVEL >= 6)
      {
      sprintf(log_buffer, "successfully moved %s file for job '%s'",
        TJobFileType[jft],
        preq->rq_ind.rq_jobfile.rq_jobid);
      
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pj->ji_qs.ji_jobid,
        log_buffer);
      }

    reply_ack(preq);
    }

  close(fds);

  return;
  }  /* END req_mvjobfile() */
Example #5
0
int run_pelog(

  int   which,      /* I (one of PE_*) */
  char *specpelog,  /* I - script path */
  job  *pjob,       /* I - associated job */
  int   pe_io_type) /* I */

  {
  char *id = "run_pelog";

  struct sigaction act, oldact;
  char *arg[12];
  int   fds1 = 0;
  int   fds2 = 0;
  int   fd_input;
  char  resc_list[2048];
  char  resc_used[2048];

  struct stat sbuf;
  char   sid[20];
  char   exit_stat[11];
  int    waitst;
  int    isjoined;  /* boolean */
  char   buf[MAXPATHLEN + 1024];
  char   pelog[MAXPATHLEN + 1024];

  int    jobtypespecified = 0;

  resource      *r;

  char          *EmptyString = "";

  int            LastArg;
  int            aindex;

  int            rc;

  char          *ptr;

  if ((pjob == NULL) || (specpelog == NULL) || (specpelog[0] == '\0'))
    {
    return(0);
    }

  ptr = pjob->ji_wattr[(int)JOB_ATR_jobtype].at_val.at_str;

  if (ptr != NULL)
    {
    jobtypespecified = 1;

    snprintf(pelog,sizeof(pelog),"%s.%s",
      specpelog,
      ptr);
    }
  else
    {
    strncpy(pelog,specpelog,sizeof(pelog));
    }

  rc = stat(pelog,&sbuf);

  if ((rc == -1) && (jobtypespecified == 1))
    {
    strncpy(pelog,specpelog,sizeof(pelog));

    rc = stat(pelog,&sbuf);
    }

  if (rc == -1)
    {
    if (errno == ENOENT || errno == EBADF)
      {
      /* epilog/prolog script does not exist */

      if (LOGLEVEL >= 5)
        {
        static char tmpBuf[1024];

        sprintf(log_buffer, "%s script '%s' for job %s does not exist (cwd: %s,pid: %d)",
          PPEType[which],
          (pelog != NULL) ? pelog : "NULL",
          (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
          getcwd(tmpBuf, sizeof(tmpBuf)),
          (int)getpid());

        log_record(PBSEVENT_SYSTEM, 0, id, log_buffer);
        }

#ifdef ENABLE_CSA
      if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
        {
        /*
          * Add a workload management end record
        */
        if (LOGLEVEL >= 8)
          {
          sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - no user epilog",
            pjob->ji_qs.ji_jobid);

          log_err(-1, id, log_buffer);
          }

        add_wkm_end(pjob->ji_wattr[(int)JOB_ATR_pagg_id].at_val.at_ll,

                    pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
        }

#endif /* ENABLE_CSA */

      return(0);
      }

    return(pelog_err(pjob,pelog,errno,"cannot stat"));
    }

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer,"running %s script '%s' for job %s",
      PPEType[which],
      (pelog != NULL) ? pelog : "NULL",
      pjob->ji_qs.ji_jobid);

    log_ext(-1,id,log_buffer,LOG_DEBUG);  /* not actually an error--but informational */
    }

  /* script must be owned by root, be regular file, read and execute by user *
   * and not writeable by group or other */

  if(which == PE_PROLOGUSERJOB || which == PE_EPILOGUSERJOB)
    {
    if ((sbuf.st_uid != pjob->ji_qs.ji_un.ji_momt.ji_exuid) ||
        (!S_ISREG(sbuf.st_mode)) ||
        ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
        (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
      {
      return(pelog_err(pjob,pelog,-1,"permission Error"));
      }
    }
  else if ((sbuf.st_uid != 0) ||
      (!S_ISREG(sbuf.st_mode)) ||
      ((sbuf.st_mode & (S_IRUSR | S_IXUSR)) != (S_IRUSR | S_IXUSR)) ||
      (sbuf.st_mode & (S_IWGRP | S_IWOTH)))
    {
    return(pelog_err(pjob,pelog,-1,"permission Error"));
    }

  if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER))
    {
    /* script must also be read and execute by other */

    if ((sbuf.st_mode & (S_IROTH | S_IXOTH)) != (S_IROTH | S_IXOTH))
      {
      return(pelog_err(pjob, pelog, -1, "permission Error"));
      }
    }

  fd_input = pe_input(pjob->ji_qs.ji_jobid);

  if (fd_input < 0)
    {
    return(pelog_err(pjob, pelog, -2, "no pro/epilogue input file"));
    }

  run_exit = 0;

  child = fork();

  if (child > 0)
    {
    int KillSent = FALSE;

    /* parent - watch for prolog/epilog to complete */

    close(fd_input);

    act.sa_handler = pelogalm;

    sigemptyset(&act.sa_mask);

    act.sa_flags = 0;

    sigaction(SIGALRM, &act, &oldact);

    /* it would be nice if the harvest routine could block for 5 seconds,
       and if the prolog is not complete in that time, mark job as prolog
       pending, append prolog child, and continue */

    /* main loop should attempt to harvest prolog in non-blocking mode.
       If unsuccessful after timeout, job should be terminated, and failure
       reported.  If successful, mom should unset prolog pending, and
       continue with job start sequence.  Mom should report job as running
       while prologpending flag is set.  (NOTE:  must track per job prolog
       start time)
    */

    alarm(pe_alarm_time);

    while (waitpid(child, &waitst, 0) < 0)
      {
      if (errno != EINTR)
        {
        /* exit loop. non-alarm based failure occurred */

        run_exit = -3;

        MOMPrologFailureCount++;

        break;
        }

      if (run_exit == -4)
        {
        if (KillSent == FALSE)
          {
          MOMPrologTimeoutCount++;

          /* timeout occurred */

          KillSent = TRUE;

          /* NOTE:  prolog/epilog may be locked in KERNEL space and unkillable */

          alarm(5);
          }
        else
          {
          /* cannot kill prolog/epilog, give up */

          run_exit = -5;

          break;
          }
        }
      }    /* END while (wait(&waitst) < 0) */

    /* epilog/prolog child completed */
#ifdef ENABLE_CSA
    if ((which == PE_EPILOGUSER) && (!strcmp(pelog, path_epiloguser)))
      {
      /*
       * Add a workload management end record
      */
      if (LOGLEVEL >= 8)
        {
        sprintf(log_buffer, "%s calling add_wkm_end from run_pelog() - after user epilog",
                pjob->ji_qs.ji_jobid);

        log_err(-1, id, log_buffer);
        }

      add_wkm_end(pjob->ji_wattr[(int)JOB_ATR_pagg_id].at_val.at_ll,

                  pjob->ji_qs.ji_un.ji_momt.ji_exitstat, pjob->ji_qs.ji_jobid);
      }

#endif /* ENABLE_CSA */

    alarm(0);

    /* restore the previous handler */

    sigaction(SIGALRM, &oldact, 0);

    if (run_exit == 0)
      {
      if (WIFEXITED(waitst))
        {
        run_exit = WEXITSTATUS(waitst);
        }
      }
    }
  else
    {
    /* child - run script */

    log_close(0);

    if (lockfds >= 0)
      {
      close(lockfds);

      lockfds = -1;
      }

    net_close(-1);

    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER) || (which == PE_PROLOGUSERJOB) || which == PE_EPILOGUSERJOB)
      {
      if (setgroups(
          pjob->ji_grpcache->gc_ngroup,
          (gid_t *)pjob->ji_grpcache->gc_groups) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgroups() for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }

      if (setgid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setgid(%lu) for UID = %lu failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exgid,
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }

      if (setuid(pjob->ji_qs.ji_un.ji_momt.ji_exuid) != 0)
        {
        snprintf(log_buffer,sizeof(log_buffer),
          "setuid(%lu) failed: %s\n",
          (unsigned long)pjob->ji_qs.ji_un.ji_momt.ji_exuid,
          strerror(errno));

        log_err(errno, id, log_buffer);

        exit(255);
        }
      }

    if (fd_input != 0)
      {
      close(0);

      if (dup(fd_input) == -1) {}

      close(fd_input);
      }

    if (pe_io_type == PE_IO_TYPE_NULL)
      {
      /* no output, force to /dev/null */

      fds1 = open("/dev/null", O_WRONLY, 0600);
      fds2 = open("/dev/null", O_WRONLY, 0600);
      }
    else if (pe_io_type == PE_IO_TYPE_STD)
      {
      /* open job standard out/error */

      /*
       * We need to know if files are joined or not.
       * If they are then open the correct file and duplicate it to the other
      */

      isjoined = is_joined(pjob);

      switch (isjoined)
        {
        case -1:

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds1 = dup(fds2);

          break;

        case 1:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = dup(fds1);

          break;

        default:

          fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);

          fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                               pjob->ji_qs.ji_un.ji_momt.ji_exgid);
          break;
        }
      }

    if (pe_io_type != PE_IO_TYPE_ASIS)
      {
      /* If PE_IO_TYPE_ASIS, leave as is, already open to job */

      if (fds1 != 1)
        {
        close(1);

        if (dup(fds1) == -1) {}

        close(fds1);
        }

      if (fds2 != 2)
        {
        close(2);

        if (dup(fds2) == -1) {}

        close(fds2);
        }
      }

    if ((which == PE_PROLOGUSER) || (which == PE_EPILOGUSER) || (which == PE_PROLOGUSERJOB) || (which == PE_EPILOGUSERJOB))
      {
      if (chdir(pjob->ji_grpcache->gc_homedir) != 0)
        {
        /* warn only, no failure */

        sprintf(log_buffer,
          "PBS: chdir to %s failed: %s (running user %s in current directory)",
          pjob->ji_grpcache->gc_homedir,
          strerror(errno),
          which == PE_PROLOGUSER ? "prologue" : "epilogue");

        if (write(2, log_buffer, strlen(log_buffer)) == -1) {}

        fsync(2);
        }
      }

    /* for both prolog and epilog */

    if (DEBUGMODE == 1)
      {
      fprintf(stderr, "PELOGINFO:  script:'%s'  jobid:'%s'  euser:'******'  egroup:'%s'  jobname:'%s' SSID:'%ld'  RESC:'%s'\n",
              pelog,
              pjob->ji_qs.ji_jobid,
              pjob->ji_wattr[(int)JOB_ATR_euser].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str,
              pjob->ji_wattr[(int)JOB_ATR_session_id].at_val.at_long,
              resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list)));
      }

    arg[0] = pelog;

    arg[1] = pjob->ji_qs.ji_jobid;
    arg[2] = pjob->ji_wattr[(int)JOB_ATR_euser].at_val.at_str;
    arg[3] = pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str;
    arg[4] = pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str;

    set_resource_vars(pjob,NULL);

    /* NOTE:  inside child */

    if ( which == PE_EPILOG || which == PE_EPILOGUSER || which == PE_EPILOGUSERJOB )
      {
      /* for epilog only */

      sprintf(sid, "%ld",
              pjob->ji_wattr[(int)JOB_ATR_session_id].at_val.at_long);
      sprintf(exit_stat,"%d",
              pjob->ji_qs.ji_un.ji_exect.ji_exitstat);

      arg[5] = sid;
      arg[6] = resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[7] = resc_to_string(pjob, (int)JOB_ATR_resc_used, resc_used, sizeof(resc_used));
      arg[8] = pjob->ji_wattr[(int)JOB_ATR_in_queue].at_val.at_str;
      arg[9] = pjob->ji_wattr[(int)JOB_ATR_account].at_val.at_str;
      arg[10] = exit_stat;
      arg[11] = NULL;

      LastArg = 11;
      }
    else if (which == PE_MAGRATHEA)
      {
      char *cc = NULL, *c = NULL;

      setenv("MAGRATHEA_CLUSTER",pjob->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str,1);

      if ((pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_flags & ATR_VFLAG_SET) &&
          (pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_val.at_str))
        {
        c = cloud_mom_mapping(pjob->ji_wattr[(int)JOB_ATR_cloud_mapping].at_val.at_str,mom_host,&cc);
        }

      if (c)
        arg[5]=c;
      else
        arg[5]=mom_host;

      setenv("MAGRATHEA_VIRTUAL_HOST",arg[5],1);

      if (cc)
        {
        setenv("MAGRATHEA_VIRTUAL_ALTERNATIVE",cc,1);
        free(cc);
        }

      if (pjob->ji_wattr[(int)JOB_ATR_vlan_id].at_val.at_str != NULL )
        {
        setenv("MAGRATHEA_VLANID",pjob->ji_wattr[(int)JOB_ATR_vlan_id].at_val.at_str,1);
        }

      switch (is_cloud_job(pjob))
        {
        case 1: setenv("MAGRATHEA_TYPE","create",1); break;
        case 2: setenv("MAGRATHEA_TYPE","internal",1); break;
        default: setenv("MAGRATHEA_TYPE","none",1); break;
        }


      arg[6]=(char *)0;
      LastArg = 6;
      }
    else
      {
      /* prolog */

      arg[5] = resc_to_string(pjob, (int)JOB_ATR_resource, resc_list, sizeof(resc_list));
      arg[6] = pjob->ji_wattr[(int)JOB_ATR_in_queue].at_val.at_str;
      arg[7] = pjob->ji_wattr[(int)JOB_ATR_account].at_val.at_str;
      arg[8] = NULL;

      LastArg = 8;
      }

    for (aindex = 0;aindex < LastArg;aindex++)
      {
      if (arg[aindex] == NULL)
        arg[aindex] = EmptyString;
      }  /* END for (aindex) */

    /*
     * Pass Resource_List.nodes request in environment
     * to allow pro/epi-logue setup/teardown of system
     * settings.  --pw, 2 Jan 02
     * Fixed to use putenv for sysV compatibility.
     *  --troy, 11 jun 03
     *
     */

    r = find_resc_entry(
          &pjob->ji_wattr[(int)JOB_ATR_resource],
          find_resc_def(svr_resc_def, "nodes", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *envname = "PBS_RESOURCE_NODES=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(r->rs_value.at_val.at_str) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,r->rs_value.at_val.at_str);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (r != NULL) */

    r = find_resc_entry(
          &pjob->ji_wattr[(int)JOB_ATR_resource],
          find_resc_def(svr_resc_def, "gres", svr_resc_size));

    if (r != NULL)
      {
      /* setenv("PBS_RESOURCE_NODES",r->rs_value.at_val.at_str,1); */

      const char *envname = "PBS_RESOURCE_GRES=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(r->rs_value.at_val.at_str) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,r->rs_value.at_val.at_str);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (r != NULL) */

    if (TTmpDirName(pjob, buf))
      {
      const char *envname = "TMPDIR=";
      char *envstr;

      envstr = malloc(
                 (strlen(envname) + strlen(buf) + 1) * sizeof(char));

      if (envstr != NULL)
        {
        strcpy(envstr,envname);

        strcat(envstr,buf);

        /* do _not_ free the string when using putenv */

        putenv(envstr);
        }
      }  /* END if (TTmpDirName(pjob,&buf)) */

    /* Set PBS_SCHED_HINT */

      {
      char *envname = "PBS_SCHED_HINT";
      char *envval;
      char *envstr;

      if ((envval = get_job_envvar(pjob, envname)) != NULL)
        {
        envstr = malloc((strlen(envname) + strlen(envval) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            envval);

          putenv(envstr);
          }
        }
      }

    /* Set PBS_NODENUM */
      {
      char *envname = "PBS_NODENUM";
      char *envstr;

      sprintf(buf, "%d",
        pjob->ji_nodeid);

      envstr = malloc((strlen(envname) + strlen(buf) + 2) * sizeof(char));

      if (envstr != NULL)
        {
        sprintf(envstr,"%s=%d",
          envname,
          pjob->ji_nodeid);

        putenv(envstr);
        }
      }

    /* Set PBS_MSHOST */
      {
      char *envname = "PBS_MSHOST";
      char *envstr;

      if ((pjob->ji_vnods[0].vn_host != NULL) && (pjob->ji_vnods[0].vn_host->hn_host != NULL))
        {
        envstr = malloc((strlen(envname) + strlen(pjob->ji_vnods[0].vn_host->hn_host) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            pjob->ji_vnods[0].vn_host->hn_host);

          putenv(envstr);
          }
        }
      }

    /* Set PBS_NODEFILE */
      {
      char *envname = "PBS_NODEFILE";
      char *envstr;

      if (pjob->ji_flags & MOM_HAS_NODEFILE)
        {
        sprintf(buf, "%s/%s",
          path_aux,
          pjob->ji_qs.ji_jobid);

        envstr = malloc((strlen(envname) + strlen(buf) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            buf);

          putenv(envstr);
          }
        }
      }

    /* Set umask */
    if (pjob->ji_wattr[(int)JOB_ATR_umask].at_flags & ATR_VFLAG_SET)
      {
      char *buf = calloc(strlen("PBS_UMASK=")+16,1);
      if (buf != NULL)
        {
        sprintf(buf,"PBS_UMASK=%#o",pjob->ji_wattr[(int)JOB_ATR_umask].at_val.at_long);
        putenv(buf);
        }
      }

    /* Set PBS_O_Workdir */
      {
      char *envname = "PBS_O_WORKDIR";
      char *workdir_val;
      char *envstr;

      workdir_val = get_job_envvar(pjob,envname);
      if (workdir_val != NULL)
        {
        envstr = malloc((strlen(workdir_val) + strlen(envname) + 2) * sizeof(char));

        if (envstr != NULL)
          {
          sprintf(envstr,"%s=%s",
            envname,
            workdir_val);

          putenv(envstr);
          }
        }
      }

    /* SET BEOWULF_JOB_MAP */

      {

      struct array_strings *vstrs;

      int VarIsSet = 0;
      int j;

      vstrs = pjob->ji_wattr[(int)JOB_ATR_variables].at_val.at_arst;

      for (j = 0;j < vstrs->as_usedptr;++j)
        {
        if (!strncmp(
              vstrs->as_string[j],
              "BEOWULF_JOB_MAP=",
              strlen("BEOWULF_JOB_MAP=")))
          {
          VarIsSet = 1;

          break;
          }
        }

      if (VarIsSet == 1)
        {
        char *envstr;

        envstr = malloc((strlen(vstrs->as_string[j])) * sizeof(char));

        if (envstr != NULL)
          {
          strcpy(envstr,vstrs->as_string[j]);

          putenv(envstr);
          }
        }
      }

    execv(pelog,arg);

    sprintf(log_buffer,"execv of %s failed: %s\n",
      pelog,
      strerror(errno));

    if (write(2, log_buffer, strlen(log_buffer)) == -1) 
      {
      /* cannot write message to stderr */

      /* NO-OP */
      }

    fsync(2);

    exit(255);
    }  /* END else () */

  switch (run_exit)
    {
    case 0:

      /* SUCCESS */

      /* NO-OP */

      break;

    case - 3:

      pelog_err(pjob, pelog, run_exit, "child wait interrupted");

      break;

    case - 4:

      pelog_err(pjob, pelog, run_exit, "prolog/epilog timeout occurred, child cleaned up");

      break;

    case - 5:

      pelog_err(pjob, pelog, run_exit, "prolog/epilog timeout occurred, cannot kill child");

      break;

    default:

      pelog_err(pjob, pelog, run_exit, "nonzero p/e exit status");

      break;
    }  /* END switch (run_exit) */

  return(run_exit);
  }  /* END run_pelog() */
Example #6
0
void setup_pelog_outputs(
    
  job  *pjob,
  int   pe_io_type,
  int   delete_job,
  char *specpelog)

  {
  int fds1 = 0;
  int fds2 = 0;

  if (pe_io_type == PE_IO_TYPE_NULL)
    {
    /* no output, force to /dev/null */

    fds1 = open("/dev/null", O_WRONLY, 0600);
    fds2 = open("/dev/null", O_WRONLY, 0600);
    }
  else if (pe_io_type == PE_IO_TYPE_STD)
    {
    /* open job standard out/error */

    /*
     * We need to know if files are joined or not.
     * If they are then open the correct file and duplicate it to the other
    */

    int isjoined = is_joined(pjob);

    switch (isjoined)
      {
      case -1:

        fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                             pjob->ji_qs.ji_un.ji_momt.ji_exgid);

        fds1 = (fds2 < 0)?-1:dup(fds2);

        break;

      case 1:

        fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                             pjob->ji_qs.ji_un.ji_momt.ji_exgid);

        fds2 = (fds1 < 0)?-1:dup(fds1);

        break;

      default:

        fds1 = open_std_file(pjob, StdOut, O_WRONLY | O_APPEND,
                             pjob->ji_qs.ji_un.ji_momt.ji_exgid);

        fds2 = open_std_file(pjob, StdErr, O_WRONLY | O_APPEND,
                             pjob->ji_qs.ji_un.ji_momt.ji_exgid);
        break;
      }
    }

  /*
   * dupeStdFiles is a flag for those that couldn't open their .OU/.ER files
  */
  int dupeStdFiles = 1;

  if (!delete_job)
    {
    if ((fds1 < 0) ||
        (fds2 < 0))
      {
      if (fds1 >= 0)
        close(fds1);

      if (fds2 >= 0)
        close(fds2);

      if ((pe_io_type == PE_IO_TYPE_STD) &&
          (strlen(specpelog) == strlen(path_epilogp)) &&
          (strcmp(path_epilogp, specpelog) == 0))
        dupeStdFiles = 0;
      else
        exit(-1);
      }
    }

  if (pe_io_type != PE_IO_TYPE_ASIS)
    {
    /* If PE_IO_TYPE_ASIS, leave as is, already open to job */

    /* dup only for those fds1 >= 0 */

    if (fds1 != 1)
      {
      close(1);

      if (dupeStdFiles)
        {
        if ((fds1 >= 0)&&(dup(fds1) >= 0))
          close(fds1);
        }
      }

    if (fds2 != 2)
      {
      close(2);

      if (dupeStdFiles)
        {
        if ((fds2 >= 0)&&(dup(fds2) >= 0))
          close(fds2);
        }
      }
    }
  } /* END setup_pelog_outputs() */