示例#1
0
文件: job_recov.c 项目: CESNET/torque
int job_save(

  job *pjob,  /* pointer to job structure */
  int  updatetype) /* 0=quick, 1=full     */

  {
  int fds;
  int i;
  char namebuf1[MAXPATHLEN];
  char namebuf2[MAXPATHLEN];
  int openflags;
  int redo;

  strcpy(namebuf1, path_jobs); /* job directory path */
  strcat(namebuf1, pjob->ji_qs.ji_fileprefix);
  strcpy(namebuf2, namebuf1); /* setup for later */

#ifdef PBS_MOM
  strcat(namebuf1, JOB_FILE_SUFFIX);
#else

  if (pjob->ji_isparent == TRUE)
    {
    strcat(namebuf1, JOB_FILE_TMP_SUFFIX);
    }
  else
    {
    strcat(namebuf1, JOB_FILE_SUFFIX);
    }

#endif

  /* if ji_modified is set, ie an attribute changed, then update mtime */

  if (pjob->ji_modified)
    {
    pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long = time_now;
    }

  if (updatetype == SAVEJOB_QUICK)
    {
    openflags = O_WRONLY | O_Sync;

    /* NOTE:  open, do not create */

    fds = open(namebuf1, openflags, 0600);

    if (fds < 0)
      {
      char tmpLine[1024];

      snprintf(tmpLine, sizeof(tmpLine), "cannot open file '%s' for job %s in state %s (%s)",
               namebuf1,
               pjob->ji_qs.ji_jobid,
               PJobSubState[MAX(0,pjob->ji_qs.ji_substate)],
               (updatetype == 0) ? "quick" : "full");

      log_err(errno, "job_save", tmpLine);

      /* FAILURE */

      return(-1);
      }

    /* just write the "critical" base structure to the file */

    while ((i = write(fds, (char *) & pjob->ji_qs, quicksize)) != (ssize_t)quicksize)
      {
      if ((i < 0) && (errno == EINTR))
        {
        /* retry the write */

        if (lseek(fds, (off_t)0, SEEK_SET) < 0)
          {
          log_err(errno, "job_save", "lseek");

          fsync(fds);
          close(fds);

          return(-1);
          }

        continue;
        }
      else
        {
        log_err(errno, "job_save", "quickwrite");

        fsync(fds);
        close(fds);

        /* FAILURE */

        return(-1);
        }
      }

    fsync(fds);
    close(fds);
    }
  else /* SAVEJOB_FULL, SAVEJOB_NEW, SAVEJOB_ARY */
    {
    /*
     * write the whole structure to the file.
     * For a update, this is done to a new file to protect the
     * old against crashs.
     * The file is written in four parts:
     * (1) the job structure,
     * (2) the attribtes in "encoded" form,
     * (3) the attributes in the "external" form, and last
     * (4) the dependency list.
     */

    strcat(namebuf2, JOB_FILE_COPY);

    openflags = O_CREAT | O_WRONLY | O_Sync;

    /* NOTE:  create file if required */

    if (updatetype == SAVEJOB_NEW)
      fds = open(namebuf1, openflags, 0600);
    else
      fds = open(namebuf2, openflags, 0600);

    if (fds < 0)
      {
      log_err(errno, "job_save", "open for full save");

      return(-1);
      }

    for (i = 0;i < MAX_SAVE_TRIES;++i)
      {
      redo = 0; /* try to save twice */

      save_setup(fds);

      if (save_struct((char *)&pjob->ji_qs, (size_t)quicksize) != 0)
        {
        redo++;
        }
      else if (save_attr(job_attr_def, pjob->ji_wattr, (int)JOB_ATR_LAST) != 0)
        {
        redo++;
        }

#ifdef PBS_MOM
      else if (save_tmsock(pjob) != 0)
        {
        redo++;
        }
      else if (save_roottask(pjob) != 0)
        {
        redo++;
        }
      else if (save_jobflags(pjob) != 0)
        {
        redo++;
        }

#endif  /* PBS_MOM */
      else if (save_flush() != 0)
        {
        redo++;
        }

      if (redo != 0)
        {
        if (lseek(fds, (off_t)0, SEEK_SET) < 0)
          {
          log_err(errno, "job_save", "full lseek");
          }
        }
      else
        {
        break;
        }
      }  /* END for (i) */

    fsync(fds);
    close(fds);

    if (i >= MAX_SAVE_TRIES)
      {
      if (updatetype == SAVEJOB_FULL)
        unlink(namebuf2);

      return(-1);
      }

    if (updatetype == SAVEJOB_FULL)
      {
      unlink(namebuf1);

      if (rename(namebuf2,namebuf1) != 0)
        {
        LOG_EVENT(
          PBSEVENT_ERROR | PBSEVENT_SECURITY,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "Link in job_save failed");
        }
      }

    pjob->ji_modified = 0;
    }  /* END (updatetype == SAVEJOB_QUICK) */

  return(0);
  }  /* END job_save() */
示例#2
0
int job_save(

  job *pjob,  /* pointer to job structure */
  int  updatetype, /* 0=quick, 1=full, 2=new     */
  int  mom_port)   /* if 0 ignore otherwise append to end of job name. this is for multi-mom mode */

  {
  int     fds;
  int     i;
  char    namebuf1[MAXPATHLEN];
  char    namebuf2[MAXPATHLEN];
  char    save_buf[SAVEJOB_BUF_SIZE];
  const char   *tmp_ptr = NULL;
  size_t  buf_remaining = sizeof(save_buf);

  int     openflags;
  int     redo;
  time_t  time_now = time(NULL);


#ifdef PBS_MOM
  tmp_ptr = JOB_FILE_SUFFIX;
#else
  if (pjob->ji_is_array_template == TRUE)
    tmp_ptr = (char *)JOB_FILE_TMP_SUFFIX;
  else
    tmp_ptr = (char *)JOB_FILE_SUFFIX;
#endif

  if (mom_port)
    {
    snprintf(namebuf1, MAXPATHLEN, "%s%s%d%s",
        path_jobs, pjob->ji_qs.ji_fileprefix, mom_port, tmp_ptr);
    snprintf(namebuf2, MAXPATHLEN, "%s%s%d%s",
        path_jobs, pjob->ji_qs.ji_fileprefix, mom_port, JOB_FILE_COPY);
    }
  else
    {
    snprintf(namebuf1, MAXPATHLEN, "%s%s%s",
        path_jobs, pjob->ji_qs.ji_fileprefix, tmp_ptr);
    snprintf(namebuf2, MAXPATHLEN, "%s%s%s",
        path_jobs, pjob->ji_qs.ji_fileprefix, JOB_FILE_COPY);
    }

  /* if ji_modified is set, ie an pbs_attribute changed, then update mtime */

  if (pjob->ji_modified)
    {
    pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long = time_now;
    }

  if (updatetype == SAVEJOB_QUICK)
    {
    openflags = O_WRONLY | O_Sync;

    /* NOTE:  open, do not create */

    fds = open(namebuf1, openflags, 0600);

    if (fds < 0)
      {
      char tmpLine[1024];

      snprintf(tmpLine, sizeof(tmpLine), "cannot open file '%s' for job %s in state %s (%s)",
               namebuf1,
               pjob->ji_qs.ji_jobid,
               PJobSubState[MAX(0,pjob->ji_qs.ji_substate)],
               (updatetype == 0) ? "quick" : "full");

      log_err(errno, "job_save", tmpLine);

      /* FAILURE */

      return(-1);
      }

    /* just write the "critical" base structure to the file */

    while ((i = write_ac_socket(fds, (char *)&pjob->ji_qs, sizeof(pjob->ji_qs))) != sizeof(pjob->ji_qs))
      {
      if ((i < 0) && (errno == EINTR))
        {
        /* retry the write */

        if (lseek(fds, (off_t)0, SEEK_SET) < 0)
          {
          log_err(errno, "job_save", (char *)"lseek");

          close(fds);

          return(-1);
          }

        continue;
        }
      else
        {
        log_err(errno, "job_save", (char *)"quickwrite");

        close(fds);

        /* FAILURE */

        return(-1);
        }
      }

    close(fds);
    }
  else /* SAVEJOB_FULL, SAVEJOB_NEW, SAVEJOB_ARY */
    {
    /*
     * write the whole structure to the file.
     * For a update, this is done to a new file to protect the
     * old against crashs.
     * The file is written in four parts:
     * (1) the job structure,
     * (2) the attribtes in "encoded" form,
     * (3) the attributes in the "external" form, and last
     * (4) the dependency list.
     */

    openflags = O_CREAT | O_WRONLY | O_Sync;

    /* NOTE:  create file if required */

    if (updatetype == SAVEJOB_NEW)
      fds = open(namebuf1, openflags, 0600);
    else
      fds = open(namebuf2, openflags, 0600);

    if (fds < 0)
      {
      log_err(errno, "job_save", (char *)"open for full save");

      return(-1);
      }

    for (i = 0; i < MAX_SAVE_TRIES; i++)
      {
      redo = 0; /* try to save twice */

#ifndef PBS_MOM
      lock_ss();
#endif

      if (save_struct((char *)&pjob->ji_qs, sizeof(pjob->ji_qs), fds, save_buf, &buf_remaining, sizeof(save_buf)) != PBSE_NONE)
        {
        redo++;
        }
      else if (save_attr(job_attr_def,
            pjob->ji_wattr,
            JOB_ATR_LAST,
            fds,
            save_buf,
            &buf_remaining,
            sizeof(save_buf)) != PBSE_NONE)
        {
        redo++;
        }

#ifdef PBS_MOM
      else if (save_tmsock(pjob, fds, save_buf, &buf_remaining, sizeof(save_buf)) != PBSE_NONE)
        {
        redo++;
        }

#endif  /* PBS_MOM */
      else if (write_buffer(save_buf, sizeof(save_buf) - buf_remaining, fds) != PBSE_NONE)
        {
        redo++;
        }

#ifndef PBS_MOM
      unlock_ss();
#endif

      if (redo != 0)
        {
        if (lseek(fds, (off_t)0, SEEK_SET) < 0)
          {
          log_err(errno, "job_save", (char *)"full lseek");
          }
        }
      else
        {
        break;
        }
      }  /* END for (i) */

    close(fds);

    if (i >= MAX_SAVE_TRIES)
      {
      if (updatetype == SAVEJOB_FULL)
        unlink(namebuf2);

      return(-1);
      }

    if (updatetype == SAVEJOB_FULL)
      {
      unlink(namebuf1);

      if (link(namebuf2, namebuf1) == -1)
        {
        log_event(
          PBSEVENT_ERROR | PBSEVENT_SECURITY,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          (char *)"Link in job_save failed");
        }
      else
        {
        unlink(namebuf2);
        }
      }

    pjob->ji_modified = 0;
    }  /* END (updatetype == SAVEJOB_QUICK) */

  return(PBSE_NONE);
  }  /* END job_save() */