Пример #1
0
END_TEST

START_TEST(job_set_wait_test)
  {
  struct job test_job;
  struct pbs_attribute test_attribute;
  int result = -1;

  memset(&test_job, 0, sizeof(test_job));
  memset(&test_attribute, 0, sizeof(test_attribute));

  result = job_set_wait(NULL, &test_job, 0);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL input attribute pointer fail");

  result = job_set_wait(&test_attribute, NULL, 0);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL input job pointer fail");

  result = job_set_wait(&test_attribute, &test_job, 0);
  fail_unless(result == PBSE_NONE, "job_set_wait fail");
  }
Пример #2
0
/*
 * post_stagein - process the reply to a stage-in request.
 *
 * The batch request is taken from pwt->wt_parm1.  Its rq_extra field is
 * handed to find_job() to locate the job and is then freed here.
 *
 * If the reply code is non-zero the job's nodes are released, an execution
 * time of (time_now + PBS_STAGEFAIL_WAIT) is set unless one is already set,
 * and the job is moved to JOB_STATE_WAITING / JOB_SUBSTATE_STAGEFAIL.  When
 * the reply carries text, that text is mailed to the job owner.
 *
 * If the reply code is zero the job is flagged JOB_SVFLG_StagedIn and, when
 * it is in substate JOB_SUBSTATE_STAGEGO, either the checkpoint file is sent
 * (checkpoint restart) or the job start continues.  In any other substate
 * the job state is re-evaluated and applied.
 *
 * The request is always released through release_req(pwt) before returning.
 *
 * @param pwt  work task whose wt_parm1 points at the batch request
 */
static void post_stagein(

  struct work_task *pwt)

  {
  int        code;
  int        newstate;
  int        newsub;
  job       *pjob;

  struct batch_request *preq;
  attribute      *pwait;

  preq = pwt->wt_parm1;
  code = preq->rq_reply.brp_code;
  pjob = find_job(preq->rq_extra);

  /* preq stays in use below and is released at the end of this function,
   * so do not leave a dangling pointer behind in it */

  free(preq->rq_extra);
  preq->rq_extra = NULL;

  if (pjob != NULL)
    {
    if (code != 0)
      {
      /* stage in failed - hold job */

      free_nodes(pjob);

      pwait = &pjob->ji_wattr[(int)JOB_ATR_exectime];

      /* only impose the retry delay when no execution time is set yet */

      if ((pwait->at_flags & ATR_VFLAG_SET) == 0)
        {
        pwait->at_val.at_long = time_now + PBS_STAGEFAIL_WAIT;

        pwait->at_flags |= ATR_VFLAG_SET;

        job_set_wait(pwait, pjob, 0);
        }

      svr_setjobstate(pjob, JOB_STATE_WAITING, JOB_SUBSTATE_STAGEFAIL);

      if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Text)
        {
        /* set job comment */

        /* NYI */

        svr_mailowner(
          pjob,
          MAIL_STAGEIN,
          MAIL_FORCE,
          preq->rq_reply.brp_un.brp_txt.brp_str);
        }
      }
    else
      {
      /* stage in was successful */

      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_StagedIn;

      if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_STAGEGO)
        {
        if (is_checkpoint_restart(pjob))
          {
          /* need to copy checkpoint file to mom before running */
          svr_send_checkpoint(
              pjob,
              preq,
              JOB_STATE_RUNNING,
              JOB_SUBSTATE_CHKPTGO);
          }
        else
          {
          /* continue to start job running */

          svr_strtjob2(pjob, NULL);
          }
        }
      else
        {
        /* job is not waiting on the stage-in to start: recompute its state */

        svr_evaljobstate(pjob, &newstate, &newsub, 0);

        svr_setjobstate(pjob, newstate, newsub);
        }
      }
    }    /* END if (pjob != NULL) */

  release_req(pwt); /* close connection and release request */

  return;
  }  /* END post_stagein() */
Пример #3
0
/*
 * post_checkpointsend - process the reply to a checkpoint-file copy request.
 *
 * The batch request is taken from pwt->wt_parm1.  Its rq_extra field is
 * handed to find_job() to locate the job and is then freed here.
 *
 * If the reply code is non-zero the job's nodes are released, an execution
 * time of (time_now + PBS_STAGEFAIL_WAIT) is set unless one is already set,
 * and the job is moved to JOB_STATE_WAITING / JOB_SUBSTATE_STAGEFAIL.  When
 * the reply carries text, the failure is logged and mailed to the job owner.
 *
 * If the reply code is zero the job is flagged JOB_SVFLG_CHECKPOINT_COPIED,
 * its restart_name attribute is set from checkpoint_name, the job is saved
 * and the job start continues.
 *
 * The request is always released through release_req(pwt) before returning.
 *
 * @param pwt  work task whose wt_parm1 points at the batch request
 */
static void post_checkpointsend(

  struct work_task *pwt)

  {
  int        code;
  job       *pjob;

  struct batch_request *preq;
  attribute      *pwait;

  preq = pwt->wt_parm1;
  code = preq->rq_reply.brp_code;
  pjob = find_job(preq->rq_extra);

  /* preq stays in use below and is released at the end of this function,
   * so do not leave a dangling pointer behind in it */

  free(preq->rq_extra);
  preq->rq_extra = NULL;

  if (pjob != NULL)
    {
    if (code != 0)
      {
      /* copy failed - hold job */

      free_nodes(pjob);

      pwait = &pjob->ji_wattr[(int)JOB_ATR_exectime];

      /* only impose the retry delay when no execution time is set yet */

      if ((pwait->at_flags & ATR_VFLAG_SET) == 0)
        {
        pwait->at_val.at_long = time_now + PBS_STAGEFAIL_WAIT;

        pwait->at_flags |= ATR_VFLAG_SET;

        job_set_wait(pwait, pjob, 0);
        }

      svr_setjobstate(pjob, JOB_STATE_WAITING, JOB_SUBSTATE_STAGEFAIL);

      if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Text)
        {
        /* the reply text has no length known here, so bound the write
         * to the log buffer instead of trusting it to fit */

        snprintf(log_buffer, sizeof(log_buffer),
                 "Failed to copy checkpoint file to mom - %s",
                 preq->rq_reply.brp_un.brp_txt.brp_str);

        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);

        /* NYI */

        svr_mailowner(
          pjob,
          MAIL_CHKPTCOPY,
          MAIL_FORCE,
          preq->rq_reply.brp_un.brp_txt.brp_str);
        }
      }
    else
      {
      /* checkpoint copy was successful */

      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_COPIED;

      /* set restart_name attribute to the checkpoint_name we just copied */

      job_attr_def[(int)JOB_ATR_restart_name].at_set(
        &pjob->ji_wattr[(int)JOB_ATR_restart_name],
        &pjob->ji_wattr[(int)JOB_ATR_checkpoint_name],
        SET);

      /* record the attribute change before continuing */

      pjob->ji_modified = 1;

      job_save(pjob, SAVEJOB_FULL);

      /* continue to start job running */

      svr_strtjob2(pjob, NULL);
      }
    }    /* END if (pjob != NULL) */

  release_req(pwt); /* close connection and release request */

  return;
  }  /* END post_checkpointsend() */