Exemplo n.º 1
0
/**
 * Terminates a job.
 *
 * Opens a PBS connection to `server` (defined outside this function), asks
 * the server to delete the job, calls updateErrorNo() and closes the
 * connection again.
 *
 * @param s      SOAP context; not referenced by this function
 * @param jobid  is the PID assigned by the queue
 * @param user   requesting user; not referenced by this function
 * @return BESE_OK if the delete request succeeded, BESE_BACKEND if the
 *         connection could not be opened or the delete request failed
 */
int rm_terminateJob(struct soap* s, char* jobid, char* user)
{
   int connectionIdentifier = pbs_connect(server);
   if (connectionIdentifier < 1 )
	   return BESE_BACKEND;
   int rc = pbs_deljob(connectionIdentifier, jobid, NULL);
   /* Called right after the request and before disconnecting, as before. */
   updateErrorNo();
   pbs_disconnect(connectionIdentifier);
   /* A non-zero result from pbs_deljob() must not be reported as success. */
   return (rc != 0) ? BESE_BACKEND : BESE_OK;
}
Exemplo n.º 2
0
/*
 * Apply a DRMAA control action (suspend, resume, hold, release, terminate)
 * to the job identified by job_id by issuing the matching PBS request on
 * the session's connection.
 *
 * job_id  - identifier of the job to act on
 * action  - one of the DRMAA_CONTROL_* values handled in the switch below
 * errmsg, errlen - not referenced directly in this body; presumably used by
 *                  the GET_DRMAA_SESSION / RAISE_PBS macros -- TODO confirm
 *
 * Returns DRMAA_ERRNO_SUCCESS when the PBS call returned 0; otherwise
 * RAISE_PBS() is invoked (macro defined elsewhere).
 *
 * NOTE(review): an action value not listed in the switch leaves rc at 0, so
 * it is reported as success without contacting the server.
 * NOTE(review): unless RAISE_PBS() itself returns, the error path falls off
 * the end of a non-void function -- confirm against the macro definition.
 */
int
drmaa_control(const char *job_id, int action, char *errmsg, size_t errlen)
  {
  drmaa_session_t *c = NULL;
  int rc = 0;

  DEBUG(("-> drmaa_control(job_id=%s,action=%d)", job_id, action));
  /* Macro defined elsewhere; presumably binds the active session to c and
   * may return early when there is none -- TODO confirm. */
  GET_DRMAA_SESSION(c);
  /* Hold the connection mutex for the duration of the PBS request. */
  pthread_mutex_lock(&c->conn_mutex);

  switch (action)
    {
      /*
       * We cannot know whether we did suspend job
       * in other way than remembering this inside DRMAA session.
       */

    case DRMAA_CONTROL_SUSPEND:
      /* Return value of drmaa_find_job() is ignored here. */
      drmaa_find_job(c, job_id, NULL, DRMAA_JOB_SUSPENDED);
      rc = pbs_sigjob(c->pbs_conn, (char*)job_id, "SIGSTOP", NULL);
      break;

    case DRMAA_CONTROL_RESUME:
      /* Return value of drmaa_find_job() is ignored here. */
      drmaa_find_job(c, job_id, NULL, DRMAA_JOB_RESUMED);
      rc = pbs_sigjob(c->pbs_conn, (char*)job_id, "SIGCONT", NULL);
      break;

    case DRMAA_CONTROL_HOLD:
      rc = pbs_holdjob(c->pbs_conn, (char*)job_id, USER_HOLD, NULL);
      break;

    case DRMAA_CONTROL_RELEASE:
      rc = pbs_rlsjob(c->pbs_conn, (char*)job_id, USER_HOLD, NULL);
      break;

    case DRMAA_CONTROL_TERMINATE:
      rc = pbs_deljob(c->pbs_conn, (char*)job_id, NULL); /* deldelay=N
             -- delay between SIGTERM and SIGKILL (default 0)*/
      break;
    }

  pthread_mutex_unlock(&c->conn_mutex);

  RELEASE_DRMAA_SESSION(c);
  DEBUG(("<- drmaa_control() =%d", rc));

  /* Non-zero rc from the PBS call is handed to RAISE_PBS(). */
  if (rc)
    RAISE_PBS();
  else return DRMAA_ERRNO_SUCCESS;
  }
Exemplo n.º 3
0
Arquivo: fifo.c Projeto: CESNET/torque
/*
 * Run one scheduling pass over the server reachable through connection sd.
 *
 * Queries the server, initialises the cycle, then walks the jobs handed out
 * by next_job(): a job that passes is_ok_to_run_job() is started through
 * run_update_job(); any other job is marked as not runnable, deleted if it
 * can never run, and has its comment/log entry refreshed.
 *
 * Returns 0 on every path.
 */
int scheduling_cycle(

  int sd)

  {
  server_info *srv;                        /* server/queue/job/node info */
  job_info    *candidate;                  /* job currently being examined */
  int          verdict = SUCCESS;          /* result of is_ok_to_run_job() */
  char         log_msg[MAX_LOG_SIZE];      /* log text for a rejected job */
  char         comment[MAX_COMMENT_SIZE];  /* new comment for a rejected job */

  sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Entering Schedule");

  update_cycle_status();

  /* build the server / queue / job / node structures */

  srv = query_server(sd);

  if (srv == NULL)
    {
    fprintf(stderr, "Problem with creating server data strucutre\n");

    return(0);
    }

  if (!init_scheduling_cycle(srv))
    {
    sched_log(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, srv->name,
              "init_scheduling_cycle failed.");

    free_server(srv, 1);

    return(0);
    }

  /* main scheduling loop */

  while ((candidate = next_job(srv, 0)) != NULL)
    {
    sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, candidate->name,
              "Considering job to run");

    verdict = is_ok_to_run_job(sd, srv, candidate->queue, candidate);

    if (verdict == SUCCESS)
      {
      run_update_job(sd, srv, candidate->queue, candidate);

      continue;
      }

    /* from here on the job was refused for this cycle */

    if (candidate->can_never_run)
      {
      sched_log(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, candidate->name,
                "Job Deleted because it would never run");

      pbs_deljob(sd, candidate->name, "Job could never run");
      }

    candidate->can_not_run = 1;

    /*
     * Only log when the comment was actually updated: an unchanged comment
     * means the reason is unchanged and was already reported.
     */
    if (translate_job_fail_code(verdict, comment, log_msg) &&
        (update_job_comment(sd, candidate, comment) == 0))
      {
      sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, candidate->name, log_msg);
      }

    if ((verdict != NOT_QUEUED) && cstat.strict_fifo)
      {
      update_jobs_cant_run(sd, candidate->queue->jobs, candidate,
                           COMMENT_STRICT_FIFO, START_AFTER_JOB);
      }
    }

  if (cstat.fair_share)
    {
    update_last_running(srv);
    }

  free_server(srv, 1); /* releases server, queues and jobs */

  sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Leaving schedule\n");

  return 0;
  }
Exemplo n.º 4
0
/* If a user requested deleting 'all' then this routine gets the list of
 * jobs from the default server and tries to delete every job that is not in
 * a 'C'omplete or 'E'xiting state. When the extend string contains DELPURGE,
 * 'C'ompleted jobs are attempted as well.
 *
 * extend - extension string handed unchanged to pbs_deljob(); may be NULL
 *
 * A job whose status carries no state attribute is still attempted. The
 * PBSE_UNKJOBID / PBSE_BADSTATE replies are silently ignored below.
 *
 * NOTE(review): the list returned by pbs_selstat() is not released in this
 * function.
 */
void qdel_all(
  char *extend)   /* I */

  {
  char *jobid;
  const char *state;
  int connect;
  int stat;
  int retries;
  int purging;

  struct batch_status *p_status;

  struct batch_status *p;

  struct attropl *p_atropl = NULL;

  struct attrl *a;

  connect = cnt2server(NULL);

  if (connect <= 0)
    {
    fprintf(stderr, "qdel: cannot connect to default server (errno=%d) %s\n",
            pbs_errno,
            pbs_strerror(pbs_errno));

    return;
    }

  /* Guard against a NULL extend before searching it. */
  purging = (extend != NULL) && (strstr(extend, DELPURGE) != NULL);

  p_status = pbs_selstat(connect, p_atropl, NULL);

  if (p_status == NULL)
    {
    fprintf(stderr, "qdel: cannot find any jobs to delete\n");
    }

  for (p = p_status;p != NULL;p = p->next)
    {
    jobid = p->name;

    /* Reset per job so a state from the previous job is never reused. */
    state = NULL;

    for (a = p->attribs;a != NULL;a = a->next)
      {
      if ((a->name != NULL) && (strcmp(a->name, ATTR_state) == 0))
        {
        state = a->value;
        break;
        }
      }

    /*
     * Don't bother deleting jobs that are 'C'omplete or 'E'xiting
     * Unless we are Purging, then try 'C'ompleted jobs as well
     */

    if (state != NULL)
      {
      if (*state == 'E')
        continue;

      if ((*state == 'C') && !purging)
        continue;
      }

    /*
     * if MOM is too slow to respond, we will retry a few times
     * before giving up
     */

    retries = 0;

    while (((stat = pbs_deljob(connect, jobid, extend)) != 0) &&
           (pbs_errno == PBSE_NORELYMOM) &&
           (retries < 3))
      {
      sleep(1);
      retries++;
      }

    if (stat &&
        (pbs_errno != PBSE_UNKJOBID) &&
        (pbs_errno != PBSE_BADSTATE))
      {
      printf("Deletion Error: %d (%s)\n", pbs_errno, pbs_strerror(pbs_errno));
      prt_job_err("qdel", connect, jobid);
      }
    }

  pbs_disconnect(connect);

  return;
  }
Exemplo n.º 5
0
/*
 * qdel entry point.
 *
 * Parses the mutually exclusive options -a, -c, -m, -p, -t and -W into the
 * `extend` string, then issues pbs_deljob() for every job identifier on the
 * command line ('all' / 'ALL' is delegated to qdel_all()). With -c a single
 * purge request is sent to the default server instead.
 *
 * Exits with 2 on a usage error, otherwise with any_failed (0 when every
 * request succeeded).
 *
 * All copies of user-supplied text into fixed buffers are bounded, and
 * job_id_out / server_out start out as empty strings so the -c path never
 * reads uninitialised memory.
 */
int main(

  int    argc,
  char **argv)

  {
  int c;
  int n;
  int errflg = 0;
  int any_failed = 0;
  int purge_completed = FALSE;
  int located = FALSE;
  char *pc;
  const char *dflt;

  char job_id[PBS_MAXCLTJOBID]; /* from the command line */

  char job_id_out[PBS_MAXCLTJOBID];
  char server_out[MAXSERVERNAME];
  char rmt_server[MAXSERVERNAME];

  char extend[1024];

#define GETOPT_ARGS "acm:pW:t:"

  extend[0] = '\0';
  job_id_out[0] = '\0';
  server_out[0] = '\0';
  rmt_server[0] = '\0';

  while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF)
    {
    switch (c)
      {

      case 'a': /* Async job deletion */

        if (extend[0] != '\0')
          {
          errflg++;

          break;
          }

        snprintf(extend, sizeof(extend), "%s", DELASYNC);

        break;

      case 'c':

        if (extend[0] != '\0')
          {
          errflg++;

          break;
          }

        snprintf(extend,sizeof(extend),"%s%ld",PURGECOMP,(long)(time(NULL)));
        purge_completed = TRUE;

        break;

      case 'm':

        /* add delete message */

        if (extend[0] != '\0')
          {
          /* extension option already specified */

          errflg++;

          break;
          }

        /* snprintf always terminates; strncpy would not on a long message */
        snprintf(extend, sizeof(extend), "%s", optarg);

        break;

      case 'p':

        if (extend[0] != '\0')
          {
          errflg++;

          break;
          }

        snprintf(extend, sizeof(extend), "%s1", DELPURGE);

        break;

      case 't':

        if (extend[0] != '\0')
          {
          errflg++;

          break;
          }

        pc = optarg;

        if (strlen(pc) == 0)
          {
          fprintf(stderr, "qdel: illegal -t value (array range cannot be zero length)\n");

          errflg++;

          break;
          }

        snprintf(extend,sizeof(extend),"%s%s",
          ARRAY_RANGE,
          pc);

        break;

      case 'W':

        if (extend[0] != '\0')
          {
          errflg++;

          break;
          }

        pc = optarg;

        if (strlen(pc) == 0)
          {
          fprintf(stderr, "qdel: illegal -W value\n");

          errflg++;

          break;
          }

        /* the delay must consist of decimal digits only */
        while ((*pc != '\0') && isdigit((unsigned char)*pc))
          pc++;

        if (*pc != '\0')
          {
          fprintf(stderr, "qdel: illegal -W value\n");

          errflg++;

          break;
          }

        n = snprintf(extend, sizeof(extend), "%s%s", DELDELAY, optarg);

        if ((n < 0) || ((size_t)n >= sizeof(extend)))
          {
          /* value does not fit into the extension buffer */
          fprintf(stderr, "qdel: illegal -W value\n");

          errflg++;
          }

        break;

      default:

        errflg++;

        break;
      }
    }    /* END while (c) */

  /*
   * Option errors are fatal even when -c was given; only the "at least one
   * job id" requirement is waived for a purge request.
   */
  if ((errflg != 0) || (!purge_completed && (optind >= argc)))
    {
    static char usage[] = "usage: qdel [{ -a | -c | -p | -t | -W delay | -m message}] [<JOBID>[<JOBID>]|'all'|'ALL']...\n";

    fprintf(stderr, "%s", usage);

    fprintf(stderr, "       -a -c, -m, -p, -t, and -W are mutually exclusive\n");

    exit(2);
    }

  if (purge_completed)
    {
    dflt = pbs_default();

    snprintf(server_out, sizeof(server_out), "%s", (dflt != NULL) ? dflt : "");

    /* jumps into the loop body below; job_id_out is the empty string */
    goto cnt;
    }

  for (;optind < argc;optind++)
    {
    int connect;
    int stat = 0;

    /* each job gets its own single attempt at being located elsewhere */
    located = FALSE;

    /* reject identifiers that would overflow the job_id buffer */

    if (strlen(argv[optind]) >= sizeof(job_id))
      {
      fprintf(stderr, "qdel: illegally formed job identifier: %s\n",
              argv[optind]);

      any_failed = 1;

      continue;
      }

    /* check to see if user specified 'all' to delete all jobs */

    strcpy(job_id, argv[optind]);

    if ((strcmp("all", job_id) == 0) || (strcmp("ALL", job_id) == 0))
      {
      qdel_all(extend);
      continue;
      }
    else if (get_server(job_id, job_id_out, server_out))
      {
      fprintf(stderr, "qdel: illegally formed job identifier: %s\n",
              job_id);

      any_failed = 1;

      continue;
      }

cnt:

    connect = cnt2server(server_out);

    if (connect <= 0)
      {
      fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n",
              pbs_server,
              pbs_errno,
              pbs_strerror(pbs_errno));

      any_failed = pbs_errno;

      continue;
      }

    stat = pbs_deljob(connect, job_id_out, extend);

    if (stat && (pbs_errno != PBSE_UNKJOBID))
      {
      prt_job_err("qdel", connect, job_id_out);

      any_failed = pbs_errno;
      }
    else if (stat && (pbs_errno == PBSE_UNKJOBID) && !located)
      {
      /* unknown here: ask where the job lives and retry there once */
      located = TRUE;

      if (locate_job(job_id_out, server_out, rmt_server))
        {
        pbs_disconnect(connect);

        snprintf(server_out, sizeof(server_out), "%s", rmt_server);

        goto cnt;
        }

      prt_job_err("qdel", connect, job_id_out);

      any_failed = pbs_errno;
      }

    pbs_disconnect(connect);
    }

  exit(any_failed);
  }  /* END main() */
Exemplo n.º 6
0
/*
 * Reject a job: build an explanatory message containing `reason`, ask PBS to
 * delete the job with that message, and move the job out of its queue's
 * list. When schd_TEST_ONLY is set nothing is deleted and only debug output
 * is produced.
 *
 * job    - job to reject; job->jobid is used in the message and the request
 * reason - text inserted into the message sent with the delete request
 *
 * Returns 0 on success (or in test-only mode), 1 if pbs_deljob() failed,
 * -1 if the message buffer could not be allocated.
 *
 * The message buffer is allocated once and kept for later calls.
 */
int
schd_reject_job(Job *job, char *reason)
  {
  char   *id = "schd_reject_job";
  static char *message = NULL;
  int     rc = 0;

  if (message == NULL)
    {
    if ((message = (char *)malloc(MSG_BUFFER_SIZE)) == NULL)
      {
      (void)sprintf(log_buffer, "cannot malloc %d bytes\n",
                    MSG_BUFFER_SIZE);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      return (-1);
      }
    }

  DBPRT((

          "*************************************************************************\n"));

  if (schd_TEST_ONLY)
    {
    DBPRT(("JOB %s WOULD HAVE BEEN DELETED!!!\n", job->jobid));
    DBPRT(("Message: %s\n", reason));
    }
  else
    {
    /*
     * Bounded by the allocation size: `reason` is caller-supplied and could
     * otherwise overrun the buffer. An over-long reason is truncated.
     */
    (void)snprintf(message, MSG_BUFFER_SIZE,
                   "\n"
                   "PBS job '%s' was rejected by all execution queues.\n"
                   "\n"
                   "The reason given for this action was :\n"
                   "\n"
                   "       %s\n"
                   "\n"
                   "Please correct the problem and resubmit your job, or contact the PBS\n"
                   "administrator for assistance.\n"
                   "\n"
                   "Thank you.\n"
                   "\n",
                   job->jobid, reason);


    /*
     * Ask PBS to delete the job from the queue, which should deliver the
     * message to the user.
     */

    rc = pbs_deljob(connector, job->jobid, message);

    if (rc)
      {
      (void)sprintf(log_buffer, "pbs_deljob failed: error %d", rc);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return 1;
      }

    /*
     * Delete this job from the queue's list (move to a NULL queue)
     */
    schd_move_job_to(job, NULL);

    DBPRT(("JOB %s DELETED!!!\n", job->jobid));

    DBPRT(("Message: %s\n", reason));
    }

  DBPRT((

          "*************************************************************************\n"));
  return 0;
  }