Ejemplo n.º 1
0
/*
 * qmove - ask the batch server to move jobs to another destination.
 *
 * usage: qmove destination job_identifier...
 *        qmove --version
 *
 * argv[1] is the destination; every following argument is a job identifier.
 * Each job is handled independently: a failure is reported on stderr and
 * processing continues with the next job.
 *
 * Exit status:
 *   2            usage error, malformed (or over-long) destination, or the
 *                security library could not be initialized;
 *   any_failed   otherwise: 0 when no failure was recorded, else 1 or the
 *                pbs_errno of the most recently recorded failure.
 */
int
main(int argc, char **argv, char **envp) /* qmove */
{
	int any_failed=0;

	char job_id[PBS_MAXCLTJOBID];		/* from the command line */
	char destination[PBS_MAXSERVERNAME];	/* from the command line */
	char *q_n_out, *s_n_out;

	char job_id_out[PBS_MAXCLTJOBID];
	char server_out[MAXSERVERNAME];
	char rmt_server[MAXSERVERNAME];

	/*test for real deal or just version and exit*/

	execution_mode(argc, argv);

#ifdef WIN32
	winsock_init();
#endif

	if (argc < 3) {
		static char usage[]="usage: qmove destination job_identifier...\n";
		static char usag2[]="       qmove --version\n";
		/* Never hand a non-literal buffer to fprintf as the format. */
		fprintf(stderr, "%s", usage);
		fprintf(stderr, "%s", usag2);
		exit(2);
	}

	/*
	 * The destination comes straight from the command line: reject it
	 * before copying if it cannot fit (with its terminator) in the
	 * fixed-size buffer.
	 */
	if (strlen(argv[1]) >= sizeof(destination)) {
		fprintf(stderr, "qmove: illegally formed destination: %s\n", argv[1]);
		exit(2);
	}
	strcpy(destination, argv[1]);
	if (parse_destination_id(destination, &q_n_out, &s_n_out)) {
		fprintf(stderr, "qmove: illegally formed destination: %s\n", destination);
		exit(2);
	}

	/*perform needed security library initializations (including none)*/

	if (CS_client_init() != CS_SUCCESS) {
		fprintf(stderr, "qmove: unable to initialize security library.\n");
		exit(2);
	}

	for (optind = 2; optind < argc; optind++) {
		int connect;
		int stat=0;
		int located = FALSE;	/* set once locate_job() has been tried for this job */

		/* Same bound check as for the destination: job ids are user input. */
		if (strlen(argv[optind]) >= sizeof(job_id)) {
			fprintf(stderr, "qmove: illegally formed job identifier: %s\n", argv[optind]);
			any_failed = 1;
			continue;
		}
		strcpy(job_id, argv[optind]);
		if (get_server(job_id, job_id_out, server_out)) {
			fprintf(stderr, "qmove: illegally formed job identifier: %s\n", job_id);
			any_failed = 1;
			continue;
		}
cnt:
		/* Re-entered via goto after locate_job() names a different server. */
		connect = cnt2server(server_out);
		if (connect <= 0) {
			fprintf(stderr, "qmove: cannot connect to server %s (errno=%d)\n",
				pbs_server, pbs_errno);
			any_failed = pbs_errno;
			continue;
		}

		stat = pbs_movejob(connect, job_id_out, destination, NULL);
		if (stat && (pbs_errno != PBSE_UNKJOBID)) {
			if (stat != PBSE_NEEDQUET) {
				prt_job_err("qmove", connect, job_id_out);
				any_failed = pbs_errno;
			} else {
				/*
				 * NOTE(review): this path prints a diagnostic but
				 * leaves any_failed untouched - confirm intended.
				 */
				fprintf(stderr, "qmove: Queue type not set for queue \'%s\'\n", destination);
			}
		} else if (stat && (pbs_errno == PBSE_UNKJOBID) && !located) {
			/*
			 * The server does not know the job: try once to find
			 * which server holds it and retry the move there.
			 */
			located = TRUE;
			if (locate_job(job_id_out, server_out, rmt_server)) {
				pbs_disconnect(connect);
				strcpy(server_out, rmt_server);
				goto cnt;
			}
			prt_job_err("qmove", connect, job_id_out);
			any_failed = pbs_errno;
		}
		/*
		 * NOTE(review): if the retry also fails with PBSE_UNKJOBID,
		 * neither branch above runs, so nothing is printed and
		 * any_failed is unchanged - confirm intended.
		 */

		pbs_disconnect(connect);
	}

	/*cleanup security library initializations before exiting*/
	CS_close_app();

	exit(any_failed);
}
Ejemplo n.º 2
0
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.
 *
 * The Job struct is zeroed first, so any previous contents are discarded.
 * After the attribute list has been walked, derived values are computed
 * (default memory, time left, first-seen / checkpointed / suspended /
 * priority / waiting flags) and, for a queued job that has an exec host,
 * PBS is asked to move the job back to the submit queue.
 *
 * Returns the number of items that were found (the job id counts as one),
 * or -1 if duplicating one of the strings failed.  On a -1 return the Job
 * may be partially filled in.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
  {
  int       changed = 0;
  int       istrue;
  char     *id = "schd_get_jobinfo";
  char     *host;
  char     *p, *tmp_p, *var_p;
  AttrList *attr;

  memset((void *)job, 0, sizeof(Job));

  job->jobid = schd_strdup(bs->name);

  if (job->jobid == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(bs->name)");
    return (-1);
    }

  changed ++;

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /*
     * If this is the 'owner' field, keep only the user name, i.e. the
     * part before the '@' that separates user and hostname.
     */
    if (!strcmp(attr->name, ATTR_owner))
      {

      /*
       * Duplicate the whole value and cut the copy at the '@' in place.
       * This avoids staging the value in a fixed-size stack buffer,
       * which an over-long owner string could overflow.
       */
      job->owner = schd_strdup(attr->value);

      if (job->owner == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->owner)");
        return (-1);
        }

      host = strchr(job->owner, '@');

      if (host)
        {
        *host = '\0'; /* Replace '@' with NUL (ends username). */
        }

      changed ++;

      continue;
      }

    /* The group to which to charge the resources for this job. */
    if (!strcmp(attr->name, ATTR_egroup))
      {
      job->group = schd_strdup(attr->value);

      if (job->group == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->group)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The comment currently assigned to this job. */
    if (!strcmp(attr->name, ATTR_comment))
      {
      job->comment = schd_strdup(attr->value);

      if (job->comment == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The host on which this job is running (or was running for
     * suspended or checkpointed jobs). */

    if (!strcmp(attr->name, ATTR_exechost))
      {
      job->exechost = schd_strdup(attr->value);

      if (job->exechost == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->exechost)");
        return (-1);
        }

      changed ++;

      continue;
      }

    if (!strcmp(attr->name, ATTR_inter))
      {
      /* Is this job interactive or not? */
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue)
          job->flags |= JFLAGS_INTERACTIVE;
        else
          job->flags &= ~JFLAGS_INTERACTIVE;

        changed ++;
        }
      else
        {
        DBPRT(("%s: can't parse %s = %s into boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_state))
      {
      /* State is one of 'R', 'Q', 'E', etc. */
      job->state = attr->value[0];
      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_queue))
      {
      job->qname = schd_strdup(attr->value);

      if (job->qname == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->qname)");
        return (-1);
        }

      job->flags |= JFLAGS_QNAME_LOCAL;

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_v))
      {
      /* Work on a private copy: strtok() writes into its input. */
      var_p = schd_strdup(attr->value);

      if (var_p == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(Variable_List)");
        return (-1);
        }

      p = NULL;

      tmp_p = strstr(var_p, "PBS_O_QUEUE");

      if (tmp_p)
        {
        /* First token is the variable name, second is its value. */
        p = strtok(tmp_p, "=");
        p = strtok(NULL,  ", ");
        }

      if (p != NULL)
        {
        job->oqueue = schd_strdup(p);
        }
      else
        {
        /* if the originating queue is unknown, default
         * to the locally defined "submit" queue.
         */
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        }

      free(var_p);

      /* job->oqueue is dereferenced below, so it must not be NULL. */
      if (job->oqueue == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->oqueue)");
        return (-1);
        }

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_l))
      {
      if (!strcmp(attr->resource, "arch"))
        {
        job->arch = schd_strdup(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "mem"))
        {
        job->memory = schd_val2byte(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "ncpus"))
        {
        job->ncpus = atoi(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime = schd_val2sec(attr->value);
        changed ++;

        }

      /* That's all for requested resources. */
      continue;
      }

    if (!strcmp(attr->name, ATTR_used))
      {
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime_used = schd_val2sec(attr->value);
        changed ++;
        }

      /* No other interesting cases. */
      continue;
      }

    /* Creation time attribute. */
    if (!strcmp(attr->name, ATTR_ctime))
      {
      /* How long ago was it put in the queue ? */
      job->time_queued = schd_TimeNow - atoi(attr->value);
      continue;
      }

    /* Modified time attribute. */
    if (!strcmp(attr->name, ATTR_mtime))
      {
      /* When was the job last modified? */
      job->mtime = atoi(attr->value);
      continue;
      }

    /* Job Substate attribute. */
    if (!strcmp(attr->name, ATTR_substate))
      {
      if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */)
        job->flags |= JFLAGS_SUSPENDED;

      continue;
      }

    /*
     * When was the job last eligible to run?  When a user-hold is
     * released, this value is updated to the current time.  This
     * prevents users from gaining higher priority from holding their
     * jobs.
     */
    if (!strcmp(attr->name, ATTR_etime))
      {
      job->eligible = schd_TimeNow - atoi(attr->value);

      continue;
      }
    }

  /*
   * If the job carried no variable list at all, the originating queue was
   * never set.  Apply the same default as above (the submit queue) so the
   * uses of job->oqueue below never see a NULL pointer.
   */
  if (job->oqueue == NULL)
    {
    job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);

    if (job->oqueue == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(job->oqueue)");
      return (-1);
      }
    }

  /* No usable memory request: use the queue default and tell PBS. */
  if (job->memory < 1)
    {
    job->memory = get_default_mem(job->oqueue);
    schd_alterjob(connector, job, ATTR_l, schd_byte2val(job->memory), "mem");
    changed++;
    }

  /*
   * If this job is in the "Running" or "Suspended" state, compute how
   * many seconds remain until it is completed.
   */
  if (job->state == 'R' || job->state == 'S')
    {
    job->time_left = job->walltime - job->walltime_used;
    }

  /*
   * If this job was enqueued since the last time we ran, set the job
   * flag to indicate that we have not yet seen this job.  This makes it
   * a candidate for additional processing.  There may be some inaccuracy,
   * since the time_t has resolution of 1 second.  Attempt to err on the
   * side of caution.
   */
  if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED))
    {
    if (job->time_queued <= (schd_TimeNow - schd_TimeLast))
      {
      job->flags |= JFLAGS_FIRST_SEEN;
      }
    }

  /*
   * If this job was previously running and is now queued, then we
   * need to (a) flag it as having been checkpointed, and (b) move
   * it back to the submit queue, if it's not already there.
   */
  if (job->exechost && job->state == 'Q')
    {
    job->flags |= JFLAGS_CHKPTD;

    /* Without a known queue name there is nothing to compare against. */
    if (job->qname != NULL &&
        strcmp(job->qname, schd_SubmitQueue->queue->qname))
      {
      sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q",
              job->jobid);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      pbs_movejob(connector, job->jobid, schd_SubmitQueue->queue->qname,
                  NULL);
      }
    }

  /*
   * if this job is currently Suspended (a substate of 'R'unning), then
   * pretend it's queued, so that the scheduling logic will work.
   */
  if (job->state == 'S')
    {
    job->state = 'Q';
    job->flags |= JFLAGS_SUSPENDED;
    }

  /* if this job is suspended, checkpointed, or otherwise "queued"
   * on an execution queue, update the internal representation
   * to pretend it is really on the submit queue.
   */

  if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD))
    {
    free(job->qname);
    job->qname = schd_strdup(schd_SubmitQueue->queue->qname);

    if (job->qname == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(job->qname)");
      return (-1);
      }
    }

  /*
   * If this job came from the EXPRESS queue, set the flag so that it
   * will be treated with the highest of priority.
   */
  if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME))
    job->flags |= JFLAGS_PRIORITY;

  /*
   * If the 'etime' attribute wasn't found, set it to the time the job has
   * been queued.  Most jobs will be eligible to run their entire lifetime.
   * The exception is a job that has been held - if it was a user hold,
   * the release will reset the etime to the latest value.
   * If no eligible time was given, use the job's creation time.
   */
  if (!job->eligible)
    job->eligible = job->time_queued;

  /* if this job has waited too long, and its queue is NOT over its
   * shares, then bump it up in priority.
   */
  if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100)
    job->flags |= JFLAGS_WAITING;

  return (changed);
  }
Ejemplo n.º 3
0
/*
 * Ask PBS to start 'job' on 'exechost', or to resume it if it carries the
 * JFLAGS_SUSPENDED flag.
 *
 *   job         - the job to start or resume.
 *   destq       - queue to run the job in.  For a job that is not
 *                 suspended, PBS is asked to move the job there first when
 *                 destq is non-NULL and its name differs from the job's
 *                 current queue.  May be NULL, in which case the job's
 *                 current queue is used in log messages.
 *   exechost    - passed to pbs_runjob() as the place to run the job.
 *   set_comment - if nonzero, set a "Started on <date>" comment on the job
 *                 before starting it.
 *
 * On success the local job state becomes 'R' and the counters of the job's
 * queue are updated.  Returns 0 on success, -1 if the move, run or resume
 * request failed.
 */
int
schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment)
  {
  char   *id = "schd_run_job_on";
  char    reason[128], tmp_word[20];
  char   *date;
  const char *run_qname;
  Queue  *srcq = NULL;
  int     ret = 0;

  /*
   * Queue name used in log messages.  destq is optional, so fall back to
   * the queue the job is currently on rather than dereferencing NULL.
   */
  run_qname = (destq != NULL) ? destq->qname : job->queue->qname;

  /* Get the datestamp from 'ctime()'.  Remove the trailing '\n'. */
  date = ctime(&schd_TimeNow);
  date[strlen(date) - 1] = '\0';

  if (set_comment)
    {
    sprintf(reason, "Started on %s", date);

    if (job->flags & JFLAGS_PRIORITY)
      {
      strcat(reason, " (EXPRESS/high priority job)");
      }

    if (job->flags & JFLAGS_WAITING)
      {
      strcat(reason, " (long-waiting job)");
      }

    schd_comment_job(job, reason, JOB_COMMENT_REQUIRED);
    }

  /* If this is NOT a suspended job... */
  if (!(job->flags & JFLAGS_SUSPENDED))
    {

    /*
     * If a destination Queue is provided, and it is different from the
     * source queue, then ask PBS to move the job to that queue before
     * running it.
     */
    srcq = job->queue;

    /*
     * Move the job from its queue to the specified run queue.
     */

    if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0))
      {
      if (pbs_movejob(connector, job->jobid, destq->qname, NULL))
        {
        (void)sprintf(log_buffer, "move job %s to queue %s failed, %d",
                      job->jobid, destq->qname, pbs_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
        }

      /* Keep the local bookkeeping in step with the server-side move. */
      schd_move_job_to(job, destq);
      }

    /*
    * Give the job handle (JOBID) to PBS to run.
    */
    if (pbs_runjob(connector, job->jobid, exechost, NULL))
      {
      (void)sprintf(log_buffer, "failed start job %s on queue %s@%s, %d",
                    job->jobid, run_qname, exechost, pbs_errno);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      /*
       * Running failed! Move the job back to the source queue (if
       * applicable) before returning. This prevents jobs being left
       * in execution queues.
       */

      if (srcq)
        {
        DBPRT(("Attempting to move job %s back to queue %s\n",
               job->jobid, srcq->qname));

        if (pbs_movejob(connector, job->jobid, srcq->qname, NULL))
          {
          (void)sprintf(log_buffer,
                        "failed to move job %s back to queue %s, %d",
                        job->jobid, srcq->qname, pbs_errno);
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     log_buffer);
          DBPRT(("%s: %s\n", id, log_buffer));
          }

        /*
         * NOTE(review): the local representation is moved back even
         * when the server-side move back failed - confirm intended.
         */
        schd_move_job_to(job, srcq);
        }

      return (-1);
      }

    strcpy(tmp_word, "started");
    }
  else    /* it IS a suspended job */
    {

    /* Only re-home the local representation when a queue was given. */
    if (destq != NULL)
      {
      schd_move_job_to(job, destq);
      }

    ret = pbs_sigjob(connector, job->jobid, "resume", NULL);

    if (ret)
      {
      sprintf(log_buffer, "resume of job %s FAILED (%d)",
              job->jobid, ret);
      /* Actually emit the message; it used to be formatted and dropped. */
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      /*
       * NOTE(review): the local move to destq above is not undone on
       * this failure path - confirm whether callers rely on that.
       */
      return (-1);
      }

    job->flags &= ~JFLAGS_SUSPENDED;

    strcpy(tmp_word, "resumed");
    }

  /* PBS accepted the job (and presumably will run it). Log the fact. */
  (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, tmp_word,
                run_qname, exechost);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  DBPRT(("%s: %s\n", id, log_buffer));

  /*
   * Change the state of the local representation of the job to "Running".
   */
  job->state = 'R';

  /*
   * Account for the job on this queue's statistics.  'queued' will be
   * bumped up if the queued job was moved to a new destination queue.
   */

  job->queue->queued --;

  job->queue->running ++;

  /* The queue is no longer idle.  Unset the idle timer. */
  job->queue->idle_since = 0;

  return (0);    /* Job successfully started. */
  }