Beispiel #1
0
/*
 *
 * run_update_job - run the job and update the job information
 *
 *   pbs_sd - connection to pbs_server
 *   sinfo  - server job is on
 *   qinfo  - queue job resides in
 *   jinfo  - the job to run
 *
 * Before the run request is sent, the job comment is set to a
 * "started on ..." message (naming the chosen node when load balancing
 * selected one).  If pbs_runjob() returns 0, the scheduler's cached
 * server, queue, job and (with fair share enabled) usage information is
 * updated and the running_jobs arrays of the server and the queue are
 * rebuilt.  Otherwise the job comment is replaced by the PBS error
 * message.
 *
 * returns the value returned by pbs_runjob(): 0 on success, non-zero
 *   on failure - see pbs_errno for more info
 *
 */
int run_update_job(int pbs_sd, server_info *sinfo, queue_info *qinfo,
                   job_info *jinfo)
  {
  int ret;    /* return code from pbs_runjob() */
  node_info *best_node = NULL;  /* best node to run job on */
  char *best_node_name = NULL;  /* name of best node */
  char buf[256] = {'\0'};  /* generic buffer - comments & logging*/
  char timebuf[128];   /* buffer to hold the time and date */
  struct tm *now;   /* broken-down form of the current cycle time */
  resource_req *res;   /* ptr to the resource of ncpus */
  int ncpus;    /* numeric amount of resource ncpus */
  char *errmsg;    /* used for pbs_geterrmsg() */

  /* localtime() may return NULL and strftime() returns 0 when the result
   * does not fit; in either case fall back to an empty time string rather
   * than formatting an indeterminate buffer.
   */
  now = localtime(&cstat.current_time);

  if (now == NULL ||
      strftime(timebuf, sizeof(timebuf), "started on %a %b %d at %H:%M", now) == 0)
    {
    timebuf[0] = '\0';
    }

  if (cstat.load_balancing || cstat.load_balancing_rr)
    {
    best_node = find_best_node(jinfo, sinfo -> timesharing_nodes);

    if (best_node != NULL)
      {
      best_node_name = best_node -> name;

      /* bounded write: the node name length is not under our control */
      snprintf(buf, sizeof(buf), "Job run on node %s - %s",
               best_node_name, timebuf);
      }
    }

  if (best_node == NULL)
    {
    snprintf(buf, sizeof(buf), "Job %s", timebuf);
    }

  /* the comment is posted before the run request is issued */
  update_job_comment(pbs_sd, jinfo, buf);

  /* best_node_name stays NULL when no specific node was selected */
  ret = pbs_runjob(pbs_sd, jinfo -> name, best_node_name, NULL);

  if (ret == 0)
    {
    /* If a job is 100% efficient, it will raise the load average by 1 per
     * cpu it uses.  Temporarily inflate the load average by that value.
     */
    if (cstat.load_balancing && best_node != NULL)
      {
      if ((res = find_resource_req(jinfo -> resreq, "ncpus")) == NULL)
        ncpus = 1;
      else
        ncpus = res -> amount;

      best_node -> loadave += ncpus;
      }

    /* the starving job has been started, so reset its priority */
    if (cstat.help_starving_jobs && jinfo == cstat.starving_job)
      jinfo -> sch_priority = 0;

    sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, jinfo -> name, "Job Run");

    update_server_on_run(sinfo, qinfo, jinfo);

    update_queue_on_run(qinfo, jinfo);

    update_job_on_run(pbs_sd, jinfo);

    if (cstat.fair_share)
      update_usage_on_run(jinfo);

    /* rebuild the running job arrays of the server and of the queue */
    free(sinfo -> running_jobs);

    sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total,
                                       check_run_job, NULL);

    free(qinfo -> running_jobs);

    qinfo -> running_jobs = job_filter(qinfo -> jobs, qinfo -> sc.total,
                                       check_run_job, NULL);
    }
  else
    {
    /* the error text may be missing (NULL) and is of arbitrary length */
    errmsg = pbs_geterrmsg(pbs_sd);

    snprintf(buf, sizeof(buf), "Not Running - PBS Error: %s",
             errmsg != NULL ? errmsg : "unknown error");

    update_job_comment(pbs_sd, jinfo, buf);
    }

  return ret;
  }
Beispiel #2
0
/*
 *
 * query_server - creates a structure of arrays consisting of a server
 *   and all the queues and jobs that reside in that server
 *
 *   pbs_sd - connection to pbs_server
 *
 * returns a pointer to the server_info struct, or NULL if the server
 *   status could not be obtained, the server info or the queues could
 *   not be built, or the job array could not be allocated.  The
 *   batch_status list obtained from the server is released with
 *   pbs_statfree() on every path once it has been obtained.
 *
 */
server_info *query_server(int pbs_sd)
  {

  struct batch_status *server; /* info about the server */
  server_info *sinfo;  /* scheduler internal form of server info */
  queue_info **qinfo;  /* array of queues on the server */
  resource *res;  /* ptr to cycle through sources on server */
  int       local_errno = 0;

  /* get server information from pbs server */

  if ((server = pbs_statserver_err(pbs_sd, NULL, NULL, &local_errno)) == NULL)
    {
    fprintf(stderr, "pbs_statserver failed: %d\n", local_errno);
    return NULL;
    }

  /* convert batch_status structure into server_info structure */
  if ((sinfo = query_server_info(server)) == NULL)
    {
    pbs_statfree(server);
    return NULL;
    }

  /* get the nodes, if any */
  sinfo -> nodes = query_nodes(pbs_sd, sinfo);

  /* get the queues */
  if ((sinfo -> queues = query_queues(pbs_sd, sinfo)) == NULL)
    {
    pbs_statfree(server);
    free_server(sinfo, 0);
    return NULL;
    }

  /* count the queues and total up the individual queue states
   * for server totals. (total up all the state_count structs)
   */
  qinfo = sinfo -> queues;

  while (*qinfo != NULL)
    {
    sinfo -> num_queues++;
    total_states(&(sinfo -> sc), &((*qinfo) -> sc));
    qinfo++;
    }

  /* one slot per job counted above plus one extra slot */
  sinfo -> jobs = malloc(sizeof(*sinfo -> jobs) * (sinfo -> sc.total + 1));

  if (sinfo -> jobs == NULL)
    {
    /* report first, while errno still describes the failed malloc() */
    perror("Memory allocation error");

    /* release the server status list here as well, as on the other
     * error paths
     */
    pbs_statfree(server);
    free_server(sinfo, 1);
    return NULL;
    }

  set_jobs(sinfo);

  sinfo -> running_jobs =
    job_filter(sinfo -> jobs, sinfo -> sc.total, check_run_job, NULL);

  /* fill in the assigned amount of every server resource that is still
   * UNSPECIFIED, computed from the running jobs
   */
  res = sinfo -> res;

  while (res != NULL)
    {
    if (res -> assigned == UNSPECIFIED)
      res -> assigned = calc_assn_resource(sinfo -> running_jobs, res -> name);

    res = res -> next;
    }

  sinfo -> timesharing_nodes =
    node_filter(sinfo -> nodes, sinfo -> num_nodes, is_node_timeshared, NULL);

  pbs_statfree(server);

  return sinfo;
  }