/*
 *
 *      run_update_job - run the job and update the job information
 *
 *        pbs_sd - connection to pbs_server
 *        sinfo  - server job is on
 *        qinfo  - queue job resides in
 *        jinfo  - the job to run
 *
 *      returns success/failure - see pbs_errno for more info
 *        (the value returned is the return code of pbs_runjob();
 *         0 is treated as success)
 *
 *      Before the run request is sent, the job comment is set to a
 *      "started on ..." message.  If the run request fails, the comment is
 *      replaced with the error text reported by the server.
 *
 */
int run_update_job(int pbs_sd, server_info *sinfo, queue_info *qinfo, job_info *jinfo)
{
  int ret;                       /* return code from pbs_runjob() */
  node_info *best_node = NULL;   /* best node to run job on */
  char *best_node_name = NULL;   /* name of best node */
  char buf[256] = {'\0'};        /* generic buffer - comments & logging */
  char timebuf[128];             /* buffer to hold the time and date */
  resource_req *res;             /* ptr to the resource of ncpus */
  int ncpus;                     /* numeric amount of resource ncpus */
  const char *errmsg;            /* used for pbs_geterrmsg() */
  struct tm *now;                /* broken-down form of the current cycle time */

  /* localtime() may return NULL and strftime() returns 0 when the result
   * does not fit (leaving the buffer indeterminate); fall back to a fixed
   * string so timebuf is always a valid C string.
   */
  now = localtime(&cstat.current_time);

  if (now == NULL ||
      strftime(timebuf, sizeof(timebuf), "started on %a %b %d at %H:%M", now) == 0)
    snprintf(timebuf, sizeof(timebuf), "started");

  if (cstat.load_balancing || cstat.load_balancing_rr)
  {
    best_node = find_best_node(jinfo, sinfo -> timesharing_nodes);

    if (best_node != NULL)
    {
      best_node_name = best_node -> name;
      /* bounded: the node name length is not under our control */
      snprintf(buf, sizeof(buf), "Job run on node %s - %s", best_node_name, timebuf);
    }
  }

  if (best_node == NULL)
    snprintf(buf, sizeof(buf), "Job %s", timebuf);

  update_job_comment(pbs_sd, jinfo, buf);

  buf[0] = '\0';

  /* best_node_name is NULL when no specific node was chosen */
  ret = pbs_runjob(pbs_sd, jinfo -> name, best_node_name, NULL);

  if (ret == 0)
  {
    /* If a job is 100% efficient, it will raise the load average by 1 per
     * cpu it uses.  Temporarily inflate load average by that value
     */
    if (cstat.load_balancing && best_node != NULL)
    {
      if ((res = find_resource_req(jinfo -> resreq, "ncpus")) == NULL)
        ncpus = 1;
      else
        ncpus = res -> amount;

      best_node -> loadave += ncpus;
    }

    if (cstat.help_starving_jobs && jinfo == cstat.starving_job)
      jinfo -> sch_priority = 0;

    sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, jinfo -> name, "Job Run");

    update_server_on_run(sinfo, qinfo, jinfo);

    update_queue_on_run(qinfo, jinfo);

    update_job_on_run(pbs_sd, jinfo);

    if (cstat.fair_share)
      update_usage_on_run(jinfo);

    /* rebuild the running-job lists of the server and the queue so they
     * include the job that was just started
     */
    free(sinfo -> running_jobs);

    sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total,
                                       check_run_job, NULL);

    free(qinfo -> running_jobs);

    qinfo -> running_jobs = job_filter(qinfo -> jobs, qinfo -> sc.total,
                                       check_run_job, NULL);
  }
  else
  {
    /* the server may have no message text for this connection; never hand
     * a NULL pointer to a %s conversion
     */
    errmsg = pbs_geterrmsg(pbs_sd);

    if (errmsg == NULL)
      errmsg = "unknown error";

    /* bounded: the server-supplied message may exceed the buffer */
    snprintf(buf, sizeof(buf), "Not Running - PBS Error: %s", errmsg);

    update_job_comment(pbs_sd, jinfo, buf);
  }

  return ret;
}
/* * * query_server - creates a structure of arrays consisting of a server * and all the queues and jobs that reside in that server * * pbs_sd - connection to pbs_server * * returns a pointer to the server_info struct * */ server_info *query_server(int pbs_sd) { struct batch_status *server; /* info about the server */ server_info *sinfo; /* scheduler internal form of server info */ queue_info **qinfo; /* array of queues on the server */ resource *res; /* ptr to cycle through sources on server */ int local_errno = 0; /* get server information from pbs server */ if ((server = pbs_statserver_err(pbs_sd, NULL, NULL, &local_errno)) == NULL) { fprintf(stderr, "pbs_statserver failed: %d\n", local_errno); return NULL; } /* convert batch_status structure into server_info structure */ if ((sinfo = query_server_info(server)) == NULL) { pbs_statfree(server); return NULL; } /* get the nodes, if any */ sinfo -> nodes = query_nodes(pbs_sd, sinfo); /* get the queues */ if ((sinfo -> queues = query_queues(pbs_sd, sinfo)) == NULL) { pbs_statfree(server); free_server(sinfo, 0); return NULL; } /* count the queues and total up the individual queue states * for server totals. (total up all the state_count structs) */ qinfo = sinfo -> queues; while (*qinfo != NULL) { sinfo -> num_queues++; total_states(&(sinfo -> sc), &((*qinfo) -> sc)); qinfo++; } if ((sinfo -> jobs = (job_info **) malloc(sizeof(job_info *) * (sinfo -> sc.total + 1))) == NULL) { free_server(sinfo, 1); perror("Memory allocation error"); return NULL; } set_jobs(sinfo); sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total, check_run_job, NULL); res = sinfo -> res; while (res != NULL) { if (res -> assigned == UNSPECIFIED) res -> assigned = calc_assn_resource(sinfo -> running_jobs, res -> name); res = res -> next; } sinfo -> timesharing_nodes = node_filter(sinfo -> nodes, sinfo -> num_nodes, is_node_timeshared, NULL); pbs_statfree(server); return sinfo; }