/** * Gets the status of the job. * * It maps the different states of PBS jobs to * pending and running. It does not make a difference between finished, * cancelled, terminated and unknown jobs since PBS does not store this info. * @param jobid is the PID assigned by the queue * @return 0 if correct, non-zero if error */ int rm_getJobStatus(struct soap* s, char* jobid, char* user, struct bes__ActivityStatusType** jobStatus) { struct bes__ActivityStatusType *activityStatus; int connectionIdentifier; //! stores the status of a job struct batch_status* status; if (!jobid || !jobStatus) { return BESE_BAD_ARG; } connectionIdentifier = pbs_connect(server); if (!connectionIdentifier) return BESE_BACKEND; status = pbs_statjob(connectionIdentifier,jobid,NULL,NULL); pbs_disconnect(connectionIdentifier); if(status == NULL) { return BESE_NO_ACTIVITY; } activityStatus = (struct bes__ActivityStatusType*)soap_malloc(s, sizeof(struct bes__ActivityStatusType)); if (activityStatus == NULL) { return BESE_MEM_ALLOC; } memset(activityStatus, 0, sizeof(struct bes__ActivityStatusType)); struct attrl* attrList = status->attribs; while (attrList != NULL) { if(!strcmp(attrList->name, ATTR_state)) { if(!strcmp(attrList->value, "T")) { activityStatus->state = Pending; } else if(!strcmp(attrList->value, "Q")) { activityStatus->state = Pending; } else if(!strcmp(attrList->value,"H")) { activityStatus->state = Pending; } else if(!strcmp(attrList->value,"W")){ activityStatus->state = Pending; } else if(!strcmp(attrList->value,"R")){ activityStatus->state = Running; } else if(!strcmp(attrList->value,"E")) { activityStatus->state = Finished; } pbs_statfree(status); *jobStatus = activityStatus; return BESE_OK; } attrList = attrList->next; } pbs_statfree(status); return BESE_NO_ACTIVITY; }
/** * Gets the information about a job. * * It stores the info in a module variable. * In order to retrieve it, use @see readJobInfo. * @param jobid is the PID assigned by the queue * @return 0 if correct, non-zero if error */ int rm_getJobInfo(struct soap* soap, char* jobid, char* user, struct jobcard** jobInfo ) { //! stores the status of a job struct batch_status* status; int connectionIdentifier; struct jobcard* job; connectionIdentifier = pbs_connect(server); if(!connectionIdentifier) return BESE_BACKEND; status = pbs_statjob(connectionIdentifier, jobid, NULL, NULL); pbs_disconnect(connectionIdentifier); if(status == NULL) return BESE_NO_ACTIVITY; job = (struct jobcard*)soap_malloc(soap, sizeof(struct jobcard)); if (!job) return BESE_MEM_ALLOC; memset(job, 0, sizeof(struct jobcard)); fillJobStatusDefaults(job); convertJobInfo(soap, job, status); *jobInfo = job; pbs_statfree(status); return BESE_OK; }
/** * @brief * Returns pointer to status record * * @param[in] c - index into connection table * * @return returns a pointer to a batch_status structure * @retval pointer to batch status on SUCCESS * @retval NULL on failure */ struct batch_status *PBSD_status_get(int c) { struct brp_cmdstat *stp; /* pointer to a returned status record */ struct batch_status *bsp = NULL; struct batch_status *rbsp = NULL; struct batch_reply *reply; int i; /* read reply from stream into presentation element */ reply = PBSD_rdrpy(c); if (reply == NULL) { pbs_errno = PBSE_PROTOCOL; } else if (reply->brp_choice != BATCH_REPLY_CHOICE_NULL && reply->brp_choice != BATCH_REPLY_CHOICE_Text && reply->brp_choice != BATCH_REPLY_CHOICE_Status) { pbs_errno = PBSE_PROTOCOL; } else if (connection[c].ch_errno == 0) { /* have zero or more attrl structs to decode here */ stp = reply->brp_un.brp_statc; i = 0; pbs_errno = 0; while (stp != NULL) { if (i++ == 0) { rbsp = bsp = alloc_bs(); if (bsp == NULL) { pbs_errno = PBSE_SYSTEM; break; } } else { bsp->next = alloc_bs(); bsp = bsp->next; if (bsp == NULL) { pbs_errno = PBSE_SYSTEM; break; } } if ((bsp->name = strdup(stp->brp_objname)) == NULL) { pbs_errno = PBSE_SYSTEM; break; } bsp->attribs = stp->brp_attrl; if (stp->brp_attrl) stp->brp_attrl = 0; bsp->next = NULL; stp = stp->brp_stlink; } if (pbs_errno) { pbs_statfree(rbsp); rbsp = NULL; } } PBSD_FreeReply(reply); return rbsp; }
int main( int ArgC, /* I */ char **ArgV) /* I */ { const char *OptString = "c:Cd:f:h:lp:q:r:sv"; char HostList[65536]; char *HPtr; int c; int rc = PBSE_NONE; int local_errno = 0; int HostCount; int FailCount; /* initialize */ HostList[0] = '\0'; ConfigBuf[0] = '\0'; if (IamRoot() == 0) { exit(EXIT_FAILURE); } while ((c = getopt(ArgC, ArgV, OptString)) != EOF) { switch (c) { case 'c': /* clear job */ JPtr = optarg; CmdIndex = momClear; break; case 'C': /* force cycle */ CmdIndex = momQuery; Query[QueryI] = strdup("cycle"); QueryI++; break; case 'd': /* diagnose */ /* FORMAT: momctl -d<X> */ CmdIndex = momQuery; if ((Query[QueryI] = (char *)calloc(strlen(DiagPtr) + 3, sizeof(char))) == NULL) { fprintf(stderr,"ERROR: could not calloc %d bytes!\n", (int)strlen(DiagPtr) + 3); exit(EXIT_FAILURE); } if (optarg == NULL) { strncpy(Query[QueryI],DiagPtr,strlen(DiagPtr)); } else { snprintf(Query[QueryI],strlen(DiagPtr) + 2,"%s%s", DiagPtr, optarg); } QueryI++; break; case 'f': { int rc; FILE *fp; long size; if ((fp = fopen(optarg, "r")) == NULL) { fprintf(stderr, "ERROR: cannot open file '%s', errno: %d (%s)\n", optarg, errno, strerror(errno)); exit(EXIT_FAILURE); } rc = fread(HostList, sizeof(HostList), 1, fp); if ((rc == 0) && (!feof(fp))) { fprintf(stderr, "ERROR: cannot read file '%s', errno: %d (%s)\n", optarg, errno, strerror(errno)); exit(EXIT_FAILURE); } if ((size = ftell(fp)) < 0) size = 0; HostList[MIN(size,(long)sizeof(HostList) - 1)] = '\0'; fclose(fp); } /* END BLOCK */ break; case 'h': /* connect to specified host */ snprintf(HostList, sizeof(HostList), "%s", optarg); break; case 'l': CmdIndex = momLayout; break; case 'p': /* port */ if (optarg == NULL) MCShowUsage("port not specified"); MOMPort = (int)strtol(optarg, NULL, 10); if (MOMPort == 0) MCShowUsage("invalid port specified"); break; case 'q': /* query resources */ if (optarg == NULL) { MCShowUsage("query not specified"); Query[QueryI] = strdup(DiagPtr); } else { Query[QueryI] = strdup(optarg); } 
QueryI++; CmdIndex = momQuery; break; case 'r': /* reconfigure */ { CmdIndex = momReconfig; /* NOTE: specify remote file to load -> 'fname' */ /* specify local file to stage -> 'LOCAL:fname' */ if (optarg == NULL) MCShowUsage("file not specified"); if (!strncmp(optarg, "LOCAL:", strlen("LOCAL:"))) { FILE *fp; int size; int rc; char *ptr; char *cptr; strcpy(ConfigBuf, "CONFIG:"); cptr = ConfigBuf + strlen(ConfigBuf); ptr = optarg + strlen("LOCAL:"); if ((fp = fopen(ptr, "r")) == NULL) { fprintf(stderr, "ERROR: cannot open file '%s', errno: %d (%s)\n", optarg, errno, strerror(errno)); exit(EXIT_FAILURE); } rc = fread(cptr, sizeof(ConfigBuf) - strlen(ConfigBuf), 1, fp); if ((rc == 0) && (!feof(fp))) { fprintf(stderr, "ERROR: cannot read file '%s', errno: %d (%s)\n", optarg, errno, strerror(errno)); exit(EXIT_FAILURE); } size = ftell(fp); ConfigBuf[MIN(size + strlen("CONFIG:"),sizeof(ConfigBuf) - 1)] = '\0'; fclose(fp); } else { snprintf(ConfigBuf, sizeof(ConfigBuf), "%s", optarg); } } /* END (case 'r') */ break; case 's': /* shutdown */ CmdIndex = momShutdown; break; case 'v': /* report verbose logging */ IsVerbose = TRUE; break; } /* END switch (c) */ } /* END while (c = getopt()) */ if (CmdIndex == momNONE) { MCShowUsage("no command specified"); } if (HostList[0] == '\0') snprintf(HostList, sizeof(HostList), "%s", LocalHost); HPtr = strtok(HostList, ", \t\n"); HostCount = 0; FailCount = 0; /* at this point, all args processing and setup is completed ... * ... 
now we run through each comma-delimited word in HPtr */ while (HPtr != NULL) { if ((*HPtr == ':') && (*(HPtr + 1) != '\0')) { /* finds nodes with this property */ int con; char *def_server, *pserver, *servername; struct batch_status *bstatus, *pbstat; struct attrl *nodeattrs; def_server = pbs_default(); if ((pserver = strchr(HPtr,'@')) != NULL) { *pserver = '\0'; servername = pserver + 1; } else { servername = def_server; } con = pbs_connect(servername); if (con < 0) { fprintf(stderr,"failed to connect to pbs_server:%s\n", servername); exit(EXIT_FAILURE); } /* get a batch_status entry for each node in ":property" */ bstatus = pbs_statnode_err(con,HPtr,NULL,NULL, &local_errno); if (bstatus != NULL) { for (pbstat = bstatus;pbstat != NULL;pbstat = pbstat->next) { /* check state first, only do_mom() if not down */ for (nodeattrs = pbstat->attribs;nodeattrs != NULL; nodeattrs = nodeattrs->next) { if (!strcmp(nodeattrs->name, ATTR_NODE_state)) { if (!strstr(nodeattrs->value, ND_down)) { if ((rc = perform_communications_with_retry(pbstat->name, MOMPort, &FailCount)) == PBSE_NONE) HostCount++; } else { fprintf(stderr,"%12s: skipping down node\n", pbstat->name); } break; } /* END if (attrib name eq state) */ } /* END for (nodeattrs) */ } /* END for (pbstat) */ pbs_statfree(bstatus); } /* END if (bstatus != NULL) */ else { fprintf(stderr,"no nodes found in %s on %s\n", HPtr, servername); } pbs_disconnect(con); if (pserver != NULL) *pserver = '@'; } else { if ((rc = perform_communications_with_retry(HPtr, MOMPort, &FailCount)) == PBSE_NONE) HostCount++; } /* END if (*HPtr == ':') */ HPtr = strtok(NULL, ", \t\n"); } /* END while (HPtr != NULL) */ if (IsVerbose == TRUE) { fprintf(stdout, "Node Summary: %d Successful %d Failed\n", HostCount, FailCount); } /* test success of do_mom before returning success */ if (rc != PBSE_NONE) exit(EXIT_FAILURE); /* SUCCESS */ exit(EXIT_SUCCESS); } /* END main() */
int main( int argc, /* I */ char **argv) /* I */ { struct batch_status *bstatus = NULL; int con; char *specified_server = NULL; int errflg = 0; int i; extern char *optarg; extern int optind; char **pa; struct batch_status *pbstat; int flag = ALLI; char *note = NULL; enum note_flags note_flag = unused; char **nodeargs = NULL; int lindex; enum NStateEnum ListType = tnsNONE; /* get default server, may be changed by -s option */ progname = strdup(argv[0]); while ((i = getopt(argc, argv, "acdlopqrs:x-:N:n")) != EOF) { switch (i) { case 'a': flag = ALLI; break; case 'c': flag = CLEAR; break; case 'd': flag = DIAG; break; case 'l': flag = LIST; break; case 'o': flag = OFFLINE; break; case 'p': flag = PURGE; break; case 'q': quiet = 1; break; case 'r': flag = RESET; break; case 's': specified_server = optarg; break; case 'x': flag = ALLI; DisplayXML = TRUE; break; case 'N': /* preserve any previous option other than the default, * to allow -N to be combined with -o, -c, etc */ if (flag == ALLI) flag = NOTE; note = strdup(optarg); if (note == NULL) { perror("Error: strdup() returned NULL"); exit(1); } note_flag = set; /* -N n is the same as -N "" -- it clears the note */ if (!strcmp(note, "n")) *note = '\0'; if (strlen(note) > MAX_NOTE) { fprintf(stderr, "Warning: note exceeds length limit (%d) - server may reject it...\n", MAX_NOTE); } if (strchr(note, '\n') != NULL) fprintf(stderr, "Warning: note contains a newline - server may reject it...\n"); break; case 'n': note_flag = list; break; case '-': if ((optarg != NULL) && !strcmp(optarg, "version")) { fprintf(stderr, "Version: %s\nRevision: %s\n", PACKAGE_VERSION, SVN_VERSION); exit(0); } else if ((optarg != NULL) && !strcmp(optarg, "about")) { TShowAbout_exit(); } errflg = 1; break; case '?': default: errflg = 1; break; } /* END switch (i) */ } /* END while (i = getopt()) */ if ((note_flag == list) && (flag != LIST)) { fprintf(stderr, "Error: -n requires -l\n"); errflg = 1; } for (pa = argv + optind;*pa;pa++) { if 
(strlen(*pa) == 0) { errflg = 1; } } if (errflg != 0) { if (!quiet) { fprintf(stderr, "usage:\t%s [-{c|d|l|o|p|r}] [-s server] [-n] [-N \"note\"] [-q] node ...\n", progname); fprintf(stderr, "\t%s [-{a|x}] [-s server] [-q] [node]\n", progname); } exit(1); } con = cnt2server(specified_server); if (con <= 0) { if (!quiet) { fprintf(stderr, "%s: cannot connect to server %s, error=%d (%s)\n", progname, (specified_server) ? specified_server : pbs_default(), con * -1, pbs_strerror(con * -1)); } exit(1); } /* if flag is ALLI, LIST, get status of all nodes */ if ((flag == ALLI) || (flag == LIST) || (flag == DIAG)) { if ((flag == ALLI) || (flag == LIST) || (flag == DIAG)) { if (flag == LIST) { /* allow state specification */ if (argv[optind] != NULL) { for (lindex = 1;lindex < tnsLAST;lindex++) { if (!strcasecmp(NState[lindex], argv[optind])) { ListType = lindex; optind++; break; } } } } /* allow node specification (if none, then create an empty list) */ if (argv[optind] != NULL) { nodeargs = argv + optind; } else { nodeargs = calloc(2, sizeof(char **)); nodeargs[0] = strdup(""); nodeargs[1] = '\0'; } } } if ((note_flag == set) && (note != NULL)) { /* set the note attrib string on specified nodes */ for (pa = argv + optind;*pa;pa++) { set_note(con, *pa, note); } } switch (flag) { case DIAG: /* NYI */ break; case CLEAR: /* clear OFFLINE from specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, DECR, NULL, DECR); } break; case RESET: /* clear OFFLINE, add DOWN to specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, DECR, ND_down, INCR); } break; case OFFLINE: /* set OFFLINE on specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, INCR, NULL, INCR); } break; case PURGE: /* remove node record */ /* NYI */ break; case ALLI: if (DisplayXML == TRUE) { char *tmpBuf = NULL, *tail = NULL; int bufsize; mxml_t *DE; DE = NULL; MXMLCreateE(&DE, "Data"); for (lindex = 0;nodeargs[lindex] != 
'\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat;pbstat = pbstat->next) { addxmlnode(DE, pbstat); } /* END for (pbstat) */ pbs_statfree(pbstat); } MXMLToXString(DE, &tmpBuf, &bufsize, INT_MAX, &tail, TRUE); MXMLDestroyE(&DE); fprintf(stdout, "%s\n", tmpBuf); } else { for (lindex = 0;nodeargs[lindex] != '\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat;pbstat = pbstat->next) { printf("%s\n", pbstat->name); prt_node_attr(pbstat, 0); putchar('\n'); } /* END for (bpstat) */ pbs_statfree(pbstat); } } break; case LIST: /* list any node that is DOWN, OFFLINE, or UNKNOWN */ for (lindex = 0;nodeargs[lindex] != '\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat != NULL;pbstat = pbstat->next) { char *S; S = get_nstate(pbstat); if (filterbystate(pbstat, ListType, S)) { char *n; if ((note_flag == list) && (n = get_note(pbstat))) { printf("%-20.20s %-26.26s %s\n", pbstat->name, S, n); } else { printf("%-20.20s %s\n", pbstat->name, S); } } } pbs_statfree(pbstat); } break; } /* END switch (flag) */ pbs_disconnect(con); return(0); } /* END main() */
/*
 * schd_get_queue_limits - query queue information from the server.
 *
 * Resets the limit fields of 'queue' to UNSPECIFIED, releases any job list
 * and user ACL still attached to it, asks the server for the queue's
 * attributes via pbs_statque() and stores the values found. The cpu and
 * memory limits are then converted to node counts, jobs belonging to the
 * queue are claimed from schd_AllJobs, the idle time is updated and the
 * resources of the queue's execution host are fetched.
 *
 * Returns 0 on success, -1 for "fatal errors", and 1 for a transient
 * error (i.e., the queue failed the sanity checks imposed by the
 * queue_sanity() function).
 */
int
schd_get_queue_limits(Queue *queue)
{
    char *id = "schd_get_queue_limits";
    int moved = 0, istrue;
    Batch_Status *bs;
    AttrList *attr;

    /* Attributes requested from the server, chained through element 0. */
    static AttrList alist[] = {
        {&alist[1], ATTR_start, "", ""},
        {&alist[2], ATTR_enable, "", ""},
        {&alist[3], ATTR_count, "", ""},
        {&alist[4], ATTR_maxuserrun, "", ""},
        {&alist[5], ATTR_rescavail, "", ""},
        {&alist[6], ATTR_rescassn, "", ""},
        {&alist[7], ATTR_rescdflt, "", ""},
        {&alist[8], ATTR_rescmax, "", ""},
        {&alist[9], ATTR_rescmin, "", ""},
        {&alist[10], ATTR_acluren, "", ""},
        {&alist[11], ATTR_acluser, "", ""},
        {NULL, ATTR_maxrun, "", ""}
    };

    /* Raw cpu/memory values from the server, converted to nodes below. */
    size_t mem_default = UNSPECIFIED;
    size_t mem_assn = UNSPECIFIED;
    size_t mem_max = UNSPECIFIED;
    size_t mem_min = UNSPECIFIED;
    int cpu_default = UNSPECIFIED;
    int cpu_assn = UNSPECIFIED;
    int cpu_max = UNSPECIFIED;
    int cpu_min = UNSPECIFIED;
    int nodes_from_cpu, nodes_from_mem;

    /* Start from a clean slate: everything is unknown until reported. */
    queue->running = UNSPECIFIED;
    queue->queued = UNSPECIFIED;
    queue->maxrun = UNSPECIFIED;
    queue->userrun = UNSPECIFIED;
    queue->nodes_max = UNSPECIFIED;
    queue->nodes_min = UNSPECIFIED;
    queue->nodes_default = UNSPECIFIED;
    queue->nodes_assn = UNSPECIFIED;
    queue->nodes_rsvd = UNSPECIFIED;
    queue->wallt_max = UNSPECIFIED;
    queue->wallt_min = UNSPECIFIED;
    queue->wallt_default = UNSPECIFIED;
    queue->flags = 0;
#ifdef NODEMASK
    BITFIELD_CLRALL(&queue->queuemask);
    BITFIELD_CLRALL(&queue->availmask);
#endif /* NODEMASK */
    /* The resource pointer is dropped without being freed here. */
    queue->rsrcs = NULL;

    /* Leftovers from an earlier call are released before being replaced. */
    if (queue->jobs) {
        DBPRT(("%s: found jobs on queue '%s'! Freeing them...\n", id,
               queue->qname));
        schd_free_jobs(queue->jobs);
    }

    if (queue->useracl) {
        DBPRT(("%s: found user ACL list on queue '%s'! Freeing it...\n", id,
               queue->qname));
        schd_free_useracl(queue->useracl);
    }

    queue->jobs = NULL;
    queue->useracl = NULL;

    /* Ask the server for information about the specified queue. */
    if ((bs = pbs_statque(connector, queue->qname, alist, NULL)) == NULL) {
        sprintf(log_buffer, "pbs_statque failed, \"%s\" %d",
                queue->qname, pbs_errno);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
    }

    /* Process the list of attributes returned by the server. */
    for (attr = bs->attribs; attr != NULL; attr = attr->next) {

        /* Is queue started? */
        if (!strcmp(attr->name, ATTR_start)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue is not stopped. */
                    queue->flags &= ~QFLAGS_STOPPED;
                else
                    queue->flags |= QFLAGS_STOPPED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* Is queue enabled? */
        if (!strcmp(attr->name, ATTR_enable)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue is not disabled. */
                    queue->flags &= ~QFLAGS_DISABLED;
                else
                    queue->flags |= QFLAGS_DISABLED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* How many jobs are queued and running? */
        if (!strcmp(attr->name, ATTR_count)) {
            queue->queued = schd_how_many(attr->value, SC_QUEUED);
            queue->running = schd_how_many(attr->value, SC_RUNNING);
            continue;
        }

        /* Queue-wide maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxrun)) {
            queue->maxrun = atoi(attr->value);
            continue;
        }

        /* Per-user maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxuserrun)) {
            queue->userrun = atoi(attr->value);
            continue;
        }

        /* Is there an enabled user access control list on this queue? */
        if (!strcmp(attr->name, ATTR_acluren)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue has an ACL */
                    queue->flags |= QFLAGS_USER_ACL;
                else
                    queue->flags &= ~QFLAGS_USER_ACL;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* The user ACL itself; a list set earlier in this loop is replaced. */
        if (!strcmp(attr->name, ATTR_acluser)) {
            if (queue->useracl) {
                DBPRT(("queue %s acluser already set!\n", queue->qname));
                schd_free_useracl(queue->useracl);
            }

            queue->useracl = schd_create_useracl(attr->value);

            continue;
        }

        /* Queue maximum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmax)) {
            if (!strcmp("mem", attr->resource)) {
                mem_max = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                cpu_max = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_max = schd_val2sec(attr->value);
                continue;
            }

#ifdef NODEMASK
            if (!strcmp("nodemask", attr->resource)) {
                if (schd_str2mask(attr->value, &queue->queuemask)) {
                    (void)sprintf(log_buffer,
                                  "couldn't convert nodemask %s", attr->value);
                    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                               log_buffer);
                } else
                    queue->flags |= QFLAGS_NODEMASK;	/* Valid nodemask. */
            }

#endif /* NODEMASK */

            continue;
        }

        /* Queue minimum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmin)) {
            if (!strcmp("mem", attr->resource)) {
                mem_min = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                cpu_min = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_min = schd_val2sec(attr->value);
                continue;
            }

            continue;
        }

        /* Queue assigned (in use) resource usage. */
        if (!strcmp(attr->name, ATTR_rescassn)) {
            if (!strcmp("mem", attr->resource)) {
                mem_assn = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                cpu_assn = atoi(attr->value);
            }

            continue;
        }

        /* Queue default resource usage. */
        if (!strcmp(attr->name, ATTR_rescdflt)) {
            if (!strcmp("mem", attr->resource)) {
                mem_default = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                cpu_default = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource))
                queue->wallt_default = schd_val2sec(attr->value);
        }

        /* Ignore anything else */
    }

    pbs_statfree(bs);

    /*
     * Calculate values for queue node limits, given memory and cpu values.
     * Note any discrepancies. A mismatch is only logged; it does not make
     * the function fail.
     */
    nodes_from_cpu = NODES_FROM_CPU(cpu_default);

    nodes_from_mem = NODES_FROM_MEM(mem_default);

    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer,
                "%s: Queue '%s' default cpu/mem (%d/%s) convert to %d != %d nodes",
                id, queue->qname, cpu_default, schd_byte2val(mem_default),
                nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    nodes_from_cpu = NODES_FROM_CPU(cpu_max);

    nodes_from_mem = NODES_FROM_MEM(mem_max);

    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer,
                "%s: Queue '%s' maximum cpu/mem (%d/%s) convert to %d != %d nodes",
                id, queue->qname, cpu_max, schd_byte2val(mem_max),
                nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    nodes_from_cpu = NODES_FROM_CPU(cpu_min);

    nodes_from_mem = NODES_FROM_MEM(mem_min);

    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer,
                "%s: Queue '%s' minimum cpu/mem (%d/%s) convert to %d != %d nodes",
                id, queue->qname, cpu_min, schd_byte2val(mem_min),
                nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    /*
     * Note: The assigned cpus and memory need not be exactly the same
     * node equivalency.
     *
     * A node count is only derived when both the cpu and the memory value
     * were reported; otherwise the field stays UNSPECIFIED.
     */
    if ((cpu_default != UNSPECIFIED) && (mem_default != UNSPECIFIED))
        queue->nodes_default = NODES_REQD(cpu_default, mem_default);

    if ((cpu_max != UNSPECIFIED) && (mem_max != UNSPECIFIED))
        queue->nodes_max = NODES_REQD(cpu_max, mem_max);

    if ((cpu_min != UNSPECIFIED) && (mem_min != UNSPECIFIED))
        queue->nodes_min = NODES_REQD(cpu_min, mem_min);

    if ((cpu_assn != UNSPECIFIED) && (mem_assn != UNSPECIFIED))
        queue->nodes_assn = NODES_REQD(cpu_assn, mem_assn);

    /*
     * Move any jobs on this queue from the global list onto the queue's
     * list.  Keep track of when the longest-running job will end, and set
     * the 'empty_by' field to that value.  Maintain the ordering as it was
     * in "schd_AllJobs".
     */

    if (schd_AllJobs)
        moved = queue_claim_jobs(queue, &schd_AllJobs);

    /* A failed claim is logged but does not abort the function. */
    if (moved < 0) {
        sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.",
                id, queue->qname);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    /* Counts the server did not report are treated as zero from here on. */
    if (queue->nodes_assn == UNSPECIFIED)
        queue->nodes_assn = 0;

    if (queue->running == UNSPECIFIED)
        queue->running = 0;

    /*
     * Find out if the queue is idle, and if it was not before, set the idle
     * time to now.  If there are running jobs, the queue is not idle at the
     * start of this iteration - set idle_since to 0.
     */
    if (queue->running) {
        queue->idle_since = 0;
    } else {
        if (queue->idle_since == 0)
            queue->idle_since = schd_TimeNow;
    }

    /*
     * Get the resources for this queue from the resource monitor (if
     * available).  If the resmom is not accessible, disable the queue.
     * If the resources were received okay, compute the available node
     * masks from the resources and jobs.
     * Don't bother with resources for the special or submit queues.
     *
     * NOTE(review): as written, the condition is true for every queue other
     * than the submit queue, and also for a queue whose name equals the
     * special queue's name - so resources ARE fetched for the special
     * queue, which does not match the sentence above. Confirm the intent.
     */
    if ((strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0) ||
        ((schd_SpecialQueue != NULL) &&
         (!strcmp(queue->qname, schd_SpecialQueue->queue->qname)))) {
        queue->rsrcs = schd_get_resources(queue->exechost);

        if (queue->rsrcs != NULL) {
            /* Account for this queue's resources. */
            queue->rsrcs->nodes_alloc += queue->nodes_assn;
            queue->rsrcs->njobs += queue->running;

            /*
             * If the HPM counters do not appear to be in use on this host,
             * check for jobs on the queue that are using hpm.  If so, set
             * the 'HPM_IN_USE' flag on the resources.  This will prevent the
             * HPM counters from being released to global mode at the end
             * of the scheduling run (c.f. cleanup.c).
             * The 'HPM_IN_USE' flag will also be asserted if a job is run
             * that uses the HPM counters.
             */

            if (schd_MANAGE_HPM &&
                !(queue->rsrcs->flags & RSRCS_FLAGS_HPM_IN_USE)) {
                if (schd_hpm_job_count(queue->jobs))
                    queue->rsrcs->flags |= RSRCS_FLAGS_HPM_IN_USE;
            }

#ifdef NODEMASK
            /* And find the nodemasks for the queue and resources. */
            find_nodemasks(queue, queue->rsrcs);

#endif /* NODEMASK */
        } else {
            (void)sprintf(log_buffer,
                          "Can't get resources for %s@%s - marking unavailable.",
                          queue->qname, queue->exechost);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            queue->flags |= QFLAGS_DISABLED;
        }
    }

#ifdef DEBUG
    schd_dump_queue(queue, QUEUE_DUMP_JOBS);

#endif /* DEBUG */

    /*
     * It would probably be better to wait for the world to stabilize
     * than to try to impose some artificial order upon it.  Do not do
     * the sanity check if the queue is stopped.
     */
    if ((queue->flags & QFLAGS_STOPPED) == 0) {
        if (!queue_sanity(queue)) {
            sprintf(log_buffer, "WARNING! Queue '%s' failed sanity checks.",
                    queue->qname);
            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            return (1);
        }
    }

    return (0);
}
Job * schd_get_jobs(char *qname, char *state) { char *id = "schd_get_jobs"; int idx, ret; int local_errno = 0; Job *joblist = NULL, *jobtail = NULL, *new_joblist; Batch_Status *pbs_head, *pbs_ptr; AttrOpList *attr; static AttrOpList alist[] = { {NULL, NULL, NULL, NULL, EQ}, {NULL, NULL, NULL, NULL, EQ} }; if ((qname == NULL) && (state == NULL)) { attr = NULL; /* Caller requested all jobs in all queues. */ } else { /* * Initialize the search criteria since alist is a static struct * and it will retain the previous search when repeatedly called. */ for (idx = 0; idx < (sizeof(alist) / sizeof(AttrOpList)); idx++) { alist[idx].next = NULL; alist[idx].name = NULL; alist[idx].value = NULL; } idx = 0; /* Was a specific queue requested? */ if (qname != NULL) { alist[idx].name = ATTR_queue; alist[idx].value = qname; idx++; } /* Was a specific state requested? */ if (state != NULL) { alist[idx].name = ATTR_state; alist[idx].value = state; if (idx > 0) alist[idx - 1].next = &alist[idx]; idx++; } /* (More tests can be added here.) */ attr = alist; } /* Ask PBS for the list of jobs requested */ pbs_head = pbs_selstat_err(connector, attr, NULL, &local_errno); if ((pbs_head == NULL) && (local_errno)) { (void)sprintf(log_buffer, "pbs_selstat failed, %d", local_errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); return (NULL); } for (pbs_ptr = pbs_head; pbs_ptr != NULL; pbs_ptr = pbs_ptr->next) { /* * If there is no list yet, create one. If there is already a list, * create a new element and place it after the current tail. The new * element then becomes the tail. */ new_joblist = (Job *)malloc(sizeof(Job)); if (new_joblist == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "malloc(new Job)"); /* * Free any allocated storage, set joblist to NULL, and break. * By doing this, the PBS batch_struct list will be freed, * and the NULL joblist returned to the caller. 
*/ if (joblist) { schd_free_jobs(joblist); joblist = NULL; } break; } new_joblist->next = NULL; if (!joblist) { joblist = new_joblist; jobtail = joblist; } else { jobtail->next = new_joblist; jobtail = jobtail->next; } /* * 'jobtail' now points to a newly-created Job at the end of the * list of jobs. Call get_jobinfo() to fill it in with the contents * of this PBS batch_struct description. */ ret = schd_get_jobinfo(pbs_ptr, jobtail); if (ret < 0) { ; DBPRT(("%s: schd_get_jobinfo returned %d\n", id, ret)); } } /* * We are left with a list of Job's that was created the from the list * of Batch_Structs we got from pbs_selstat(). The Job list should * contain everything we need to know about the jobs. It is okay to * free the list returned by PBS, and return the list of Job's. */ pbs_statfree(pbs_head); return (joblist); }
int get_node_status(void) { char *id = "get_node_status"; QueueList *qptr; int local_errno = 0; Batch_Status *bs, *bsp; AttrList *attr; static AttrList alist[] = {{NULL, ATTR_NODE_state, "", ""}}; /* Query the server for status of all nodes, and then save this * info in the appropraite queue struct. */ if ((bs = pbs_statnode_err(connector, NULL, alist, NULL, &local_errno)) == NULL) { sprintf(log_buffer, "pbs_statnode failed: %d", local_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } /* First lets assume all nodes are down; later we will revise * this if we learn otherwise; we want to assume down so that * we don't get hung trying to connect to a hung node later. */ for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) qptr->queue->flags |= QFLAGS_NODEDOWN; /* Process the list of nodes returned by the server. */ for (bsp = bs; bsp != NULL; bsp = bsp->next) { for (attr = bsp->attribs; attr != NULL; attr = attr->next) { if ((strstr(attr->value, ND_free)) || (strstr(attr->value, ND_busy)) || (strstr(attr->value, ND_reserve)) || (strstr(attr->value, "job-"))) { for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) { if (strstr(bsp->name, qptr->queue->exechost)) { qptr->queue->flags &= ~QFLAGS_NODEDOWN; break; } } } else { sprintf(log_buffer, "%s (state=%s) -- marking DOWN", bsp->name, attr->value); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } } } pbs_statfree(bs); return (0); }
/*
 * schd_get_queue_limits - query queue information from the server.
 *
 * Resets the limit and feature fields of 'queue' to UNSPECIFIED (freeing
 * the string-valued features), releases any job list and user ACL still
 * attached to it, asks the server for the queue's attributes and stores
 * the values found in the queue structure. Attributes the server does not
 * report keep the UNSPECIFIED value set at the top.
 *
 * Returns 0 on success and -1 when the server query fails.
 */
int
schd_get_queue_limits(Queue *queue)
{
    char *id = "schd_get_queue_limits";
    int istrue;
    int local_errno = 0;
    Batch_Status *bs;
    AttrList *attr;

    /* Attributes requested from the server, chained through element 0. */
    static AttrList alist[] = {
        {&alist[1], ATTR_start, "", ""},
        {&alist[2], ATTR_enable, "", ""},
        {&alist[3], ATTR_count, "", ""},
        {&alist[4], ATTR_maxuserrun, "", ""},
        {&alist[5], ATTR_rescavail, "", ""},
        {&alist[6], ATTR_rescassn, "", ""},
        {&alist[7], ATTR_rescdflt, "", ""},
        {&alist[8], ATTR_rescmax, "", ""},
        {&alist[9], ATTR_rescmin, "", ""},
        {&alist[10], ATTR_acluren, "", ""},
        {&alist[11], ATTR_acluser, "", ""},
        {&alist[12], ATTR_p, "", ""},
        {NULL, ATTR_maxrun, "", ""}
    };

    /*
     * Start from a clean slate: everything is unknown until reported.
     * NOTE(review): mem_assn is assigned further down but is not reset
     * here like its siblings - confirm whether that is intended.
     */
    queue->running = UNSPECIFIED;
    queue->queued = UNSPECIFIED;
    queue->maxrun = UNSPECIFIED;
    queue->userrun = UNSPECIFIED;
    queue->ncpus_max = UNSPECIFIED;
    queue->ncpus_min = UNSPECIFIED;
    queue->ncpus_default = UNSPECIFIED;
    queue->ncpus_assn = UNSPECIFIED;
    queue->mem_max = UNSPECIFIED;
    queue->mem_min = UNSPECIFIED;
    queue->mem_default = UNSPECIFIED;
    queue->wallt_max = UNSPECIFIED;
    queue->wallt_min = UNSPECIFIED;
    queue->wallt_default = UNSPECIFIED;
    queue->rsrcs = NULL;
    queue->flags = 0;
    queue->priority = UNSPECIFIED;
    queue->speed = UNSPECIFIED;

    /* String-valued features are heap copies; release the old ones. */
    if (queue->featureA) {
        free(queue->featureA);
        queue->featureA = NULL;
    }

    if (queue->featureB) {
        free(queue->featureB);
        queue->featureB = NULL;
    }

    if (queue->featureC) {
        free(queue->featureC);
        queue->featureC = NULL;
    }

    queue->featureD = UNSPECIFIED;
    queue->featureE = UNSPECIFIED;
    queue->featureF = UNSPECIFIED;
    queue->featureG = UNSPECIFIED;
    queue->featureH = UNSPECIFIED;
    queue->featureI = UNSPECIFIED;

    /*
     * NOTE(review): queue->rsrcs was set to NULL a few lines above, so this
     * branch can never run. It is left untouched because it is not visible
     * here who owns the resource list; confirm before re-enabling it.
     */
    if (queue->rsrcs) {
        DBPRT(("%s: found resource list on queue '%s'! Freeing them...\n", id,
               queue->qname));
        cleanup_rsrcs(queue->rsrcs);
        queue->rsrcs = NULL;
    }

    /* Leftovers from an earlier call are released before being replaced. */
    if (queue->jobs) {
        DBPRT(("%s: found jobs on queue '%s'! Freeing them...\n", id,
               queue->qname));
        schd_free_jobs(queue->jobs);
        queue->jobs = NULL;
    }

    if (queue->useracl) {
        DBPRT(("%s: found user ACL list on queue '%s'! Freeing it...\n", id,
               queue->qname));
        schd_free_useracl(queue->useracl);
        queue->useracl = NULL;
    }

    /* Ask the server for information about the specified queue. */
    if ((bs = pbs_statque_err(connector, queue->qname, alist, NULL, &local_errno)) == NULL) {
        sprintf(log_buffer, "pbs_statque failed, \"%s\" %d",
                queue->qname, local_errno);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
    }

    /* Process the list of attributes returned by the server. */
    for (attr = bs->attribs; attr != NULL; attr = attr->next) {

        /* Is queue started? */
        if (!strcmp(attr->name, ATTR_start)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue is not stopped. */
                    queue->flags &= ~QFLAGS_STOPPED;
                else
                    queue->flags |= QFLAGS_STOPPED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* Is queue enabled? */
        if (!strcmp(attr->name, ATTR_enable)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue is not disabled. */
                    queue->flags &= ~QFLAGS_DISABLED;
                else
                    queue->flags |= QFLAGS_DISABLED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* How many jobs are queued and running? */
        if (!strcmp(attr->name, ATTR_count)) {
            queue->queued = schd_how_many(attr->value, SC_QUEUED);
            queue->running = schd_how_many(attr->value, SC_RUNNING);
            continue;
        }

        /* Queue-wide maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxrun)) {
            queue->maxrun = atoi(attr->value);
            continue;
        }

        /* Per-user maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxuserrun)) {
            queue->userrun = atoi(attr->value);
            continue;
        }

        /* Queue Priority Value */
        if (!strcmp(attr->name, ATTR_p)) {
            queue->priority = atoi(attr->value);
            continue;
        }

        /* Is there an enabled user access control list on this queue? */
        if (!strcmp(attr->name, ATTR_acluren)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)	/* if true, queue has an ACL */
                    queue->flags |= QFLAGS_USER_ACL;
                else
                    queue->flags &= ~QFLAGS_USER_ACL;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                       attr->name, attr->value));
            }

            continue;
        }

        /* The user ACL itself; a list set earlier in this loop is replaced. */
        if (!strcmp(attr->name, ATTR_acluser)) {
            if (queue->useracl) {
                DBPRT(("queue %s acluser already set!\n", queue->qname));
                schd_free_useracl(queue->useracl);
            }

            queue->useracl = schd_create_useracl(attr->value);

            continue;
        }

        /* Queue maximum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmax)) {
            if (!strcmp("mem", attr->resource)) {
                queue->mem_max = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                queue->ncpus_max = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_max = schd_val2sec(attr->value);
                continue;
            }

            if (!strcmp("speed", attr->resource)) {
                queue->speed = atoi(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_A, attr->resource)) {
                queue->featureA = schd_strdup(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_B, attr->resource)) {
                queue->featureB = schd_strdup(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_C, attr->resource)) {
                queue->featureC = schd_strdup(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_D, attr->resource)) {
                queue->featureD = atol(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_E, attr->resource)) {
                queue->featureE = atol(attr->value);
                continue;
            }

            if (!strcmp(FEATURE_F, attr->resource)) {
                queue->featureF = atol(attr->value);
                continue;
            }

            /*
             * Boolean features: only store the value when it parsed. A
             * failed parse must not copy 'istrue', which may be stale or
             * never written; the feature then stays UNSPECIFIED.
             */
            if (!strcmp(FEATURE_G, attr->resource)) {
                if (schd_val2bool(attr->value, &istrue) == 0) {
                    queue->featureG = istrue;
                } else {
                    DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                           attr->name, attr->value));
                }

                continue;
            }

            if (!strcmp(FEATURE_H, attr->resource)) {
                if (schd_val2bool(attr->value, &istrue) == 0) {
                    queue->featureH = istrue;
                } else {
                    DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                           attr->name, attr->value));
                }

                continue;
            }

            if (!strcmp(FEATURE_I, attr->resource)) {
                if (schd_val2bool(attr->value, &istrue) == 0) {
                    queue->featureI = istrue;
                } else {
                    DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                           attr->name, attr->value));
                }

                continue;
            }

            continue;
        }

        /* Queue minimum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmin)) {
            if (!strcmp("mem", attr->resource)) {
                queue->mem_min = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                queue->ncpus_min = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_min = schd_val2sec(attr->value);
                continue;
            }

            continue;
        }

        /* Queue assigned (in use) resource usage. */
        if (!strcmp(attr->name, ATTR_rescassn)) {
            if (!strcmp("mem", attr->resource)) {
                queue->mem_assn = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                queue->ncpus_assn = atoi(attr->value);
            }

            continue;
        }

        /* Queue default resource usage. */
        if (!strcmp(attr->name, ATTR_rescdflt)) {
            if (!strcmp("mem", attr->resource)) {
                queue->mem_default = schd_val2byte(attr->value);
                continue;
            }

            if (!strcmp("ncpus", attr->resource)) {
                queue->ncpus_default = atoi(attr->value);
                continue;
            }

            if (!strcmp("walltime", attr->resource))
                queue->wallt_default = schd_val2sec(attr->value);
        }

        /* Ignore anything else */
    }

    pbs_statfree(bs);

    return (0);
}
int main(int argc, char **argv) { char *server = NULL; char *jobid = NULL; char *var = NULL; char *value = NULL; int server_fd = 0; int ret = 0; int c = 0; struct batch_status *job = NULL; struct attrl *attribute = NULL; char *var_string = NULL; struct option prg_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, }; for ( ; ; ) { int option_index = 0; c = getopt_long(argc, argv, "s:hV", prg_options, &option_index ); if (c == -1) break; switch (c) { case 'h': usage(0); break; case 'V': printf("qsetenv version: %s; for torque version %s\n", QSETENV_VERSION, TORQUE_VERSION); exit(0); break; case 's': server = optarg; break; } } for (c = optind; c != argc; c++) { switch (c-optind) { case 0: jobid = argv[c]; break; case 1: var = argv[c]; break; case 2: value = argv[c]; break; default: printf("Too many arguments!\n"); usage(1); break; } } if (value == NULL) { printf("Too few arguments!\n"); usage(1); } if (server == NULL) { server = pbs_get_server_list(); } char *tok_server = server; char *tgt_server = NULL; while ((tgt_server = strtok(tok_server, ",")) != NULL) { tok_server = NULL; server_fd = pbs_connect(tgt_server); if (server_fd > 0) { break; } } if (server_fd <= 0) { fprintf(stderr, "Failed to connect to PBS server!\n"); exit(1); } printf("Querying job %s\n", jobid); job = pbs_statjob(server_fd, jobid, NULL, 0); if (job != NULL) { printf("job name: %s\n", job->name); var_string = job_setenv_varstr(job, var, value); attribute = (struct attrl *) malloc(sizeof(struct attrl)); memset(attribute, 0, sizeof(struct attrl)); attribute->name = ATTR_v; attribute->value = var_string; attribute->next = NULL; ret = pbs_alterjob(server_fd, jobid, attribute, NULL); if (ret != 0) { printf("Got error: %s\n", pbs_strerror(pbs_errno)); } free(attribute); attribute = NULL; } if (var_string != NULL) { free(var_string); } if (job != NULL) { pbs_statfree(job); job = NULL; } pbs_disconnect(server_fd); if (ret != 0) { return 1; } return 0; }
/** * Gets the factory attributes. * * This function uses @see loadResourceFile * and also queries the PBS queue. * @param soap is needed to allocate memory that can be deallocated by the * gsoap library after. * @param clusterInf a struct of type clusterInfo with the information needed for the * factory attributes document */ int rm_getClusterInfo(struct soap*soap, struct rm_clusterInfo** clusterInf /*,int compactResources*/) { char outputFile[256]; FILE* fd; int rc; char resource[128]; int connectionIdentifier = pbs_connect(server); struct rm_clusterInfo* clusterInfo; struct rm_resource* resourcesInfo; struct batch_status* status; if (!clusterInf) { return BESE_BAD_ARG; } clusterInfo = (struct rm_clusterInfo*) soap_malloc(soap, sizeof(struct rm_clusterInfo)); if (clusterInfo == NULL) return BESE_MEM_ALLOC; memset(clusterInfo, 0, sizeof(struct rm_clusterInfo)); //First, contact the PBS queue status = pbs_statserver(connectionIdentifier,NULL,NULL); if(status != NULL) { //Loop over the list of attributes returned struct attrl* attributeList = status->attribs; while(attributeList != NULL) { //Server_host for the CommonName element if(!strcmp(attributeList->name, "server_host")) { clusterInfo->CommonName = soap_strdup(soap, attributeList->value); } //Server_state for the IsAcceptingNewActivities element else if(!strcmp(attributeList->name, "server_state")) { if(!strcmp(attributeList->value, "Active")) clusterInfo->IsAcceptingNewActivities = true_; else clusterInfo->IsAcceptingNewActivities = false_; }//total_jobs for the TotalNumberOfActivities element else if(!strcmp(attributeList->name, "total_jobs")) { //clusterInfo->TotalNumberOfActivities = // atoi(attributeList->value); }//pbs_version for the LocalResourceManagerType element else if(!strcmp(attributeList->name, "pbs_version")) { char* pbsStr = (char*) soap_malloc(soap, strlen(PBS) + strlen(attributeList->value) + 10); sprintf(pbsStr, "%s %s %s", PBS, "Version", attributeList->value); 
clusterInfo->LocalResourceManagerType = pbsStr; } //fprintf(stderr,"Attribute: %s - Value: %s\n",attributeList->name,attributeList->value); attributeList = attributeList->next; } } pbs_statfree(status); pbs_disconnect(connectionIdentifier); *clusterInf = clusterInfo; return BESE_OK; }
/* schd_get_queue_memory - query queue memory limit from the server. */ size_t schd_get_queue_memory(char *qName) { char *id = "schd_get_queue_limits"; size_t mem_max, mem_default; Batch_Status *bs; AttrList *attr; int local_errno = 0; static AttrList alist[] = { {&alist[1], ATTR_rescdflt, "", ""}, {NULL, ATTR_rescmax, "", ""} }; mem_default = (size_t)0; mem_max = (size_t)0; /* Ask the server for information about the specified queue. */ if ((bs = pbs_statque_err(connector, qName, alist, NULL, &local_errno)) == NULL) { sprintf(log_buffer, "pbs_statque failed, \"%s\" %d", qName, local_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (UNSPECIFIED); } /* Process the list of attributes returned by the server. */ for (attr = bs->attribs; attr != NULL; attr = attr->next) { /* Queue maximum resource usage. */ if (!strcmp(attr->name, ATTR_rescmax)) { if (!strcmp("mem", attr->resource)) { mem_max = schd_val2byte(attr->value); continue; } continue; } if (!strcmp(attr->name, ATTR_rescdflt)) { if (!strcmp("mem", attr->resource)) { mem_default = schd_val2byte(attr->value); continue; } } /* Ignore anything else */ } pbs_statfree(bs); if (mem_default != (size_t)0) return(mem_default); if (mem_max != (size_t)0) return(mem_max); return (UNSPECIFIED); }
int get_queue_priority(char *qname) { int i; int local_errno = 0; char *id = "queue_priority"; QueueList *qptr; Batch_Status *bs; AttrList *attr; static AttrList alist[] = {{NULL, ATTR_p, "", ""}}; /* First let's check the global array of priority value in the hope that * we have already gone to the trouble to look this queue up before. */ for (i = 0; i < MAX_PRIORITIES; ++i) { if (QprioritiesArray[i].priority == UNSPECIFIED) break; else { if (!strcmp(qname, QprioritiesArray[i].qname)) return (QprioritiesArray[i].priority); } } /* Hummm, if we got here, then we didn't find an entry for the requested * queue. So lets check the global Q lists... */ if (!strcmp(qname, schd_SubmitQueue->queue->qname)) { QprioritiesArray[i].priority = schd_SubmitQueue->queue->priority; strcpy(QprioritiesArray[i].qname, schd_SubmitQueue->queue->qname); return (QprioritiesArray[i].priority); } for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) { if (!strcmp(qname, qptr->queue->qname)) { QprioritiesArray[i].priority = qptr->queue->priority; strcpy(QprioritiesArray[i].qname, qptr->queue->qname); return (QprioritiesArray[i].priority); } } /* Okay, so if we got here, the queue that we're hunting for is unknown * to the scheduler. So lets ask the PBS server about it. */ if ((bs = pbs_statque_err(connector, qname, alist, NULL, &local_errno)) == NULL) { sprintf(log_buffer, "pbs_statque failed, \"%s\" %d", qname, local_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } /* Process the list of attributes returned by the server. 
*/ for (attr = bs->attribs; attr != NULL; attr = attr->next) { if (!strcmp(attr->name, ATTR_p)) { QprioritiesArray[i].priority = atoi(attr->value); strcpy(QprioritiesArray[i].qname, qname); pbs_statfree(bs); return (QprioritiesArray[i].priority); } } pbs_statfree(bs); /* otherwise, return an error */ sprintf(log_buffer, "Unable to get priority for %s", qname); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); return(-1); }
/** * @brief * converts and processes the attribute values * * @param[in] connect - indiacation for connection of server * @param[in] attrp - attribute list * @param[in] dest - server option * * @return - int * @retval 0 Success * @retval exits on failure * */ int cnvrt_proc_attrib(int connect, struct attrl **attrp, char *dest) { char *str; int setflag, cnt = 0; struct attropl *jobid_ptr; struct batch_status *p, *p_status; struct attrl *a, *ap, *apx, *attr, *cmd_attr; char time_buf[80]; char job[PBS_MAXCLTJOBID]; char server[MAXSERVERNAME]; jobid_ptr = (struct attropl *)attrib; while (jobid_ptr != NULL) { if (strcmp(jobid_ptr->name, ATTR_convert) == 0) break; jobid_ptr = jobid_ptr->next; } if (get_server(jobid_ptr->value, job, server)) { fprintf(stderr, "pbs_rsub: illegally formed job identifier: %s\n", jobid_ptr->value); exit(-1); } /* update value string with full job-id (seqnum.server) */ (void)free(jobid_ptr->value); jobid_ptr->value = strdup(job); if (jobid_ptr->value == NULL) { fprintf(stderr, "Out of memory\n"); exit(2); } p_status = pbs_statjob(connect, jobid_ptr->value, NULL, NULL); if (p_status == (struct batch_status *)0) { fprintf(stderr, "Job %s does not exist\n", jobid_ptr->value); exit(2); } p = p_status; while (p != NULL) { a = p->attribs; while (a != NULL) { if (a->name != NULL) { /* avoid qmove job in R, T or E state */ if (strcmp(a->name, ATTR_state) == 0) { if (strcmp(a->value, "R") == 0 || strcmp(a->value, "T") == 0 || strcmp(a->value, "E") == 0) { fprintf(stderr, "Job not in qmove state\n"); exit(2); } } else { if (strcmp(a->name, ATTR_l) == 0 && strcmp(a->resource, "nodect") != 0 && strcmp(a->resource, "neednodes") != 0) { setflag = FALSE; ap = attrib; while (ap != NULL) { if (ap->resource != NULL) { if (strcmp(ap->resource, a->resource) == 0) { setflag = TRUE; } } if (ap->next == NULL && setflag == FALSE) { attr = (struct attrl *) malloc(sizeof(struct attrl)); if (attr == NULL) { fprintf(stderr, "pbs_rsub: Out of memory\n"); exit(2); } str = 
(char *) malloc(strlen(ATTR_l) + 1); if (str == NULL) { fprintf(stderr, "pbs_rsub: Out of memory\n"); exit(2); } strcpy(str, ATTR_l); attr->name = str; str = (char *) malloc(strlen(a->resource) + 1); if (str == NULL) { fprintf(stderr, "pbs_rsub: Out of memory\n"); exit(2); } strncpy(str, a->resource, strlen(a->resource)); str[strlen(a->resource)] = '\0'; attr->resource = str; if (a->value != NULL) { str = (char *) malloc(strlen(a->value) + 1); if (str == NULL) { fprintf(stderr, "pbs_rsub: Out of memory\n"); exit(2); } strncpy(str, a->value, strlen(a->value)); str[strlen(a->value)] = '\0'; attr->value = str; } else { str = (char *) malloc(1); if (str == NULL) { fprintf(stderr, "pbs_rsub: Out of memory\n"); exit(2); } str[0] = '\0'; attr->value = str; } attr->next = NULL; ap->next = attr; ap = ap->next; } setflag = FALSE; ap = ap->next; } } } } a = a->next; } p = p->next; } pbs_statfree(p_status); cmd_attr = attrib; while (cmd_attr != NULL) { if (strcmp(cmd_attr->name, ATTR_resv_start) == 0 || strcmp(cmd_attr->name, ATTR_resv_end) == 0) { if (cmd_attr->name != NULL) free(cmd_attr->name); if (cmd_attr->resource != NULL) free(cmd_attr->resource); if (cmd_attr->value != NULL) free(cmd_attr->value); apx = cmd_attr->next; free(cmd_attr); cmd_attr = apx; if (cnt == 0) attrib = cmd_attr; cnt++; } else cmd_attr = cmd_attr->next; } (void)sprintf(time_buf, "%ld", PBS_RESV_FUTURE_SCH); set_attr(&attrib, ATTR_resv_start, time_buf); *attrp = attrib; return (0); }
/* * * query_server - creates a structure of arrays consisting of a server * and all the queues and jobs that reside in that server * * pbs_sd - connection to pbs_server * * returns a pointer to the server_info struct * */ server_info *query_server(int pbs_sd) { struct batch_status *server; /* info about the server */ server_info *sinfo; /* scheduler internal form of server info */ queue_info **qinfo; /* array of queues on the server */ resource *res; /* ptr to cycle through sources on server */ int local_errno = 0; /* get server information from pbs server */ if ((server = pbs_statserver_err(pbs_sd, NULL, NULL, &local_errno)) == NULL) { fprintf(stderr, "pbs_statserver failed: %d\n", local_errno); return NULL; } /* convert batch_status structure into server_info structure */ if ((sinfo = query_server_info(server)) == NULL) { pbs_statfree(server); return NULL; } /* get the nodes, if any */ sinfo -> nodes = query_nodes(pbs_sd, sinfo); /* get the queues */ if ((sinfo -> queues = query_queues(pbs_sd, sinfo)) == NULL) { pbs_statfree(server); free_server(sinfo, 0); return NULL; } /* count the queues and total up the individual queue states * for server totals. (total up all the state_count structs) */ qinfo = sinfo -> queues; while (*qinfo != NULL) { sinfo -> num_queues++; total_states(&(sinfo -> sc), &((*qinfo) -> sc)); qinfo++; } if ((sinfo -> jobs = (job_info **) malloc(sizeof(job_info *) * (sinfo -> sc.total + 1))) == NULL) { free_server(sinfo, 1); perror("Memory allocation error"); return NULL; } set_jobs(sinfo); sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total, check_run_job, NULL); res = sinfo -> res; while (res != NULL) { if (res -> assigned == UNSPECIFIED) res -> assigned = calc_assn_resource(sinfo -> running_jobs, res -> name); res = res -> next; } sinfo -> timesharing_nodes = node_filter(sinfo -> nodes, sinfo -> num_nodes, is_node_timeshared, NULL); pbs_statfree(server); return sinfo; }