/**
 * @brief
 *	schd_bits2mask() - convert a string of '0' and '1' characters into a
 *	Bitfield.  The last character of the string maps to bit 0, the
 *	character before it to bit 1, and so on.
 *
 * @param[in]	string	- NUL-terminated string made up only of '0' and '1'
 * @param[out]	mask	- Bitfield that receives the result; it is written
 *			  only when the whole string parsed successfully
 *
 * @return int
 * @retval 0	success
 * @retval -1	string or mask is NULL, the string contains a character
 *		other than '0' or '1', or the string holds more characters
 *		than a Bitfield has bits
 *
 */
int
schd_bits2mask(char *string, Bitfield *mask)
{
	Bitfield new_bit;
	size_t len;
	size_t i;

	if (string == NULL || mask == NULL)
		return (-1);

	/*
	 * Each character names one bit, so a string longer than the bitfield
	 * would address bits past its end.  Refuse it up front, using the
	 * same size limit that schd_str2mask() applies.
	 */
	len = strlen(string);
	if (len > sizeof(Bitfield) * 8)
		return (-1);

	BITFIELD_CLRALL(&new_bit);

	/* Walk from the last character (bit 0) towards the first. */
	for (i = 0; i < len; i++) {
		char c = string[len - 1 - i];

		if (c == '1') {
			BITFIELD_SETB(&new_bit, (int)i);
		} else if (c != '0') {
			return (-1);
		}
	}

	/* Done.  Copy the new nodemask and return success. */
	BITFIELD_CPY(mask, &new_bit);

	return (0);
}
/**
 * @brief
 *	availnodes() - Build the set of nodes whose configured resources meet
 *	the minimum per-node requirements, and store it in the caller's
 *	bitfield.  A node qualifies when its memory is at least minnodemem and
 *	at least minnodecpus of its cpu slots hold a non-negative value.
 *
 *	Negative minnodemem / minnodecpus / memreserved settings are replaced
 *	with minmem / mincpus / 0 respectively before the scan; the values in
 *	effect are logged, as is every node that is rejected.
 *
 *	NOTE: Must be called after mapnodes() - a NULL nodemap is an error.
 *
 * @param[out]	maskp	- Bitfield that receives one set bit per usable node
 *
 * @return int
 * @retval 0	success
 * @retval !0	failure (no node map available)
 *
 */
int
availnodes(Bitfield *maskp)
{
	Bitfield usable;
	int nd;

	/* Nothing can be decided without a node map. */
	if (nodemap == NULL)
		return 1;

	/* Fill in any minimums that were left unset (negative). */
	if (minnodemem < 0)
		minnodemem = minmem;
	if (minnodecpus < 0)
		minnodecpus = mincpus;
	if (memreserved < 0)
		memreserved = 0;

	(void)sprintf(log_buffer,
		"Minimum node resources: %d cpus, %d MB, %d MB rsvd",
		minnodecpus, minnodemem, memreserved);
	log_err(-1, __func__, log_buffer);

	BITFIELD_CLRALL(&usable);

	for (nd = 0; nd <= maxnodeid; nd++) {
		int slot;
		int ncpus = 0;

		/* Count the cpu slots on this node that are populated. */
		for (slot = 0; slot < MAX_CPUS_PER_NODE; slot++) {
			if (nodemap[nd].cpu[slot] >= 0)
				ncpus++;
		}

		/* Sufficiently endowed?  Then mark it and move on. */
		if (nodemap[nd].memory >= minnodemem && ncpus >= minnodecpus) {
			BITFIELD_SETB(&usable, nd);
			continue;
		}

		/* Otherwise record why the node is being passed over. */
		(void)sprintf(log_buffer,
			"node %d has only %luMB and %d cpus - cannot use",
			nd, nodemap[nd].memory, ncpus);
		log_err(-1, __func__, log_buffer);
	}

	/* Hand the finished mask back to the caller. */
	BITFIELD_CPY(maskp, &usable);

	return 0;
}
/*
 * Pick 'nnodes' nodes for a job from the given queue.  Bit positions are
 * scanned from the highest (BITFIELD_SIZE - 1) down to 0, and a position is
 * taken when it is set in both the queue's queuemask and its availmask.
 *
 * When exactly 'nnodes' positions were collected, the resulting mask is
 * copied into 'job_mask' and 'nnodes' is returned.  Otherwise 'job_mask' is
 * left untouched and 0 is returned.
 */
int
schd_alloc_nodes(int nnodes, Queue *queue, Bitfield *job_mask)
{
	Bitfield chosen;
	Bitfield *owned = &(queue->queuemask);
	Bitfield *unused = &(queue->availmask);
	int taken = 0;
	int pos;

	BITFIELD_CLRALL(&chosen);

	/* Highest bit first; stop as soon as the request is satisfied. */
	for (pos = BITFIELD_SIZE - 1; pos >= 0 && taken < nnodes; pos--) {
		if (BITFIELD_TSTB(owned, pos) && BITFIELD_TSTB(unused, pos)) {
			BITFIELD_SETB(&chosen, pos);
			taken++;
		}
	}

	/* Didn't get enough nodes.  Return 0 as an error. */
	if (taken != nnodes)
		return 0;

	BITFIELD_CPY(job_mask, &chosen);

	return nnodes;
}
/*
 * Recompute the queue's availmask and account for nodes held by its
 * running jobs.
 *
 * The availmask starts as the intersection of the queue's queuemask and the
 * availmask of the queue's own resources.  The nodemasks of all jobs in
 * state 'R' are then collected (only when queue->running is non-zero),
 * cleared from the queue's availmask and added to rsrcs->nodes_used.
 *
 * Always returns 0.
 */
static int
find_nodemasks(Queue *queue, Resources *rsrcs)
{
	Bitfield busy;
	Job *jp;

	BITFIELD_CLRALL(&busy);

	/* Nodes that belong to this queue and are physically available. */
	BITFIELD_CPY(&queue->availmask, &(queue->queuemask));
	BITFIELD_ANDM(&queue->availmask, &(queue->rsrcs->availmask));

	/* Gather the nodes occupied by running jobs, if there are any. */
	if (queue->running) {
		jp = queue->jobs;
		while (jp != NULL) {
			if (jp->state == 'R') {
				BITFIELD_SETM(&busy, &(jp->nodemask));
			}
			jp = jp->next;
		}
	}

	/* Occupied nodes leave the queue's pool and join the used set. */
	BITFIELD_CLRM(&queue->availmask, &busy);
	BITFIELD_SETM(&rsrcs->nodes_used, &busy);

	return (0);
}
/* * Find an entry for the resources for the requested host in the list of * existing resources, or create a new one for that host and return it. */ Resources * schd_get_resources(char *exechost) { char *id = "schd_get_resources"; Resources *rptr, *new_rsrcs; int rm; char *response = NULL; int badreply = 0; int cpus_avail = 0; size_t pmem_avail = 0; char hpm_ctl[64]; struct sigaction act, oact; unsigned int remain; /* Time remaining in any old alarm(). */ time_t then; /* When this alarm() was started. */ #ifdef NODEMASK Bitfield cpy; int i, j; #endif /* NODEMASK */ /* * Check for a local copy of the resources being available already. * If so, just return a reference to that Resources structure. */ if (schd_RsrcsList != NULL) { for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next) if (strcmp(rptr->exechost, exechost) == 0) return (rptr); } schd_timestamp("get_rsrcs"); /* * No cached resource information for 'exechost'. Need to query the * host for its information. */ if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL) { (void)sprintf(log_buffer, "Unable to alloc space for Resources."); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (NULL); /* Can't get the information - nowhere to store it. */ } memset((void *)new_rsrcs, 0, sizeof(Resources)); act.sa_flags = 0; act.sa_handler = connect_interrupt; sigemptyset(&act.sa_mask); remain = 0; then = 0; /* * Set the alarm, and maintain some idea of how long was left on any * previously set alarm. */ if (sigaction(SIGALRM, &act, &oact) == 0) { remain = alarm(GETRSRCS_CONNECT_TIME); then = time(NULL); } if ((rm = openrm(exechost, 0)) == -1) { (void)sprintf(log_buffer, "Unable to contact resmom@%s (%d)", exechost, pbs_errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } /* * Turn off full response. Responses will be received in the order in * which they are sent. 
*/ fullresp(0); /* Build a list of all the resources about which we want information. */ addreq(rm, "loadave"); addreq(rm, "availmem"); addreq(rm, "physmem"); addreq(rm, "ncpus"); #ifdef NODEMASK addreq(rm, "availmask"); #endif /* NODEMASK */ if (schd_MANAGE_HPM) { (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_QUERY_STR); addreq(rm, hpm_ctl); } /* Get the values back from the resource monitor, and round up. */ /* Receive LOADAVE response from resource monitor. */ response = getreq(rm); if (response != NULL) { new_rsrcs->loadave = atof(response) * schd_FAKE_MACH_MULT; (void)free(response); } else { (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d", pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } /* Receive AVAILMEM response from resource monitor. */ response = getreq(rm); if (response != NULL) { new_rsrcs->freemem = schd_val2byte(response); new_rsrcs->freemem *= schd_FAKE_MACH_MULT; (void)free(response); } else { (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d", pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } /* Receive PHYSMEM response from resource monitor. */ response = getreq(rm); if (response != NULL) { pmem_avail = schd_val2byte(response); pmem_avail *= schd_FAKE_MACH_MULT; (void)free(response); } else { (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d", pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } /* Receive NCPUS response from resource monitor. 
*/ response = getreq(rm); if (response != NULL) { cpus_avail = atoi(response) * schd_FAKE_MACH_MULT; (void)free(response); } else { (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d", pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } #ifdef NODEMASK /* Receive available nodes from resource monitor. */ response = getreq(rm); if (response == NULL) { (void)sprintf(log_buffer, "bad return from getreq(availmask), %d, %d", pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } else { if (schd_bits2mask(response, &new_rsrcs->availmask) != 0) { if (schd_str2mask(response, &new_rsrcs->availmask) != 0) { (void)sprintf(log_buffer, "can't parse availmask '%s'", response); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } } (void)free(response); } #endif /* NODEMASK */ if (schd_MANAGE_HPM) { /* Receive HPM_CTL response from resource monitor. */ response = getreq(rm); if (response != NULL) { if (strcmp(response, HPM_CTL_USERMODE_STR) == 0) new_rsrcs->flags |= RSRCS_FLAGS_HPM_USER; else if (strcmp(response, HPM_CTL_GLOBALMODE_STR) == 0) new_rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER; else { (void)sprintf(log_buffer, "bad response '%s' for '%s@%s'", response, hpm_ctl, exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } } else { (void)sprintf(log_buffer, "bad return from getreq(%s), %d, %d", hpm_ctl, pbs_errno, errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); badreply = 1; goto bail; } } /* * NOTE: response will be free()'d in bail. Be sure to explicitly free() * response if more getreq() calls are added before the code below. */ bail: if (response != NULL) (void)free(response); /* Disconnect from the resource monitor. */ if (rm >= 0) /* resmom handle "0" is valid in RPP. */ closerm(rm); /* And unset the alarm and handler. 
*/ alarm(0); sigaction(SIGALRM, &oact, &act); /* Reset the old alarm, taking into account how much time has passed. */ if (remain) { DBPRT(("%s: old alarm had %d secs remaining, %d elapsed, ", id, remain, (time(NULL) - then))); /* How much time remains even after the time spent above? */ remain -= (time(NULL) - then); /* * Would the previous time have already expired? If so, schedule * an alarm call in 1 second (close enough, hopefully). */ if (remain < 1) remain = 1; DBPRT(("reset to %d secs\n", remain)); alarm(remain); } /* * Verify all the data came back as expected; if not, abort this * iteration of the scheduler. */ if (badreply) { (void)sprintf(log_buffer, "Got bad info from mom@%s - aborting sched run", exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); free(new_rsrcs); return (NULL); } /* Make a copy of the hostname for the resources struct. */ new_rsrcs->exechost = schd_strdup(exechost); if (new_rsrcs->exechost == NULL) { (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs", exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); free(new_rsrcs); return (NULL); } new_rsrcs->nodes_total = NODES_REQD(cpus_avail, pmem_avail); #ifdef NODEMASK /* Copy the availmask schd_FAKE_MACH_MULT times to match avail cpus. */ BITFIELD_CPY(&cpy, &(new_rsrcs->availmask)); for (i = 2; i <= schd_FAKE_MACH_MULT; i++) { for (j = 0; j < (cpus_avail / schd_FAKE_MACH_MULT / 2); j++) BITFIELD_SHIFTL(&cpy); BITFIELD_SETM(&(new_rsrcs->availmask), &cpy); } #endif /* NODEMASK */ if (schd_RsrcsList == NULL) { schd_RsrcsList = new_rsrcs; /* Start the list. */ } else { for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next) /* Find the last element in the list. */ ; rptr->next = new_rsrcs; } /* Next pointer for the tail of the list points to nothing. */ new_rsrcs->next = NULL; return (new_rsrcs); }
static int nodemask_overlaps(void) { char *id = "nodemask_overlaps"; QueueList *qlp1, *qlp2; Bitfield overlap; /* Check for overlapping queue nodemasks in batch queues. */ for (qlp1 = schd_BatchQueues; qlp1 != NULL; qlp1 = qlp1->next) { if (!(qlp1->queue->flags & QFLAGS_NODEMASK)) continue; for (qlp2 = qlp1->next; qlp2 != NULL; qlp2 = qlp2->next) { /* * If the queues are on different hosts, they may appear to * overlap - no problem. */ if (strcmp(qlp1->queue->exechost, qlp2->queue->exechost)) continue; if ((qlp2->queue->flags & QFLAGS_NODEMASK)) { DBPRT(("comparing nodemask for queue %s with queue %s\n", qlp1->queue->qname, qlp2->queue->qname)); BITFIELD_CPY(&overlap, &(qlp1->queue->queuemask)); BITFIELD_ANDM(&overlap, &(qlp2->queue->queuemask)); if (!BITFIELD_IS_ZERO(&overlap)) { (void)sprintf(log_buffer, "Queues %s and %s have overlapping nodemasks!", qlp1->queue->qname, qlp2->queue->qname); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (1); } } } } /* Make sure batch queues and external queues have differing nodemasks. */ for (qlp1 = schd_BatchQueues; qlp1 != NULL; qlp1 = qlp1->next) { if (!(qlp1->queue->flags & QFLAGS_NODEMASK)) continue; for (qlp2 = schd_ExternQueues; qlp2 != NULL; qlp2 = qlp2->next) { /* * If the queues are on different hosts, they may appear to * overlap - no problem. */ if (strcmp(qlp1->queue->exechost, qlp2->queue->exechost)) continue; if ((qlp2->queue->flags & QFLAGS_NODEMASK)) { DBPRT(("comparing nodemask for queue %s with queue %s\n", qlp1->queue->qname, qlp2->queue->qname)); BITFIELD_CPY(&overlap, &(qlp1->queue->queuemask)); BITFIELD_ANDM(&overlap, &(qlp2->queue->queuemask)); if (!BITFIELD_IS_ZERO(&overlap)) { (void)sprintf(log_buffer, "Queues %s and %s have overlapping nodemasks!", qlp1->queue->qname, qlp2->queue->qname); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (1); } } } } return (0); }
/*
 * Convert a nodemask string into a Bitfield.
 *
 * The string is normally of the form '0x...' (hex digits, either case).
 * For backwards compatibility a string without the '0x' prefix is read as
 * an old-style decimal nodemask, converted to its hex form (which is
 * logged), and then processed like any other hex string.
 *
 * The rightmost hex digit supplies bits 0-3, the next one bits 4-7, etc.
 * '*maskp' is written only when the whole string was accepted.
 *
 * Returns 0 on success.  Returns -1 if maskstr is NULL, a decimal string
 * has trailing junk or does not fit in an unsigned long long, a hex string
 * contains a non-hex character, or the string has more digits than the
 * Bitfield has room for.
 */
int
schd_str2mask(char *maskstr, Bitfield *maskp)
{
	char *id = "str2mask";
	Bitfield nodemask;
	char hex;
	int ndbit = 0, len, dec, hxbit;
	unsigned long long compat;
	char *ptr;
	char buf[32];

	if (maskstr == NULL)
		return (-1);

	/* Nodemask string must be in the format '0x...'. */
	if ((maskstr[0] != '0') || (tolower((unsigned char)maskstr[1]) != 'x')) {
		/* XXX
		 * For backwards compatibility, if the string will convert
		 * into an unsigned long long, then assume this is an old-
		 * style decimal nodemask (on 8-128P machines, nodemask is
		 * an unsigned long long, and was treated as such).
		 *
		 * strtoull() is used so the full unsigned long long range is
		 * read regardless of how wide unsigned long is, and ERANGE is
		 * checked so an over-long number is rejected instead of being
		 * silently clamped to the maximum value.
		 */
		errno = 0;
		compat = strtoull(maskstr, &ptr, 10);

		if (*ptr != '\0' || errno == ERANGE)
			return (-1);

		/*
		 * String converted to an unsigned long long.  Print it as
		 * a hex back into a buffer, and point at that buffer instead.
		 * This will cause the newly created hex string to be used.
		 */
		(void)sprintf(buf, "0x%llx", compat);
		maskstr = buf;

		(void)sprintf(log_buffer, "converted old-style nodemask %llu to %s",
			compat, maskstr);
		log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
	}

	/* Zero out the new nodemask struct, and fill it from the maskstr. */
	BITFIELD_CLRALL(&nodemask);

	/* Walk backwards through the hex digits in the string 0x... */
	for (len = (int)strlen(maskstr); len > 2 /* 0x... */; len--) {
		hex = maskstr[len - 1];

		/* Make sure it's a hex digit. */
		if (!isxdigit((unsigned char)hex))
			return (-1);

		/*
		 * Make sure this digit's four bits still fit in the nodemask.
		 * The check is made before any bit is set so that nothing is
		 * ever written past the end of the bitfield.
		 */
		if ((size_t)ndbit + 4 > sizeof(Bitfield) * 8)
			return (-1);

		/* Convert the hex digit to its numeric value. */
		dec = isdigit((unsigned char)hex) ?
			(hex - '0') : (tolower((unsigned char)hex) - 'a' + 10);

		/* A '0' digit has no bits set and so contributes nothing. */
		for (hxbit = 0; hxbit < 4; hxbit++) {
			if (dec & (1 << hxbit)) {
				BITFIELD_SETB(&nodemask, ndbit + hxbit);
			}
		}

		ndbit += 4;
	}

	/* Copy the newly created nodemask into the handed-in one. */
	BITFIELD_CPY(maskp, &nodemask);

	return (0);
}
/*
 * Allocate 'request' nodes from the queue's availmask, preferring
 * contiguous runs of bits.
 *
 * Working on a private copy of the availmask, the routine repeatedly looks
 * for the widest run (starting at the number of nodes still needed and
 * shrinking by one each time) of consecutive set bits between the highest
 * and lowest set bits of the copy.  The search slides a window downwards
 * from the highest set bit.  A run that is found is added to the result,
 * removed from the copy, and the hunt resumes for whatever is still needed.
 *
 * Returns 'request' and fills in '*maskp' when every node was allocated.
 * Returns 0, leaving '*maskp' untouched, when 'request' is not positive,
 * exceeds the number of set bits in the availmask, or could not be met.
 */
int
schd_alloc_nodes(int request, Queue *queue, Bitfield *maskp)
{
	char *id = "schd_alloc_nodes";
	Bitfield pool;		/* Nodes still free for the taking. */
	Bitfield granted;	/* Nodes handed out so far. */
	Bitfield window;	/* Candidate run of contiguous bits. */
	int needed;
	int allocated;
	int top;
	int bottom;
	int width;
	int placements;
	int step;
	int b;

	/* Make certain the nodecount request can be fulfilled. */
	if (request <= 0)
		return 0;
	if (request > BITFIELD_NUM_ONES(&(queue->availmask)))
		return 0;

	/* Private copy of the available nodes, and an empty result. */
	BITFIELD_CPY(&pool, &(queue->availmask));
	BITFIELD_CLRALL(&granted);

	allocated = 0;
	needed = request;

	while (needed > 0) {
		/* Highest and lowest bits that are still available. */
		top = BITFIELD_MS_ONE(&pool);
		bottom = BITFIELD_LS_ONE(&pool);

		/* Try the widest run first, then progressively narrower ones. */
		width = needed;
		while (width > 0) {
			/* A run of 'width' bits, anchored at the top bit. */
			BITFIELD_CLRALL(&window);
			for (b = 0; b < width; b++)
				BITFIELD_SETB(&window, top - b);

			/*
			 * Number of distinct positions the window can occupy
			 * between 'top' and 'bottom' inclusive, counting the
			 * starting position.
			 */
			placements = (top + 1 - bottom) - width + 1;

			/* Slide down one bit at a time until the window fits. */
			for (step = 0; step < placements; step++) {
				if (BITFIELD_TSTALLM(&pool, &window))
					break;

				BITFIELD_SHIFTR(&window);
			}

			if (step < placements) {
				/* The run is free - claim it. */
				BITFIELD_SETM(&granted, &window);
				BITFIELD_CLRM(&pool, &window);

				allocated += width;
				needed -= width;

				break;	/* Restart for whatever is still needed. */
			}

			width--;
		}

		/* Check for something going wrong. */
		if (width == 0) {
			DBPRT(("%s: couldn't find any contiguous bits (even one!)\n", id));
			break;
		}
	}

	/* Anything short of a complete allocation is a failure. */
	if (needed != 0 || allocated != request)
		return 0;

	BITFIELD_CPY(maskp, &granted);
	DBPRT(("%s: mask %s\n", id,
		schd_format_nodemask(&queue->queuemask, maskp)));

	return allocated;
}