/*
 * PBS has been known to provide some bizarre information about the state
 * of the queue (i.e. no jobs are running in it, but some of its resources
 * are consumed).  Perform some consistency checks on the queue information
 * before agreeing that it is correct.  Every inconsistency found is written
 * to the log via log_record() (and DBPRT), so the log ends up with a laundry
 * list of everything that appears wrong with the queue.
 *
 * Checks performed, in order:
 *   1. (NODEMASK builds, queues with QFLAGS_NODEMASK set only)  The number
 *      of bits set in queue->queuemask equals queue->nodes_max.  A mismatch
 *      is logged but does not, by itself, mark the queue as insane.
 *   2. (same condition)  queue->nodes_assn equals the sum of job->nodes over
 *      the jobs in state 'R'.  If more nodes are assigned than the running
 *      jobs account for, the queue is insane.  If fewer are assigned, the
 *      computed value is stored back into queue->nodes_assn and the queue
 *      is still considered sane.
 *   3. queue->running and queue->queued equal the number of jobs found in
 *      state 'R' and 'Q' respectively.
 *   4. A queue that claims running jobs must not be past its empty_by time.
 *
 * Returns 1 if the queue looks sane, 0 otherwise.
 *
 * Side effects: overwrites the shared log_buffer; may update
 * queue->nodes_assn (see check 2).
 */
static int
queue_sanity(Queue *queue)
{
    char   *id = "queue_sanity";
    Job    *job;
    int     queued, running;
    int     is_sane;
#ifdef NODEMASK
    int     nodes_used;
#endif /* NODEMASK */

    is_sane = 1;    /* Assume the queue is sane for now. */

#ifdef NODEMASK
    if (queue->flags & QFLAGS_NODEMASK) {
        /* Informational only: this mismatch does not clear is_sane. */
        if (queue->nodes_max != BITFIELD_NUM_ONES(&(queue->queuemask))) {
            sprintf(log_buffer,
                "Queue '%s' nodemask does not contain "
                "expected %d nodes (has %d)",
                queue->qname, queue->nodes_max,
                BITFIELD_NUM_ONES(&(queue->queuemask)));
            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
        }

        /* Total up the nodes held by the jobs that are actually running. */
        nodes_used = 0;

        for (job = queue->jobs; job != NULL; job = job->next) {
            if (job->state == 'R')
                nodes_used += job->nodes;
        }

        if (queue->nodes_assn != nodes_used) {
            if (queue->nodes_assn > nodes_used) {
                /* Nodes are assigned that no running job accounts for. */
                sprintf(log_buffer,
                    "Queue '%s' has %d nodes assigned, only %d in running jobs",
                    queue->qname, queue->nodes_assn, nodes_used);
                is_sane = 0;
            } else {
                /*
                 * Running jobs hold more nodes than the queue reports.
                 * Trust the per-job total and repair the queue's count
                 * rather than rejecting the queue.
                 */
                sprintf(log_buffer,
                    "Queue '%s' has %d nodes assigned, "
                    "%d in running jobs - using %d",
                    queue->qname, queue->nodes_assn, nodes_used, nodes_used);
                queue->nodes_assn = nodes_used;
            }

            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
        }
    }
#endif /* NODEMASK */

    /*
     * Count running and queued jobs and make sure the numbers match up.
     */
    queued = running = 0;

    for (job = queue->jobs; job != NULL; job = job->next) {
        switch (job->state) {
        case 'R':
            running++;
            break;

        case 'Q':
            queued++;
            break;

        default:
            /* Jobs in any other state are not counted. */
            break;
        }
    }

    if (queue->running != running) {
        sprintf(log_buffer,
            "Queue '%s' expected %d running jobs, but found %d",
            queue->qname, queue->running, running);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        is_sane = 0;
    }

    if (queue->queued != queued) {
        sprintf(log_buffer,
            "Queue '%s' expected %d queued jobs, but found %d",
            queue->qname, queue->queued, queued);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        is_sane = 0;
    }

    if (queue->running && (queue->empty_by < schd_TimeNow)) {
        /*
         * The declarations of schd_TimeNow and empty_by are not visible
         * here; they are timestamps and may be wider than int (e.g. time_t).
         * Cast the difference to long and print it with %ld so that the
         * argument always matches its conversion specifier.
         */
        sprintf(log_buffer,
            "Queue '%s' was expected to be empty %ld seconds ago",
            queue->qname, (long)(schd_TimeNow - queue->empty_by));
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        is_sane = 0;
    }

    return (is_sane);
}
/*
 * Pick 'request' nodes out of the queue's available node mask, preferring
 * runs of adjacent bits.
 *
 * The search works on a local copy of queue->availmask.  On each pass it
 * looks for the widest run of set bits (at most the number still needed)
 * by sliding a window of that width from the most significant available
 * bit towards the least significant one.  The first run that fits is
 * claimed and removed from the local copy, and the search repeats for
 * whatever is still outstanding.
 *
 * Parameters:
 *   request - number of nodes wanted.
 *   queue   - queue whose availmask is searched.
 *   maskp   - receives the mask of chosen nodes; written only on success.
 *
 * Returns 'request' when exactly that many nodes were found.  Returns 0 if
 * the request is not positive, exceeds the number of available bits, or
 * could not be satisfied; *maskp is left untouched in those cases.
 */
int
schd_alloc_nodes(int request, Queue *queue, Bitfield *maskp)
{
    char       *id = "schd_alloc_nodes";
    Bitfield    pool;       /* Scratch copy of the bits still up for grabs. */
    Bitfield    chosen;     /* Bits claimed for this request so far. */
    Bitfield    window;     /* Sliding run of 'width' adjacent bits. */
    int         hi_pos, lo_pos;
    int         span, width, positions, step;
    int         hit;
    int         found, remain;

    /* Reject requests that can never be satisfied. */
    if (request <= 0) {
        return 0;
    }

    if (request > BITFIELD_NUM_ONES(&(queue->availmask))) {
        return 0;
    }

    /* Work on a private copy; nothing has been claimed yet. */
    BITFIELD_CPY(&pool, &(queue->availmask));
    BITFIELD_CLRALL(&chosen);

    found = 0;

    for (remain = request; remain > 0; ) {
        /* Bounds of the region that still holds available bits. */
        hi_pos = BITFIELD_MS_ONE(&pool);
        lo_pos = BITFIELD_LS_ONE(&pool);
        span   = hi_pos + 1 - lo_pos;

        /* Try the widest useful run first, then progressively narrower. */
        for (width = remain; width > 0; width--) {
            /* Build a window of 'width' bits anchored at the top bit. */
            BITFIELD_CLRALL(&window);

            for (step = 0; step < width; step++) {
                BITFIELD_SETB(&window, hi_pos - step);
            }

            /*
             * Number of distinct placements of the window between hi_pos
             * and lo_pos inclusive, counting the starting placement.
             */
            positions = span - width + 1;

            /* Slide the window one bit at a time until it fits entirely. */
            hit = 0;

            for (step = 0; step < positions; step++) {
                if (BITFIELD_TSTALLM(&pool, &window)) {
                    hit = 1;
                    break;
                }

                BITFIELD_SHIFTR(&window);
            }

            if (!hit) {
                continue;   /* No run this wide; try a narrower one. */
            }

            /* Claim the run and take it out of the pool. */
            BITFIELD_SETM(&chosen, &window);
            BITFIELD_CLRM(&pool, &window);

            found  += width;
            remain -= width;

            break;
        }

        /* Every width down to a single bit failed: give up. */
        if (width == 0) {
            DBPRT(("%s: couldn't find any contiguous bits (even one!)\n", id));
            break;
        }
    }

    /* Anything short of a complete allocation counts as failure. */
    if (remain != 0 || found != request) {
        return 0;
    }

    /* Hand the finished mask back to the caller. */
    BITFIELD_CPY(maskp, &chosen);

    DBPRT(("%s: mask %s\n", id,
        schd_format_nodemask(&queue->queuemask, maskp)));

    return found;
}