/**
 * @brief
 *	reclaim_cpusets()
 *	Walk a list of "stuck" cpusets (cpusets that should already have been
 *	deleted) and try once more to destroy each one.  Every cpuset that is
 *	successfully destroyed has its nodes cleared from the caller's mask (when
 *	one is supplied), has its nodes set in the global nodepool, and is then
 *	removed from the list.
 *
 * @param[in,out] listp - pointer to the head pointer of the cpuset list;
 *			  reclaimed elements are removed from this list
 * @param[in,out] maskp - bitfield to clear reclaimed nodes from; may be NULL
 *
 * @return int
 * @retval >0	number of cpusets destroyed and removed from the list
 * @retval 0	nothing was reclaimed (empty list, or no cpuset could be
 *		destroyed and unlinked)
 *
 * @note If removing an element from the list fails, the walk stops at once;
 *	 that element's nodes have already been returned to the nodepool but it
 *	 is not included in the returned count.
 */
int
reclaim_cpusets(cpusetlist **listp, Bitfield *maskp)
{
	cpusetlist *cursor = *listp;
	int reclaimed = 0;

	while (cursor != NULL) {
		cpusetlist *stuck = cursor;

		/*
		 * Advance before touching the element: it is removed from the
		 * list further down, so its next pointer must be read first.
		 */
		cursor = stuck->next;

		/* Still cannot be destroyed -- leave it on the list for later. */
		if (destroy_cpuset(stuck->name) != 0) {
			log_err(0, __func__, "could not destroy cpuset");
			continue;
		}

		/*
		 * The cpuset is gone.  Drop its nodes from the caller's mask
		 * (when there is one) and hand them back to the nodepool.
		 */
		if (maskp != NULL) {
			BITFIELD_CLRM(maskp, &(stuck->nodes));
		}
		BITFIELD_SETM(&nodepool, &(stuck->nodes));

		/* Record the successful reclaim. */
		(void)sprintf(log_buffer, "stuck cpuset %s reclaimed", stuck->name);
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, __func__,
			log_buffer);
#ifdef DEBUG
		(void)sprintf(log_buffer, "nodepool now %s", bitfield2hex(&nodepool));
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_INFO, __func__,
			log_buffer);
#endif /* DEBUG */

		/* Unlink the element and release its storage; give up on failure. */
		if (remove_from_cpusetlist(listp, NULL, stuck->name, NULL) != 0)
			break;

		reclaimed++;
	}

	/*
	 * Sanity check: once the list is empty, a supplied mask is expected to
	 * have no bits left.  Report the inconsistency if it does.
	 */
	if (maskp != NULL) {
		if (*listp == NULL) {
			if (!BITFIELD_IS_ZERO(maskp))
				log_err(-1, __func__,
					"NULL cpusetlist but mask not empty!");
		}
	}

	return reclaimed;
}
static int nodemask_overlaps(void) { char *id = "nodemask_overlaps"; QueueList *qlp1, *qlp2; Bitfield overlap; /* Check for overlapping queue nodemasks in batch queues. */ for (qlp1 = schd_BatchQueues; qlp1 != NULL; qlp1 = qlp1->next) { if (!(qlp1->queue->flags & QFLAGS_NODEMASK)) continue; for (qlp2 = qlp1->next; qlp2 != NULL; qlp2 = qlp2->next) { /* * If the queues are on different hosts, they may appear to * overlap - no problem. */ if (strcmp(qlp1->queue->exechost, qlp2->queue->exechost)) continue; if ((qlp2->queue->flags & QFLAGS_NODEMASK)) { DBPRT(("comparing nodemask for queue %s with queue %s\n", qlp1->queue->qname, qlp2->queue->qname)); BITFIELD_CPY(&overlap, &(qlp1->queue->queuemask)); BITFIELD_ANDM(&overlap, &(qlp2->queue->queuemask)); if (!BITFIELD_IS_ZERO(&overlap)) { (void)sprintf(log_buffer, "Queues %s and %s have overlapping nodemasks!", qlp1->queue->qname, qlp2->queue->qname); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (1); } } } } /* Make sure batch queues and external queues have differing nodemasks. */ for (qlp1 = schd_BatchQueues; qlp1 != NULL; qlp1 = qlp1->next) { if (!(qlp1->queue->flags & QFLAGS_NODEMASK)) continue; for (qlp2 = schd_ExternQueues; qlp2 != NULL; qlp2 = qlp2->next) { /* * If the queues are on different hosts, they may appear to * overlap - no problem. */ if (strcmp(qlp1->queue->exechost, qlp2->queue->exechost)) continue; if ((qlp2->queue->flags & QFLAGS_NODEMASK)) { DBPRT(("comparing nodemask for queue %s with queue %s\n", qlp1->queue->qname, qlp2->queue->qname)); BITFIELD_CPY(&overlap, &(qlp1->queue->queuemask)); BITFIELD_ANDM(&overlap, &(qlp2->queue->queuemask)); if (!BITFIELD_IS_ZERO(&overlap)) { (void)sprintf(log_buffer, "Queues %s and %s have overlapping nodemasks!", qlp1->queue->qname, qlp2->queue->qname); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (1); } } } } return (0); }