// Collects, in tracker order, every tracked structure whose status is
// Structure::Optimized, plus one representative for each distinct supercell.
//
// A structure with status Structure::Supercell is only added when the result
// does not already hold another Supercell-status structure with the same
// non-empty supercell string and the same number of formula units.
//
// The tracker is read-locked for the whole call and each inspected structure
// is read-locked while its fields are examined.
QList<Structure*>
QueueManager::getAllOptimizedStructuresAndOneSupercellCopyForEachFormulaUnit()
{
  QList<Structure*> list;
  QReadLocker trackerLocker(m_tracker->rwLock());
  for (int i = 0; i < m_tracker->list()->size(); ++i) {
    Structure* s = m_tracker->list()->at(i);
    QReadLocker sLocker(&s->lock());
    if (s->getStatus() == Structure::Optimized) {
      list.append(s);
    } else if (s->getStatus() == Structure::Supercell) {
      // We only want to add one copy of each supercell for each formula
      // unit. So do not add the supercell s if there is already one present.
      //
      // The decision is recorded in a flag and acted upon after the scan so
      // that (a) an empty result list still accepts the first supercell and
      // (b) the list is never grown while it is being iterated.
      bool alreadyPresent = false;
      for (int j = 0; j < list.size(); ++j) {
        Structure* s2 = list.at(j);
        // These should never be equal, but if they are, s is already in the
        // result. Do not lock it a second time and do not add it again.
        if (s == s2) {
          alreadyPresent = true;
          break;
        }
        QReadLocker s2Locker(&s2->lock());
        if (s2->getStatus() == Structure::Supercell &&
            !s2->getSupercellString().isEmpty() &&
            s2->getSupercellString() == s->getSupercellString() &&
            s2->getFormulaUnits() == s->getFormulaUnits()) {
          alreadyPresent = true;
          break;
        }
      }
      // Scanned the whole list and did not find a match!
      if (!alreadyPresent)
        list.append(s);
    }
  }
  return list;
}
// Doxygen skip: /// @cond void QueueManager::unlockForNaming_() { Structure* s; m_tracker->lockForWrite(); m_newStructureTracker.lockForWrite(); if (!m_newStructureTracker.popFirst(s)) { m_newStructureTracker.unlock(); m_tracker->unlock(); return; } // Update structure s->lock().lockForWrite(); if (s->getStatus() != Structure::Optimized) s->setStatus(Structure::WaitingForOptimization); s->lock().unlock(); m_tracker->append(s); m_newStructureTracker.unlock(); m_tracker->unlock(); if (s->getStatus() != Structure::Optimized) emit structureStarted(s); else if (s->getStatus() == Structure::Optimized) emit structureFinished(s); }
// Returns, in tracker order, every tracked structure whose status is
// Structure::Optimized. The tracker stays read-locked for the whole scan and
// each structure is read-locked only while its status is inspected.
QList<Structure*> QueueManager::getAllOptimizedStructures()
{
  QList<Structure*> optimized;

  m_tracker->lockForRead();
  for (int idx = 0; idx < m_tracker->list()->size(); ++idx) {
    Structure* candidate = m_tracker->list()->at(idx);

    // Scoped read lock: released at the end of each iteration, including
    // when the candidate is skipped.
    QReadLocker candidateLocker(&candidate->lock());
    if (candidate->getStatus() != Structure::Optimized)
      continue;

    optimized.append(candidate);
  }
  m_tracker->unlock();

  return optimized;
}
void QueueManager::checkRunning() { // Ensure that this is only called from the QM thread: Q_ASSERT_X(QThread::currentThread() == m_thread, Q_FUNC_INFO, "Attempting to run QueueManager::checkRunning " "from a thread other than the QM thread. "); // Get list of running structures QList<Structure*> runningStructures = getAllRunningStructures(); // iterate over all structures and handle each based on its status for (QList<Structure *>::iterator s_it = runningStructures.begin(), s_it_end = runningStructures.end(); s_it != s_it_end; ++s_it) { // Assign pointer for convenience Structure* structure = *s_it; // Check if this structure has any handlers pending. Skip if so. if (m_newlyOptimizedTracker.contains(structure) || m_stepOptimizedTracker.contains(structure) || m_inProcessTracker.contains(structure) || m_errorTracker.contains(structure) || m_submittedTracker.contains(structure) || m_newlyKilledTracker.contains(structure) || m_newDuplicateTracker.contains(structure) || m_newSupercellTracker.contains(structure) || m_restartTracker.contains(structure) || m_newSubmissionTracker.contains(structure)) { continue; } // Lookup status structure->lock().lockForRead(); Structure::State status = structure->getStatus(); structure->lock().unlock(); // Check status switch (status) { case Structure::InProcess: handleInProcessStructure(structure); break; case Structure::WaitingForOptimization: handleWaitingForOptimizationStructure(structure); break; case Structure::StepOptimized: handleStepOptimizedStructure(structure); break; case Structure::Optimized: // Shouldn't happen -- this is called by handleStepOptimizedStructure // when needed. There is a race condition between the check* functions // -- The structure may be removed from the list of running structures // by checkPopulation before checkRunning is called. 
// handleOptimizedStructure(structure); break; case Structure::Error: handleErrorStructure(structure); break; case Structure::Submitted: handleSubmittedStructure(structure); break; case Structure::Killed: handleKilledStructure(structure); break; case Structure::Removed: handleRemovedStructure(structure); break; case Structure::Restart: handleRestartStructure(structure); break; case Structure::Updating: handleUpdatingStructure(structure); break; case Structure::Duplicate: handleDuplicateStructure(structure); break; case Structure::Supercell: handleSupercellStructure(structure); break; case Structure::Empty: handleEmptyStructure(structure); break; } } return; }
// Re-evaluates the whole population in three phases:
//   1. Classify every tracked structure by status, keep m_runningTracker in
//      sync with that classification, and emit newStatusOverview() with the
//      optimized / running / failed counts.
//   2. Start queued jobs from m_jobStartTracker, honoring the running-job
//      limit (and, with ENABLE_SSH, throttling remote submissions).
//   3. Emit needNewStructure() once per structure required to bring the
//      number of incomplete structures up to m_opt->contStructs, subject to
//      the cutoff and testing-mode limits.
//
// NOTE: the order in which locks are taken and released below is
// deliberate; see the deadlock remark in phase 3 before rearranging.
void QueueManager::checkPopulation()
{
  // Count jobs
  uint running = 0;
  uint optimized = 0;
  uint submitted = 0;
  // The tracker stays read-locked while the copied list is classified.
  QReadLocker trackerReadLocker(m_tracker->rwLock());
  QList<Structure*> structures = *m_tracker->list();

  // Classify every structure; `fail` counts structures with a non-zero
  // fail count.
  int fail = 0;
  for (int i = 0; i < structures.size(); ++i) {
    Structure* structure = structures.at(i);
    // Read status and fail count under the structure's read lock, then
    // release it before touching the running tracker.
    QReadLocker structureLocker(&structure->lock());
    Structure::State state = structure->getStatus();
    if (structure->getFailCount() != 0)
      ++fail;
    structureLocker.unlock();

    // Write-lock the running tracker for the rest of this iteration.
    QWriteLocker runningTrackerLocker(m_runningTracker.rwLock());
    // Count submitted structures
    if (state == Structure::Submitted || state == Structure::InProcess) {
      m_runningTracker.append(structure);
      ++submitted;
    }
    // Count running jobs and update trackers. Every status other than
    // Optimized, Duplicate, Supercell, Killed and Removed counts as running
    // (Submitted/InProcess structures therefore reach append() twice).
    if (state != Structure::Optimized && state != Structure::Duplicate &&
        state != Structure::Supercell && state != Structure::Killed &&
        state != Structure::Removed) {
      m_runningTracker.append(structure);
      ++running;
    } else {
      if (state == Structure::Optimized)
        ++optimized;
      m_runningTracker.remove(structure);
    }
  }
  trackerReadLocker.unlock();

  emit newStatusOverview(optimized, running, fail);

  // Submit any jobs if needed
  QWriteLocker jobStartTrackerLocker(m_jobStartTracker.rwLock());
  int pending = m_jobStartTracker.size();
  // Only proceed when something is queued and the running-job limit is
  // either disabled or not yet reached.
  if (pending != 0 &&
      (!m_opt->limitRunningJobs || submitted < m_opt->runningJobLimit)) {
// Submit a single throttled job if using a remote queue interface. The
// minimum interval since the last submission is 3 + 6 * getRandDouble()
// seconds, re-drawn on each call (i.e. 3-9 s if getRandDouble() yields
// values in [0, 1) -- TODO confirm).
// This prevents hammering the pbs server from multiple XtalOpt instances
// if there is a problem with the queue.
#ifdef ENABLE_SSH
    // The queue interface is looked up from the opt step of the first
    // queued structure.
    Structure* s = m_jobStartTracker.at(0);
    if (qobject_cast<RemoteQueueInterface*>(
          m_opt->queueInterface(s->getCurrentOptStep())) != nullptr) {
      if (m_lastSubmissionTimeStamp->secsTo(QDateTime::currentDateTime()) >=
          3 + (6 * getRandDouble())) {
        startJob();
        ++submitted;
        --pending;
        *m_lastSubmissionTimeStamp = QDateTime::currentDateTime();
      }
    } else {
// Local job submission doesn't need to be throttled
#endif
      // Start jobs until the queue is drained or the job limit is reached.
      while (pending != 0 && (!m_opt->limitRunningJobs ||
                              submitted < m_opt->runningJobLimit)) {
        startJob();
        ++submitted;
        --pending;
      }
#ifdef ENABLE_SSH
    }
#endif
  }
  jobStartTrackerLocker.unlock();

  // Generate requests
  // Write lock for m_requestedStructures var
  QWriteLocker trackerWriteLocker(m_tracker->rwLock());
  QReadLocker newStructureTrackerLocker(m_newStructureTracker.rwLock());

  // Avoid convenience function calls here, as occasional deadlocks
  // can occur.
  //
  // total is getAllStructures().size() + m_requestedStructures;
  int total = m_tracker->size() + m_newStructureTracker.size() +
              m_requestedStructures;
  // incomplete is getAllRunningStructures.size() + m_requestedStructures:
  int incomplete = m_runningTracker.size() + m_newStructureTracker.size() +
                   m_requestedStructures;
  // Number of additional structures required to reach contStructs.
  int needed = m_opt->contStructs - incomplete;

  if (
    // Are we below the continuous structure limit?
    (needed > 0) &&
    // Is the cutoff either disabled (<= 0) or not yet reached?
    (m_opt->cutoff <= 0 || total < m_opt->cutoff) &&
    // If we are in testing mode, are we still below the testing limit?
    (!m_opt->testingMode || total < m_opt->test_nStructs)) {
    // emit requests
    qDebug() << "Need " << needed << " structures. " << incomplete
             << " already incomplete.";
    // One request per missing structure; the counter is bumped before each
    // signal is emitted.
    for (int i = 0; i < needed; ++i) {
      ++m_requestedStructures;
      emit needNewStructure();
      qDebug() << "Requested new structure. Total requested: "
               << m_requestedStructures;
    }
  }
}