void QueueManager::startJob() { Structure* s; if (!m_jobStartTracker.popFirst(s)) { return; } if (!m_opt->queueInterface(s->getCurrentOptStep())->startJob(s)) { s->lock().lockForWrite(); m_opt->warning(tr("QueueManager::startJob_: Job did not start " "successfully for structure %1-%2.") .arg(s->getIDString()) .arg(s->getCurrentOptStep() + 1)); s->setStatus(Structure::Error); s->lock().unlock(); return; } s->lock().lockForWrite(); s->setStatus(Structure::Submitted); s->lock().unlock(); if (!m_opt->usingGUI()) { QReadLocker locker(&s->lock()); qDebug() << "Structure" << QString::number(s->getGeneration()) + "x" + QString::number(s->getIDNumber()) << "has been submitted!"; } emit structureSubmitted(s); }
void QueueManager::checkPopulation() { // Count jobs uint running = 0; uint optimized = 0; uint submitted = 0; QReadLocker trackerReadLocker(m_tracker->rwLock()); QList<Structure*> structures = *m_tracker->list(); // Check to see that the number of running jobs is >= that specified: int fail = 0; for (int i = 0; i < structures.size(); ++i) { Structure* structure = structures.at(i); QReadLocker structureLocker(&structure->lock()); Structure::State state = structure->getStatus(); if (structure->getFailCount() != 0) ++fail; structureLocker.unlock(); QWriteLocker runningTrackerLocker(m_runningTracker.rwLock()); // Count submitted structures if (state == Structure::Submitted || state == Structure::InProcess) { m_runningTracker.append(structure); ++submitted; } // Count running jobs and update trackers if (state != Structure::Optimized && state != Structure::Duplicate && state != Structure::Supercell && state != Structure::Killed && state != Structure::Removed) { m_runningTracker.append(structure); ++running; } else { if (state == Structure::Optimized) ++optimized; m_runningTracker.remove(structure); } } trackerReadLocker.unlock(); emit newStatusOverview(optimized, running, fail); // Submit any jobs if needed QWriteLocker jobStartTrackerLocker(m_jobStartTracker.rwLock()); int pending = m_jobStartTracker.size(); if (pending != 0 && (!m_opt->limitRunningJobs || submitted < m_opt->runningJobLimit)) { // Submit a single throttled job (1 submission per 3-8 seconds) if using // a remote queue interface. Interval is randomly chosen each iteration. // This prevents hammering the pbs server from multiple XtalOpt instances // if there is a problem with the queue. #ifdef ENABLE_SSH Structure* s = m_jobStartTracker.at(0); if (qobject_cast<RemoteQueueInterface*>( m_opt->queueInterface(s->getCurrentOptStep())) != nullptr) { if (m_lastSubmissionTimeStamp->secsTo(QDateTime::currentDateTime()) >= 3 + (6 * getRandDouble())) { startJob(); ++submitted; --pending; *m_lastSubmissionTimeStamp = QDateTime::currentDateTime(); } } else { // Local job submission doesn't need to be throttled #endif while (pending != 0 && (!m_opt->limitRunningJobs || submitted < m_opt->runningJobLimit)) { startJob(); ++submitted; --pending; } #ifdef ENABLE_SSH } #endif } jobStartTrackerLocker.unlock(); // Generate requests // Write lock for m_requestedStructures var QWriteLocker trackerWriteLocker(m_tracker->rwLock()); QReadLocker newStructureTrackerLocker(m_newStructureTracker.rwLock()); // Avoid convience function calls here, as occaisional deadlocks // can occur. // // total is getAllStructures().size() + m_requestedStructures; int total = m_tracker->size() + m_newStructureTracker.size() + m_requestedStructures; // incomplete is getAllRunningStructures.size() + m_requestedStructures: int incomplete = m_runningTracker.size() + m_newStructureTracker.size() + m_requestedStructures; int needed = m_opt->contStructs - incomplete; if ( // Are we at the continuous structure limit? (needed > 0) && // Is the cutoff either disabled or reached/exceeded? (m_opt->cutoff <= 0 || total < m_opt->cutoff) && // Check if we are testing. If so, have we reached the testing limit? (!m_opt->testingMode || total < m_opt->test_nStructs)) { // emit requests qDebug() << "Need " << needed << " structures. " << incomplete << " already incomplete."; for (int i = 0; i < needed; ++i) { ++m_requestedStructures; emit needNewStructure(); qDebug() << "Requested new structure. Total requested: " << m_requestedStructures; } } }