Example #1
0
void QueueManager::startJob()
{
  Structure* s;
  if (!m_jobStartTracker.popFirst(s)) {
    return;
  }

  if (!m_opt->queueInterface(s->getCurrentOptStep())->startJob(s)) {
    s->lock().lockForWrite();
    m_opt->warning(tr("QueueManager::startJob_: Job did not start "
                      "successfully for structure %1-%2.")
                     .arg(s->getIDString())
                     .arg(s->getCurrentOptStep() + 1));
    s->setStatus(Structure::Error);
    s->lock().unlock();
    return;
  }

  s->lock().lockForWrite();
  s->setStatus(Structure::Submitted);
  s->lock().unlock();

  if (!m_opt->usingGUI()) {
    QReadLocker locker(&s->lock());
    qDebug() << "Structure"
             << QString::number(s->getGeneration()) + "x" +
                  QString::number(s->getIDNumber())
             << "has been submitted!";
  }

  emit structureSubmitted(s);
}
Example #2
0
void QueueManager::checkPopulation()
{
  // Count jobs
  uint running = 0;
  uint optimized = 0;
  uint submitted = 0;

  QReadLocker trackerReadLocker(m_tracker->rwLock());
  QList<Structure*> structures = *m_tracker->list();

  // Check to see that the number of running jobs is >= that specified:
  int fail = 0;
  for (int i = 0; i < structures.size(); ++i) {
    Structure* structure = structures.at(i);
    QReadLocker structureLocker(&structure->lock());
    Structure::State state = structure->getStatus();
    if (structure->getFailCount() != 0)
      ++fail;
    structureLocker.unlock();

    QWriteLocker runningTrackerLocker(m_runningTracker.rwLock());
    // Count submitted structures
    if (state == Structure::Submitted || state == Structure::InProcess) {
      m_runningTracker.append(structure);
      ++submitted;
    }
    // Count running jobs and update trackers
    if (state != Structure::Optimized && state != Structure::Duplicate &&
        state != Structure::Supercell && state != Structure::Killed &&
        state != Structure::Removed) {
      m_runningTracker.append(structure);
      ++running;
    } else {
      if (state == Structure::Optimized)
        ++optimized;
      m_runningTracker.remove(structure);
    }
  }
  trackerReadLocker.unlock();
  emit newStatusOverview(optimized, running, fail);

  // Submit any jobs if needed
  QWriteLocker jobStartTrackerLocker(m_jobStartTracker.rwLock());
  int pending = m_jobStartTracker.size();
  if (pending != 0 &&
      (!m_opt->limitRunningJobs || submitted < m_opt->runningJobLimit)) {
// Submit a single throttled job (1 submission per 3-8 seconds) if using
// a remote queue interface. Interval is randomly chosen each iteration.
// This prevents hammering the pbs server from multiple XtalOpt instances
// if there is a problem with the queue.
#ifdef ENABLE_SSH
    Structure* s = m_jobStartTracker.at(0);
    if (qobject_cast<RemoteQueueInterface*>(
          m_opt->queueInterface(s->getCurrentOptStep())) != nullptr) {
      if (m_lastSubmissionTimeStamp->secsTo(QDateTime::currentDateTime()) >=
          3 + (6 * getRandDouble())) {
        startJob();
        ++submitted;
        --pending;
        *m_lastSubmissionTimeStamp = QDateTime::currentDateTime();
      }
    } else {
// Local job submission doesn't need to be throttled
#endif
      while (pending != 0 &&
             (!m_opt->limitRunningJobs || submitted < m_opt->runningJobLimit)) {
        startJob();
        ++submitted;
        --pending;
      }
#ifdef ENABLE_SSH
    }
#endif
  }
  jobStartTrackerLocker.unlock();

  // Generate requests
  // Write lock for m_requestedStructures var
  QWriteLocker trackerWriteLocker(m_tracker->rwLock());
  QReadLocker newStructureTrackerLocker(m_newStructureTracker.rwLock());

  // Avoid convience function calls here, as occaisional deadlocks
  // can occur.
  //
  // total is getAllStructures().size() + m_requestedStructures;
  int total =
    m_tracker->size() + m_newStructureTracker.size() + m_requestedStructures;
  // incomplete is getAllRunningStructures.size() + m_requestedStructures:
  int incomplete = m_runningTracker.size() + m_newStructureTracker.size() +
                   m_requestedStructures;
  int needed = m_opt->contStructs - incomplete;

  if (
    // Are we at the continuous structure limit?
    (needed > 0) &&
    // Is the cutoff either disabled or reached/exceeded?
    (m_opt->cutoff <= 0 || total < m_opt->cutoff) &&
    // Check if we are testing. If so, have we reached the testing limit?
    (!m_opt->testingMode || total < m_opt->test_nStructs)) {
    // emit requests
    qDebug() << "Need " << needed << " structures. " << incomplete
             << " already incomplete.";
    for (int i = 0; i < needed; ++i) {
      ++m_requestedStructures;
      emit needNewStructure();
      qDebug() << "Requested new structure. Total requested: "
               << m_requestedStructures;
    }
  }
}