void LexicalReorderingTableCreator::FlushCompressedQueue(bool force)
{  
  if(force || m_queue.size() > 10000)
  {
    while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
    {
      PackedItem pi = m_queue.top();
      m_queue.pop();
      m_lastFlushedLine++;
          
      m_compressedScores.push_back(pi.GetTrg());
      
      if((pi.GetLine()+1) % 100000 == 0)
          std::cerr << ".";
      if((pi.GetLine()+1) % 5000000 == 0)
          std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl;
    }
  }
  
  if(force)
  {
    m_lastFlushedLine = -1;
    std::cerr << std::endl << std::endl;
  }
}
void LexicalReorderingTableCreator::FlushEncodedQueue(bool force) {
  if(force || m_queue.size() > 10000)
  {
    while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
    {
      PackedItem pi = m_queue.top();
      m_queue.pop();
      m_lastFlushedLine++;
      
      m_lastRange.push_back(pi.GetSrc());    
      m_encodedScores.push_back(pi.GetTrg());
      
      if((pi.GetLine()+1) % 100000 == 0)
          std::cerr << ".";
      if((pi.GetLine()+1) % 5000000 == 0)
          std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl;
          
      if(m_lastRange.size() == (1ul << m_orderBits))
      {
        m_hash.AddRange(m_lastRange);
        m_hash.SaveLastRange();
        m_hash.DropLastRange();
        m_lastRange.clear();
      }
    }
  }
  
  if(force)
  {
    m_lastFlushedLine = -1;

    m_hash.AddRange(m_lastRange);
    m_lastRange.clear();
    
#ifdef WITH_THREADS
    m_hash.WaitAll();
#endif

    m_hash.SaveLastRange();
    m_hash.DropLastRange();
    m_hash.FinalizeSave();

    std::cerr << std::endl << std::endl;
  }
}
// Example #3
// 0
void PhraseTableCreator::FlushEncodedQueue(bool force)
{
  while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) {
    PackedItem pi = m_queue.top();
    m_queue.pop();
    m_lastFlushedLine++;

    if(m_lastFlushedSourcePhrase != pi.GetSrc()) {
      if(m_lastCollection.size()) {
        std::stringstream targetPhraseCollection;
        for(std::vector<std::string>::iterator it =
              m_lastCollection.begin(); it != m_lastCollection.end(); it++)
          targetPhraseCollection << *it;

        m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase));
        m_encodedTargetPhrases->push_back(targetPhraseCollection.str());

        m_lastFlushedSourceNum++;
        if(m_lastFlushedSourceNum % 100000 == 0)
          std::cerr << ".";
        if(m_lastFlushedSourceNum % 5000000 == 0)
          std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl;

        m_lastCollection.clear();
      }
    }

    if(m_lastSourceRange.size() == (1ul << m_orderBits)) {
      m_srcHash.AddRange(m_lastSourceRange);
      m_srcHash.SaveLastRange();
      m_srcHash.DropLastRange();
      m_lastSourceRange.clear();
    }

    m_lastFlushedSourcePhrase = pi.GetSrc();
    if(m_coding == PREnc) {
      if(m_lastCollection.size() <= pi.GetRank())
        m_lastCollection.resize(pi.GetRank() + 1);
      m_lastCollection[pi.GetRank()] = pi.GetTrg();
    } else {
      m_lastCollection.push_back(pi.GetTrg());
    }
  }

  if(force) {
    if(!m_lastSourceRange.size() || m_lastSourceRange.back() != m_lastFlushedSourcePhrase)
      m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase));

    if(m_lastCollection.size()) {
      std::stringstream targetPhraseCollection;
      for(std::vector<std::string>::iterator it =
            m_lastCollection.begin(); it != m_lastCollection.end(); it++)
        targetPhraseCollection << *it;

      m_encodedTargetPhrases->push_back(targetPhraseCollection.str());
      m_lastCollection.clear();
    }

    m_srcHash.AddRange(m_lastSourceRange);
    m_lastSourceRange.clear();

#ifdef WITH_THREADS
    m_srcHash.WaitAll();
#endif

    m_srcHash.SaveLastRange();
    m_srcHash.DropLastRange();
    m_srcHash.FinalizeSave();

    m_lastFlushedLine = -1;
    m_lastFlushedSourceNum = 0;

    std::cerr << std::endl << std::endl;
  }
}
// Example #4
// 0
void PhraseTableCreator::FlushRankedQueue(bool force)
{
  size_t step = 1ul << 10;

  while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) {
    m_lastFlushedLine++;

    PackedItem pi = m_queue.top();
    m_queue.pop();

    if(m_lastSourceRange.size() == step) {
      m_rnkHash.AddRange(m_lastSourceRange);
      m_lastSourceRange.clear();
    }

    if(m_lastFlushedSourcePhrase != pi.GetSrc()) {
      if(m_rankQueue.size()) {
        m_lastFlushedSourceNum++;
        if(m_lastFlushedSourceNum % 100000 == 0) {
          std::cerr << ".";
        }
        if(m_lastFlushedSourceNum % 5000000 == 0) {
          std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl;
        }

        m_ranks.resize(m_lastFlushedLine + 1);
        int r = 0;
        while(!m_rankQueue.empty()) {
          m_ranks[m_rankQueue.top().second] = r++;
          m_rankQueue.pop();
        }
      }
    }

    m_lastSourceRange.push_back(pi.GetTrg());

    m_rankQueue.push(std::make_pair(pi.GetScore(), pi.GetLine()));
    m_lastFlushedSourcePhrase = pi.GetSrc();
  }

  if(force) {
    m_rnkHash.AddRange(m_lastSourceRange);
    m_lastSourceRange.clear();

#ifdef WITH_THREADS
    m_rnkHash.WaitAll();
#endif

    m_ranks.resize(m_lastFlushedLine + 1);
    int r = 0;
    while(!m_rankQueue.empty()) {
      m_ranks[m_rankQueue.top().second] = r++;
      m_rankQueue.pop();
    }

    m_lastFlushedLine = -1;
    m_lastFlushedSourceNum = 0;

    std::cerr << std::endl << std::endl;
  }
}