Example #1
0
// Builds the vector of target phrases for sourcePhrase.
//
// When the coding is PREnc the decoding cache is asked first. A cached vector
// is handed back unchanged if it has no bits left to decode, or if this is not
// a top-level request. Otherwise the cached entries seed the result and
// decoding continues from the remaining bits of the encoded stream.
//
// topLevel is also forwarded to DecodeCollection.
//
// Returns a default-constructed TargetPhraseVectorPtr when the hash lookup
// yields m_hash.GetSize() for the phrase key (presumably "unknown source
// phrase" -- confirm against the hash implementation).
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
{
  // Not using TargetPhraseCollection avoiding "new" operator
  // which can introduce heavy locking with multiple threads
  TargetPhraseVectorPtr result(new TargetPhraseVector());
  size_t remainingBits = 0;

  if(m_coding == PREnc)
  {
    std::pair<TargetPhraseVectorPtr, size_t> cacheEntry
      = m_decodingCache.Retrieve(sourcePhrase);

    if(cacheEntry.first != NULL)
    {
      // Cached and complete, or the caller does not need it completed
      if(!topLevel || cacheEntry.second == 0)
        return cacheEntry.first;

      // Cached but incomplete: start from a copy of what is already decoded
      remainingBits = cacheEntry.second;
      result->resize(cacheEntry.first->size());
      std::copy(cacheEntry.first->begin(),
                cacheEntry.first->end(),
                result->begin());
    }
  }

  // Map the source phrase to its identifier in the dictionary hash
  std::string phraseText = sourcePhrase.GetStringRep(*m_input);
  size_t phraseId = m_phraseDictionary.m_hash[MakeSourceKey(phraseText)];

  // An identifier equal to the hash size has no stored collection
  if(phraseId == m_phraseDictionary.m_hash.GetSize())
    return TargetPhraseVectorPtr();

  // Fetch the compressed and encoded target phrase collection from
  // whichever storage the dictionary is using
  std::string encoded;
  if(m_phraseDictionary.m_inMemory)
    encoded = m_phraseDictionary.m_targetPhrasesMemory[phraseId];
  else
    encoded = m_phraseDictionary.m_targetPhrasesMapped[phraseId];

  BitWrapper<> bitStream(encoded);

  // Resume a partially decoded PREnc collection where it stopped
  if(m_coding == PREnc && remainingBits)
    bitStream.SeekFromEnd(remainingBits);

  // Decompress and decode the target phrase collection
  return DecodeCollection(result, bitStream, sourcePhrase, topLevel);
}
void PhraseTableCreator::FlushEncodedQueue(bool force)
{
  while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) {
    PackedItem pi = m_queue.top();
    m_queue.pop();
    m_lastFlushedLine++;

    if(m_lastFlushedSourcePhrase != pi.GetSrc()) {
      if(m_lastCollection.size()) {
        std::stringstream targetPhraseCollection;
        for(std::vector<std::string>::iterator it =
              m_lastCollection.begin(); it != m_lastCollection.end(); it++)
          targetPhraseCollection << *it;

        m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase));
        m_encodedTargetPhrases->push_back(targetPhraseCollection.str());

        m_lastFlushedSourceNum++;
        if(m_lastFlushedSourceNum % 100000 == 0)
          std::cerr << ".";
        if(m_lastFlushedSourceNum % 5000000 == 0)
          std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl;

        m_lastCollection.clear();
      }
    }

    if(m_lastSourceRange.size() == (1ul << m_orderBits)) {
      m_srcHash.AddRange(m_lastSourceRange);
      m_srcHash.SaveLastRange();
      m_srcHash.DropLastRange();
      m_lastSourceRange.clear();
    }

    m_lastFlushedSourcePhrase = pi.GetSrc();
    if(m_coding == PREnc) {
      if(m_lastCollection.size() <= pi.GetRank())
        m_lastCollection.resize(pi.GetRank() + 1);
      m_lastCollection[pi.GetRank()] = pi.GetTrg();
    } else {
      m_lastCollection.push_back(pi.GetTrg());
    }
  }

  if(force) {
    if(!m_lastSourceRange.size() || m_lastSourceRange.back() != m_lastFlushedSourcePhrase)
      m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase));

    if(m_lastCollection.size()) {
      std::stringstream targetPhraseCollection;
      for(std::vector<std::string>::iterator it =
            m_lastCollection.begin(); it != m_lastCollection.end(); it++)
        targetPhraseCollection << *it;

      m_encodedTargetPhrases->push_back(targetPhraseCollection.str());
      m_lastCollection.clear();
    }

    m_srcHash.AddRange(m_lastSourceRange);
    m_lastSourceRange.clear();

#ifdef WITH_THREADS
    m_srcHash.WaitAll();
#endif

    m_srcHash.SaveLastRange();
    m_srcHash.DropLastRange();
    m_srcHash.FinalizeSave();

    m_lastFlushedLine = -1;
    m_lastFlushedSourceNum = 0;

    std::cerr << std::endl << std::endl;
  }
}