// Builds the vector of target phrases for `sourcePhrase`.
//
// Under PREnc coding the decoding cache is consulted first:
//   * a cached entry is returned as-is when the caller is not top-level or
//     when the entry has no bits left to decode;
//   * otherwise the cached entries are copied into a fresh vector and
//     decoding resumes `bitsLeft` bits before the end of the encoded stream.
//
// The source phrase is then looked up in the dictionary hash. When the lookup
// yields m_hash.GetSize() an empty TargetPhraseVectorPtr is returned;
// otherwise the encoded collection is fetched (from the in-memory or the
// mapped store) and handed to DecodeCollection().
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
{
  // Not using TargetPhraseCollection avoiding "new" operator
  // which can introduce heavy locking with multiple threads
  TargetPhraseVectorPtr result(new TargetPhraseVector());
  size_t bitsLeft = 0;

  if(m_coding == PREnc) {
    std::pair<TargetPhraseVectorPtr, size_t> cached
    = m_decodingCache.Retrieve(sourcePhrase);

    if(cached.first != NULL) {
      // Cached and complete, or completion is not required by this caller
      if(!topLevel || cached.second == 0)
        return cached.first;

      // Cached but incomplete: start from what was already decoded and
      // remember how many bits of the stream are still pending
      bitsLeft = cached.second;
      result->resize(cached.first->size());
      std::copy(cached.first->begin(), cached.first->end(), result->begin());
    }
  }

  // Map the source phrase to its identifier in the dictionary hash
  std::string sourceString = sourcePhrase.GetStringRep(*m_input);
  size_t sourceId = m_phraseDictionary.m_hash[MakeSourceKey(sourceString)];

  // An identifier equal to the hash size is treated as "no entry"
  if(sourceId == m_phraseDictionary.m_hash.GetSize())
    return TargetPhraseVectorPtr();

  // Fetch the compressed and encoded target phrase collection
  std::string encoded;
  if(m_phraseDictionary.m_inMemory)
    encoded = m_phraseDictionary.m_targetPhrasesMemory[sourceId];
  else
    encoded = m_phraseDictionary.m_targetPhrasesMapped[sourceId];

  BitWrapper<> bitStream(encoded);
  if(m_coding == PREnc && bitsLeft)
    bitStream.SeekFromEnd(bitsLeft);

  // Decompress and decode the target phrase collection
  return DecodeCollection(result, bitStream, sourcePhrase, topLevel);
}
void PhraseTableCreator::FlushEncodedQueue(bool force) { while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) { PackedItem pi = m_queue.top(); m_queue.pop(); m_lastFlushedLine++; if(m_lastFlushedSourcePhrase != pi.GetSrc()) { if(m_lastCollection.size()) { std::stringstream targetPhraseCollection; for(std::vector<std::string>::iterator it = m_lastCollection.begin(); it != m_lastCollection.end(); it++) targetPhraseCollection << *it; m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase)); m_encodedTargetPhrases->push_back(targetPhraseCollection.str()); m_lastFlushedSourceNum++; if(m_lastFlushedSourceNum % 100000 == 0) std::cerr << "."; if(m_lastFlushedSourceNum % 5000000 == 0) std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl; m_lastCollection.clear(); } } if(m_lastSourceRange.size() == (1ul << m_orderBits)) { m_srcHash.AddRange(m_lastSourceRange); m_srcHash.SaveLastRange(); m_srcHash.DropLastRange(); m_lastSourceRange.clear(); } m_lastFlushedSourcePhrase = pi.GetSrc(); if(m_coding == PREnc) { if(m_lastCollection.size() <= pi.GetRank()) m_lastCollection.resize(pi.GetRank() + 1); m_lastCollection[pi.GetRank()] = pi.GetTrg(); } else { m_lastCollection.push_back(pi.GetTrg()); } } if(force) { if(!m_lastSourceRange.size() || m_lastSourceRange.back() != m_lastFlushedSourcePhrase) m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase)); if(m_lastCollection.size()) { std::stringstream targetPhraseCollection; for(std::vector<std::string>::iterator it = m_lastCollection.begin(); it != m_lastCollection.end(); it++) targetPhraseCollection << *it; m_encodedTargetPhrases->push_back(targetPhraseCollection.str()); m_lastCollection.clear(); } m_srcHash.AddRange(m_lastSourceRange); m_lastSourceRange.clear(); #ifdef WITH_THREADS m_srcHash.WaitAll(); #endif m_srcHash.SaveLastRange(); m_srcHash.DropLastRange(); m_srcHash.FinalizeSave(); m_lastFlushedLine = -1; m_lastFlushedSourceNum = 0; std::cerr << std::endl << std::endl; 
} }