void LexicalReorderingTableCreator::FlushCompressedQueue(bool force) { if(force || m_queue.size() > 10000) { while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) { PackedItem pi = m_queue.top(); m_queue.pop(); m_lastFlushedLine++; m_compressedScores.push_back(pi.GetTrg()); if((pi.GetLine()+1) % 100000 == 0) std::cerr << "."; if((pi.GetLine()+1) % 5000000 == 0) std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl; } } if(force) { m_lastFlushedLine = -1; std::cerr << std::endl << std::endl; } }
void LexicalReorderingTableCreator::FlushEncodedQueue(bool force) { if(force || m_queue.size() > 10000) { while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) { PackedItem pi = m_queue.top(); m_queue.pop(); m_lastFlushedLine++; m_lastRange.push_back(pi.GetSrc()); m_encodedScores.push_back(pi.GetTrg()); if((pi.GetLine()+1) % 100000 == 0) std::cerr << "."; if((pi.GetLine()+1) % 5000000 == 0) std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl; if(m_lastRange.size() == (1ul << m_orderBits)) { m_hash.AddRange(m_lastRange); m_hash.SaveLastRange(); m_hash.DropLastRange(); m_lastRange.clear(); } } } if(force) { m_lastFlushedLine = -1; m_hash.AddRange(m_lastRange); m_lastRange.clear(); #ifdef WITH_THREADS m_hash.WaitAll(); #endif m_hash.SaveLastRange(); m_hash.DropLastRange(); m_hash.FinalizeSave(); std::cerr << std::endl << std::endl; } }
void PhraseTableCreator::FlushEncodedQueue(bool force) { while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) { PackedItem pi = m_queue.top(); m_queue.pop(); m_lastFlushedLine++; if(m_lastFlushedSourcePhrase != pi.GetSrc()) { if(m_lastCollection.size()) { std::stringstream targetPhraseCollection; for(std::vector<std::string>::iterator it = m_lastCollection.begin(); it != m_lastCollection.end(); it++) targetPhraseCollection << *it; m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase)); m_encodedTargetPhrases->push_back(targetPhraseCollection.str()); m_lastFlushedSourceNum++; if(m_lastFlushedSourceNum % 100000 == 0) std::cerr << "."; if(m_lastFlushedSourceNum % 5000000 == 0) std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl; m_lastCollection.clear(); } } if(m_lastSourceRange.size() == (1ul << m_orderBits)) { m_srcHash.AddRange(m_lastSourceRange); m_srcHash.SaveLastRange(); m_srcHash.DropLastRange(); m_lastSourceRange.clear(); } m_lastFlushedSourcePhrase = pi.GetSrc(); if(m_coding == PREnc) { if(m_lastCollection.size() <= pi.GetRank()) m_lastCollection.resize(pi.GetRank() + 1); m_lastCollection[pi.GetRank()] = pi.GetTrg(); } else { m_lastCollection.push_back(pi.GetTrg()); } } if(force) { if(!m_lastSourceRange.size() || m_lastSourceRange.back() != m_lastFlushedSourcePhrase) m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase)); if(m_lastCollection.size()) { std::stringstream targetPhraseCollection; for(std::vector<std::string>::iterator it = m_lastCollection.begin(); it != m_lastCollection.end(); it++) targetPhraseCollection << *it; m_encodedTargetPhrases->push_back(targetPhraseCollection.str()); m_lastCollection.clear(); } m_srcHash.AddRange(m_lastSourceRange); m_lastSourceRange.clear(); #ifdef WITH_THREADS m_srcHash.WaitAll(); #endif m_srcHash.SaveLastRange(); m_srcHash.DropLastRange(); m_srcHash.FinalizeSave(); m_lastFlushedLine = -1; m_lastFlushedSourceNum = 0; std::cerr << std::endl << std::endl; } }
void PhraseTableCreator::FlushRankedQueue(bool force) { size_t step = 1ul << 10; while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine()) { m_lastFlushedLine++; PackedItem pi = m_queue.top(); m_queue.pop(); if(m_lastSourceRange.size() == step) { m_rnkHash.AddRange(m_lastSourceRange); m_lastSourceRange.clear(); } if(m_lastFlushedSourcePhrase != pi.GetSrc()) { if(m_rankQueue.size()) { m_lastFlushedSourceNum++; if(m_lastFlushedSourceNum % 100000 == 0) { std::cerr << "."; } if(m_lastFlushedSourceNum % 5000000 == 0) { std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl; } m_ranks.resize(m_lastFlushedLine + 1); int r = 0; while(!m_rankQueue.empty()) { m_ranks[m_rankQueue.top().second] = r++; m_rankQueue.pop(); } } } m_lastSourceRange.push_back(pi.GetTrg()); m_rankQueue.push(std::make_pair(pi.GetScore(), pi.GetLine())); m_lastFlushedSourcePhrase = pi.GetSrc(); } if(force) { m_rnkHash.AddRange(m_lastSourceRange); m_lastSourceRange.clear(); #ifdef WITH_THREADS m_rnkHash.WaitAll(); #endif m_ranks.resize(m_lastFlushedLine + 1); int r = 0; while(!m_rankQueue.empty()) { m_ranks[m_rankQueue.top().second] = r++; m_rankQueue.pop(); } m_lastFlushedLine = -1; m_lastFlushedSourceNum = 0; std::cerr << std::endl << std::endl; } }