// Worker body: repeatedly claims the next index from the shared counter
// m_scoresNum, compresses the encoded score string stored at that index in
// m_encodedScores and hands the result to m_creator. Returns once the claimed
// index is no longer below m_encodedScores.size().
void CompressionTaskReordering::operator()()
{
  size_t index;
  {
    // Claim the first index; in threaded builds this happens under m_mutex.
#ifdef WITH_THREADS
    boost::mutex::scoped_lock guard(m_mutex);
#endif
    index = m_scoresNum++;
  }

  while(index < m_encodedScores.size()) {
    // Compression itself runs before the lock below is taken.
    std::string encoded = m_encodedScores[index];
    std::string compressed = m_creator.CompressEncodedScores(encoded);

    // The source-phrase slot of the item is left empty here.
    std::string emptyKey;
    PackedItem item(index, emptyKey, compressed, 0);

    // In threaded builds the lock is held from here to the end of the
    // iteration, covering both the hand-over to m_creator and the claim of
    // the next index.
#ifdef WITH_THREADS
    boost::mutex::scoped_lock guard(m_mutex);
#endif
    m_creator.AddCompressedScores(item);
    m_creator.FlushCompressedQueue();

    index = m_scoresNum++;
  }
}
// Worker body: repeatedly claims the next index from the shared counter
// m_collectionNum, compresses the encoded collection stored at that index in
// m_encodedCollections and hands the result to m_creator. Returns once the
// claimed index is no longer below m_encodedCollections.size().
void CompressionTask::operator()()
{
  size_t index;
  {
    // Claim the first index; in threaded builds this happens under m_mutex.
#ifdef WITH_THREADS
    boost::mutex::scoped_lock guard(m_mutex);
#endif
    index = m_collectionNum++;
  }

  while(index < m_encodedCollections.size()) {
    // Compression itself runs before the lock below is taken.
    std::string encoded = m_encodedCollections[index];
    std::string compressed = m_creator.CompressEncodedCollection(encoded);

    // The source-phrase slot of the item is left empty here.
    std::string emptyKey;
    PackedItem item(index, emptyKey, compressed, 0);

    // In threaded builds the lock is held from here to the end of the
    // iteration, covering both the hand-over to m_creator and the claim of
    // the next index.
#ifdef WITH_THREADS
    boost::mutex::scoped_lock guard(m_mutex);
#endif
    m_creator.AddCompressedCollection(item);
    m_creator.FlushCompressedQueue();

    index = m_collectionNum++;
  }
}
void EncodingTaskReordering::operator()() { size_t lineNum = 0; std::vector<std::string> lines; size_t max_lines = 1000; lines.reserve(max_lines); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } std::vector<PackedItem> result; result.reserve(max_lines); while(lines.size()) { for(size_t i = 0; i < lines.size(); i++) { std::vector<std::string> tokens; Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator); std::string encodedLine = m_creator.EncodeLine(tokens); std::string f = tokens[0]; std::string e; if(tokens.size() > 2) e = tokens[1]; PackedItem packedItem(lineNum + i, m_creator.MakeSourceTargetKey(f, e), encodedLine, i); result.push_back(packedItem); } lines.clear(); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_mutex); #endif for(size_t i = 0; i < result.size(); i++) m_creator.AddEncodedLine(result[i]); m_creator.FlushEncodedQueue(); } result.clear(); lines.reserve(max_lines); result.reserve(max_lines); #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } }
void EncodingTask::operator()() { size_t lineNum = 0; std::vector<std::string> lines; size_t max_lines = 1000; lines.reserve(max_lines); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } std::vector<PackedItem> result; result.reserve(max_lines); while(lines.size()) { for(size_t i = 0; i < lines.size(); i++) { std::vector<std::string> tokens; Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator); if(tokens.size() < 3) { std::cerr << "Error: It seems the following line has a wrong format:" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; abort(); } if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) { std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl; std::cerr << "but you are using "; std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); std::cerr << " encoding which makes use of alignment data. " << std::endl; std::cerr << "Better use -encoding None or disable this warning with -no-warnings." 
<< std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; } size_t ownRank = 0; if(m_creator.m_coding == PhraseTableCreator::PREnc) ownRank = m_creator.m_ranks[lineNum + i]; std::string encodedLine = m_creator.EncodeLine(tokens, ownRank); PackedItem packedItem(lineNum + i, tokens[0], encodedLine, ownRank); result.push_back(packedItem); } lines.clear(); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_mutex); #endif for(size_t i = 0; i < result.size(); i++) m_creator.AddEncodedLine(result[i]); m_creator.FlushEncodedQueue(); } result.clear(); lines.reserve(max_lines); result.reserve(max_lines); #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } }
void RankingTask::operator()() { size_t lineNum = 0; std::vector<std::string> lines; size_t max_lines = 1000; lines.reserve(max_lines); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } std::vector<PackedItem> result; result.reserve(max_lines); while(lines.size()) { for(size_t i = 0; i < lines.size(); i++) { std::vector<std::string> tokens; Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator); if(tokens.size() < 3) { std::cerr << "Error: It seems the following line has a wrong format:" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; abort(); } if(tokens.size() == 3 && m_creator.m_warnMe) { std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl; std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl; std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." 
<< std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; } std::vector<float> scores = Tokenize<float>(tokens[2]); if(scores.size() != m_creator.m_numScoreComponent) { std::cerr << "Error: It seems the following line has a wrong number of scores (" << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; abort(); } float sortScore = scores[m_creator.m_sortScoreIndex]; std::string key1 = m_creator.MakeSourceKey(tokens[0]); std::string key2 = m_creator.MakeSourceTargetKey(tokens[0], tokens[1]); PackedItem packedItem(lineNum + i, key1, key2, 0, sortScore); result.push_back(packedItem); } lines.clear(); { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_mutex); #endif for(size_t i = 0; i < result.size(); i++) m_creator.AddRankedLine(result[i]); m_creator.FlushRankedQueue(); } result.clear(); lines.reserve(max_lines); result.reserve(max_lines); #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_fileMutex); #endif std::string line; while(lines.size() < max_lines && std::getline(m_inFile, line)) lines.push_back(line); lineNum = m_lineNum; m_lineNum += lines.size(); } }