/**
 * Constructor: initialises the base class, then allocates one InputPath for
 * every contiguous span [startPos, endPos] of the sentence.
 *
 * Spans are produced in order of increasing length. Each new path is appended
 * to m_inputPathMatrix[startPos] and to m_phraseDictionaryQueue. A path that
 * covers more than one word is linked to the path returned by
 * GetInputPath(startPos, endPos - 1), i.e. the span that is one word shorter.
 *
 * \param input                       sentence whose spans are enumerated
 * \param maxNoTransOptPerCoverage    forwarded unchanged to the base class
 * \param translationOptionThreshold  forwarded unchanged to the base class
 */
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  const size_t numWords = input.GetSize();
  m_inputPathMatrix.resize(numWords);

  // Shorter spans are created in earlier iterations of the outer loop, so the
  // one-word-shorter prefix has been stored by the time it is looked up.
  for (size_t spanLen = 1; spanLen <= numWords; ++spanLen) {
    const size_t numStarts = numWords - spanLen + 1;

    for (size_t startPos = 0; startPos < numStarts; ++startPos) {
      const size_t endPos = startPos + spanLen - 1;

      WordsRange range(startPos, endPos);
      Phrase subphrase(input.GetSubString(range));
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      InputPath *path = NULL;
      if (range.GetNumWordsCovered() != 1) {
        // multi-word span: chain it to its prefix path
        const InputPath &prefix = GetInputPath(startPos, endPos - 1);
        path = new InputPath(subphrase, labels, range, &prefix, NULL);
      } else {
        // single word: there is no previous path
        path = new InputPath(subphrase, labels, range, NULL, NULL);
      }

      m_inputPathMatrix[startPos].push_back(path);
      m_phraseDictionaryQueue.push_back(path);
    }
  }
}
void ChartParser::CreateInputPaths(const InputType &input) { size_t size = input.GetSize(); m_inputPathMatrix.resize(size); UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType, "Input must be a sentence or a tree, not lattice or confusion networks"); for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) { for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) { size_t endPos = startPos + phaseSize -1; vector<InputPath*> &vec = m_inputPathMatrix[startPos]; WordsRange range(startPos, endPos); Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos); InputPath *node; if (range.GetNumWordsCovered() == 1) { node = new InputPath(subphrase, labels, range, NULL, NULL); vec.push_back(node); } else { const InputPath &prevNode = GetInputPath(startPos, endPos - 1); node = new InputPath(subphrase, labels, range, &prevNode, NULL); vec.push_back(node); } //m_inputPathQueue.push_back(node); } } }
/**
 * Constructor: initialises the base class, then enumerates the input paths of
 * a word lattice and appends them to m_inputPathQueue.
 *
 * Steps:
 *  1. Reject configurations where StaticData reports the legacy phrase table
 *     or where no input feature is registered (both via UTIL_THROW_IF2).
 *  2. For every position, create a one-word path for each entry of the column
 *     at that position. Each path gets a copy of the entry's scores and the
 *     matching value from GetNextNodes(position). Epsilon words are rejected.
 *  3. For every endPos >= 1, extend each path that was in the queue when that
 *     endPos iteration started, provided that
 *     (path end position + path next-node value) == endPos and the extended
 *     span does not exceed the maximum phrase length. The extension appends
 *     each word of the column at endPos and accumulates its scores onto a
 *     copy of the previous path's scores.
 *
 * \param input                       word lattice to enumerate paths over
 * \param maxNoTransOptPerCoverage    forwarded unchanged to the base class
 * \param translationOptionThreshold  forwarded unchanged to the base class
 */
TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
  const WordLattice &input
  , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
                 "Not for models using the legacy binary phrase table");

  const InputFeature *inputFeature = StaticData::Instance().GetInputFeature();
  UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified");

  const size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
  const size_t size = input.GetSize();

  // 1-word phrases
  for (size_t startPos = 0; startPos < size; ++startPos) {
    const std::vector<size_t> &nextNodes = input.GetNextNodes(startPos);

    WordsRange range(startPos, startPos);
    const NonTerminalSet &labels = input.GetLabelSet(startPos, startPos);

    const ConfusionNet::Column &col = input.GetColumn(startPos);
    for (size_t wordInd = 0; wordInd < col.size(); ++wordInd) {
      const Word &word = col[wordInd].first;
      UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");

      Phrase subphrase;
      subphrase.AddWord(word);

      const ScorePair &scores = col[wordInd].second;
      ScorePair *inputScore = new ScorePair(scores);

      InputPath *path = new InputPath(subphrase, labels, range, NULL, inputScore);

      // nextNodes is indexed in parallel with the column entries
      path->SetNextNode(nextNodes[wordInd]);

      m_inputPathQueue.push_back(path);
    }
  }

  // iteratively extend all paths
  for (size_t endPos = 1; endPos < size; ++endPos) {
    const std::vector<size_t> &nextNodes = input.GetNextNodes(endPos);

    // The column at endPos is the same for every previous path; fetch it once.
    const ConfusionNet::Column &col = input.GetColumn(endPos);

    // Only paths present before this endPos iteration are candidates; the
    // count is captured up front because the queue grows inside the loop.
    const size_t numPrevPaths = m_inputPathQueue.size();

    for (size_t pathInd = 0; pathInd < numPrevPaths; ++pathInd) {
      // Reference to the InputPath object itself, not to the queue slot, so
      // it stays usable while new paths are pushed onto the queue below.
      const InputPath &prevPath = *m_inputPathQueue[pathInd];

      const size_t prevNextNode = prevPath.GetNextNode();
      if (prevPath.GetWordsRange().GetEndPos() + prevNextNode != endPos) {
        continue; // this path does not continue at endPos
      }

      const size_t startPos = prevPath.GetWordsRange().GetStartPos();
      if (endPos - startPos + 1 > maxPhraseLength) {
        continue; // extension would exceed the maximum phrase length
      }

      WordsRange range(startPos, endPos);
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      const Phrase &prevPhrase = prevPath.GetPhrase();
      const ScorePair *prevInputScore = prevPath.GetInputScore();
      UTIL_THROW_IF2(prevInputScore == NULL, "Null previous score");

      // loop thru every word at this position
      for (size_t wordInd = 0; wordInd < col.size(); ++wordInd) {
        const Word &word = col[wordInd].first;
        Phrase subphrase(prevPhrase);
        subphrase.AddWord(word);

        // accumulated score = previous path's score + this word's score
        const ScorePair &scores = col[wordInd].second;
        ScorePair *inputScore = new ScorePair(*prevInputScore);
        inputScore->PlusEquals(scores);

        InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);

        path->SetNextNode(nextNodes[wordInd]);

        m_inputPathQueue.push_back(path);
      } // for each word in the column
    } // for each previous path
  } // for each endPos
}
/**
 * Constructor: initialises the base class, then enumerates the input paths of
 * a confusion network.
 *
 * m_inputPathMatrix[startPos] receives one InputPathList per phrase length
 * (1, 2, ... up to min(input size, maximum phrase length)); every created
 * path is also appended to m_inputPathQueue.
 *
 *  - One-word paths: one per entry of the column at each position, carrying a
 *    copy of that entry's scores.
 *  - Longer paths: each path in the list obtained from
 *    GetInputPathList(startPos, endPos - 1) is extended by every entry of the
 *    column at endPos; the new path's score is a copy of the previous path's
 *    score with the entry's scores added.
 *
 * \param input                       confusion network to enumerate paths over
 * \param maxNoTransOptPerCoverage    forwarded unchanged to the base class
 * \param translationOptionThreshold  forwarded unchanged to the base class
 */
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
  const ConfusionNet &input
  , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  const InputFeature *inputFeature = StaticData::Instance().GetInputFeature();
  CHECK(inputFeature);

  const size_t inputSize = input.GetSize();
  m_inputPathMatrix.resize(inputSize);

  size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
  maxSizePhrase = std::min(inputSize, maxSizePhrase);

  // 1-word phrases
  for (size_t startPos = 0; startPos < inputSize; ++startPos) {
    vector<InputPathList> &vec = m_inputPathMatrix[startPos];
    vec.push_back(InputPathList());
    InputPathList &list = vec.back();

    WordsRange range(startPos, startPos);
    const NonTerminalSet &labels = input.GetLabelSet(startPos, startPos);

    const ConfusionNet::Column &col = input.GetColumn(startPos);
    for (size_t i = 0; i < col.size(); ++i) {
      const Word &word = col[i].first;
      Phrase subphrase;
      subphrase.AddWord(word);

      const ScorePair &scores = col[i].second;
      ScorePair *inputScore = new ScorePair(scores);

      InputPath *path = new InputPath(subphrase, labels, range, NULL, inputScore);
      list.push_back(path);

      m_inputPathQueue.push_back(path);
    }
  }

  // subphrases of 2+ words
  for (size_t phraseSize = 2; phraseSize <= maxSizePhrase; ++phraseSize) {
    // phraseSize <= maxSizePhrase <= inputSize, so the subtraction cannot wrap
    for (size_t startPos = 0; startPos < inputSize - phraseSize + 1; ++startPos) {
      const size_t endPos = startPos + phraseSize - 1;

      WordsRange range(startPos, endPos);
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      // Append the new list BEFORE fetching the previous one: the push_back
      // happens first so that a reference taken afterwards is not affected
      // by any reallocation of this vector.
      vector<InputPathList> &vec = m_inputPathMatrix[startPos];
      vec.push_back(InputPathList());
      InputPathList &list = vec.back();

      // paths covering [startPos, endPos - 1], built in an earlier iteration
      const InputPathList &prevPaths = GetInputPathList(startPos, endPos - 1);

      // The column at endPos is the same for every previous path; fetch it once.
      const ConfusionNet::Column &col = input.GetColumn(endPos);

      // loop thru every previous path
      InputPathList::const_iterator iterPath;
      for (iterPath = prevPaths.begin(); iterPath != prevPaths.end(); ++iterPath) {
        const InputPath &prevPath = **iterPath;

        const Phrase &prevPhrase = prevPath.GetPhrase();
        const ScorePair *prevInputScore = prevPath.GetInputScore();
        CHECK(prevInputScore);

        // loop thru every word at this position
        for (size_t i = 0; i < col.size(); ++i) {
          const Word &word = col[i].first;
          Phrase subphrase(prevPhrase);
          subphrase.AddWord(word);

          // accumulated score = previous path's score + this word's score
          const ScorePair &scores = col[i].second;
          ScorePair *inputScore = new ScorePair(*prevInputScore);
          inputScore->PlusEquals(scores);

          InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
          list.push_back(path);

          m_inputPathQueue.push_back(path);
        } // for each word in the column
      } // for each previous path
    } // for each startPos
  } // for each phraseSize
}