LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMonotonic(WordsRange currRange, WordsBitmap coverage) const
{
  if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
      (!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
    return M;
  }
  return NM;
}
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMonotonic(WordsRange currRange) const
{
  if ((m_first && currRange.GetStartPos() == 0) ||
      (m_prevRange.GetEndPos() == currRange.GetStartPos()-1)) {
    return M;
  }
  return NM;
}
void ChartParser::Create(const WordsRange &wordsRange, ChartParserCallback &to)
{
  assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());

  std::vector<DecodeGraph*>::const_iterator iterDecodeGraph;
  std::vector<ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
  for (iterDecodeGraph = m_decodeGraphList.begin();
       iterDecodeGraph != m_decodeGraphList.end();
       ++iterDecodeGraph, ++iterRuleLookupManagers) {
    const DecodeGraph &decodeGraph = **iterDecodeGraph;
    assert(decodeGraph.GetSize() == 1);
    ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
    size_t maxSpan = decodeGraph.GetMaxChartSpan();
    if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) {
      ruleLookupManager.GetChartRuleCollection(wordsRange, to);
    }
  }

  if (wordsRange.GetNumWordsCovered() == 1 &&
      wordsRange.GetStartPos() != 0 &&
      wordsRange.GetStartPos() != m_source.GetSize()-1) {
    bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
    if (to.Empty() || alwaysCreateDirectTranslationOption) {
      // create unknown words for 1-word coverage where we don't have any translation options
      const Word &sourceWord = m_source.GetWord(wordsRange.GetStartPos());
      m_unknown.Process(sourceWord, wordsRange, to);
    }
  }
}
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSD(WordsRange currRange) const
{
  if (m_first) {
    if (currRange.GetStartPos() == 0) {
      return M;
    } else {
      return D;
    }
  }
  if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
    return M;
  } else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
    return S;
  }
  return D;
}
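// --- Illustration (not part of the Moses source): the MSD classification on
// plain indices. A minimal standalone sketch mirroring the checks above, with
// hypothetical names:
#include <iostream>

enum OrientationSketch { MONO, SWAP, DISC };

OrientationSketch ClassifyMSDSketch(int prevStart, int prevEnd,
                                    int currStart, int currEnd)
{
  if (prevEnd == currStart - 1) return MONO; // current span directly follows the previous one
  if (prevStart == currEnd + 1) return SWAP; // current span immediately precedes it
  return DISC;                               // anything else is discontinuous
}

int main()
{
  std::cout << ClassifyMSDSketch(0, 2, 3, 4) << std::endl; // 0: monotone
  std::cout << ClassifyMSDSketch(3, 4, 0, 2) << std::endl; // 1: swap
  std::cout << ClassifyMSDSketch(3, 4, 6, 7) << std::endl; // 2: discontinuous
}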
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeLeftRight(WordsRange currRange) const
{
  if (m_first ||
      (m_prevRange.GetEndPos() <= currRange.GetStartPos())) {
    return R;
  }
  return L;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeLeftRight(WordsRange currRange, WordsBitmap /* coverage */) const
{
  if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
    return R;
  }
  return L;
}
LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, Scores& scores) const
{
  const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
  const WordsRange currWordsRange = topt.GetSourceWordsRange();
  // keep track of the current coverage ourselves so we don't need the hypothesis
  WordsBitmap coverage = m_coverage;
  coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);

  ReorderingType reoType;
  if (m_first) {
    ClearScores(scores);
  } else {
    if (modelType == LexicalReorderingConfiguration::MSD) {
      reoType = GetOrientationTypeMSD(currWordsRange, coverage);
    } else if (modelType == LexicalReorderingConfiguration::MSLR) {
      reoType = GetOrientationTypeMSLR(currWordsRange, coverage);
    } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
      reoType = GetOrientationTypeMonotonic(currWordsRange, coverage);
    } else {
      reoType = GetOrientationTypeLeftRight(currWordsRange, coverage);
    }
    CopyScores(scores, topt, reoType);
  }

  return new HierarchicalReorderingForwardState(this, topt);
}
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
    const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
{
  if (!StaticData::Instance().UseEarlyDistortionCost()) {
    return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
  }

  /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
     Definitions:
     S   : current source range
     S'  : last translated source phrase range
     S'' : longest fully-translated initial segment
  */

  int prefixEndPos = (int)FirstGap - 1;
  if ((int)FirstGap == -1) prefixEndPos = -1;

  // case 1: S is adjacent to S'' => cost 0
  if ((int) curr.GetStartPos() == prefixEndPos + 1) {
    IFVERBOSE(4) std::cerr << "MQ07disto:case1" << std::endl;
    return 0;
  }

  // case 2: S is to the left of S' => cost -2*length(S)
  if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
    IFVERBOSE(4) std::cerr << "MQ07disto:case2" << std::endl;
    return (float) -2*(int)curr.GetNumWordsCovered();
  }

  // case 3: S' is a subsequence of S'' => cost -2*(wordsBetween(S,S'') + length(S))
  if ((int) prev.GetEndPos() <= prefixEndPos) {
    IFVERBOSE(4) std::cerr << "MQ07disto:case3" << std::endl;
    int z = (int)curr.GetStartPos() - prefixEndPos - 1;
    return (float) -2*(z + (int)curr.GetNumWordsCovered());
  }

  // case 4: otherwise => cost -2*(wordsBetween(S,S') + length(S))
  IFVERBOSE(4) std::cerr << "MQ07disto:case4" << std::endl;
  return (float) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered());
}
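// --- Illustration (not part of the Moses source): the four early-distortion
// cases above on concrete spans. A self-contained sketch with plain ints;
// GetNumWordsBetween is approximated here by the gap between the two spans:
#include <iostream>

float EarlyDistortionCostSketch(int prevStart, int prevEnd,
                                int currStart, int currEnd, int prefixEndPos)
{
  int currLen = currEnd - currStart + 1;
  // case 1: S starts right after the fully-translated prefix S''
  if (currStart == prefixEndPos + 1) return 0.0f;
  // case 2: S lies to the left of the last phrase S'
  if (currEnd < prevEnd) return -2.0f * currLen;
  // case 3: S' is inside S''; pay for the jump from the prefix to S
  if (prevEnd <= prefixEndPos) return -2.0f * ((currStart - prefixEndPos - 1) + currLen);
  // case 4: otherwise pay for the words between S and S'
  int between = (currStart > prevEnd) ? currStart - prevEnd - 1 : prevStart - currEnd - 1;
  return -2.0f * (between + currLen);
}

int main()
{
  // S'' = [0,2], S' = [5,6], S = [3,4]: S adjacent to S'' -> 0
  std::cout << EarlyDistortionCostSketch(5, 6, 3, 4, 2) << std::endl; // prints 0
  // S'' = [0,1], S' = [0,1], S = [4,5]: case 3, gap of 2 words -> -8
  std::cout << EarlyDistortionCostSketch(0, 1, 4, 5, 1) << std::endl; // prints -8
}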
Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
{
  Phrase retPhrase(wordsRange.GetNumWordsCovered());
  for (size_t currPos = wordsRange.GetStartPos(); currPos <= wordsRange.GetEndPos(); currPos++) {
    Word &word = retPhrase.AddWord();
    word = GetWord(currPos);
  }
  return retPhrase;
}
LexicalReordering::OrientationType LexicalMonotonicReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
  const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
  const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
  // check if there is a previous hypo
  if (0 == prevHypothesis->GetId()) {
    if (0 == currWordsRange.GetStartPos()) {
      return Monotone;
    } else {
      return NonMonotone;
    }
  } else {
    const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
    if (prevWordsRange.GetEndPos() == currWordsRange.GetStartPos()-1) {
      return Monotone;
    } else {
      return NonMonotone;
    }
  }
}
Phrase Phrase::GetSubString(const WordsRange &wordsRange, FactorType factorType) const
{
  Phrase retPhrase(wordsRange.GetNumWordsCovered());
  for (size_t currPos = wordsRange.GetStartPos(); currPos <= wordsRange.GetEndPos(); currPos++) {
    const Factor* f = GetFactor(currPos, factorType);
    Word &word = retPhrase.AddWord();
    word.SetFactor(factorType, f);
  }
  return retPhrase;
}
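// --- Illustration (not part of the Moses source): a hypothetical use of the
// two overloads above. Given a phrase over positions 0..2 ("the green house"),
// GetSubString(WordsRange(1, 2)) copies whole words 1 and 2, while
// GetSubString(WordsRange(1, 2), someFactorType) copies only the requested
// factor of each word, leaving the other factors of the new words unset.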
LexicalReordering::OrientationType LexicalDirectionalReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
  const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
  const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
  // check if there is a previous hypo
  if (0 == prevHypothesis->GetId()) {
    return Right;
  } else {
    const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
    if (prevWordsRange.GetEndPos() <= currWordsRange.GetStartPos()) {
      return Right;
    } else {
      return Left;
    }
  }
}
bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const WordsRange &range) const
{
  // since we check for reordering limits, it's good to have that limit handy
  int maxDistortion = StaticData::Instance().GetMaxDistortion();

  // if there are reordering limits, make sure they are not violated;
  // the coverage bitmap is handy here (and the position of the first gap)
  const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
               , startPos = range.GetStartPos()
               , endPos = range.GetEndPos();

  // if reordering constraints are used (--monotone-at-punctuation or xml), check that all pass
  if (!m_source.GetReorderingConstraint().Check(hypoBitmap, startPos, endPos)) {
    return false;
  }

  // no reordering limit: no problem
  if (maxDistortion < 0) {
    return true;
  }

  bool leftMostEdge = (hypoFirstGapPos == startPos);
  // any length extension is okay if starting at left-most edge
  if (leftMostEdge) {
    return true;
  }

  // starting somewhere other than the left-most edge, use caution:
  // the basic idea is this: we would like to translate a phrase starting
  // from a position further right than the left-most open gap. The
  // distortion penalty for the following phrase will be computed relative
  // to the ending position of the current extension, so we ask now what
  // its maximum value will be (which will always be the value of the
  // hypothesis starting at the left-most edge). If this value is larger
  // than the distortion limit, we don't allow this extension to be made.
  WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
  int required_distortion = m_source.ComputeDistortionDistance(range, bestNextExtension);
  if (required_distortion > maxDistortion) {
    return false;
  }
  return true;
}
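// --- Illustration (not part of the Moses source): the pruning idea above in
// isolation. Assuming ComputeDistortionDistance(prev, next) behaves like
// |next.start - prev.end - 1| (a sketch, not the Moses definition verbatim),
// an extension ending at endPos is only viable if jumping back to the first
// uncovered position stays within the limit:
#include <cstdlib>

bool WithinDistortionLimitSketch(int firstGapPos, int startPos, int endPos,
                                 int maxDistortion)
{
  if (maxDistortion < 0) return true;        // no reordering limit configured
  if (firstGapPos == startPos) return true;  // extending at the left-most edge
  // the cheapest follow-up extension is a one-word phrase at the first gap
  int requiredDistortion = std::abs(firstGapPos - endPos - 1);
  return requiredDistortion <= maxDistortion;
}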
void IOStream::OutputNBestList(const LatticePathList &nBestList, long translationId)
{
  bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
  bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();

  LatticePathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const LatticePath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    *m_nBestStream << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(*m_nBestStream, edge.GetCurrTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
    }
    *m_nBestStream << " ||| ";

    // print the scores in a hardwired order;
    // before each model type, the corresponding command-line-like name must be emitted
    // (the MERT script relies on this)

    // basic distortion
    if (labeledOutput)
      *m_nBestStream << "d: ";
    *m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";

    // reordering
    vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
    if (rms.size() > 0) {
      vector<LexicalReordering*>::iterator iter;
      for (iter = rms.begin(); iter != rms.end(); ++iter) {
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        for (size_t j = 0; j < scores.size(); ++j) {
          *m_nBestStream << scores[j] << " ";
        }
      }
    }

    // lm
    const LMList& lml = StaticData::Instance().GetAllLM();
    if (lml.size() > 0) {
      if (labeledOutput)
        *m_nBestStream << "lm: ";
      LMList::const_iterator lmi = lml.begin();
      for (; lmi != lml.end(); ++lmi) {
        *m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
      }
    }

    // translation components
    if (StaticData::Instance().GetInputType() == 0) {
      // translation components for text input
      vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
      if (pds.size() > 0) {
        if (labeledOutput)
          *m_nBestStream << "tm: ";
        vector<PhraseDictionary*>::iterator iter;
        for (iter = pds.begin(); iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
          for (size_t j = 0; j < scores.size(); ++j)
            *m_nBestStream << scores[j] << " ";
        }
      }
    } else {
      // translation components for Confusion Network input:
      // the first translation component has GetNumInputScores() scores from
      // the input Confusion Network at the beginning of the vector
      vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
      if (pds.size() > 0) {
        vector<PhraseDictionary*>::iterator iter = pds.begin();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        size_t pd_numinputscore = (*iter)->GetNumInputScores();
        if (pd_numinputscore) {
          if (labeledOutput)
            *m_nBestStream << "I: ";
          for (size_t j = 0; j < pd_numinputscore; ++j)
            *m_nBestStream << scores[j] << " ";
        }
        for (iter = pds.begin(); iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
          size_t pd_numinputscore = (*iter)->GetNumInputScores();
          if (iter == pds.begin() && labeledOutput)
            *m_nBestStream << "tm: ";
          for (size_t j = pd_numinputscore; j < scores.size(); ++j)
            *m_nBestStream << scores[j] << " ";
        }
      }
    }

    // word penalty
    if (labeledOutput)
      *m_nBestStream << "w: ";
    *m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";

    // generation
    vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
    if (gds.size() > 0) {
      if (labeledOutput)
        *m_nBestStream << "g: ";
      vector<GenerationDictionary*>::iterator iter;
      for (iter = gds.begin(); iter != gds.end(); ++iter) {
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        for (size_t j = 0; j < scores.size(); j++) {
          *m_nBestStream << scores[j] << " ";
        }
      }
    }

    // total
    *m_nBestStream << "||| " << path.GetTotalScore();

    if (includeAlignment) {
      *m_nBestStream << " |||";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        WordsRange sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = edge.GetCurrTargetWordsRange();
        *m_nBestStream << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          *m_nBestStream << "-" << sourceRange.GetEndPos();
        }
        *m_nBestStream << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          *m_nBestStream << "-" << targetRange.GetEndPos();
        }
      }
    }

    *m_nBestStream << endl;
  }

  *m_nBestStream << std::flush;
}
size_t begin() const { return range.GetStartPos(); }
void ChartRuleLookupManagerMemory::GetChartRuleCollection(
  const WordsRange &range,
  bool adhereTableLimit,
  ChartTranslationOptionList &outColl)
{
  size_t relEndPos = range.GetEndPos() - range.GetStartPos();
  size_t absEndPos = range.GetEndPos();

  // MAIN LOOP. create list of nodes of target phrases
  ProcessedRuleColl &processedRuleCol = *m_processedRuleColls[range.GetStartPos()];
  const ProcessedRuleList &runningNodes = processedRuleCol.GetRunningNodes();
  // Note that runningNodes can be expanded as the loop runs (through calls to
  // ExtendPartialRuleApplication()).
  for (size_t ind = 0; ind < runningNodes.size(); ++ind) {
    const ProcessedRule &prevProcessedRule = *runningNodes[ind];
    const PhraseDictionaryNodeSCFG &prevNode = prevProcessedRule.GetLastNode();
    const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed();
    size_t startPos = (prevWordConsumed == NULL) ? range.GetStartPos()
                      : prevWordConsumed->GetWordsRange().GetEndPos() + 1;

    // search for terminal symbol
    if (startPos == absEndPos) {
      const Word &sourceWord = GetSentence().GetWord(absEndPos);
      const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(sourceWord);
      if (node != NULL) {
        WordConsumed *newWordConsumed = new WordConsumed(absEndPos, absEndPos
            , sourceWord
            , prevWordConsumed);
        ProcessedRule *processedRule = new ProcessedRule(*node, newWordConsumed);
        processedRuleCol.Add(relEndPos+1, processedRule);
      }
    }

    // search for non-terminals
    size_t endPos, stackInd;
    if (startPos > absEndPos)
      continue;
    else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
      // start
      endPos = absEndPos - 1;
      stackInd = relEndPos;
    } else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }

    const NonTerminalSet &sourceNonTerms = GetSentence().GetLabelSet(startPos, endPos);
    const NonTerminalSet &targetNonTerms = GetCellCollection().GetHeadwords(WordsRange(startPos, endPos));
    ExtendPartialRuleApplication(prevNode, prevWordConsumed, startPos, endPos, stackInd,
                                 sourceNonTerms, targetNonTerms, processedRuleCol);
  }

  // return list of target phrases
  ProcessedRuleList &nodes = processedRuleCol.Get(relEndPos + 1);

  size_t rulesLimit = StaticData::Instance().GetRuleLimit();
  ProcessedRuleList::const_iterator iterNode;
  for (iterNode = nodes.begin(); iterNode != nodes.end(); ++iterNode) {
    const ProcessedRule &processedRule = **iterNode;
    const PhraseDictionaryNodeSCFG &node = processedRule.GetLastNode();
    const WordConsumed *wordConsumed = processedRule.GetLastWordConsumed();
    assert(wordConsumed);
    const TargetPhraseCollection *targetPhraseCollection = node.GetTargetPhraseCollection();
    if (targetPhraseCollection != NULL) {
      outColl.Add(*targetPhraseCollection, *wordConsumed, adhereTableLimit, rulesLimit);
    }
  }
  outColl.CreateChartRules(rulesLimit);
}
HierarchicalReorderingForwardState::HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage)
{
  const WordsRange currWordsRange = topt.GetSourceWordsRange();
  m_coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);
}
void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
{
  os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
  std::vector<const Hypothesis*> translationPath;
  while (hypo) {
    translationPath.push_back(hypo);
    hypo = hypo->GetPrevHypo();
  }
  std::reverse(translationPath.begin(), translationPath.end());

  std::vector<std::string> droppedWords;
  std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
  if (tpi == translationPath.end())
    return;
  ++tpi;  // skip initial translation state
  std::vector<std::string> sourceMap;
  std::vector<std::string> targetMap;
  std::vector<unsigned int> lmAcc(0);
  size_t lmCalls = 0;
  bool doLMStats = ((*tpi)->GetLMStats() != 0);
  if (doLMStats)
    lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
  for (; tpi != translationPath.end(); ++tpi) {
    std::ostringstream sms;
    std::ostringstream tms;
    std::string target = (*tpi)->GetTargetPhraseStringRep();
    std::string source = (*tpi)->GetSourcePhraseStringRep();
    WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
    WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
    const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();

    // language model backoff stats
    if (doLMStats) {
      std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
      std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
      std::vector<unsigned int>::iterator acc = lmAcc.begin();
      for (; i != lmstats.end(); ++i, ++acc) {
        std::vector<unsigned int>::iterator j = i->begin();
        lmCalls += i->size();
        for (; j != i->end(); ++j) {
          (*acc) += *j;
        }
      }
    }

    bool epsilon = false;
    if (target == "") {
      target = "<EPSILON>";
      epsilon = true;
      droppedWords.push_back(source);
    }
    os << " SOURCE: " << swr << " " << source << std::endl
       << " TRANSLATED AS: " << target << std::endl
       << " WORD ALIGNED: " << alignmentInfo << std::endl;
    size_t twr_i = twr.GetStartPos();
    size_t swr_i = swr.GetStartPos();
    if (!epsilon) {
      sms << twr_i;
    }
    if (epsilon) {
      tms << "del(" << swr_i << ")";
    } else {
      tms << swr_i;
    }
    swr_i++;
    twr_i++;
    for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
      sms << '-' << twr_i;
    }
    for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
      tms << '-' << swr_i;
    }
    if (!epsilon)
      targetMap.push_back(sms.str());
    sourceMap.push_back(tms.str());
  }

  std::vector<std::string>::iterator si = sourceMap.begin();
  std::vector<std::string>::iterator ti = targetMap.begin();
  os << std::endl << "SOURCE/TARGET SPANS:";
  os << std::endl << " SOURCE:";
  for (; si != sourceMap.end(); ++si) {
    os << " " << *si;
  }
  os << std::endl << " TARGET:";
  for (; ti != targetMap.end(); ++ti) {
    os << " " << *ti;
  }
  os << std::endl << std::endl;

  if (doLMStats && lmCalls > 0) {
    std::vector<unsigned int>::iterator acc = lmAcc.begin();
    const LMList& lmlist = system->GetLanguageModels();
    LMList::const_iterator i = lmlist.begin();
    for (; acc != lmAcc.end(); ++acc, ++i) {
      char buf[256];
      sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
      os << (*i)->GetScoreProducerDescription() << ", AVG N-GRAM LENGTH: " << buf << std::endl;
    }
  }

  if (droppedWords.size() > 0) {
    std::vector<std::string>::iterator dwi = droppedWords.begin();
    os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
    for (; dwi != droppedWords.end(); ++dwi) {
      os << "\tdropped=" << *dwi << std::endl;
    }
  }

  os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
  StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os,
      translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
  os << std::endl;
}
void OutputNBest(std::ostream& out
                 , const Moses::TrellisPathList &nBestList
                 , const std::vector<Moses::FactorType>& outputFactorOrder
                 , long translationId
                 , char reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores(path.GetScoreBreakdown(), out);

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
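// --- Illustration (not part of the Moses source): with segmentation enabled,
// each emitted n-best line has the shape (hypothetical scores and spans)
//
//   0 ||| das ist ein kleines haus ||| d: 0 lm: -24.5 tm: -13.2 w: -5 ||| -27.3 ||| 0-1=0-1 2=2 3-4=3-4
//
// i.e. sentence id, target surface string, labeled feature scores, total
// score, and source=target span pairs, all separated by "|||".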
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList,
                 const std::vector<Moses::FactorType>& outputFactorOrder, long translationId)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeAlignment = staticData.NBestIncludesAlignment();
  //bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
    }
    out << " |||";

    std::string lastName = "";
    const vector<const StatefulFeatureFunction*>& sff =
      staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
    for (size_t i = 0; i < sff.size(); i++) {
      if (labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName()) {
        lastName = sff[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(sff[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    const vector<const StatelessFeatureFunction*>& slf =
      staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
    for (size_t i = 0; i < slf.size(); i++) {
      if (labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName()) {
        lastName = slf[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(slf[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    // translation components
    if (StaticData::Instance().GetInputType() == SentenceInput) {
      // translation components for text input
      vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
      if (pds.size() > 0) {
        if (labeledOutput)
          out << " tm:";
        vector<PhraseDictionaryFeature*>::iterator iter;
        for (iter = pds.begin(); iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
          for (size_t j = 0; j < scores.size(); ++j)
            out << " " << scores[j];
        }
      }
    } else {
      // translation components for Confusion Network input:
      // the first translation component has GetNumInputScores() scores from
      // the input Confusion Network at the beginning of the vector
      vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
      if (pds.size() > 0) {
        vector<PhraseDictionaryFeature*>::iterator iter = pds.begin();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        size_t pd_numinputscore = (*iter)->GetNumInputScores();
        if (pd_numinputscore) {
          if (labeledOutput)
            out << " I:";
          for (size_t j = 0; j < pd_numinputscore; ++j)
            out << " " << scores[j];
        }
        for (iter = pds.begin(); iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
          size_t pd_numinputscore = (*iter)->GetNumInputScores();
          if (iter == pds.begin() && labeledOutput)
            out << " tm:";
          for (size_t j = pd_numinputscore; j < scores.size(); ++j)
            out << " " << scores[j];
        }
      }
    }

    // generation
    vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
    if (gds.size() > 0) {
      if (labeledOutput)
        out << " g: ";
      vector<GenerationDictionary*>::iterator iter;
      for (iter = gds.begin(); iter != gds.end(); ++iter) {
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        for (size_t j = 0; j < scores.size(); j++) {
          out << scores[j] << " ";
        }
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase alignment
    if (includeAlignment) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList,
                 const std::vector<Moses::FactorType>& outputFactorOrder,
                 const TranslationSystem* system, long translationId)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeAlignment = staticData.NBestIncludesAlignment();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
    }
    out << " |||";

    std::string lastName = "";
    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
    for (size_t i = 0; i < sff.size(); i++) {
      if (labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName()) {
        lastName = sff[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(sff[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
    for (size_t i = 0; i < slf.size(); i++) {
      if (labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName()) {
        lastName = slf[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(slf[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for (size_t i = 0; i < pds.size(); i++) {
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(pds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for (size_t i = 0; i < gds.size(); i++) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(gds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase alignment
    if (includeAlignment) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
void ChartRuleLookupManagerMemory::GetChartRuleCollection(
  const WordsRange &range,
  ChartParserCallback &outColl)
{
  size_t relEndPos = range.GetEndPos() - range.GetStartPos();
  size_t absEndPos = range.GetEndPos();

  // MAIN LOOP. create list of nodes of target phrases

  // get list of all rules that apply to spans at the same starting position
  DottedRuleColl &dottedRuleCol = *m_dottedRuleColls[range.GetStartPos()];
  const DottedRuleList &expandableDottedRuleList = dottedRuleCol.GetExpandableDottedRuleList();

  const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);

  // loop through the rules
  // (note that expandableDottedRuleList can be expanded as the loop runs
  //  through calls to ExtendPartialRuleApplication())
  for (size_t ind = 0; ind < expandableDottedRuleList.size(); ++ind) {
    // rule we are about to extend
    const DottedRuleInMemory &prevDottedRule = *expandableDottedRuleList[ind];
    // we will now try to extend it, starting after where it ended
    size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos()
                      : prevDottedRule.GetWordsRange().GetEndPos() + 1;

    // search for terminal symbol
    // (if only one more word position needs to be covered)
    if (startPos == absEndPos) {
      // look up in the rule dictionary whether the current rule can be
      // extended with the source word in the last position
      const Word &sourceWord = sourceWordLabel.GetLabel();
      const PhraseDictionaryNodeSCFG *node = prevDottedRule.GetLastNode().GetChild(sourceWord);

      // if we found a new rule -> create it and add it to the list
      if (node != NULL) {
        // create the rule
#ifdef USE_BOOST_POOL
        DottedRuleInMemory *dottedRule = m_dottedRulePool.malloc();
        new (dottedRule) DottedRuleInMemory(*node, sourceWordLabel, prevDottedRule);
#else
        DottedRuleInMemory *dottedRule = new DottedRuleInMemory(*node, sourceWordLabel,
            prevDottedRule);
#endif
        dottedRuleCol.Add(relEndPos+1, dottedRule);
      }
    }

    // search for non-terminals
    size_t endPos, stackInd;

    // span is already completely covered? nothing can be done
    if (startPos > absEndPos)
      continue;
    else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
      // We're at the root of the prefix tree so won't try to cover the full
      // span (i.e. we don't allow non-lexical unary rules). However, we need
      // to match non-unary rules that begin with a non-terminal child, so we
      // do that in two steps: during this iteration we search for non-terminals
      // that cover all but the last source word in the span (there won't
      // already be running nodes for these because that would have required a
      // non-lexical unary rule match for an earlier span). Any matches will
      // result in running nodes being appended to the list and on subsequent
      // iterations (for this same span), we'll extend them to cover the final
      // word.
      endPos = absEndPos - 1;
      stackInd = relEndPos;
    } else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }

    ExtendPartialRuleApplication(prevDottedRule, startPos, endPos, stackInd,
                                 dottedRuleCol);
  }

  // list of rules that cover the entire span
  DottedRuleList &rules = dottedRuleCol.Get(relEndPos + 1);

  // look up target sides for the rules
  DottedRuleList::const_iterator iterRule;
  for (iterRule = rules.begin(); iterRule != rules.end(); ++iterRule) {
    const DottedRuleInMemory &dottedRule = **iterRule;
    const PhraseDictionaryNodeSCFG &node = dottedRule.GetLastNode();

    // look up target sides
    const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();

    // add the fully expanded rule (with lexical target side)
    if (tpc != NULL) {
      AddCompletedRule(dottedRule, *tpc, range, outColl);
    }
  }

  dottedRuleCol.Clear(relEndPos+1);
}
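// --- Illustration (not part of the Moses source): the lookups above walk a
// prefix tree (trie) of rule source sides, carrying "dotted rules" that record
// how much of a span each rule prefix has matched so far. A minimal,
// self-contained sketch of that idea, with hypothetical types:
#include <cstddef>
#include <map>
#include <string>

struct RuleTrieNode {
  std::map<std::string, RuleTrieNode> children; // keyed by terminal or non-terminal symbol
  bool hasTargetPhrases = false;                // complete rules end at this node
};

struct DottedRuleSketch {
  const RuleTrieNode *node; // current position in the rule trie
  std::size_t endPos;       // last source position consumed so far
};

// Try to extend a partial rule with a symbol covering [rule.endPos+1 .. newEnd];
// returns false if no rule in the table has this prefix.
bool ExtendSketch(const DottedRuleSketch &rule, const std::string &symbol,
                  std::size_t newEnd, DottedRuleSketch &out)
{
  std::map<std::string, RuleTrieNode>::const_iterator it = rule.node->children.find(symbol);
  if (it == rule.node->children.end())
    return false;
  out.node = &it->second;
  out.endPos = newEnd;
  return true;
}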
void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
  const WordsRange &range,
  ChartParserCallback &outColl)
{
  const StaticData &staticData = StaticData::Instance();

  size_t relEndPos = range.GetEndPos() - range.GetStartPos();
  size_t absEndPos = range.GetEndPos();

  // MAIN LOOP. create list of nodes of target phrases
  DottedRuleStackOnDisk &expandableDottedRuleList = *m_expandableDottedRuleListVec[range.GetStartPos()];

  // sort saved nodes so we only process the nodes with the most counts
  expandableDottedRuleList.SortSavedNodes();

  const DottedRuleStackOnDisk::SavedNodeColl &savedNodeColl = expandableDottedRuleList.GetSavedNodeColl();
  //cerr << "savedNodeColl=" << savedNodeColl.size() << " ";

  const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);

  for (size_t ind = 0; ind < (savedNodeColl.size()); ++ind) {
    const SavedNodeOnDisk &savedNode = *savedNodeColl[ind];
    const DottedRuleOnDisk &prevDottedRule = savedNode.GetDottedRule();
    const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
    size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos()
                      : prevDottedRule.GetWordsRange().GetEndPos() + 1;

    // search for terminal symbol
    if (startPos == absEndPos) {
      OnDiskPt::Word *sourceWordBerkeleyDb =
        m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceWordLabel.GetLabel());

      if (sourceWordBerkeleyDb != NULL) {
        const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
        if (node != NULL) {
          // TODO figure out why the source word is needed from the node, not from the sentence;
          // probably to do with factors or non-terminals
          //const Word &sourceWord = node->GetSourceWord();
          DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, sourceWordLabel, prevDottedRule);
          expandableDottedRuleList.Add(relEndPos+1, dottedRule);

          // cache for cleanup
          m_sourcePhraseNode.push_back(node);
        }

        delete sourceWordBerkeleyDb;
      }
    }

    // search for non-terminals
    size_t endPos, stackInd;
    if (startPos > absEndPos)
      continue;
    else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
      // start
      endPos = absEndPos - 1;
      stackInd = relEndPos;
    } else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }

    // size_t nonTermNumWordsCovered = endPos - startPos + 1;

    // get target non-terminals in this span from the chart
    const ChartCellLabelSet &chartNonTermSet = GetTargetLabelSet(startPos, endPos);

    //const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal()
    //           ,&defaultTargetNonTerm = staticData.GetOutputDefaultNonTerminal();

    // go through each SOURCE lhs
    const NonTerminalSet &sourceLHSSet = GetSentence().GetLabelSet(startPos, endPos);

    NonTerminalSet::const_iterator iterSourceLHS;
    for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
      const Word &sourceLHS = *iterSourceLHS;

      OnDiskPt::Word *sourceLHSBerkeleyDb =
        m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);

      if (sourceLHSBerkeleyDb == NULL) {
        delete sourceLHSBerkeleyDb;
        continue; // vocab not in pt. node definitely won't be in there
      }

      const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
      delete sourceLHSBerkeleyDb;

      if (sourceNode == NULL)
        continue; // didn't find source node

      // go through each TARGET lhs
      ChartCellLabelSet::const_iterator iterChartNonTerm;
      for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) {
        const ChartCellLabel &cellLabel = iterChartNonTerm->second;

        //cerr << sourceLHS << " " << defaultSourceNonTerm << " " << chartNonTerm << " " << defaultTargetNonTerm << endl;

        //bool isSyntaxNonTerm = (sourceLHS != defaultSourceNonTerm) || (chartNonTerm != defaultTargetNonTerm);
        bool doSearch = true; //isSyntaxNonTerm ? nonTermNumWordsCovered <= maxSyntaxSpan :
        //                      nonTermNumWordsCovered <= maxDefaultSpan;

        if (doSearch) {
          OnDiskPt::Word *chartNonTermBerkeleyDb =
            m_dbWrapper.ConvertFromMoses(Output, m_outputFactorsVec, cellLabel.GetLabel());

          if (chartNonTermBerkeleyDb == NULL)
            continue;

          const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*chartNonTermBerkeleyDb, m_dbWrapper);
          delete chartNonTermBerkeleyDb;

          if (node == NULL)
            continue;

          // found matching entry
          //const Word &sourceWord = node->GetSourceWord();
          DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, cellLabel, prevDottedRule);
          expandableDottedRuleList.Add(stackInd, dottedRule);

          m_sourcePhraseNode.push_back(node);
        }
      } // for (iterChartNonTerm

      delete sourceNode;
    } // for (iterSourceLHS

    // return list of target phrases
    DottedRuleCollOnDisk &nodes = expandableDottedRuleList.Get(relEndPos + 1);

    // source LHS
    DottedRuleCollOnDisk::const_iterator iterDottedRuleColl;
    for (iterDottedRuleColl = nodes.begin(); iterDottedRuleColl != nodes.end(); ++iterDottedRuleColl) {
      // node of the last source word
      const DottedRuleOnDisk &prevDottedRule = **iterDottedRuleColl;
      if (prevDottedRule.Done())
        continue;
      prevDottedRule.Done(true);

      const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();

      // get node for each source LHS
      const NonTerminalSet &lhsSet = GetSentence().GetLabelSet(range.GetStartPos(), range.GetEndPos());
      NonTerminalSet::const_iterator iterLabelSet;
      for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
        const Word &sourceLHS = *iterLabelSet;

        OnDiskPt::Word *sourceLHSBerkeleyDb =
          m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);
        if (sourceLHSBerkeleyDb == NULL)
          continue;

        const TargetPhraseCollection *targetPhraseCollection = NULL;
        const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
        if (node) {
          UINT64 tpCollFilePos = node->GetValue();
          std::map<UINT64, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
          if (iterCache == m_cache.end()) {
            const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb =
              node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);

            std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
            targetPhraseCollection = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
                                     , m_outputFactorsVec
                                     , m_dictionary
                                     , weightT
                                     , m_filePath
                                     , m_dbWrapper.GetVocab());

            delete tpcollBerkeleyDb;
            m_cache[tpCollFilePos] = targetPhraseCollection;
          } else {
            // just get it out of the cache
            targetPhraseCollection = iterCache->second;
          }

          CHECK(targetPhraseCollection);
          if (!targetPhraseCollection->IsEmpty()) {
            AddCompletedRule(prevDottedRule, *targetPhraseCollection, range, outColl);
          }
        } // if (node)

        delete node;
        delete sourceLHSBerkeleyDb;
      }
    }
  } // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind)

  //cerr << numDerivations << " ";
}
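// --- Illustration (not part of the Moses source): the on-disk lookup above
// memoizes converted target phrase collections by their file offset (m_cache),
// so each collection is deserialized at most once. A minimal sketch of that
// pattern, with hypothetical Collection/LoadFromDisk stand-ins:
#include <cstdint>
#include <map>

struct Collection { /* converted target phrases */ };

// stands in for the expensive read-and-convert step
Collection *LoadFromDisk(std::uint64_t /* filePos */) { return new Collection(); }

const Collection *GetCachedCollection(std::map<std::uint64_t, Collection*> &cache,
                                      std::uint64_t filePos)
{
  std::map<std::uint64_t, Collection*>::const_iterator it = cache.find(filePos);
  if (it != cache.end())
    return it->second;              // hit: reuse the earlier conversion
  Collection *coll = LoadFromDisk(filePos); // miss: convert once and remember
  cache[filePos] = coll;
  return coll;
}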