void ChartParser::Create(const Range &range, ChartParserCallback &to) { assert(m_decodeGraphList.size() == m_ruleLookupManagers.size()); std::vector <DecodeGraph*>::const_iterator iterDecodeGraph; std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin(); for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) { const DecodeGraph &decodeGraph = **iterDecodeGraph; assert(decodeGraph.GetSize() == 1); ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers; size_t maxSpan = decodeGraph.GetMaxChartSpan(); size_t last = m_source.GetSize()-1; if (maxSpan != 0) { last = min(last, range.GetStartPos()+maxSpan); } if (maxSpan == 0 || range.GetNumWordsCovered() <= maxSpan) { const InputPath &inputPath = GetInputPath(range); ruleLookupManager.GetChartRuleCollection(inputPath, last, to); } } if (range.GetNumWordsCovered() == 1 && range.GetStartPos() != 0 && range.GetStartPos() != m_source.GetSize()-1) { bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); if (to.Empty() || alwaysCreateDirectTranslationOption) { // create unknown words for 1 word coverage where we don't have any trans options const Word &sourceWord = m_source.GetWord(range.GetStartPos()); m_unknown.Process(sourceWord, range, to); } } }
float DistortionScoreProducer:: CalculateDistortionScore(const Hypothesis& hypo, const Range &prev, const Range &curr, const int FirstGap) { // if(!StaticData::Instance().UseEarlyDistortionCost()) { if(!hypo.GetManager().options()->reordering.use_early_distortion_cost) { return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr); } // else { /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007 Definitions: S : current source range S' : last translated source phrase range S'' : longest fully-translated initial segment */ int prefixEndPos = (int)FirstGap-1; if((int)FirstGap==-1) prefixEndPos = -1; // case1: S is adjacent to S'' => return 0 if ((int) curr.GetStartPos() == prefixEndPos+1) { IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl; return 0; } // case2: S is to the left of S' => return 2(length(S)) if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) { IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl; return (float) -2*(int)curr.GetNumWordsCovered(); } // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S)) if ((int) prev.GetEndPos() <= prefixEndPos) { IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl; int z = (int)curr.GetStartPos()-prefixEndPos - 1; return (float) -2*(z + (int)curr.GetNumWordsCovered()); } // case4: otherwise => return 2(nbWordBetween(S,S')+length(S)) IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl; return (float) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered()); }
/**
 * Build a new phrase holding copies of the words at positions
 * range.GetStartPos() .. range.GetEndPos() (both inclusive) of this phrase.
 *
 * \param range positions to copy
 * \return the newly built phrase
 */
Phrase Phrase::GetSubString(const Range &range) const
{
  // The constructor argument is the number of words the range covers.
  Phrase subPhrase(range.GetNumWordsCovered());

  size_t pos = range.GetStartPos();
  while (pos <= range.GetEndPos()) {
    // Append an empty slot, then overwrite it with the source word.
    Word &slot = subPhrase.AddWord();
    slot = GetWord(pos);
    ++pos;
  }

  return subPhrase;
}
/**
 * Build a new phrase for positions range.GetStartPos() .. range.GetEndPos()
 * (both inclusive) in which each appended word has only the requested factor
 * set, taken from the word at the same position of this phrase.
 *
 * \param range      positions to copy
 * \param factorType the factor to carry over
 * \return the newly built phrase
 */
Phrase Phrase::GetSubString(const Range &range, FactorType factorType) const
{
  // The constructor argument is the number of words the range covers.
  Phrase subPhrase(range.GetNumWordsCovered());

  size_t pos = range.GetStartPos();
  while (pos <= range.GetEndPos()) {
    // Fetch the factor first, then append a word and set that factor on it.
    const Factor *const factor = GetFactor(pos, factorType);
    subPhrase.AddWord().SetFactor(factorType, factor);
    ++pos;
  }

  return subPhrase;
}