void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const { const TargetPhrase& target = hypo.GetCurrTargetPhrase(); const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = target.GetAlignNonTerm().GetNonTermIndexMap(); // loop over the rule that is being applied for (size_t pos = 0; pos < target.GetSize(); ++pos) { const Word& word = target.GetWord(pos); // for non-terminals, trigger the feature mapping the LHS of the previous hypo to the RHS of this hypo if (word.IsNonTerminal()) { size_t nonTermInd = nonTermIndexMap[pos]; const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd); const Word& prevLHS = prevHypo->GetTargetLHS(); if ( (word != prevLHS) || m_scoreIdentical ) { const std::string &name = GetOrSetFeatureName(word, prevLHS); accumulator->PlusEquals(this,name,1); } } } }
// Build a trellis path directly from a chart hypothesis: the final node wraps
// the hypothesis, there is no deviation point (NULL), and the score breakdown
// and total score are copied straight from the hypothesis.
ChartTrellisPath::ChartTrellisPath(const ChartHypothesis &hypo)
  : m_finalNode(new ChartTrellisNode(hypo))
  , m_deviationPoint(NULL)
  , m_scoreBreakdown(hypo.GetScoreBreakdown())
  , m_totalScore(hypo.GetTotalScore())
{
}
void ChartHypothesis::CleanupArcList() { // point this hypo's main hypo to itself m_winningHypo = this; if (!m_arcList) return; /* keep only number of arcs we need to create all n-best paths. * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ AllOptions const& opts = StaticData::Instance().options(); const StaticData &staticData = StaticData::Instance(); size_t nBestSize = opts.nbest.nbest_size; bool distinctNBest = (opts.nbest.only_distinct || opts.mbr.enabled || opts.output.NeedSearchGraph() || !opts.output.SearchGraphHG.empty()); if (!distinctNBest && m_arcList->size() > nBestSize) { // prune arc list only if there too many arcs NTH_ELEMENT4(m_arcList->begin() , m_arcList->begin() + nBestSize - 1 , m_arcList->end() , CompareChartHypothesisTotalScore()); // delete bad ones ChartArcList::iterator iter; for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) { ChartHypothesis *arc = *iter; delete arc; } m_arcList->erase(m_arcList->begin() + nBestSize , m_arcList->end()); } // set all arc's main hypo variable to this hypo ChartArcList::iterator iter = m_arcList->begin(); for (; iter != m_arcList->end() ; ++iter) { ChartHypothesis *arc = *iter; arc->SetWinningHypo(this); } //cerr << m_arcList->size() << " "; }
// Recombination state for one chart hypothesis. Depending on the feature's
// type, the state either captures the hypothesis' output phrase (SameOutput)
// or simply remembers the hypothesis pointer itself.
ControlRecombinationState::ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff)
  : m_ff(ff)
{
  if (ff.GetType() != SameOutput) {
    // key recombination on the hypothesis object
    m_hypo = &hypo;
  } else {
    // key recombination on the produced output phrase
    hypo.GetOutputPhrase(m_outputPhrase);
  }
}
void ChartHypothesis::CleanupArcList() { // point this hypo's main hypo to itself m_winningHypo = this; if (!m_arcList) return; /* keep only number of arcs we need to create all n-best paths. * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ const StaticData &staticData = StaticData::Instance(); size_t nBestSize = staticData.GetNBestSize(); bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph(); if (!distinctNBest && m_arcList->size() > nBestSize) { // prune arc list only if there too many arcs nth_element(m_arcList->begin() , m_arcList->begin() + nBestSize - 1 , m_arcList->end() , CompareChartChartHypothesisTotalScore()); // delete bad ones ChartArcList::iterator iter; for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) { ChartHypothesis *arc = *iter; ChartHypothesis::Delete(arc); } m_arcList->erase(m_arcList->begin() + nBestSize , m_arcList->end()); } // set all arc's main hypo variable to this hypo ChartArcList::iterator iter = m_arcList->begin(); for (; iter != m_arcList->end() ; ++iter) { ChartHypothesis *arc = *iter; arc->SetWinningHypo(this); } //cerr << m_arcList->size() << " "; }
// A detour replaces one node of a base trellis path with a different
// hypothesis. Its total score is the base path's score adjusted by swapping
// the substituted hypothesis' score for the replacement's.
ChartTrellisDetour::ChartTrellisDetour(
  boost::shared_ptr<const ChartTrellisPath> basePath,
  const ChartTrellisNode &substitutedNode,
  const ChartHypothesis &replacementHypo)
  : m_basePath(basePath)
  , m_substitutedNode(substitutedNode)
  , m_replacementHypo(replacementHypo)
{
  const float oldScore = substitutedNode.GetHypothesis().GetTotalScore();
  const float newScore = replacementHypo.GetTotalScore();
  m_totalScore = basePath->GetTotalScore() + (newScore - oldScore);
}
// Score sparse hierarchical reordering ("Huck") features for one applied rule.
// For each pair of adjacent source-side blocks (terminal runs and non-terminal
// spans), emits a feature "h_[leftWord_][rightWord_]M|S" recording whether the
// pair is translated monotonically (M) or swapped (S), based on where the
// blocks' boundary words map in the target rule.
// NOTE(review): assumes the input is a Sentence (the dynamic_cast below would
// throw std::bad_cast for other input types) — confirm callers guarantee this.
void SparseHieroReorderingFeature::EvaluateChart(
  const ChartHypothesis& cur_hypo ,
  ScoreComponentCollection* accumulator) const
{
  // get index map for underlying hypotheses
  //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
  //  cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();

  //The Huck features. For a rule with source side:
  //  abXcdXef
  //We first have to split into blocks:
  //  ab X cd X ef
  //Then we extract features based on the boundary words of the neighbouring
  //blocks. For a block pair, we use the right word of the left block, and the
  //left word of the right block.

  //Need to get blocks, and their alignment. Each block has a word range (on
  //the source), a non-terminal flag, and a set of alignment points in the
  //target phrase.

  //We need to be able to map source word position to target word position, as
  //much as possible (don't need interior of non-terminals). The alignment info
  //objects just give us the mappings between *rule* positions. So if we can
  //map source word position to source rule position, and target rule position
  //to target word position, then we can map right through.

  size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos();
  size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();

  // source spans covered by the sub-derivations (one per non-terminal)
  vector<WordsRange> sourceNTSpans;
  for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) {
    sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
  }
  //put in source order. Is this necessary?
  sort(sourceNTSpans.begin(), sourceNTSpans.end());

  // Partition the rule's source span into alternating blocks by repeatedly
  // carving each NT span out of the last (rightmost) remaining block.
  typedef pair<WordsRange,bool> Block; //flag indicates NT
  vector<Block> sourceBlocks;
  sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
  for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin();
       i != sourceNTSpans.end(); ++i) {
    const WordsRange& prevHypoRange = *i;
    Block lastBlock = sourceBlocks.back();
    sourceBlocks.pop_back();
    //split this range into before NT, NT and after NT
    if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) {
      sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false));
    }
    sourceBlocks.push_back(Block(prevHypoRange,true));
    if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) {
      sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false));
    }
  }

  //Mapping from source word to target rule position. alignMap merges the
  //terminal and non-terminal rule-position alignments into one lookup table.
  vector<size_t> sourceWordToTargetRulePos(sourceSize);
  map<size_t,size_t> alignMap;
  alignMap.insert(
    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(),
    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end());
  alignMap.insert(
    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(),
    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end());
  //Walk the blocks left to right, tracking the source *rule* position:
  //each terminal word advances it by one, a whole NT block advances it once.
  size_t sourceRulePos = 0;
  for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin();
       sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
    for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
         sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
      sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
      if (! sourceBlockIt->second) {
        //T: one rule position per terminal word
        ++sourceRulePos;
      }
    }
    if ( sourceBlockIt->second) {
      //NT: the whole span is a single rule position
      ++sourceRulePos;
    }
  }

  //Iterate through adjacent block pairs and emit one feature per pair
  const Sentence& sentence =
    dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
  //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
    Block& leftSourceBlock = sourceBlocks[i];
    Block& rightSourceBlock = sourceBlocks[i+1];
    // boundary words: right edge of the left block, left edge of the right block
    size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos();
    size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos();
    const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos);
    const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
    // rebase from sentence positions to rule-local positions
    sourceLeftBoundaryPos -= sourceStart;
    sourceRightBoundaryPos -= sourceStart;

    // Need to figure out where these map to on the target.
    size_t targetLeftRulePos = sourceWordToTargetRulePos[sourceLeftBoundaryPos];
    size_t targetRightRulePos = sourceWordToTargetRulePos[sourceRightBoundaryPos];

    // monotone unless the source order and target order disagree
    bool isMonotone = true;
    if ((sourceLeftBoundaryPos < sourceRightBoundaryPos &&
         targetLeftRulePos > targetRightRulePos) ||
        ((sourceLeftBoundaryPos > sourceRightBoundaryPos &&
          targetLeftRulePos < targetRightRulePos))) {
      isMonotone = false;
    }
    // feature name: "h_" + optional left/right boundary factors + M|S
    stringstream buf;
    buf << "h_"; //sparse reordering, Huck
    if (m_type == SourceLeft || m_type == SourceCombined) {
      buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
      buf << "_";
    }
    if (m_type == SourceRight || m_type == SourceCombined) {
      buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
      buf << "_";
    }
    buf << (isMonotone ? "M" : "S");
    accumulator->PlusEquals(this,buf.str(), 1);
  }
}
// Snapshot the chart hypothesis' output phrase into this state object
// (m_outputPhrase is filled in by GetOutputPhrase).
ConstrainedDecodingState::ConstrainedDecodingState(const ChartHypothesis &hypo)
{
  hypo.GetOutputPhrase(m_outputPhrase);
}