void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const { const TargetPhrase& target = hypo.GetCurrTargetPhrase(); const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = target.GetAlignNonTerm().GetNonTermIndexMap(); // loop over the rule that is being applied for (size_t pos = 0; pos < target.GetSize(); ++pos) { const Word& word = target.GetWord(pos); // for non-terminals, trigger the feature mapping the LHS of the previous hypo to the RHS of this hypo if (word.IsNonTerminal()) { size_t nonTermInd = nonTermIndexMap[pos]; const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd); const Word& prevLHS = prevHypo->GetTargetLHS(); if ( (word != prevLHS) || m_scoreIdentical ) { const std::string &name = GetOrSetFeatureName(word, prevLHS); accumulator->PlusEquals(this,name,1); } } } }
void SparseHieroReorderingFeature::EvaluateChart( const ChartHypothesis& cur_hypo , ScoreComponentCollection* accumulator) const { // get index map for underlying hypotheses //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = // cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); //The Huck features. For a rule with source side: // abXcdXef //We first have to split into blocks: // ab X cd X ef //Then we extract features based in the boundary words of the neighbouring blocks //For the block pair, we use the right word of the left block, and the left //word of the right block. //Need to get blocks, and their alignment. Each block has a word range (on the // on the source), a non-terminal flag, and a set of alignment points in the target phrase //We need to be able to map source word position to target word position, as //much as possible (don't need interior of non-terminals). The alignment info //objects just give us the mappings between *rule* positions. So if we can //map source word position to source rule position, and target rule position //to target word position, then we can map right through. size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos(); size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered(); vector<WordsRange> sourceNTSpans; for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) { sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange()); } //put in source order. Is this necessary? sort(sourceNTSpans.begin(), sourceNTSpans.end()); //cerr << "Source NTs: "; //for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " "; //cerr << endl; typedef pair<WordsRange,bool> Block;//flag indicates NT vector<Block> sourceBlocks; sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false)); for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin(); i != sourceNTSpans.end(); ++i) { const WordsRange& prevHypoRange = *i; Block lastBlock = sourceBlocks.back(); sourceBlocks.pop_back(); //split this range into before NT, NT and after NT if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) { sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false)); } sourceBlocks.push_back(Block(prevHypoRange,true)); if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) { sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false)); } } /* cerr << "Source Blocks: "; for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " " << (sourceBlocks[i].second ? "NT" : "T") << " "; cerr << endl; */ //Mapping from source word to target rule position vector<size_t> sourceWordToTargetRulePos(sourceSize); map<size_t,size_t> alignMap; alignMap.insert( cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(), cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end()); alignMap.insert( cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(), cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end()); //vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm() size_t sourceRulePos = 0; //cerr << "SW->RP "; for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin(); sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) { for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos(); sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) { sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos]; // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " "; if (! sourceBlockIt->second) { //T ++sourceRulePos; } } if ( sourceBlockIt->second) { //NT ++sourceRulePos; } } //cerr << endl; //Iterate through block pairs const Sentence& sentence = dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource()); //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase(); for (size_t i = 0; i < sourceBlocks.size()-1; ++i) { Block& leftSourceBlock = sourceBlocks[i]; Block& rightSourceBlock = sourceBlocks[i+1]; size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos(); size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos(); const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos); const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos); sourceLeftBoundaryPos -= sourceStart; sourceRightBoundaryPos -= sourceStart; // Need to figure out where these map to on the target. size_t targetLeftRulePos = sourceWordToTargetRulePos[sourceLeftBoundaryPos]; size_t targetRightRulePos = sourceWordToTargetRulePos[sourceRightBoundaryPos]; bool isMonotone = true; if ((sourceLeftBoundaryPos < sourceRightBoundaryPos && targetLeftRulePos > targetRightRulePos) || ((sourceLeftBoundaryPos > sourceRightBoundaryPos && targetLeftRulePos < targetRightRulePos))) { isMonotone = false; } stringstream buf; buf << "h_"; //sparse reordering, Huck if (m_type == SourceLeft || m_type == SourceCombined) { buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); buf << "_"; } if (m_type == SourceRight || m_type == SourceCombined) { buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); buf << "_"; } buf << (isMonotone ? "M" : "S"); accumulator->PlusEquals(this,buf.str(), 1); } // cerr << endl; }