void SourceWordDeletionFeature::ComputeFeatures(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator, const AlignmentInfo &alignmentInfo) const { // handle special case: unknown words (they have no word alignment) size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize(); if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return; // flag aligned words bool aligned[16]; CHECK(sourceLength < 16); for(size_t i=0; i<sourceLength; i++) aligned[i] = false; for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) aligned[ alignmentPoint->first ] = true; // process unaligned source words for(size_t i=0; i<sourceLength; i++) { if (!aligned[i]) { Word w = targetPhrase.GetSourcePhrase().GetWord(i); if (!w.IsNonTerminal()) { const StringPiece &word = w.GetFactor(m_factorType)->GetString(); if (word != "<s>" && word != "</s>") { if (!m_unrestricted && FindStringPiece(m_vocab, word) == m_vocab.end()) { accumulator->PlusEquals(this,"OTHER",1); } else { accumulator->PlusEquals(this,word,1); } } } } } }
//TODO this should be a factory function! TranslationOption::TranslationOption(const WordsRange &wordsRange , const TargetPhrase &targetPhrase , const InputType &inputType , int /*whatever*/) : m_targetPhrase(targetPhrase) , m_sourceWordsRange (wordsRange) , m_futureScore(0) { const UnknownWordPenaltyProducer *up = StaticData::Instance().GetUnknownWordPenaltyProducer(); if (up) { const ScoreProducer *scoreProducer = (const ScoreProducer *)up; // not sure why none of the c++ cast works vector<float> score(1); score[0] = FloorScore(-numeric_limits<float>::infinity()); m_scoreBreakdown.Assign(scoreProducer, score); } if (inputType.GetType() == SentenceInput) { Phrase phrase = inputType.GetSubString(wordsRange); m_sourcePhrase = new Phrase(phrase); } else { // TODO lex reordering with confusion network m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase()); //the target phrase from a confusion network/lattice has input scores that we want to keep m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown()); } }
//TODO this should be a factory function! TranslationOption::TranslationOption(const WordsRange &wordsRange , const TargetPhrase &targetPhrase , const InputType &inputType) : m_targetPhrase(targetPhrase) , m_sourceWordsRange(wordsRange) { // set score m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown()); if (inputType.GetType() == SentenceInput) { Phrase phrase = inputType.GetSubString(wordsRange); m_sourcePhrase = new Phrase(phrase); } else { // TODO lex reordering with confusion network m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase()); } }
void TargetWordInsertionFeature::ComputeFeatures(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator, const AlignmentInfo::CollType &alignment) const { // handle special case: unknown words (they have no word alignment) size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize(); if (targetLength == 1 && sourceLength == 1) { const Factor* f1 = targetPhrase.GetWord(0).GetFactor(1); if (f1 && f1->GetString().compare(UNKNOWN_FACTOR) == 0) { return; } } // flag aligned words bool aligned[16]; CHECK(targetLength < 16); for(size_t i=0; i<targetLength; i++) { aligned[i] = false; } for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) { aligned[ alignmentPoint->second ] = true; } // process unaligned target words for(size_t i=0; i<targetLength; i++) { if (!aligned[i]) { Word w = targetPhrase.GetWord(i); if (!w.IsNonTerminal()) { const string &word = w.GetFactor(m_factorType)->GetString(); if (word != "<s>" && word != "</s>") { if (!m_unrestricted && m_vocab.find( word ) == m_vocab.end()) { accumulator->PlusEquals(this,"OTHER",1); } else { accumulator->PlusEquals(this,word,1); } } } } } }