void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source,
    const TargetPhrase& targetPhrase,
    ScoreComponentCollection* accumulator,
    const AlignmentInfo &alignmentInfo) const
{
  // handle special case: unknown words (they have no word alignment)
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();
  if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;

  // flag aligned words
  bool aligned[16];
  CHECK(sourceLength < 16);
  for(size_t i=0; i<sourceLength; i++)
    aligned[i] = false;
  for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++)
    aligned[ alignmentPoint->first ] = true;

  // process unaligned source words
  for(size_t i=0; i<sourceLength; i++) {
    if (!aligned[i]) {
      const Word &w = source.GetWord(i);
      if (!w.IsNonTerminal()) {
        const StringPiece word = w.GetFactor(m_factorType)->GetString();
        if (word != "<s>" && word != "</s>") {
          if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
            accumulator->PlusEquals(this, StringPiece("OTHER"),1);
          } else {
            accumulator->PlusEquals(this,word,1);
          }
        }
      }
    }
  }
}
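
The idiom used above — flag every source position touched by an alignment point, then fire a sparse feature for each remaining unaligned word — can be seen in isolation in the small standalone sketch below. It is only an illustration: a std::set of (source, target) index pairs stands in for Moses' AlignmentInfo, plain strings stand in for Word/Factor, and the helper name UnalignedSourceWords is made up.

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Minimal sketch: collect the source words left unaligned by a phrase pair.
// "Alignment" stands in for AlignmentInfo as a set of (source, target) index pairs.
typedef std::set<std::pair<size_t, size_t> > Alignment;

std::vector<std::string> UnalignedSourceWords(const std::vector<std::string> &source,
                                              const Alignment &alignment)
{
  std::vector<bool> aligned(source.size(), false);
  for (Alignment::const_iterator it = alignment.begin(); it != alignment.end(); ++it)
    aligned[it->first] = true;   // first = source index, as in AlignmentInfo

  std::vector<std::string> unaligned;
  for (size_t i = 0; i < source.size(); ++i)
    if (!aligned[i])
      unaligned.push_back(source[i]);
  return unaligned;
}

int main()
{
  std::vector<std::string> source;
  source.push_back("das"); source.push_back("ja"); source.push_back("haus");
  Alignment a;
  a.insert(std::make_pair(0, 0));
  a.insert(std::make_pair(2, 1));   // "ja" (index 1) stays unaligned
  std::vector<std::string> u = UnalignedSourceWords(source, a);
  for (size_t i = 0; i < u.size(); ++i)
    std::cout << u[i] << std::endl;  // prints: ja
  return 0;
}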
void TargetWordInsertionFeature::ComputeFeatures(const TargetPhrase& targetPhrase,
    ScoreComponentCollection* accumulator,
    const AlignmentInfo &alignmentInfo) const
{
  // handle special case: unknown words (they have no word alignment)
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize();
  if (targetLength == 1 && sourceLength == 1) {
    const Factor* f1 = targetPhrase.GetWord(0).GetFactor(1);
    if (f1 && f1->GetString().compare(UNKNOWN_FACTOR) == 0) {
      return;
    }
  }

  // flag aligned words
  bool aligned[16];
  CHECK(targetLength < 16);
  for(size_t i=0; i<targetLength; i++) {
    aligned[i] = false;
  }
  for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) {
    aligned[ alignmentPoint->second ] = true;
  }

  // process unaligned target words
  for(size_t i=0; i<targetLength; i++) {
    if (!aligned[i]) {
      Word w = targetPhrase.GetWord(i);
      if (!w.IsNonTerminal()) {
        const string &word = w.GetFactor(m_factorType)->GetString();
        if (word != "<s>" && word != "</s>") {
          if (!m_unrestricted && m_vocab.find( word ) == m_vocab.end()) {
            accumulator->PlusEquals(this,"OTHER",1);
          } else {
            accumulator->PlusEquals(this,word,1);
          }
        }
      }
    }
  }
}
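
What PlusEquals(this, word, 1) amounts to in both features is incrementing one component of a sparse feature vector keyed by the word itself, or by the catch-all "OTHER" label for out-of-vocabulary words. A rough stand-in for that behaviour, where the class name SparseScores and the data are invented for illustration:

#include <iostream>
#include <map>
#include <string>

// Rough stand-in for the sparse-feature behaviour of ScoreComponentCollection:
// each distinct feature name accumulates its own score component.
class SparseScores {
public:
  void PlusEquals(const std::string &name, float value) { m_scores[name] += value; }
  void Print(std::ostream &out) const {
    for (std::map<std::string, float>::const_iterator it = m_scores.begin();
         it != m_scores.end(); ++it)
      out << it->first << "=" << it->second << " ";
    out << std::endl;
  }
private:
  std::map<std::string, float> m_scores;
};

int main()
{
  SparseScores acc;
  acc.PlusEquals("the", 1);    // in-vocabulary word fires its own feature
  acc.PlusEquals("OTHER", 1);  // out-of-vocabulary words share the OTHER feature
  acc.PlusEquals("OTHER", 1);
  acc.Print(std::cout);        // prints: OTHER=2 the=1
  return 0;
}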
Example #3
pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> PhraseDictionaryMultiModelCounts::GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const
{

  size_t tsize = phraseT.GetSize();
  size_t ssize = phraseS.GetSize();
  AlignVector alignedToT (tsize);
  AlignVector alignedToS (ssize);
  AlignmentInfo::const_iterator iter;

  for (iter = alignment.begin(); iter != alignment.end(); ++iter) {
    const pair<size_t,size_t> &alignPair = *iter;
    size_t s = alignPair.first;
    size_t t = alignPair.second;
    if (s >= ssize || t >= tsize) {
      cerr << "Error: inconsistent alignment for phrase pair: " << phraseS << " - " << phraseT << endl;
      cerr << "phrase pair will be discarded" << endl;
      throw AlignmentException();
    }
    alignedToT[t].insert( s );
    alignedToS[s].insert( t );
  }
  return make_pair(alignedToT,alignedToS);
}
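
To make the return value concrete, consider a toy phrase pair with three source words, two target words and alignment points (0,0), (1,0), (2,1). The sketch below reproduces the bookkeeping with std::vector<std::set<size_t> > standing in for AlignVector; the data is invented for illustration.

#include <iostream>
#include <set>
#include <utility>
#include <vector>

// Stand-in for PhraseDictionaryMultiModelCounts::AlignVector: for every position
// on one side, the set of positions it is aligned to on the other side.
typedef std::vector<std::set<size_t> > AlignVector;

int main()
{
  // toy phrase pair: source has 3 words, target has 2 words
  size_t ssize = 3, tsize = 2;
  std::pair<size_t, size_t> points[] = { std::make_pair(0, 0),
                                         std::make_pair(1, 0),
                                         std::make_pair(2, 1) };

  AlignVector alignedToT(tsize), alignedToS(ssize);
  for (size_t k = 0; k < 3; ++k) {
    alignedToT[points[k].second].insert(points[k].first);
    alignedToS[points[k].first].insert(points[k].second);
  }

  // alignedToT[0] = {0,1}, alignedToT[1] = {2}
  // alignedToS[0] = {0},   alignedToS[1] = {0}, alignedToS[2] = {1}
  std::cout << "target word 0 is aligned to " << alignedToT[0].size()
            << " source words" << std::endl;  // prints 2
  return 0;
}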
Example #4
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
{
	const StaticData &staticData = StaticData::Instance();
	bool labeledOutput = staticData.IsLabeledNBestList();
	bool reportAllFactors = staticData.GetReportAllFactorsNBest();
	bool includeAlignment = staticData.NBestIncludesAlignment();
	bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
	
	TrellisPathList::const_iterator iter;
	for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
	{
		const TrellisPath &path = **iter;
		const std::vector<const Hypothesis *> &edges = path.GetEdges();

		// print the surface factor of the translation
		out << translationId << " ||| ";
		for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
		{
			const Hypothesis &edge = *edges[currEdge];
			OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
		}
		out << " |||";

		std::string lastName = "";
		const vector<const StatefulFeatureFunction*>& sff =
			system->GetStatefulFeatureFunctions();
		for( size_t i=0; i<sff.size(); i++ )
		{
			if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() )
			{
				lastName = sff[i]->GetScoreProducerWeightShortName();
				out << " " << lastName << ":";
			}
			vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
			for (size_t j = 0; j<scores.size(); ++j) 
			{
		  		out << " " << scores[j];
			}
		}

		const vector<const StatelessFeatureFunction*>& slf =
			system->GetStatelessFeatureFunctions();
		for( size_t i=0; i<slf.size(); i++ )
		{
			if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() )
			{
				lastName = slf[i]->GetScoreProducerWeightShortName();
				out << " " << lastName << ":";
			}
			vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
			for (size_t j = 0; j<scores.size(); ++j) 
			{
		  		out << " " << scores[j];
			}
		}

		// translation components
		if (StaticData::Instance().GetInputType()==SentenceInput){  
			// translation components	for text input
			vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
			if (pds.size() > 0) {
				if (labeledOutput)
					out << " tm:";
				vector<PhraseDictionaryFeature*>::iterator iter;
				for (iter = pds.begin(); iter != pds.end(); ++iter) {
					vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
					for (size_t j = 0; j<scores.size(); ++j) 
						out << " " << scores[j];
				}
			}
		}
		else{		
			// translation components for Confusion Network input
			// first translation component has GetNumInputScores() scores from the input Confusion Network
			// at the beginning of the vector
			vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
			if (pds.size() > 0) {
				vector<PhraseDictionaryFeature*>::iterator iter;
				
				iter = pds.begin();
				vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
					
				size_t pd_numinputscore = (*iter)->GetNumInputScores();

				if (pd_numinputscore){
					
					if (labeledOutput)
						out << " I:";

					for (size_t j = 0; j < pd_numinputscore; ++j)
						out << " " << scores[j];
				}
					
					
				for (iter = pds.begin() ; iter != pds.end(); ++iter) {
					vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
					
					size_t pd_numinputscore = (*iter)->GetNumInputScores();

					if (iter == pds.begin() && labeledOutput)
						out << " tm:";
					for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
						out << " " << scores[j];
				}
			}
		}
		
		// generation
		const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
		if (gds.size() > 0) {
			if (labeledOutput)
				out << " g: ";
			vector<GenerationDictionary*>::const_iterator iter;
			for (iter = gds.begin(); iter != gds.end(); ++iter) {
				vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
				for (size_t j = 0; j<scores.size(); j++) {
					out << scores[j] << " ";
				}
			}
		}
		
		// total						
		out << " ||| " << path.GetTotalScore();
		
		//phrase-to-phrase alignment
		if (includeAlignment) {
			out << " |||";
			for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
			{
				const Hypothesis &edge = *edges[currEdge];
				const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
				WordsRange targetRange = path.GetTargetWordsRange(edge);
				out << " " << sourceRange.GetStartPos();
				if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
					out << "-" << sourceRange.GetEndPos();
				}
				out<< "=" << targetRange.GetStartPos();
				if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
					out<< "-" << targetRange.GetEndPos();
				}
			}
		}
	
		if (includeWordAlignment) {
			out << " |||";
			for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
			{
				const Hypothesis &edge = *edges[currEdge];
				const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
				WordsRange targetRange = path.GetTargetWordsRange(edge);
				const int sourceOffset = sourceRange.GetStartPos();
				const int targetOffset = targetRange.GetStartPos();
				const AlignmentInfo &AI = edge.GetCurrTargetPhrase().GetAlignmentInfo();
				AlignmentInfo::const_iterator iter;
				for (iter = AI.begin(); iter != AI.end(); ++iter) {
					out << " " << iter->first+sourceOffset << "-" << iter->second+targetOffset;
				}
			}
		}
	
		if (StaticData::Instance().IsPathRecoveryEnabled()) {
			out << "|||";
			OutputInput(out, edges[0]);
		}

		out << endl;
	}


	out <<std::flush;
}