コード例 #1
0
ファイル: score.cpp プロジェクト: Avmb/mosesdecoder
/**
 * Writes the target side of a rule to `out` in rule-table format.
 *
 * Every symbol of phraseT except the last one is written followed by a single
 * space. A symbol is written on its own unless unpairedExtractFormatFlag is
 * set and the symbol is a non-terminal; in that case it is fused with the
 * source symbol it is aligned to (target+source when inverseFlag is set,
 * source+target otherwise).
 *
 * The root symbol is written last, without a trailing space:
 *  - conditionOnTargetLhsFlag set:   "[X]" when inverseFlag is set, otherwise
 *                                    the last symbol of phraseS
 *  - conditionOnTargetLhsFlag unset: the last symbol of phraseT
 *
 * @param phraseS        source-side symbol ids (resolved through vcbS)
 * @param phraseT        target-side symbol ids (resolved through vcbT);
 *                       must not be empty
 * @param bestAlignment  alignment whose alignedToT[i] holds the source
 *                       positions aligned to target position i
 * @param out            stream that receives the formatted phrase
 */
void printTargetPhrase(const PHRASE &phraseS, const PHRASE &phraseT,
                       const PhraseAlignment &bestAlignment, ostream &out)
{
  // size() - 1 is unsigned arithmetic: an empty phrase would wrap around and
  // send the loop far out of bounds, so make the precondition explicit
  assert(phraseT.size() > 0);
  const std::size_t rootPos = phraseT.size() - 1;

  // output target symbols, except root, in rule table format
  for (std::size_t i = 0; i < rootPos; ++i) {
    const std::string &word = vcbT.getWord(phraseT[i]);
    if (!unpairedExtractFormatFlag || !isNonTerminal(word)) {
      out << word << " ";
      continue;
    }
    // get corresponding source non-terminal and output pair;
    // the set is only inspected, so bind it by reference instead of copying
    const std::set<std::size_t> &alignmentPoints = bestAlignment.alignedToT[i];
    assert(alignmentPoints.size() == 1);
    // keep the index unsigned: the set stores std::size_t positions
    const std::size_t j = *(alignmentPoints.begin());
    if (inverseFlag) {
      out << word << vcbS.getWord(phraseS[j]) << " ";
    } else {
      out << vcbS.getWord(phraseS[j]) << word << " ";
    }
  }

  // output target root symbol
  if (conditionOnTargetLhsFlag) {
    if (inverseFlag) {
      out << "[X]";
    } else {
      out << vcbS.getWord(phraseS.back());
    }
  } else {
    out << vcbT.getWord(phraseT.back());
  }
}
コード例 #2
0
bool PhraseAlignment::equals( const PhraseAlignment& other ) {
    if (this == &other) return true;
    if (other.english != english) return false;
    if (other.foreign != foreign) return false;
    PHRASE phraseE = phraseTableE.getPhrase( english );
    PHRASE phraseF = phraseTableF.getPhrase( foreign );
    for(int i=0; i<phraseE.size(); i++) {
        if (alignedToE[i].size() != other.alignedToE[i].size()) return false;
        for(int j=0; j<alignedToE[i].size(); j++) {
            if (alignedToE[i][j] != other.alignedToE[i][j]) return false;
        }
    }
    for(int i=0; i<phraseF.size(); i++) {
        if (alignedToF[i].size() != other.alignedToF[i].size()) return false;
        for(int j=0; j<alignedToF[i].size(); j++) {
            if (alignedToF[i][j] != other.alignedToF[i][j]) return false;
        }
    }
    return true;
}
コード例 #3
0
// Check whether two word alignments of a phrase pair "match",
// i.e. they do not differ in the alignment of non-terminals.
// Alignments that belong to different phrase pairs never match; when
// hierarchicalFlag is off, any two alignments of the same pair match.
bool PhraseAlignment::match( const PhraseAlignment& other )
{
	// different target or source phrase: no match
	if (other.target != target) return false;
	if (other.source != source) return false;

	// outside hierarchical mode nothing else is compared
	if (!hierarchicalFlag) return true;

	PHRASE phraseT = phraseTableT.getPhrase( target );

	// the phrase carries one more symbol than there are alignment entries
	assert(phraseT.size() == alignedToT.size() + 1);
	assert(alignedToT.size() == other.alignedToT.size());

	// visit every target symbol except the final one
	const size_t lastPos = phraseT.size() - 1;
	for (size_t pos = 0; pos < lastPos; ++pos) {
		// only non-terminals are compared
		if (!isNonTerminal( vcbT.getWord( phraseT[pos] ) )) continue;

		// a non-terminal must have exactly one alignment point on each side ...
		if (alignedToT[pos].size() != 1) return false;
		if (other.alignedToT[pos].size() != 1) return false;

		// ... and both sides must point at the same position
		if (*(alignedToT[pos].begin()) != *(other.alignedToT[pos].begin()))
			return false;
	}

	return true;
}
コード例 #4
0
ファイル: score.cpp プロジェクト: xwd/mosesGit-hiero
void printSourcePhrase(const PHRASE &phraseS, const PHRASE &phraseT,
                       const PhraseAlignment &bestAlignment, ostream &out)
{
  // output source symbols, except root, in rule table format
  for (int i = 0; i < phraseS.size()-1; ++i) {
    const std::string &word = vcbS.getWord(phraseS[i]);
    if (!stringToTreeFlag || !isNonTerminal(word)) {
      out << word << " ";
      continue;
    }
    // get corresponding target non-terminal and output pair
    std::set<size_t> alignmentPoints = bestAlignment.alignedToS[i];
    assert(alignmentPoints.size() == 1);
    int j = *(alignmentPoints.begin());
    if (inverseFlag) {
      out << vcbT.getWord(phraseT[j]) << word << " ";
    } else {
      out << word << vcbT.getWord(phraseT[j]) << " ";
    }
  }

  // output source root symbol
  out << vcbS.getWord(phraseS.back());
}
コード例 #5
0
// Writes one phrase-table line per distinct english phrase found in
// phrasePair to phraseTableFile.
//
// Each line has the form
//   <foreign words> ||| <english words> ||| <count> <group size> <len F> <len E>
// with the two phrases swapped when inverseFlag is set, where
//   <count>      = number of entries in phrasePair with that english id
//   <group size> = phrasePair.size()
//   <len F/E>    = number of words in the foreign / english phrase
//
// The foreign phrase is taken from phrasePair[0] only.
// NOTE(review): this presumes all entries share one foreign phrase - confirm
// against the caller.
//
// Does nothing when phrasePair is empty.
void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
{
  if (phrasePair.size() == 0) return;
  // english phrase id -> number of entries carrying it
  map<int, int> countE;
  // english phrase id -> index of the last entry of the longest run of
  // consecutive entries that equals() considers identical.
  // NOTE(review): filled below but never read in this function.
  map<int, int> alignmentE;
  // entries seen for the current english id (only referenced by the
  // commented-out debug output)
  int totalCount = 0;
  // length of the run of identical alignments currently being scanned
  int currentCount = 0;
  // length of the longest run seen so far for the current english id
  int maxSameCount = 0;
  // index of the last entry of that longest run (-1: none yet)
  int maxSame = -1;
  // index of the previously visited entry
  int old = -1;
  for(size_t i=0; i<phrasePair.size(); i++) {
    if (i>0) {
      if (phrasePair[old].english == phrasePair[i].english) {
        // same english phrase: a differing alignment ends the current run
        if (! phrasePair[i].equals( phrasePair[old] )) {
          if (currentCount > maxSameCount) {
            maxSameCount = currentCount;
            maxSame = i-1;
          }
          currentCount = 0;
        }
      } else {
        // wrap up old E
        if (currentCount > maxSameCount) {
          maxSameCount = currentCount;
          maxSame = i-1;
        }

        alignmentE[ phrasePair[old].english ] = maxSame;
        //	if (maxSameCount != totalCount)
        //  cout << "max count is " << maxSameCount << "/" << totalCount << endl;

        // get ready for new E
        totalCount = 0;
        currentCount = 0;
        maxSameCount = 0;
        maxSame = -1;
      }
    }
    // the current entry is counted after the bookkeeping above
    countE[ phrasePair[i].english ]++;
    old = i;
    currentCount++;
    totalCount++;
  }

  // wrap up old E
  if (currentCount > maxSameCount) {
    maxSameCount = currentCount;
    maxSame = phrasePair.size()-1;
  }
  alignmentE[ phrasePair[old].english ] = maxSame;
  //  if (maxSameCount != totalCount)
  //    cout << "max count is " << maxSameCount << "/" << totalCount << endl;

  // output table
  typedef map< int, int >::iterator II;
  PHRASE phraseF = phraseTableF.getPhrase( phrasePair[0].foreign );
  // running sum of the counts written so far; only referenced by the
  // commented-out debug output below
  size_t index = 0;
  // countE is a map, so english phrases are visited in ascending id order
  for(II i = countE.begin(); i != countE.end(); i++) {
    //cout << "\tp( " << i->first << " | " << phrasePair[0].foreign << " ; " << phraseF.size() << " ) = ...\n";
    //cerr << index << endl;

    // foreign phrase (unless inverse)
    if (! inverseFlag) {
      for(size_t j=0; j<phraseF.size(); j++) {
        phraseTableFile << vcbF.getWord( phraseF[j] );
        phraseTableFile << " ";
      }
      phraseTableFile << "||| ";
    }

    // english phrase
    PHRASE phraseE = phraseTableE.getPhrase( i->first );
    for(size_t j=0; j<phraseE.size(); j++) {
      phraseTableFile << vcbE.getWord( phraseE[j] );
      phraseTableFile << " ";
    }
    phraseTableFile << "||| ";

    // foreign phrase (if inverse)
    if (inverseFlag) {
      for(size_t j=0; j<phraseF.size(); j++) {
        phraseTableFile << vcbF.getWord( phraseF[j] );
        phraseTableFile << " ";
      }
      phraseTableFile << "||| ";
    }

    // phrase pair frequency
    phraseTableFile << i->second;

    //source phrase pair frequency
    phraseTableFile << " " << phrasePair.size();

    // source phrase length
    phraseTableFile	<< " " << phraseF.size();

    // target phrase length
    phraseTableFile	<< " " << phraseE.size();

    phraseTableFile << endl;

    index += i->second;
  }
}
コード例 #6
0
ファイル: score.cpp プロジェクト: poetzhangzi/test
// Writes one phrase-table entry to phraseTableFile for a group of alignments
// of the same phrase pair.
//
// Pieces of the line, in the order they are written:
//   source words + "||| "        (only when inverseFlag is off)
//   target words + "||| "
//   source words + "||| "        (only when inverseFlag is on)
//   translation score            (summed count / totalCount, optionally
//                                 discounted and/or log-transformed)
//   " " + lexical score          (only when lexFlag is on)
//   " ||| "
//   alignment points "src-tgt "  (only when inverseFlag is off: non-terminals
//                                 in hierarchical mode, otherwise all points
//                                 when wordAlignmentFlag is on)
//   " ||| " + totalCount + end of line
//
// phrasePair  alignments of one phrase pair; element 0 supplies the source
//             and target phrase; an empty vector writes nothing
// totalCount  denominator of the translation score, echoed at the end
void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount )
{
	if (phrasePair.empty()) return;

	PhraseAlignment *best = findBestAlignment( phrasePair );

	// sum the counts of all alignments in the group
	float groupCount = 0;
	for (vector< PhraseAlignment* >::iterator it = phrasePair.begin(); it != phrasePair.end(); ++it)
		groupCount += (*it)->count;

	PHRASE phraseS = phraseTableS.getPhrase( phrasePair[0]->GetSource() );
	PHRASE phraseT = phraseTableT.getPhrase( phrasePair[0]->GetTarget() );

	// source words come first unless the table is inverted
	if (!inverseFlag) {
		for (size_t pos = 0; pos < phraseS.size(); ++pos)
			phraseTableFile << vcbS.getWord( phraseS[pos] ) << " ";
		phraseTableFile << "||| ";
	}

	// target words
	for (size_t pos = 0; pos < phraseT.size(); ++pos)
		phraseTableFile << vcbT.getWord( phraseT[pos] ) << " ";
	phraseTableFile << "||| ";

	// in an inverted table the source words follow the target words
	if (inverseFlag) {
		for (size_t pos = 0; pos < phraseS.size(); ++pos)
			phraseTableFile << vcbS.getWord( phraseS[pos] ) << " ";
		phraseTableFile << "||| ";
	}

	// phrase translation score; small counts are discounted when requested
	if (goodTuringFlag && groupCount < GT_MAX) {
		groupCount *= discountFactor[(int)(groupCount+0.99999)];
	}
	double condScore = groupCount / totalCount;
	phraseTableFile << ( logProbFlag ? negLogProb*log(condScore) : condScore );

	// lexical translation score
	if (lexFlag) {
		double lexScore = computeLexicalTranslation( phraseS, phraseT, best );
		phraseTableFile << " " << ( logProbFlag ? negLogProb*log(lexScore) : lexScore );
	}

	phraseTableFile << " ||| ";

	// alignment field (never written for an inverted table)
	if (!inverseFlag && hierarchicalFlag) {
		// hierarchical mode: always written, but only for non-terminals
		assert(phraseT.size() == best->alignedToT.size() + 1);
		for (int tgtPos = 0; tgtPos < phraseT.size() - 1; ++tgtPos) {
			if (!isNonTerminal(vcbT.getWord( phraseT[tgtPos] ))) continue;

			assert(best->alignedToT[ tgtPos ].size() == 1);
			int srcPos = *(best->alignedToT[ tgtPos ].begin());
			phraseTableFile << srcPos << "-" << tgtPos << " ";
		}
	} else if (!inverseFlag && wordAlignmentFlag) {
		// phrase-based mode: every alignment point of every target position
		for (int tgtPos = 0; tgtPos < best->alignedToT.size(); ++tgtPos) {
			const set< size_t > &points = best->alignedToT[tgtPos];
			set< size_t >::const_iterator pt = points.begin();
			while (pt != points.end()) {
				phraseTableFile << *pt << "-" << tgtPos << " ";
				++pt;
			}
		}
	}

	phraseTableFile << " ||| " << totalCount << endl;
}
コード例 #7
0
// Writes one phrase-table entry for a group of alignments of the same phrase
// pair through phraseTableFile.writeLine() and, when requested, a matching
// line to wordAlignmentFile.
//
// Pieces passed to phraseTableFile, in order (one writeLine() call each):
//   each source word + " ", then "||| "   (only when inverseFlag is off)
//   each target word + " ", then "||| "
//   each source word + " ", then "||| "   (only when inverseFlag is on)
//   "src-tgt " per non-terminal, then "||| "
//                                         (only when inverseFlag is off and
//                                          hierarchicalFlag is on)
//   translation score                     (summed count / totalCount,
//                                          optionally discounted and/or
//                                          log-transformed)
//   " " + lexical score                   (only when lexFlag is on)
//   " ||| " + totalCount + newline
//
// When inverseFlag is off and wordAlignmentFlag is on, wordAlignmentFile gets
//   source words "||| " target words "|||" followed by " src-tgt" for every
//   alignment point, then end of line.
//
// phrasePair       alignments of one phrase pair; element 0 supplies the
//                  source and target phrase; an empty vector writes nothing
// totalCount       denominator of the translation score, echoed at the end
// phraseTableFile  writer that receives the phrase-table pieces
void outputPhrasePair(vector<PhraseAlignment*> &phrasePair, float totalCount, Bz2LineWriter& phraseTableFile) {
	if (phrasePair.size() == 0)
		return;

	PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );

	// compute count
	float count = 0.;
	for (size_t i = 0; i < phrasePair.size(); ++i)
		count += phrasePair[i]->count;

	PHRASE phraseS = phraseTableS.getPhrase( phrasePair[0]->source );
	PHRASE phraseT = phraseTableT.getPhrase( phrasePair[0]->target );

	// labels (if hierarchical)

	// source phrase (unless inverse)
	if (!inverseFlag) {
		for (size_t j = 0; j < phraseS.size(); ++j)
			phraseTableFile.writeLine(vcbS.getWord(phraseS[j]) + " ");
		phraseTableFile.writeLine("||| ");
	}

	// target phrase
	for (size_t j = 0; j < phraseT.size(); ++j)
		phraseTableFile.writeLine(vcbT.getWord(phraseT[j]) + " ");
	phraseTableFile.writeLine("||| ");

	// source phrase (if inverse)
	if (inverseFlag) {
		for (size_t j = 0; j < phraseS.size(); ++j)
			phraseTableFile.writeLine(vcbS.getWord(phraseS[j]) + " ");
		phraseTableFile.writeLine("||| ");
	}

	// alignment info for non-terminals
	if (!inverseFlag && hierarchicalFlag) {
		// the phrase carries one more symbol than there are alignment entries
		assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
		for (size_t j = 0; j < phraseT.size() - 1; ++j) {
			if (!isNonTerminal(vcbT.getWord( phraseT[j] )))
				continue;
			assert(bestAlignment->alignedToT[ j ].size() == 1);
			stringstream data;
			data << *(bestAlignment->alignedToT[j].begin()) << "-" << j << " ";
			phraseTableFile.writeLine(data.str());
		}
		phraseTableFile.writeLine("||| ");
	}

	// phrase translation probability
	if (goodTuringFlag && count<GT_MAX)
		count *= discountFactor[(int)(count+0.99999)];

	// computed once; kept as float so the printed value is unchanged
	const float condScore = count / totalCount;
	{
		stringstream data;
		data << (logProbFlag ? negLogProb*log(condScore) : condScore);
		phraseTableFile.writeLine(data.str());
	}

	// lexical translation probability
	if (lexFlag) {
		stringstream data;
		data << " " << (logProbFlag ?
										negLogProb*log(computeLexicalTranslation(phraseS, phraseT, bestAlignment)) :
										computeLexicalTranslation(phraseS, phraseT, bestAlignment));
		phraseTableFile.writeLine(data.str());
	}

	{
		// '\n' yields the same string as endl; flushing a stringstream does nothing
		stringstream data;
		data << " ||| " << totalCount << '\n';
		phraseTableFile.writeLine(data.str());
	}

	// optional output of word alignments
	if (!inverseFlag && wordAlignmentFlag) {
		// source phrase
		for (size_t j = 0; j < phraseS.size(); ++j)
			wordAlignmentFile << vcbS.getWord(phraseS[j]) << " ";
		wordAlignmentFile << "||| ";

		// target phrase
		for (size_t j = 0; j < phraseT.size(); ++j)
			wordAlignmentFile << vcbT.getWord(phraseT[j]) << " ";
		wordAlignmentFile << "|||";

		// alignment
		for (size_t j = 0; j < bestAlignment->alignedToT.size(); ++j) {
			const set< size_t > &aligned = bestAlignment->alignedToT[j];
			for (set< size_t >::const_iterator p = aligned.begin(); p != aligned.end(); ++p)
				wordAlignmentFile << " " << *p << "-" << j;
		}
		wordAlignmentFile << endl;
	}
}