Example #1
vector<const Factor*> doMBR(const TrellisPathList& nBestList)
{
  float marginal = 0;

  vector<float> joint_prob_vec;                  // unnormalised posterior per hypothesis
  vector< vector<const Factor*> > translations;  // output factors per hypothesis
  float joint_prob;
  vector< map< vector<const Factor*>, int > > ngram_stats;  // n-gram counts per hypothesis

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const TrellisPath &path = **iter;
    // unnormalised posterior: exponentiate the scaled model score
    joint_prob = UntransformScore(StaticData::Instance().GetMBRScale()
        * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()));
    marginal += joint_prob;
    joint_prob_vec.push_back(joint_prob);
    // cache n-gram counts for this hypothesis
    map< vector<const Factor*>, int > counts;
    vector<const Factor*> translation;
    GetOutputFactors(path, translation);
    extract_ngrams(translation, counts);
    ngram_stats.push_back(counts);
    translations.push_back(translation);
  }
   
  float bleu, weightedLoss;
  float weightedLossCumul = 0;
  float minMBRLoss = 1000000;  // effectively +infinity: real losses lie in [0, 1]
  int minMBRLossIdx = -1;
   
  /* Main MBR computation: pick the hypothesis that minimises the expected
     loss  sum_j p(j) * (1 - BLEU(i, j))  under the posterior p(j) */
  for (size_t i = 0; i < nBestList.GetSize(); i++) {
    weightedLossCumul = 0;
    for (size_t j = 0; j < nBestList.GetSize(); j++) {
      if (i != j) {
        bleu = calculate_score(translations, j, i, ngram_stats);
        weightedLoss = (1 - bleu) * (joint_prob_vec[j] / marginal);
        weightedLossCumul += weightedLoss;
        if (weightedLossCumul > minMBRLoss)
          break;  // early exit: this candidate can no longer win
      }
    }
    if (weightedLossCumul < minMBRLoss) {
      minMBRLoss = weightedLossCumul;
      minMBRLossIdx = i;
    }
  }
  return translations[minMBRLossIdx];
}
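
A note on the selection loop above: it implements the Minimum Bayes Risk decision rule, choosing the candidate i that minimises sum_{j != i} p(j) * (1 - BLEU(i, j)), where p(j) = joint_prob_vec[j] / marginal. The standalone sketch below is not Moses code; sim() is a hypothetical stand-in for calculate_score(), but the O(n^2) loop and the early-exit pruning mirror the function above.

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for calculate_score(): exact-match similarity in [0, 1].
static float sim(const std::string& a, const std::string& b) {
  return a == b ? 1.0f : 0.0f;
}

// Same O(n^2) selection loop as doMBR above, over plain strings.
static size_t selectMBR(const std::vector<std::string>& cands,
                        const std::vector<float>& posterior) {
  size_t best = 0;
  float minLoss = 1e30f;
  for (size_t i = 0; i < cands.size(); ++i) {
    float loss = 0;
    for (size_t j = 0; j < cands.size(); ++j) {
      if (i == j) continue;
      loss += (1.0f - sim(cands[i], cands[j])) * posterior[j];
      if (loss > minLoss) break;  // same early exit as the Moses loop
    }
    if (loss < minLoss) { minLoss = loss; best = i; }
  }
  return best;
}

int main() {
  std::vector<std::string> cands;
  cands.push_back("the cat sat");
  cands.push_back("the cat sat");
  cands.push_back("a cat sits");
  std::vector<float> posterior;
  posterior.push_back(0.4f);
  posterior.push_back(0.35f);
  posterior.push_back(0.25f);
  // Candidate 0 wins: it shares probability mass with the identical candidate 1.
  std::printf("MBR pick: %zu\n", selectMBR(cands, posterior));
  return 0;
}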
Example #2
vector< vector<const Word*> > MosesDecoder::runDecoder(const std::string& source,
    size_t sentenceid,
    size_t nBestSize,
    float bleuObjectiveWeight,
    float bleuScoreWeight,
    vector< ScoreComponentCollection>& featureValues,
    vector< float>& bleuScores,
    vector< float>& modelScores,
    size_t numReturnedTranslations,
    bool realBleu,
    bool distinct,
    size_t rank,
    size_t epoch,
    SearchAlgorithm& search,
    string filename)
{
  // run the decoder
  m_manager = new Moses::Manager(*m_sentence);
  m_manager->Decode();
  TrellisPathList nBestList;
  m_manager->CalcNBest(nBestSize, nBestList, distinct);

  // optionally print the n-best list to file (to extract scores and features; currently just for sentence BLEU scoring)
  /*if (filename != "") {
    ofstream out(filename.c_str());
    if (!out) {
      ostringstream msg;
      msg << "Unable to open " << filename;
      throw runtime_error(msg.str());
    }
    // TODO: handle sentence id (for now always 0)
    //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(), 0, false);
    out.close();
  }*/

  // read off the feature values and bleu scores for each sentence in the nbest list
  Moses::TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const Moses::TrellisPath &path = **iter;
    featureValues.push_back(path.GetScoreBreakdown());
    // initialise all three: only one branch below sets its score, yet
    // dynBleuScore and realBleuScore are both printed further down
    float bleuScore = 0, dynBleuScore = 0, realBleuScore = 0;
    if (realBleu) realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetTargetPhrase());
    else dynBleuScore = getBleuScore(featureValues.back());
    bleuScore = realBleu ? realBleuScore : dynBleuScore;
    bleuScores.push_back(bleuScore);

    float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
    modelScores.push_back(scoreWithoutBleu);

    if (iter != nBestList.begin())
      cerr << endl;
    cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetTargetPhrase() << "\", score: "
         << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore();
    if (m_bleuScoreFeature->Enabled() && realBleu)
      cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";

    // set bleu score to zero in the feature vector since we do not want to optimise its weight
    setBleuScore(featureValues.back(), 0);
  }

  // prepare translations to return; the Words are new'd here, so the caller owns them
  vector< vector<const Word*> > translations;
  for (size_t i=0; i < numReturnedTranslations && i < nBestList.GetSize(); ++i) {
    const TrellisPath &path = nBestList.at(i);
    Phrase phrase = path.GetTargetPhrase();

    vector<const Word*> translation;
    for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
      const Word &word = phrase.GetWord(pos);
      Word *newWord = new Word(word);
      translation.push_back(newWord);
    }
    translations.push_back(translation);
  }

  return translations;
}
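
One caveat with runDecoder above: each returned Word is allocated with new and never freed inside the function, so the caller presumably owns the pointers. A minimal caller-side cleanup sketch under that assumption; freeTranslations is a hypothetical helper, not part of the Moses API, and is written generically so it compiles without Moses headers.

#include <cstddef>
#include <vector>

// Hypothetical helper: frees the new'd Word pointers returned by runDecoder,
// assuming the caller owns them (as the allocation pattern above suggests).
template <typename W>
void freeTranslations(std::vector<std::vector<const W*> >& translations) {
  for (std::size_t i = 0; i < translations.size(); ++i)
    for (std::size_t j = 0; j < translations[i].size(); ++j)
      delete translations[i][j];
  translations.clear();
}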
Example #3
const TrellisPath doMBR(const TrellisPathList& nBestList){
  float marginal = 0;

  vector<float> joint_prob_vec;
  vector< vector<const Factor*> > translations;
  float joint_prob;
  vector< map < vector <const Factor *>, int > > ngram_stats;

  TrellisPathList::const_iterator iter;
  
  // get max score to prevent underflow
  float maxScore = -1e20;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
  {
    const TrellisPath &path = **iter;
    float score = StaticData::Instance().GetMBRScale()
      * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights());
    if (maxScore < score) maxScore = score;
  }
  
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
  {
    const TrellisPath &path = **iter;
    // shift by maxScore before exponentiating; the shift cancels in joint_prob_vec[j] / marginal
    joint_prob = UntransformScore(StaticData::Instance().GetMBRScale()
        * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()) - maxScore);
    marginal += joint_prob;
    joint_prob_vec.push_back(joint_prob);

    // get words in translation
    vector<const Factor*> translation;
    GetOutputFactors(path, translation);
    
    // collect n-gram counts
    map< vector<const Factor*>, int > counts;
    extract_ngrams(translation, counts);

    ngram_stats.push_back(counts);
    translations.push_back(translation);
  }

  float bleu, weightedLoss;
  float weightedLossCumul = 0;
  float minMBRLoss = 1000000;  // effectively +infinity: real losses lie in [0, 1]
  int minMBRLossIdx = -1;
   
  /* Main MBR computation: pick the hypothesis that minimises the expected
     loss  sum_j p(j) * (1 - BLEU(i, j))  under the posterior p(j) */
  for (unsigned int i = 0; i < nBestList.GetSize(); i++) {
    weightedLossCumul = 0;
    for (unsigned int j = 0; j < nBestList.GetSize(); j++) {
      if (i != j) {
        bleu = calculate_score(translations, j, i, ngram_stats);
        weightedLoss = (1 - bleu) * (joint_prob_vec[j] / marginal);
        weightedLossCumul += weightedLoss;
        if (weightedLossCumul > minMBRLoss)
          break;  // early exit: this candidate can no longer win
      }
    }
    if (weightedLossCumul < minMBRLoss) {
      minMBRLoss = weightedLossCumul;
      minMBRLossIdx = i;
    }
  }
  return nBestList.at(minMBRLossIdx);
}
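
The key difference from Example #1 is the maxScore subtraction before UntransformScore (which, judging by how marginal is accumulated, maps a log score back to a probability via exponentiation). Shifting every score by a constant leaves the normalised posterior unchanged, since exp(s_j - m) / sum_k exp(s_k - m) = exp(s_j) / sum_k exp(s_k), but it keeps the exponents near zero so they do not underflow. A standalone demonstration with toy values, not Moses code:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> scores;
  scores.push_back(-700.0f);  // exp(-700) underflows to 0 in float precision
  scores.push_back(-701.0f);

  // Naive: both exponentials underflow, so the posterior is 0/0 (NaN).
  float naive = std::exp(scores[0]) / (std::exp(scores[0]) + std::exp(scores[1]));

  // Shifted: subtract the max first, exactly as Example #3 does.
  float m = *std::max_element(scores.begin(), scores.end());
  float shifted = std::exp(scores[0] - m)
                  / (std::exp(scores[0] - m) + std::exp(scores[1] - m));

  std::printf("naive:   %f\nshifted: %f\n", naive, shifted);  // nan vs ~0.731
  return 0;
}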