Ejemplo n.º 1
0
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
//fill fs and allStats with statistics from models
{
  for(size_t i = 0; i < m_numModels; ++i) {
    const PhraseDictionary &pd = *m_pd[i];

    TargetPhraseCollection *ret_raw = (TargetPhraseCollection*)  pd.GetTargetPhraseCollection( src);
    if (ret_raw != NULL) {

      TargetPhraseCollection::iterator iterTargetPhrase;
      for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end();  ++iterTargetPhrase) {

        TargetPhrase * targetPhrase = *iterTargetPhrase;
        vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);

        string targetString = targetPhrase->GetStringRep(m_output);
        if (allStats->find(targetString) == allStats->end()) {

          multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
          statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info

          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

          statistics->fst.resize(m_numModels);
          statistics->ft.resize(m_numModels);
          Scores scoreVector(5);
          scoreVector[0] = -raw_scores[0];
          scoreVector[1] = -raw_scores[1];
          scoreVector[2] = -raw_scores[2];
          statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
          statistics->targetPhrase->Evaluate(src, GetFeaturesToApply());

          (*allStats)[targetString] = statistics;

        }
        multiModelCountsStatistics * statistics = (*allStats)[targetString];

        statistics->fst[i] = UntransformScore(raw_scores[0]);
        statistics->ft[i] = UntransformScore(raw_scores[1]);
        fs[i] = UntransformScore(raw_scores[2]);
        (*allStats)[targetString] = statistics;
      }
    }
  }

  // get target phrase frequency for models which have not seen the phrase pair
  for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
    multiModelCountsStatistics * statistics = iter->second;

    for (size_t i = 0; i < m_numModels; ++i) {
      if (!statistics->ft[i]) {
        statistics->ft[i] = GetTargetCount(static_cast<const Phrase&>(*statistics->targetPhrase), i);
      }
    }
  }
}
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
{
  for(size_t i = 0; i < m_numModels; ++i) {
    const PhraseDictionary &pd = *m_pd[i];

    TargetPhraseCollection *ret_raw = (TargetPhraseCollection*)  pd.GetTargetPhraseCollectionLEGACY( src);
    if (ret_raw != NULL) {

      TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
      if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
        iterLast = ret_raw->begin() + m_tableLimit;
      } else {
        iterLast = ret_raw->end();
      }

      for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast;  ++iterTargetPhrase) {
        const TargetPhrase * targetPhrase = *iterTargetPhrase;
        std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);

        std::string targetString = targetPhrase->GetStringRep(m_output);
        if (allStats->find(targetString) == allStats->end()) {

          multiModelStatistics * statistics = new multiModelStatistics;
          statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
          statistics->p.resize(m_numScoreComponents);
          for(size_t j = 0; j < m_numScoreComponents; ++j) {
            statistics->p[j].resize(m_numModels);
          }

          //correct future cost estimates and total score
          statistics->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
          vector<FeatureFunction*> pd_feature;
          pd_feature.push_back(m_pd[i]);
          const vector<FeatureFunction*> pd_feature_const(pd_feature);
          statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

          (*allStats)[targetString] = statistics;

        }
        multiModelStatistics * statistics = (*allStats)[targetString];

        for(size_t j = 0; j < m_numScoreComponents; ++j) {
          statistics->p[j][i] = UntransformScore(raw_scores[j]);
        }

        (*allStats)[targetString] = statistics;
      }
    }
  }
}
Ejemplo n.º 3
0
float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, size_t modelIndex) const
{

  const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
  const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollection(target);

  // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
  if (ret_raw && ret_raw->GetSize() > 0) {
    const TargetPhrase * targetPhrase = *(ret_raw->begin());
    return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd)[0]);
  }

  // target phrase unknown
  else return 0;
}