void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const //fill fs and allStats with statistics from models { for(size_t i = 0; i < m_numModels; ++i) { const PhraseDictionary &pd = *m_pd[i]; TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection( src); if (ret_raw != NULL) { TargetPhraseCollection::iterator iterTargetPhrase; for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) { TargetPhrase * targetPhrase = *iterTargetPhrase; vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd); string targetString = targetPhrase->GetStringRep(m_output); if (allStats->find(targetString) == allStats->end()) { multiModelCountsStatistics * statistics = new multiModelCountsStatistics; statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info // zero out scores from original phrase table statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd); statistics->fst.resize(m_numModels); statistics->ft.resize(m_numModels); Scores scoreVector(5); scoreVector[0] = -raw_scores[0]; scoreVector[1] = -raw_scores[1]; scoreVector[2] = -raw_scores[2]; statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0 statistics->targetPhrase->Evaluate(src, GetFeaturesToApply()); (*allStats)[targetString] = statistics; } multiModelCountsStatistics * statistics = (*allStats)[targetString]; statistics->fst[i] = UntransformScore(raw_scores[0]); statistics->ft[i] = UntransformScore(raw_scores[1]); fs[i] = UntransformScore(raw_scores[2]); (*allStats)[targetString] = statistics; } } } // get target phrase frequency for models which have not seen the phrase pair for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { multiModelCountsStatistics * statistics = iter->second; for (size_t i = 0; i < m_numModels; ++i) { if (!statistics->ft[i]) { statistics->ft[i] = GetTargetCount(static_cast<const Phrase&>(*statistics->targetPhrase), i); } } } }
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const { for(size_t i = 0; i < m_numModels; ++i) { const PhraseDictionary &pd = *m_pd[i]; TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src); if (ret_raw != NULL) { TargetPhraseCollection::iterator iterTargetPhrase, iterLast; if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) { iterLast = ret_raw->begin() + m_tableLimit; } else { iterLast = ret_raw->end(); } for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) { const TargetPhrase * targetPhrase = *iterTargetPhrase; std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd); std::string targetString = targetPhrase->GetStringRep(m_output); if (allStats->find(targetString) == allStats->end()) { multiModelStatistics * statistics = new multiModelStatistics; statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info statistics->p.resize(m_numScoreComponents); for(size_t j = 0; j < m_numScoreComponents; ++j) { statistics->p[j].resize(m_numModels); } //correct future cost estimates and total score statistics->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd); vector<FeatureFunction*> pd_feature; pd_feature.push_back(m_pd[i]); const vector<FeatureFunction*> pd_feature_const(pd_feature); statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const); // zero out scores from original phrase table statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd); (*allStats)[targetString] = statistics; } multiModelStatistics * statistics = (*allStats)[targetString]; for(size_t j = 0; j < m_numScoreComponents; ++j) { statistics->p[j][i] = UntransformScore(raw_scores[j]); } (*allStats)[targetString] = statistics; } } } }
float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, size_t modelIndex) const { const PhraseDictionary &pd = *m_inverse_pd[modelIndex]; const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollection(target); // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score) if (ret_raw && ret_raw->GetSize() > 0) { const TargetPhrase * targetPhrase = *(ret_raw->begin()); return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd)[0]); } // target phrase unknown else return 0; }