bool RuleTableLoaderCompact::LoadRuleSection( LineReader &reader, const std::vector<Word> &vocab, const std::vector<Phrase> &sourcePhrases, const std::vector<Phrase> &targetPhrases, const std::vector<size_t> &targetLhsIds, const std::vector<const AlignmentInfo *> &alignmentSets, RuleTableTrie &ruleTable) { // Read rule count. reader.ReadLine(); const size_t ruleCount = std::atoi(reader.m_line.c_str()); // Read rules and add to table. const size_t numScoreComponents = ruleTable.GetNumScoreComponents(); std::vector<float> scoreVector(numScoreComponents); std::vector<size_t> tokenPositions; for (size_t i = 0; i < ruleCount; ++i) { reader.ReadLine(); tokenPositions.clear(); FindTokens(tokenPositions, reader.m_line); const char *charLine = reader.m_line.c_str(); // The first three tokens are IDs for the source phrase, target phrase, // and alignment set. const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]); const int targetPhraseId = std::atoi(charLine+tokenPositions[1]); const int alignmentSetId = std::atoi(charLine+tokenPositions[2]); const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId]; const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId]; const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]); Word sourceLHS("X"); // TODO not implemented for compact const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId]; // Then there should be one score for each score component. for (size_t j = 0; j < numScoreComponents; ++j) { float score = std::atof(charLine+tokenPositions[3+j]); scoreVector[j] = FloorScore(TransformScore(score)); } if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') { std::stringstream msg; msg << "Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << reader.m_lineNum; UserMessage::Add(msg.str()); return false; } // The remaining columns are currently ignored. // Create and score target phrase. 
TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase); targetPhrase->SetAlignNonTerm(alignNonTerm); targetPhrase->SetTargetLHS(targetLhs); targetPhrase->SetSourcePhrase(sourcePhrase); targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply()); // Insert rule into table. TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection( ruleTable, sourcePhrase, *targetPhrase, &sourceLHS); coll.Add(targetPhrase); } return true; }
/**
 * Builds the target phrase collection for src by combining the entries of
 * every dictionary in m_dictionaries.
 *
 * For each distinct target phrase, every score component except the last is
 * interpolated linearly in probability space: the per-table log scores are
 * exponentiated, weighted with m_weights[component][table], summed, and the
 * sum is mapped back with log().  The last component is assumed to be a
 * penalty and is set to 1.
 *
 * The result is stored in m_targetPhrases, which is deleted and re-created at
 * the start of each call; a pointer returned by an earlier call is therefore
 * no longer valid after the next call.
 *
 * \param src source phrase to look up in each dictionary
 * \return the combined collection, pruned with Prune(true, m_tableLimit)
 */
const TargetPhraseCollection*
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
{
  delete m_targetPhrases;
  m_targetPhrases = new TargetPhraseCollection();

  // Gather the phrases offered by each table, plus the union over all tables.
  PhraseSet allPhrases;
  vector<PhraseSet> phrasesByTable(m_dictionaries.size());
  for (size_t i = 0; i < m_dictionaries.size(); ++i) {
    const TargetPhraseCollection* phrases =
      m_dictionaries[i]->GetTargetPhraseCollection(src);
    if (phrases) {
      for (TargetPhraseCollection::const_iterator j = phrases->begin();
           j != phrases->end(); ++j) {
        allPhrases.insert(*j);
        phrasesByTable[i].insert(*j);
      }
    }
  }

  ScoreComponentCollection sparseVector;
  for (PhraseSet::const_iterator i = allPhrases.begin(); i != allPhrases.end(); ++i) {
    // Start from the words of the phrase only; the scores are rebuilt below.
    TargetPhrase* combinedPhrase =
      new TargetPhrase(static_cast<const Phrase&>(**i));
    combinedPhrase->SetSourcePhrase((*i)->GetSourcePhrase());
    combinedPhrase->SetAlignTerm(&((*i)->GetAlignTerm()));
    // Non-terminal alignment comes from the non-terminal getter (previously
    // the terminal alignment was passed here as well).
    combinedPhrase->SetAlignNonTerm(&((*i)->GetAlignNonTerm()));

    Scores combinedScores(GetFeature()->GetNumScoreComponents());
    for (size_t j = 0; j < phrasesByTable.size(); ++j) {
      PhraseSet::const_iterator tablePhrase = phrasesByTable[j].find(combinedPhrase);
      if (tablePhrase != phrasesByTable[j].end()) {
        Scores tableScores = (*tablePhrase)->GetScoreBreakdown()
                             .GetScoresForProducer(GetFeature());
        // All components but the last; "k + 1 < size" stays correct for an
        // empty vector, where "size - 1" would wrap around.
        for (size_t k = 0; k + 1 < tableScores.size(); ++k) {
          combinedScores[k] += m_weights[k][j] * exp(tableScores[k]);
        }
      }
    }

    // Map back to log space.
    for (size_t k = 0; k + 1 < combinedScores.size(); ++k) {
      combinedScores[k] = log(combinedScores[k]);
    }
    if (!combinedScores.empty()) {
      combinedScores.back() = 1; // assume last is penalty
    }

    combinedPhrase->SetScore(
      GetFeature(),
      combinedScores,
      sparseVector,
      m_weightT,
      m_weightWP,
      *m_languageModels);
    m_targetPhrases->Add(combinedPhrase);
  }

  m_targetPhrases->Prune(true,m_tableLimit);

  return m_targetPhrases;
}