コード例 #1
0
// Reads the rule section of a compact rule table and inserts every rule into
// ruleTable.
//
// The section starts with one line holding the rule count, followed by one
// line per rule.  A rule line consists of three integer IDs (source phrase,
// target phrase, alignment set), one score per score component of ruleTable,
// and then a ':' token.  Anything after the ':' is ignored.
//
// reader          line source; one line is consumed for the count and one
//                 for each rule
// vocab           words, indexed by the entries of targetLhsIds
// sourcePhrases   source phrases, indexed by the source phrase ID
// targetPhrases   target phrases, indexed by the target phrase ID
// targetLhsIds    per target phrase, the index into vocab of its LHS word
// alignmentSets   alignment sets, indexed by the alignment set ID
// ruleTable       table that receives the rules
//
// Returns true if every rule was read and added.  Returns false, after
// reporting the problem through UserMessage::Add, as soon as a malformed
// line is encountered; rules added before that point stay in the table.
bool RuleTableLoaderCompact::LoadRuleSection(
  LineReader &reader,
  const std::vector<Word> &vocab,
  const std::vector<Phrase> &sourcePhrases,
  const std::vector<Phrase> &targetPhrases,
  const std::vector<size_t> &targetLhsIds,
  const std::vector<const AlignmentInfo *> &alignmentSets,
  RuleTableTrie &ruleTable)
{
  // Read rule count.  A negative value would wrap to a huge size_t, so it is
  // rejected explicitly.
  reader.ReadLine();
  const int parsedRuleCount = std::atoi(reader.m_line.c_str());
  if (parsedRuleCount < 0) {
    std::stringstream msg;
    msg << "Invalid rule count (" << parsedRuleCount
        << ") on line " << reader.m_lineNum;
    UserMessage::Add(msg.str());
    return false;
  }
  const size_t ruleCount = static_cast<size_t>(parsedRuleCount);

  // Read rules and add to table.
  const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
  // Three IDs, the scores, and the ':' token that terminates the scores.
  const size_t minTokenCount = 3 + numScoreComponents + 1;
  std::vector<float> scoreVector(numScoreComponents);
  std::vector<size_t> tokenPositions;
  for (size_t i = 0; i < ruleCount; ++i) {
    reader.ReadLine();

    tokenPositions.clear();
    FindTokens(tokenPositions, reader.m_line);

    // Every token indexed below must exist; a truncated line would otherwise
    // read past the end of tokenPositions.
    if (tokenPositions.size() < minTokenCount) {
      std::stringstream msg;
      msg << "Too few tokens (" << tokenPositions.size()
          << ", expected at least " << minTokenCount
          << ") on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    const char *charLine = reader.m_line.c_str();

    // The first three tokens are IDs for the source phrase, target phrase,
    // and alignment set.
    const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]);
    const int targetPhraseId = std::atoi(charLine+tokenPositions[1]);
    const int alignmentSetId = std::atoi(charLine+tokenPositions[2]);

    // All IDs are used as vector indices, so they must be in range.  The
    // short-circuit order guarantees targetLhsIds is only indexed once
    // targetPhraseId is known to be valid for it.
    const bool idsValid =
      sourcePhraseId >= 0 &&
      static_cast<size_t>(sourcePhraseId) < sourcePhrases.size() &&
      targetPhraseId >= 0 &&
      static_cast<size_t>(targetPhraseId) < targetPhrases.size() &&
      static_cast<size_t>(targetPhraseId) < targetLhsIds.size() &&
      targetLhsIds[targetPhraseId] < vocab.size() &&
      alignmentSetId >= 0 &&
      static_cast<size_t>(alignmentSetId) < alignmentSets.size();
    if (!idsValid) {
      std::stringstream msg;
      msg << "Out-of-range ID (source phrase " << sourcePhraseId
          << ", target phrase " << targetPhraseId
          << ", alignment set " << alignmentSetId
          << ") on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId];
    const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];
    Word sourceLHS("X"); // TODO not implemented for compact
    const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId];

    // Then there should be one score for each score component.
    for (size_t j = 0; j < numScoreComponents; ++j) {
      float score = std::atof(charLine+tokenPositions[3+j]);
      scoreVector[j] = FloorScore(TransformScore(score));
    }
    // The token right after the scores must be the ':' separator; anything
    // else means the line holds a different number of scores than expected.
    if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {
      std::stringstream msg;
      msg << "Size of scoreVector != number ("
          << scoreVector.size() << "!=" << numScoreComponents
          << ") of score components on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    // The remaining columns are currently ignored.

    // Allocate the LHS word only after the line has been validated, so that
    // none of the early returns above can leak it.
    const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]);

    // Create and score target phrase.
    TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
    targetPhrase->SetAlignNonTerm(alignNonTerm);
    targetPhrase->SetTargetLHS(targetLhs);
    targetPhrase->SetSourcePhrase(sourcePhrase);

    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());

    // Insert rule into table.
    TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
                                     ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
    coll.Add(targetPhrase);
  }

  return true;
}
コード例 #2
0
  // Builds the interpolated target phrase collection for src.
  //
  // Every underlying dictionary in m_dictionaries is queried for src.  For
  // each distinct target phrase found, all score components except the last
  // are combined as a weighted sum in probability space,
  //   combined[k] = log( sum_j m_weights[k][j] * exp(score_j[k]) ),
  // where j ranges over the tables that contain the phrase.  The last
  // component is assumed to be the phrase penalty and is set to 1.
  //
  // The result is stored in m_targetPhrases and pruned to m_tableLimit.  The
  // collection built by the previous call is deleted first, so a pointer
  // returned by an earlier call must not be used after calling this again.
  const TargetPhraseCollection*
     PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const {

    delete m_targetPhrases;
    m_targetPhrases = new TargetPhraseCollection();

    // Gather the candidate phrases: the union over all tables, plus the
    // per-table sets used to look up each table's scores afterwards.
    PhraseSet allPhrases;
    vector<PhraseSet> phrasesByTable(m_dictionaries.size());
    for (size_t i = 0; i < m_dictionaries.size(); ++i) {
      const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
      if (phrases) {
        for (TargetPhraseCollection::const_iterator j = phrases->begin();
          j != phrases->end(); ++j) {
          allPhrases.insert(*j);
          phrasesByTable[i].insert(*j);
        }
      }
    }

    ScoreComponentCollection sparseVector;
    for (PhraseSet::const_iterator i = allPhrases.begin(); i != allPhrases.end(); ++i) {
      // Construct from the Phrase base only, so the scores of the original
      // target phrase are not carried over.
      TargetPhrase* combinedPhrase = new TargetPhrase(static_cast<const Phrase&>(**i));
      combinedPhrase->SetSourcePhrase((*i)->GetSourcePhrase());
      combinedPhrase->SetAlignTerm(&((*i)->GetAlignTerm()));
      // The non-terminal alignment must come from the non-terminal accessor;
      // previously the terminal alignment was passed here by mistake.
      combinedPhrase->SetAlignNonTerm(&((*i)->GetAlignNonTerm()));

      Scores combinedScores(GetFeature()->GetNumScoreComponents());
      // All components but the last are interpolated.  Guard against an empty
      // vector so that size()-1 cannot wrap around.
      const size_t numInterpolated =
        combinedScores.empty() ? 0 : combinedScores.size() - 1;

      for (size_t j = 0; j < phrasesByTable.size(); ++j) {
        PhraseSet::const_iterator tablePhrase = phrasesByTable[j].find(combinedPhrase);
        if (tablePhrase != phrasesByTable[j].end()) {
          const Scores tableScores = (*tablePhrase)->GetScoreBreakdown()
            .GetScoresForProducer(GetFeature());
          // Skip the table's last (penalty) score and never write beyond the
          // interpolated slots of combinedScores.
          size_t numTableScores = tableScores.empty() ? 0 : tableScores.size() - 1;
          if (numTableScores > numInterpolated) {
            numTableScores = numInterpolated;
          }
          for (size_t k = 0; k < numTableScores; ++k) {
            combinedScores[k] += m_weights[k][j] * exp(tableScores[k]);
          }
        }
      }

      // Map back to log space.
      for (size_t k = 0; k < numInterpolated; ++k) {
        combinedScores[k] = log(combinedScores[k]);
      }
      if (!combinedScores.empty()) {
        combinedScores.back() = 1; //assume last is penalty
      }

      combinedPhrase->SetScore(
        GetFeature(),
        combinedScores,
        sparseVector,
        m_weightT,
        m_weightWP,
        *m_languageModels);
      m_targetPhrases->Add(combinedPhrase);
    }

    m_targetPhrases->Prune(true,m_tableLimit);


    return m_targetPhrases;
  }