/**
 * Build a target phrase collection for `src` by linearly interpolating the
 * per-model values stored in `allStats`.
 *
 * For every entry and every score component i the new score is
 * TransformScore(inner product of statistics->p[i] and multimodelweights[i]).
 * The scores are assigned to the entry's own target phrase (which is therefore
 * modified in place), the phrase is re-evaluated against this dictionary only,
 * and a copy of it is added to the result.
 *
 * @param src               source phrase the statistics were collected for
 * @param allStats          per-target-phrase statistics; entries are re-scored in place
 * @param multimodelweights one weight vector per score component; row i must
 *                          hold at least as many weights as statistics->p[i] has values
 * @return collection allocated with `new`; this function never frees it
 */
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
{
  // Future cost estimates and the total score are corrected against this
  // dictionary only. The feature list does not depend on the entry being
  // processed, so it is built once instead of once (plus a copy) per entry.
  const vector<FeatureFunction*> pd_feature(1, const_cast<PhraseDictionaryMultiModel*>(this));

  TargetPhraseCollection *ret = new TargetPhraseCollection();
  for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {

    multiModelStatistics * statistics = iter->second;

    Scores scoreVector(m_numScoreComponents);

    for(size_t i = 0; i < m_numScoreComponents; ++i) {
      // The 0.0 seed makes the accumulation run in double precision.
      scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
    }

    statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);

    //correct future cost estimates and total score
    statistics->targetPhrase->EvaluateInIsolation(src, pd_feature);

    // The statistics entry keeps its own phrase; the collection gets a copy.
    ret->Add(new TargetPhrase(*statistics->targetPhrase));
  }
  return ret;
}
Пример #2
0
void TargetPhrase::SetScore(float score)
{
  //we use an existing score producer to figure out information for score setting (number of scores and weights)
  //TODO: is this a good idea?
  // Assume the default system.
  const TranslationSystem& system =  StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
  const ScoreProducer* prod = system.GetPhraseDictionaries()[0];

  //get the weight list
  unsigned int id = prod->GetScoreBookkeepingID();

  const vector<float> &allWeights = StaticData::Instance().GetAllWeights();

  size_t beginIndex = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(id);
  size_t endIndex = StaticData::Instance().GetScoreIndexManager().GetEndIndex(id);

  vector<float> weights;

  std::copy(allWeights.begin() +beginIndex, allWeights.begin() + endIndex,std::back_inserter(weights));

  //find out how many items are in the score vector for this producer
  size_t numScores = prod->GetNumScoreComponents();

  //divide up the score among all of the score vectors
  vector <float> scoreVector(numScores,score/numScores);

  //Now we have what we need to call the full SetScore method
  SetScore(prod,scoreVector,weights,system.GetWeightWordPenalty(),system.GetLanguageModels());
}
Пример #3
0
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
//fill fs and allStats with statistics from models
{
  for(size_t i = 0; i < m_numModels; ++i) {
    const PhraseDictionary &pd = *m_pd[i];

    TargetPhraseCollection *ret_raw = (TargetPhraseCollection*)  pd.GetTargetPhraseCollection( src);
    if (ret_raw != NULL) {

      TargetPhraseCollection::iterator iterTargetPhrase;
      for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end();  ++iterTargetPhrase) {

        TargetPhrase * targetPhrase = *iterTargetPhrase;
        vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);

        string targetString = targetPhrase->GetStringRep(m_output);
        if (allStats->find(targetString) == allStats->end()) {

          multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
          statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info

          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

          statistics->fst.resize(m_numModels);
          statistics->ft.resize(m_numModels);
          Scores scoreVector(5);
          scoreVector[0] = -raw_scores[0];
          scoreVector[1] = -raw_scores[1];
          scoreVector[2] = -raw_scores[2];
          statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
          statistics->targetPhrase->Evaluate(src, GetFeaturesToApply());

          (*allStats)[targetString] = statistics;

        }
        multiModelCountsStatistics * statistics = (*allStats)[targetString];

        statistics->fst[i] = UntransformScore(raw_scores[0]);
        statistics->ft[i] = UntransformScore(raw_scores[1]);
        fs[i] = UntransformScore(raw_scores[2]);
        (*allStats)[targetString] = statistics;
      }
    }
  }

  // get target phrase frequency for models which have not seen the phrase pair
  for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
    multiModelCountsStatistics * statistics = iter->second;

    for (size_t i = 0; i < m_numModels; ++i) {
      if (!statistics->ft[i]) {
        statistics->ft[i] = GetTargetCount(static_cast<const Phrase&>(*statistics->targetPhrase), i);
      }
    }
  }
}
Пример #4
0
/**
 * Assign an XML-supplied score to this phrase.
 *
 * The first registered phrase dictionary is used as the score producer.
 * `score` is divided evenly over that producer's score components and the
 * resulting vector is stored in the score breakdown under that producer.
 *
 * NOTE(review): element 0 of PhraseDictionary::GetColl() is read without an
 * emptiness check -- presumably at least one dictionary is always registered
 * by the time this is called; confirm against callers.
 */
void TargetPhrase::SetXMLScore(float score)
{
  const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
  size_t numScores = prod->GetNumScoreComponents();
  vector <float> scoreVector(numScores,score/numScores);

  m_scoreBreakdown.Assign(prod, scoreVector);
}
void TargetPhrase::SetScore(float score)
{
	//we use an existing score producer to figure out information for score setting (number of scores and weights)
	//TODO: is this a good idea?
    // Assume the default system.
    const TranslationSystem& system =  StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
	const ScoreProducer* prod = system.GetPhraseDictionaries()[0];
	
	vector<float> weights = StaticData::Instance().GetWeights(prod);

	
	//find out how many items are in the score vector for this producer	
	size_t numScores = prod->GetNumScoreComponents();

	//divide up the score among all of the score vectors
	vector <float> scoreVector(numScores,score/numScores);
	
	//Now we have what we need to call the full SetScore method
	SetScore(prod, scoreVector, ScoreComponentCollection(), weights, system.GetWeightWordPenalty(), system.GetLanguageModels());
}
Пример #6
0
/**
 * Build the final target phrase collection for `src` from the collected
 * count statistics.
 *
 * For every statistics entry five scores are computed: two from
 * m_combineFunction over the collected counts, two weighted lexical
 * translation values derived from the phrase's terminal alignment, and one
 * constant. They are assigned to the entry's target phrase, which is
 * re-evaluated and copied into the result. Entries whose alignment processing
 * raises AlignmentException are skipped.
 *
 * Ownership: on normal return this function destroys every entry of
 * `allStats` (RemoveAllInMap) and deletes the map itself, so the caller must
 * not touch `allStats` afterwards. If the function throws, `allStats` is left
 * untouched.
 *
 * @param src               source phrase
 * @param fs                per-model values used for score component 2
 * @param allStats          statistics keyed by target string; consumed (see above)
 * @param multimodelweights weight vectors; rows 0-3 are read
 * @return collection allocated with `new`; this function does not free it on
 *         normal return
 * @throws util::Exception if a target phrase carries no terminal alignment
 */
TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const
{
  TargetPhraseCollection *ret = new TargetPhraseCollection();
  for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {

    multiModelCountsStatistics * statistics = iter->second;

    if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
      // Nothing else references the partially built result, so release it
      // before throwing instead of leaking it.
      delete ret;
      UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
    }

    try {
      pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(src, static_cast<const Phrase&>(*statistics->targetPhrase), statistics->targetPhrase->GetAlignTerm());
      // Refer to the two halves of the pair directly; copying them would
      // duplicate every alignment set for every target phrase.
      vector< set<size_t> > &alignedToT = alignment.first;
      vector< set<size_t> > &alignedToS = alignment.second;
      double lexst = ComputeWeightedLexicalTranslation(static_cast<const Phrase&>(*statistics->targetPhrase), src, alignedToS, m_lexTable_e2f, multimodelweights[1], false );
      double lexts = ComputeWeightedLexicalTranslation(src, static_cast<const Phrase&>(*statistics->targetPhrase), alignedToT, m_lexTable_f2e, multimodelweights[3], true );

      Scores scoreVector(5);
      scoreVector[0] = FloorScore(TransformScore(m_combineFunction(statistics->fst, statistics->ft, multimodelweights[0])));
      scoreVector[1] = FloorScore(TransformScore(lexst));
      scoreVector[2] = FloorScore(TransformScore(m_combineFunction(statistics->fst, fs, multimodelweights[2])));
      scoreVector[3] = FloorScore(TransformScore(lexts));
      // 2.718 approximates e; presumably a constant penalty feature -- TODO confirm.
      scoreVector[4] = FloorScore(TransformScore(2.718));

      statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
      statistics->targetPhrase->Evaluate(src, GetFeaturesToApply());
    } catch (const AlignmentException&) {
      // Deliberate best effort: a phrase whose alignment cannot be processed
      // is left out of the result rather than aborting the whole lookup.
      continue;
    }

    // The statistics entry is destroyed below, so the collection gets a copy.
    ret->Add(new TargetPhrase(*statistics->targetPhrase));
  }

  RemoveAllInMap(*allStats);
  delete allStats;
  return ret;
}
Пример #7
0
/**
 * Read the rule section of a compact rule table and add every rule to
 * `ruleTable`.
 *
 * The section starts with a line holding the rule count. Each following line
 * holds three integer IDs (source phrase, target phrase, alignment set), one
 * score per score component of `ruleTable`, and then a token starting with
 * ':'; anything after that is ignored.
 *
 * @param reader        line source; m_line and m_lineNum are read after each ReadLine()
 * @param vocab         words, indexed through targetLhsIds
 * @param sourcePhrases source phrases, indexed by source phrase ID
 * @param targetPhrases target phrases, indexed by target phrase ID
 * @param targetLhsIds  vocabulary index of the target LHS, per target phrase ID
 * @param alignmentSets non-terminal alignments, indexed by alignment set ID
 * @param ruleTable     table receiving the rules
 * @return true when all rules were loaded; false (after UserMessage::Add)
 *         when a line is malformed. Rules added before the bad line stay in
 *         the table.
 */
bool RuleTableLoaderCompact::LoadRuleSection(
  LineReader &reader,
  const std::vector<Word> &vocab,
  const std::vector<Phrase> &sourcePhrases,
  const std::vector<Phrase> &targetPhrases,
  const std::vector<size_t> &targetLhsIds,
  const std::vector<const AlignmentInfo *> &alignmentSets,
  RuleTableTrie &ruleTable)
{
  // Read rule count.
  reader.ReadLine();
  const size_t ruleCount = std::atoi(reader.m_line.c_str());

  // Read rules and add to table.
  const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
  // Three IDs, the scores, and the ':' token that must follow them.
  const size_t minTokenCount = 3 + numScoreComponents + 1;
  std::vector<float> scoreVector(numScoreComponents);
  std::vector<size_t> tokenPositions;
  for (size_t i = 0; i < ruleCount; ++i) {
    reader.ReadLine();

    tokenPositions.clear();
    FindTokens(tokenPositions, reader.m_line);

    // Every index into tokenPositions below is covered by this check.
    if (tokenPositions.size() < minTokenCount) {
      std::stringstream msg;
      msg << "Expected at least " << minTokenCount << " tokens but found "
          << tokenPositions.size() << " on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    const char *charLine = reader.m_line.c_str();

    // The first three tokens are IDs for the source phrase, target phrase,
    // and alignment set.
    const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]);
    const int targetPhraseId = std::atoi(charLine+tokenPositions[1]);
    const int alignmentSetId = std::atoi(charLine+tokenPositions[2]);

    // The IDs come straight from the file, so make sure each one addresses
    // an existing element before it is used as an index.
    const bool idsValid =
      sourcePhraseId >= 0 &&
      static_cast<size_t>(sourcePhraseId) < sourcePhrases.size() &&
      targetPhraseId >= 0 &&
      static_cast<size_t>(targetPhraseId) < targetPhrases.size() &&
      static_cast<size_t>(targetPhraseId) < targetLhsIds.size() &&
      targetLhsIds[targetPhraseId] < vocab.size() &&
      alignmentSetId >= 0 &&
      static_cast<size_t>(alignmentSetId) < alignmentSets.size();
    if (!idsValid) {
      std::stringstream msg;
      msg << "Rule refers to an unknown source phrase, target phrase, "
          << "target LHS or alignment set on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    // Then there should be one score for each score component.
    for (size_t j = 0; j < numScoreComponents; ++j) {
      float score = std::atof(charLine+tokenPositions[3+j]);
      scoreVector[j] = FloorScore(TransformScore(score));
    }
    // A ':' token must follow the scores; anything else means the line does
    // not carry the expected number of score columns.
    if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {
      std::stringstream msg;
      msg << "Expected ':' after " << numScoreComponents
          << " score components on line " << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    // The remaining columns are currently ignored.

    const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId];
    const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];
    const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId];
    Word sourceLHS("X"); // TODO not implemented for compact

    // Allocated only after the line has been validated so that the early
    // returns above cannot leak it.
    const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]);

    // Create and score target phrase.
    TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
    targetPhrase->SetAlignNonTerm(alignNonTerm);
    targetPhrase->SetTargetLHS(targetLhs);
    targetPhrase->SetSourcePhrase(sourcePhrase);

    // Attach the parsed scores to the phrase before it is evaluated;
    // previously scoreVector was filled in but never used.
    targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());

    // Insert rule into table.
    TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
                                     ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
    coll.Add(targetPhrase);
  }

  return true;
}