void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  // get length of source and target phrase
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();

  // create feature names
  stringstream nameSource;
  nameSource << "s" << sourceLength;

  stringstream nameTarget;
  nameTarget << "t" << targetLength;

  stringstream nameBoth;
  nameBoth << sourceLength << "," << targetLength;

  // increase feature counts
  scoreBreakdown.PlusEquals(this,nameSource.str(),1);
  scoreBreakdown.PlusEquals(this,nameTarget.str(),1);
  scoreBreakdown.PlusEquals(this,nameBoth.str(),1);

  //cerr << nameSource.str() << " " << nameTarget.str() << " " << nameBoth.str() << endl;
}
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedFutureScore) const
{
  // dense scores
  vector<float> newScores(m_numScoreComponents);
  newScores[0] = 1.5;
  newScores[1] = 0.3;
  scoreBreakdown.PlusEquals(this, newScores);

  // sparse scores
  scoreBreakdown.PlusEquals(this, "sparse-name", 2.4);

}
Esempio n. 3
0
void RuleScope::EvaluateInIsolation(const Phrase &source
						, const TargetPhrase &targetPhrase
						, ScoreComponentCollection &scoreBreakdown
						, ScoreComponentCollection &estimatedFutureScore) const
{
  // adjacent non-term count as 1 ammbiguity, rather than 2 as in rule scope
  // source can't be empty, right?
  float score = 0;

  int count = 0;
  for (size_t i = 0; i < source.GetSize() - 0; ++i) {
	const Word &word = source.GetWord(i);
	bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
	if (ambiguous) {
		++count;
	}
	else {
		if (count > 0) {
			score += count;
		}
		count = -1;
	}
  }

  // 1st & last always adjacent to ambiguity
  ++count;
  if (count > 0) {
	score += count;
  }

  scoreBreakdown.PlusEquals(this, score);
}
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  const Range& range = inputPath.GetWordsRange();
  size_t startPos = range.GetStartPos();
  size_t endPos = range.GetEndPos();
  const TreeInput& treeInput = static_cast<const TreeInput&>(input);
  const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
  const Word& lhsLabel = targetPhrase.GetTargetLHS();

  const StaticData& staticData = StaticData::Instance();
  const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();

  std::vector<float> newScores(m_numScoreComponents,0.0); // m_numScoreComponents == 2 // first fires for matches, second for mismatches

  if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end()) && (lhsLabel != outputDefaultNonTerminal) ) {
    // match
    newScores[0] = 1.0;
  } else {
    // mismatch
    newScores[1] = 1.0;
  }

  scoreBreakdown.PlusEquals(this, newScores);
}
void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
        , const TargetPhrase &targetPhrase
        , ScoreComponentCollection &scoreBreakdown
        , ScoreComponentCollection &estimatedFutureScore) const
{
    const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
    if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
        return;
    }
    if ( !m_nonGlueRules && (targetPhraseLHS != m_glueTargetLHS) ) {
        return;
    }

    for (size_t posS=0; posS<source.GetSize(); ++posS) {
        const Word &wordS = source.GetWord(posS);
        if ( !wordS.IsNonTerminal() ) {
            return;
        }
    }

    ostringstream namestr;

    for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
        const Word &wordT = targetPhrase.GetWord(posT);
        const Factor* factorT = wordT[0];
        if ( wordT.IsNonTerminal() ) {
            namestr << "[";
        }
        namestr << factorT->GetString();
        if ( wordT.IsNonTerminal() ) {
            namestr << "]";
        }
        namestr << "|";
    }

    namestr << targetPhraseLHS->GetString() << "|";

    for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
            it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
        namestr << "|" << it->first << "-" << it->second;
    }

    scoreBreakdown.PlusEquals(this, namestr.str(), 1);
    if ( targetPhraseLHS != m_glueTargetLHS ) {
        scoreBreakdown.PlusEquals(this, 1);
    }
}
Esempio n. 6
0
/**
 * Adds a Model 1 style score for the terminal words of targetPhrase to scoreBreakdown.
 *
 * For each target terminal the per-word score is
 *   TransformScore( p(word | m_emptyWord) + sum over inner source positions p(word | sourceWord) )
 *     - TransformScore(1 + sentence size),
 * where the inner source positions exclude the first and last word of the
 * sentence. Per-word scores are cached in m_cache, keyed by the address of the
 * input and by the target factor; with WITH_THREADS the cache is read under a
 * shared lock and written under a unique lock on m_accessLock.
 *
 * The input is cast to Sentence without a runtime check. Non-terminal target
 * words contribute nothing. stackVec, inputPath and estimatedFutureScore are
 * not used.
 */
void Model1Feature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  float score = 0.0;
  float norm = TransformScore(1+sentence.GetSize());

  for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
    const Word &wordT = targetPhrase.GetWord(posT);
    if ( !wordT.IsNonTerminal() ) {
      float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word

      // cache lookup
      bool foundInCache = false;
      {
#ifdef WITH_THREADS
        boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif
        boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input);
        if (sentenceCache != m_cache.end()) {
          boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]);
          if (cacheHit != sentenceCache->second.end()) {
            foundInCache = true;
            score += cacheHit->second;
            FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl);
          }
        }
      }

      if (!foundInCache) {
        // ignore <s> and </s>.
        // The bound is written as posS+1 < size instead of posS < size-1:
        // size is unsigned, so size-1 would wrap around for an empty sentence
        // and the loop would read far past the end.
        for (size_t posS=1; posS+1<sentence.GetSize(); ++posS) {
          const Word &wordS = sentence.GetWord(posS);
          float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
          FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);
          thisWordProb += modelProb;
        }
        float thisWordScore = TransformScore(thisWordProb) - norm;
        FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
        {
#ifdef WITH_THREADS
          // need to update cache; write lock
          boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
          m_cache[&input][wordT[0]] = thisWordScore;
        }
        score += thisWordScore;
      }
    }
  }

  scoreBreakdown.PlusEquals(this, score);
}
// Checks ScoreComponentCollection::PlusEquals for a single-score producer and a
// multi-score producer: adding the same vector twice must double every entry,
// and the single producer's score must be unaffected by the multi-score updates.
BOOST_FIXTURE_TEST_CASE(plusequals, MockProducers)
{
  float onceValues[] = {1,2,3,4,5};
  float twiceValues[] = {2,4,6,8,10};
  std::vector<float> addend(onceValues,onceValues+5);
  std::vector<float> doubled(twiceValues,twiceValues+5);

  ScoreComponentCollection collection;

  // single-score producer
  collection.PlusEquals(&single, 3.4f);
  BOOST_CHECK_EQUAL(collection.GetScoreForProducer(&single), 3.4f);

  // first addition: scores equal the addend
  collection.PlusEquals(&multi,addend);
  std::vector<float> observed = collection.GetScoresForProducer(&multi);
  BOOST_CHECK_EQUAL_COLLECTIONS(addend.begin(),addend.end(),
                                observed.begin(), observed.end());

  // second addition: scores equal twice the addend
  collection.PlusEquals(&multi,addend);
  observed = collection.GetScoresForProducer(&multi);
  BOOST_CHECK_EQUAL_COLLECTIONS(doubled.begin(),doubled.end(),
                                observed.begin(), observed.end());

  // the single producer is untouched by the multi-score updates
  BOOST_CHECK_EQUAL(collection.GetScoreForProducer(&single), 3.4f);
}
void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  if (targetPhrase.GetNumNonTerminals()) {
    vector<float> newScores(m_numScoreComponents);
    newScores[0] = - std::numeric_limits<float>::infinity();
    scoreBreakdown.PlusEquals(this, newScores);
  }
}
// Checks sparse-feature handling in ScoreComponentCollection: Assign sets and
// overwrites named values, an unknown name reads as 0, and PlusEquals with a
// StringPiece name accumulates onto the existing value.
BOOST_FIXTURE_TEST_CASE(sparse_feature, MockProducers)
{
  ScoreComponentCollection collection;

  // initial assignment of two named features
  collection.Assign(&sparse, "first", 1.3f);
  collection.Assign(&sparse, "second", 2.1f);
  BOOST_CHECK_EQUAL( collection.GetScoreForProducer(&sparse,"first"), 1.3f);
  BOOST_CHECK_EQUAL( collection.GetScoreForProducer(&sparse,"second"), 2.1f);

  // a feature that was never set reads as zero
  BOOST_CHECK_EQUAL( collection.GetScoreForProducer(&sparse,"third"), 0.0f);

  // Assign replaces the stored value
  collection.Assign(&sparse, "first", -1.9f);
  BOOST_CHECK_EQUAL( collection.GetScoreForProducer(&sparse,"first"), -1.9f);

  // PlusEquals adds to the stored value
  collection.PlusEquals(&sparse, StringPiece("first"), -1.9f);
  BOOST_CHECK_EQUAL( collection.GetScoreForProducer(&sparse,"first"), -3.8f);
}
Esempio n. 10
0
void DeleteRules::EvaluateInIsolation(const Phrase &source
                                      , const TargetPhrase &target
                                      , ScoreComponentCollection &scoreBreakdown
                                      , ScoreComponentCollection &estimatedScores) const
{
  // dense scores
  size_t hash = 0;
  boost::hash_combine(hash, source);
  boost::hash_combine(hash, target);

  boost::unordered_set<size_t>::const_iterator iter;
  iter = m_ruleHashes.find(hash);
  if (iter != m_ruleHashes.end()) {
    scoreBreakdown.PlusEquals(this, -std::numeric_limits<float>::infinity());
  }

}
Esempio n. 11
0
void InputFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  if (m_legacy) {
    //binary phrase-table does input feature itself
    return;
  } else if (input.GetType() == WordLatticeInput) {
    const ScorePair *scores = inputPath.GetInputScore();
    if (scores) {
      scoreBreakdown.PlusEquals(this, *scores);
    }
  }
}
Esempio n. 12
0
/**
 * Computes the operation sequence model scores of a phrase pair in isolation
 * and adds them to estimatedFutureScore (scoreBreakdown is not modified).
 *
 * The source and target phrases are converted to vectors of their factor-0
 * strings (out-of-vocabulary target words are replaced by "_TRANS_SLF_"), the
 * terminal alignment is flattened into [src0, tgt0, src1, tgt1, ...], and an
 * osmHypothesis starting from the model's null context state produces five
 * scores.
 */
void OpSequenceModel:: Evaluate(const Phrase &source
                                , const TargetPhrase &targetPhrase
                                , ScoreComponentCollection &scoreBreakdown
                                , ScoreComponentCollection &estimatedFutureScore) const
{
  const size_t sourceSize = source.GetSize();
  const size_t targetSize = targetPhrase.GetSize();

  osmHypothesis obj;
  obj.setState(OSM->NullContextState());
  WordsBitmap myBitmap(sourceSize);
  vector <string> mySourcePhrase;
  vector <string> myTargetPhrase;
  vector<float> scores(5);
  vector <int> alignments;
  int startIndex = 0;
  int endIndex = static_cast<int>(sourceSize);

  // flatten the terminal alignment into consecutive (source, target) entries
  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
  for (AlignmentInfo::const_iterator iter = align.begin(); iter != align.end(); ++iter) {
    alignments.push_back(iter->first);
    alignments.push_back(iter->second);
  }

  // size_t counters: the phrase sizes are unsigned, so an int counter would
  // give a signed/unsigned comparison in the loop condition
  myTargetPhrase.reserve(targetSize);
  for (size_t i = 0; i < targetSize; ++i) {
    if (targetPhrase.GetWord(i).IsOOV())
      myTargetPhrase.push_back("_TRANS_SLF_");
    else
      myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  mySourcePhrase.reserve(sourceSize);
  for (size_t i = 0; i < sourceSize; ++i) {
    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  obj.setPhrases(mySourcePhrase , myTargetPhrase);
  obj.constructCepts(alignments,startIndex,endIndex-1,targetSize);
  obj.computeOSMFeature(startIndex,myBitmap);
  obj.calculateOSMProb(*OSM);
  obj.populateScores(scores);
  estimatedFutureScore.PlusEquals(this, scores);

}
Esempio n. 13
0
void CountNonTerms::Evaluate(const Phrase &sourcePhrase
              , const TargetPhrase &targetPhrase
              , ScoreComponentCollection &scoreBreakdown
              , ScoreComponentCollection &estimatedFutureScore) const
{
  const StaticData &staticData = StaticData::Instance();

  vector<float> scores(m_numScoreComponents, 0);
  size_t indScore = 0;

  if (m_all) {
	  for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
		const Word &word = targetPhrase.GetWord(i);
		if (word.IsNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  if (m_targetSyntax) {
	  for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
		const Word &word = targetPhrase.GetWord(i);
		if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  if (m_sourceSyntax) {
	  for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
		const Word &word = sourcePhrase.GetWord(i);
		if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  scoreBreakdown.PlusEquals(this, scores);
}
Esempio n. 14
0
/**
 * Adds negative infinity to the first dense component of scoreBreakdown for
 * any target phrase that contains non-terminals; other phrases are left
 * unscored.
 *
 * stackVec must be non-null (asserted) but its contents are not inspected.
 */
void SyntaxRHS::Evaluate(const InputType &input
                                   , const InputPath &inputPath
                                   , const TargetPhrase &targetPhrase
                                   , const StackVec *stackVec
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection *estimatedFutureScore) const
{
	assert(stackVec);

	// A loop over stackVec that only bound an unused reference to each cell
	// has been removed; it had no effect on the result.

	if (targetPhrase.GetNumNonTerminals()) {
		  vector<float> newScores(m_numScoreComponents);
		  newScores[0] = - std::numeric_limits<float>::infinity();
		  scoreBreakdown.PlusEquals(this, newScores);
	}

}
Esempio n. 15
0
void SpanLength::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  assert(stackVec);

  const PhraseProperty *property = targetPhrase.GetProperty("SpanLength");
  if (property == NULL) {
    return;
  }

  const SpanLengthPhraseProperty *slProp = static_cast<const SpanLengthPhraseProperty*>(property);

  const Phrase *ruleSource = targetPhrase.GetRuleSource();
  assert(ruleSource);

  float score = 0;
  for (size_t i = 0; i < stackVec->size(); ++i) {
    const ChartCellLabel &cell = *stackVec->at(i);
    const WordsRange &ntRange = cell.GetCoverage();
    size_t sourceWidth = ntRange.GetNumWordsCovered();
    float prob = slProp->GetProb(i, sourceWidth, m_const);
    score += TransformScore(prob);
  }

  if (score < -100.0f) {
    float weight = StaticData::Instance().GetWeight(this);
    if (weight < 0) {
      score = -100;
    }
  }

  scoreBreakdown.PlusEquals(this, score);

}
/**
 * Perceptron update from a single hope/fear pair.
 *
 * Takes the first hope and first fear feature vectors (index [0][0] of each
 * argument, which must therefore exist), scales their difference by
 * perceptron_learning_rate and adds it to weightUpdate. The hope, fear,
 * difference and update vectors are logged to cerr.
 * The dummy arguments and updatePosition are unused. Always returns 0.
 */
size_t Perceptron::updateWeightsHopeFear(
		ScoreComponentCollection& weightUpdate,
		const vector< vector<ScoreComponentCollection> >& featureValuesHope,
		const vector< vector<ScoreComponentCollection> >& featureValuesFear,
		const vector< vector<float> >& dummy1,
		const vector< vector<float> >& dummy2,
		const vector< vector<float> >& dummy3,
		const vector< vector<float> >& dummy4,
		float perceptron_learning_rate,
		size_t rank,
		size_t epoch,
		int updatePosition)
{
	const ScoreComponentCollection &hope = featureValuesHope[0][0];
	const ScoreComponentCollection &fear = featureValuesFear[0][0];

	cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << hope << endl;
	cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << fear << endl;

	// hope - fear
	ScoreComponentCollection delta(hope);
	delta.MinusEquals(fear);
	cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << delta << endl;

	// scale by the learning rate and accumulate into the caller's update
	delta.MultiplyEquals(perceptron_learning_rate);
	weightUpdate.PlusEquals(delta);
	cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << delta << endl;

	return 0;
}
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  const Word &targetLHS = targetPhrase.GetTargetLHS();

  if (targetLHS == m_glueTargetLHS) {
    // don't delete glue rules
    return;
  }

  const Phrase *source = targetPhrase.GetRuleSource();
  assert(source);
  float score = 0;

  if (source->Front().IsNonTerminal()) {
    const ChartCellLabel &cell = *stackVec->front();
    if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
      score = - std::numeric_limits<float>::infinity();
    }
  }

  if (source->Back().IsNonTerminal()) {
    const ChartCellLabel &cell = *stackVec->back();
    if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
      score = - std::numeric_limits<float>::infinity();
    }
  }


  scoreBreakdown.PlusEquals(this, score);

}
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
                                   , const DecodeStep &decodeStep
                                   , PartialTranslOptColl &outputPartialTranslOptColl
                                   , TranslationOptionCollection * /* toc */
                                   , bool /*adhereTableLimit*/) const
{
  if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
    // word deletion

    TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
    outputPartialTranslOptColl.Add(newTransOpt);

    return;
  }

  // normal generation step
  const GenerationDictionary* generationDictionary  = decodeStep.GetGenerationDictionaryFeature();

  const Phrase &targetPhrase  = inputPartialTranslOpt.GetTargetPhrase();
  const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
  size_t targetLength         = targetPhrase.GetSize();

  // generation list for each word in phrase
  vector< WordList > wordListVector(targetLength);

  // create generation list
  int wordListVectorPos = 0;
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going thorugh all words
    // generatable factors for this word to be put in wordList
    WordList &wordList = wordListVector[wordListVectorPos];
    const Word &word = targetPhrase.GetWord(currPos);

    // consult dictionary for possible generations for this word
    const OutputWordCollection *wordColl = generationDictionary->FindWord(word);

    if (wordColl == NULL) {
      // word not found in generation dictionary
      //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
      return; // can't be part of a phrase, special handling
    } else {
      // sort(*wordColl, CompareWordCollScore);
      OutputWordCollection::const_iterator iterWordColl;
      for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
        const Word &outputWord = (*iterWordColl).first;
        const ScoreComponentCollection& score = (*iterWordColl).second;
        // enter into word list generated factor(s) and its(their) score(s)
        wordList.push_back(WordPair(outputWord, score));
      }

      wordListVectorPos++; // done, next word
    }
  }

  // use generation list (wordList)
  // set up iterators (total number of expansions)
  size_t numIteration = 1;
  vector< WordListIterator >  wordListIterVector(targetLength);
  vector< const Word* >       mergeWords(targetLength);
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
    wordListIterVector[currPos] = wordListVector[currPos].begin();
    numIteration *= wordListVector[currPos].size();
  }

  // go thru each possible factor for each word & create hypothesis
  for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
    ScoreComponentCollection generationScore; // total score for this string of words

    // create vector of words with new factors for last phrase
    for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
      const WordPair &wordPair = *wordListIterVector[currPos];
      mergeWords[currPos] = &(wordPair.first);
      generationScore.PlusEquals(wordPair.second);
    }

    // merge with existing trans opt
    Phrase genPhrase( mergeWords);

    if (IsFilteringStep()) {
      if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
        continue;
    }

    const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
    TargetPhrase outPhrase(inPhrase);
    outPhrase.GetScoreBreakdown().PlusEquals(generationScore);

    outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
    outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply);

    const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();

    TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
    assert(newTransOpt);

    newTransOpt->SetInputPath(inputPath);

    outputPartialTranslOptColl.Add(newTransOpt);

    // increment iterators
    IncrementIterators(wordListIterVector, wordListVector);
  }
}