C++ (Cpp) TargetPhrase::GetWord Beispiele

Beispiel #1

0

Datei anzeigen

Datei: GlobalLexicalModel.cpp Projekt: akartbayev/mosesdecoder

float GlobalLexicalModel::ScorePhrase( const TargetPhrase& targetPhrase ) const
{
  const Sentence& input = *(m_local->input);
  float score = 0;
  for(size_t targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ ) {
    float sum = 0;
    const Word& targetWord = targetPhrase.GetWord( targetIndex );
    VERBOSE(2,"glm " << targetWord << ": ");
    const DoubleHash::const_iterator targetWordHash = m_hash.find( &targetWord );
    if( targetWordHash != m_hash.end() ) {
      SingleHash::const_iterator inputWordHash = targetWordHash->second.find( m_bias );
      if( inputWordHash != targetWordHash->second.end() ) {
        VERBOSE(2,"*BIAS* " << inputWordHash->second);
        sum += inputWordHash->second;
      }

      set< const Word*, WordComparer > alreadyScored; // do not score a word twice
      for(size_t inputIndex = 0; inputIndex < input.GetSize(); inputIndex++ ) {
        const Word& inputWord = input.GetWord( inputIndex );
        if ( alreadyScored.find( &inputWord ) == alreadyScored.end() ) {
          SingleHash::const_iterator inputWordHash = targetWordHash->second.find( &inputWord );
          if( inputWordHash != targetWordHash->second.end() ) {
            VERBOSE(2," " << inputWord << " " << inputWordHash->second);
            sum += inputWordHash->second;
          }
          alreadyScored.insert( &inputWord );
        }
      }
    }
    // Hal Daume says: 1/( 1 + exp [ - sum_i w_i * f_i ] )
    VERBOSE(2," p=" << FloorScore( log(1/(1+exp(-sum))) ) << endl);
    score += FloorScore( log(1/(1+exp(-sum))) );
  }
  return score;
}

Beispiel #2

0

Datei anzeigen

Datei: CacheBasedLanguageModel.cpp Projekt: crazydreamer/moses-online

	void CacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp, ScoreComponentCollection* out) const
	{
		//VERBOSE(1,"CacheBasedLanguageModel::Evaluate_Whole_String" << std::endl);
		//consider all words in the TargetPhrase as one n-gram
		// and compute the decaying_score for all words
		// and return their sum
		
		decaying_cache_t::const_iterator it;
		float score = 0.0;
		std::string w = "";
		size_t endpos = tp.GetSize();
		for (size_t pos = 0 ; pos < endpos ; ++pos) {
			if (pos > 0){ w += " "; }
			w += tp.GetWord(pos).GetFactor(0)->GetString();
		}
		it = m_cache.find(w);
		
		if (it != m_cache.end()) //found!
		{
			score = ((*it).second).second;
			VERBOSE(3,"cblm::Evaluate: found w:|" << w << "| score:|" << score << "|" << std::endl);
		}
		else{
			score = precomputedScores[maxAge]; // one score per phrase table 
			VERBOSE(3,"cblm::Evaluate: not found w:|" << w << "| score:|" << score << "|" << std::endl);
		}
		VERBOSE(3,"cblm::Evaluate: phrase:|" << tp << "| score:|" << score << "|" << std::endl);
		
		out->PlusEquals(this, score);
	}

Beispiel #3

0

Datei anzeigen

Datei: CacheBasedLanguageModel.cpp Projekt: crazydreamer/moses-online

	void CacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp, ScoreComponentCollection* out) const
	{
		//VERBOSE(1,"CacheBasedLanguageModel::Evaluate_All_Substrings" << std::endl);
		//loop over all n-grams in the TargetPhrase (no matter of n)
		// and compute the decaying_score for all words
		// and return their sum
		
		decaying_cache_t::const_iterator it;
		float score = 0.0;
		size_t tp_size = tp.GetSize();
		for (size_t startpos = 0 ; startpos < tp_size ; ++startpos) {
			std::string w = "";
			for (size_t endpos = startpos; endpos < tp_size ; ++endpos) {
                                if (endpos > startpos){ w += " "; }
				w += tp.GetWord(endpos).GetFactor(0)->GetString();
				it = m_cache.find(w);
				
				float tmpsc;
				if (it != m_cache.end()) //found!
				{
					tmpsc = ((*it).second).second;
					VERBOSE(3,"cblm::Evaluate: found w:|" << w << "| score:|" << tmpsc << "|" << std::endl);
				}
		                else{
        		                tmpsc = precomputedScores[maxAge]; // one score per phrase table  
					VERBOSE(3,"cblm::Evaluate: not found w:|" << w << "| score:|" << tmpsc << "|" << std::endl);
                		}
				score += ( tmpsc /  ( tp_size + startpos - endpos ) );	
				VERBOSE(3,"cblm::Evaluate: actual score:|" << score << "|" << std::endl);
			}
		}
		VERBOSE(3,"cblm::Evaluate: phrase:|" << tp << "| score:|" << score << "|" << std::endl);
		out->PlusEquals(this, score);
	}

Beispiel #4

0

Datei anzeigen

Datei: OpSequenceModel.cpp Projekt: hitokazm/mosesdecoder

void OpSequenceModel:: Evaluate(const Phrase &source
                                , const TargetPhrase &targetPhrase
                                , ScoreComponentCollection &scoreBreakdown
                                , ScoreComponentCollection &estimatedFutureScore) const
{

  osmHypothesis obj;
  obj.setState(OSM->NullContextState());
  WordsBitmap myBitmap(source.GetSize());
  vector <string> mySourcePhrase;
  vector <string> myTargetPhrase;
  vector<float> scores(5);
  vector <int> alignments;
  int startIndex = 0;
  int endIndex = source.GetSize();

  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
  AlignmentInfo::const_iterator iter;


  for (iter = align.begin(); iter != align.end(); ++iter) {
    alignments.push_back(iter->first);
    alignments.push_back(iter->second);
  }

  for (int i = 0; i < targetPhrase.GetSize(); i++) {
    if (targetPhrase.GetWord(i).IsOOV())
      myTargetPhrase.push_back("_TRANS_SLF_");
    else
      myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  for (int i = 0; i < source.GetSize(); i++) {
    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  obj.setPhrases(mySourcePhrase , myTargetPhrase);
  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
  obj.computeOSMFeature(startIndex,myBitmap);
  obj.calculateOSMProb(*OSM);
  obj.populateScores(scores);
  estimatedFutureScore.PlusEquals(this, scores);

}

Beispiel #5

0

Datei anzeigen

Datei: Model1Feature.cpp Projekt: Jivt/mosesdecoder

void Model1Feature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  float score = 0.0;
  float norm = TransformScore(1+sentence.GetSize());

  for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
    const Word &wordT = targetPhrase.GetWord(posT);
    if ( !wordT.IsNonTerminal() ) {
      float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word

      // cache lookup
      bool foundInCache = false;
      {
#ifdef WITH_THREADS
        boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif
        boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input);
        if (sentenceCache != m_cache.end()) {
          boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]);
          if (cacheHit != sentenceCache->second.end()) {
            foundInCache = true;
            score += cacheHit->second;
            FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl);
          }
        }
      }

      if (!foundInCache) {
        for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s>
          const Word &wordS = sentence.GetWord(posS);
          float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
          FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);
          thisWordProb += modelProb;
        }
        float thisWordScore = TransformScore(thisWordProb) - norm;
        FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
        {
#ifdef WITH_THREADS
          // need to update cache; write lock
          boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
          m_cache[&input][wordT[0]] = thisWordScore;
        }
        score += thisWordScore;
      }
    }
  }

  scoreBreakdown.PlusEquals(this, score);
}

Beispiel #6

0

Datei anzeigen

Datei: CountNonTerms.cpp Projekt: A30041839/mosesdecoder

void CountNonTerms::Evaluate(const Phrase &sourcePhrase
              , const TargetPhrase &targetPhrase
              , ScoreComponentCollection &scoreBreakdown
              , ScoreComponentCollection &estimatedFutureScore) const
{
  const StaticData &staticData = StaticData::Instance();

  vector<float> scores(m_numScoreComponents, 0);
  size_t indScore = 0;

  if (m_all) {
	  for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
		const Word &word = targetPhrase.GetWord(i);
		if (word.IsNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  if (m_targetSyntax) {
	  for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
		const Word &word = targetPhrase.GetWord(i);
		if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  if (m_sourceSyntax) {
	  for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
		const Word &word = sourcePhrase.GetWord(i);
		if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
			++scores[indScore];
		}
	  }
	  ++indScore;
  }

  scoreBreakdown.PlusEquals(this, scores);
}

Beispiel #7

0

Datei anzeigen

Datei: TargetWordInsertionFeature.cpp Projekt: Avmb/mosesdecoder

void TargetWordInsertionFeature::ComputeFeatures(const TargetPhrase& targetPhrase,
    											 ScoreComponentCollection* accumulator,
    											 const AlignmentInfo::CollType &alignment) const
{
  // handle special case: unknown words (they have no word alignment)
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize();
  if (targetLength == 1 && sourceLength == 1) {
		const Factor* f1 = targetPhrase.GetWord(0).GetFactor(1);
		if (f1 && f1->GetString().compare(UNKNOWN_FACTOR) == 0) {
			return;
		}
  }

  // flag aligned words
  bool aligned[16];
  CHECK(targetLength < 16);
  for(size_t i=0; i<targetLength; i++) {
    aligned[i] = false;
  }
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) {
    aligned[ alignmentPoint->second ] = true;
  }

  // process unaligned target words
  for(size_t i=0; i<targetLength; i++) {
    if (!aligned[i]) {
      Word w = targetPhrase.GetWord(i);
      if (!w.IsNonTerminal()) {
    	const string &word = w.GetFactor(m_factorType)->GetString();
    	if (word != "<s>" && word != "</s>") {
      	  if (!m_unrestricted && m_vocab.find( word ) == m_vocab.end()) {
      		accumulator->PlusEquals(this,"OTHER",1);
      	  }
      	  else {
      		accumulator->PlusEquals(this,word,1);
      	  }
    	}
      }
    }
  }
}

Beispiel #8

0

Datei anzeigen

Datei: RulePairUnlexicalizedSource.cpp Projekt: Jivt/mosesdecoder

void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
        , const TargetPhrase &targetPhrase
        , ScoreComponentCollection &scoreBreakdown
        , ScoreComponentCollection &estimatedFutureScore) const
{
    const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
    if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
        return;
    }
    if ( !m_nonGlueRules && (targetPhraseLHS != m_glueTargetLHS) ) {
        return;
    }

    for (size_t posS=0; posS<source.GetSize(); ++posS) {
        const Word &wordS = source.GetWord(posS);
        if ( !wordS.IsNonTerminal() ) {
            return;
        }
    }

    ostringstream namestr;

    for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
        const Word &wordT = targetPhrase.GetWord(posT);
        const Factor* factorT = wordT[0];
        if ( wordT.IsNonTerminal() ) {
            namestr << "[";
        }
        namestr << factorT->GetString();
        if ( wordT.IsNonTerminal() ) {
            namestr << "]";
        }
        namestr << "|";
    }

    namestr << targetPhraseLHS->GetString() << "|";

    for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
            it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
        namestr << "|" << it->first << "-" << it->second;
    }

    scoreBreakdown.PlusEquals(this, namestr.str(), 1);
    if ( targetPhraseLHS != m_glueTargetLHS ) {
        scoreBreakdown.PlusEquals(this, 1);
    }
}

Beispiel #9

0

Datei anzeigen

Datei: Incremental.cpp Projekt: RobinQrtz/mosesdecoder

template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &)
{
  std::vector<lm::WordIndex> words;
  CHECK(phrase.GetSize() <= 1);
  if (phrase.GetSize())
    words.push_back(Convert(phrase.GetWord(0)));

  search::PartialEdge edge(edges_.AllocateEdge(0));
  // Appears to be a bug that FutureScore does not already include language model.
  search::ScoreRuleRet scored(search::ScoreRule(context_.LanguageModel(), words, edge.Between()));
  edge.SetScore(phrase.GetFutureScore() + scored.prob * context_.LMWeight() + static_cast<search::Score>(scored.oov) * oov_weight_);

  search::Note note;
  note.vp = &phrase;
  edge.SetNote(note);

  edges_.AddEdge(edge);
}

Beispiel #10

0

Datei anzeigen

Datei: TargetPhrase.cpp Projekt: hypertornado/mosesdecoder

TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
{
  if (! IsCompatible(inputPhrase)) {
    return NULL;
  }

  // ok, merge
  TargetPhrase *clone				= new TargetPhrase(*this);
  clone->m_sourcePhrase = m_sourcePhrase;
  int currWord = 0;
  const size_t len = GetSize();
  for (size_t currPos = 0 ; currPos < len ; currPos++) {
    const Word &inputWord	= inputPhrase.GetWord(currPos);
    Word &cloneWord = clone->GetWord(currPos);
    cloneWord.Merge(inputWord);

    currWord++;
  }

  return clone;
}

Beispiel #11

0

Datei anzeigen

Datei: UTrieNode.cpp Projekt: EktaGupta28/mosesdecoder

TargetPhraseCollection::shared_ptr
UTrieNode::
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
{
  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
  const size_t rank = alignmentInfo.GetSize();

  std::vector<int> vec;
  vec.reserve(rank);

  m_labelTable.resize(rank);

  int i = 0;
  for (AlignmentInfo::const_iterator p = alignmentInfo.begin();
       p != alignmentInfo.end(); ++p) {
    size_t targetNonTermIndex = p->second;
    const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
    vec.push_back(InsertLabel(i++, targetNonTerm));
  }
  TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
  if (ret == NULL) ret.reset(new TargetPhraseCollection);
  return ret;
}

Beispiel #12

0

Datei anzeigen

Datei: TargetWordInsertionFeature.cpp Projekt: Kitton/mosesdecoder

void TargetWordInsertionFeature::ComputeFeatures(const Phrase &source,
    const TargetPhrase& targetPhrase,
    ScoreComponentCollection* accumulator,
    const AlignmentInfo &alignmentInfo) const
{
  // handle special case: unknown words (they have no word alignment)
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();
  if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;

  // flag aligned words
  bool aligned[16];
  CHECK(targetLength < 16);
  for(size_t i=0; i<targetLength; i++) {
    aligned[i] = false;
  }
  for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) {
    aligned[ alignmentPoint->second ] = true;
  }

  // process unaligned target words
  for(size_t i=0; i<targetLength; i++) {
    if (!aligned[i]) {
      Word w = targetPhrase.GetWord(i);
      if (!w.IsNonTerminal()) {
        const StringPiece word = w.GetFactor(m_factorType)->GetString();
        if (word != "<s>" && word != "</s>") {
          if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
            accumulator->PlusEquals(this,StringPiece("OTHER"),1);
          } else {
            accumulator->PlusEquals(this,word,1);
          }
        }
      }
    }
  }
}

Beispiel #13

0

Datei anzeigen

Datei: LM.cpp Projekt: arvs/mosesdecoder

void LM::Evaluate(const Phrase &source
                  , const TargetPhrase &targetPhrase
                  , Scores &scores
                  , Scores &estimatedFutureScore) const
{
  SCORE all = 0, ngram = 0;

  PhraseVec phraseVec;
  phraseVec.reserve(m_order);
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    const Word &word = targetPhrase.GetWord(pos);
    ShiftOrPush(phraseVec, word);
    SCORE score = GetValueCache(phraseVec);

    all += score;
    if (phraseVec.size() == m_order) {
      ngram += score;
    }
  }

  SCORE estimated = all - ngram;
  scores.Add(*this, ngram);
  estimatedFutureScore.Add(*this, estimated);
}

Beispiel #14

0

Datei anzeigen

Datei: WordTranslationFeature.cpp Projekt: mitramah/mosesdecoder

void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();

  // process aligned words
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) {
    const Phrase& sourcePhrase = inputPath.GetPhrase();
    int sourceIndex = alignmentPoint->first;
    int targetIndex = alignmentPoint->second;
    Word ws = sourcePhrase.GetWord(sourceIndex);
    if (m_factorTypeSource == 0 && ws.IsNonTerminal()) continue;
    Word wt = targetPhrase.GetWord(targetIndex);
    if (m_factorTypeSource == 0 && wt.IsNonTerminal()) continue;
    StringPiece sourceWord = ws.GetFactor(m_factorTypeSource)->GetString();
    StringPiece targetWord = wt.GetFactor(m_factorTypeTarget)->GetString();
    if (m_ignorePunctuation) {
      // check if source or target are punctuation
      char firstChar = sourceWord[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
      firstChar = targetWord[0];
      charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
    }

    if (!m_unrestricted) {
      if (FindStringPiece(m_vocabSource, sourceWord) == m_vocabSource.end())
        sourceWord = "OTHER";
      if (FindStringPiece(m_vocabTarget, targetWord) == m_vocabTarget.end())
        targetWord = "OTHER";
    }

    if (m_simple) {
      // construct feature name
      util::StringStream featureName;
      featureName << m_description << "_";
      featureName << sourceWord;
      featureName << "~";
      featureName << targetWord;
      scoreBreakdown.SparsePlusEquals(featureName.str(), 1);
    }
    if (m_domainTrigger && !m_sourceContext) {
      const bool use_topicid = sentence.GetUseTopicId();
      const bool use_topicid_prob = sentence.GetUseTopicIdAndProb();
      if (use_topicid || use_topicid_prob) {
        if(use_topicid) {
          // use topicid as trigger
          const long topicid = sentence.GetTopicId();
          util::StringStream feature;
          feature << m_description << "_";
          if (topicid == -1)
            feature << "unk";
          else
            feature << topicid;

          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        } else {
          // use topic probabilities
          const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
          if (atol(topicid_prob[0].c_str()) == -1) {
            util::StringStream feature;
            feature << m_description << "_unk_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          } else {
            for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
              util::StringStream feature;
              feature << m_description << "_";
              feature << topicid_prob[i];
              feature << "_";
              feature << sourceWord;
              feature << "~";
              feature << targetWord;
              scoreBreakdown.SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
            }
          }
        }
      } else {
        // range over domain trigger words (keywords)
        const long docid = input.GetDocumentId();
        for (boost::unordered_set<std::string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
          string sourceTrigger = *p;
          util::StringStream feature;
          feature << m_description << "_";
          feature << sourceTrigger;
          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_sourceContext) {
      size_t globalSourceIndex = inputPath.GetWordsRange().GetStartPos() + sourceIndex;
      if (!m_domainTrigger && globalSourceIndex == 0) {
        // add <s> trigger feature for source
        util::StringStream feature;
        feature << m_description << "_";
        feature << "<s>,";
        feature << sourceWord;
        feature << "~";
        feature << targetWord;
        scoreBreakdown.SparsePlusEquals(feature.str(), 1);
      }

      // range over source words to get context
      for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
        if (contextIndex == globalSourceIndex) continue;
        StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
        if (m_ignorePunctuation) {
          // check if trigger is punctuation
          char firstChar = sourceTrigger[0];
          CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
          if(charIterator != m_punctuationHash.end())
            continue;
        }

        const long docid = input.GetDocumentId();
        bool sourceTriggerExists = false;
        if (m_domainTrigger)
          sourceTriggerExists = FindStringPiece(m_vocabDomain[docid], sourceTrigger ) != m_vocabDomain[docid].end();
        else if (!m_unrestricted)
          sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

        if (m_domainTrigger) {
          if (sourceTriggerExists) {
            util::StringStream feature;
            feature << m_description << "_";
            feature << sourceTrigger;
            feature << "_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          }
        } else if (m_unrestricted || sourceTriggerExists) {
          util::StringStream feature;
          feature << m_description << "_";
          if (contextIndex < globalSourceIndex) {
            feature << sourceTrigger;
            feature << ",";
            feature << sourceWord;
          } else {
            feature << sourceWord;
            feature << ",";
            feature << sourceTrigger;
          }
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_targetContext) {
      throw runtime_error("Can't use target words outside current translation option in a stateless feature");
      /*
      size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
      if (globalTargetIndex == 0) {
      	// add <s> trigger feature for source
      	stringstream feature;
      	feature << "wt_";
      	feature << sourceWord;
      	feature << "~";
      	feature << "<s>,";
      	feature << targetWord;
      	accumulator->SparsePlusEquals(feature.str(), 1);
      }

      // range over target words (up to current position) to get context
      for(size_t contextIndex = 0; contextIndex < globalTargetIndex; contextIndex++ ) {
      	string targetTrigger = cur_hypo.GetWord(contextIndex).GetFactor(m_factorTypeTarget)->GetString();
      	if (m_ignorePunctuation) {
      		// check if trigger is punctuation
      		char firstChar = targetTrigger.at(0);
      		CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      		if(charIterator != m_punctuationHash.end())
      			continue;
      	}

      	bool targetTriggerExists = false;
      	if (!m_unrestricted)
      		targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();

      	if (m_unrestricted || targetTriggerExists) {
      		stringstream feature;
      		feature << "wt_";
      		feature << sourceWord;
      		feature << "~";
      		feature << targetTrigger;
      		feature << ",";
      		feature << targetWord;
      		accumulator->SparsePlusEquals(feature.str(), 1);
      	}
      }*/
    }
  }
}

Beispiel #15

0

Datei anzeigen

Datei: PhrasePairFeature.cpp Projekt: akivajp/mosesdecoder

void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  const Phrase& source = inputPath.GetPhrase();
  if (m_simple) {
    ostringstream namestr;
    namestr << "pp_";
    namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      namestr << ",";
      namestr << sourceFactor->GetString();
    }
    namestr << "~";
    namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
      const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
      namestr << ",";
      namestr << targetFactor->GetString();
    }

    scoreBreakdown.SparsePlusEquals(namestr.str(),1);
  }
  if (m_domainTrigger) {
    const Sentence& isnt = static_cast<const Sentence&>(input);
    const bool use_topicid = isnt.GetUseTopicId();
    const bool use_topicid_prob = isnt.GetUseTopicIdAndProb();

    // compute pair
    ostringstream pair;
    pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      pair << ",";
      pair << sourceFactor->GetString();
    }
    pair << "~";
    pair << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
      const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
      pair << ",";
      pair << targetFactor->GetString();
    }

    if (use_topicid || use_topicid_prob) {
      if(use_topicid) {
        // use topicid as trigger
        const long topicid = isnt.GetTopicId();
        stringstream feature;
        feature << "pp_";
        if (topicid == -1)
          feature << "unk";
        else
          feature << topicid;

        feature << "_";
        feature << pair.str();
        scoreBreakdown.SparsePlusEquals(feature.str(), 1);
      } else {
        // use topic probabilities
        const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
        if (atol(topicid_prob[0].c_str()) == -1) {
          stringstream feature;
          feature << "pp_unk_";
          feature << pair.str();
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        } else {
          for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
            stringstream feature;
            feature << "pp_";
            feature << topicid_prob[i];
            feature << "_";
            feature << pair.str();
            scoreBreakdown.SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
          }
        }
      }
    } else {
      // range over domain trigger words
      const long docid = isnt.GetDocumentId();
      for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
        string sourceTrigger = *p;
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "_";
        namestr << pair.str();
        scoreBreakdown.SparsePlusEquals(namestr.str(),1);
      }
    }
  }
  if (m_sourceContext) {
    const Sentence& isnt = static_cast<const Sentence&>(input);

    // range over source words to get context
    for(size_t contextIndex = 0; contextIndex < isnt.GetSize(); contextIndex++ ) {
      StringPiece sourceTrigger = isnt.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
      if (m_ignorePunctuation) {
        // check if trigger is punctuation
        char firstChar = sourceTrigger[0];
        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
        if(charIterator != m_punctuationHash.end())
          continue;
      }

      bool sourceTriggerExists = false;
      if (!m_unrestricted)
        sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

      if (m_unrestricted || sourceTriggerExists) {
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "~";
        namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
        for (size_t i = 1; i < source.GetSize(); ++i) {
          const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
          namestr << ",";
          namestr << sourceFactor->GetString();
        }
        namestr << "~";
        namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
        for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
          const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
          namestr << ",";
          namestr << targetFactor->GetString();
        }

        scoreBreakdown.SparsePlusEquals(namestr.str(),1);
      }
    }
  }
}