Ejemplo n.º 1
1
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
		   bool reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const Phrase& phrase = edge.GetCurrTargetPhrase();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
      out << *factor;
      CHECK(factor);

      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace option "-t"
  if (reportSegmentation == true && phrase.GetSize() > 0) {
    out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
	<< "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
  }
}
Ejemplo n.º 2
0
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   char reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const Phrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().second;

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholderFactor != NOT_FOUND) {
        const Factor *origFactor = phrase.GetFactor(pos, placeholderFactor);
        if (origFactor) {
          factor = origFactor;
        }
      }
      CHECK(factor);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
	out << "UNK" << *factor;
      }
      else {
	out << *factor;
      }
      
      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;
    // enriched "-tt"
    if (reportSegmentation == 2) {
      out << ",0, ";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
    }
    out << "| ";
  }
}
Ejemplo n.º 3
0
void GlobalLexicalModel::Evaluate
(const Hypothesis& hypo,
 ScoreComponentCollection* accumulator) const
{
  accumulator->PlusEquals( this,
                           GetFromCacheOrScorePhrase(hypo.GetCurrTargetPhrase()) );
}
Ejemplo n.º 4
0
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   char reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();

    std::map<size_t, const Factor*> placeholders;
    if (placeholderFactor != NOT_FOUND) {
      // creates map of target position -> factor for placeholders
      placeholders = GetPlaceholders(edge, placeholderFactor);
    }

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholders.size()) {
        // do placeholders
        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
        if (iter != placeholders.end()) {
          factor = iter->second;
        }
      }

      CHECK(factor);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
        out << "UNK" << *factor;
      } else {
        out << *factor;
      }

      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace ("report segmentation") option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;    // enriched "-tt"
    if (reportSegmentation == 2) {
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      OutputAllFeatureScores(scoreBreakdown, out);
    }
    out << "| ";
  }
}
void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
{
  const Sentence& input = *(m_local->input);
  const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();

  for(size_t targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ ) {
    StringPiece targetString = targetPhrase.GetWord(targetIndex).GetString(0); // TODO: change for other factors

    if (m_ignorePunctuation) {
      // check if first char is punctuation
      char firstChar = targetString[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
    }

    if (m_biasFeature) {
      stringstream feature;
      feature << "glm_";
      feature << targetString;
      feature << "~";
      feature << "**BIAS**";
      accumulator->SparsePlusEquals(feature.str(), 1);
    }

    boost::unordered_set<uint64_t> alreadyScored;
    for(size_t sourceIndex = 0; sourceIndex < input.GetSize(); sourceIndex++ ) {
      const StringPiece sourceString = input.GetWord(sourceIndex).GetString(0);
      // TODO: change for other factors

      if (m_ignorePunctuation) {
        // check if first char is punctuation
        char firstChar = sourceString[0];
        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
        if(charIterator != m_punctuationHash.end())
          continue;
      }
      const uint64_t sourceHash = util::MurmurHashNative(sourceString.data(), sourceString.size());

      if ( alreadyScored.find(sourceHash) == alreadyScored.end()) {
        bool sourceExists, targetExists;
        if (!m_unrestricted) {
          sourceExists = FindStringPiece(m_vocabSource, sourceString ) != m_vocabSource.end();
          targetExists = FindStringPiece(m_vocabTarget, targetString) != m_vocabTarget.end();
        }

        // no feature if vocab is in use and both words are not in restricted vocabularies
        if (m_unrestricted || (sourceExists && targetExists)) {
          if (m_sourceContext) {
            if (sourceIndex == 0) {
              // add <s> trigger feature for source
              stringstream feature;
              feature << "glm_";
              feature << targetString;
              feature << "~";
              feature << "<s>,";
              feature << sourceString;
              accumulator->SparsePlusEquals(feature.str(), 1);
              alreadyScored.insert(sourceHash);
            }

            // add source words to the right of current source word as context
            for(int contextIndex = sourceIndex+1; contextIndex < input.GetSize(); contextIndex++ ) {
              StringPiece contextString = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
              bool contextExists;
              if (!m_unrestricted)
                contextExists = FindStringPiece(m_vocabSource, contextString ) != m_vocabSource.end();

              if (m_unrestricted || contextExists) {
                stringstream feature;
                feature << "glm_";
                feature << targetString;
                feature << "~";
                feature << sourceString;
                feature << ",";
                feature << contextString;
                accumulator->SparsePlusEquals(feature.str(), 1);
                alreadyScored.insert(sourceHash);
              }
            }
          } else if (m_biphrase) {
            // --> look backwards for constructing context
            int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

            // 1) source-target pair, trigger source word (can be discont.) and adjacent target word (bigram)
            StringPiece targetContext;
            if (globalTargetIndex > 0)
              targetContext = cur_hypo.GetWord(globalTargetIndex-1).GetString(0); // TODO: change for other factors
            else
              targetContext = "<s>";

            if (sourceIndex == 0) {
              StringPiece sourceTrigger = "<s>";
              AddFeature(accumulator, sourceTrigger, sourceString,
                         targetContext, targetString);
            } else
              for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
                StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
                bool sourceTriggerExists = false;
                if (!m_unrestricted)
                  sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

                if (m_unrestricted || sourceTriggerExists)
                  AddFeature(accumulator, sourceTrigger, sourceString,
                             targetContext, targetString);
              }

            // 2) source-target pair, adjacent source word (bigram) and trigger target word (can be discont.)
            StringPiece sourceContext;
            if (sourceIndex-1 >= 0)
              sourceContext = input.GetWord(sourceIndex-1).GetString(0); // TODO: change for other factors
            else
              sourceContext = "<s>";

            if (globalTargetIndex == 0) {
              string targetTrigger = "<s>";
              AddFeature(accumulator, sourceContext, sourceString,
                         targetTrigger, targetString);
            } else
              for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                bool targetTriggerExists = false;
                if (!m_unrestricted)
                  targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger ) != m_vocabTarget.end();

                if (m_unrestricted || targetTriggerExists)
                  AddFeature(accumulator, sourceContext, sourceString,
                             targetTrigger, targetString);
              }
          } else if (m_bitrigger) {
            // allow additional discont. triggers on both sides
            int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

            if (sourceIndex == 0) {
              StringPiece sourceTrigger = "<s>";
              bool sourceTriggerExists = true;

              if (globalTargetIndex == 0) {
                string targetTrigger = "<s>";
                bool targetTriggerExists = true;

                if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                  AddFeature(accumulator, sourceTrigger, sourceString,
                             targetTrigger, targetString);
              } else {
                // iterate backwards over target
                for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                  StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                  bool targetTriggerExists = false;
                  if (!m_unrestricted)
                    targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger ) != m_vocabTarget.end();

                  if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                    AddFeature(accumulator, sourceTrigger, sourceString,
                               targetTrigger, targetString);
                }
              }
            }
            // iterate over both source and target
            else {
              // iterate backwards over source
              for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
                StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
                bool sourceTriggerExists = false;
                if (!m_unrestricted)
                  sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

                if (globalTargetIndex == 0) {
                  string targetTrigger = "<s>";
                  bool targetTriggerExists = true;

                  if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                    AddFeature(accumulator, sourceTrigger, sourceString,
                               targetTrigger, targetString);
                } else {
                  // iterate backwards over target
                  for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                    StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                    bool targetTriggerExists = false;
                    if (!m_unrestricted)
                      targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger ) != m_vocabTarget.end();

                    if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                      AddFeature(accumulator, sourceTrigger, sourceString,
                                 targetTrigger, targetString);
                  }
                }
              }
            }
          } else {
            stringstream feature;
            feature << "glm_";
            feature << targetString;
            feature << "~";
            feature << sourceString;
            accumulator->SparsePlusEquals(feature.str(), 1);
            alreadyScored.insert(sourceHash);

          }
        }
      }
    }
  }
}
void WordTranslationFeature::EvaluateWhenApplied
(const Hypothesis& hypo,
 ScoreComponentCollection* accumulator) const
{
  const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
  const TranslationOption& transOpt = hypo.GetTranslationOption();
  const TargetPhrase& targetPhrase = hypo.GetCurrTargetPhrase();
  const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();

  // process aligned words
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) {
    const Phrase& sourcePhrase = transOpt.GetInputPath().GetPhrase();
    int sourceIndex = alignmentPoint->first;
    int targetIndex = alignmentPoint->second;
    Word ws = sourcePhrase.GetWord(sourceIndex);
    if (m_factorTypeSource == 0 && ws.IsNonTerminal()) continue;
    Word wt = targetPhrase.GetWord(targetIndex);
    if (m_factorTypeSource == 0 && wt.IsNonTerminal()) continue;
    StringPiece sourceWord = ws.GetFactor(m_factorTypeSource)->GetString();
    StringPiece targetWord = wt.GetFactor(m_factorTypeTarget)->GetString();
    if (m_ignorePunctuation) {
      // check if source or target are punctuation
      char firstChar = sourceWord[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
      firstChar = targetWord[0];
      charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
    }

    if (!m_unrestricted) {
      if (FindStringPiece(m_vocabSource, sourceWord) == m_vocabSource.end())
        sourceWord = "OTHER";
      if (FindStringPiece(m_vocabTarget, targetWord) == m_vocabTarget.end())
        targetWord = "OTHER";
    }

    if (m_simple) {
      // construct feature name
      stringstream featureName;
      featureName << m_description << "_";
      featureName << sourceWord;
      featureName << "~";
      featureName << targetWord;
      accumulator->SparsePlusEquals(featureName.str(), 1);
    }
    if (m_domainTrigger && !m_sourceContext) {
      const bool use_topicid = input.GetUseTopicId();
      const bool use_topicid_prob = input.GetUseTopicIdAndProb();
      if (use_topicid || use_topicid_prob) {
        if(use_topicid) {
          // use topicid as trigger
          const long topicid = input.GetTopicId();
          stringstream feature;
          feature << m_description << "_";
          if (topicid == -1)
            feature << "unk";
          else
            feature << topicid;

          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          accumulator->SparsePlusEquals(feature.str(), 1);
        } else {
          // use topic probabilities
          const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
          if (atol(topicid_prob[0].c_str()) == -1) {
            stringstream feature;
            feature << m_description << "_unk_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            accumulator->SparsePlusEquals(feature.str(), 1);
          } else {
            for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
              stringstream feature;
              feature << m_description << "_";
              feature << topicid_prob[i];
              feature << "_";
              feature << sourceWord;
              feature << "~";
              feature << targetWord;
              accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
            }
          }
        }
      } else {
        // range over domain trigger words (keywords)
        const long docid = input.GetDocumentId();
        for (boost::unordered_set<std::string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
          string sourceTrigger = *p;
          stringstream feature;
          feature << m_description << "_";
          feature << sourceTrigger;
          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          accumulator->SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_sourceContext) {
      size_t globalSourceIndex = hypo.GetTranslationOption().GetStartPos() + sourceIndex;
      if (!m_domainTrigger && globalSourceIndex == 0) {
        // add <s> trigger feature for source
        stringstream feature;
        feature << m_description << "_";
        feature << "<s>,";
        feature << sourceWord;
        feature << "~";
        feature << targetWord;
        accumulator->SparsePlusEquals(feature.str(), 1);
      }

      // range over source words to get context
      for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
        if (contextIndex == globalSourceIndex) continue;
        StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
        if (m_ignorePunctuation) {
          // check if trigger is punctuation
          char firstChar = sourceTrigger[0];
          CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
          if(charIterator != m_punctuationHash.end())
            continue;
        }

        const long docid = input.GetDocumentId();
        bool sourceTriggerExists = false;
        if (m_domainTrigger)
          sourceTriggerExists = FindStringPiece(m_vocabDomain[docid], sourceTrigger ) != m_vocabDomain[docid].end();
        else if (!m_unrestricted)
          sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

        if (m_domainTrigger) {
          if (sourceTriggerExists) {
            stringstream feature;
            feature << m_description << "_";
            feature << sourceTrigger;
            feature << "_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            accumulator->SparsePlusEquals(feature.str(), 1);
          }
        } else if (m_unrestricted || sourceTriggerExists) {
          stringstream feature;
          feature << m_description << "_";
          if (contextIndex < globalSourceIndex) {
            feature << sourceTrigger;
            feature << ",";
            feature << sourceWord;
          } else {
            feature << sourceWord;
            feature << ",";
            feature << sourceTrigger;
          }
          feature << "~";
          feature << targetWord;
          accumulator->SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_targetContext) {
      throw runtime_error("Can't use target words outside current translation option in a stateless feature");
      /*
      size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
      if (globalTargetIndex == 0) {
      	// add <s> trigger feature for source
      	stringstream feature;
      	feature << "wt_";
      	feature << sourceWord;
      	feature << "~";
      	feature << "<s>,";
      	feature << targetWord;
      	accumulator->SparsePlusEquals(feature.str(), 1);
      }

      // range over target words (up to current position) to get context
      for(size_t contextIndex = 0; contextIndex < globalTargetIndex; contextIndex++ ) {
      	string targetTrigger = cur_hypo.GetWord(contextIndex).GetFactor(m_factorTypeTarget)->GetString();
      	if (m_ignorePunctuation) {
      		// check if trigger is punctuation
      		char firstChar = targetTrigger.at(0);
      		CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      		if(charIterator != m_punctuationHash.end())
      			continue;
      	}

      	bool targetTriggerExists = false;
      	if (!m_unrestricted)
      		targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();

      	if (m_unrestricted || targetTriggerExists) {
      		stringstream feature;
      		feature << "wt_";
      		feature << sourceWord;
      		feature << "~";
      		feature << targetTrigger;
      		feature << ",";
      		feature << targetWord;
      		accumulator->SparsePlusEquals(feature.str(), 1);
      	}
      }*/
    }
  }
}
Ejemplo n.º 7
0
void PhrasePairFeature::EvaluateWhenApplied(
  const Hypothesis& hypo,
  ScoreComponentCollection* accumulator) const
{
  const TargetPhrase& target = hypo.GetCurrTargetPhrase();
  const Phrase& source = hypo.GetTranslationOption().GetInputPath().GetPhrase();
  if (m_simple) {
    ostringstream namestr;
    namestr << "pp_";
    namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      namestr << ",";
      namestr << sourceFactor->GetString();
    }
    namestr << "~";
    namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < target.GetSize(); ++i) {
      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
      namestr << ",";
      namestr << targetFactor->GetString();
    }

    accumulator->SparsePlusEquals(namestr.str(),1);
  }
  if (m_domainTrigger) {
    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
    const bool use_topicid = input.GetUseTopicId();
    const bool use_topicid_prob = input.GetUseTopicIdAndProb();

    // compute pair
    ostringstream pair;
    pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      pair << ",";
      pair << sourceFactor->GetString();
    }
    pair << "~";
    pair << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < target.GetSize(); ++i) {
      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
      pair << ",";
      pair << targetFactor->GetString();
    }

    if (use_topicid || use_topicid_prob) {
      if(use_topicid) {
        // use topicid as trigger
        const long topicid = input.GetTopicId();
        stringstream feature;
        feature << "pp_";
        if (topicid == -1)
          feature << "unk";
        else
          feature << topicid;

        feature << "_";
        feature << pair.str();
        accumulator->SparsePlusEquals(feature.str(), 1);
      } else {
        // use topic probabilities
        const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
        if (atol(topicid_prob[0].c_str()) == -1) {
          stringstream feature;
          feature << "pp_unk_";
          feature << pair.str();
          accumulator->SparsePlusEquals(feature.str(), 1);
        } else {
          for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
            stringstream feature;
            feature << "pp_";
            feature << topicid_prob[i];
            feature << "_";
            feature << pair.str();
            accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
          }
        }
      }
    } else {
      // range over domain trigger words
      const long docid = input.GetDocumentId();
      for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
        string sourceTrigger = *p;
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "_";
        namestr << pair.str();
        accumulator->SparsePlusEquals(namestr.str(),1);
      }
    }
  }
  if (m_sourceContext) {
    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());

    // range over source words to get context
    for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
      StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
      if (m_ignorePunctuation) {
        // check if trigger is punctuation
        char firstChar = sourceTrigger[0];
        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
        if(charIterator != m_punctuationHash.end())
          continue;
      }

      bool sourceTriggerExists = false;
      if (!m_unrestricted)
        sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

      if (m_unrestricted || sourceTriggerExists) {
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "~";
        namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
        for (size_t i = 1; i < source.GetSize(); ++i) {
          const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
          namestr << ",";
          namestr << sourceFactor->GetString();
        }
        namestr << "~";
        namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
        for (size_t i = 1; i < target.GetSize(); ++i) {
          const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
          namestr << ",";
          namestr << targetFactor->GetString();
        }

        accumulator->SparsePlusEquals(namestr.str(),1);
      }
    }
  }
}