HReorderingForwardState::
HReorderingForwardState(const HReorderingForwardState *prev,
                        const TranslationOption &topt)
  : LRState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
{
}
TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
    , const ScoreComponentCollection& generationScore) const
{
  if (IsFilteringStep()) {
    if (!oldTO.IsCompatible(mergePhrase, m_conflictFactors))
      return NULL;
  }

  TranslationOption *newTransOpt = new TranslationOption(oldTO);
  newTransOpt->MergeNewFeatures(mergePhrase, generationScore, m_newOutputFactors);
  return newTransOpt;
}
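// Illustrative example (names hypothetical): in a factored setup where a
// generation model maps surface forms to part-of-speech tags, mergePhrase
// would carry the generated factor (e.g. "NN" for "house"). MergeGeneration
// copies the existing option, merges those factors into m_newOutputFactors,
// adds generationScore to the option's scores, and in a filtering step
// returns NULL instead when the factors conflict.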
PhraseBasedReorderingState::
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
                           const TranslationOption &topt)
  : LRState(prev, topt)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_first(false)
{ }
/**
 * Create xml-based translation options for the specified input span
 */
void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPos, size_t endPos)
{
  Sentence const& source=dynamic_cast<Sentence const&>(m_source);
  InputPath &inputPath = GetInputPath(startPos,endPos);

  //get vector of TranslationOptions from Sentence
  vector <TranslationOption*> xmlOptions;
  source.GetXmlTranslationOptions(xmlOptions,startPos,endPos);

  for(size_t i=0; i<xmlOptions.size(); i++) {
    TranslationOption *transOpt = xmlOptions[i];
    transOpt->SetInputPath(inputPath);
    Add(transOpt);
  }

}
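// Illustrative only (attribute names follow the common Moses XML-markup
// convention, but treat the exact syntax as an assumption): the options
// returned by GetXmlTranslationOptions originate from spans marked up in the
// input sentence, e.g.
//
//   das ist <n translation="a test" prob="0.8">ein Test</n>
//
// Each such span yields a pre-built TranslationOption that this method
// attaches to the InputPath covering (startPos, endPos).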
void
LexicalReordering::
SetCache(TranslationOption& to) const
{
  if (to.GetLexReorderingScores(this)) return;
  // Scores were set already (e.g., by sampling phrase table)

  if (m_table) {
    Phrase const& sphrase = to.GetInputPath().GetPhrase();
    Phrase const& tphrase = to.GetTargetPhrase();
    to.CacheLexReorderingScores(*this, this->GetProb(sphrase,tphrase));
  } else { // e.g. OOV with Mmsapt
    // Scores vals(GetNumScoreComponents(), 0);
    // to.CacheLexReorderingScores(*this, vals);
  }
}
void TranslationOptionCollectionLattice::CreateTranslationOptions()
{
  GetTargetPhraseCollectionBatch();

  VERBOSE(2,"Translation Option Collection\n " << *this << endl);
  const vector <DecodeGraph*> &decodeGraphs = StaticData::Instance().GetDecodeGraphs();
  UTIL_THROW_IF2(decodeGraphs.size() != 1, "Multiple decoder graphs not supported yet");
  const DecodeGraph &decodeGraph = *decodeGraphs[0];
  UTIL_THROW_IF2(decodeGraph.GetSize() != 1, "Factored decomposition not supported yet");

  const DecodeStep &decodeStep = **decodeGraph.begin();
  const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();

  for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
    const InputPath &path = *m_inputPathQueue[i];
    const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
    const WordsRange &range = path.GetWordsRange();

    if (tpColl) {
      TargetPhraseCollection::const_iterator iter;
      for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
        const TargetPhrase &tp = **iter;
        TranslationOption *transOpt = new TranslationOption(range, tp);
        transOpt->SetInputPath(path);
        transOpt->Evaluate(m_source);

        Add(transOpt);
      }
    } else if (path.GetPhrase().GetSize() == 1) {
      // unknown word processing
      ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
    }
  }

  // Prune
  Prune();

  Sort();

  // future score matrix
  CalcFutureScore();

  // Cache lexical reordering costs
  CacheLexReordering();

}
void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption &topt, ReorderingType reoType) const {
  // don't call this on a bidirectional object
  assert(m_direction == LexicalReorderingConfiguration::Backward || m_direction == LexicalReorderingConfiguration::Forward);
  const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
    topt.GetCachedScores(m_configuration.GetScoreProducer()) : m_prevScore;
  
  // No scores available. TODO: Using a good prior distribution would be nicer.
  if(cachedScores == NULL)
    return;

  const Scores &scoreSet = *cachedScores;
  if(m_configuration.CollapseScores())
    scores[m_offset] = scoreSet[m_offset + reoType];
  else {
    std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
    scores[m_offset + reoType] = scoreSet[m_offset + reoType];
  }
}
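// A minimal, self-contained sketch of the two score layouts handled above,
// using plain std::vector<float> instead of the Moses Scores type (function
// and parameter names here are illustrative, not part of the Moses API).
// With the collapsed layout a model keeps a single slot at its offset and
// only the realised orientation's value is written there; with the expanded
// layout the model keeps one slot per orientation type.
#include <algorithm>
#include <cstddef>
#include <vector>

static void CopyReorderingScore(std::vector<float> &scores,
                                const std::vector<float> &cached,
                                std::size_t offset, std::size_t numTypes,
                                std::size_t reoType, bool collapse)
{
  if (collapse) {
    // single slot: write only the score of the orientation that occurred
    scores[offset] = cached[offset + reoType];
  } else {
    // one slot per orientation: clear the block, then set the realised one
    std::fill(scores.begin() + offset, scores.begin() + offset + numTypes, 0.0f);
    scores[offset + reoType] = cached[offset + reoType];
  }
}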
HierarchicalReorderingForwardState::
HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev,
                                   const TranslationOption &topt)
  : LexicalReorderingState(prev, topt), m_first(false)
  , m_prevRange(topt.GetSourceWordsRange()), m_coverage(prev->m_coverage)
{
  const WordsRange currWordsRange = topt.GetSourceWordsRange();
  m_coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);
}
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
                                   , const DecodeStep &decodeStep
                                   , PartialTranslOptColl &outputPartialTranslOptColl
                                   , TranslationOptionCollection * /* toc */
                                   , bool /*adhereTableLimit*/) const
{
  if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
    // word deletion

    TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
    outputPartialTranslOptColl.Add(newTransOpt);

    return;
  }

  // normal generation step
  const GenerationDictionary* generationDictionary  = decodeStep.GetGenerationDictionaryFeature();

  const Phrase &targetPhrase  = inputPartialTranslOpt.GetTargetPhrase();
  const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
  size_t targetLength         = targetPhrase.GetSize();

  // generation list for each word in phrase
  vector< WordList > wordListVector(targetLength);

  // create generation list
  int wordListVectorPos = 0;
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going through all words
    // generatable factors for this word to be put in wordList
    WordList &wordList = wordListVector[wordListVectorPos];
    const Word &word = targetPhrase.GetWord(currPos);

    // consult dictionary for possible generations for this word
    const OutputWordCollection *wordColl = generationDictionary->FindWord(word);

    if (wordColl == NULL) {
      // word not found in generation dictionary
      //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
      return; // can't be part of a phrase, special handling
    } else {
      // sort(*wordColl, CompareWordCollScore);
      OutputWordCollection::const_iterator iterWordColl;
      for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
        const Word &outputWord = (*iterWordColl).first;
        const ScoreComponentCollection& score = (*iterWordColl).second;
        // enter into word list generated factor(s) and its(their) score(s)
        wordList.push_back(WordPair(outputWord, score));
      }

      wordListVectorPos++; // done, next word
    }
  }

  // use generation list (wordList)
  // set up iterators (total number of expansions)
  size_t numIteration = 1;
  vector< WordListIterator >  wordListIterVector(targetLength);
  vector< const Word* >       mergeWords(targetLength);
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
    wordListIterVector[currPos] = wordListVector[currPos].begin();
    numIteration *= wordListVector[currPos].size();
  }

  // go thru each possible factor for each word & create hypothesis
  for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
    ScoreComponentCollection generationScore; // total score for this string of words

    // create vector of words with new factors for last phrase
    for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
      const WordPair &wordPair = *wordListIterVector[currPos];
      mergeWords[currPos] = &(wordPair.first);
      generationScore.PlusEquals(wordPair.second);
    }

    // merge with existing trans opt
    Phrase genPhrase(mergeWords);

    if (IsFilteringStep()) {
      if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
        continue;
    }

    const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
    TargetPhrase outPhrase(inPhrase);
    outPhrase.GetScoreBreakdown().PlusEquals(generationScore);

    outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
    outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply);

    const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();

    TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
    assert(newTransOpt);

    newTransOpt->SetInputPath(inputPath);

    outputPartialTranslOptColl.Add(newTransOpt);

    // increment iterators
    IncrementIterators(wordListIterVector, wordListVector);
  }
}
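// The loop above enumerates every combination of generated words: the
// per-position iterators in wordListIterVector behave like an odometer, and
// IncrementIterators advances them with carry after each combination. A
// minimal, self-contained sketch of that pattern (illustrative code, not the
// Moses implementation):
#include <cstddef>
#include <vector>

template <typename T>
static bool NextCombination(std::vector<std::size_t> &pos,
                            const std::vector<std::vector<T> > &lists)
{
  for (std::size_t i = 0; i < pos.size(); ++i) {
    if (++pos[i] < lists[i].size())
      return true;   // no carry needed; pos now names the next combination
    pos[i] = 0;      // this position wrapped; carry into the next one
  }
  return false;      // all positions wrapped: every combination was visited
}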
void SparseReordering::CopyScores(
               const TranslationOption& currentOpt,
               const TranslationOption* previousOpt,
               const InputType& input,
               LexicalReorderingState::ReorderingType reoType,
               LexicalReorderingConfiguration::Direction direction,
               ScoreComponentCollection* scores) const 
{
  if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
      (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
        reoType == LexicalReorderingState::DR)) {
    size_t gapStart, gapEnd;
    //NB: Using a static cast for speed, but could be nasty if 
    //using non-sentence input
    const Sentence& sentence = static_cast<const Sentence&>(input);
    const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
    if (previousOpt) {
      const WordsRange& previousRange = previousOpt->GetSourceWordsRange();
      if (previousRange < currentRange) {
        gapStart = previousRange.GetEndPos() + 1;
        gapEnd = currentRange.GetStartPos();
      } else {
        gapStart = currentRange.GetEndPos() + 1;
        gapEnd = previousRange.GetStartPos();
      }
    } else {
      //start of sentence
      gapStart = 0;
      gapEnd  = currentRange.GetStartPos();
    }
    assert(gapStart < gapEnd);
    for (size_t i = gapStart; i < gapEnd; ++i) {
      AddFeatures(SparseReorderingFeatureKey::Between,
                  SparseReorderingFeatureKey::Source, sentence.GetWord(i),
                  SparseReorderingFeatureKey::First, reoType, scores);
    }
  }
  //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
  //phrase (backward)
  //stack (forward)
  SparseReorderingFeatureKey::Type type;
  if (direction == LexicalReorderingConfiguration::Forward) {
    if (!m_useStack) return;
    type = SparseReorderingFeatureKey::Stack;
  } else if (direction == LexicalReorderingConfiguration::Backward) {
    if (!m_usePhrase) return;
    type = SparseReorderingFeatureKey::Phrase;
  } else {
    //Shouldn't be called for bidirectional
    //keep compiler happy
    type = SparseReorderingFeatureKey::Phrase;
    assert(!"Shouldn't call CopyScores() with bidirectional direction");
  }
  const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
    SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
  const Phrase& targetPhrase = currentOpt.GetTargetPhrase();   
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
    SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);


}
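// Worked example of the "between" gap computed above, kept outside the Moses
// types (spans are inclusive source positions): with a previous span [2,4]
// and a current span [7,8] the uncovered gap is positions 5 and 6, and one
// Between feature is fired per word in that gap. A minimal sketch of the
// same computation (hypothetical helper returning a half-open range):
#include <cstddef>
#include <utility>

static std::pair<std::size_t, std::size_t>
GapBetween(std::size_t prevStart, std::size_t prevEnd,
           std::size_t currStart, std::size_t currEnd)
{
  if (prevEnd < currStart)                        // previous span comes first
    return std::make_pair(prevEnd + 1, currStart);
  return std::make_pair(currEnd + 1, prevStart);  // current span comes first
}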
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
{
  const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
  std::string line;
  std::map<std::string, std::string> meta;

  if (getline(in, line, '\n').eof())
    return 0;

  //get covered words - if continual-partial-translation is switched on, parse input
  const StaticData &staticData = StaticData::Instance();
  m_frontSpanCoveredLength = 0;
  m_sourceCompleted.resize(0);
  if (staticData.ContinuePartialTranslation()) {
    string initialTargetPhrase;
    string sourceCompletedStr;
    string::size_type loc1 = line.find( "|||", 0 );
    string::size_type loc2 = line.find( "|||", loc1 + 3 );
    if (loc1 != string::npos && loc2 != string::npos) {
      initialTargetPhrase = line.substr(0, loc1);
      sourceCompletedStr = line.substr(loc1 + 3, loc2 - loc1 - 3);
      line = line.substr(loc2 + 3);
      sourceCompletedStr = Trim(sourceCompletedStr);
      initialTargetPhrase = Trim(initialTargetPhrase);
      m_initialTargetPhrase = initialTargetPhrase;
      int len = sourceCompletedStr.size();
      m_sourceCompleted.resize(len);
      int contiguous = 1;
      for (int i = 0; i < len; ++i) {
        if (sourceCompletedStr.at(i) == '1') {
          m_sourceCompleted[i] = true;
          if (contiguous)
            m_frontSpanCoveredLength ++;
        } else {
          m_sourceCompleted[i] = false;
          contiguous = 0;
        }
      }
    }
  }

  // remove extra spaces
  line = Trim(line);

  // if the sentence is specified as "<seg id=1> ... </seg>", extract id
  meta = ProcessAndStripSGML(line);
  if (meta.find("id") != meta.end()) {
    this->SetTranslationId(atol(meta["id"].c_str()));
  }
  if (meta.find("docid") != meta.end()) {
    this->SetDocumentId(atol(meta["docid"].c_str()));
    this->SetUseTopicId(false);
    this->SetUseTopicIdAndProb(false);
  }
  if (meta.find("topic") != meta.end()) {
    vector<string> topic_params;
    boost::split(topic_params, meta["topic"], boost::is_any_of("\t "));
    if (topic_params.size() == 1) {
      this->SetTopicId(atol(topic_params[0].c_str()));
      this->SetUseTopicId(true);
      this->SetUseTopicIdAndProb(false);
    } else {
      this->SetTopicIdAndProb(topic_params);
      this->SetUseTopicId(false);
      this->SetUseTopicIdAndProb(true);
    }
  }
  if (meta.find("weight-setting") != meta.end()) {
    this->SetWeightSetting(meta["weight-setting"]);
    this->SetSpecifiesWeightSetting(true);
  } else {
    this->SetSpecifiesWeightSetting(false);
  }

  // parse XML markup in translation line
  //const StaticData &staticData = StaticData::Instance();
  std::vector<XmlOption*> xmlOptionsList(0);
  std::vector< size_t > xmlWalls;
  std::vector< std::pair<size_t, std::string> > placeholders;

  if (staticData.GetXmlInputType() != XmlPassThrough) {
    if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, placeholders,
                                staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) {
      const string msg("Unable to parse XML in line: " + line);
      TRACE_ERR(msg << endl);
      throw runtime_error(msg);
    }
  }

  Phrase::CreateFromString(Input, factorOrder, line, factorDelimiter, NULL);

  // placeholders
  ProcessPlaceholders(placeholders);

  if (staticData.IsChart()) {
    InitStartEndWord();
  }

  //now that we have final word positions in phrase (from CreateFromString),
  //we can make input phrase objects to go with our XmlOptions and create TranslationOptions

  //only fill the vector if we are parsing XML
  if (staticData.GetXmlInputType() != XmlPassThrough ) {
    for (size_t i=0; i<GetSize(); i++) {
      m_xmlCoverageMap.push_back(false);
    }

    //xmlOptionsList will be empty for XmlIgnore
    //look at each column
    for(std::vector<XmlOption*>::const_iterator iterXmlOpts = xmlOptionsList.begin();
        iterXmlOpts != xmlOptionsList.end(); iterXmlOpts++) {

      const XmlOption *xmlOption = *iterXmlOpts;

      TranslationOption *transOpt = new TranslationOption(xmlOption->range, xmlOption->targetPhrase);
      m_xmlOptionsList.push_back(transOpt);

      for(size_t j=transOpt->GetSourceWordsRange().GetStartPos(); j<=transOpt->GetSourceWordsRange().GetEndPos(); j++) {
        m_xmlCoverageMap[j]=true;
      }

      delete xmlOption;
    }

  }

  // reordering walls and zones
  m_reorderingConstraint.InitializeWalls( GetSize() );

  // set reordering walls, if "-monotone-at-punctuation" is set
  if (staticData.UseReorderingConstraint() && GetSize()>0) {
    m_reorderingConstraint.SetMonotoneAtPunctuation( GetSubString( WordsRange(0,GetSize()-1 ) ) );
  }

  // set walls obtained from xml
  for(size_t i=0; i<xmlWalls.size(); i++)
    if( xmlWalls[i] < GetSize() ) // no buggy walls, please
      m_reorderingConstraint.SetWall( xmlWalls[i], true );
  m_reorderingConstraint.FinalizeWalls();

  return 1;
}
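// Illustrative input lines accepted by Read() (exact strings are examples
// inferred from the parsing code above, not canonical documentation):
//
//   plain sentence:        das ist ein Test
//   with an SGML id:       <seg id="17"> das ist ein Test </seg>
//   continue-partial-translation prefix:
//                          this is ||| 1100 ||| das ist ein Test
//
// In the last form the first field is the target prefix already produced,
// the second is one '1'/'0' flag per source word marking which words are
// covered, and the remainder is the source sentence itself.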