/** Build the forward hierarchical-reordering state that follows `prev` after
 *  applying translation option `topt`.
 *
 *  - m_first is cleared: this constructor is only used for non-initial states
 *    (a previous state exists).
 *  - m_prevRange records the source span covered by `topt`, to be compared
 *    against the next option's span when scoring the next reordering step.
 *  - m_coverage is built from the previous state's coverage plus topt's span
 *    (presumably the WordsBitmap copy-and-extend constructor — the class is
 *    declared elsewhere; verify against its header).
 */
HReorderingForwardState:: HReorderingForwardState(const HReorderingForwardState *prev,
    const TranslationOption &topt)
  : LRState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
{
}
/** Build the phrase-based reordering state that follows `prev` after applying
 *  translation option `topt`.
 *
 *  Only the source span of `topt` needs to be remembered (m_prevRange): the
 *  phrase-based model scores the next option purely by comparing spans.
 *  m_first is cleared because a predecessor state exists.
 *  NOTE(review): the initializer list order here (m_prevRange before m_first)
 *  differs from the ctor at the top of this file; actual init order follows
 *  member declaration order in the class, so this is cosmetic only.
 */
PhraseBasedReorderingState:: PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
    const TranslationOption &topt)
  : LRState(prev, topt)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_first(false)
{
}
/** Successor-state constructor: extends `prev` with translation option `topt`.
 *
 *  Copies the predecessor's source-coverage bitmap and then marks the span
 *  covered by `topt` as translated; the span itself is also kept in
 *  m_prevRange for the next reordering-type decision. m_first is cleared
 *  since this state has a predecessor.
 */
HierarchicalReorderingForwardState::HierarchicalReorderingForwardState(
  const HierarchicalReorderingForwardState *prev, const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage)
{
  // Flag every source position of the newly applied option as covered.
  const WordsRange &applied = topt.GetSourceWordsRange();
  m_coverage.SetValue(applied.GetStartPos(), applied.GetEndPos(), true);
}
/** Expand a partial translation option through this generation step.
 *
 *  For every word of the current target phrase, the generation dictionary is
 *  consulted for the factor values it can generate; the cross-product of all
 *  per-word alternatives is then enumerated, and one new TranslationOption
 *  (with merged factors and accumulated generation scores) is added to
 *  `outputPartialTranslOptColl` for each combination.
 *
 *  An empty target phrase (word deletion) is passed through unchanged.
 *  If any word has no entry in the generation dictionary, the whole option is
 *  dropped (nothing is added) — see the early return below.
 *  Ownership of the new TranslationOption objects is transferred to the
 *  output collection via Add().
 */
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
                                   , const DecodeStep &decodeStep
                                   , PartialTranslOptColl &outputPartialTranslOptColl
                                   , TranslationOptionCollection * /* toc */
                                   , bool /*adhereTableLimit*/) const
{
  if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
    // word deletion: nothing to generate from, copy the option through as-is
    TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
    outputPartialTranslOptColl.Add(newTransOpt);
    return;
  }

  // normal generation step
  const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature();
  const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
  const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
  size_t targetLength = targetPhrase.GetSize();

  // generation list for each word in phrase: one list of (word, score)
  // alternatives per target position
  vector< WordList > wordListVector(targetLength);

  // create generation list
  // NOTE(review): wordListVectorPos always equals currPos here, because the
  // only path that skips the increment returns from the function.
  int wordListVectorPos = 0;
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
    // going through all words
    // generatable factors for this word to be put in wordList
    WordList &wordList = wordListVector[wordListVectorPos];
    const Word &word = targetPhrase.GetWord(currPos);

    // consult dictionary for possible generations for this word
    const OutputWordCollection *wordColl = generationDictionary->FindWord(word);

    if (wordColl == NULL) {
      // word not found in generation dictionary: the whole phrase is dropped
      //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
      return; // can't be part of a phrase, special handling
    } else {
      // sort(*wordColl, CompareWordCollScore);
      OutputWordCollection::const_iterator iterWordColl;
      for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
        const Word &outputWord = (*iterWordColl).first;
        const ScoreComponentCollection& score = (*iterWordColl).second;
        // enter into word list generated factor(s) and its(their) score(s)
        wordList.push_back(WordPair(outputWord, score));
      }

      wordListVectorPos++; // done, next word
    }
  }

  // use generation list (wordList)
  // set up iterators (total number of expansions = product of list sizes)
  size_t numIteration = 1;
  vector< WordListIterator > wordListIterVector(targetLength);
  vector< const Word* > mergeWords(targetLength);
  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
    wordListIterVector[currPos] = wordListVector[currPos].begin();
    numIteration *= wordListVector[currPos].size();
  }

  // go thru each possible factor for each word & create hypothesis
  for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
    ScoreComponentCollection generationScore; // total score for this string of words

    // create vector of words with new factors for last phrase, summing the
    // per-word generation scores as we go
    for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
      const WordPair &wordPair = *wordListIterVector[currPos];
      mergeWords[currPos] = &(wordPair.first);
      generationScore.PlusEquals(wordPair.second);
    }

    // merge with existing trans opt
    Phrase genPhrase( mergeWords);

    if (IsFilteringStep()) {
      // filtering step: generated factors must agree with existing ones
      if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
        continue;
    }

    const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
    TargetPhrase outPhrase(inPhrase);
    outPhrase.GetScoreBreakdown().PlusEquals(generationScore);

    outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
    outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply);

    const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();

    // ownership passes to outputPartialTranslOptColl
    TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
    assert(newTransOpt);

    newTransOpt->SetInputPath(inputPath);

    outputPartialTranslOptColl.Add(newTransOpt);

    // increment iterators (odometer-style advance over the cross-product)
    IncrementIterators(wordListIterVector, wordListVector);
  }
}
void SparseReordering::CopyScores( const TranslationOption& currentOpt, const TranslationOption* previousOpt, const InputType& input, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const { if (m_useBetween && direction == LexicalReorderingConfiguration::Backward && (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL || reoType == LexicalReorderingState::DR)) { size_t gapStart, gapEnd; //NB: Using a static cast for speed, but could be nasty if //using non-sentence input const Sentence& sentence = static_cast<const Sentence&>(input); const WordsRange& currentRange = currentOpt.GetSourceWordsRange(); if (previousOpt) { const WordsRange& previousRange = previousOpt->GetSourceWordsRange(); if (previousRange < currentRange) { gapStart = previousRange.GetEndPos() + 1; gapEnd = currentRange.GetStartPos(); } else { gapStart = currentRange.GetEndPos() + 1; gapEnd = previousRange.GetStartPos(); } } else { //start of sentence gapStart = 0; gapEnd = currentRange.GetStartPos(); } assert(gapStart < gapEnd); for (size_t i = gapStart; i < gapEnd; ++i) { AddFeatures(SparseReorderingFeatureKey::Between, SparseReorderingFeatureKey::Source, sentence.GetWord(i), SparseReorderingFeatureKey::First, reoType, scores); } } //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; //phrase (backward) //stack (forward) SparseReorderingFeatureKey::Type type; if (direction == LexicalReorderingConfiguration::Forward) { if (!m_useStack) return; type = SparseReorderingFeatureKey::Stack; } else if (direction == LexicalReorderingConfiguration::Backward) { if (!m_usePhrase) return; type = SparseReorderingFeatureKey::Phrase; } else { //Shouldn't be called for bidirectional //keep compiler happy type = SparseReorderingFeatureKey::Phrase; assert(!"Shouldn't call CopyScores() with bidirectional direction"); } const Phrase& sourcePhrase = 
currentOpt.GetInputPath().GetPhrase(); AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), SparseReorderingFeatureKey::First, reoType, scores); AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); const Phrase& targetPhrase = currentOpt.GetTargetPhrase(); AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), SparseReorderingFeatureKey::First, reoType, scores); AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); }
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder) { const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter(); std::string line; std::map<std::string, std::string> meta; if (getline(in, line, '\n').eof()) return 0; //get covered words - if continual-partial-translation is switched on, parse input const StaticData &staticData = StaticData::Instance(); m_frontSpanCoveredLength = 0; m_sourceCompleted.resize(0); if (staticData.ContinuePartialTranslation()) { string initialTargetPhrase; string sourceCompletedStr; int loc1 = line.find( "|||", 0 ); int loc2 = line.find( "|||", loc1 + 3 ); if (loc1 > -1 && loc2 > -1) { initialTargetPhrase = line.substr(0, loc1); sourceCompletedStr = line.substr(loc1 + 3, loc2 - loc1 - 3); line = line.substr(loc2 + 3); sourceCompletedStr = Trim(sourceCompletedStr); initialTargetPhrase = Trim(initialTargetPhrase); m_initialTargetPhrase = initialTargetPhrase; int len = sourceCompletedStr.size(); m_sourceCompleted.resize(len); int contiguous = 1; for (int i = 0; i < len; ++i) { if (sourceCompletedStr.at(i) == '1') { m_sourceCompleted[i] = true; if (contiguous) m_frontSpanCoveredLength ++; } else { m_sourceCompleted[i] = false; contiguous = 0; } } } } // remove extra spaces line = Trim(line); // if sentences is specified as "<seg id=1> ... 
</seg>", extract id meta = ProcessAndStripSGML(line); if (meta.find("id") != meta.end()) { this->SetTranslationId(atol(meta["id"].c_str())); } if (meta.find("docid") != meta.end()) { this->SetDocumentId(atol(meta["docid"].c_str())); this->SetUseTopicId(false); this->SetUseTopicIdAndProb(false); } if (meta.find("topic") != meta.end()) { vector<string> topic_params; boost::split(topic_params, meta["topic"], boost::is_any_of("\t ")); if (topic_params.size() == 1) { this->SetTopicId(atol(topic_params[0].c_str())); this->SetUseTopicId(true); this->SetUseTopicIdAndProb(false); } else { this->SetTopicIdAndProb(topic_params); this->SetUseTopicId(false); this->SetUseTopicIdAndProb(true); } } if (meta.find("weight-setting") != meta.end()) { this->SetWeightSetting(meta["weight-setting"]); this->SetSpecifiesWeightSetting(true); } else { this->SetSpecifiesWeightSetting(false); } // parse XML markup in translation line //const StaticData &staticData = StaticData::Instance(); std::vector<XmlOption*> xmlOptionsList(0); std::vector< size_t > xmlWalls; std::vector< std::pair<size_t, std::string> > placeholders; if (staticData.GetXmlInputType() != XmlPassThrough) { if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, placeholders, staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) { const string msg("Unable to parse XML in line: " + line); TRACE_ERR(msg << endl); throw runtime_error(msg); } } Phrase::CreateFromString(Input, factorOrder, line, factorDelimiter, NULL); // placeholders ProcessPlaceholders(placeholders); if (staticData.IsChart()) { InitStartEndWord(); } //now that we have final word positions in phrase (from CreateFromString), //we can make input phrase objects to go with our XmlOptions and create TranslationOptions //only fill the vector if we are parsing XML if (staticData.GetXmlInputType() != XmlPassThrough ) { for (size_t i=0; i<GetSize(); i++) { m_xmlCoverageMap.push_back(false); } //iterXMLOpts will be empty for 
XmlIgnore //look at each column for(std::vector<XmlOption*>::const_iterator iterXmlOpts = xmlOptionsList.begin(); iterXmlOpts != xmlOptionsList.end(); iterXmlOpts++) { const XmlOption *xmlOption = *iterXmlOpts; TranslationOption *transOpt = new TranslationOption(xmlOption->range, xmlOption->targetPhrase); m_xmlOptionsList.push_back(transOpt); for(size_t j=transOpt->GetSourceWordsRange().GetStartPos(); j<=transOpt->GetSourceWordsRange().GetEndPos(); j++) { m_xmlCoverageMap[j]=true; } delete xmlOption; } } // reordering walls and zones m_reorderingConstraint.InitializeWalls( GetSize() ); // set reordering walls, if "-monotone-at-punction" is set if (staticData.UseReorderingConstraint() && GetSize()>0) { m_reorderingConstraint.SetMonotoneAtPunctuation( GetSubString( WordsRange(0,GetSize()-1 ) ) ); } // set walls obtained from xml for(size_t i=0; i<xmlWalls.size(); i++) if( xmlWalls[i] < GetSize() ) // no buggy walls, please m_reorderingConstraint.SetWall( xmlWalls[i], true ); m_reorderingConstraint.FinalizeWalls(); return 1; }