/**
 * Decodes a sentence with Moses and converts the best hypothesis into a
 * PhraseSegmentation built from the phrase pairs in ppairs.
 *
 * @param ppairs   collection of anchored phrase pairs available for the sentence;
 *                 every phrase used by the Moses hypothesis is looked up in it
 * @param sentence source words of the sentence to decode
 * @return the segmentation in source-to-target hypothesis order (the hypothesis
 *         chain is walked backwards and each phrase pair is pushed to the front).
 *         An empty segmentation is returned if the sentence is empty, if Moses
 *         delivers no best hypothesis, or if a phrase of the hypothesis cannot
 *         be located in ppairs (the latter two cases are logged as errors).
 */
PhraseSegmentation BeamSearchAdapter::search(boost::shared_ptr<const PhrasePairCollection> ppairs,
		const std::vector<Word> &sentence) const {
	if(sentence.empty())
		return PhraseSegmentation();

	// Serialise the sentence as space-separated tokens without a trailing space.
	std::stringstream sntstream;
	std::copy(sentence.begin(), sentence.end() - 1, std::ostream_iterator<Word>(sntstream, " "));
	sntstream << sentence.back();

	// Build the Moses input sentence using factor 0 only and "|" as factor delimiter.
	boost::scoped_ptr<Moses::Sentence> msent(new Moses::Sentence());
	std::vector<Moses::FactorType> ftype(1, 0);
	msent->CreateFromString(ftype, sntstream.str(), "|");

	const Moses::TranslationSystem &system =
		Moses::StaticData::Instance().GetTranslationSystem(Moses::TranslationSystem::DEFAULT);
	boost::scoped_ptr<Moses::Manager> manager(new Moses::Manager(0, *msent,
		Moses::StaticData::Instance().GetSearchAlgorithm(), &system));
	manager->ProcessSentence();
	const Moses::Hypothesis *hypo = manager->GetBestHypothesis();

	if(hypo == NULL) {
		LOG(logger_, error, "No answer from moses.");
		return PhraseSegmentation();
	}

	// Sort a copy of the phrase pairs so that they can be searched by key.
	CompareAnchoredPhrasePairs comparePhrasePairs;
	typedef std::vector<AnchoredPhrasePair> PPVector;
	PPVector ppvec;
	ppairs->copyPhrasePairs(std::back_inserter(ppvec));
	std::sort(ppvec.begin(), ppvec.end(), comparePhrasePairs);

	PhraseSegmentation seg;
	// Walk the hypothesis chain backwards.  The hypothesis without a predecessor
	// is not converted (presumably Moses' empty initial hypothesis - TODO confirm).
	while(hypo->GetPrevHypo() != NULL) {
		// Source positions covered by this hypothesis (inclusive range).
		CoverageBitmap cov(sentence.size());
		const Moses::WordsRange &mrange = hypo->GetCurrSourceWordsRange();
		for(uint i = mrange.GetStartPos(); i <= mrange.GetEndPos(); i++)
			cov.set(i);

		// Source words, taken from factor 0 of the Moses source phrase.
		PhraseData srcpd;
		const Moses::Phrase *msrcphr = hypo->GetSourcePhrase();
		for(uint i = 0; i < msrcphr->GetSize(); i++)
			srcpd.push_back(msrcphr->GetFactor(i, 0)->GetString());

		// Target words, taken from factor 0 of the Moses target phrase.
		PhraseData tgtpd;
		const Moses::Phrase &mtgtphr = hypo->GetCurrTargetPhrase();
		for(uint i = 0; i < mtgtphr.GetSize(); i++)
			tgtpd.push_back(mtgtphr.GetFactor(i, 0)->GetString());

		PPVector::const_iterator it = std::lower_bound(ppvec.begin(), ppvec.end(),
			CompareAnchoredPhrasePairs::PhrasePairKey(cov, srcpd, tgtpd), comparePhrasePairs);
		// lower_bound returns end() when every stored phrase pair sorts before the
		// key; dereferencing it would be undefined behaviour.
		// NOTE(review): a non-end result is only guaranteed to be "not less than"
		// the key; an exact match is not verified here.
		if(it == ppvec.end()) {
			LOG(logger_, error, "Phrase pair used by moses not found in phrase pair collection.");
			return PhraseSegmentation();
		}
		seg.push_front(*it);

		hypo = hypo->GetPrevHypo();
	}

	return seg;
}
/**
 * Reconstructs the phrase segmentation of one sentence from the stored raw
 * translation of that sentence.
 *
 * The stored sentence is read word by word.  Words not starting with "|" are
 * collected as target words of the current phrase.  A word starting with "|"
 * ends the current phrase: its first and last characters are stripped and the
 * remainder is split at "-" into the first and last source position (inclusive)
 * covered by the phrase.  The resulting (coverage, source words, target words)
 * key is looked up among the phrase pairs of phraseTranslations.
 *
 * @param phraseTranslations phrase pairs available for the sentence
 * @param sentence           source words of the sentence
 * @param documentNumber     index into documents_
 * @param sentenceNumber     index of the sentence within that document
 * @return the phrase pairs in the order in which they occur in the stored
 *         translation; empty if sentence is empty
 * @throws FileFormatException if the alignment marker does not consist of
 *         exactly two fields, if the source range is inverted or exceeds the
 *         sentence length, or if no phrase pair can be located for the phrase.
 *         Number parsing errors from boost::lexical_cast are logged and
 *         rethrown unchanged.
 */
PhraseSegmentation NistXmlStateInitialiser::initSegmentation(
	boost::shared_ptr<const PhrasePairCollection> phraseTranslations,
	const std::vector<Word> &sentence,
	int documentNumber,
	int sentenceNumber
) const {
	if(sentence.empty())
		return PhraseSegmentation();

	// Sort a copy of the phrase pairs so that they can be searched by key.
	std::vector<AnchoredPhrasePair> ppvec;
	phraseTranslations->copyPhrasePairs(std::back_inserter(ppvec));
	CompareAnchoredPhrasePairs ppComparator;
	std::sort(ppvec.begin(), ppvec.end(), ppComparator);

	PhraseSegmentation seg;
	PhraseData tgtpd; // target words collected since the last alignment marker
	for(PlainTextDocument::const_word_iterator it = documents_[documentNumber].sentence_begin(sentenceNumber);
			it != documents_[documentNumber].sentence_end(sentenceNumber);
			++it) {
		if((*it).substr(0, 1) != "|") {
			// ordinary target word
			tgtpd.push_back(*it);
			continue;
		}

		// Alignment marker: end of the current phrase.  Strip the enclosing
		// characters and split the rest into the two range bounds.
		Word token((*it).substr(1, (*it).length()-2));
		std::vector<Word> srctokenrange;
		boost::split(srctokenrange, token, boost::is_any_of("-"), boost::token_compress_on);

		PhraseData srcpd;
		CoverageBitmap cov(sentence.size());
		try {
			if(srctokenrange.size() != 2) {
				BOOST_THROW_EXCEPTION(FileFormatException());
			}

			// Parse both bounds once instead of re-parsing the upper bound on
			// every loop iteration.
			const uint first = boost::lexical_cast<uint>(srctokenrange.front());
			const uint last = boost::lexical_cast<uint>(srctokenrange.back());

			// The positions index into sentence and cov; reject ranges that are
			// inverted or reach beyond the sentence instead of reading out of bounds.
			if(first > last || last >= sentence.size()) {
				BOOST_THROW_EXCEPTION(FileFormatException());
			}

			for(uint i = first; i <= last; ++i) {
				srcpd.push_back(sentence[i]);
				cov.set(i);
			}
		} catch(boost::exception &) {
			LOG(logger_, error, "Invalid alignment data in raw-translation file "
				"(document " << documentNumber << ", "
				" sentence " << sentenceNumber << "): " << *it
			);
			throw;
		}

		std::vector<AnchoredPhrasePair>::const_iterator appit = std::lower_bound(
			ppvec.begin(), ppvec.end(),
			CompareAnchoredPhrasePairs::PhrasePairKey(cov, srcpd, tgtpd),
			ppComparator
		);
		// lower_bound returns end() when every stored phrase pair sorts before the
		// key; dereferencing it would be undefined behaviour.
		// NOTE(review): a non-end result is only guaranteed to be "not less than"
		// the key; an exact match is not verified here.
		if(appit == ppvec.end()) {
			LOG(logger_, error, "No matching phrase pair for alignment data in raw-translation file "
				"(document " << documentNumber << ", "
				"sentence " << sentenceNumber << "): " << *it
			);
			BOOST_THROW_EXCEPTION(FileFormatException());
		}
		seg.push_back(*appit);
		tgtpd.clear();
	}

	return seg;
}