Beispiel #1
0
// Decodes `sentence` with Moses and converts the best hypothesis into a
// PhraseSegmentation made of entries taken from `ppairs`.
//
// ppairs:   collection of anchored phrase pairs; every phrase used by the
//           Moses hypothesis is looked up in a sorted copy of this collection.
// sentence: source words to translate.
//
// Returns the segmentation in the order obtained by following the hypothesis
// back-pointers from the best hypothesis and prepending each step. An empty
// segmentation is returned when the sentence is empty, when Moses delivers no
// best hypothesis, or when a phrase of the hypothesis cannot be located in
// `ppairs` (the latter two cases are logged as errors).
PhraseSegmentation BeamSearchAdapter::search(boost::shared_ptr<const PhrasePairCollection> ppairs, const std::vector<Word> &sentence) const {
    if(sentence.empty())
        return PhraseSegmentation();

    // Serialise the words separated by single blanks, without a trailing blank.
    std::stringstream sntstream;
    std::copy(sentence.begin(), sentence.end() - 1, std::ostream_iterator<Word>(sntstream, " "));
    sntstream << sentence.back();

    boost::scoped_ptr<Moses::Sentence> msent(new Moses::Sentence());
    std::vector<Moses::FactorType> ftype(1, 0);
    // NOTE(review): "|" is presumably the Moses factor delimiter -- confirm.
    msent->CreateFromString(ftype, sntstream.str(), "|");

    const Moses::TranslationSystem &system =
        Moses::StaticData::Instance().GetTranslationSystem(Moses::TranslationSystem::DEFAULT);
    boost::scoped_ptr<Moses::Manager> manager(new Moses::Manager(0, *msent, Moses::StaticData::Instance().GetSearchAlgorithm(), &system));
    manager->ProcessSentence();
    const Moses::Hypothesis *hypo = manager->GetBestHypothesis();

    if(hypo == NULL) {
        // Nothing to convert, so skip copying and sorting the phrase pairs.
        LOG(logger_, error, "No answer from moses.");
        return PhraseSegmentation();
    }

    // Sorted copy of the phrase pairs so that each hypothesis step can be
    // located by binary search.
    CompareAnchoredPhrasePairs comparePhrasePairs;
    typedef std::vector<AnchoredPhrasePair> PPVector;
    PPVector ppvec;
    ppairs->copyPhrasePairs(std::back_inserter(ppvec));
    std::sort(ppvec.begin(), ppvec.end(), comparePhrasePairs);

    PhraseSegmentation seg;
    // The hypothesis without a predecessor is not converted; the loop stops
    // as soon as it is reached.
    while(hypo->GetPrevHypo() != NULL) {
        // Coverage: the source positions translated by this hypothesis step.
        CoverageBitmap cov(sentence.size());
        const Moses::WordsRange &mrange = hypo->GetCurrSourceWordsRange();
        for(uint i = mrange.GetStartPos(); i <= mrange.GetEndPos(); i++)
            cov.set(i);

        // Source side of the phrase (factor 0 of every word).
        PhraseData srcpd;
        const Moses::Phrase *msrcphr = hypo->GetSourcePhrase();
        for(uint i = 0; i < msrcphr->GetSize(); i++)
            srcpd.push_back(msrcphr->GetFactor(i, 0)->GetString());

        // Target side of the phrase (factor 0 of every word).
        PhraseData tgtpd;
        const Moses::Phrase &mtgtphr = hypo->GetCurrTargetPhrase();
        for(uint i = 0; i < mtgtphr.GetSize(); i++)
            tgtpd.push_back(mtgtphr.GetFactor(i, 0)->GetString());

        PPVector::const_iterator it = std::lower_bound(ppvec.begin(), ppvec.end(),
                                      CompareAnchoredPhrasePairs::PhrasePairKey(cov, srcpd, tgtpd),
                                      comparePhrasePairs);
        // lower_bound yields end() when the key sorts after every entry;
        // dereferencing it would be undefined behaviour.
        // NOTE(review): a non-end result is not verified to be an exact match
        // for the key -- confirm whether the comparator allows such a check.
        if(it == ppvec.end()) {
            LOG(logger_, error, "Phrase pair of moses hypothesis not found in phrase pair collection.");
            return PhraseSegmentation();
        }
        // Hypotheses are visited last-to-first, so prepend.
        seg.push_front(*it);

        hypo = hypo->GetPrevHypo();
    }

    return seg;
}
Beispiel #2
0
// Builds the initial phrase segmentation of one sentence from a stored
// raw translation.
//
// The words of sentence `sentenceNumber` in document `documentNumber` are
// read from documents_. Words not starting with "|" are collected as target
// words. A word starting with "|" ends the current phrase: its first and last
// characters are stripped and the remainder is parsed as "first-last", an
// inclusive range of indices into `sentence` giving the source words of the
// phrase. The resulting (coverage, source, target) key is looked up in a
// sorted copy of `phraseTranslations` and the entry found is appended to the
// segmentation. Target words following the last marker are not used.
//
// Returns an empty segmentation when `sentence` is empty.
// Throws FileFormatException (after logging) when a marker does not contain
// exactly two range fields, when the range is inverted or reaches beyond the
// end of `sentence`, or when no phrase pair can be located for the key.
// Exceptions raised while converting the range fields to numbers are logged
// and rethrown.
PhraseSegmentation
NistXmlStateInitialiser::initSegmentation(
	boost::shared_ptr<const PhrasePairCollection> phraseTranslations,
	const std::vector<Word> &sentence,
	int documentNumber,
	int sentenceNumber
) const {
	if(sentence.empty())
		return PhraseSegmentation();

	// Sorted copy of the phrase pairs for binary search.
	std::vector<AnchoredPhrasePair> ppvec;
	phraseTranslations->copyPhrasePairs(std::back_inserter(ppvec));

	CompareAnchoredPhrasePairs ppComparator;
	std::sort(ppvec.begin(), ppvec.end(), ppComparator);

	PhraseSegmentation seg;
	PhraseData tgtpd; // target words accumulated since the previous marker
	for(PlainTextDocument::const_word_iterator
		it = documents_[documentNumber].sentence_begin(sentenceNumber);
		it != documents_[documentNumber].sentence_end(sentenceNumber);
		++it
	) {
		if((*it).substr(0, 1) != "|") { // word
			tgtpd.push_back(*it);
			continue;
		}
		// end of hypothesis
		// Strip the first and the last character of the marker.
		Word token((*it).substr(1, (*it).length()-2));
		std::vector<Word> srctokenrange; // metadata
		boost::split(
			srctokenrange,
			token,
			boost::is_any_of("-"),
			boost::token_compress_on
		);
		PhraseData srcpd;
		CoverageBitmap cov(sentence.size());
		try {
			if(srctokenrange.size() != 2) {
				BOOST_THROW_EXCEPTION(FileFormatException());
			}
			// Parse both bounds once instead of on every loop iteration.
			const uint first = boost::lexical_cast<uint>(srctokenrange.front());
			const uint last = boost::lexical_cast<uint>(srctokenrange.back());
			// Reject ranges that are inverted or index past the sentence;
			// using them would read outside `sentence`.
			if(first > last || last >= sentence.size()) {
				BOOST_THROW_EXCEPTION(FileFormatException());
			}
			for(uint i = first; i <= last; ++i) {
				srcpd.push_back(sentence[i]);
				cov.set(i);
			}
		} catch(boost::exception &) {
			LOG(logger_, error,
				"Invalid alignment data in raw-translation file "
				"(document " << documentNumber << ", "
				" sentence " << sentenceNumber << "): "
				<< *it
			);
			throw;
		}
		std::vector<AnchoredPhrasePair>::const_iterator
			appit = std::lower_bound(
				ppvec.begin(),
				ppvec.end(),
				CompareAnchoredPhrasePairs::PhrasePairKey(cov, srcpd, tgtpd),
				ppComparator
			);
		// lower_bound yields end() when the key sorts after every entry;
		// dereferencing it would be undefined behaviour.
		// NOTE(review): a non-end result is not verified to be an exact match
		// for the key -- confirm whether the comparator allows such a check.
		if(appit == ppvec.end()) {
			LOG(logger_, error,
				"No matching phrase pair for raw-translation file entry "
				"(document " << documentNumber << ", "
				"sentence " << sentenceNumber << "): "
				<< *it
			);
			BOOST_THROW_EXCEPTION(FileFormatException());
		}
		seg.push_back(*appit);
		tgtpd.clear();
	}
	return seg;
}