Пример #1
0
	/**
	 * Appends to sentWords[sentno] the target words that are aligned to
	 * selected source words of the given anchored phrase pair.
	 *
	 * A source word is selected when maxLength is 0 (no filtering) or when
	 * its size() is strictly greater than maxLength. Note that, despite its
	 * name, maxLength therefore acts as an exclusive lower bound on the
	 * source word length. The length test is a placeholder criterion kept
	 * from the original implementation.
	 *
	 * @param sentno    index into sentWords that receives the words
	 * @param app       anchored phrase pair; only app.second is read here
	 * @param maxLength length threshold; 0 disables the length filter
	 * @return number of target words appended by this call
	 */
	uint AddWord(
		const uint sentno,
		const AnchoredPhrasePair &app,
		const uint maxLength
	) {
		WordAlignment alignment = app.second.get().getWordAlignment();
		PhraseData sourceWords = app.second.get().getSourcePhrase().get();
		PhraseData targetWords = app.second.get().getTargetPhrase().get();

		uint added = 0;
		for(uint srcIdx = 0; srcIdx < sourceWords.size(); ++srcIdx) {
			// Filter active and the source word is not longer than the threshold: skip it.
			if((maxLength != 0) && (sourceWords[srcIdx].size() <= maxLength)) {
				continue;
			}

			// Walk every alignment point of this source word and store the
			// target word it points to.
			WordAlignment::const_iterator alignedIt = alignment.begin_for_source(srcIdx);
			while(alignedIt != alignment.end_for_source(srcIdx)) {
				sentWords[sentno].push_back(targetWords[*alignedIt]);
				++added;
				++alignedIt;
			}
		}
		return added;
	}
Пример #2
0
	uint AddWord(
		const uint sentno,
		const uint phrno,
		const AnchoredPhrasePair &app,
		const std::string pos,
		const int historySize
	) {
		WordAlignment wa = app.second.get().getWordAlignment();
		PhraseData sd = app.second.get().getSourcePhrase().get();
		PhraseData td = app.second.get().getTargetPhrase().get();

		uint wordno = app.first.find_first();

		uint addCount = 0;
		for(uint j = 0; j < sd.size(); ++j) {
			// TODO: we could support other conditins here as well!
			if(posTags[sentno][wordno] == pos) {
				for(WordAlignment::const_iterator
					wit = wa.begin_for_source(j);
					wit != wa.end_for_source(j);
					++wit
				) {
					std::string wordPair = sd[j] + "_" + td[*wit];
					long long b = FindVocabularyPosition(wordPair);
					if(b < 0)
						continue;
					SelectedWordVector word(phrno,*wit,sd[j],td[*wit],wordno,size);
					FindVector(b,word.vec);
					selectedWords[sentno].push_back(word);
					selectedWords[sentno].back().similarity = MaxSimilarityWithHistory(
						sentno,
						selectedWords[sentno].size()-1,
						historySize
					);
					// currentScore += word.similarity;
					addCount++;
					//LOG(logger_, debug, "add word " << td[*wit] << " aligned to " << sd[j]);
				}
			}
			wordno++;
		}
		return addCount;
	};