// Appends to sentWords[sentno] every target word of `app` that is aligned to
// a selected source word of the same phrase pair.
//
// A source word is selected when maxLength == 0 (filter disabled) or when its
// size() is strictly greater than maxLength. Despite the parameter's name,
// maxLength therefore behaves as an exclusive lower bound on the word length.
//
// Returns the number of words appended by this call.
uint AddWord( const uint sentno, const AnchoredPhrasePair &app, const uint maxLength ) {
	WordAlignment alignment = app.second.get().getWordAlignment();
	PhraseData sourceWords = app.second.get().getSourcePhrase().get();
	PhraseData targetWords = app.second.get().getTargetPhrase().get();

	uint numAdded = 0;
	for(uint srcIdx = 0; srcIdx < sourceWords.size(); ++srcIdx) {
		// Length is only a stand-in selection criterion used for testing;
		// a better criterion should replace it eventually.
		const bool filterActive = (maxLength != 0);
		if(filterActive && sourceWords[srcIdx].size() <= maxLength)
			continue;

		// Walk over all target positions aligned to this source position.
		WordAlignment::const_iterator tgtIt = alignment.begin_for_source(srcIdx);
		while(tgtIt != alignment.end_for_source(srcIdx)) {
			sentWords[sentno].push_back(targetWords[*tgtIt]);
			++numAdded;
			// LOG(logger_, debug, "add word " << td[*wit] << " aligned to " << sd[j]);
			++tgtIt;
		}
	}
	return numAdded;
}
uint AddWord( const uint sentno, const uint phrno, const AnchoredPhrasePair &app, const std::string pos, const int historySize ) { WordAlignment wa = app.second.get().getWordAlignment(); PhraseData sd = app.second.get().getSourcePhrase().get(); PhraseData td = app.second.get().getTargetPhrase().get(); uint wordno = app.first.find_first(); uint addCount = 0; for(uint j = 0; j < sd.size(); ++j) { // TODO: we could support other conditins here as well! if(posTags[sentno][wordno] == pos) { for(WordAlignment::const_iterator wit = wa.begin_for_source(j); wit != wa.end_for_source(j); ++wit ) { std::string wordPair = sd[j] + "_" + td[*wit]; long long b = FindVocabularyPosition(wordPair); if(b < 0) continue; SelectedWordVector word(phrno,*wit,sd[j],td[*wit],wordno,size); FindVector(b,word.vec); selectedWords[sentno].push_back(word); selectedWords[sentno].back().similarity = MaxSimilarityWithHistory( sentno, selectedWords[sentno].size()-1, historySize ); // currentScore += word.similarity; addCount++; //LOG(logger_, debug, "add word " << td[*wit] << " aligned to " << sd[j]); } } wordno++; } return addCount; };