/**
 * Look up the target phrases for a source phrase directly in the compact
 * phrase table.
 *
 * Returns NULL when the source phrase is longer than the decoder's maximum
 * source phrase length or when the decoder yields no target phrases.
 * Otherwise returns a newly allocated collection holding copies of the
 * selected target phrases (at most m_tableLimit of them when the limit is
 * non-zero). The collection is handed to CacheForCleanup() before it is
 * returned.
 */
const TargetPhraseCollection*
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
{
  // A source phrase longer than the longest one observed during compilation
  // cannot be in the table.
  if (sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) {
    return NULL;
  }

  // Retrieve the target phrase collection from the phrase table.
  TargetPhraseVectorPtr decodedPhraseColl
    = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);

  const bool hasPhrases = decodedPhraseColl != NULL && decodedPhraseColl->size();
  if (!hasPhrases) {
    return NULL;
  }

  // Work on a private copy: the partial sort below reorders the vector.
  TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
  TargetPhraseCollection* phraseColl = new TargetPhraseCollection();

  // Apply the table limit if one is set and the vector is large enough;
  // otherwise keep every phrase.
  TargetPhraseVector::iterator nth = tpv->end();
  if (m_tableLimit != 0 && tpv->size() >= m_tableLimit) {
    nth = tpv->begin() + m_tableLimit;
  }
  NTH_ELEMENT4(tpv->begin(), nth, tpv->end(), CompareTargetPhrase());

  // Copy the selected phrases into the result collection.
  for (TargetPhraseVector::iterator cur = tpv->begin(); cur != nth; ++cur) {
    phraseColl->Add(new TargetPhrase(*cur));
  }

  // Cache the phrase collection for clean-up or retrieval with PREnc.
  const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);

  return phraseColl;
}
void ChartTranslationOptionList::ApplyThreshold() { if (m_size > m_ruleLimit) { // Something's gone wrong if the list has grown to m_ruleLimit * 2 // without being pruned. assert(m_size < m_ruleLimit * 2); // Reduce the list to the best m_ruleLimit options. The remaining // options can be overwritten on subsequent calls to Add(). NTH_ELEMENT4(m_collection.begin(), m_collection.begin()+m_ruleLimit, m_collection.begin()+m_size, ChartTranslationOptionOrderer()); m_size = m_ruleLimit; } // keep only those over best + threshold float scoreThreshold = -std::numeric_limits<float>::infinity(); CollType::const_iterator iter; for (iter = m_collection.begin(); iter != m_collection.begin()+m_size; ++iter) { const ChartTranslationOptions *transOpt = *iter; float score = transOpt->GetEstimateOfBestScore(); scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold; } scoreThreshold += StaticData::Instance().GetTranslationOptionThreshold(); CollType::iterator bound = std::partition(m_collection.begin(), m_collection.begin()+m_size, ScoreThresholdPred(scoreThreshold)); m_size = std::distance(m_collection.begin(), bound); }
void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc, const StackVec &stackVec, const WordsRange &range) { if (tpc.IsEmpty()) { return; } for (size_t i = 0; i < stackVec.size(); ++i) { const ChartCellLabel &chartCellLabel = *stackVec[i]; size_t numHypos = chartCellLabel.GetStack().cube->size(); if (numHypos == 0) { return; // empty stack. These rules can't be used } } float score = ChartTranslationOptions::CalcEstimateOfBestScore(tpc, stackVec); // If the rule limit has already been reached then don't add the option // unless it is better than at least one existing option. if (m_size > m_ruleLimit && score < m_scoreThreshold) { return; } // Add the option to the list. if (m_size == m_collection.size()) { // m_collection has reached capacity: create a new object. m_collection.push_back(new ChartTranslationOptions(tpc, stackVec, range, score)); } else { // Overwrite an unused object. *(m_collection[m_size]) = ChartTranslationOptions(tpc, stackVec, range, score); } ++m_size; // If the rule limit hasn't been exceeded then update the threshold. if (m_size <= m_ruleLimit) { m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold; } // Prune if bursting if (m_size == m_ruleLimit * 2) { NTH_ELEMENT4(m_collection.begin(), m_collection.begin() + m_ruleLimit - 1, m_collection.begin() + m_size, ChartTranslationOptionOrderer()); m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore(); m_size = m_ruleLimit; } }
/** pruning, remove partial translation options, if list too big */ void PartialTranslOptColl::Prune() { // done if not too big if ( m_list.size() <= m_maxSize ) { return; } // TRACE_ERR( "pruning partial translation options from size " << m_list.size() << std::endl); // find nth element NTH_ELEMENT4(m_list.begin(), m_list.begin() + m_maxSize, m_list.end(), ComparePartialTranslationOption); m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore(); // delete the rest for (size_t i = m_maxSize ; i < m_list.size() ; ++i) { delete m_list[i]; m_totalPruned++; } m_list.resize(m_maxSize); // TRACE_ERR( "pruned to size " << m_list.size() << ", total pruned: " << m_totalPruned << std::endl); }