// Collect the words of one sentence by walking the lattice backwards from
// `end` to `start`, following each word's m_start link.
//
// result : cleared, then filled with the words in left-to-right order.
// rank   : -1 selects the user-selected sentence (m_selWord of each frame);
//          0 .. m_nBest-1 selects the rank-th best sentence (m_bestWords[rank]).
//          Any other value returns 0 and leaves `result` untouched.
// start  : frame index at which the backward walk stops.
// end    : frame index at which the walk begins; UINT_MAX means m_tailIdx - 1.
//          Trailing frames marked NO_BESTWORD are skipped first.
// Returns the number of words placed in `result`.
//
// NOTE(review): m_bestWords is indexed with operator[]; getCandidates probes
// the same member with find(), so if it is a std::map a missing rank would be
// default-inserted here -- confirm every frame on the path has that rank.
unsigned
CIMIContext::getBestSentence(CCandidates& result, int rank,
                             unsigned start, unsigned end)
{
    // -1 means selected sentence
    if (rank < -1 || rank >= static_cast<int>(m_nBest))
        return 0;

    result.clear();

    if (end == UINT_MAX)
        end = m_tailIdx - 1;

    while (end > start
           && m_lattice[end].m_bwType == CLatticeFrame::NO_BESTWORD)
        end--;

    // The walk visits words last-to-first.  Appending and reversing once at
    // the end is linear, whereas inserting every word at the front shifts all
    // previously collected words each time.
    unsigned nWordConverted = 0;
    unsigned i = end;
    while (i > start) {
        CLatticeFrame& fr = m_lattice[i];
        const CCandidate& word =
            (rank < 0) ? fr.m_selWord : fr.m_bestWords[rank];

        result.push_back(word);
        i = word.m_start;
        nWordConverted++;
    }
    std::reverse(result.begin(), result.end());

    return nWordConverted;
}
unsigned CIMIContext::getBestSentence(std::vector<unsigned>& result, int rank, unsigned start, unsigned end) { CCandidates sentence; unsigned nWordConverted = getBestSentence(sentence, rank, start, end); result.clear(); for (size_t i = 0; i < sentence.size(); i++) { result.push_back(sentence[i].m_wordId); } return nWordConverted; }
// Text flavour of getBestSentence(): runs the CCandidates overload with the
// same rank/start/end and concatenates the m_cwstr of every word, in order,
// into `result` (which is cleared first).  Returns the word count reported by
// the CCandidates overload.
unsigned
CIMIContext::getBestSentence(wstring& result, int rank,
                             unsigned start, unsigned end)
{
    CCandidates words;
    const unsigned converted = getBestSentence(words, rank, start, end);

    result.clear();
    for (CCandidates::iterator w = words.begin(); w != words.end(); ++w)
        result.append(w->m_cwstr);

    return converted;
}
std::vector<CCandidates> CIMIContext::getBestSentenceTails(int rank, unsigned start, unsigned end) { std::vector<CCandidates> result; if (rank < 0) { return result; } CCandidates sentence; unsigned word_num = getBestSentence(sentence, rank, start, end); unsigned tail_word_num = word_num; while (tail_word_num > 1) { unsigned dec = tail_word_num / (m_maxTailCandidateNum + 1) + 1; tail_word_num -= std::min(dec, tail_word_num); if (tail_word_num <= 1) { break; } CCandidates tail(sentence.begin(), sentence.begin() + tail_word_num); result.push_back(tail); } return result; }
void CIMIContext::getCandidates(unsigned frIdx, CCandidates& result) { TCandiPair cp; static std::map<wstring, TCandiPair> candidates_map; std::map<wstring, TCandiPair>::iterator candidates_it; candidates_map.clear(); result.clear(); std::vector<unsigned> st; getSelectedSentence(st, frIdx); cp.m_candi.m_start = m_candiStarts = frIdx++; for (; frIdx < m_tailIdx; ++frIdx) { if (m_lattice[frIdx + 1].isSyllableSepFrame()) continue; CLatticeFrame &fr = m_lattice[frIdx]; if (!fr.isSyllableFrame()) continue; cp.m_candi.m_end = frIdx; if (fr.m_bwType != CLatticeFrame::NO_BESTWORD) { for (size_t i = 0; i < m_nBest; i++) { if (fr.m_bestWords.find(i) == fr.m_bestWords.end()) continue; CCandidate candi = fr.m_bestWords[i]; if (candi.m_start != m_candiStarts) continue; if (candi.m_pLexiconState == NULL) continue; TLexiconState & lxst = *(candi.m_pLexiconState); int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (len == 0) len = 1; cp.m_candi = candi; cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED, fr.m_bwType & CLatticeFrame::BESTWORD, len, false, 0); candidates_map[candi.m_cwstr] = cp; } } bool found = false; CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { TLexiconState & lxst = *it; if (lxst.m_start != m_candiStarts) continue; int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (0 == len) len = 1; found = true; unsigned word_num; const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num); for (unsigned i = 0; i < word_num; ++i) { if (m_csLevel < words[i].m_csLevel) continue; cp.m_candi.m_wordId = words[i].m_id; cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = &lxst; if (!cp.m_candi.m_cwstr) continue; //sorting according to the order in PinYinTire cp.m_Rank = TCandiRank(false, !st.empty() && st.front() == cp.m_candi.m_wordId, len, false, i); candidates_it = candidates_map.find(cp.m_candi.m_cwstr); if 
(candidates_it == candidates_map.end() || cp.m_Rank < candidates_it->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) { candidates_map[cp.m_candi.m_cwstr] = cp; // print_wide(cp.m_candi.m_cwstr); // printf(" "); } } // puts(""); } if (!found) continue; // FIXME: need better solution later if (m_bDynaCandiOrder) { CLatticeStates::iterator it = fr.m_latticeStates.begin(); CLatticeStates::iterator ite = fr.m_latticeStates.end(); // printf("adjusting "); for (; it != ite; ++it) { TLatticeState & ltst = *it; if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts) continue; cp.m_candi.m_wordId = ltst.m_backTraceWordId; cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = ltst.m_pLexiconState; if (!cp.m_candi.m_cwstr) continue; int len = cp.m_candi.m_pLexiconState->m_syls.size() - cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies; if (0 == len) len = 1; cp.m_Rank = TCandiRank(false, !st.empty() && st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score / ltst.m_pBackTraceNode->m_score); candidates_it = candidates_map.find(cp.m_candi.m_cwstr); if (candidates_it == candidates_map.end() || cp.m_Rank < candidates_it->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) { // print_wide(cp.m_candi.m_cwstr); // std::string buf; // ltst.m_score.toString(buf); // printf("len:%d %s", len, buf.c_str()); // ltst.m_pBackTraceNode->m_score.toString(buf); // printf("%s ", buf.c_str()); candidates_map[cp.m_candi.m_cwstr] = cp; } } // puts(""); } m_candiEnds = frIdx; } std::vector<TCandiPairPtr> vec; vec.reserve(candidates_map.size()); for (candidates_it = candidates_map.begin(); candidates_it != candidates_map.end(); ++candidates_it) { vec.push_back(TCandiPairPtr(&(candidates_it->second))); } std::sort(vec.begin(), vec.end()); for (size_t i = 0; i < vec.size(); i++) { // print_wide(vec[i].m_Ptr->m_candi.m_cwstr); // printf(" "); result.push_back(vec[i].m_Ptr->m_candi); } // puts(""); }
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result) { TCandiPair cp; static std::map<wstring, TCandiPair> map; std::map<wstring, TCandiPair>::iterator it_map; map.clear(); result.clear(); std::vector<unsigned> st; getBestSentence (st, frIdx); cp.m_candi.m_start = m_candiStarts = frIdx++; for (;frIdx < m_tailIdx; ++frIdx) { CLatticeFrame &fr = m_lattice[frIdx]; if (!fr.isSyllableFrame ()) continue; cp.m_candi.m_end = frIdx; if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) { cp.m_candi = fr.m_bestWord; cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED, fr.m_bwType & CLatticeFrame::BESTWORD, 0, false, 0); map [cp.m_candi.m_cwstr] = cp; } bool found = false; CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { TLexiconState & lxst = *it; if (lxst.m_start != m_candiStarts) continue; int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (0 == len) len = 1; found = true; unsigned word_num; const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num); for (unsigned i=0; i<word_num; ++i) { if (m_csLevel < words[i].m_csLevel) continue; cp.m_candi.m_wordId = words[i].m_id; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = &lxst; if (!cp.m_candi.m_cwstr) continue; //sorting according to the order in PinYinTire cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, false, i); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map [cp.m_candi.m_cwstr] = cp; } } if (!found) continue; // FIXME: need better solution later if (m_bDynaCandiOrder) { CLatticeStates::iterator it = fr.m_latticeStates.begin(); CLatticeStates::iterator ite = fr.m_latticeStates.end(); for (; it != ite; ++it) { TLatticeState & ltst = *it; if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts) continue; 
cp.m_candi.m_wordId = ltst.m_backTraceWordId; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = ltst.m_pLexiconState; if (!cp.m_candi.m_cwstr) continue; int len = cp.m_candi.m_pLexiconState->m_syls.size() - cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies; if (0 == len) len = 1; cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map[cp.m_candi.m_cwstr] = cp; } } m_candiEnds = frIdx; } std::vector<TCandiPairPtr> vec; vec.reserve(map.size()); std::map<wstring, TCandiPair>::iterator it_mapE = map.end(); for (it_map = map.begin(); it_map != it_mapE; ++it_map) vec.push_back(TCandiPairPtr(&(it_map->second))); std::make_heap(vec.begin(), vec.end()); std::sort_heap(vec.begin(), vec.end()); for (int i=0, sz=vec.size(); i < sz; ++i) result.push_back(vec[i].m_Ptr->m_candi); }