void CIMIContext::_backTraceBestPath () { CLatticeStates& tail_states = m_lattice[m_tailIdx].m_latticeStates; // there must be some transfer errors if (tail_states.size() != 1) return; TLatticeState *bs = &(tail_states[0]); while (bs->m_pBackTraceNode) { unsigned start = bs->m_pBackTraceNode->m_frIdx; unsigned end = bs->m_frIdx; CLatticeFrame & end_fr = m_lattice[end]; if (! (end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) { end_fr.m_bwType |= CLatticeFrame::BESTWORD; end_fr.m_bestWord.m_start = start; end_fr.m_bestWord.m_end = end; end_fr.m_bestWord.m_wordId = bs->m_backTraceWordId; end_fr.m_bestWord.m_cwstr = end_fr.m_wstr.empty()? _getWstr (bs->m_backTraceWordId): end_fr.m_wstr.c_str(); } m_bestPath.push_back (end); bs = bs->m_pBackTraceNode; } std::reverse (m_bestPath.begin(), m_bestPath.end()); }
void CIMIContext::getCandidates(unsigned frIdx, CCandidates& result) { TCandiPair cp; static std::map<wstring, TCandiPair> candidates_map; std::map<wstring, TCandiPair>::iterator candidates_it; candidates_map.clear(); result.clear(); std::vector<unsigned> st; getSelectedSentence(st, frIdx); cp.m_candi.m_start = m_candiStarts = frIdx++; for (; frIdx < m_tailIdx; ++frIdx) { if (m_lattice[frIdx + 1].isSyllableSepFrame()) continue; CLatticeFrame &fr = m_lattice[frIdx]; if (!fr.isSyllableFrame()) continue; cp.m_candi.m_end = frIdx; if (fr.m_bwType != CLatticeFrame::NO_BESTWORD) { for (size_t i = 0; i < m_nBest; i++) { if (fr.m_bestWords.find(i) == fr.m_bestWords.end()) continue; CCandidate candi = fr.m_bestWords[i]; if (candi.m_start != m_candiStarts) continue; if (candi.m_pLexiconState == NULL) continue; TLexiconState & lxst = *(candi.m_pLexiconState); int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (len == 0) len = 1; cp.m_candi = candi; cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED, fr.m_bwType & CLatticeFrame::BESTWORD, len, false, 0); candidates_map[candi.m_cwstr] = cp; } } bool found = false; CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { TLexiconState & lxst = *it; if (lxst.m_start != m_candiStarts) continue; int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (0 == len) len = 1; found = true; unsigned word_num; const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num); for (unsigned i = 0; i < word_num; ++i) { if (m_csLevel < words[i].m_csLevel) continue; cp.m_candi.m_wordId = words[i].m_id; cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = &lxst; if (!cp.m_candi.m_cwstr) continue; //sorting according to the order in PinYinTire cp.m_Rank = TCandiRank(false, !st.empty() && st.front() == cp.m_candi.m_wordId, len, false, i); candidates_it = candidates_map.find(cp.m_candi.m_cwstr); if (candidates_it == candidates_map.end() || cp.m_Rank < candidates_it->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) { candidates_map[cp.m_candi.m_cwstr] = cp; // print_wide(cp.m_candi.m_cwstr); // printf(" "); } } // puts(""); } if (!found) continue; // FIXME: need better solution later if (m_bDynaCandiOrder) { CLatticeStates::iterator it = fr.m_latticeStates.begin(); CLatticeStates::iterator ite = fr.m_latticeStates.end(); // printf("adjusting "); for (; it != ite; ++it) { TLatticeState & ltst = *it; if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts) continue; cp.m_candi.m_wordId = ltst.m_backTraceWordId; cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = ltst.m_pLexiconState; if (!cp.m_candi.m_cwstr) continue; int len = cp.m_candi.m_pLexiconState->m_syls.size() - cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies; if (0 == len) len = 1; cp.m_Rank = TCandiRank(false, !st.empty() && st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score / ltst.m_pBackTraceNode->m_score); candidates_it = candidates_map.find(cp.m_candi.m_cwstr); if (candidates_it == candidates_map.end() || cp.m_Rank < candidates_it->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) { // print_wide(cp.m_candi.m_cwstr); // std::string buf; // ltst.m_score.toString(buf); // printf("len:%d %s", len, buf.c_str()); // ltst.m_pBackTraceNode->m_score.toString(buf); // printf("%s ", buf.c_str()); candidates_map[cp.m_candi.m_cwstr] = cp; } } // puts(""); } m_candiEnds = frIdx; } std::vector<TCandiPairPtr> vec; vec.reserve(candidates_map.size()); for (candidates_it = candidates_map.begin(); candidates_it != candidates_map.end(); ++candidates_it) { vec.push_back(TCandiPairPtr(&(candidates_it->second))); } std::sort(vec.begin(), vec.end()); for (size_t i = 0; i < vec.size(); i++) { // print_wide(vec[i].m_Ptr->m_candi.m_cwstr); // printf(" "); result.push_back(vec[i].m_Ptr->m_candi); } // puts(""); }
bool CIMIContext::_backTracePaths(const std::vector<TLatticeState>& tail_states, int rank, TPath& path, TPath& segmentPath) { path.clear(); segmentPath.clear(); if (rank >= (int) tail_states.size()) { // rank out of bounds, only return the segment path return false; } const TLatticeState *bs = &(tail_states[rank]); while (bs->m_pBackTraceNode) { unsigned start = bs->m_pBackTraceNode->m_frIdx; unsigned end = bs->m_frIdx; CLatticeFrame & end_fr = m_lattice[end]; if (!(end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) { const TWCHAR* cwstr = NULL; if (end_fr.m_wstr.empty()) { cwstr = _getWstr(bs->m_backTraceWordId); } else { cwstr = end_fr.m_wstr.c_str(); } CCandidate candi(start, end, bs->m_pLexiconState, cwstr, bs->m_backTraceWordId); end_fr.m_bwType |= CLatticeFrame::BESTWORD; end_fr.m_bestWords[rank] = candi; if (rank == 0) { end_fr.m_selWord = candi; // select the first by default. } } if (bs->m_pBackTraceNode->m_pLexiconState) { std::vector<unsigned> seg_path = bs->m_pBackTraceNode->m_pLexiconState->m_seg_path; std::vector<unsigned>::reverse_iterator it = seg_path.rbegin(); for (; it != seg_path.rend(); ++it) { if (segmentPath.empty() || segmentPath.back() != *it) segmentPath.push_back(*it); } } path.push_back(end); bs = bs->m_pBackTraceNode; } std::reverse(path.begin(), path.end()); std::reverse(segmentPath.begin(), segmentPath.end()); #ifdef DEBUG std::vector<unsigned>::iterator it; printf("trace lattice path[%d]: ", rank); for (it = path.begin(); it != path.end(); ++it) printf("%d ", *it); printf("\n"); printf("trace segments path[%d]: ", rank); for (it = segmentPath.begin(); it != segmentPath.end(); ++it) printf("%d ", *it); printf("\n"); #endif return true; }
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result) { TCandiPair cp; static std::map<wstring, TCandiPair> map; std::map<wstring, TCandiPair>::iterator it_map; map.clear(); result.clear(); std::vector<unsigned> st; getBestSentence (st, frIdx); cp.m_candi.m_start = m_candiStarts = frIdx++; for (;frIdx < m_tailIdx; ++frIdx) { CLatticeFrame &fr = m_lattice[frIdx]; if (!fr.isSyllableFrame ()) continue; cp.m_candi.m_end = frIdx; if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) { cp.m_candi = fr.m_bestWord; cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED, fr.m_bwType & CLatticeFrame::BESTWORD, 0, false, 0); map [cp.m_candi.m_cwstr] = cp; } bool found = false; CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { TLexiconState & lxst = *it; if (lxst.m_start != m_candiStarts) continue; int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (0 == len) len = 1; found = true; unsigned word_num; const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num); for (unsigned i=0; i<word_num; ++i) { if (m_csLevel < words[i].m_csLevel) continue; cp.m_candi.m_wordId = words[i].m_id; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = &lxst; if (!cp.m_candi.m_cwstr) continue; //sorting according to the order in PinYinTire cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, false, i); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map [cp.m_candi.m_cwstr] = cp; } } if (!found) continue; // FIXME: need better solution later if (m_bDynaCandiOrder) { CLatticeStates::iterator it = fr.m_latticeStates.begin(); CLatticeStates::iterator ite = fr.m_latticeStates.end(); for (; it != ite; ++it) { TLatticeState & ltst = *it; if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts) continue; cp.m_candi.m_wordId = ltst.m_backTraceWordId; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = ltst.m_pLexiconState; if (!cp.m_candi.m_cwstr) continue; int len = cp.m_candi.m_pLexiconState->m_syls.size() - cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies; if (0 == len) len = 1; cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map[cp.m_candi.m_cwstr] = cp; } } m_candiEnds = frIdx; } std::vector<TCandiPairPtr> vec; vec.reserve(map.size()); std::map<wstring, TCandiPair>::iterator it_mapE = map.end(); for (it_map = map.begin(); it_map != it_mapE; ++it_map) vec.push_back(TCandiPairPtr(&(it_map->second))); std::make_heap(vec.begin(), vec.end()); std::sort_heap(vec.begin(), vec.end()); for (int i=0, sz=vec.size(); i < sz; ++i) result.push_back(vec[i].m_Ptr->m_candi); }
void CIMIContext::_backTraceBestPaths () { CLatticeStates& tail_states = m_lattice[m_tailIdx].m_latticeStates; // there must be some transfer errors if (tail_states.size() != 1) return; TLatticeState *bs = &(tail_states[0]); while (bs->m_pBackTraceNode) { unsigned start = bs->m_pBackTraceNode->m_frIdx; unsigned end = bs->m_frIdx; CLatticeFrame & end_fr = m_lattice[end]; if (! (end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) { end_fr.m_bwType |= CLatticeFrame::BESTWORD; end_fr.m_bestWord.m_start = start; end_fr.m_bestWord.m_end = end; end_fr.m_bestWord.m_pLexiconState = bs->m_pLexiconState; end_fr.m_bestWord.m_wordId = bs->m_backTraceWordId; end_fr.m_bestWord.m_cwstr = end_fr.m_wstr.empty()? _getWstr (bs->m_backTraceWordId): end_fr.m_wstr.c_str(); } if (bs->m_pBackTraceNode->m_pLexiconState) { std::vector<unsigned> seg_path = bs->m_pBackTraceNode->m_pLexiconState->m_seg_path; std::vector<unsigned>::reverse_iterator it = seg_path.rbegin(); std::vector<unsigned>::reverse_iterator ite = seg_path.rend(); for (; it != seg_path.rend(); ++it) { if (m_bestSegPath.empty() || m_bestSegPath.back() != *it) m_bestSegPath.push_back (*it); } } m_bestPath.push_back (end); bs = bs->m_pBackTraceNode; } std::reverse (m_bestPath.begin(), m_bestPath.end()); std::reverse (m_bestSegPath.begin(), m_bestSegPath.end()); if (m_pPySegmentor) m_pPySegmentor->notify_best_segpath (m_bestSegPath); #ifdef DEBUG std::vector<unsigned>::iterator it; printf ("best lattice path: "); for (it = m_bestPath.begin(); it != m_bestPath.end(); ++it) printf ("%d ", *it); printf ("best segments path: "); for (it = m_bestSegPath.begin(); it != m_bestSegPath.end(); ++it) printf ("%d ", *it); printf ("\n"); #endif }