void CIMIContext::_saveUserDict () { if (!m_pUserDict) return; if (m_bestPath.empty()) return; CSyllables syls; unsigned s = 0; bool has_user_selected = false; std::vector<unsigned>::iterator it = m_bestPath.begin(); std::vector<unsigned>::iterator ite = m_bestPath.end() - 1; for (; it != ite; ++it, ++s) { CLatticeFrame &fr = m_lattice[*it]; if (!fr.isSyllableFrame ()) break; has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED); CSyllables &tmp = fr.m_bestWord.m_pLexiconState->m_syls; if (syls.size() + tmp.size() > MAX_USRDEF_WORD_LEN) { --it; break; } std::copy (tmp.begin(), tmp.end(), back_inserter(syls)); } if (s >= 2 && has_user_selected && !syls.empty()) { wstring phrase; getBestSentence (phrase, 0, *it); m_pUserDict->addWord (syls, phrase); } }
unsigned CIMIContext::getBestSentence(std::vector<unsigned>& result, int rank, unsigned start, unsigned end) { CCandidates sentence; unsigned nWordConverted = getBestSentence(sentence, rank, start, end); result.clear(); for (size_t i = 0; i < sentence.size(); i++) { result.push_back(sentence[i].m_wordId); } return nWordConverted; }
// String flavour of getBestSentence().
//
// Delegates to the CCandidates overload with the same rank/start/end, then
// rebuilds `result` as the concatenation of every returned candidate's
// m_cwstr, in order.
//
// Returns whatever the CCandidates overload returned.
unsigned
CIMIContext::getBestSentence(wstring& result, int rank, unsigned start, unsigned end)
{
    CCandidates words;
    const unsigned converted = getBestSentence(words, rank, start, end);

    result.clear();
    for (CCandidates::const_iterator w = words.begin(); w != words.end(); ++w)
        result.append(w->m_cwstr);

    return converted;
}
std::vector<CCandidates> CIMIContext::getBestSentenceTails(int rank, unsigned start, unsigned end) { std::vector<CCandidates> result; if (rank < 0) { return result; } CCandidates sentence; unsigned word_num = getBestSentence(sentence, rank, start, end); unsigned tail_word_num = word_num; while (tail_word_num > 1) { unsigned dec = tail_word_num / (m_maxTailCandidateNum + 1) + 1; tail_word_num -= std::min(dec, tail_word_num); if (tail_word_num <= 1) { break; } CCandidates tail(sentence.begin(), sentence.begin() + tail_word_num); result.push_back(tail); } return result; }
void CIMIContext::_saveUserDict () { if (!m_pUserDict) return; if (m_bestPath.empty()) return; bool has_user_selected = false; std::vector<unsigned>::iterator it = m_bestPath.begin(); std::vector<unsigned>::iterator ite = m_bestPath.end() - 1; unsigned s = 0; for (; it != ite; ++it, ++s) { has_user_selected |= (m_lattice[*it].m_bwType & CLatticeFrame::USER_SELECTED); if (!m_lattice[*it].isSyllableFrame ()) break; } if (has_user_selected && s >= 2) { CSyllables syls; -- it; CLexiconStates::iterator lxit = m_lattice[*it].m_lexiconStates.begin(); CLexiconStates::iterator lxite = m_lattice[*it].m_lexiconStates.end(); for (; lxit != lxite; ++lxit) { if (lxit->m_start == 0 && !lxit->m_bFuzzy && lxit->m_seg_path == m_bestSegPath) { //FIXME: need better solution later syls = lxit->m_syls; break; } } if (!syls.empty()) { wstring phrase; getBestSentence (phrase, 0, *it); m_pUserDict->addWord (syls, phrase); } } }
// Word-id flavour of getSelectedSentence().
//
// Forwards to getBestSentence() with rank -1 and the given start/end, and
// returns its result unchanged.
unsigned
CIMIContext::getSelectedSentence(std::vector<unsigned>& result, unsigned start, unsigned end)
{
    const int selectedRank = -1;
    return getBestSentence(result, selectedRank, start, end);
}
// String flavour of getSelectedSentence().
//
// Forwards to getBestSentence() with rank -1 and the given start/end, and
// returns its result unchanged.
unsigned
CIMIContext::getSelectedSentence(wstring& result, unsigned start, unsigned end)
{
    const int selectedRank = -1;
    return getBestSentence(result, selectedRank, start, end);
}
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result) { TCandiPair cp; static std::map<wstring, TCandiPair> map; std::map<wstring, TCandiPair>::iterator it_map; map.clear(); result.clear(); std::vector<unsigned> st; getBestSentence (st, frIdx); cp.m_candi.m_start = m_candiStarts = frIdx++; for (;frIdx < m_tailIdx; ++frIdx) { CLatticeFrame &fr = m_lattice[frIdx]; if (!fr.isSyllableFrame ()) continue; cp.m_candi.m_end = frIdx; if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) { cp.m_candi = fr.m_bestWord; cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED, fr.m_bwType & CLatticeFrame::BESTWORD, 0, false, 0); map [cp.m_candi.m_cwstr] = cp; } bool found = false; CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { TLexiconState & lxst = *it; if (lxst.m_start != m_candiStarts) continue; int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies; if (0 == len) len = 1; found = true; unsigned word_num; const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num); for (unsigned i=0; i<word_num; ++i) { if (m_csLevel < words[i].m_csLevel) continue; cp.m_candi.m_wordId = words[i].m_id; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = &lxst; if (!cp.m_candi.m_cwstr) continue; //sorting according to the order in PinYinTire cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, false, i); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map [cp.m_candi.m_cwstr] = cp; } } if (!found) continue; // FIXME: need better solution later if (m_bDynaCandiOrder) { CLatticeStates::iterator it = fr.m_latticeStates.begin(); CLatticeStates::iterator ite = fr.m_latticeStates.end(); for (; it != ite; ++it) { TLatticeState & ltst = *it; if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts) continue; 
cp.m_candi.m_wordId = ltst.m_backTraceWordId; cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId); cp.m_candi.m_pLexiconState = ltst.m_pLexiconState; if (!cp.m_candi.m_cwstr) continue; int len = cp.m_candi.m_pLexiconState->m_syls.size() - cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies; if (0 == len) len = 1; cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score); it_map = map.find(cp.m_candi.m_cwstr); if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID) map[cp.m_candi.m_cwstr] = cp; } } m_candiEnds = frIdx; } std::vector<TCandiPairPtr> vec; vec.reserve(map.size()); std::map<wstring, TCandiPair>::iterator it_mapE = map.end(); for (it_map = map.begin(); it_map != it_mapE; ++it_map) vec.push_back(TCandiPairPtr(&(it_map->second))); std::make_heap(vec.begin(), vec.end()); std::sort_heap(vec.begin(), vec.end()); for (int i=0, sz=vec.size(); i < sz; ++i) result.push_back(vec[i].m_Ptr->m_candi); }