void CIMIContext::_saveUserDict () { if (!m_pUserDict) return; if (m_bestPath.empty()) return; CSyllables syls; unsigned s = 0; bool has_user_selected = false; std::vector<unsigned>::iterator it = m_bestPath.begin(); std::vector<unsigned>::iterator ite = m_bestPath.end() - 1; for (; it != ite; ++it, ++s) { CLatticeFrame &fr = m_lattice[*it]; if (!fr.isSyllableFrame ()) break; has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED); CSyllables &tmp = fr.m_bestWord.m_pLexiconState->m_syls; if (syls.size() + tmp.size() > MAX_USRDEF_WORD_LEN) { --it; break; } std::copy (tmp.begin(), tmp.end(), back_inserter(syls)); } if (s >= 2 && has_user_selected && !syls.empty()) { wstring phrase; getBestSentence (phrase, 0, *it); m_pUserDict->addWord (syls, phrase); } }
void CIMIContext::_forwardSingleSyllable (unsigned i, unsigned j, TSyllable syllable, const IPySegmentor::TSegment& seg) { const CPinyinTrie::TNode * pn = NULL; CLatticeFrame &fr = m_lattice[j]; fr.m_type = CLatticeFrame::SYLLABLE; CLexiconStates::iterator it = m_lattice[i].m_lexiconStates.begin (); CLexiconStates::iterator ite = m_lattice[i].m_lexiconStates.end (); for (; it != ite; ++it) { TLexiconState &lxst = *it; bool added_from_sysdict = false; if (lxst.m_pPYNode) { // try to match a word from lattice i to lattice j // and if match, we'll count it as a new lexicon on lattice j pn = m_pPinyinTrie->transfer (lxst.m_pPYNode, syllable); if (pn) { added_from_sysdict = true; TLexiconState new_lxst = TLexiconState (lxst.m_start, pn, lxst.m_syls, lxst.m_seg_path); new_lxst.m_syls.push_back (syllable); new_lxst.m_num_of_inner_fuzzies = lxst.m_num_of_inner_fuzzies + (seg.m_inner_fuzzy? 1: 0); new_lxst.m_seg_path.push_back (seg.m_start+seg.m_len); fr.m_lexiconStates.push_back (new_lxst); } } if (m_pUserDict && lxst.m_syls.size() < MAX_USRDEF_WORD_LEN) { // try to match a word from user dict CSyllables syls = lxst.m_syls; syls.push_back (syllable); std::vector<CPinyinTrie::TWordIdInfo> words; m_pUserDict->getWords (syls, words); if (!words.empty() || !added_from_sysdict) { // even if the words is empty we'll add a fake lexicon // here. This helps _saveUserDict detect new words. TLexiconState new_lxst = TLexiconState (lxst.m_start, words, lxst.m_syls, lxst.m_seg_path); new_lxst.m_syls.push_back (syllable); new_lxst.m_num_of_inner_fuzzies = lxst.m_num_of_inner_fuzzies + (seg.m_inner_fuzzy? 1: 0); new_lxst.m_seg_path.push_back (seg.m_start+seg.m_len); fr.m_lexiconStates.push_back (new_lxst); } } } // last, create a lexicon for single character with only one syllable pn = m_pPinyinTrie->transfer (syllable); if (pn) { CSyllables syls; syls.push_back (syllable); std::vector<unsigned> seg_path; seg_path.push_back (seg.m_start); seg_path.push_back (seg.m_start+seg.m_len); TLexiconState new_lxst = TLexiconState (i, pn, syls, seg_path); new_lxst.m_num_of_inner_fuzzies = seg.m_inner_fuzzy? 1: 0; fr.m_lexiconStates.push_back (new_lxst); } }
void CIMIContext::_saveUserDict() { if (!m_pUserDict) return; CSyllables syls; bool has_user_selected = false; unsigned i = m_tailIdx - 1; unsigned e_pos = 0; while (i > 0 && m_lattice[i].m_bwType == CLatticeFrame::NO_BESTWORD) i--; while (i > 0) { CLatticeFrame &fr = m_lattice[i]; if (!fr.isSyllableFrame()) { i = fr.m_selWord.m_start; break; } TLexiconState* state = fr.m_selWord.m_pLexiconState; if (!state) { i = fr.m_selWord.m_start; continue; } if (syls.size() + state->m_syls.size() > MAX_USRDEF_WORD_LEN) { i = fr.m_selWord.m_start; break; } if (!e_pos) e_pos = i; has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED); std::copy(state->m_syls.begin(), state->m_syls.end(), inserter(syls, syls.begin())); i = fr.m_selWord.m_start; } if (has_user_selected && syls.size() > 1) { wstring phrase; getSelectedSentence (phrase, 0, e_pos); m_pUserDict->addWord (syls, phrase); } }
void CIMIContext::_saveUserDict () { if (!m_pUserDict) return; if (m_bestPath.empty()) return; bool has_user_selected = false; std::vector<unsigned>::iterator it = m_bestPath.begin(); std::vector<unsigned>::iterator ite = m_bestPath.end() - 1; unsigned s = 0; for (; it != ite; ++it, ++s) { has_user_selected |= (m_lattice[*it].m_bwType & CLatticeFrame::USER_SELECTED); if (!m_lattice[*it].isSyllableFrame ()) break; } if (has_user_selected && s >= 2) { CSyllables syls; -- it; CLexiconStates::iterator lxit = m_lattice[*it].m_lexiconStates.begin(); CLexiconStates::iterator lxite = m_lattice[*it].m_lexiconStates.end(); for (; lxit != lxite; ++lxit) { if (lxit->m_start == 0 && !lxit->m_bFuzzy && lxit->m_seg_path == m_bestSegPath) { //FIXME: need better solution later syls = lxit->m_syls; break; } } if (!syls.empty()) { wstring phrase; getBestSentence (phrase, 0, *it); m_pUserDict->addWord (syls, phrase); } } }