Example #1
0
// Try to learn a new user-dictionary entry from the leading part of the
// current best path.
//
// The entries of m_bestPath are walked from the first one up to, but not
// including, the last one. Collection stops at the first frame that
//   - is not a syllable frame,
//   - has no lexicon state attached to its best word, or
//   - would push the collected syllable count past MAX_USRDEF_WORD_LEN.
//
// A word is added only when at least two frames were consumed, at least one
// of the accepted frames carries the USER_SELECTED flag, and some syllables
// were actually gathered.
void CIMIContext::_saveUserDict ()
{
    if (!m_pUserDict || m_bestPath.empty())
        return;

    CSyllables syls;
    unsigned s = 0;                 // number of frames accepted so far
    bool has_user_selected = false;
    std::vector<unsigned>::iterator it  = m_bestPath.begin();
    std::vector<unsigned>::iterator ite = m_bestPath.end() - 1;
    for (; it != ite; ++it, ++s) {
        CLatticeFrame &fr = m_lattice[*it];
        if (!fr.isSyllableFrame ())
            break;

        // Without a lexicon state there are no syllables to read; stop here
        // instead of dereferencing a null pointer.
        if (!fr.m_bestWord.m_pLexiconState)
            break;

        CSyllables &tmp = fr.m_bestWord.m_pLexiconState->m_syls;
        if (syls.size() + tmp.size() > MAX_USRDEF_WORD_LEN) {
            // Step back to the last accepted frame. When the very first
            // frame already overflows there is nothing to step back to, and
            // decrementing begin() would be undefined behaviour; in that
            // case s is 0 and `it` is never dereferenced below.
            if (it != m_bestPath.begin())
                --it;
            break;
        }

        // Only frames that actually end up in syls may count as a user
        // selection; a rejected frame must not trigger a save on its own.
        has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED);

        std::copy (tmp.begin(), tmp.end(), back_inserter(syls));
    }

    // NOTE(review): on the length-limit exit `it` refers to the last accepted
    // frame, while on the other exits it refers to the first frame that was
    // NOT accepted (or to the final path entry). Presumably getBestSentence
    // copes with both -- confirm that phrase and syls always line up.
    if (s >= 2 && has_user_selected && !syls.empty()) {
        wstring phrase;
        getBestSentence (phrase, 0, *it);
        m_pUserDict->addWord (syls, phrase);
    }
}
Example #2
0
// Advance the lattice from frame i to frame j by one syllable.
//
// Frame j is marked as a SYLLABLE frame and receives new lexicon states:
//   1. for every lexicon state on frame i whose trie node can be extended
//      with `syllable`, the extended state;
//   2. for every lexicon state on frame i that is still shorter than
//      MAX_USRDEF_WORD_LEN (and when a user dictionary is present), a state
//      built from the user-dictionary lookup of the extended syllable list;
//   3. finally, a fresh one-syllable state starting at frame i, if the trie
//      knows the syllable on its own.
//
// seg supplies the segment boundaries recorded in each state's segment path
// and the inner-fuzzy flag added to the state's fuzzy counter.
void CIMIContext::_forwardSingleSyllable (unsigned i, unsigned j, TSyllable syllable, const IPySegmentor::TSegment& seg)
{
    // Values that do not change while the source states are scanned.
    const unsigned seg_end    = seg.m_start + seg.m_len;
    const int      fuzzy_step = seg.m_inner_fuzzy ? 1 : 0;

    CLatticeFrame &target = m_lattice[j];
    target.m_type = CLatticeFrame::SYLLABLE;

    CLexiconStates::iterator cur  = m_lattice[i].m_lexiconStates.begin ();
    CLexiconStates::iterator stop = m_lattice[i].m_lexiconStates.end ();
    while (cur != stop) {
        TLexiconState &from = *cur;
        ++cur;

        // Step 1: extend a system-dictionary match from frame i to frame j.
        bool extended_in_trie = false;
        if (from.m_pPYNode) {
            const CPinyinTrie::TNode *next = m_pPinyinTrie->transfer (from.m_pPYNode, syllable);
            if (next) {
                TLexiconState grown (from.m_start, next, from.m_syls, from.m_seg_path);
                grown.m_syls.push_back (syllable);
                grown.m_num_of_inner_fuzzies = from.m_num_of_inner_fuzzies + fuzzy_step;
                grown.m_seg_path.push_back (seg_end);
                target.m_lexiconStates.push_back (grown);
                extended_in_trie = true;
            }
        }

        // Step 2: user dictionary, only while the word may still grow.
        if (!m_pUserDict || from.m_syls.size() >= MAX_USRDEF_WORD_LEN)
            continue;

        CSyllables key = from.m_syls;
        key.push_back (syllable);
        std::vector<CPinyinTrie::TWordIdInfo> hits;
        m_pUserDict->getWords (key, hits);

        // An empty result is still recorded as a placeholder state unless the
        // system dictionary already produced one; the original author notes
        // that this helps _saveUserDict detect new words.
        if (hits.empty() && extended_in_trie)
            continue;

        TLexiconState grown (from.m_start, hits, from.m_syls, from.m_seg_path);
        grown.m_syls.push_back (syllable);
        grown.m_num_of_inner_fuzzies = from.m_num_of_inner_fuzzies + fuzzy_step;
        grown.m_seg_path.push_back (seg_end);
        target.m_lexiconStates.push_back (grown);
    }

    // Step 3: a lexicon state for the syllable standing alone.
    const CPinyinTrie::TNode *single = m_pPinyinTrie->transfer (syllable);
    if (!single)
        return;

    CSyllables one_syllable;
    one_syllable.push_back (syllable);

    std::vector<unsigned> boundaries;
    boundaries.push_back (seg.m_start);
    boundaries.push_back (seg_end);

    TLexiconState alone (i, single, one_syllable, boundaries);
    alone.m_num_of_inner_fuzzies = fuzzy_step;
    target.m_lexiconStates.push_back (alone);
}
Example #3
0
// Try to learn a new user-dictionary entry from the selected words at the
// end of the lattice.
//
// Starting at frame m_tailIdx - 1 (after skipping frames marked
// NO_BESTWORD), the lattice is walked backwards by following each frame's
// m_selWord.m_start. The syllables of every visited frame that has a lexicon
// state are prepended to syls, so syls ends up in forward order. The walk
// stops at frame 0, at the first non-syllable frame, or when another frame
// would push the syllable count past MAX_USRDEF_WORD_LEN.
//
// A word is added only if one of the collected frames carries the
// USER_SELECTED flag and more than one syllable was gathered.
void
CIMIContext::_saveUserDict()
{
    if (!m_pUserDict)
        return;

    // m_tailIdx - 1 stored in an unsigned would wrap around for a zero tail
    // index and then be used to index m_lattice; bail out instead.
    if (m_tailIdx == 0)
        return;

    CSyllables syls;
    bool has_user_selected = false;
    unsigned i = m_tailIdx - 1;
    unsigned e_pos = 0;             // last frame whose syllables were taken

    while (i > 0 && m_lattice[i].m_bwType == CLatticeFrame::NO_BESTWORD)
        i--;

    while (i > 0) {
        CLatticeFrame &fr = m_lattice[i];
        if (!fr.isSyllableFrame())
            break;

        TLexiconState* state = fr.m_selWord.m_pLexiconState;
        if (!state) {
            // No syllables available for this frame: skip over it and keep
            // walking towards the front.
            i = fr.m_selWord.m_start;
            continue;
        }

        if (syls.size() + state->m_syls.size() > MAX_USRDEF_WORD_LEN)
            break;

        // The first accepted frame is the rightmost one, i.e. the phrase end.
        if (!e_pos)
            e_pos = i;

        has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED);
        // Prepend, because frames are visited back to front.
        std::copy(state->m_syls.begin(), state->m_syls.end(), inserter(syls, syls.begin()));
        i = fr.m_selWord.m_start;
    }

    // NOTE(review): the phrase is requested from position 0 even when the
    // walk above stopped before reaching frame 0, in which case syls only
    // covers the frames after the stop point (and skips frames without a
    // lexicon state). Verify that phrase and syls stay aligned in those cases.
    if (has_user_selected && syls.size() > 1) {
        wstring phrase;
        getSelectedSentence (phrase, 0, e_pos);
        m_pUserDict->addWord (syls, phrase);
    }
}
Example #4
0
void CIMIContext::_saveUserDict ()
{
    if (!m_pUserDict)
        return;

    if (m_bestPath.empty())
        return;
    
    bool has_user_selected = false;
    std::vector<unsigned>::iterator it  = m_bestPath.begin();
    std::vector<unsigned>::iterator ite = m_bestPath.end() - 1;
    unsigned s = 0;
    for (; it != ite; ++it, ++s) {
        has_user_selected |= (m_lattice[*it].m_bwType & CLatticeFrame::USER_SELECTED);
        if (!m_lattice[*it].isSyllableFrame ())
            break;
    }

    if (has_user_selected && s >= 2) {
        CSyllables syls;
        -- it;
        CLexiconStates::iterator lxit  = m_lattice[*it].m_lexiconStates.begin();
        CLexiconStates::iterator lxite = m_lattice[*it].m_lexiconStates.end();
        for (; lxit != lxite; ++lxit) {
            if (lxit->m_start == 0 && !lxit->m_bFuzzy && lxit->m_seg_path == m_bestSegPath) { //FIXME: need better solution later
                syls = lxit->m_syls;
                break;
            }
        }

        if (!syls.empty()) {
            wstring phrase;
            getBestSentence (phrase, 0, *it);
            m_pUserDict->addWord (syls, phrase);
        }
    }
}