Пример #1
0
/**
 * Try to add a phrase from the front of the current best path to the
 * user dictionary.
 *
 * Walks m_bestPath from its first entry (the last entry is never visited),
 * appending the syllables of each frame's best word to a local buffer.
 * The walk stops at the first non-syllable frame, or when appending the
 * next word would push the buffer past MAX_USRDEF_WORD_LEN syllables.
 *
 * A word is added only if at least two frames were accepted, at least one
 * inspected frame carries the USER_SELECTED bit, and some syllables were
 * collected. Does nothing when there is no user dictionary or no best path.
 */
void CIMIContext::_saveUserDict ()
{
    if (!m_pUserDict)
        return;

    if (m_bestPath.empty())
        return;

    CSyllables syls;
    unsigned s = 0;                 // number of frames accepted so far
    bool has_user_selected = false;
    const std::vector<unsigned>::iterator itb = m_bestPath.begin();
    std::vector<unsigned>::iterator it  = itb;
    // m_bestPath is non-empty here, so end() - 1 is a valid iterator.
    std::vector<unsigned>::iterator ite = m_bestPath.end() - 1;
    for (; it != ite; ++it, ++s) {
        CLatticeFrame &fr = m_lattice[*it];
        if (!fr.isSyllableFrame ())
            break;

        has_user_selected |= (fr.m_bwType & CLatticeFrame::USER_SELECTED);

        // NOTE(review): assumes m_pLexiconState is non-null for every
        // syllable frame on the best path -- confirm against the decoder.
        CSyllables &tmp = fr.m_bestWord.m_pLexiconState->m_syls;
        if (syls.size() + tmp.size() > MAX_USRDEF_WORD_LEN) {
            // Step back to the previous path entry. Decrementing begin()
            // is undefined behaviour, so skip it when the very first word
            // is already too long (s is 0 then, and nothing is saved).
            if (it != itb)
                --it;
            break;
        }

        std::copy (tmp.begin(), tmp.end(), back_inserter(syls));
    }

    if (s >= 2 && has_user_selected && !syls.empty()) {
        wstring phrase;
        getBestSentence (phrase, 0, *it);
        m_pUserDict->addWord (syls, phrase);
    }
}
Пример #2
0
unsigned
CIMIContext::getBestSentence(std::vector<unsigned>& result, int rank,
                             unsigned start, unsigned end)
{
    CCandidates sentence;
    unsigned nWordConverted = getBestSentence(sentence, rank, start, end);
    result.clear();
    for (size_t i = 0; i < sentence.size(); i++) {
        result.push_back(sentence[i].m_wordId);
    }
    return nWordConverted;
}
Пример #3
0
/**
 * String flavour of getBestSentence.
 *
 * Delegates to the CCandidates overload with the same rank/start/end, then
 * replaces the contents of `result` with the concatenation of every
 * candidate's m_cwstr, in order.
 *
 * @return the value returned by the CCandidates overload.
 */
unsigned
CIMIContext::getBestSentence(wstring& result, int rank,
                             unsigned start, unsigned end)
{
    CCandidates words;
    const unsigned converted = getBestSentence(words, rank, start, end);

    // The output is cleared only after the sentence has been fetched.
    result.clear();
    CCandidates::iterator w = words.begin();
    while (w != words.end()) {
        result += w->m_cwstr;
        ++w;
    }

    return converted;
}
Пример #4
0
std::vector<CCandidates>
CIMIContext::getBestSentenceTails(int rank, unsigned start, unsigned end)
{
    std::vector<CCandidates> result;
    if (rank < 0) {
        return result;
    }

    CCandidates sentence;
    unsigned word_num = getBestSentence(sentence, rank, start, end);
    unsigned tail_word_num = word_num;

    while (tail_word_num > 1) {
        unsigned dec = tail_word_num / (m_maxTailCandidateNum + 1) + 1;
        tail_word_num -= std::min(dec, tail_word_num);
        if (tail_word_num <= 1) {
            break;
        }
        CCandidates tail(sentence.begin(), sentence.begin() + tail_word_num);
        result.push_back(tail);
    }
    return result;
}
Пример #5
0
void CIMIContext::_saveUserDict ()
{
    if (!m_pUserDict)
        return;

    if (m_bestPath.empty())
        return;
    
    bool has_user_selected = false;
    std::vector<unsigned>::iterator it  = m_bestPath.begin();
    std::vector<unsigned>::iterator ite = m_bestPath.end() - 1;
    unsigned s = 0;
    for (; it != ite; ++it, ++s) {
        has_user_selected |= (m_lattice[*it].m_bwType & CLatticeFrame::USER_SELECTED);
        if (!m_lattice[*it].isSyllableFrame ())
            break;
    }

    if (has_user_selected && s >= 2) {
        CSyllables syls;
        -- it;
        CLexiconStates::iterator lxit  = m_lattice[*it].m_lexiconStates.begin();
        CLexiconStates::iterator lxite = m_lattice[*it].m_lexiconStates.end();
        for (; lxit != lxite; ++lxit) {
            if (lxit->m_start == 0 && !lxit->m_bFuzzy && lxit->m_seg_path == m_bestSegPath) { //FIXME: need better solution later
                syls = lxit->m_syls;
                break;
            }
        }

        if (!syls.empty()) {
            wstring phrase;
            getBestSentence (phrase, 0, *it);
            m_pUserDict->addWord (syls, phrase);
        }
    }
}
Пример #6
0
/**
 * Word-id flavour of getSelectedSentence.
 *
 * Thin wrapper: forwards to getBestSentence(result, rank, start, end)
 * with rank fixed to -1 and returns its result unchanged.
 */
unsigned
CIMIContext::getSelectedSentence(std::vector<unsigned>& result,
                                 unsigned start, unsigned end)
{
    // NOTE(review): presumably rank -1 denotes the user-selected sentence;
    // confirm against the CCandidates overload of getBestSentence.
    const int rank = -1;
    return getBestSentence(result, rank, start, end);
}
Пример #7
0
/**
 * String flavour of getSelectedSentence.
 *
 * Thin wrapper: forwards to getBestSentence(result, rank, start, end)
 * with rank fixed to -1 and returns its result unchanged.
 */
unsigned
CIMIContext::getSelectedSentence(wstring& result,
                                 unsigned start, unsigned end)
{
    // NOTE(review): presumably rank -1 denotes the user-selected sentence;
    // confirm against the CCandidates overload of getBestSentence.
    const int rank = -1;
    return getBestSentence(result, rank, start, end);
}
Пример #8
0
/**
 * Collect the candidate words that start at lattice frame `frIdx`.
 *
 * Sets m_candiStarts to `frIdx`, then scans the syllable frames after it
 * (up to, but not including, m_tailIdx). For each such frame, candidates
 * are gathered from:
 *   - the frame's best word, if it starts at m_candiStarts;
 *   - every word of every lexicon state that starts at m_candiStarts;
 *   - when m_bDynaCandiOrder is set, every lattice state whose back-trace
 *     node sits at m_candiStarts.
 * Candidates are de-duplicated by their string. A later entry replaces an
 * existing one when its rank compares less, or when its word id is above
 * INI_USRDEF_WID. m_candiEnds is updated to the last frame that had a
 * matching lexicon state.
 *
 * @param frIdx   index of the frame the candidates must start at.
 * @param result  cleared, then filled with the candidates in rank order.
 */
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result)
{
    TCandiPair cp;
    // Function-local static: one map shared by every call (and every
    // instance); it is emptied at the start of each call.
    static std::map<wstring, TCandiPair> map;
    std::map<wstring, TCandiPair>::iterator it_map;

    map.clear();
    result.clear();

    std::vector<unsigned> st;
    getBestSentence (st, frIdx);

    // The first word id of the sentence is used to flag matching
    // candidates in their rank. The sentence may come back empty, and
    // front() on an empty vector is undefined, so read it once, guarded.
    const bool hasBestWord = !st.empty();
    const unsigned bestWordId = hasBestWord ? st.front() : 0;

    cp.m_candi.m_start = m_candiStarts = frIdx++;

    for (;frIdx < m_tailIdx; ++frIdx)  {
        CLatticeFrame &fr = m_lattice[frIdx];

        if (!fr.isSyllableFrame ())
            continue;

        cp.m_candi.m_end = frIdx;
        if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) {
            // The frame's best word is inserted unconditionally,
            // overwriting any entry with the same string.
            cp.m_candi = fr.m_bestWord;
            cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED,
                                   fr.m_bwType & CLatticeFrame::BESTWORD,
                                   0, false, 0);
            map [cp.m_candi.m_cwstr] = cp;
        }

        bool found = false;
        CLexiconStates::iterator it  = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            TLexiconState & lxst = *it;

            if (lxst.m_start != m_candiStarts)
                continue;

            // Syllable count minus inner fuzzies, never reported as zero.
            int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
            if (0 == len) len = 1;

            found = true;
            unsigned word_num;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num);

            for (unsigned i=0; i<word_num; ++i) {
                // Skip words whose m_csLevel is above the context's.
                if (m_csLevel < words[i].m_csLevel)
                    continue;

                cp.m_candi.m_wordId = words[i].m_id;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = &lxst;
                if (!cp.m_candi.m_cwstr)
                    continue;

                //sorting according to the order in PinYinTire
                cp.m_Rank = TCandiRank(false, hasBestWord && bestWordId == cp.m_candi.m_wordId, len, false, i);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map [cp.m_candi.m_cwstr] = cp;
            }
        }

        if (!found) continue; // FIXME: need better solution later

        if (m_bDynaCandiOrder) {
            CLatticeStates::iterator it  = fr.m_latticeStates.begin();
            CLatticeStates::iterator ite = fr.m_latticeStates.end();
            for (; it != ite; ++it) {
                TLatticeState & ltst = *it;

                if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts)
                    continue;

                cp.m_candi.m_wordId = ltst.m_backTraceWordId;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = ltst.m_pLexiconState;
                if (!cp.m_candi.m_cwstr)
                    continue;

                int len = cp.m_candi.m_pLexiconState->m_syls.size() -
                          cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies;
                if (0 == len) len = 1;
                // Rank by this state's score relative to its back-trace
                // node's score.
                cp.m_Rank = TCandiRank(false, hasBestWord && bestWordId == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map[cp.m_candi.m_cwstr] = cp;
            }
        }

        m_candiEnds = frIdx;
    }

    // Order the collected entries through pointers into the map; the map
    // is not modified again, so the pointers stay valid.
    std::vector<TCandiPairPtr> vec;

    vec.reserve(map.size());
    std::map<wstring, TCandiPair>::iterator it_mapE = map.end();
    for (it_map = map.begin(); it_map != it_mapE; ++it_map)
        vec.push_back(TCandiPairPtr(&(it_map->second)));
    std::make_heap(vec.begin(), vec.end());
    std::sort_heap(vec.begin(), vec.end());

    for (int i=0, sz=vec.size(); i < sz; ++i)
        result.push_back(vec[i].m_Ptr->m_candi);
}