Example #1
0
/**
 * Re-run the lattice search for every frame from idx up to m_tailIdx.
 *
 * _clearBestPaths() is called first. Then, for each frame that is not
 * UNUSED, the frame's lattice states are cleared and refilled through
 * _transferBetween(): once for the user-selected word (if the frame carries
 * one) and then for a bounded number of words of every lexicon state.
 * Words beyond that bound are still tried when m_pHistory reports them as
 * seen before. Finally _backTraceBestPaths() is called.
 *
 * @param idx  index of the first lattice frame to search
 * @return true if idx is not beyond m_candiEnds, or if a lexicon state that
 *         has words starts at m_candiStarts on a frame beyond m_candiEnds
 */
bool CIMIContext::searchFrom (unsigned idx)
{
    bool affectCandidates = (idx <= m_candiEnds);

    _clearBestPaths ();

    for (; idx<=m_tailIdx; ++idx) {
        CLatticeFrame &fr = m_lattice[idx];

        if (fr.m_type == CLatticeFrame::UNUSED)
            continue;

        fr.m_latticeStates.clear ();

        /* user selected word might be cut in next step */
        if (fr.m_bwType & CLatticeFrame::USER_SELECTED)
            _transferBetween (fr.m_bestWord.m_start, idx, fr.m_bestWord.m_pLexiconState, fr.m_bestWord.m_wordId);

        CLexiconStates::iterator it  = fr.m_lexiconStates.begin ();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end ();
        for (; it != ite; ++it) {
            unsigned word_num = 0;
            TLexiconState &lxst = *it;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num);

            /* nothing to transfer for a lexicon state without words */
            if (!word_num)
                continue;

            if (lxst.m_start == m_candiStarts && idx > m_candiEnds)
                affectCandidates = true;

            /* only select the words with higher unigram probabilities, and
             * halve the number of tries for fuzzy syllables.
             * NOTE: this used to read `MAX_LEXICON_TRIES>1`, which is a
             * comparison yielding a bool (i.e. a limit of 1), not a halving. */
            int maxsz = it->m_bFuzzy? MAX_LEXICON_TRIES/2: MAX_LEXICON_TRIES;

            /* compare as int so the signed limit is not converted to unsigned */
            int total = static_cast<int> (word_num);
            int sz = total<maxsz? total: maxsz;
            int i = 0, count = 0;

            /* initial score handed to _transferBetween; fuzzy states get half */
            double ic = it->m_bFuzzy? 0.5: 1.0;

            /* stop after sz accepted words or sz inspected words; past the
             * first two accepted words, also stop at the first word whose
             * m_bSeen flag is not set */
            for (i = 0; count < sz && i < sz && (words[i].m_bSeen || count < 2); ++i) {
                if (m_csLevel >= words[i].m_csLevel) {
                    _transferBetween (lxst.m_start, idx, &lxst, words[i].m_id, ic);
                    ++ count;
                }
            }

            /* try extra words in history cache, continuing from where the
             * loop above stopped */
            if (m_pHistory) {
                for (; i < total; ++i) {
                    if (m_csLevel >= words[i].m_csLevel && m_pHistory->seenBefore (words[i].m_id))
                        _transferBetween (lxst.m_start, idx, &lxst, words[i].m_id, ic);
                }
            }
        }
    }

    _backTraceBestPaths ();

    return affectCandidates;
}
Example #2
0
bool
CIMIContext::searchFrom(unsigned idx)
{
    bool affectCandidates = (idx <= m_candiEnds);

    for (; idx <= m_tailIdx; ++idx) {
        CLatticeFrame &fr = m_lattice[idx];

        if (fr.m_type == CLatticeFrame::UNUSED)
            continue;

        fr.m_latticeStates.clear();

        /* user selected word might be cut in next step */
        if (fr.m_bwType & CLatticeFrame::USER_SELECTED) {
            _transferBetween(fr.m_selWord.m_start, idx,
                             fr.m_selWord.m_pLexiconState,
                             fr.m_selWord.m_wordId);
        }

        CLexiconStates::iterator it = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            unsigned word_num = 0;
            TLexiconState &lxst = *it;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num);

            if (!word_num)
                continue;

            if (lxst.m_start == m_candiStarts && idx > m_candiEnds)
                affectCandidates = true;

            // only selected the word with higher unigram probablities, and
            // narrow the search deepth and lower the initial score for fuzzy
            // syllables
            int maxsz = it->m_bFuzzy ? MAX_LEXICON_TRIES /
                        2 : MAX_LEXICON_TRIES;

            double ic = it->m_bFuzzy ? 0.5 : 1.0;

            int sz = (int) word_num < maxsz ? (int) word_num : maxsz;
            int i = 0, count = 0;

            while (count < sz && i < sz && (words[i].m_bSeen || count < 2)) {
                if (m_csLevel >= words[i].m_csLevel) {
                    // printf("cost %d\n", words[i].m_cost);
                    _transferBetween(lxst.m_start, idx, &lxst, words[i].m_id,
                                     ic * exp2_tbl[words[i].m_cost]);
                    ++count;
                }
                i++;
            }

            /* try extra words in history cache */
            if (m_pHistory) {
                while (i < (int) word_num) {
                    if (m_csLevel >= words[i].m_csLevel
                        && m_pHistory->seenBefore(words[i].m_id)) {
                        // printf("history cost %d\n", words[i].m_cost);
                        _transferBetween(lxst.m_start, idx, &lxst,
                                         words[i].m_id,
                                         ic * exp2_tbl[words[i].m_cost]);
                    }
                    i++;
                }
            }
        }
    }

    _clearPaths();
    m_path.clear();
    m_segPath.clear();
    m_nBest = 0;

    std::vector<TLatticeState> tail_states =
        m_lattice[m_tailIdx].m_latticeStates.getFilteredResult();

#ifdef DEBUG
    for (int i = 0; i < tail_states.size(); i++) {
        std::string score;
        tail_states[i].m_score.toString(score);
        printf("score[%d]: %s\n", i, score.c_str());
    }
#endif

    for (size_t i = 0; i < m_maxBest; i++) {
        TPath path, segpath;
        if (_backTracePaths(tail_states, m_nBest, path, segpath)) {
            m_path.push_back(path);
            m_segPath.push_back(segpath);
            m_nBest++;
        }
    }

    if (m_pPySegmentor && m_nBest > 0 && !m_segPath[0].empty())
        m_pPySegmentor->notify_best_segpath(m_segPath[0]);

    return affectCandidates;
}