예제 #1
0
void CIMIContext::_backTraceBestPath ()
{
    CLatticeStates& tail_states = m_lattice[m_tailIdx].m_latticeStates;

    // there must be some transfer errors
    if (tail_states.size() != 1)
        return;

    TLatticeState *bs = &(tail_states[0]);

    while (bs->m_pBackTraceNode) {
        unsigned start = bs->m_pBackTraceNode->m_frIdx;
        unsigned end   = bs->m_frIdx;
        CLatticeFrame & end_fr = m_lattice[end];

        if (! (end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) {
            end_fr.m_bwType |= CLatticeFrame::BESTWORD;

            end_fr.m_bestWord.m_start = start;
            end_fr.m_bestWord.m_end = end;
            end_fr.m_bestWord.m_wordId = bs->m_backTraceWordId;
            end_fr.m_bestWord.m_cwstr = end_fr.m_wstr.empty()?
                                        _getWstr (bs->m_backTraceWordId):
                                        end_fr.m_wstr.c_str();
        }

        m_bestPath.push_back (end);
        bs = bs->m_pBackTraceNode;
    }

    std::reverse (m_bestPath.begin(), m_bestPath.end());
}
예제 #2
0
/**
 * Gather the candidate words that start at lattice frame frIdx.
 *
 * result is cleared and then filled with one CCandidate per distinct
 * candidate string. Candidates are collected from three places on every
 * eligible frame after frIdx: the frame's n-best words, its lexicon states,
 * and (when m_bDynaCandiOrder is set) its lattice states. They are emitted
 * in the order given by std::sort over TCandiPairPtr (presumably ordered by
 * m_Rank -- TODO confirm against TCandiPairPtr::operator<).
 *
 * Side effects: m_candiStarts is set to frIdx, and m_candiEnds is set to the
 * last eligible frame that had at least one lexicon state starting at frIdx.
 *
 * @param frIdx  lattice frame index at which every returned candidate starts
 * @param result output list; previous content is discarded
 */
void
CIMIContext::getCandidates(unsigned frIdx, CCandidates& result)
{
    // Scratch pair reused for every candidate; fields written in one
    // iteration remain set for the next unless overwritten.
    TCandiPair cp;
    // Candidate string -> best pair seen so far. Being a function-local
    // static, the map object is shared by all calls and is emptied below.
    static std::map<wstring, TCandiPair> candidates_map;
    std::map<wstring, TCandiPair>::iterator candidates_it;

    candidates_map.clear();
    result.clear();

    // Filled by getSelectedSentence(); only its first element is consulted
    // below (presumably the word id selected at frIdx -- TODO confirm).
    std::vector<unsigned> st;
    getSelectedSentence(st, frIdx);

    // Remember the start frame, then begin scanning at the frame after it.
    cp.m_candi.m_start = m_candiStarts = frIdx++;

    for (; frIdx < m_tailIdx; ++frIdx) {
        // Skip a frame that is immediately followed by a syllable separator.
        if (m_lattice[frIdx + 1].isSyllableSepFrame())
            continue;

        CLatticeFrame &fr = m_lattice[frIdx];
        if (!fr.isSyllableFrame())
            continue;

        cp.m_candi.m_end = frIdx;
        // Pass 1: n-best words recorded on this frame that start at
        // m_candiStarts and carry a lexicon state.
        if (fr.m_bwType != CLatticeFrame::NO_BESTWORD) {
            for (size_t i = 0; i < m_nBest; i++) {
                if (fr.m_bestWords.find(i) == fr.m_bestWords.end())
                    continue;
                CCandidate candi = fr.m_bestWords[i];
                if (candi.m_start != m_candiStarts)
                    continue;
                if (candi.m_pLexiconState == NULL)
                    continue;

                TLexiconState & lxst = *(candi.m_pLexiconState);
                // Syllable count minus inner fuzzies, never reported as 0.
                int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
                if (len == 0) len = 1;

                cp.m_candi = candi;
                cp.m_Rank =
                    TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED,
                               fr.m_bwType & CLatticeFrame::BESTWORD,
                               len, false, 0);
                // Unconditional store: a later n-best entry with the same
                // string replaces an earlier one.
                candidates_map[candi.m_cwstr] = cp;
            }
        }

        // Pass 2: every word offered by a lexicon state that starts at
        // m_candiStarts.
        bool found = false;
        CLexiconStates::iterator it = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            TLexiconState & lxst = *it;

            if (lxst.m_start != m_candiStarts)
                continue;

            int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
            if (0 == len) len = 1;

            found = true;
            unsigned word_num;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num);

            for (unsigned i = 0; i < word_num; ++i) {
                // Skip words whose m_csLevel is above the configured one.
                if (m_csLevel < words[i].m_csLevel)
                    continue;

                cp.m_candi.m_wordId = words[i].m_id;
                cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = &lxst;
                if (!cp.m_candi.m_cwstr)
                    continue;

                // The position i within the lexicon state's word list is
                // passed as the last rank component, so ranking follows the
                // order in which the pinyin trie lists the words.
                cp.m_Rank =
                    TCandiRank(false,
                               !st.empty() && st.front() == cp.m_candi.m_wordId,
                               len, false, i);
                // Store when the string is new, when this rank compares
                // less than the stored one, or when the word id is above
                // INI_USRDEF_WID (presumably a user-defined word -- confirm).
                candidates_it = candidates_map.find(cp.m_candi.m_cwstr);
                if (candidates_it == candidates_map.end()
                    || cp.m_Rank < candidates_it->second.m_Rank
                    || cp.m_candi.m_wordId > INI_USRDEF_WID) {
                    candidates_map[cp.m_candi.m_cwstr] = cp;
                    // print_wide(cp.m_candi.m_cwstr);
                    // printf(" ");
                }
            }
            // puts("");
        }

        // No lexicon state starts at m_candiStarts on this frame: skip the
        // lattice-state pass and leave m_candiEnds unchanged.
        if (!found) continue;  // FIXME: need better solution later

        // Pass 3 (optional): lattice states whose back-trace node sits on
        // m_candiStarts, ranked by a score ratio.
        if (m_bDynaCandiOrder) {
            CLatticeStates::iterator it = fr.m_latticeStates.begin();
            CLatticeStates::iterator ite = fr.m_latticeStates.end();
            // printf("adjusting ");
            for (; it != ite; ++it) {
                TLatticeState & ltst = *it;

                // NOTE(review): m_pBackTraceNode is dereferenced without a
                // null check -- confirm every state here has one.
                if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts)
                    continue;

                cp.m_candi.m_wordId = ltst.m_backTraceWordId;
                cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = ltst.m_pLexiconState;
                if (!cp.m_candi.m_cwstr)
                    continue;

                // NOTE(review): m_pLexiconState is dereferenced without a
                // null check -- confirm it is always set for these states.
                int len = cp.m_candi.m_pLexiconState->m_syls.size() -
                          cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies;
                if (0 == len) len = 1;
                // Last rank component is this state's score divided by the
                // score of the state it was reached from.
                cp.m_Rank = TCandiRank(false,
                                       !st.empty() && st.front() ==
                                       cp.m_candi.m_wordId,
                                       len, true, ltst.m_score /
                                       ltst.m_pBackTraceNode->m_score);
                // Same replacement rule as in the lexicon-state pass.
                candidates_it = candidates_map.find(cp.m_candi.m_cwstr);
                if (candidates_it == candidates_map.end()
                    || cp.m_Rank < candidates_it->second.m_Rank
                    || cp.m_candi.m_wordId > INI_USRDEF_WID) {
                    // print_wide(cp.m_candi.m_cwstr);
                    // std::string buf;
                    // ltst.m_score.toString(buf);
                    // printf("len:%d %s", len, buf.c_str());
                    // ltst.m_pBackTraceNode->m_score.toString(buf);
                    // printf("%s ", buf.c_str());
                    candidates_map[cp.m_candi.m_cwstr] = cp;
                }
            }
            // puts("");
        }

        m_candiEnds = frIdx;
    }

    // Sort lightweight pointers to the map entries rather than the entries
    // themselves, then copy the candidates out in sorted order.
    std::vector<TCandiPairPtr> vec;

    vec.reserve(candidates_map.size());
    for (candidates_it = candidates_map.begin();
         candidates_it != candidates_map.end(); ++candidates_it) {
        vec.push_back(TCandiPairPtr(&(candidates_it->second)));
    }

    std::sort(vec.begin(), vec.end());
    for (size_t i = 0; i < vec.size(); i++) {
        // print_wide(vec[i].m_Ptr->m_candi.m_cwstr);
        // printf(" ");
        result.push_back(vec[i].m_Ptr->m_candi);
    }
    // puts("");
}
예제 #3
0
/**
 * Back-trace the rank-th tail state into a lattice path and a segment path.
 *
 * Both output paths are cleared first. Starting from tail_states[rank] the
 * back-trace chain is followed to its head; for every step the end frame is
 * appended to path, and the segment indices of the predecessor's lexicon
 * state (if any) are appended to segmentPath, skipping a value equal to the
 * one appended just before it. Unless the end frame is flagged USER_SELECTED,
 * the step's word is stored as that frame's m_bestWords[rank] (and as
 * m_selWord when rank is 0) and the BESTWORD flag is set.
 *
 * @param tail_states  lattice states of the tail frame, indexed by rank
 * @param rank         index into tail_states of the path to trace
 * @param path         receives the frame indices of the path, head to tail
 * @param segmentPath  receives the collected segment indices, head to tail
 * @return false, with both outputs empty, when rank is negative or not less
 *         than tail_states.size(); true otherwise
 */
bool
CIMIContext::_backTracePaths(const std::vector<TLatticeState>& tail_states,
                             int rank, TPath& path, TPath& segmentPath)
{
    path.clear();
    segmentPath.clear();

    // Reject both ends of the range: a negative rank would otherwise slip
    // past the upper-bound test and index tail_states out of bounds.
    if (rank < 0 || static_cast<size_t>(rank) >= tail_states.size()) {
        return false;
    }

    const TLatticeState *bs = &(tail_states[rank]);

    while (bs->m_pBackTraceNode) {
        unsigned start = bs->m_pBackTraceNode->m_frIdx;
        unsigned end = bs->m_frIdx;
        CLatticeFrame & end_fr = m_lattice[end];

        // Frames carrying a user selection keep their current words.
        if (!(end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) {
            // Prefer the frame's own string; fall back to the word id lookup.
            const TWCHAR* cwstr = NULL;
            if (end_fr.m_wstr.empty()) {
                cwstr = _getWstr(bs->m_backTraceWordId);
            } else {
                cwstr = end_fr.m_wstr.c_str();
            }

            CCandidate candi(start, end, bs->m_pLexiconState, cwstr,
                             bs->m_backTraceWordId);

            end_fr.m_bwType |= CLatticeFrame::BESTWORD;
            end_fr.m_bestWords[rank] = candi;
            if (rank == 0) {
                end_fr.m_selWord = candi; // select the first by default.
            }
        }

        if (bs->m_pBackTraceNode->m_pLexiconState) {
            // Read the predecessor's segment path in place (no copy), from
            // back to front, because the whole result is reversed below.
            const std::vector<unsigned>& seg_path =
                bs->m_pBackTraceNode->m_pLexiconState->m_seg_path;
            std::vector<unsigned>::const_reverse_iterator it =
                seg_path.rbegin();
            const std::vector<unsigned>::const_reverse_iterator ite =
                seg_path.rend();

            for (; it != ite; ++it) {
                // Collapse a value that repeats the one just appended.
                if (segmentPath.empty() || segmentPath.back() != *it)
                    segmentPath.push_back(*it);
            }
        }

        path.push_back(end);
        bs = bs->m_pBackTraceNode;
    }

    // Everything was gathered tail-first; flip into forward order.
    std::reverse(path.begin(), path.end());
    std::reverse(segmentPath.begin(), segmentPath.end());

#ifdef DEBUG
    std::vector<unsigned>::iterator it;

    printf("trace lattice path[%d]: ", rank);
    for (it = path.begin(); it != path.end(); ++it)
        printf("%d ", *it);
    printf("\n");

    printf("trace segments path[%d]: ", rank);
    for (it = segmentPath.begin(); it != segmentPath.end(); ++it)
        printf("%d ", *it);
    printf("\n");
#endif

    return true;
}
예제 #4
0
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result)
{
    TCandiPair cp;
    static std::map<wstring, TCandiPair> map;
    std::map<wstring, TCandiPair>::iterator it_map;

    map.clear();
    result.clear();

    std::vector<unsigned> st;
    getBestSentence (st, frIdx);

    cp.m_candi.m_start = m_candiStarts = frIdx++;

    for (;frIdx < m_tailIdx; ++frIdx)  {
        CLatticeFrame &fr = m_lattice[frIdx];

        if (!fr.isSyllableFrame ())
            continue;

        cp.m_candi.m_end = frIdx;
        if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) {
            cp.m_candi = fr.m_bestWord;
            cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED,
                                   fr.m_bwType & CLatticeFrame::BESTWORD,
                                   0, false, 0);
            map [cp.m_candi.m_cwstr] = cp;
        }

        bool found = false;
        CLexiconStates::iterator it  = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            TLexiconState & lxst = *it;

            if (lxst.m_start != m_candiStarts)
                continue;

            int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
            if (0 == len) len = 1;

            found = true;
            unsigned word_num;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num);

            for (unsigned i=0; i<word_num; ++i) {
                if (m_csLevel < words[i].m_csLevel)
                    continue;

                cp.m_candi.m_wordId = words[i].m_id;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = &lxst;
                if (!cp.m_candi.m_cwstr)
                    continue;

                //sorting according to the order in PinYinTire
                cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, false, i);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map [cp.m_candi.m_cwstr] = cp;
            }
        }

        if (!found) continue; // FIXME: need better solution later

        if (m_bDynaCandiOrder) {
            CLatticeStates::iterator it  = fr.m_latticeStates.begin();
            CLatticeStates::iterator ite = fr.m_latticeStates.end();
            for (; it != ite; ++it) {
                TLatticeState & ltst = *it;

                if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts)
                    continue;

                cp.m_candi.m_wordId = ltst.m_backTraceWordId;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = ltst.m_pLexiconState;
                if (!cp.m_candi.m_cwstr)
                    continue;

                int len = cp.m_candi.m_pLexiconState->m_syls.size() -
                          cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies;
                if (0 == len) len = 1;
                cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map[cp.m_candi.m_cwstr] = cp;
            }
        }

        m_candiEnds = frIdx;
    }

    std::vector<TCandiPairPtr> vec;

    vec.reserve(map.size());
    std::map<wstring, TCandiPair>::iterator it_mapE = map.end();
    for (it_map = map.begin(); it_map != it_mapE; ++it_map)
        vec.push_back(TCandiPairPtr(&(it_map->second)));
    std::make_heap(vec.begin(), vec.end());
    std::sort_heap(vec.begin(), vec.end());

    for (int i=0, sz=vec.size(); i < sz; ++i)
        result.push_back(vec[i].m_Ptr->m_candi);
}
예제 #5
0
/**
 * Follow the back-trace chain that starts at the single tail lattice state
 * and record both the best lattice path and the best segment path.
 *
 * For every step of the chain the end frame is appended to m_bestPath, and
 * the segment indices of the predecessor's lexicon state (if any) are
 * appended to m_bestSegPath, skipping a value equal to the one appended just
 * before it. Unless the end frame is flagged USER_SELECTED, its m_bestWord is
 * filled in and the BESTWORD flag is set. Both paths are reversed into
 * forward order, and the segmentor, when present, is notified of the segment
 * path.
 *
 * Neither m_bestPath nor m_bestSegPath is cleared here; entries are appended
 * to whatever they already hold.
 */
void CIMIContext::_backTraceBestPaths ()
{
    CLatticeStates& tail_states = m_lattice[m_tailIdx].m_latticeStates;

    // Exactly one tail state is expected; anything else is treated as a
    // failed search and nothing is recorded.
    if (tail_states.size() != 1)
        return;

    TLatticeState *bs = &(tail_states[0]);

    while (bs->m_pBackTraceNode) {
        unsigned start = bs->m_pBackTraceNode->m_frIdx;
        unsigned end   = bs->m_frIdx;
        CLatticeFrame & end_fr = m_lattice[end];

        // Frames carrying a user selection keep their current best word.
        if (! (end_fr.m_bwType & CLatticeFrame::USER_SELECTED)) {
            end_fr.m_bwType |= CLatticeFrame::BESTWORD;

            end_fr.m_bestWord.m_start = start;
            end_fr.m_bestWord.m_end = end;
            end_fr.m_bestWord.m_pLexiconState = bs->m_pLexiconState;
            end_fr.m_bestWord.m_wordId = bs->m_backTraceWordId;
            // Prefer the frame's own string; fall back to the word id lookup.
            end_fr.m_bestWord.m_cwstr = end_fr.m_wstr.empty()?
                                        _getWstr (bs->m_backTraceWordId):
                                        end_fr.m_wstr.c_str();
        }

        if (bs->m_pBackTraceNode->m_pLexiconState) {
            // Read the predecessor's segment path in place (no copy), from
            // back to front, because the whole result is reversed below.
            const std::vector<unsigned>& seg_path =
                bs->m_pBackTraceNode->m_pLexiconState->m_seg_path;
            std::vector<unsigned>::const_reverse_iterator it  = seg_path.rbegin();
            const std::vector<unsigned>::const_reverse_iterator ite = seg_path.rend();

            for (; it != ite; ++it) {
                // Collapse a value that repeats the one just appended.
                if (m_bestSegPath.empty() || m_bestSegPath.back() != *it)
                    m_bestSegPath.push_back (*it);
            }
        }

        m_bestPath.push_back (end);
        bs = bs->m_pBackTraceNode;
    }

    // Everything was gathered tail-first; flip into forward order.
    std::reverse (m_bestPath.begin(), m_bestPath.end());
    std::reverse (m_bestSegPath.begin(), m_bestSegPath.end());

    if (m_pPySegmentor)
        m_pPySegmentor->notify_best_segpath (m_bestSegPath);

#ifdef DEBUG
    std::vector<unsigned>::iterator it;

    printf ("best lattice path: ");
    for (it = m_bestPath.begin(); it != m_bestPath.end(); ++it)
        printf ("%d ", *it);

    printf ("best segments path: ");
    for (it = m_bestSegPath.begin(); it != m_bestSegPath.end(); ++it)
        printf ("%d ", *it);
    printf ("\n");
#endif

}