Esempio n. 1
0
/**
 * Collect the words of one sentence by walking the lattice backwards
 * from frame `end` towards frame `start`, following each word's m_start.
 *
 * @param result  receives the words in left-to-right order. It is cleared
 *                first, except when `rank` is rejected, in which case it
 *                is left untouched.
 * @param rank    -1 picks each frame's m_selWord (the selected sentence);
 *                0 .. m_nBest-1 picks each frame's m_bestWords[rank].
 * @param start   frame index at which the walk stops.
 * @param end     frame index at which the walk begins; UINT_MAX stands
 *                for "m_tailIdx - 1".
 * @return the number of words stored in `result`. The walk ends early
 *         (returning what was gathered so far) if a frame has no entry
 *         for `rank` or if a word would not move the walk backwards.
 */
unsigned
CIMIContext::getBestSentence(CCandidates& result, int rank,
                             unsigned start, unsigned end)
{
    // -1 means selected sentence
    if (rank < -1 || rank >= (int) m_nBest)
        return 0;

    result.clear();

    if (end == UINT_MAX) {
        // "m_tailIdx - 1" would wrap around for an empty lattice
        if (m_tailIdx == 0)
            return 0;
        end = m_tailIdx - 1;
    }

    // skip trailing frames that carry no best word
    while (end > start && m_lattice[end].m_bwType == CLatticeFrame::NO_BESTWORD)
        end--;

    unsigned i = end, nWordConverted = 0;
    while (i > start) {
        CLatticeFrame& fr = m_lattice[i];

        // A frame does not necessarily hold an entry for every rank.
        // Indexing m_bestWords blindly would add a default-constructed
        // entry to the lattice and then follow its m_start, so probe first.
        if (rank >= 0 && fr.m_bestWords.find(rank) == fr.m_bestWords.end())
            break;

        const CCandidate& word =
            (rank < 0) ? fr.m_selWord : fr.m_bestWords[rank];

        // Each step must move strictly towards `start`; otherwise the
        // loop would never terminate.
        if (word.m_start >= i)
            break;

        result.insert(result.begin(), word);
        i = word.m_start;
        nWordConverted++;
    }
    return nWordConverted;
}
Esempio n. 2
0
unsigned
CIMIContext::getBestSentence(std::vector<unsigned>& result, int rank,
                             unsigned start, unsigned end)
{
    CCandidates sentence;
    unsigned nWordConverted = getBestSentence(sentence, rank, start, end);
    result.clear();
    for (size_t i = 0; i < sentence.size(); i++) {
        result.push_back(sentence[i].m_wordId);
    }
    return nWordConverted;
}
Esempio n. 3
0
/**
 * String flavour of getBestSentence(): runs the CCandidates overload with
 * the same arguments and concatenates each word's m_cwstr into `result`.
 *
 * @param result  replaced by the concatenated sentence text
 * @return the value returned by the CCandidates overload (words without a
 *         string are still counted, they just contribute no text)
 */
unsigned
CIMIContext::getBestSentence(wstring& result, int rank,
                             unsigned start, unsigned end)
{
    CCandidates sentence;
    unsigned nWordConverted = getBestSentence(sentence, rank, start, end);
    result.clear();
    for (size_t i = 0; i < sentence.size(); i++) {
        // m_cwstr may be null (getCandidates() checks for that as well);
        // appending a null character pointer is undefined behaviour.
        if (sentence[i].m_cwstr)
            result += sentence[i].m_cwstr;
    }
    return nWordConverted;
}
Esempio n. 4
0
std::vector<CCandidates>
CIMIContext::getBestSentenceTails(int rank, unsigned start, unsigned end)
{
    std::vector<CCandidates> result;
    if (rank < 0) {
        return result;
    }

    CCandidates sentence;
    unsigned word_num = getBestSentence(sentence, rank, start, end);
    unsigned tail_word_num = word_num;

    while (tail_word_num > 1) {
        unsigned dec = tail_word_num / (m_maxTailCandidateNum + 1) + 1;
        tail_word_num -= std::min(dec, tail_word_num);
        if (tail_word_num <= 1) {
            break;
        }
        CCandidates tail(sentence.begin(), sentence.begin() + tail_word_num);
        result.push_back(tail);
    }
    return result;
}
Esempio n. 5
0
void
CIMIContext::getCandidates(unsigned frIdx, CCandidates& result)
{
    TCandiPair cp;
    static std::map<wstring, TCandiPair> candidates_map;
    std::map<wstring, TCandiPair>::iterator candidates_it;

    candidates_map.clear();
    result.clear();

    std::vector<unsigned> st;
    getSelectedSentence(st, frIdx);

    cp.m_candi.m_start = m_candiStarts = frIdx++;

    for (; frIdx < m_tailIdx; ++frIdx) {
        if (m_lattice[frIdx + 1].isSyllableSepFrame())
            continue;

        CLatticeFrame &fr = m_lattice[frIdx];
        if (!fr.isSyllableFrame())
            continue;

        cp.m_candi.m_end = frIdx;
        if (fr.m_bwType != CLatticeFrame::NO_BESTWORD) {
            for (size_t i = 0; i < m_nBest; i++) {
                if (fr.m_bestWords.find(i) == fr.m_bestWords.end())
                    continue;
                CCandidate candi = fr.m_bestWords[i];
                if (candi.m_start != m_candiStarts)
                    continue;
                if (candi.m_pLexiconState == NULL)
                    continue;

                TLexiconState & lxst = *(candi.m_pLexiconState);
                int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
                if (len == 0) len = 1;

                cp.m_candi = candi;
                cp.m_Rank =
                    TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED,
                               fr.m_bwType & CLatticeFrame::BESTWORD,
                               len, false, 0);
                candidates_map[candi.m_cwstr] = cp;
            }
        }

        bool found = false;
        CLexiconStates::iterator it = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            TLexiconState & lxst = *it;

            if (lxst.m_start != m_candiStarts)
                continue;

            int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
            if (0 == len) len = 1;

            found = true;
            unsigned word_num;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num);

            for (unsigned i = 0; i < word_num; ++i) {
                if (m_csLevel < words[i].m_csLevel)
                    continue;

                cp.m_candi.m_wordId = words[i].m_id;
                cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = &lxst;
                if (!cp.m_candi.m_cwstr)
                    continue;

                //sorting according to the order in PinYinTire
                cp.m_Rank =
                    TCandiRank(false,
                               !st.empty() && st.front() == cp.m_candi.m_wordId,
                               len, false, i);
                candidates_it = candidates_map.find(cp.m_candi.m_cwstr);
                if (candidates_it == candidates_map.end()
                    || cp.m_Rank < candidates_it->second.m_Rank
                    || cp.m_candi.m_wordId > INI_USRDEF_WID) {
                    candidates_map[cp.m_candi.m_cwstr] = cp;
                    // print_wide(cp.m_candi.m_cwstr);
                    // printf(" ");
                }
            }
            // puts("");
        }

        if (!found) continue;  // FIXME: need better solution later

        if (m_bDynaCandiOrder) {
            CLatticeStates::iterator it = fr.m_latticeStates.begin();
            CLatticeStates::iterator ite = fr.m_latticeStates.end();
            // printf("adjusting ");
            for (; it != ite; ++it) {
                TLatticeState & ltst = *it;

                if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts)
                    continue;

                cp.m_candi.m_wordId = ltst.m_backTraceWordId;
                cp.m_candi.m_cwstr = _getWstr(cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = ltst.m_pLexiconState;
                if (!cp.m_candi.m_cwstr)
                    continue;

                int len = cp.m_candi.m_pLexiconState->m_syls.size() -
                          cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies;
                if (0 == len) len = 1;
                cp.m_Rank = TCandiRank(false,
                                       !st.empty() && st.front() ==
                                       cp.m_candi.m_wordId,
                                       len, true, ltst.m_score /
                                       ltst.m_pBackTraceNode->m_score);
                candidates_it = candidates_map.find(cp.m_candi.m_cwstr);
                if (candidates_it == candidates_map.end()
                    || cp.m_Rank < candidates_it->second.m_Rank
                    || cp.m_candi.m_wordId > INI_USRDEF_WID) {
                    // print_wide(cp.m_candi.m_cwstr);
                    // std::string buf;
                    // ltst.m_score.toString(buf);
                    // printf("len:%d %s", len, buf.c_str());
                    // ltst.m_pBackTraceNode->m_score.toString(buf);
                    // printf("%s ", buf.c_str());
                    candidates_map[cp.m_candi.m_cwstr] = cp;
                }
            }
            // puts("");
        }

        m_candiEnds = frIdx;
    }

    std::vector<TCandiPairPtr> vec;

    vec.reserve(candidates_map.size());
    for (candidates_it = candidates_map.begin();
         candidates_it != candidates_map.end(); ++candidates_it) {
        vec.push_back(TCandiPairPtr(&(candidates_it->second)));
    }

    std::sort(vec.begin(), vec.end());
    for (size_t i = 0; i < vec.size(); i++) {
        // print_wide(vec[i].m_Ptr->m_candi.m_cwstr);
        // printf(" ");
        result.push_back(vec[i].m_Ptr->m_candi);
    }
    // puts("");
}
Esempio n. 6
0
void CIMIContext::getCandidates (unsigned frIdx, CCandidates& result)
{
    TCandiPair cp;
    static std::map<wstring, TCandiPair> map;
    std::map<wstring, TCandiPair>::iterator it_map;

    map.clear();
    result.clear();

    std::vector<unsigned> st;
    getBestSentence (st, frIdx);

    cp.m_candi.m_start = m_candiStarts = frIdx++;

    for (;frIdx < m_tailIdx; ++frIdx)  {
        CLatticeFrame &fr = m_lattice[frIdx];

        if (!fr.isSyllableFrame ())
            continue;

        cp.m_candi.m_end = frIdx;
        if (fr.m_bwType != CLatticeFrame::NO_BESTWORD && fr.m_bestWord.m_start == m_candiStarts) {
            cp.m_candi = fr.m_bestWord;
            cp.m_Rank = TCandiRank(fr.m_bwType & CLatticeFrame::USER_SELECTED,
                                   fr.m_bwType & CLatticeFrame::BESTWORD,
                                   0, false, 0);
            map [cp.m_candi.m_cwstr] = cp;
        }

        bool found = false;
        CLexiconStates::iterator it  = fr.m_lexiconStates.begin();
        CLexiconStates::iterator ite = fr.m_lexiconStates.end();
        for (; it != ite; ++it) {
            TLexiconState & lxst = *it;

            if (lxst.m_start != m_candiStarts)
                continue;

            int len = lxst.m_syls.size() - lxst.m_num_of_inner_fuzzies;
            if (0 == len) len = 1;

            found = true;
            unsigned word_num;
            const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num);

            for (unsigned i=0; i<word_num; ++i) {
                if (m_csLevel < words[i].m_csLevel)
                    continue;

                cp.m_candi.m_wordId = words[i].m_id;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = &lxst;
                if (!cp.m_candi.m_cwstr)
                    continue;

                //sorting according to the order in PinYinTire
                cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, false, i);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map [cp.m_candi.m_cwstr] = cp;
            }
        }

        if (!found) continue; // FIXME: need better solution later

        if (m_bDynaCandiOrder) {
            CLatticeStates::iterator it  = fr.m_latticeStates.begin();
            CLatticeStates::iterator ite = fr.m_latticeStates.end();
            for (; it != ite; ++it) {
                TLatticeState & ltst = *it;

                if (ltst.m_pBackTraceNode->m_frIdx != m_candiStarts)
                    continue;

                cp.m_candi.m_wordId = ltst.m_backTraceWordId;
                cp.m_candi.m_cwstr = _getWstr (cp.m_candi.m_wordId);
                cp.m_candi.m_pLexiconState = ltst.m_pLexiconState;
                if (!cp.m_candi.m_cwstr)
                    continue;

                int len = cp.m_candi.m_pLexiconState->m_syls.size() -
                          cp.m_candi.m_pLexiconState->m_num_of_inner_fuzzies;
                if (0 == len) len = 1;
                cp.m_Rank = TCandiRank(false, st.front() == cp.m_candi.m_wordId, len, true, ltst.m_score/ltst.m_pBackTraceNode->m_score);
                it_map = map.find(cp.m_candi.m_cwstr);
                if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank || cp.m_candi.m_wordId > INI_USRDEF_WID)
                    map[cp.m_candi.m_cwstr] = cp;
            }
        }

        m_candiEnds = frIdx;
    }

    std::vector<TCandiPairPtr> vec;

    vec.reserve(map.size());
    std::map<wstring, TCandiPair>::iterator it_mapE = map.end();
    for (it_map = map.begin(); it_map != it_mapE; ++it_map)
        vec.push_back(TCandiPairPtr(&(it_map->second)));
    std::make_heap(vec.begin(), vec.end());
    std::sort_heap(vec.begin(), vec.end());

    for (int i=0, sz=vec.size(); i < sz; ++i)
        result.push_back(vec[i].m_Ptr->m_candi);
}