Exemplo n.º 1
0
/**
 * Rebuild the preedit string held by @p ps.
 *
 * The text starts with whatever m_pIC->getBestSentence() writes for the
 * arguments (0, m_candiFrIdx); each of those characters is tagged
 * HANZI_CHAR | USER_CHOICE. Afterwards the raw input of every segment
 * that ends beyond m_candiFrIdx is appended, tagged according to the
 * lattice frame found at the end of that segment. The caret position is
 * stored with ps.setCaret() at the very end.
 *
 * @param ps  preedit string to fill; it is cleared first.
 */
void
CIMIClassicView::getPreeditString(IPreeditString& ps)
{
    ps.clear();

    wstring& text = ps.getString ();
    IPreeditString::CCharTypeVec& types = ps.getCharTypeVec ();

    m_pIC->getBestSentence (text, 0, m_candiFrIdx);

    // Unless the cursor turns out to lie inside one of the segments
    // below, the caret stays right behind the sentence part.
    int caret = text.size ();
    types.reserve (caret);
    const int sentenceType = IPreeditString::HANZI_CHAR | IPreeditString::USER_CHOICE;
    for (int n = 0; n < caret; ++n)
        types.push_back (sentenceType);

    const wstring& input = m_pPySegmentor->getInputBuffer ();
    IPySegmentor::TSegmentVec& segs = m_pPySegmentor->getSegments ();
    IPySegmentor::TSegmentVec::const_iterator seg = segs.begin ();
    const IPySegmentor::TSegmentVec::const_iterator segEnd = segs.end ();

    CLattice& lattice = m_pIC->getLattice ();

    // [from, to) is the range of the current segment inside the input buffer.
    for (unsigned from = 0, len = 0; seg != segEnd; ++seg, from += len) {
        len = seg->m_len;
        const unsigned to = from + len;

        // Segments ending at or before m_candiFrIdx are skipped.
        if (to <= m_candiFrIdx)
            continue;

        // Cursor inside (or at the end of) this segment: the caret is the
        // current text length plus the cursor's offset into the segment.
        if (from < m_cursorFrIdx && m_cursorFrIdx <= to)
            caret = text.size () + (m_cursorFrIdx - from);

        // The frame at the end of the segment decides how it is tagged.
        CLatticeFrame& frame = lattice [to];
        int type;
        if (frame.isSyllableSepFrame ())
            type = IPreeditString::BOUNDARY | IPreeditString::USER_CHOICE;
        else if (frame.m_type == CLatticeFrame::ASCII)
            type = IPreeditString::ASCII_CHAR;
        else if (frame.m_type == CLatticeFrame::SYMBOL)
            type = IPreeditString::SYMBOL_CHAR;
        else
            type = IPreeditString::PINYIN_CHAR;

        text.insert (text.end (), input.begin () + from, input.begin () + to);
        for (unsigned k = 0; k < len; ++k)
            types.push_back (type);

        // A blank tagged BOUNDARY follows a syllable frame that is not a
        // separator frame, except after the last segment or when the
        // next frame is a separator frame.
        if (frame.isSyllableFrame () && !frame.isSyllableSepFrame ()
            && seg != segEnd - 1
            && !lattice [to + 1].isSyllableSepFrame ()) {
            text.push_back (' ');
            types.push_back (IPreeditString::BOUNDARY);
        }
    }

    ps.setCaret (caret);
}
Exemplo n.º 2
0
/**
 * Rebuild the preedit string held by @p ps.
 *
 * The text starts with whatever m_pIC->getBestSentence() writes for the
 * arguments (0, m_candiFrIdx); each of those characters is tagged
 * HANZI_CHAR | USER_CHOICE. Afterwards the raw input of every segment
 * that ends beyond m_candiFrIdx is appended, tagged according to the
 * lattice frame found at the end of that segment.
 *
 * Segment boundaries are taken from the context's best segmentation path
 * or, when that path is empty, from the segmentor's own segments. Both
 * sources are reduced to one list of segment end indices so that a single
 * loop renders them.
 *
 * When the input buffer is empty the function returns early and
 * ps.setCaret() is not called.
 *
 * @param ps  preedit string to fill; it is cleared first.
 */
void
CIMIClassicView::getPreeditString(IPreeditString& ps)
{
    ps.clear();

    wstring &wstr = ps.getString ();
    IPreeditString::CCharTypeVec& charTypes = ps.getCharTypeVec ();

    m_pIC->getBestSentence (wstr, 0, m_candiFrIdx);

    // Unless the cursor turns out to lie inside one of the segments
    // below, the caret stays right behind the sentence part.
    int caret = wstr.size ();
    charTypes.reserve (caret);
    for (int i = 0; i < caret; ++i)
        charTypes.push_back (IPreeditString::HANZI_CHAR | IPreeditString::USER_CHOICE);

    const wstring& pystr = m_pPySegmentor->getInputBuffer ();
    std::vector<unsigned>& seg_path = m_pIC->getBestSegPath();

    if (pystr.empty())
        return;

    /* FIXME:
     * CIMIContext would fail to backTrace the bestPathes when there are no latticeStates
     * on frame e.g., 'yiden' in Quanpin mode, need a better solution later!
     */
    // Fallback: express the segmentor's segments in the same form as
    // seg_path, i.e. a leading start index followed by the end index of
    // each segment.
    std::vector<unsigned> fallback_path;
    if (seg_path.empty()) {
        IPySegmentor::TSegmentVec& segments = m_pPySegmentor->getSegments ();
        IPySegmentor::TSegmentVec::const_iterator sit  = segments.begin ();
        IPySegmentor::TSegmentVec::const_iterator site = segments.end ();

        unsigned pos = 0;
        fallback_path.push_back (pos);
        for (; sit != site; ++sit) {
            const unsigned len = sit->m_len;
            pos += len;
            fallback_path.push_back (pos);
        }
    }

    // Both operands are lvalues of the same type, so this binds a
    // reference and does not copy. The chosen path is never empty here.
    const std::vector<unsigned>& path = seg_path.empty() ? fallback_path : seg_path;

    std::vector<unsigned>::const_iterator it = path.begin () + 1;
    const std::vector<unsigned>::const_iterator ite = path.end ();

    CLattice& lattice = m_pIC->getLattice ();

    // [i, end) is the range of the current segment inside the input
    // buffer. The first segment always starts at 0; the path's first
    // element is not read.
    for (unsigned i = 0; it != ite; i = *it, ++it) {
        const unsigned end = *it;
        const unsigned l = end - i;

        // Segments ending at or before m_candiFrIdx are skipped.
        if (end <= m_candiFrIdx)
            continue;

        // Cursor inside (or at the end of) this segment: the caret is the
        // current text length plus the cursor's offset into the segment.
        if (i < m_cursorFrIdx && m_cursorFrIdx <= end)
            caret = wstr.size() + (m_cursorFrIdx-i);

        // The frame at the end of the segment decides how it is tagged.
        CLatticeFrame &fr = lattice [end];
        int ct = IPreeditString::PINYIN_CHAR;
        if (fr.isSyllableSepFrame()) {
            ct = IPreeditString::BOUNDARY | IPreeditString::USER_CHOICE;
        } else if (fr.m_type == CLatticeFrame::ASCII) {
            ct = IPreeditString::ASCII_CHAR;
        } else if (fr.m_type == CLatticeFrame::SYMBOL) {
            ct = IPreeditString::SYMBOL_CHAR;
        }

        wstr.insert (wstr.end(), pystr.begin()+i, pystr.begin()+end);
        for (unsigned c = 0; c < l; ++c)
            charTypes.push_back (ct);

        // A blank tagged BOUNDARY follows a syllable frame that is not a
        // separator frame, except after the last segment or when the
        // next frame is a separator frame.
        if (fr.isSyllableFrame() && !fr.isSyllableSepFrame()) {
            if (it != ite - 1 && !lattice[end+1].isSyllableSepFrame ()) {
                wstr.push_back (' ');
                charTypes.push_back (IPreeditString::BOUNDARY);
            }
        }
    }

    ps.setCaret (caret);
}