Beispiel #1
0
void
CIMIContext::_transferBetween(unsigned start, unsigned end,
                              TLexiconState* plxst, unsigned wid,
                              double ic)
{
    CLatticeFrame &start_fr = m_lattice[start];
    CLatticeFrame &end_fr = m_lattice[end];

    TLatticeState node(-1.0, end, plxst);
    TSentenceScore efic(ic);

    if ((end_fr.m_bwType & CLatticeFrame::USER_SELECTED)
        && end_fr.m_selWord.m_wordId == wid)
        efic = TSentenceScore(30000, 1.0);

    static double s_history_distribution[] = {
        0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50
    };

    double weight_h = s_history_distribution[m_historyPower];
    double weight_s = 1.0 - weight_h;

    CLatticeStates::iterator it = start_fr.m_latticeStates.begin();
    CLatticeStates::iterator ite = start_fr.m_latticeStates.end();

    for (; it != ite; ++it) {
        // for 1-length lattice states, replace ending_word_id (comma)
        // with none_word_id (recognized by CThreadSlm)
    unsigned _wid = wid;
        if (wid == ENDING_WORD_ID && it->m_pBackTraceNode && it->m_pBackTraceNode->m_frIdx == 0)
            _wid = NONE_WORD_ID;

        node.m_pBackTraceNode = &(*it);
        node.m_backTraceWordId = wid;

        double ts = m_pModel->transfer(it->m_slmState, _wid, node.m_slmState);
        m_pModel->historify(node.m_slmState);

        // backward to pseudo root, so wid is probably a user word,
        // save the wid in idx field, so that later we could get it via
        // CThreadSlm::lastWordId, to calculate p_{cache} correctly.
        if (node.m_slmState.getLevel() == 0
            && m_pHistory && m_pHistory->seenBefore(wid))
            node.m_slmState.setIdx(wid);  // an pseudo unigram node state

        if (m_pHistory) {
            unsigned history[2] = { m_pModel->lastWordId(it->m_slmState), _wid };
            double hpr = m_pHistory->pr(history, history + 2);
            ts = weight_s * ts + weight_h * hpr;
        }

        node.m_score = it->m_score * efic * TSentenceScore(ts);
        // std::string buf;
        // node.m_score.toString(buf);
        // printf("node score %s ts=%lf ", buf.c_str(), ts);
        // it->m_score.toString(buf);
        // printf("%s ic=%lf\n", buf.c_str(), ic);
        end_fr.m_latticeStates.add(node);
    }
}
Beispiel #2
0
void CIMIContext::_transferBetween (unsigned start, unsigned end, TLexiconState* plxst, unsigned wid, double ic)
{
    CLatticeFrame &start_fr = m_lattice[start];
    CLatticeFrame &end_fr   = m_lattice[end];

    TLatticeState node (-1.0, end, plxst);
    TSentenceScore efic (ic);

    if ((end_fr.m_bwType & CLatticeFrame::USER_SELECTED) && end_fr.m_bestWord.m_wordId == wid)
        efic = TSentenceScore (30000, 1.0);

    static double s_history_distribution[11] = {0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50};
    double weight_h = s_history_distribution[m_historyPower];
    double weight_s = 1.0 - weight_h;

    CLatticeStates::iterator it  = start_fr.m_latticeStates.begin();
    CLatticeStates::iterator ite = start_fr.m_latticeStates.end();

    for (; it != ite; ++it) {
        node.m_pBackTraceNode = &(*it);
        node.m_backTraceWordId = wid;

        double ts = m_pModel->transfer(it->m_slmState, wid, node.m_slmState);
        m_pModel->historify(node.m_slmState);

        // backward to psuedo root, so wid is probably a user word, save the wid in idx field,
        // so that later we could get it via CThreadSlm::lastWordId, to calculate p_{cache} correctly.
        if (node.m_slmState.getLevel() == 0 && m_pHistory && m_pHistory->seenBefore(wid))
            node.m_slmState.setIdx(wid);  // an psuedo unigram node state

        if (m_pHistory) {
            unsigned history[2] = {m_pModel->lastWordId(it->m_slmState), wid};
            double hpr = m_pHistory->pr(history, history+2);
            ts = weight_s * ts + weight_h*hpr;
        }

        node.m_score = it->m_score * efic * TSentenceScore(ts);
        end_fr.m_latticeStates.push_back (node);
    }
}