void CIMIContext::_transferBetween(unsigned start, unsigned end, TLexiconState* plxst, unsigned wid, double ic) { CLatticeFrame &start_fr = m_lattice[start]; CLatticeFrame &end_fr = m_lattice[end]; TLatticeState node(-1.0, end, plxst); TSentenceScore efic(ic); if ((end_fr.m_bwType & CLatticeFrame::USER_SELECTED) && end_fr.m_selWord.m_wordId == wid) efic = TSentenceScore(30000, 1.0); static double s_history_distribution[] = { 0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50 }; double weight_h = s_history_distribution[m_historyPower]; double weight_s = 1.0 - weight_h; CLatticeStates::iterator it = start_fr.m_latticeStates.begin(); CLatticeStates::iterator ite = start_fr.m_latticeStates.end(); for (; it != ite; ++it) { // for 1-length lattice states, replace ending_word_id (comma) // with none_word_id (recognized by CThreadSlm) unsigned _wid = wid; if (wid == ENDING_WORD_ID && it->m_pBackTraceNode && it->m_pBackTraceNode->m_frIdx == 0) _wid = NONE_WORD_ID; node.m_pBackTraceNode = &(*it); node.m_backTraceWordId = wid; double ts = m_pModel->transfer(it->m_slmState, _wid, node.m_slmState); m_pModel->historify(node.m_slmState); // backward to pseudo root, so wid is probably a user word, // save the wid in idx field, so that later we could get it via // CThreadSlm::lastWordId, to calculate p_{cache} correctly. if (node.m_slmState.getLevel() == 0 && m_pHistory && m_pHistory->seenBefore(wid)) node.m_slmState.setIdx(wid); // an pseudo unigram node state if (m_pHistory) { unsigned history[2] = { m_pModel->lastWordId(it->m_slmState), _wid }; double hpr = m_pHistory->pr(history, history + 2); ts = weight_s * ts + weight_h * hpr; } node.m_score = it->m_score * efic * TSentenceScore(ts); // std::string buf; // node.m_score.toString(buf); // printf("node score %s ts=%lf ", buf.c_str(), ts); // it->m_score.toString(buf); // printf("%s ic=%lf\n", buf.c_str(), ic); end_fr.m_latticeStates.add(node); } }
void CIMIContext::_transferBetween (unsigned start, unsigned end, TLexiconState* plxst, unsigned wid, double ic) { CLatticeFrame &start_fr = m_lattice[start]; CLatticeFrame &end_fr = m_lattice[end]; TLatticeState node (-1.0, end, plxst); TSentenceScore efic (ic); if ((end_fr.m_bwType & CLatticeFrame::USER_SELECTED) && end_fr.m_bestWord.m_wordId == wid) efic = TSentenceScore (30000, 1.0); static double s_history_distribution[11] = {0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50}; double weight_h = s_history_distribution[m_historyPower]; double weight_s = 1.0 - weight_h; CLatticeStates::iterator it = start_fr.m_latticeStates.begin(); CLatticeStates::iterator ite = start_fr.m_latticeStates.end(); for (; it != ite; ++it) { node.m_pBackTraceNode = &(*it); node.m_backTraceWordId = wid; double ts = m_pModel->transfer(it->m_slmState, wid, node.m_slmState); m_pModel->historify(node.m_slmState); // backward to psuedo root, so wid is probably a user word, save the wid in idx field, // so that later we could get it via CThreadSlm::lastWordId, to calculate p_{cache} correctly. if (node.m_slmState.getLevel() == 0 && m_pHistory && m_pHistory->seenBefore(wid)) node.m_slmState.setIdx(wid); // an psuedo unigram node state if (m_pHistory) { unsigned history[2] = {m_pModel->lastWordId(it->m_slmState), wid}; double hpr = m_pHistory->pr(history, history+2); ts = weight_s * ts + weight_h*hpr; } node.m_score = it->m_score * efic * TSentenceScore(ts); end_fr.m_latticeStates.push_back (node); } }