bool CIMIContext::searchFrom (unsigned idx) { bool affectCandidates = (idx <= m_candiEnds); _clearBestPaths (); for (; idx<=m_tailIdx; ++idx) { CLatticeFrame &fr = m_lattice[idx]; if (fr.m_type == CLatticeFrame::UNUSED) continue; fr.m_latticeStates.clear (); /* user selected word might be cut in next step */ if (fr.m_bwType & CLatticeFrame::USER_SELECTED) _transferBetween (fr.m_bestWord.m_start, idx, fr.m_bestWord.m_pLexiconState, fr.m_bestWord.m_wordId); CLexiconStates::iterator it = fr.m_lexiconStates.begin (); CLexiconStates::iterator ite = fr.m_lexiconStates.end (); for (; it != ite; ++it) { unsigned word_num = 0; TLexiconState &lxst = *it; const CPinyinTrie::TWordIdInfo *words = lxst.getWords (word_num); if (!word_num) continue; if (lxst.m_start == m_candiStarts && idx > m_candiEnds) affectCandidates = true; /* only selected the word with higher unigram probablities */ int maxsz = it->m_bFuzzy? MAX_LEXICON_TRIES>1: MAX_LEXICON_TRIES; int sz = word_num<maxsz? word_num: maxsz; int i = 0, count = 0; double ic = it->m_bFuzzy? 0.5: 1.0; for (i = 0; count < sz && i < sz && (words[i].m_bSeen || count < 2); ++i) { if (m_csLevel >= words[i].m_csLevel) { _transferBetween (lxst.m_start, idx, &lxst, words[i].m_id, ic); ++ count; } } /* try extra words in history cache */ if (m_pHistory) { for (; i < word_num; ++i) { if (m_csLevel >= words[i].m_csLevel && m_pHistory->seenBefore (words[i].m_id)) _transferBetween (lxst.m_start, idx, &lxst, words[i].m_id, ic); } } } } _backTraceBestPaths (); return affectCandidates; }
bool CIMIContext::searchFrom(unsigned idx) { bool affectCandidates = (idx <= m_candiEnds); for (; idx <= m_tailIdx; ++idx) { CLatticeFrame &fr = m_lattice[idx]; if (fr.m_type == CLatticeFrame::UNUSED) continue; fr.m_latticeStates.clear(); /* user selected word might be cut in next step */ if (fr.m_bwType & CLatticeFrame::USER_SELECTED) { _transferBetween(fr.m_selWord.m_start, idx, fr.m_selWord.m_pLexiconState, fr.m_selWord.m_wordId); } CLexiconStates::iterator it = fr.m_lexiconStates.begin(); CLexiconStates::iterator ite = fr.m_lexiconStates.end(); for (; it != ite; ++it) { unsigned word_num = 0; TLexiconState &lxst = *it; const CPinyinTrie::TWordIdInfo *words = lxst.getWords(word_num); if (!word_num) continue; if (lxst.m_start == m_candiStarts && idx > m_candiEnds) affectCandidates = true; // only selected the word with higher unigram probablities, and // narrow the search deepth and lower the initial score for fuzzy // syllables int maxsz = it->m_bFuzzy ? MAX_LEXICON_TRIES / 2 : MAX_LEXICON_TRIES; double ic = it->m_bFuzzy ? 0.5 : 1.0; int sz = (int) word_num < maxsz ? (int) word_num : maxsz; int i = 0, count = 0; while (count < sz && i < sz && (words[i].m_bSeen || count < 2)) { if (m_csLevel >= words[i].m_csLevel) { // printf("cost %d\n", words[i].m_cost); _transferBetween(lxst.m_start, idx, &lxst, words[i].m_id, ic * exp2_tbl[words[i].m_cost]); ++count; } i++; } /* try extra words in history cache */ if (m_pHistory) { while (i < (int) word_num) { if (m_csLevel >= words[i].m_csLevel && m_pHistory->seenBefore(words[i].m_id)) { // printf("history cost %d\n", words[i].m_cost); _transferBetween(lxst.m_start, idx, &lxst, words[i].m_id, ic * exp2_tbl[words[i].m_cost]); } i++; } } } } _clearPaths(); m_path.clear(); m_segPath.clear(); m_nBest = 0; std::vector<TLatticeState> tail_states = m_lattice[m_tailIdx].m_latticeStates.getFilteredResult(); #ifdef DEBUG for (int i = 0; i < tail_states.size(); i++) { std::string score; tail_states[i].m_score.toString(score); printf("score[%d]: %s\n", i, score.c_str()); } #endif for (size_t i = 0; i < m_maxBest; i++) { TPath path, segpath; if (_backTracePaths(tail_states, m_nBest, path, segpath)) { m_path.push_back(path); m_segPath.push_back(segpath); m_nBest++; } } if (m_pPySegmentor && m_nBest > 0 && !m_segPath[0].empty()) m_pPySegmentor->notify_best_segpath(m_segPath[0]); return affectCandidates; }