/**
 * Scale every phrase frequency down so that the current maximum frequency
 * maps onto max_freq.
 *
 * Nothing is done when max_freq is zero or when the current maximum
 * frequency reported by m_phrase_lib is already below max_freq.
 *
 * @param max_freq  Upper bound the largest frequency is scaled to;
 *                  0 means "do not rescale".
 */
void
PinyinPhraseLib::optimize_phrase_frequencies (uint32 max_freq)
{
    const uint32 current_max = m_phrase_lib.get_max_phrase_frequency ();

    // Past this guard current_max >= max_freq > 0, so the division is safe.
    if (!max_freq || current_max < max_freq)
        return;

    // Factor in (0, 1]; it is exactly 1.0 when current_max == max_freq.
    const double scale = static_cast<double> (max_freq) / current_max;

    for (int index = 0;
         index < static_cast<int> (m_phrase_lib.number_of_phrases ());
         ++index) {
        // NOTE(review): presumably Phrase is a handle into m_phrase_lib, so
        // set_frequency () updates the stored entry rather than a detached
        // copy -- confirm against the Phrase class.
        Phrase entry = m_phrase_lib.get_phrase_by_index (index);

        // The scaled value is truncated toward zero by the integer cast.
        entry.set_frequency (static_cast<uint32> (entry.frequency () * scale));
    }
}
void PinyinPhraseLib::create_pinyin_index () { if (!m_pinyin_table || !m_pinyin_table->size()) return; clear_phrase_index (); uint32 pinyin_offset = 0; WideString content; Phrase phrase; for (uint32 i=0; i<m_phrase_lib.number_of_phrases (); i++) { phrase = m_phrase_lib.get_phrase_by_index (i); content = phrase.get_content (); std::vector<PinyinKeyVector> key_vv; m_pinyin_table->find_key_strings (key_vv, content); for (uint32 j=0; j<key_vv.size(); j++) { for (uint32 k=0; k<key_vv[j].size(); k++) m_pinyin_lib.push_back (key_vv[j][k]); insert_pinyin_phrase_into_index (phrase.get_phrase_offset (), pinyin_offset); pinyin_offset = m_pinyin_lib.size (); } #if 0 if (key_vv.size () > 1 && content.length () > 1) { for (uint32 x=0; x<key_vv.size (); x++) { std::cerr << phrase.frequency () << "\t| " << utf8_wcstombs (content) << " ="; for (uint32 y=0; y<key_vv[x].size (); y++) std::cerr << " " << key_vv[x][y]; std::cerr << "\n"; } } #endif std::cout << "." << std::flush; } sort_phrase_tables (); std::cout << "Phrase Number = " << count_phrase_number () << "\n"; }