Exemplo n.º 1
0
/**
 * Rescale all phrase frequencies so that they fit within max_freq.
 *
 * Nothing happens when max_freq is zero, or when the largest frequency
 * reported by m_phrase_lib is already below max_freq. Otherwise each phrase
 * frequency is multiplied by (max_freq / current maximum) and truncated
 * back to uint32.
 *
 * @param max_freq Target upper bound for frequencies; 0 disables rescaling.
 */
void
PinyinPhraseLib::optimize_phrase_frequencies (uint32 max_freq)
{
	uint32 cur_max = m_phrase_lib.get_max_phrase_frequency ();

	if (cur_max < max_freq || !max_freq) return;

	// Past the guard cur_max >= max_freq > 0, so the division is safe and
	// the scale factor is never greater than 1.
	double scale = static_cast<double> (max_freq) / cur_max;

	// Use an unsigned index, as create_pinyin_index () does: narrowing the
	// phrase count to int would turn a very large count negative and skip
	// the loop silently.
	for (uint32 i = 0; i < m_phrase_lib.number_of_phrases (); ++i) {
		// NOTE(review): presumably Phrase is a handle into m_phrase_lib, so
		// set_frequency () updates the stored phrase -- confirm.
		Phrase phrase = m_phrase_lib.get_phrase_by_index (i);
		phrase.set_frequency (static_cast<uint32> (phrase.frequency () * scale));
	}
}
Exemplo n.º 2
0
void
PinyinPhraseLib::create_pinyin_index ()
{
	if (!m_pinyin_table || !m_pinyin_table->size()) return;

	clear_phrase_index ();

	uint32 pinyin_offset = 0;

	WideString content;
	Phrase phrase;

	for (uint32 i=0; i<m_phrase_lib.number_of_phrases (); i++) {
		phrase = m_phrase_lib.get_phrase_by_index (i);

		content = phrase.get_content ();

		std::vector<PinyinKeyVector> key_vv;
		m_pinyin_table->find_key_strings (key_vv, content);

		for (uint32 j=0; j<key_vv.size(); j++) {
			for (uint32 k=0; k<key_vv[j].size(); k++)
				m_pinyin_lib.push_back (key_vv[j][k]);

			insert_pinyin_phrase_into_index (phrase.get_phrase_offset (), pinyin_offset);

			pinyin_offset = m_pinyin_lib.size ();
		}
#if 0
		if (key_vv.size () > 1 && content.length () > 1) {
			for (uint32 x=0; x<key_vv.size (); x++) {
				std::cerr << phrase.frequency () << "\t| " << 
						utf8_wcstombs (content) << " =";
				for (uint32 y=0; y<key_vv[x].size (); y++)
					std::cerr << " " << key_vv[x][y];
				std::cerr << "\n";
			}
		}
#endif
		std::cout << "." << std::flush;
	}

	sort_phrase_tables ();

	std::cout << "Phrase Number = " << count_phrase_number () << "\n";
}