Example #1
0
void
PinyinPhraseLib::create_pinyin_index ()
{
	if (!m_pinyin_table || !m_pinyin_table->size()) return;

	clear_phrase_index ();

	uint32 pinyin_offset = 0;

	WideString content;
	Phrase phrase;

	for (uint32 i=0; i<m_phrase_lib.number_of_phrases (); i++) {
		phrase = m_phrase_lib.get_phrase_by_index (i);

		content = phrase.get_content ();

		std::vector<PinyinKeyVector> key_vv;
		m_pinyin_table->find_key_strings (key_vv, content);

		for (uint32 j=0; j<key_vv.size(); j++) {
			for (uint32 k=0; k<key_vv[j].size(); k++)
				m_pinyin_lib.push_back (key_vv[j][k]);

			insert_pinyin_phrase_into_index (phrase.get_phrase_offset (), pinyin_offset);

			pinyin_offset = m_pinyin_lib.size ();
		}
#if 0
		if (key_vv.size () > 1 && content.length () > 1) {
			for (uint32 x=0; x<key_vv.size (); x++) {
				std::cerr << phrase.frequency () << "\t| " << 
						utf8_wcstombs (content) << " =";
				for (uint32 y=0; y<key_vv[x].size (); y++)
					std::cerr << " " << key_vv[x][y];
				std::cerr << "\n";
			}
		}
#endif
		std::cout << "." << std::flush;
	}

	sort_phrase_tables ();

	std::cout << "Phrase Number = " << count_phrase_number () << "\n";
}
Example #2
0
bool
PinyinPhraseLib::insert_phrase_into_index (const Phrase &phrase, const PinyinKeyVector &keys)
{
	if (!phrase.valid ()) return false;

	// First find out all of the chars which have no valid key in keys.
	WideString content = phrase.get_content ();
	WideString nokey_content;

	PinyinKeyVector final_keys;

	std::vector<uint32> content_state;

	std::vector<PinyinKeyVector> key_vv;

	uint32 pinyin_offset = m_pinyin_lib.size ();

	uint32 i,j,k;

	for (i=0; i<content.length (); ++i) {
		if (i < keys.size () &&
			keys [i].get_initial () != SCIM_PINYIN_ZeroInitial &&
			keys [i].get_final () != SCIM_PINYIN_ZeroFinal) {
			//This key is valid, store it into final_key.
			final_keys.push_back (keys [i]);
			content_state.push_back (1);
		} else {
			//This key is invalid, put the content into nokey_content,
			//and store a zero key into final_keys,
			//and store the position into invalid_key_pos.
			nokey_content.push_back (content [i]);
			final_keys.push_back (PinyinKey ());
			content_state.push_back (0);
		}
	}

	if (nokey_content.length ())
		m_pinyin_table->find_key_strings (key_vv, nokey_content);
	else
		key_vv.push_back (PinyinKeyVector ());

	std::sort (m_phrases [content.length () -1].begin (),
			   m_phrases [content.length () -1].end (),
			   PinyinKeyExactLessThan ());

	if (m_pinyin_lib.capacity () < m_pinyin_lib.size () + key_vv.size () * content.length ())
		m_pinyin_lib.reserve (m_pinyin_lib.size () + key_vv.size () * content.length () + 1);

	for (i=0; i<key_vv.size(); ++i) {
		for (j=0, k=0; j<content.length (); ++j) { 
			if (content_state [j])
				m_pinyin_lib.push_back (final_keys [j]);
			else
				m_pinyin_lib.push_back (key_vv [i][k++]);
		}

		insert_pinyin_phrase_into_index (phrase.get_phrase_offset (),
										 pinyin_offset);

		pinyin_offset = m_pinyin_lib.size ();
	}

	std::sort (m_phrases [content.length () -1].begin (),
			   m_phrases [content.length () -1].end (), m_pinyin_key_less);

	return true;
}