int main(int argc, char * argv[]){ PhraseItem phrase_item; utf16_t string1 = 2; PinyinKey key1 = PinyinKey((PinyinInitial)3,(PinyinFinal)3,(PinyinTone)3); PinyinKey key2 = PinyinKey((PinyinInitial)4,(PinyinFinal)4,(PinyinTone)4); phrase_item.set_phrase_string(1, &string1); phrase_item.append_pronunciation(&key1, 100); phrase_item.append_pronunciation(&key2, 300); assert(phrase_item.get_phrase_length() == 1); PinyinKey key3; guint32 freq; phrase_item.get_nth_pronunciation(0, &key3, freq); assert(key3 == key1); assert(freq == 100); phrase_item.get_nth_pronunciation(1, &key3, freq); assert(key3 == key2); assert(freq == 300); PinyinCustomSettings custom; gfloat poss = phrase_item.get_pinyin_possibility(custom, &key1); printf("pinyin possiblitiy:%f\n", poss); assert(phrase_item.get_unigram_frequency() == 0); utf16_t string2; phrase_item.get_phrase_string(&string2); assert(string1 == string2); FacadePhraseIndex phrase_index; assert(!phrase_index.add_phrase_item(1, &phrase_item)); MemoryChunk* chunk = new MemoryChunk; assert(phrase_index.store(0, chunk)); assert(phrase_index.load(0, chunk)); PhraseItem item2; guint32 time = record_time(); for ( size_t i = 0; i < bench_times; ++i){ phrase_index.get_phrase_item(1, item2); assert(item2.get_unigram_frequency() == 0); assert(item2.get_n_pronunciation() == 2); assert(item2.get_phrase_length() == 1); assert(item2.get_pinyin_possibility(custom, &key2) == 0.75); } print_time(time, bench_times); { PhraseItem item3; phrase_index.get_phrase_item(1, item3); item3.increase_pinyin_possibility(custom, &key1, 200); assert(item3.get_pinyin_possibility(custom, &key1) == 0.5) ; } { PhraseItem item5; phrase_index.get_phrase_item(1, item5); gfloat poss = item5.get_pinyin_possibility(custom, &key1); printf("pinyin poss:%f\n", poss); assert(poss == 0.5); } FacadePhraseIndex phrase_index_load; FILE* infile = fopen("../../data/gb_char.table", "r"); if ( NULL == infile ){ fprintf(stderr, "open gb_char.table failed!\n"); exit(ENOENT); } phrase_index_load.load_text(1, infile); fclose(infile); infile = fopen("../../data/gbk_char.table", "r"); if ( NULL == infile ){ fprintf(stderr, "open gbk_char.table failed!\n"); exit(ENOENT); } phrase_index_load.load_text(2, infile); fclose(infile); phrase_index.compat(); MemoryChunk* store1 = new MemoryChunk; phrase_index_load.store(1, store1); phrase_index_load.load(1, store1); MemoryChunk* store2 = new MemoryChunk; phrase_index_load.store(2, store2); phrase_index_load.load(2, store2); phrase_index.compat(); phrase_index_load.get_phrase_item(16870555, item2); assert( item2.get_phrase_length() == 14); assert( item2.get_n_pronunciation() == 1); gunichar2 buf[1024]; item2.get_phrase_string(buf); char * string = g_utf16_to_utf8( buf, 14, NULL, NULL, NULL); printf("%s\n", string); g_free(string); guint32 delta = 3; phrase_index_load.add_unigram_frequency(16870555, delta); phrase_index_load.get_phrase_item(16870555, item2); assert( item2.get_unigram_frequency() == 3); phrase_index_load.get_phrase_item(16777222, item2); assert(item2.get_phrase_length() == 1); assert(item2.get_n_pronunciation() == 6); return 0; }
bool PinyinPhraseLib::insert_phrase_into_index (const Phrase &phrase, const PinyinKeyVector &keys) { if (!phrase.valid ()) return false; // First find out all of the chars which have no valid key in keys. WideString content = phrase.get_content (); WideString nokey_content; PinyinKeyVector final_keys; std::vector<uint32> content_state; std::vector<PinyinKeyVector> key_vv; uint32 pinyin_offset = m_pinyin_lib.size (); uint32 i,j,k; for (i=0; i<content.length (); ++i) { if (i < keys.size () && keys [i].get_initial () != SCIM_PINYIN_ZeroInitial && keys [i].get_final () != SCIM_PINYIN_ZeroFinal) { //This key is valid, store it into final_key. final_keys.push_back (keys [i]); content_state.push_back (1); } else { //This key is invalid, put the content into nokey_content, //and store a zero key into final_keys, //and store the position into invalid_key_pos. nokey_content.push_back (content [i]); final_keys.push_back (PinyinKey ()); content_state.push_back (0); } } if (nokey_content.length ()) m_pinyin_table->find_key_strings (key_vv, nokey_content); else key_vv.push_back (PinyinKeyVector ()); std::sort (m_phrases [content.length () -1].begin (), m_phrases [content.length () -1].end (), PinyinKeyExactLessThan ()); if (m_pinyin_lib.capacity () < m_pinyin_lib.size () + key_vv.size () * content.length ()) m_pinyin_lib.reserve (m_pinyin_lib.size () + key_vv.size () * content.length () + 1); for (i=0; i<key_vv.size(); ++i) { for (j=0, k=0; j<content.length (); ++j) { if (content_state [j]) m_pinyin_lib.push_back (final_keys [j]); else m_pinyin_lib.push_back (key_vv [i][k++]); } insert_pinyin_phrase_into_index (phrase.get_phrase_offset (), pinyin_offset); pinyin_offset = m_pinyin_lib.size (); } std::sort (m_phrases [content.length () -1].begin (), m_phrases [content.length () -1].end (), m_pinyin_key_less); return true; }