int main(int argc, char * argv[]){ /* generate pinyin index*/ PinyinCustomSettings custom; PinyinLargeTable largetable(&custom); FILE * gbfile = fopen("../../data/gb_char.table", "r"); if ( gbfile == NULL) { printf("open gb_char.table failed!"); return 1; } FILE * gbkfile = fopen("../../data/gbk_char.table","r"); if ( gbkfile == NULL) { printf("open gb_char.table failed!"); return 1; } largetable.load_text(gbfile); fclose(gbfile); largetable.load_text(gbkfile); fclose(gbkfile); MemoryChunk * new_chunk = new MemoryChunk; largetable.store(new_chunk); new_chunk->save("../../data/pinyin_index.bin"); largetable.load(new_chunk); /* generate phrase index*/ FacadePhraseIndex phrase_index; FILE* infile = fopen("../../data/gb_char.table", "r"); if ( NULL == infile ){ printf("open gb_char.table failed!\n"); exit(1); } phrase_index.load_text(1, infile); fclose(infile); infile = fopen("../../data/gbk_char.table", "r"); if ( NULL == infile ){ printf("open gbk_char.table failed!\n"); exit(1); } phrase_index.load_text(2, infile); fclose(infile); new_chunk = new MemoryChunk; phrase_index.store(1, new_chunk); new_chunk->save("../../data/gb_char.bin"); phrase_index.load(1, new_chunk); new_chunk = new MemoryChunk; phrase_index.store(2, new_chunk); new_chunk->save("../../data/gbk_char.bin"); phrase_index.load(2, new_chunk); return 0; }
int main( int argc, char * argv[]){ PinyinCustomSettings custom; PinyinLargeTable largetable(&custom); FacadePhraseIndex phrase_index; FILE * gbfile = fopen("../../data/gb_char.table", "r"); if ( gbfile == NULL ) { fprintf(stderr, "open gb_char.table failed!\n"); exit(ENOENT); } largetable.load_text(gbfile); fseek(gbfile, 0L, SEEK_SET); phrase_index.load_text(1, gbfile); fclose(gbfile); FILE * gbkfile = fopen("../../data/gbk_char.table","r"); if ( gbkfile == NULL ) { fprintf(stderr, "open gb_char.table failed!\n"); exit(ENOENT); } largetable.load_text(gbkfile); fseek(gbkfile, 0L, SEEK_SET); phrase_index.load_text(2, gbkfile); fclose(gbkfile); MemoryChunk* new_chunk = new MemoryChunk; largetable.store(new_chunk); largetable.load(new_chunk); char* linebuf = NULL; size_t size = 0; while( getline(&linebuf, &size, stdin) ){ linebuf[strlen(linebuf)-1] = '\0'; if ( strcmp ( linebuf, "quit" ) == 0) break; PinyinDefaultParser parser; NullPinyinValidator validator; PinyinKeyVector keys; PinyinKeyPosVector poses; keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); parser.parse(validator, keys, poses, linebuf); guint32 start = record_time(); PhraseIndexRanges ranges; for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange)); } for ( size_t i = 0 ; i < bench_times; ++i){ largetable.search(keys->len, (PinyinKey *)keys->data, ranges); } for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ GArray * range = ranges[i]; g_array_set_size( range, 0); } print_time(start, bench_times); largetable.search(keys->len, (PinyinKey *)keys->data, ranges); for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ GArray * range = ranges[i]; if ( range ){ for (size_t k = 0; k < range->len; ++k){ PhraseIndexRange* onerange = &g_array_index(range, PhraseIndexRange, k); printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end); PhraseItem item; for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){ phrase_index.get_phrase_item( token, item); gunichar2 bufstr[1024]; item.get_phrase_string(bufstr); char * string = g_utf16_to_utf8 ( bufstr, item.get_phrase_length(), NULL, NULL, NULL); printf("%s\t", string); g_free(string); PinyinKey pinyin_buffer[1024]; size_t npron = item.get_n_pronunciation(); guint32 freq; for ( size_t n = 0; n < npron; ++n){ item.get_nth_pronunciation(n, pinyin_buffer, freq); for ( size_t o = 0; o < item.get_phrase_length(); ++o){ printf("%s'", pinyin_buffer[o].get_key_string()); } printf("\b\t%d\t", freq); } printf("\n"); } } if ( range->len) printf("range items number:%d\n", range->len); } g_array_set_size( range, 0); } g_array_free(keys, TRUE); g_array_free(poses, TRUE); } if (linebuf) free(linebuf); return 0; }
int main(int argc, char * argv[]) { pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE; ChewingLargeTable largetable(options); FacadePhraseIndex phrase_index; FILE * gbfile = fopen("../../data/gb_char.table", "r"); if (NULL == gbfile) { fprintf(stderr, "open gb_char.table failed!\n"); exit(ENOENT); } largetable.load_text(gbfile); fseek(gbfile, 0L, SEEK_SET); phrase_index.load_text(1, gbfile); fclose(gbfile); FILE * gbkfile = fopen("../../data/gbk_char.table", "r"); if (NULL == gbkfile) { fprintf(stderr, "open gbk_char.table failed!\n"); exit(ENOENT); } largetable.load_text(gbkfile); fseek(gbkfile, 0L, SEEK_SET); phrase_index.load_text(2, gbkfile); fclose(gbkfile); MemoryChunk * new_chunk = new MemoryChunk; largetable.store(new_chunk); largetable.load(new_chunk); char* linebuf = NULL; size_t size = 0; while( getline(&linebuf, &size, stdin) ){ linebuf[strlen(linebuf)-1] = '\0'; if ( strcmp ( linebuf, "quit" ) == 0) break; FullPinyinParser2 parser; ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); if (0 == keys->len) { fprintf(stderr, "Invalid input.\n"); continue; } guint32 start = record_time(); PhraseIndexRanges ranges; memset(ranges, 0, sizeof(PhraseIndexRanges)); guint8 min_index, max_index; phrase_index.get_sub_phrase_range(min_index, max_index); for (size_t i = min_index; i < max_index; ++i) { ranges[i] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); } for (size_t i = 0; i < bench_times; ++i) { largetable.search(keys->len, (ChewingKey *)keys->data, ranges); } for (size_t i = min_index; i < max_index; ++i) { g_array_set_size(ranges[i], 0); } print_time(start, bench_times); largetable.search(keys->len, (ChewingKey *)keys->data, ranges); for (size_t i = min_index; i < max_index; ++i) { GArray * & range = ranges[i]; if (range) { if (range->len) printf("range items number:%d\n", range->len); for (size_t k = 0; k < range->len; ++k) { PhraseIndexRange * onerange = &g_array_index(range, PhraseIndexRange, k); printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end); PhraseItem item; for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){ phrase_index.get_phrase_item( token, item); /* get phrase string */ gunichar2 buffer[MAX_PHRASE_LENGTH + 1]; item.get_phrase_string(buffer); char * string = g_utf16_to_utf8 ( buffer, item.get_phrase_length(), NULL, NULL, NULL); printf("%s\t", string); g_free(string); ChewingKey chewing_buffer[MAX_PHRASE_LENGTH]; size_t npron = item.get_n_pronunciation(); guint32 freq; for (size_t m = 0; m < npron; ++m){ item.get_nth_pronunciation(m, chewing_buffer, freq); for (size_t n = 0; n < item.get_phrase_length(); ++n){ printf("%s'", chewing_buffer[n].get_pinyin_string()); } printf("\b\t%d\t", freq); } } printf("\n"); } } g_array_set_size(range, 0); } g_array_free(keys, TRUE); g_array_free(key_rests, TRUE); } if (linebuf) free(linebuf); return 0; }
int main(int argc, char * argv[]) { SystemTableInfo system_table_info; bool retval = system_table_info.load("../../data/table.conf"); if (!retval) { fprintf(stderr, "load table.conf failed.\n"); exit(ENOENT); } pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE; ChewingLargeTable largetable(options); FacadePhraseIndex phrase_index; const pinyin_table_info_t * phrase_files = system_table_info.get_table_info(); if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index)) exit(ENOENT); MemoryChunk * new_chunk = new MemoryChunk; largetable.store(new_chunk); largetable.load(new_chunk); char* linebuf = NULL; size_t size = 0; ssize_t read; while ((read = getline(&linebuf, &size, stdin)) != -1) { if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; FullPinyinParser2 parser; ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); if (0 == keys->len) { fprintf(stderr, "Invalid input.\n"); continue; } guint32 start = record_time(); PhraseIndexRanges ranges; memset(ranges, 0, sizeof(PhraseIndexRanges)); phrase_index.prepare_ranges(ranges); for (size_t i = 0; i < bench_times; ++i) { phrase_index.clear_ranges(ranges); largetable.search(keys->len, (ChewingKey *)keys->data, ranges); } print_time(start, bench_times); phrase_index.clear_ranges(ranges); largetable.search(keys->len, (ChewingKey *)keys->data, ranges); for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { GArray * & range = ranges[i]; if (!range) continue; if (range->len) printf("range items number:%d\n", range->len); for (size_t k = 0; k < range->len; ++k) { PhraseIndexRange * onerange = &g_array_index(range, PhraseIndexRange, k); printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end); PhraseItem item; for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){ phrase_index.get_phrase_item( token, item); /* get phrase string */ ucs4_t buffer[MAX_PHRASE_LENGTH + 1]; item.get_phrase_string(buffer); char * string = g_ucs4_to_utf8 ( buffer, item.get_phrase_length(), NULL, NULL, NULL); printf("%s\t", string); g_free(string); ChewingKey chewing_buffer[MAX_PHRASE_LENGTH]; size_t npron = item.get_n_pronunciation(); guint32 freq; for (size_t m = 0; m < npron; ++m){ item.get_nth_pronunciation(m, chewing_buffer, freq); for (size_t n = 0; n < item.get_phrase_length(); ++n){ gchar * pinyins = chewing_buffer[n].get_pinyin_string(); printf("%s'", pinyins); g_free(pinyins); } printf("\b\t%d\t", freq); } } printf("\n"); } g_array_set_size(range, 0); } phrase_index.destroy_ranges(ranges); g_array_free(keys, TRUE); g_array_free(key_rests, TRUE); } if (linebuf) free(linebuf); /* mask out all index items. */ largetable.mask_out(0x0, 0x0); return 0; }