int main(int argc, char * argv[]){ PhraseLargeTable largetable; FILE * gbfile = fopen("../../data/gb_char.table", "r"); if ( gbfile == NULL ) { fprintf(stderr, "open gb_char.table failed!\n"); return 1; } largetable.load_text(gbfile); fclose(gbfile); FILE * gbkfile = fopen("../../data/gbk_char.table", "r"); if (gbkfile == NULL ) { fprintf(stderr, "open gbk_char.table failed!\n"); return 1; } largetable.load_text(gbkfile); fclose(gbkfile); MemoryChunk * chunk = new MemoryChunk; largetable.store(chunk); largetable.load(chunk); char * linebuf = NULL; size_t size = 0; while( getline(&linebuf, &size, stdin) ){ linebuf[strlen(linebuf) - 1] = '\0'; if ( strcmp ( linebuf, "quit" ) == 0) break; glong phrase_len = g_utf8_strlen(linebuf, -1); utf16_t * new_phrase = g_utf8_to_utf16(linebuf, -1, NULL, NULL, NULL); phrase_token_t token; guint32 start = record_time(); for ( size_t i = 0; i < bench_times; ++i){ largetable.search(phrase_len, new_phrase, token); } print_time(start, bench_times); int retval = largetable.search(phrase_len, new_phrase, token); if ( retval & SEARCH_OK ) printf("%s:\t%d\n", linebuf, token); else printf("phrase %s not found.\n", linebuf); g_free(new_phrase); } if ( linebuf ) free(linebuf); return 0; }
int main(int argc, char * argv[]){ FILE * input = stdin; const char * bigram_filename = "bigram.db"; PhraseLargeTable phrases; MemoryChunk * chunk = new MemoryChunk; bool retval = chunk->load("phrase_index.bin"); if (!retval) { fprintf(stderr, "open phrase_index.bin failed!\n"); exit(ENOENT); } phrases.load(chunk); FacadePhraseIndex phrase_index; if (!load_phrase_index(&phrase_index)) exit(ENOENT); Bigram bigram; retval = bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); if (!retval) { fprintf(stderr, "open %s failed!\n", bigram_filename); exit(ENOENT); } taglib_init(); values = g_ptr_array_new(); required = g_hash_table_new(g_str_hash, g_str_equal); //enter "\data" line assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", "")); ssize_t result = my_getline(input); if ( result == -1 ) { fprintf(stderr, "empty file input.\n"); exit(ENODATA); } //read "\data" line if ( !taglib_read(linebuf, line_type, values, required) ) { fprintf(stderr, "error: interpolation model expected.\n"); exit(ENODATA); } assert(line_type == BEGIN_LINE); char * value = NULL; assert(g_hash_table_lookup_extended(required, "model", NULL, (gpointer *)&value)); if ( !( strcmp("interpolation", value) == 0 ) ) { fprintf(stderr, "error: interpolation model expected.\n"); exit(ENODATA); } result = my_getline(input); if ( result != -1 ) parse_body(input, &phrases, &phrase_index, &bigram); taglib_fini(); if (!save_phrase_index(&phrase_index)) exit(ENOENT); return 0; }
int main(int argc, char * argv[]){ int i = 1; const char * table_dir = "."; setlocale(LC_ALL, ""); while ( i < argc ){ if ( strcmp("--help", argv[i]) == 0 ){ print_help(); exit(0); } else if ( strcmp("--table-dir", argv[i]) == 0){ if ( ++i >= argc ){ print_help(); exit(EINVAL); } table_dir = argv[i]; } ++i; } /* generate pinyin index*/ pinyin_option_t options = USE_TONE; ChewingLargeTable chewinglargetable(options); PhraseLargeTable phraselargetable; /* generate phrase index */ FacadePhraseIndex phrase_index; for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { const pinyin_table_info_t * table_info = pinyin_phrase_files + i; if (SYSTEM_FILE != table_info->m_file_type) continue; const char * tablename = table_info->m_table_filename; gchar * filename = g_build_filename(table_dir, tablename, NULL); FILE * tablefile = fopen(filename, "r"); if (NULL == tablefile) { fprintf(stderr, "open %s failed!\n", tablename); exit(ENOENT); } chewinglargetable.load_text(tablefile); fseek(tablefile, 0L, SEEK_SET); phraselargetable.load_text(tablefile); fseek(tablefile, 0L, SEEK_SET); phrase_index.load_text(i, tablefile); fclose(tablefile); g_free(filename); } MemoryChunk * new_chunk = new MemoryChunk; chewinglargetable.store(new_chunk); new_chunk->save("pinyin_index.bin"); chewinglargetable.load(new_chunk); new_chunk = new MemoryChunk; phraselargetable.store(new_chunk); new_chunk->save("phrase_index.bin"); phraselargetable.load(new_chunk); phrase_index.compact(); if (!save_phrase_index(&phrase_index)) exit(ENOENT); return 0; }
int main(int argc, char * argv[]) { int i = 1; const char * table_dir = "."; setlocale(LC_ALL, ""); while ( i < argc ) { if ( strcmp("--help", argv[i]) == 0 ) { print_help(); exit(0); } else if ( strcmp("--table-dir", argv[i]) == 0) { if ( ++i >= argc ) { print_help(); exit(EINVAL); } table_dir = argv[i]; } ++i; } /* generate pinyin index*/ PinyinCustomSettings custom; PinyinLargeTable pinyinlargetable(&custom); PhraseLargeTable phraselargetable; /* generate phrase index */ FacadePhraseIndex phrase_index; gchar * filename = g_build_filename(table_dir, "gb_char.table", NULL); FILE * gbfile = fopen(filename, "r"); g_free(filename); if ( gbfile == NULL) { fprintf(stderr, "open gb_char.table failed!"); exit(ENOENT); } pinyinlargetable.load_text(gbfile); fseek(gbfile, 0L, SEEK_SET); phraselargetable.load_text(gbfile); fseek(gbfile, 0L, SEEK_SET); phrase_index.load_text(1, gbfile); fclose(gbfile); filename = g_build_filename(table_dir, "gbk_char.table", NULL); FILE * gbkfile = fopen(filename, "r"); g_free(filename); if ( gbkfile == NULL) { fprintf(stderr, "open gbk_char.table failed!"); exit(ENOENT); } pinyinlargetable.load_text(gbkfile); fseek(gbkfile, 0L, SEEK_SET); phraselargetable.load_text(gbkfile); fseek(gbkfile, 0L, SEEK_SET); phrase_index.load_text(2, gbkfile); fclose(gbkfile); MemoryChunk * new_chunk = new MemoryChunk; pinyinlargetable.store(new_chunk); new_chunk->save("pinyin_index.bin"); pinyinlargetable.load(new_chunk); new_chunk = new MemoryChunk; phraselargetable.store(new_chunk); new_chunk->save("phrase_index.bin"); phraselargetable.load(new_chunk); phrase_index.compat(); new_chunk = new MemoryChunk; phrase_index.store(1, new_chunk); new_chunk->save("gb_char.bin"); phrase_index.load(1, new_chunk); new_chunk = new MemoryChunk; phrase_index.store(2, new_chunk); new_chunk->save("gbk_char.bin"); phrase_index.load(2, new_chunk); return 0; }