int main( int argc, char * argv[]){ SystemTableInfo system_table_info; bool retval = system_table_info.load("../../data/table.conf"); if (!retval) { fprintf(stderr, "load table.conf failed.\n"); exit(ENOENT); } pinyin_option_t options = USE_TONE; FacadeChewingTable largetable; MemoryChunk * chunk = new MemoryChunk; chunk->load("../../data/pinyin_index.bin"); largetable.load(options, chunk, NULL); const pinyin_table_info_t * phrase_files = system_table_info.get_table_info(); FacadePhraseIndex phrase_index; if (!load_phrase_index(phrase_files, &phrase_index)) exit(ENOENT); Bigram system_bigram; system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); Bigram user_bigram; user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); gfloat lambda = system_table_info.get_lambda(); PinyinLookup2 pinyin_lookup(lambda, options, &largetable, &phrase_index, &system_bigram, &user_bigram); /* prepare the prefixes for get_best_match. */ TokenVector prefixes = g_array_new (FALSE, FALSE, sizeof(phrase_token_t)); g_array_append_val(prefixes, sentence_start); CandidateConstraints constraints = g_array_new (TRUE, FALSE, sizeof(lookup_constraint_t)); MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); char* linebuf = NULL; size_t size = 0; ssize_t read; while( (read = getline(&linebuf, &size, stdin)) != -1 ){ if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; FullPinyinParser2 parser; ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); if ( 0 == keys->len ) /* invalid pinyin */ continue; /* initialize constraints. */ g_array_set_size(constraints, keys->len); for ( size_t i = 0; i < constraints->len; ++i){ lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); constraint->m_type = NO_CONSTRAINT; } guint32 start_time = record_time(); for ( size_t i = 0; i < bench_times; ++i) pinyin_lookup.get_best_match(prefixes, keys, constraints, results); print_time(start_time, bench_times); for ( size_t i = 0; i < results->len; ++i){ phrase_token_t * token = &g_array_index(results, phrase_token_t, i); if ( null_token == *token) continue; printf("pos:%ld,token:%d\t", i, *token); } printf("\n"); char * sentence = NULL; pinyin_lookup.convert_to_utf8(results, sentence); printf("%s\n", sentence); g_array_free(keys, TRUE); g_array_free(key_rests, TRUE); g_free(sentence); } g_array_free(prefixes, TRUE); g_array_free(constraints, TRUE); g_array_free(results, TRUE); free(linebuf); return 0; }
int main(int argc, char * argv[]){ const char * evals_text = "evals.text"; pinyin_option_t options = USE_TONE; FacadeChewingTable largetable; MemoryChunk * chunk = new MemoryChunk; chunk->load("pinyin_index.bin"); largetable.load(options, chunk, NULL); FacadePhraseTable2 phrase_table; chunk = new MemoryChunk; chunk->load("phrase_index.bin"); phrase_table.load(chunk, NULL); FacadePhraseIndex phrase_index; if (!load_phrase_index(&phrase_index)) exit(ENOENT); Bigram system_bigram; system_bigram.attach("bigram.db", ATTACH_READONLY); Bigram user_bigram; user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); PinyinLookup2 pinyin_lookup(options, &largetable, &phrase_index, &system_bigram, &user_bigram); /* open evals.text. */ FILE * evals_file = fopen(evals_text, "r"); if ( NULL == evals_file ) { fprintf(stderr, "Can't open file:%s\n", evals_text); exit(ENOENT); } PhraseTokens phrase_tokens; memset(phrase_tokens, 0, sizeof(PhraseTokens)); phrase_index.prepare_tokens(phrase_tokens); /* Evaluates the correction rate of test text documents. */ size_t tested_count = 0; size_t passed_count = 0; char* linebuf = NULL; size_t size = 0; TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t)); phrase_token_t token = null_token; while( getline(&linebuf, &size, evals_file) ) { if ( feof(evals_file) ) break; if ( '\n' == linebuf[strlen(linebuf)-1] ) linebuf[strlen(linebuf)-1] = '\0'; glong phrase_len = 0; ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL); token = null_token; if ( 0 != phrase_len ) { int result = phrase_table.search(phrase_len, phrase, phrase_tokens); int num = get_first_token(phrase_tokens, token); if ( !(result & SEARCH_OK) ) token = null_token; g_free(phrase); phrase = NULL; } if ( null_token == token ) { if ( tokens->len ) { /* one test. */ if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { tested_count ++; passed_count ++; } else { tested_count ++; } g_array_set_size(tokens, 0); } } else { g_array_append_val(tokens, token); } } if ( tokens->len ) { /* one test. */ if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { tested_count ++; passed_count ++; } else { tested_count ++; } } parameter_t rate = passed_count / (parameter_t) tested_count; printf("correction rate:%f\n", rate); g_array_free(tokens, TRUE); fclose(evals_file); free(linebuf); phrase_index.destroy_tokens(phrase_tokens); return 0; }