int main( int argc, char * argv[]){
    SystemTableInfo system_table_info;

    bool retval = system_table_info.load("../../data/table.conf");
    if (!retval) {
        fprintf(stderr, "load table.conf failed.\n");
        exit(ENOENT);
    }

    pinyin_option_t options = USE_TONE;
    FacadeChewingTable largetable;

    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("../../data/pinyin_index.bin");
    largetable.load(options, chunk, NULL);

    const pinyin_table_info_t * phrase_files =
        system_table_info.get_table_info();

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(phrase_files, &phrase_index))
        exit(ENOENT);

    Bigram system_bigram;
    system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
    Bigram user_bigram;
    user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);

    gfloat lambda = system_table_info.get_lambda();
    
    PinyinLookup2 pinyin_lookup(lambda, options,
                                &largetable, &phrase_index,
                                &system_bigram, &user_bigram);

    /* prepare the prefixes for get_best_match. */
    TokenVector prefixes = g_array_new
        (FALSE, FALSE, sizeof(phrase_token_t));
    g_array_append_val(prefixes, sentence_start);
    
    CandidateConstraints constraints = g_array_new
        (TRUE, FALSE, sizeof(lookup_constraint_t));

    MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));

    char* linebuf = NULL; size_t size = 0; ssize_t read;
    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
            linebuf[strlen(linebuf) - 1] = '\0';
        }

	if ( strcmp ( linebuf, "quit" ) == 0)
	    break;
	
	FullPinyinParser2 parser;
	ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
	ChewingKeyRestVector key_rests =
            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
	parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));

	if ( 0 == keys->len ) /* invalid pinyin */
	    continue;

        /* initialize constraints. */
	g_array_set_size(constraints, keys->len);
	for ( size_t i = 0; i < constraints->len; ++i){
	    lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
	    constraint->m_type = NO_CONSTRAINT;
	}

	guint32 start_time = record_time();
	for ( size_t i = 0; i < bench_times; ++i)
	    pinyin_lookup.get_best_match(prefixes, keys, constraints, results);
	print_time(start_time, bench_times);
	for ( size_t i = 0; i < results->len; ++i){
	    phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
	    if ( null_token == *token)
		continue;
	    printf("pos:%ld,token:%d\t", i, *token);
	}
	printf("\n");
	char * sentence = NULL;
	pinyin_lookup.convert_to_utf8(results, sentence);
	printf("%s\n", sentence);

	g_array_free(keys, TRUE);
	g_array_free(key_rests, TRUE);
	g_free(sentence);
    }

    g_array_free(prefixes, TRUE);
    g_array_free(constraints, TRUE);
    g_array_free(results, TRUE);

    free(linebuf);
    return 0;
}
예제 #2
0
int main(int argc, char * argv[]){
    const char * evals_text = "evals.text";

    pinyin_option_t options = USE_TONE;
    FacadeChewingTable largetable;

    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("pinyin_index.bin");
    largetable.load(options, chunk, NULL);

    FacadePhraseTable2 phrase_table;
    chunk = new MemoryChunk;
    chunk->load("phrase_index.bin");
    phrase_table.load(chunk, NULL);

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(&phrase_index))
        exit(ENOENT);

    Bigram system_bigram;
    system_bigram.attach("bigram.db", ATTACH_READONLY);
    Bigram user_bigram;
    user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);

    PinyinLookup2 pinyin_lookup(options, &largetable, &phrase_index,
                               &system_bigram, &user_bigram);

    /* open evals.text. */
    FILE * evals_file = fopen(evals_text, "r");
    if ( NULL == evals_file ) {
        fprintf(stderr, "Can't open file:%s\n", evals_text);
        exit(ENOENT);
    }

    PhraseTokens phrase_tokens;
    memset(phrase_tokens, 0, sizeof(PhraseTokens));
    phrase_index.prepare_tokens(phrase_tokens);

    /* Evaluates the correction rate of test text documents. */
    size_t tested_count = 0; size_t passed_count = 0;
    char* linebuf = NULL; size_t size = 0;
    TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t));

    phrase_token_t token = null_token;
    while( getline(&linebuf, &size, evals_file) ) {
        if ( feof(evals_file) )
            break;
        if ( '\n' == linebuf[strlen(linebuf)-1] )
            linebuf[strlen(linebuf)-1] = '\0';

        glong phrase_len = 0;
        ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);

        token = null_token;
        if ( 0 != phrase_len ) {
            int result = phrase_table.search(phrase_len, phrase, phrase_tokens);
            int num = get_first_token(phrase_tokens, token);

            if ( !(result & SEARCH_OK) )
                token = null_token;

            g_free(phrase);
            phrase = NULL;
        }

        if ( null_token == token ) {
            if ( tokens->len ) { /* one test. */
                if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
                    tested_count ++; passed_count ++;
                } else {
                    tested_count ++;
                }
                g_array_set_size(tokens, 0);
            }
        } else {
            g_array_append_val(tokens, token);
        }
    }

    if ( tokens->len ) { /* one test. */
        if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
            tested_count ++; passed_count ++;
        } else {
            tested_count ++;
        }
    }

    parameter_t rate = passed_count / (parameter_t) tested_count;
    printf("correction rate:%f\n", rate);

    g_array_free(tokens, TRUE);
    fclose(evals_file);
    free(linebuf);

    phrase_index.destroy_tokens(phrase_tokens);

    return 0;
}