예제 #1
0
/**
 * Interactive lookup benchmark for PhraseLargeTable.
 *
 * Loads ../../data/gb_char.table and ../../data/gbk_char.table into the
 * table, passes the table through a MemoryChunk (store, then load), and
 * then reads phrases from stdin, one per line.  Each phrase is searched
 * bench_times times for timing, then searched once more and the resulting
 * token (or a "not found" message) is printed.
 *
 * The loop ends on the line "quit" or when stdin reaches end of file.
 *
 * @param argc unused.
 * @param argv unused.
 * @return 0 on normal termination, 1 if either table file cannot be opened.
 */
int main(int argc, char * argv[]){
    PhraseLargeTable largetable;

    FILE * gbfile = fopen("../../data/gb_char.table", "r");
    if ( gbfile == NULL ) {
        fprintf(stderr, "open gb_char.table failed!\n");
        return 1;
    }

    largetable.load_text(gbfile);
    fclose(gbfile);

    FILE * gbkfile = fopen("../../data/gbk_char.table", "r");
    if (gbkfile == NULL ) {
        fprintf(stderr, "open gbk_char.table failed!\n");
        return 1;
    }

    largetable.load_text(gbkfile);
    fclose(gbkfile);

    /* NOTE(review): the chunk is never deleted here; presumably
     * largetable.load() takes ownership of it -- confirm. */
    MemoryChunk * chunk = new MemoryChunk;
    largetable.store(chunk);
    largetable.load(chunk);

    char * linebuf = NULL;
    size_t size = 0;
    ssize_t read = 0;

    /* getline() reports end of file / error as -1, which is non-zero, so
     * the result has to be compared explicitly or the loop never ends. */
    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
        /* Strip the line terminator only when one is actually present:
         * the last line of the input may lack it, and indexing with
         * strlen() - 1 would underflow on an empty string. */
        if ( read > 0 && linebuf[read - 1] == '\n' )
            linebuf[read - 1] = '\0';

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        glong phrase_len = g_utf8_strlen(linebuf, -1);
        utf16_t * new_phrase = g_utf8_to_utf16(linebuf, -1, NULL, NULL, NULL);
        phrase_token_t token;

        /* Timed section: repeat the same lookup bench_times times. */
        guint32 start = record_time();
        for ( size_t i = 0; i < bench_times; ++i){
            largetable.search(phrase_len, new_phrase, token);
        }
        print_time(start, bench_times);

        /* One more lookup whose result is actually reported. */
        int retval = largetable.search(phrase_len, new_phrase, token);
        if ( retval & SEARCH_OK )
            printf("%s:\t%d\n", linebuf, token);
        else
            printf("phrase %s not found.\n", linebuf);

        g_free(new_phrase);
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free(linebuf);
    return 0;
}
예제 #2
0
int main(int argc, char * argv[]){
    FILE * input = stdin;
    const char * bigram_filename = "bigram.db";

    PhraseLargeTable phrases;

    MemoryChunk * chunk = new MemoryChunk;
    bool retval = chunk->load("phrase_index.bin");
    if (!retval) {
        fprintf(stderr, "open phrase_index.bin failed!\n");
        exit(ENOENT);
    }
    phrases.load(chunk);

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(&phrase_index))
        exit(ENOENT);

    Bigram bigram;
    retval = bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
    if (!retval) {
        fprintf(stderr, "open %s failed!\n", bigram_filename);
        exit(ENOENT);
    }

    taglib_init();

    values = g_ptr_array_new();
    required = g_hash_table_new(g_str_hash, g_str_equal);

    //enter "\data" line
    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", ""));
    ssize_t result = my_getline(input);
    if ( result == -1 ) {
        fprintf(stderr, "empty file input.\n");
        exit(ENODATA);
    }

    //read "\data" line
    if ( !taglib_read(linebuf, line_type, values, required) ) {
        fprintf(stderr, "error: interpolation model expected.\n");
        exit(ENODATA);
    }

    assert(line_type == BEGIN_LINE);
    char * value = NULL;
    assert(g_hash_table_lookup_extended(required, "model", NULL, (gpointer *)&value));
    if ( !( strcmp("interpolation", value) == 0 ) ) {
        fprintf(stderr, "error: interpolation model expected.\n");
        exit(ENODATA);
    }

    result = my_getline(input);
    if ( result != -1 )
        parse_body(input, &phrases, &phrase_index, &bigram);

    taglib_fini();

    if (!save_phrase_index(&phrase_index))
        exit(ENOENT);

    return 0;
}
예제 #3
0
/**
 * Generates the binary pinyin and phrase indices from the text tables.
 *
 * Command line:
 *   --help             print usage and exit with status 0
 *   --table-dir <dir>  directory holding the text tables (default ".")
 * Any other argument is skipped.
 *
 * Every entry of pinyin_phrase_files whose file type is SYSTEM_FILE is
 * read three times (the stream is rewound in between) to feed the chewing
 * table, the phrase table and the phrase index.  The two tables are then
 * written to "pinyin_index.bin" and "phrase_index.bin", and the phrase
 * index is compacted and written via save_phrase_index().
 *
 * @return 0 on success; otherwise the process exits with an errno-style
 *         code (EINVAL, ENOENT, EIO).
 */
int main(int argc, char * argv[]){
    const char * table_dir = ".";

    setlocale(LC_ALL, "");

    /* A dedicated counter for argument parsing keeps it from being
     * shadowed by the table loop index below. */
    for (int arg = 1; arg < argc; ++arg) {
        if ( strcmp("--help", argv[arg]) == 0 ){
            print_help();
            exit(0);
        } else if ( strcmp("--table-dir", argv[arg]) == 0){
            /* The option needs a value in the following argument. */
            if ( ++arg >= argc ){
                print_help();
                exit(EINVAL);
            }
            table_dir = argv[arg];
        }
    }

    /* generate pinyin index*/
    pinyin_option_t options = USE_TONE;
    ChewingLargeTable chewinglargetable(options);
    PhraseLargeTable phraselargetable;

    /* generate phrase index */
    FacadePhraseIndex phrase_index;
    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
        const pinyin_table_info_t * table_info = pinyin_phrase_files + i;

        if (SYSTEM_FILE != table_info->m_file_type)
            continue;

        const char * tablename = table_info->m_table_filename;

        gchar * filename = g_build_filename(table_dir, tablename, NULL);
        FILE * tablefile = fopen(filename, "r");

        if (NULL == tablefile) {
            /* Report the path that was actually tried, not just the
             * bare table name. */
            fprintf(stderr, "open %s failed!\n", filename);
            g_free(filename);
            exit(ENOENT);
        }

        /* The same text file feeds all three structures; rewind
         * between the passes. */
        chewinglargetable.load_text(tablefile);
        fseek(tablefile, 0L, SEEK_SET);
        phraselargetable.load_text(tablefile);
        fseek(tablefile, 0L, SEEK_SET);
        phrase_index.load_text(i, tablefile);
        fclose(tablefile);
        g_free(filename);
    }

    /* NOTE(review): the chunks are not deleted here; presumably each
     * table's load() takes ownership of the chunk -- confirm. */
    MemoryChunk * new_chunk = new MemoryChunk;
    chewinglargetable.store(new_chunk);
    /* Do not report success when the index could not be written. */
    if (!new_chunk->save("pinyin_index.bin")) {
        fprintf(stderr, "save pinyin_index.bin failed!\n");
        exit(EIO);
    }
    chewinglargetable.load(new_chunk);

    new_chunk = new MemoryChunk;
    phraselargetable.store(new_chunk);
    if (!new_chunk->save("phrase_index.bin")) {
        fprintf(stderr, "save phrase_index.bin failed!\n");
        exit(EIO);
    }
    phraselargetable.load(new_chunk);

    phrase_index.compact();

    if (!save_phrase_index(&phrase_index))
        exit(ENOENT);

    return 0;
}
예제 #4
0
/**
 * Generates the binary pinyin/phrase indices from the GB and GBK tables.
 *
 * Command line:
 *   --help             print usage and exit with status 0
 *   --table-dir <dir>  directory holding the text tables (default ".")
 * Any other argument is skipped.
 *
 * "gb_char.table" and "gbk_char.table" are each read three times (the
 * stream is rewound in between) to feed the pinyin table, the phrase
 * table and the phrase index (sub-indices 1 and 2 respectively).  The
 * results are written to "pinyin_index.bin", "phrase_index.bin",
 * "gb_char.bin" and "gbk_char.bin".
 *
 * @return 0 on success; otherwise the process exits with an errno-style
 *         code (EINVAL, ENOENT, EIO).
 */
int main(int argc, char * argv[]) {
    int i = 1;
    const char * table_dir = ".";

    setlocale(LC_ALL, "");
    while ( i < argc ) {
        if ( strcmp("--help", argv[i]) == 0 ) {
            print_help();
            exit(0);
        } else if ( strcmp("--table-dir", argv[i]) == 0) {
            /* The option needs a value in the following argument. */
            if ( ++i >= argc ) {
                print_help();
                exit(EINVAL);
            }
            table_dir = argv[i];
        }
        ++i;
    }

    /* generate pinyin index*/
    PinyinCustomSettings custom;
    PinyinLargeTable pinyinlargetable(&custom);
    PhraseLargeTable phraselargetable;

    /* generate phrase index */
    FacadePhraseIndex phrase_index;

    gchar * filename = g_build_filename(table_dir, "gb_char.table", NULL);
    FILE * gbfile = fopen(filename, "r");
    g_free(filename);

    if ( gbfile == NULL) {
        /* Terminate the message with a newline so it does not run into
         * whatever is printed to the terminal next. */
        fprintf(stderr, "open gb_char.table failed!\n");
        exit(ENOENT);
    }

    /* The same text file feeds all three structures; rewind between
     * the passes. */
    pinyinlargetable.load_text(gbfile);

    fseek(gbfile, 0L, SEEK_SET);
    phraselargetable.load_text(gbfile);
    fseek(gbfile, 0L, SEEK_SET);
    phrase_index.load_text(1, gbfile);
    fclose(gbfile);

    filename = g_build_filename(table_dir, "gbk_char.table", NULL);
    FILE * gbkfile = fopen(filename, "r");
    g_free(filename);

    if ( gbkfile == NULL) {
        fprintf(stderr, "open gbk_char.table failed!\n");
        exit(ENOENT);
    }

    pinyinlargetable.load_text(gbkfile);

    fseek(gbkfile, 0L, SEEK_SET);
    phraselargetable.load_text(gbkfile);
    fseek(gbkfile, 0L, SEEK_SET);
    phrase_index.load_text(2, gbkfile);
    fclose(gbkfile);

    /* NOTE(review): the chunks are not deleted here; presumably each
     * load() call takes ownership of the chunk -- confirm. */
    MemoryChunk * new_chunk = new MemoryChunk;
    pinyinlargetable.store(new_chunk);
    /* Do not report success when an output file could not be written. */
    if (!new_chunk->save("pinyin_index.bin")) {
        fprintf(stderr, "save pinyin_index.bin failed!\n");
        exit(EIO);
    }
    pinyinlargetable.load(new_chunk);

    new_chunk = new MemoryChunk;
    phraselargetable.store(new_chunk);
    if (!new_chunk->save("phrase_index.bin")) {
        fprintf(stderr, "save phrase_index.bin failed!\n");
        exit(EIO);
    }
    phraselargetable.load(new_chunk);

    phrase_index.compat();

    new_chunk = new MemoryChunk;
    phrase_index.store(1, new_chunk);
    if (!new_chunk->save("gb_char.bin")) {
        fprintf(stderr, "save gb_char.bin failed!\n");
        exit(EIO);
    }
    phrase_index.load(1, new_chunk);

    new_chunk = new MemoryChunk;
    phrase_index.store(2, new_chunk);
    if (!new_chunk->save("gbk_char.bin")) {
        fprintf(stderr, "save gbk_char.bin failed!\n");
        exit(EIO);
    }
    phrase_index.load(2, new_chunk);

    return 0;
}