Beispiel #1
0
void gen_phrase_file(const char * outfilename, int phrase_index){
    FILE * outfile = fopen(outfilename, "w");
    if (NULL == outfile ) {
        fprintf(stderr, "Can't write file %s.\n", outfilename);
        exit(ENOENT);
    }
    phrase_token_t token = 1;
    char pinyin_buffer[4096];
    //phrase length
    for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){
	GArray * item_array = g_item_array[i];
	//item array
	for( size_t m = 0; m < item_array->len; ++m){
	    item* oneitem = & g_array_index(item_array, item, m);
	    phrase_item * phrase = oneitem->phrase;
	    GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array;
	    const char * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase,
						 phrase->length, 
						 NULL, NULL, NULL);
	    //each pinyin
	    for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){
		pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n);
		GArray * pinyin = pinyin_and_freq->pinyin;
		PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0);
		strcpy(pinyin_buffer,key->get_key_string());
		for (size_t k = 1; k < pinyin->len; ++k){
		    strcat(pinyin_buffer, "'");
		    PinyinKey * key = &g_array_index(pinyin, PinyinKey, k);
		    strcat(pinyin_buffer, key->get_key_string ());
		}
		guint32 freq = pinyin_and_freq -> freq;
		if ( freq < 3 ) 
		    freq = 3;
		fprintf( outfile, "%s\t%s\t%d\t%d\n", 
			 pinyin_buffer, phrase_buffer, 
			 PHRASE_INDEX_MAKE_TOKEN(phrase_index, token),
			 freq);
	    }
	    token++;
	}
    }
    fclose(outfile);
}
Beispiel #2
0
int main (int argc, char * argv [])
{
    NullPinyinValidator validator;
    PinyinKeyVector keys;
    PinyinKeyPosVector poses;
    PinyinCustomSettings custom;
    PinyinParser *parser = 0;
    //PinyinTable table;
    const char *tablefile = "../data/pinyin-table.txt";

    keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
    poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));

    int i = 0;
    while (i<argc) {
        if (++i >= argc) break;

        if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) {
            print_help ();
            return 0;
        }

        if ( !strcmp("-i", argv [i]) ) {
            custom.set_use_incomplete (true);
            continue;
        }

        if ( !strcmp("-p", argv [i]) ) {
            if (++i >= argc) {
                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
                return -1;
            }
            if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default"))
                parser = new PinyinShuangPinParser ();
            else if (!strcmp (argv[i], "sp-stone"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_STONE);
            else if (!strcmp (argv[i], "sp-zrm"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM);
            else if (!strcmp (argv[i], "sp-ms"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_MS);
            else if (!strcmp (argv[i], "sp-ziguang"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG);
            else if (!strcmp (argv[i], "sp-abc"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ABC);
            else if (!strcmp (argv[i], "sp-liushi"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI);
            if (!strcmp (argv[i], "zy") || !strcmp (argv[i], "zy-standard") || !strcmp (argv[i], "zy-default"))
                parser = new PinyinZhuYinParser ();
            else if (!strcmp (argv[i], "zy-hsu"))
                parser = new PinyinZhuYinParser (ZHUYIN_HSU);
            else if (!strcmp (argv[i], "zy-ibm"))
                parser = new PinyinZhuYinParser (ZHUYIN_IBM);
            else if (!strcmp (argv[i], "zy-gin-yieh"))
                parser = new PinyinZhuYinParser (ZHUYIN_GIN_YIEH);
            else if (!strcmp (argv[i], "zy-et"))
                parser = new PinyinZhuYinParser (ZHUYIN_ET);
            else if (!strcmp (argv[i], "zy-et26"))
                parser = new PinyinZhuYinParser (ZHUYIN_ET26);
            else if (!strcmp (argv[i], "zy-zhuyin"))
                parser = new PinyinZhuYinParser (ZHUYIN_ZHUYIN);
            else {
                fprintf(stderr, "Unknown Parser:%s.\n", argv[i]);
                print_help();
                exit(EINVAL);
            }

            continue;
        }

        if (!strcmp("-f", argv [i])) {
            if (++i >= argc) {
                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
                return -1;
            }
            tablefile = argv [i];
            continue;
        }

        fprintf(stderr, "Invalid option: %s.\n", argv [i]);
        return -1;
    };

    if (!parser) parser = new PinyinDefaultParser ();

    char * line = NULL;
    size_t len = 0;

    while (1) {
        printf("Input:"); fflush(stdout);
        getline(&line, &len, stdin);

        if (!strncmp (line, "quit", 4)) break;

        int len = parser->parse (validator, keys, poses,(const char *) line);

        printf("Parsed %d chars, %d keys:\n", len, keys->len);

        for (size_t i=0; i < keys->len; ++i){
            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
            printf("%s ", key->get_key_string ());
        }
        printf("\n");

        for ( size_t i=0; i < poses->len; ++i){
            PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i);
            printf("%d %ld ", pos->get_pos(), pos->get_length());
        }
        printf("\n");

        for (size_t i=0; i < keys->len; ++i){
            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
            printf("%s ", key->get_key_zhuyin_string ());
        }
        printf("\n");
    }

    if (line)
        free(line);

    return 0;
}