void gen_phrase_file(const char * outfilename, int phrase_index){ FILE * outfile = fopen(outfilename, "w"); if (NULL == outfile ) { fprintf(stderr, "Can't write file %s.\n", outfilename); exit(ENOENT); } phrase_token_t token = 1; char pinyin_buffer[4096]; //phrase length for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ GArray * item_array = g_item_array[i]; //item array for( size_t m = 0; m < item_array->len; ++m){ item* oneitem = & g_array_index(item_array, item, m); phrase_item * phrase = oneitem->phrase; GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array; const char * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase, phrase->length, NULL, NULL, NULL); //each pinyin for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){ pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n); GArray * pinyin = pinyin_and_freq->pinyin; PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0); strcpy(pinyin_buffer,key->get_key_string()); for (size_t k = 1; k < pinyin->len; ++k){ strcat(pinyin_buffer, "'"); PinyinKey * key = &g_array_index(pinyin, PinyinKey, k); strcat(pinyin_buffer, key->get_key_string ()); } guint32 freq = pinyin_and_freq -> freq; if ( freq < 3 ) freq = 3; fprintf( outfile, "%s\t%s\t%d\t%d\n", pinyin_buffer, phrase_buffer, PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq); } token++; } } fclose(outfile); }
int main (int argc, char * argv []) { NullPinyinValidator validator; PinyinKeyVector keys; PinyinKeyPosVector poses; PinyinCustomSettings custom; PinyinParser *parser = 0; //PinyinTable table; const char *tablefile = "../data/pinyin-table.txt"; keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); int i = 0; while (i<argc) { if (++i >= argc) break; if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) { print_help (); return 0; } if ( !strcmp("-i", argv [i]) ) { custom.set_use_incomplete (true); continue; } if ( !strcmp("-p", argv [i]) ) { if (++i >= argc) { fprintf(stderr, "No argument for option %s.\n", argv [i-1]); return -1; } if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default")) parser = new PinyinShuangPinParser (); else if (!strcmp (argv[i], "sp-stone")) parser = new PinyinShuangPinParser (SHUANG_PIN_STONE); else if (!strcmp (argv[i], "sp-zrm")) parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM); else if (!strcmp (argv[i], "sp-ms")) parser = new PinyinShuangPinParser (SHUANG_PIN_MS); else if (!strcmp (argv[i], "sp-ziguang")) parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG); else if (!strcmp (argv[i], "sp-abc")) parser = new PinyinShuangPinParser (SHUANG_PIN_ABC); else if (!strcmp (argv[i], "sp-liushi")) parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI); if (!strcmp (argv[i], "zy") || !strcmp (argv[i], "zy-standard") || !strcmp (argv[i], "zy-default")) parser = new PinyinZhuYinParser (); else if (!strcmp (argv[i], "zy-hsu")) parser = new PinyinZhuYinParser (ZHUYIN_HSU); else if (!strcmp (argv[i], "zy-ibm")) parser = new PinyinZhuYinParser (ZHUYIN_IBM); else if (!strcmp (argv[i], "zy-gin-yieh")) parser = new PinyinZhuYinParser (ZHUYIN_GIN_YIEH); else if (!strcmp (argv[i], "zy-et")) parser = new PinyinZhuYinParser (ZHUYIN_ET); else if (!strcmp (argv[i], "zy-et26")) parser = new PinyinZhuYinParser (ZHUYIN_ET26); else if (!strcmp (argv[i], "zy-zhuyin")) parser = new PinyinZhuYinParser (ZHUYIN_ZHUYIN); else { fprintf(stderr, "Unknown Parser:%s.\n", argv[i]); print_help(); exit(EINVAL); } continue; } if (!strcmp("-f", argv [i])) { if (++i >= argc) { fprintf(stderr, "No argument for option %s.\n", argv [i-1]); return -1; } tablefile = argv [i]; continue; } fprintf(stderr, "Invalid option: %s.\n", argv [i]); return -1; }; if (!parser) parser = new PinyinDefaultParser (); char * line = NULL; size_t len = 0; while (1) { printf("Input:"); fflush(stdout); getline(&line, &len, stdin); if (!strncmp (line, "quit", 4)) break; int len = parser->parse (validator, keys, poses,(const char *) line); printf("Parsed %d chars, %d keys:\n", len, keys->len); for (size_t i=0; i < keys->len; ++i){ PinyinKey * key = &g_array_index(keys, PinyinKey, i); printf("%s ", key->get_key_string ()); } printf("\n"); for ( size_t i=0; i < poses->len; ++i){ PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i); printf("%d %ld ", pos->get_pos(), pos->get_length()); } printf("\n"); for (size_t i=0; i < keys->len; ++i){ PinyinKey * key = &g_array_index(keys, PinyinKey, i); printf("%s ", key->get_key_zhuyin_string ()); } printf("\n"); } if (line) free(line); return 0; }