/**
 * Look up an exact chewing spelling in the chewing_index table.
 *
 * @param options  parser options forwarded to check_chewing_options().
 * @param chewing  the spelling to search for.
 * @param key      receives the ChewingKey stored in content_table for the
 *                 matching entry; left untouched when the lookup fails.
 * @return true when exactly one entry matches and check_chewing_options()
 *         accepts it, false otherwise.
 *
 * NOTE(review): equal_range presumably needs chewing_index to be ordered
 * consistently with compare_chewing_less_than -- confirm against the
 * table generator.
 */
static inline bool search_chewing_index(pinyin_option_t options,
                                        const char * chewing,
                                        ChewingKey & key){
    /* Zeroed probe item that carries only the spelling being searched. */
    chewing_index_item_t probe;
    memset(&probe, 0, sizeof(probe));
    probe.m_chewing_input = chewing;

    std_lite::pair<const chewing_index_item_t *,
                   const chewing_index_item_t *> matches =
        std_lite::equal_range(chewing_index,
                              chewing_index + G_N_ELEMENTS(chewing_index),
                              probe, compare_chewing_less_than);

    /* The table is expected to hold at most one entry per spelling. */
    const guint16 match_count = matches.second - matches.first;
    assert(match_count <= 1);

    if (match_count != 1)
        return false;

    const chewing_index_item_t * entry = matches.first;
    if (!check_chewing_options(options, entry))
        return false;

    key = content_table[entry->m_table_index].m_chewing_key;
    /* The stored key must map back to the same content_table row. */
    assert(key.get_table_index() == entry->m_table_index);
    return true;
}
int main(int argc, char * argv[]) { GError * error = NULL; GOptionContext * context; context = g_option_context_new("- test pinyin parser"); g_option_context_add_main_entries(context, entries, NULL); if (!g_option_context_parse(context, &argc, &argv, &error)) { g_print("option parsing failed:%s\n", error->message); exit(EINVAL); } pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE | USE_RESPLIT_TABLE; if (incomplete) options |= PINYIN_INCOMPLETE | ZHUYIN_INCOMPLETE; PhoneticParser2 * parser = NULL; ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); /* create the parser */ if (strcmp("fullpinyin", parsername) == 0) { parser = new FullPinyinParser2(); } else if (strcmp("doublepinyin", parsername) == 0) { parser = new DoublePinyinParser2(); } else if (strcmp("zhuyin", parsername) == 0) { if (strcmp("standard", schemename) == 0) { parser = new ZhuyinSimpleParser2(); } else if (strcmp("hsu", schemename) == 0) { parser = new ZhuyinDiscreteParser2(); } else if (strcmp("dachen26", schemename) == 0) { parser = new ZhuyinDaChenCP26Parser2(); } } else if (strcmp("pinyindirect", parsername) == 0) { parser = new PinyinDirectParser2(); } else if (strcmp("zhuyindirect", parsername) == 0) { parser = new ZhuyinDirectParser2(); } if (!parser) parser = new FullPinyinParser2(); char* linebuf = NULL; size_t size = 0; ssize_t read; while( (read = getline(&linebuf, &size, stdin)) != -1 ){ if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; #if 0 ChewingKey key; bool success = parser->parse_one_key(options, key, linebuf, strlen(linebuf)); if (success) { gchar * pinyins = key.get_pinyin_string(); printf("pinyin:%s\n", pinyins); g_free(pinyins); } #endif #if 1 int len = 0; guint32 start_time = record_time(); for ( size_t i = 0; i < bench_times; ++i) len = parser->parse(options, keys, key_rests, linebuf, 
strlen(linebuf)); print_time(start_time, bench_times); printf("parsed %d chars, %d keys.\n", len, keys->len); assert(keys->len == key_rests->len); for (size_t i = 0; i < keys->len; ++i) { ChewingKey * key = &g_array_index(keys, ChewingKey, i); ChewingKeyRest * key_rest = &g_array_index(key_rests, ChewingKeyRest, i); gchar * pinyins = key->get_pinyin_string(); printf("%s %d %d\t", pinyins, key_rest->m_raw_begin, key_rest->m_raw_end); g_free(pinyins); } printf("\n"); #endif } if (linebuf) free(linebuf); delete parser; g_array_free(key_rests, TRUE); g_array_free(keys, TRUE); return 0; }
void gen_phrase_file(const char * outputfile, int phrase_index){ FILE * outfile = fopen(outputfile, "w"); if (NULL == outfile ) { fprintf(stderr, "Can't write file %s.\n", outputfile); exit(ENOENT); } phrase_token_t token = 1; /* phrase length index */ for (size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i) { GArray * item_array = g_item_array[i]; /* item array index */ for (size_t m = 0; m < item_array->len; ++m) { phrase_and_array_item * item = &g_array_index (item_array, phrase_and_array_item, m); phrase_item phrase = item->phrase; GArray * chewing_and_freqs = item->chewing_and_freq_array; gchar * phrase_str = g_ucs4_to_utf8 (phrase.uniphrase, phrase.length, NULL, NULL, NULL); /* iterate each pinyin */ for (size_t n = 0; n < chewing_and_freqs->len; ++n) { chewing_and_freq_item * chewing_and_freq = &g_array_index (chewing_and_freqs, chewing_and_freq_item, n); ChewingKeyVector keys = chewing_and_freq->keys; ChewingKeyRestVector key_rests = chewing_and_freq->key_rests; GArray * pinyins = g_array_new(TRUE, FALSE, sizeof(gchar *)); gchar * pinyin = NULL; size_t k; for (k = 0; k < keys->len; ++k) { ChewingKey key = g_array_index(keys, ChewingKey, k); ChewingKeyRest key_rest = g_array_index (key_rests, ChewingKeyRest, k); //assert (CHEWING_ZERO_TONE != key.m_tone); pinyin = key.get_bopomofo_string(); g_array_append_val(pinyins, pinyin); } gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data); for (k = 0; k < pinyins->len; ++k) { g_free(g_array_index(pinyins, gchar *, k)); } g_array_free(pinyins, TRUE); guint32 freq = chewing_and_freq->freq; /* avoid zero freq */ if (freq < 3) freq = 3; fprintf(outfile, "%s\t%s\t%d\t%d\n", pinyin_str, phrase_str, PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq); g_free(pinyin_str); } g_free(phrase_str); token++; } } fclose(outfile); }