bool PinyinPhraseLib::input_pinyin_lib (const PinyinValidator &validator, std::istream &is) { if (!is) return false; m_pinyin_lib.clear (); char header [40]; bool binary; //check header is.getline (header, 40); if (strncmp (header, scim_pinyin_lib_text_header, strlen (scim_pinyin_lib_text_header)) == 0) { binary = false; } else if (strncmp (header, scim_pinyin_lib_binary_header, strlen (scim_pinyin_lib_binary_header)) == 0) { binary = true; } else { return false; } is.getline (header, 40); if (strncmp (header, scim_pinyin_lib_version, strlen (scim_pinyin_lib_version)) != 0) return false; unsigned char bytes [4]; PinyinKey key; uint32 number; //get length if (binary) { is.read ((char*) bytes, sizeof(unsigned char) * 4); number = scim_bytestouint32 (bytes); } else { is.getline (header, 40); number = atoi (header); } if (number <= 0) return false; m_pinyin_lib.reserve (number + 256); if (binary) { for (uint32 i=0; i<number; i++) { key.input_binary (validator, is); m_pinyin_lib.push_back (key); } } else { for (uint32 i=0; i<number; i++) { key.input_text (validator, is); m_pinyin_lib.push_back (key); } } return true; }
int main() { PinyinTable ime(NULL,"pinyin_table.txt"); PinyinKey key; key.set_key(scim_default_pinyin_validator,"pin"); std::vector <ucs4_t> chars; int n=ime.find_chars(chars,key); cout<<n<<" matched"<<endl; ofstream o("out.txt"); for(int i=0;i<chars.size();i++){ utf8_write_wchar(o,chars[i]); } }
void gen_phrase_file(const char * outfilename, int phrase_index){ FILE * outfile = fopen(outfilename, "w"); if (NULL == outfile ) { fprintf(stderr, "Can't write file %s.\n", outfilename); exit(ENOENT); } phrase_token_t token = 1; char pinyin_buffer[4096]; //phrase length for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ GArray * item_array = g_item_array[i]; //item array for( size_t m = 0; m < item_array->len; ++m){ item* oneitem = & g_array_index(item_array, item, m); phrase_item * phrase = oneitem->phrase; GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array; const char * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase, phrase->length, NULL, NULL, NULL); //each pinyin for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){ pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n); GArray * pinyin = pinyin_and_freq->pinyin; PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0); strcpy(pinyin_buffer,key->get_key_string()); for (size_t k = 1; k < pinyin->len; ++k){ strcat(pinyin_buffer, "'"); PinyinKey * key = &g_array_index(pinyin, PinyinKey, k); strcat(pinyin_buffer, key->get_key_string ()); } guint32 freq = pinyin_and_freq -> freq; if ( freq < 3 ) freq = 3; fprintf( outfile, "%s\t%s\t%d\t%d\n", pinyin_buffer, phrase_buffer, PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq); } token++; } } fclose(outfile); }
void PinyinAdapter::parse(const QString& string) { pinyin_parse_more_full_pinyins(m_instance, string.toLatin1().data()); #ifdef PINYIN_DEBUG for (int i = 0; i < m_instance->m_pinyin_keys->len; i ++) { PinyinKey* pykey = &g_array_index(m_instance->m_pinyin_keys, PinyinKey, i); gchar* py = pykey->get_pinyin_string(); std::cout << py << " "; g_free(py); } std::cout << std::endl; #endif pinyin_guess_candidates(m_instance, 0); candidates.clear(); guint len = 0; pinyin_get_n_candidate(m_instance, &len); len = len > MAX_SUGGESTIONS ? MAX_SUGGESTIONS : len; for (unsigned int i = 0 ; i < len; i ++ ) { lookup_candidate_t * candidate = NULL; if (pinyin_get_candidate(m_instance, i, &candidate)) { const char* word = NULL; pinyin_get_candidate_string(m_instance, candidate, &word); // Translate the token to utf-8 phrase. if (word) { candidates.append(QString(word)); } } } Q_EMIT newPredictionSuggestions(string, candidates); }
void FcitxLibpinyinUpdatePreedit(FcitxLibpinyin* libpinyin, char* sentence) { FcitxInstance* instance = libpinyin->owner->owner; FcitxInputState* input = FcitxInstanceGetInputState(instance); int offset = LibpinyinGetOffset(libpinyin); if (libpinyin->type == LPT_Pinyin) { int libpinyinLen = strlen(libpinyin->inst->m_raw_full_pinyin); int fcitxLen = strlen(libpinyin->buf); if (fcitxLen != libpinyinLen) { strcpy(libpinyin->buf, libpinyin->inst->m_raw_full_pinyin); libpinyin->cursor_pos += libpinyinLen - fcitxLen; } } int pyoffset = LibpinyinGetPinyinOffset(libpinyin); if (pyoffset > libpinyin->cursor_pos) libpinyin->cursor_pos = pyoffset; int hzlen = 0; if (fcitx_utf8_strlen(sentence) > offset) hzlen = fcitx_utf8_get_nth_char(sentence, offset) - sentence; else hzlen = strlen(sentence); if (hzlen > 0) { char* buf = (char*) fcitx_utils_malloc0((hzlen + 1) * sizeof(char)); strncpy(buf, sentence, hzlen); buf[hzlen] = 0; FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_INPUT, "%s", buf); free(buf); } int charcurpos = hzlen; int lastpos = pyoffset; int curoffset = pyoffset; for (int i = offset; i < libpinyin->inst->m_pinyin_keys->len; i ++) { PinyinKey* pykey = &g_array_index(libpinyin->inst->m_pinyin_keys, PinyinKey, i); PinyinKeyPos* pykeypos = &g_array_index(libpinyin->inst->m_pinyin_key_rests, PinyinKeyPos, i); if (lastpos > 0) { FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), " "); if (curoffset < libpinyin->cursor_pos) charcurpos ++; for (int j = lastpos; j < pykeypos->m_raw_begin; j ++) { char temp[2] = {'\0', '\0'}; temp[0] = libpinyin->buf[j]; FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), temp); if (curoffset < libpinyin->cursor_pos) { curoffset ++; charcurpos ++; } } } lastpos = pykeypos->m_raw_end; switch (libpinyin->type) { case LPT_Pinyin: { gchar* pystring = pykey->get_pinyin_string(); FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring); size_t pylen = strlen(pystring); if (curoffset + pylen < libpinyin->cursor_pos) { curoffset += pylen; charcurpos += pylen; } else { charcurpos += libpinyin->cursor_pos - curoffset; curoffset = libpinyin->cursor_pos; } g_free(pystring); break; } case LPT_Shuangpin: { if (pykeypos->length() == 2) { const char* initial = 0; if (pykey->m_initial == CHEWING_ZERO_INITIAL) initial = "'"; else initial = get_initial_string(pykey); if (curoffset + 1 <= libpinyin->cursor_pos) { curoffset += 1; charcurpos += strlen(initial); } FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", initial); if (curoffset + 1 <= libpinyin->cursor_pos) { curoffset += 1; charcurpos += strlen(get_middle_string(pykey)) + strlen(get_final_string(pykey)); } FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s%s", get_middle_string(pykey), get_final_string(pykey)); } else if (pykeypos->length() == 1) { gchar* pystring = pykey->get_pinyin_string(); if (curoffset + 1 <= libpinyin->cursor_pos) { curoffset += 1; charcurpos += strlen(pystring); } FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring); g_free(pystring); } break; } case LPT_Zhuyin: { gchar* pystring = pykey->get_chewing_string(); FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring); if (curoffset + pykeypos->length() <= libpinyin->cursor_pos) { curoffset += pykeypos->length(); charcurpos += strlen(pystring); } else { int diff = libpinyin->cursor_pos - curoffset; curoffset = libpinyin->cursor_pos; size_t len = fcitx_utf8_strlen(pystring); if (pykey->m_tone != CHEWING_ZERO_TONE) len --; if (diff > len) charcurpos += strlen(pystring); else { charcurpos += fcitx_utf8_get_nth_char(pystring, diff) - pystring; } } g_free(pystring); break; } } } int buflen = strlen(libpinyin->buf); if (lastpos < buflen) { FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), " "); if (lastpos < libpinyin->cursor_pos) charcurpos ++; for (int i = lastpos; i < buflen; i ++) { char temp[2] = {'\0', '\0'}; temp[0] = libpinyin->buf[i]; FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), temp); if (lastpos < libpinyin->cursor_pos) { charcurpos ++; lastpos++; } } } FcitxInputStateSetCursorPos(input, charcurpos); }
int main (int argc, char * argv []) { NullPinyinValidator validator; PinyinKeyVector keys; PinyinKeyPosVector poses; PinyinCustomSettings custom; PinyinParser *parser = 0; //PinyinTable table; const char *tablefile = "../data/pinyin-table.txt"; keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); int i = 0; while (i<argc) { if (++i >= argc) break; if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) { print_help (); return 0; } if ( !strcmp("-i", argv [i]) ) { custom.set_use_incomplete (true); continue; } if ( !strcmp("-p", argv [i]) ) { if (++i >= argc) { fprintf(stderr, "No argument for option %s.\n", argv [i-1]); return -1; } if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default")) parser = new PinyinShuangPinParser (); else if (!strcmp (argv[i], "sp-stone")) parser = new PinyinShuangPinParser (SHUANG_PIN_STONE); else if (!strcmp (argv[i], "sp-zrm")) parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM); else if (!strcmp (argv[i], "sp-ms")) parser = new PinyinShuangPinParser (SHUANG_PIN_MS); else if (!strcmp (argv[i], "sp-ziguang")) parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG); else if (!strcmp (argv[i], "sp-abc")) parser = new PinyinShuangPinParser (SHUANG_PIN_ABC); else if (!strcmp (argv[i], "sp-liushi")) parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI); if (!strcmp (argv[i], "zy") || !strcmp (argv[i], "zy-standard") || !strcmp (argv[i], "zy-default")) parser = new PinyinZhuYinParser (); else if (!strcmp (argv[i], "zy-hsu")) parser = new PinyinZhuYinParser (ZHUYIN_HSU); else if (!strcmp (argv[i], "zy-ibm")) parser = new PinyinZhuYinParser (ZHUYIN_IBM); else if (!strcmp (argv[i], "zy-gin-yieh")) parser = new PinyinZhuYinParser (ZHUYIN_GIN_YIEH); else if (!strcmp (argv[i], "zy-et")) parser = new PinyinZhuYinParser (ZHUYIN_ET); else if (!strcmp (argv[i], "zy-et26")) parser = new PinyinZhuYinParser (ZHUYIN_ET26); else if (!strcmp (argv[i], "zy-zhuyin")) parser = new PinyinZhuYinParser (ZHUYIN_ZHUYIN); else { fprintf(stderr, "Unknown Parser:%s.\n", argv[i]); print_help(); exit(EINVAL); } continue; } if (!strcmp("-f", argv [i])) { if (++i >= argc) { fprintf(stderr, "No argument for option %s.\n", argv [i-1]); return -1; } tablefile = argv [i]; continue; } fprintf(stderr, "Invalid option: %s.\n", argv [i]); return -1; }; if (!parser) parser = new PinyinDefaultParser (); char * line = NULL; size_t len = 0; while (1) { printf("Input:"); fflush(stdout); getline(&line, &len, stdin); if (!strncmp (line, "quit", 4)) break; int len = parser->parse (validator, keys, poses,(const char *) line); printf("Parsed %d chars, %d keys:\n", len, keys->len); for (size_t i=0; i < keys->len; ++i){ PinyinKey * key = &g_array_index(keys, PinyinKey, i); printf("%s ", key->get_key_string ()); } printf("\n"); for ( size_t i=0; i < poses->len; ++i){ PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i); printf("%d %ld ", pos->get_pos(), pos->get_length()); } printf("\n"); for (size_t i=0; i < keys->len; ++i){ PinyinKey * key = &g_array_index(keys, PinyinKey, i); printf("%s ", key->get_key_zhuyin_string ()); } printf("\n"); } if (line) free(line); return 0; }
int main(int argc, char *argv[]) { pinyin_context_t* context = pinyin_init(LIBPINYIN_PKGDATADIR "/data", NULL); pinyin_instance_t* inst = pinyin_alloc_instance(context); pinyin_set_options(context, IS_PINYIN | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE); string s; cin >> s ; pinyin_parse_more_double_pinyins(inst, s.c_str()); int cursor = 0; for (int i = 0; i < inst->m_pinyin_keys->len; i ++) { PinyinKey* pykey = &g_array_index(inst->m_pinyin_keys, PinyinKey, i); gchar* py = pykey->get_pinyin_string(); gchar* chewing = pykey->get_chewing_string(); cout << py << " " << chewing << endl; g_free(py); g_free(chewing); } while (true) { cout << get_lookup_cursor(inst, cursor) << endl; GArray* array = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); pinyin_get_candidates(inst, get_lookup_cursor(inst, cursor), array); cout << array->len << endl; pinyin_guess_sentence(inst); char* sentence = NULL; pinyin_get_sentence(inst, &sentence); if (sentence) cout << sentence << endl; else cout << "no sentence" << endl; g_free(sentence); for (int i = 0 ; i < array->len; i ++ ) { lookup_candidate_t token = g_array_index(array, lookup_candidate_t, i); char* word = NULL; pinyin_translate_token(inst, token.m_token, &word); if (word) cout << word << " "; g_free(word); } cout << "constraints " << inst->m_constraints->len << endl; int cand; cin >> cursor >> cand; if (cand >= 0) pinyin_choose_candidate(inst, 0, &g_array_index(array, lookup_candidate_t, cand)); else if (cand != -1) { pinyin_clear_constraints(inst); } else if (cand != -2) { break; } g_array_free(array, TRUE); } pinyin_free_instance(inst); pinyin_fini(context); }