/**
 * Lazily create the libpinyin instance backing one input method.
 *
 * Does nothing when libpinyin->inst is already set. Otherwise the shared
 * context matching libpinyin->type (zhuyin_context for LPT_Zhuyin,
 * pinyin_context for every other type) is created on first use from the
 * system/user data paths selected by the addon configuration, an instance is
 * allocated from it, and ConfigLibpinyin() is applied to the addon.
 *
 * If pinyin_init() fails to produce a context, libpinyin->inst is left NULL
 * and the function returns without touching the missing context, so a later
 * call can retry the initialisation.
 *
 * @param libpinyin  Input-method state whose `inst` should be populated.
 */
void FcitxLibpinyinLoad(FcitxLibpinyin* libpinyin)
{
    if (libpinyin->inst != NULL)
        return;

    FcitxLibpinyinAddonInstance* libpinyinaddon = libpinyin->owner;

    if (libpinyin->type == LPT_Zhuyin && libpinyinaddon->zhuyin_context == NULL) {
        char* user_path = FcitxLibpinyinGetUserPath(
            libpinyinaddon->config.bSimplifiedDataForZhuyin ? LPLT_Simplified : LPLT_Traditional);
        char* syspath = FcitxLibpinyinGetSysPath(
            libpinyinaddon->config.bSimplifiedDataForZhuyin ? LPLT_Simplified : LPLT_Traditional);
        libpinyinaddon->zhuyin_context = pinyin_init(syspath, user_path);
        /* Only load the phrase library when a context was actually created. */
        if (libpinyinaddon->zhuyin_context != NULL)
            pinyin_load_phrase_library(libpinyinaddon->zhuyin_context, 15);
        free(user_path);
        free(syspath);
    }

    if (libpinyin->type != LPT_Zhuyin && libpinyinaddon->pinyin_context == NULL) {
        char* user_path = FcitxLibpinyinGetUserPath(
            libpinyinaddon->config.bTraditionalDataForPinyin ? LPLT_Traditional : LPLT_Simplified);
        char* syspath = FcitxLibpinyinGetSysPath(
            libpinyinaddon->config.bTraditionalDataForPinyin ? LPLT_Traditional : LPLT_Simplified);
        libpinyinaddon->pinyin_context = pinyin_init(syspath, user_path);
        /* Only load the phrase library when a context was actually created. */
        if (libpinyinaddon->pinyin_context != NULL)
            pinyin_load_phrase_library(libpinyinaddon->pinyin_context, 15);
        free(user_path);
        free(syspath);
    }

    if (libpinyin->type == LPT_Zhuyin) {
        /* Context creation failed: leave inst NULL instead of passing NULL on. */
        if (libpinyinaddon->zhuyin_context == NULL)
            return;
        libpinyin->inst = pinyin_alloc_instance(libpinyinaddon->zhuyin_context);
    } else {
        /* Context creation failed: leave inst NULL instead of passing NULL on. */
        if (libpinyinaddon->pinyin_context == NULL)
            return;
        libpinyin->inst = pinyin_alloc_instance(libpinyinaddon->pinyin_context);
    }

    ConfigLibpinyin(libpinyinaddon);
}
/**
 * Module initialisation function for the `simplepinyin` extension module.
 *
 * Creates the file-level libpinyin `context` from LIBPINYIN_DATA (user
 * directory "/tmp"), applies the parser options, registers
 * libpinyin_cleanup with Py_AtExit, readies SimplePinyinType and exposes it
 * on the new module as "SimplePinyin".
 *
 * @return The new module object, or NULL with a Python exception set when
 *         any step fails.
 */
PyMODINIT_FUNC PyInit_simplepinyin(void)
{
    context = pinyin_init(LIBPINYIN_DATA, "/tmp");
    if (context == NULL) {
        /* Returning NULL without an exception set makes the import fail
         * with an unhelpful SystemError, so report the cause explicitly. */
        PyErr_SetString(PyExc_ImportError,
                        "simplepinyin: pinyin_init failed");
        return NULL;
    }

    pinyin_option_t options =
        PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE;
    if (!pinyin_set_options(context, options)) {
        /* No exit hook is registered yet, so release the context here. */
        pinyin_fini(context);
        context = NULL;
        PyErr_SetString(PyExc_ImportError,
                        "simplepinyin: pinyin_set_options failed");
        return NULL;
    }

    if (Py_AtExit(libpinyin_cleanup) == -1) {
        /* The hook was not registered, so nothing else will free the context. */
        pinyin_fini(context);
        context = NULL;
        PyErr_SetString(PyExc_ImportError,
                        "simplepinyin: could not register exit handler");
        return NULL;
    }

    /* PyType_Ready and PyModule_Create set their own exception on failure. */
    if (PyType_Ready(&SimplePinyinType) < 0)
        return NULL;

    PyObject* m = PyModule_Create(&simplepinyinmodule);
    if (m == NULL)
        return NULL;

    /* PyModule_AddObject steals the type reference only on success, so both
     * the type reference and the module must be dropped on failure. */
    Py_INCREF(&SimplePinyinType);
    if (PyModule_AddObject(m, "SimplePinyin", (PyObject *)&SimplePinyinType) < 0) {
        Py_DECREF(&SimplePinyinType);
        Py_DECREF(m);
        return NULL;
    }

    return m;
}
/**
 * Constructs the adapter and sets up its libpinyin state.
 *
 * A context is opened with PINYIN_DATA_DIR and "." as the two directory
 * arguments of pinyin_init(), one instance is allocated from it, and the
 * parsing options are then applied to the context.
 *
 * @param parent  Forwarded to the QObject base class.
 */
PinyinAdapter::PinyinAdapter(QObject *parent)
    : QObject(parent)
    , m_processingWords(false)
{
    // Option flags handed to pinyin_set_options() once the instance exists.
    const pinyin_option_t options =
        IS_PINYIN | PINYIN_INCOMPLETE | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE;

    m_context = pinyin_init(PINYIN_DATA_DIR, ".");
    m_instance = pinyin_alloc_instance(m_context);
    pinyin_set_options(m_context, options);
}
int main(int argc, char * argv[]){ pinyin_context_t * context = pinyin_init("../data", "../data"); pinyin_instance_t * instance = pinyin_alloc_instance(context); char* linebuf = NULL; size_t size = 0; ssize_t read; while( (read = getline(&linebuf, &size, stdin)) != -1 ){ if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; pinyin_phrase_segment(instance, linebuf); MatchResults & tokens = instance->m_match_results; for ( size_t i = 0; i < tokens->len; ++i ){ phrase_token_t token = g_array_index (tokens, phrase_token_t, i); if ( null_token == token ) continue; char * word = NULL; pinyin_translate_token(instance, token, &word); printf("%s\t", word); g_free(word); } printf("\n"); pinyin_save(context); } pinyin_free_instance(instance); pinyin_fini(context); free(linebuf); return 0; }
int main(int argc, char * argv[]){ pinyin_context_t * context = pinyin_init("../data", "../data"); pinyin_instance_t * instance = pinyin_alloc_instance(context); char* linebuf = NULL; size_t size = 0; ssize_t read; while( (read = getline(&linebuf, &size, stdin)) != -1 ){ if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; pinyin_parse_more_chewings (instance, linebuf); pinyin_guess_sentence(instance); char * sentence = NULL; pinyin_get_sentence (instance, &sentence); if (sentence) printf("%s\n", sentence); g_free(sentence); pinyin_train(instance); pinyin_reset(instance); pinyin_save(context); } pinyin_free_instance(instance); pinyin_mask_out(context, 0x0, 0x0); pinyin_save(context); pinyin_fini(context); free(linebuf); return 0; }
int main(int argc, char const *argv[]) { char* source = "わたしわ阿飞, and my English name is Rex Lee. 网名是独孤影! ^_^。下面是一段多音分词歧义测试,这个人无伤无臭味磕头如捣蒜。"; // char* source = "hello world, here is an English string to test buffer transfer. thank you."; char* pinyin = 0; PinTable dict; pinyin_init(&dict); pinyin_set_locale("zh_CN.UTF-8"); // load_char("/root/pinyin-php/data/chars.csv", &dict); load_word("../data/words.csv", &dict); load_char("../data/chars.csv", &dict); printf("[source] \n%s\n\n", source); printf("[transl] \n"); pinyin = pinyin_translate(source, &dict); printf("%s\n", pinyin); free_buffer(pinyin); // HashNode* r = ht_lookup(&dict, "下"); // printf("[L]%d\n", dict.ht_size); // printf("t[%s]\n", r->nValue); // ht_print(&dict); // char buf[256]={"わたしわ阿飞, and"}; // setlocale(LC_ALL,"zh_CN.UTF-8"); // wchar_t ar[256]={'\0'}; // int read = mbstowcs(ar,buf,strlen(buf)); // printf("%lu\n",strlen(buf)); //输出为:9 [字节] UF-8编码下一个汉字占三个字节3*3=9 // printf("%d\n",read); //输出为:3 [个数] “你好啊”三个子字个数 return 0; }
int main(int argc, char * argv[]){ pinyin_context_t * context = pinyin_init("../data", "../data"); pinyin_option_t options = PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE | DYNAMIC_ADJUST; pinyin_set_options(context, options); pinyin_instance_t * instance = pinyin_alloc_instance(context); CandidateVector candidates = g_array_new (FALSE, FALSE, sizeof(lookup_candidate_t)); char * prefixbuf = NULL; size_t prefixsize = 0; char * linebuf = NULL; size_t linesize = 0; ssize_t read; while( TRUE ){ fprintf(stdout, "prefix:"); fflush(stdout); if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1) break; if ( '\n' == prefixbuf[strlen(prefixbuf) - 1] ) { prefixbuf[strlen(prefixbuf) - 1] = '\0'; } if ((read = getline(&linebuf, &linesize, stdin)) == -1) break; if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; pinyin_parse_more_full_pinyins(instance, linebuf); pinyin_guess_sentence_with_prefix(instance, prefixbuf); pinyin_get_full_pinyin_candidates(instance, 0, candidates); for (size_t i = 0; i < candidates->len; ++i) { lookup_candidate_t * candidate = &g_array_index (candidates, lookup_candidate_t, i); const char * pinyins = candidate->m_new_pinyins; const char * word = candidate->m_phrase_string; if (pinyins) printf("%s %s\t", pinyins, word); else printf("%s\t", word); } printf("\n"); pinyin_train(instance); pinyin_reset(instance); pinyin_save(context); } g_array_free(candidates, TRUE); pinyin_free_instance(instance); pinyin_fini(context); free(prefixbuf); free(linebuf); return 0; }