Пример #1
0
/**
 * Lazily create the libpinyin instance for one input method.
 *
 * Does nothing when libpinyin->inst is already set. Otherwise the context
 * matching libpinyin->type is created on the owning addon if it does not
 * exist yet (zhuyin_context for LPT_Zhuyin, pinyin_context for every other
 * type), an instance is allocated from that context, and ConfigLibpinyin()
 * is invoked on the addon.
 *
 * If pinyin_init() yields NULL the function returns early: libpinyin->inst
 * stays NULL and the context pointer stays NULL, so a later call retries
 * instead of handing a NULL context to libpinyin.
 *
 * @param libpinyin input method state; its owner must be a valid addon.
 */
void FcitxLibpinyinLoad(FcitxLibpinyin* libpinyin)
{
    if (libpinyin->inst != NULL)
        return;

    FcitxLibpinyinAddonInstance* libpinyinaddon = libpinyin->owner;

    if (libpinyin->type == LPT_Zhuyin) {
        if (libpinyinaddon->zhuyin_context == NULL) {
            /* Zhuyin uses traditional data unless the config asks for simplified. */
            char* user_path = FcitxLibpinyinGetUserPath(libpinyinaddon->config.bSimplifiedDataForZhuyin ? LPLT_Simplified : LPLT_Traditional);
            char* syspath = FcitxLibpinyinGetSysPath(libpinyinaddon->config.bSimplifiedDataForZhuyin ? LPLT_Simplified : LPLT_Traditional);

            libpinyinaddon->zhuyin_context = pinyin_init(syspath, user_path);
            if (libpinyinaddon->zhuyin_context != NULL)
                pinyin_load_phrase_library(libpinyinaddon->zhuyin_context, 15);

            /* The path strings are owned by this function on both outcomes. */
            free(user_path);
            free(syspath);

            if (libpinyinaddon->zhuyin_context == NULL)
                return;
        }

        libpinyin->inst = pinyin_alloc_instance(libpinyinaddon->zhuyin_context);
    } else {
        if (libpinyinaddon->pinyin_context == NULL) {
            /* Pinyin uses simplified data unless the config asks for traditional. */
            char* user_path = FcitxLibpinyinGetUserPath(libpinyinaddon->config.bTraditionalDataForPinyin ? LPLT_Traditional : LPLT_Simplified);
            char* syspath = FcitxLibpinyinGetSysPath(libpinyinaddon->config.bTraditionalDataForPinyin ? LPLT_Traditional : LPLT_Simplified);

            libpinyinaddon->pinyin_context = pinyin_init(syspath, user_path);
            if (libpinyinaddon->pinyin_context != NULL)
                pinyin_load_phrase_library(libpinyinaddon->pinyin_context, 15);

            /* The path strings are owned by this function on both outcomes. */
            free(user_path);
            free(syspath);

            if (libpinyinaddon->pinyin_context == NULL)
                return;
        }

        libpinyin->inst = pinyin_alloc_instance(libpinyinaddon->pinyin_context);
    }

    ConfigLibpinyin(libpinyinaddon);
}
Пример #2
0
PyMODINIT_FUNC
PyInit_simplepinyin(void)
{
    context = pinyin_init(LIBPINYIN_DATA, "/tmp");
    // printf("DEBUG: pinyin_init\n");
    if (context == NULL)
        return NULL;

    pinyin_option_t options =
        PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE |
        0;
    if (!pinyin_set_options(context, options))
        return NULL;

    if (Py_AtExit(libpinyin_cleanup) == -1)
        return NULL;

    PyObject* m;

    if (PyType_Ready(&SimplePinyinType) < 0)
        return NULL;

    m = PyModule_Create(&simplepinyinmodule);
    if (m == NULL)
        return NULL;

    Py_INCREF(&SimplePinyinType);
    PyModule_AddObject(m, "SimplePinyin", (PyObject *)&SimplePinyinType);
    return m;
}
Пример #3
0
/**
 * Constructs the adapter and its libpinyin state.
 *
 * Initialises a context with PINYIN_DATA_DIR as the first directory and "."
 * as the second, allocates one instance from that context, and then sets
 * the option flags on the context.
 *
 * NOTE(review): the result of pinyin_init() is used without a NULL check;
 * confirm whether initialisation can fail in this deployment.
 *
 * @param parent forwarded to the QObject base class.
 */
PinyinAdapter::PinyinAdapter(QObject *parent)
    : QObject(parent)
    , m_processingWords(false)
{
    m_context = pinyin_init(PINYIN_DATA_DIR, ".");
    m_instance = pinyin_alloc_instance(m_context);

    pinyin_set_options(m_context,
                       IS_PINYIN
                       | PINYIN_INCOMPLETE
                       | USE_DIVIDED_TABLE
                       | USE_RESPLIT_TABLE);
}
Пример #4
0
/**
 * Interactive phrase segmentation demo.
 *
 * Reads lines from stdin until EOF or a line equal to "quit". Each line is
 * segmented with pinyin_phrase_segment(), every non-null token is translated
 * back to its word and printed followed by a tab, and the context is saved
 * after each line.
 *
 * @return 0 on normal termination, 1 if libpinyin could not be set up.
 */
int main(int argc, char * argv[]){
    pinyin_context_t * context =
        pinyin_init("../data", "../data");
    if ( NULL == context ) {
        fprintf(stderr, "pinyin_init failed.\n");
        return 1;
    }

    pinyin_instance_t * instance = pinyin_alloc_instance(context);
    if ( NULL == instance ) {
        fprintf(stderr, "pinyin_alloc_instance failed.\n");
        pinyin_fini(context);
        return 1;
    }

    char* linebuf = NULL;
    size_t size = 0;
    ssize_t read;
    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
        /* Strip the trailing newline using the length getline() reported;
         * strlen() would be 0 for a line starting with a NUL byte and
         * indexing strlen() - 1 would then read before the buffer. */
        if ( read > 0 && '\n' == linebuf[read - 1] ) {
            linebuf[read - 1] = '\0';
        }

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        pinyin_phrase_segment(instance, linebuf);
        MatchResults & tokens = instance->m_match_results;

        for ( size_t i = 0; i < tokens->len; ++i ){
            phrase_token_t token = g_array_index
                (tokens, phrase_token_t, i);

            if ( null_token == token )
                continue;

            char * word = NULL;
            pinyin_translate_token(instance, token, &word);
            /* word stays NULL when the token could not be translated;
             * passing NULL to "%s" is undefined behaviour. */
            if ( NULL == word )
                continue;

            printf("%s\t", word);
            g_free(word);
        }
        printf("\n");

        pinyin_save(context);
    }

    pinyin_free_instance(instance);
    pinyin_fini(context);
    free(linebuf);
    return 0;
}
Пример #5
0
/**
 * Interactive sentence-guessing demo for chewing input.
 *
 * Reads lines from stdin until EOF or a line equal to "quit". Each line is
 * parsed with pinyin_parse_more_chewings(), the best sentence is guessed and
 * printed when available, then the instance is trained and reset and the
 * context is saved. On exit the instance is freed, pinyin_mask_out() is
 * called with a zero mask and value, and the context is saved and finalised.
 *
 * @return 0 on normal termination, 1 if libpinyin could not be set up.
 */
int main(int argc, char * argv[]){
    pinyin_context_t * context =
        pinyin_init("../data", "../data");
    if ( NULL == context ) {
        fprintf(stderr, "pinyin_init failed.\n");
        return 1;
    }

    pinyin_instance_t * instance = pinyin_alloc_instance(context);
    if ( NULL == instance ) {
        fprintf(stderr, "pinyin_alloc_instance failed.\n");
        pinyin_fini(context);
        return 1;
    }

    char* linebuf = NULL;
    size_t size = 0;
    ssize_t read;
    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
        /* Strip the trailing newline using the length getline() reported;
         * strlen() would be 0 for a line starting with a NUL byte and
         * indexing strlen() - 1 would then read before the buffer. */
        if ( read > 0 && '\n' == linebuf[read - 1] ) {
            linebuf[read - 1] = '\0';
        }

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        pinyin_parse_more_chewings
            (instance, linebuf);
        pinyin_guess_sentence(instance);

        char * sentence = NULL;
        pinyin_get_sentence (instance, &sentence);
        if (sentence)
            printf("%s\n", sentence);
        g_free(sentence);

        pinyin_train(instance);
        pinyin_reset(instance);
        pinyin_save(context);
    }

    pinyin_free_instance(instance);

    pinyin_mask_out(context, 0x0, 0x0);
    pinyin_save(context);
    pinyin_fini(context);

    free(linebuf);
    return 0;
}
Пример #6
0
/**
 * Demo: translate a mixed-script UTF-8 sample string with a dictionary.
 *
 * Initialises a PinTable, sets the locale to zh_CN.UTF-8, loads the word and
 * character tables from ../data, prints the source text, then prints the
 * result of pinyin_translate() and releases it with free_buffer().
 *
 * @return 0 on success, 1 if pinyin_translate() returned no buffer.
 */
int main(int argc, char const *argv[])
{
    char* source = "わたしわ阿飞, and my English name is Rex Lee. 网名是独孤影! ^_^。下面是一段多音分词歧义测试,这个人无伤无臭味磕头如捣蒜。";
    /* ASCII-only alternative sample for testing buffer transfer:
     * char* source = "hello world, here is an English string to test buffer transfer. thank you."; */
    char* pinyin = NULL;

    PinTable dict;
    pinyin_init(&dict);

    pinyin_set_locale("zh_CN.UTF-8");

    /* Words are loaded before single characters, as in the original demo. */
    load_word("../data/words.csv", &dict);
    load_char("../data/chars.csv", &dict);

    printf("[source] \n%s\n\n", source);
    printf("[transl] \n");
    pinyin = pinyin_translate(source, &dict);

    /* Passing NULL to "%s" is undefined behaviour, so bail out first. */
    if (pinyin == NULL) {
        fprintf(stderr, "pinyin_translate failed.\n");
        return 1;
    }

    printf("%s\n", pinyin);
    free_buffer(pinyin);

    return 0;
}
Пример #7
0
/**
 * Interactive candidate lookup demo with a sentence prefix.
 *
 * Repeatedly prompts with "prefix:", reads a prefix line and then a pinyin
 * line from stdin, and stops on EOF or when the pinyin line equals "quit".
 * For each pair it parses the full pinyin, guesses a sentence with the
 * prefix, fetches the candidates at offset 0 and prints each one (preceded
 * by its new pinyins when present), then trains, resets and saves.
 *
 * @return 0 on normal termination, 1 if libpinyin could not be set up.
 */
int main(int argc, char * argv[]){
    pinyin_context_t * context =
        pinyin_init("../data", "../data");
    if ( NULL == context ) {
        fprintf(stderr, "pinyin_init failed.\n");
        return 1;
    }

    pinyin_option_t options =
        PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE |
        DYNAMIC_ADJUST;
    pinyin_set_options(context, options);

    pinyin_instance_t * instance = pinyin_alloc_instance(context);
    if ( NULL == instance ) {
        fprintf(stderr, "pinyin_alloc_instance failed.\n");
        pinyin_fini(context);
        return 1;
    }

    CandidateVector candidates = g_array_new
        (FALSE, FALSE, sizeof(lookup_candidate_t));

    char * prefixbuf = NULL; size_t prefixsize = 0;
    char * linebuf = NULL; size_t linesize = 0;
    ssize_t read;

    while( TRUE ){
        fprintf(stdout, "prefix:");
        fflush(stdout);

        if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1)
            break;

        /* Strip the trailing newline using the length getline() reported;
         * indexing strlen() - 1 reads before the buffer when the line
         * starts with a NUL byte. */
        if ( read > 0 && '\n' == prefixbuf[read - 1] ) {
            prefixbuf[read - 1] = '\0';
        }

        if ((read = getline(&linebuf, &linesize, stdin)) == -1)
            break;

        if ( read > 0 && '\n' == linebuf[read - 1] ) {
            linebuf[read - 1] = '\0';
        }

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        pinyin_parse_more_full_pinyins(instance, linebuf);
        pinyin_guess_sentence_with_prefix(instance, prefixbuf);

        pinyin_get_full_pinyin_candidates(instance, 0, candidates);
        for (size_t i = 0; i < candidates->len; ++i) {
            lookup_candidate_t * candidate = &g_array_index
                (candidates, lookup_candidate_t, i);
            const char * pinyins = candidate->m_new_pinyins;
            const char * word = candidate->m_phrase_string;

            if (pinyins)
                printf("%s %s\t", pinyins, word);
            else
                printf("%s\t", word);
        }
        printf("\n");

        pinyin_train(instance);
        pinyin_reset(instance);
        pinyin_save(context);
    }

    g_array_free(candidates, TRUE);
    pinyin_free_instance(instance);
    pinyin_fini(context);
    free(prefixbuf); free(linebuf);
    return 0;
}