示例#1
0
bool
PinyinPhraseLib::input_pinyin_lib (const PinyinValidator &validator, std::istream &is)
{
	if (!is) return false;

	m_pinyin_lib.clear ();

	char header [40];
	bool binary;

	//check header
	is.getline (header, 40);
	if (strncmp (header,
		scim_pinyin_lib_text_header,
		strlen (scim_pinyin_lib_text_header)) == 0) {
		binary = false;
	} else if (strncmp (header,
		scim_pinyin_lib_binary_header,
		strlen (scim_pinyin_lib_binary_header)) == 0) {
		binary = true;
	} else {
		return false;
	}
	
	is.getline (header, 40);
	if (strncmp (header, scim_pinyin_lib_version, strlen (scim_pinyin_lib_version)) != 0)
		return false;

	unsigned char bytes [4];
	PinyinKey key;
	uint32 number;

	//get length
	if (binary) {
		is.read ((char*) bytes, sizeof(unsigned char) * 4);
		number = scim_bytestouint32 (bytes);
	} else {
		is.getline (header, 40);
		number = atoi (header);
	}

	if (number <= 0) return false;

	m_pinyin_lib.reserve (number + 256);

	if (binary) {
		for (uint32 i=0; i<number; i++) {
			key.input_binary (validator, is);
			m_pinyin_lib.push_back (key);
		}
	} else {
		for (uint32 i=0; i<number; i++) {
			key.input_text (validator, is);
			m_pinyin_lib.push_back (key);
		}
	}

	return true;
}
示例#2
0
int main()
{
	PinyinTable ime(NULL,"pinyin_table.txt");
	
	PinyinKey key;
	
	key.set_key(scim_default_pinyin_validator,"pin");

	std::vector <ucs4_t> chars;
	int n=ime.find_chars(chars,key);
	cout<<n<<" matched"<<endl;
	
	ofstream o("out.txt");
	for(int i=0;i<chars.size();i++){
		utf8_write_wchar(o,chars[i]);
	}
}
示例#3
0
void gen_phrase_file(const char * outfilename, int phrase_index){
    FILE * outfile = fopen(outfilename, "w");
    if (NULL == outfile ) {
        fprintf(stderr, "Can't write file %s.\n", outfilename);
        exit(ENOENT);
    }
    phrase_token_t token = 1;
    char pinyin_buffer[4096];
    //phrase length
    for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){
	GArray * item_array = g_item_array[i];
	//item array
	for( size_t m = 0; m < item_array->len; ++m){
	    item* oneitem = & g_array_index(item_array, item, m);
	    phrase_item * phrase = oneitem->phrase;
	    GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array;
	    const char * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase,
						 phrase->length, 
						 NULL, NULL, NULL);
	    //each pinyin
	    for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){
		pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n);
		GArray * pinyin = pinyin_and_freq->pinyin;
		PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0);
		strcpy(pinyin_buffer,key->get_key_string());
		for (size_t k = 1; k < pinyin->len; ++k){
		    strcat(pinyin_buffer, "'");
		    PinyinKey * key = &g_array_index(pinyin, PinyinKey, k);
		    strcat(pinyin_buffer, key->get_key_string ());
		}
		guint32 freq = pinyin_and_freq -> freq;
		if ( freq < 3 ) 
		    freq = 3;
		fprintf( outfile, "%s\t%s\t%d\t%d\n", 
			 pinyin_buffer, phrase_buffer, 
			 PHRASE_INDEX_MAKE_TOKEN(phrase_index, token),
			 freq);
	    }
	    token++;
	}
    }
    fclose(outfile);
}
void PinyinAdapter::parse(const QString& string)
{
    pinyin_parse_more_full_pinyins(m_instance, string.toLatin1().data());

#ifdef PINYIN_DEBUG
    for (int i = 0; i < m_instance->m_pinyin_keys->len; i ++)
    {
        PinyinKey* pykey = &g_array_index(m_instance->m_pinyin_keys, PinyinKey, i);
        gchar* py = pykey->get_pinyin_string();
        std::cout << py << " ";
        g_free(py);
    }
    std::cout << std::endl;
#endif

    pinyin_guess_candidates(m_instance, 0);

    candidates.clear();
    guint len = 0;
    pinyin_get_n_candidate(m_instance, &len);
    len = len > MAX_SUGGESTIONS ? MAX_SUGGESTIONS : len;
    for (unsigned int i = 0 ; i < len; i ++ )
    {
        lookup_candidate_t * candidate = NULL;

        if (pinyin_get_candidate(m_instance, i, &candidate)) {
            const char* word = NULL;
            pinyin_get_candidate_string(m_instance, candidate, &word);
            // Translate the token to utf-8 phrase.
            if (word) {
                candidates.append(QString(word));
            }
        }
    }

    Q_EMIT newPredictionSuggestions(string, candidates);
}
示例#5
0
/**
 * Rebuild the preedit messages and the preedit cursor position from the
 * current libpinyin state.
 *
 * The preedit is assembled in three parts:
 *   1. the first `offset` UTF-8 characters of `sentence`, added as MSG_INPUT;
 *   2. one MSG_CODE message per pinyin key from index `offset` onwards,
 *      rendered according to libpinyin->type (pinyin, shuangpin or zhuyin);
 *   3. any raw input in libpinyin->buf after the last key, appended as-is.
 *
 * While doing so, libpinyin->cursor_pos (an index into libpinyin->buf) is
 * translated into `charcurpos`, a byte offset into the displayed preedit,
 * which is finally passed to FcitxInputStateSetCursorPos.
 *
 * @param libpinyin  input method state; buf and cursor_pos may be modified.
 * @param sentence   UTF-8 sentence whose leading part is shown as MSG_INPUT.
 */
void FcitxLibpinyinUpdatePreedit(FcitxLibpinyin* libpinyin, char* sentence)
{
    FcitxInstance* instance = libpinyin->owner->owner;
    FcitxInputState* input = FcitxInstanceGetInputState(instance);
    int offset = LibpinyinGetOffset(libpinyin);

    // In full-pinyin mode, if libpinyin's raw input has a different length
    // than our buffer, adopt libpinyin's text and shift the cursor by the
    // length difference.
    if (libpinyin->type == LPT_Pinyin) {
        int libpinyinLen = strlen(libpinyin->inst->m_raw_full_pinyin);
        int fcitxLen = strlen(libpinyin->buf);
        if (fcitxLen != libpinyinLen) {
            strcpy(libpinyin->buf, libpinyin->inst->m_raw_full_pinyin);
            libpinyin->cursor_pos += libpinyinLen - fcitxLen;
        }
    }

    // The cursor is never allowed to sit before pyoffset.
    int pyoffset = LibpinyinGetPinyinOffset(libpinyin);
    if (pyoffset > libpinyin->cursor_pos)
        libpinyin->cursor_pos = pyoffset;

    // hzlen: byte length of the first `offset` characters of the sentence
    // (or of the whole sentence when it has no more than `offset` characters).
    int hzlen = 0;
    if (fcitx_utf8_strlen(sentence) > offset)
        hzlen = fcitx_utf8_get_nth_char(sentence, offset) - sentence;
    else
        hzlen = strlen(sentence);

    // Show that sentence prefix as the MSG_INPUT part of the preedit.
    if (hzlen > 0) {
        char* buf = (char*) fcitx_utils_malloc0((hzlen + 1) * sizeof(char));
        strncpy(buf, sentence, hzlen);
        buf[hzlen] = 0;
        FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_INPUT, "%s", buf);
        free(buf);
    }

    // charcurpos: cursor position in bytes within the displayed preedit.
    int charcurpos = hzlen;

    // lastpos:   raw-buffer index just past the previously emitted key.
    // curoffset: how far into the raw buffer the cursor mapping has advanced.
    int lastpos = pyoffset;
    int curoffset = pyoffset;
    for (int i = offset; i < libpinyin->inst->m_pinyin_keys->len; i ++)
    {
        PinyinKey* pykey = &g_array_index(libpinyin->inst->m_pinyin_keys, PinyinKey, i);
        PinyinKeyPos* pykeypos = &g_array_index(libpinyin->inst->m_pinyin_key_rests, PinyinKeyPos, i);

        // Between keys: add a separating space, then copy through any raw
        // characters that lie between the previous key's end and this key's
        // beginning, advancing the cursor mapping for each one.
        if (lastpos > 0) {
            FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), " ");
            if (curoffset < libpinyin->cursor_pos)
                charcurpos ++;
            for (int j = lastpos; j < pykeypos->m_raw_begin; j ++) {
                char temp[2] = {'\0', '\0'};
                temp[0] = libpinyin->buf[j];
                FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), temp);
                if (curoffset < libpinyin->cursor_pos)
                {
                    curoffset ++;
                    charcurpos ++;
                }
            }
        }
        lastpos = pykeypos->m_raw_end;

        switch (libpinyin->type) {
            case LPT_Pinyin: {
                // Display the key's pinyin string; if the cursor lies beyond
                // it, advance by its full length, otherwise only by the
                // distance remaining to the cursor.
                gchar* pystring = pykey->get_pinyin_string();
                FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring);
                size_t pylen = strlen(pystring);
                if (curoffset + pylen < libpinyin->cursor_pos) {
                    curoffset += pylen;
                    charcurpos += pylen;
                }
                else {
                    charcurpos += libpinyin->cursor_pos - curoffset;
                    curoffset = libpinyin->cursor_pos;
                }
                g_free(pystring);
                break;
            }
            case LPT_Shuangpin: {
                // A key spanning two raw characters is displayed as two
                // messages: the initial, then middle + final. Each raw
                // character the cursor has passed advances the displayed
                // cursor by the byte length of the corresponding part.
                if (pykeypos->length() == 2) {
                    const char* initial = 0;
                    if (pykey->m_initial == CHEWING_ZERO_INITIAL)
                        initial = "'";
                    else
                        initial = get_initial_string(pykey);
                    if (curoffset + 1 <= libpinyin->cursor_pos) {
                        curoffset += 1;
                        charcurpos += strlen(initial);
                    }
                    FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", initial);

                    if (curoffset + 1 <= libpinyin->cursor_pos) {
                        curoffset += 1;
                        charcurpos += strlen(get_middle_string(pykey)) + strlen(get_final_string(pykey));
                    }
                    FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s%s", get_middle_string(pykey), get_final_string(pykey));
                }
                // A key spanning a single raw character is displayed as its
                // whole pinyin string.
                else if (pykeypos->length() == 1) {
                    gchar* pystring = pykey->get_pinyin_string();
                    if (curoffset + 1 <= libpinyin->cursor_pos) {
                        curoffset += 1;
                        charcurpos += strlen(pystring);
                    }
                    FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring);
                    g_free(pystring);
                }
                break;
            }
            case LPT_Zhuyin: {
                gchar* pystring = pykey->get_chewing_string();
                FcitxMessagesAddMessageAtLast(FcitxInputStateGetPreedit(input), MSG_CODE, "%s", pystring);

                // Cursor at or past the end of this key: skip the whole string.
                if (curoffset + pykeypos->length() <= libpinyin->cursor_pos) {
                    curoffset += pykeypos->length();
                    charcurpos += strlen(pystring);
                }
                else {
                    // Cursor inside this key: move `diff` characters into the
                    // chewing string. When the key carries a tone, one
                    // character is excluded from the comparable length.
                    int diff = libpinyin->cursor_pos - curoffset;
                    curoffset = libpinyin->cursor_pos;
                    size_t len = fcitx_utf8_strlen(pystring);
                    if (pykey->m_tone != CHEWING_ZERO_TONE)
                        len --;

                    if (diff > len)
                        charcurpos += strlen(pystring);
                    else {
                        charcurpos += fcitx_utf8_get_nth_char(pystring, diff) - pystring;
                    }
                }
                g_free(pystring);
                break;
            }
        }
    }

    int buflen = strlen(libpinyin->buf);

    // Raw input left after the last key: append it after a space, one
    // character at a time, still advancing the displayed cursor while the
    // raw cursor has not been reached.
    if (lastpos < buflen) {
        FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), " ");
        if (lastpos < libpinyin->cursor_pos)
            charcurpos ++;

        for (int i = lastpos; i < buflen; i ++)
        {
            char temp[2] = {'\0', '\0'};
            temp[0] = libpinyin->buf[i];
            FcitxMessagesMessageConcatLast (FcitxInputStateGetPreedit(input), temp);
            if (lastpos < libpinyin->cursor_pos) {
                charcurpos ++;
                lastpos++;
            }
        }
    }
    FcitxInputStateSetCursorPos(input, charcurpos);
}
示例#6
0
/**
 * Interactive test driver for the pinyin parsers.
 *
 * Command line options:
 *   -h, --help    print help and exit
 *   -i            enable incomplete pinyin in the custom settings
 *   -p <parser>   choose a parser: sp, sp-default, sp-stone, sp-zrm, sp-ms,
 *                 sp-ziguang, sp-abc, sp-liushi, zy, zy-standard, zy-default,
 *                 zy-hsu, zy-ibm, zy-gin-yieh, zy-et, zy-et26, zy-zhuyin
 *   -f <file>     table file path (stored, but not used further in this
 *                 function)
 *
 * After option parsing it reads lines from stdin until end of input or a line
 * starting with "quit", parses each line and prints the resulting keys, key
 * positions and zhuyin strings.
 *
 * Returns 0 on normal exit and -1 on a bad option; exits with EINVAL on an
 * unknown parser name.
 */
int main (int argc, char * argv [])
{
    NullPinyinValidator validator;
    PinyinKeyVector keys;
    PinyinKeyPosVector poses;
    PinyinCustomSettings custom;
    PinyinParser *parser = 0;
    //PinyinTable table;
    const char *tablefile = "../data/pinyin-table.txt";

    keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
    poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));

    for (int i = 1; i < argc; ++i) {
        if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) {
            print_help ();
            return 0;
        }

        if ( !strcmp("-i", argv [i]) ) {
            custom.set_use_incomplete (true);
            continue;
        }

        if ( !strcmp("-p", argv [i]) ) {
            if (++i >= argc) {
                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
                return -1;
            }
            // This must be one single if/else-if chain: a break between the
            // "sp-*" and "zy*" names would send every valid shuangpin choice
            // into the final "Unknown Parser" branch.
            if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default"))
                parser = new PinyinShuangPinParser ();
            else if (!strcmp (argv[i], "sp-stone"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_STONE);
            else if (!strcmp (argv[i], "sp-zrm"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM);
            else if (!strcmp (argv[i], "sp-ms"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_MS);
            else if (!strcmp (argv[i], "sp-ziguang"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG);
            else if (!strcmp (argv[i], "sp-abc"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_ABC);
            else if (!strcmp (argv[i], "sp-liushi"))
                parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI);
            else if (!strcmp (argv[i], "zy") || !strcmp (argv[i], "zy-standard") || !strcmp (argv[i], "zy-default"))
                parser = new PinyinZhuYinParser ();
            else if (!strcmp (argv[i], "zy-hsu"))
                parser = new PinyinZhuYinParser (ZHUYIN_HSU);
            else if (!strcmp (argv[i], "zy-ibm"))
                parser = new PinyinZhuYinParser (ZHUYIN_IBM);
            else if (!strcmp (argv[i], "zy-gin-yieh"))
                parser = new PinyinZhuYinParser (ZHUYIN_GIN_YIEH);
            else if (!strcmp (argv[i], "zy-et"))
                parser = new PinyinZhuYinParser (ZHUYIN_ET);
            else if (!strcmp (argv[i], "zy-et26"))
                parser = new PinyinZhuYinParser (ZHUYIN_ET26);
            else if (!strcmp (argv[i], "zy-zhuyin"))
                parser = new PinyinZhuYinParser (ZHUYIN_ZHUYIN);
            else {
                fprintf(stderr, "Unknown Parser:%s.\n", argv[i]);
                print_help();
                exit(EINVAL);
            }

            continue;
        }

        if (!strcmp("-f", argv [i])) {
            if (++i >= argc) {
                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
                return -1;
            }
            tablefile = argv [i];
            continue;
        }

        fprintf(stderr, "Invalid option: %s.\n", argv [i]);
        return -1;
    }

    if (!parser) parser = new PinyinDefaultParser ();

    char * line = NULL;
    size_t linecap = 0;

    while (1) {
        printf("Input:"); fflush(stdout);

        // Stop on end of input or a read error; otherwise `line` may be NULL
        // or stale and the loop would never terminate.
        if (getline(&line, &linecap, stdin) < 0) break;

        if (!strncmp (line, "quit", 4)) break;

        int len = parser->parse (validator, keys, poses,(const char *) line);

        printf("Parsed %d chars, %d keys:\n", len, keys->len);

        for (size_t i=0; i < keys->len; ++i){
            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
            printf("%s ", key->get_key_string ());
        }
        printf("\n");

        for ( size_t i=0; i < poses->len; ++i){
            PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i);
            printf("%d %ld ", pos->get_pos(), pos->get_length());
        }
        printf("\n");

        for (size_t i=0; i < keys->len; ++i){
            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
            printf("%s ", key->get_key_zhuyin_string ());
        }
        printf("\n");
    }

    if (line)
        free(line);

    return 0;
}
示例#7
0
int main(int argc, char *argv[])
{
    pinyin_context_t* context = pinyin_init(LIBPINYIN_PKGDATADIR "/data", NULL);
    pinyin_instance_t* inst = pinyin_alloc_instance(context);

    pinyin_set_options(context, IS_PINYIN | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE);

    string s;
    cin >> s ;
    pinyin_parse_more_double_pinyins(inst, s.c_str());

    int cursor = 0;

    for (int i = 0; i < inst->m_pinyin_keys->len; i ++)
    {
        PinyinKey* pykey = &g_array_index(inst->m_pinyin_keys, PinyinKey, i);
        gchar* py = pykey->get_pinyin_string();
        gchar* chewing = pykey->get_chewing_string();
        cout << py << " "
             << chewing
             << endl;

        g_free(py);
        g_free(chewing);
    }

    while (true)
    {
        cout << get_lookup_cursor(inst, cursor) << endl;
        GArray* array = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
        pinyin_get_candidates(inst, get_lookup_cursor(inst, cursor), array);
        cout << array->len << endl;

        pinyin_guess_sentence(inst);

        char* sentence = NULL;
        pinyin_get_sentence(inst, &sentence);
        if (sentence)
            cout << sentence << endl;
        else
            cout << "no sentence" << endl;
        g_free(sentence);

        for (int i = 0 ; i < array->len; i ++ )
        {
            lookup_candidate_t token = g_array_index(array, lookup_candidate_t, i);
            char* word = NULL;
            pinyin_translate_token(inst, token.m_token, &word);
            if (word)
                cout << word << " ";
            g_free(word);
        }

        cout << "constraints " << inst->m_constraints->len << endl;

        int cand;
        cin >> cursor >> cand;

        if (cand >= 0)
            pinyin_choose_candidate(inst, 0, &g_array_index(array, lookup_candidate_t, cand));
        else if (cand != -1) {
            pinyin_clear_constraints(inst);
        }
        else if (cand != -2) {
            break;
        }

        g_array_free(array, TRUE);
    }
    pinyin_free_instance(inst);
    pinyin_fini(context);
}