Esempio n. 1
0
static PyObject *
SimplePinyin_convert(SimplePinyin* self, PyObject *args, PyObject *kwds)
{
    const char *pinyin = "";
    const char *prefix = "";
    static char *kwlist[] = {"pinyin", "prefix", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", kwlist, &pinyin, &prefix))
        return NULL;
    // printf("DEBUG: pinyin=%s, prefix=%s.\n", pinyin, prefix);
    pinyin_parse_more_full_pinyins(self->instance, pinyin);
    pinyin_guess_sentence_with_prefix(self->instance, prefix);
    pinyin_guess_full_pinyin_candidates(self->instance, 0);

    guint num = 0;
    guint16 *arr = NULL; //FIXME: Use a name better than `arr`
    pinyin_get_n_pinyin(self->instance, &num);
    arr = PyMem_New(guint16, num);
    // printf("DEBUG: num=%i, arr=%p.\n", num, arr);
    for (size_t i = 0; i < num; ++i) {
        ChewingKeyRest *key_rest = NULL;
        pinyin_get_pinyin_key_rest(self->instance, i, &key_rest);
        pinyin_get_pinyin_key_rest_length(self->instance, key_rest, &arr[i]);
        if (i > 0) {
            arr[i] += arr[i-1];
        }
        // printf("DEBUG: %i\n", arr[i]);
    }

    guint len = 0;
    pinyin_get_n_candidate(self->instance, &len);
    // printf("DEBUG: len=%i\n", len);
    PyObject *candidate_list = PyList_New(len);
    PyObject *match_len_list = PyList_New(len);
    for (size_t i = 0; i < len; ++i) {
        lookup_candidate_t * candidate = NULL;
        pinyin_get_candidate(self->instance, i, &candidate);

        const char * word = NULL;
        pinyin_get_candidate_string(self->instance, candidate, &word);
        PyObject *ob_word = NULL;
        ob_word = Py_BuildValue("s", word);
        PyList_SetItem(candidate_list, i, ob_word);

        lookup_candidate_type_t type;
        pinyin_get_candidate_type(self->instance, candidate, &type);
        // printf("DEBUG: type=%i\n", type);

        int cursor = pinyin_choose_candidate(self->instance, 0, candidate);
        int match_len = 0;
        int index = 0;
        switch (type) {
        case BEST_MATCH_CANDIDATE:
            match_len = strlen(pinyin);
            break;
        case DIVIDED_CANDIDATE:
            //FIXME: we assume that only one key get divided
            index = cursor-2;
            //FIXME: remove the below hack if possible
            if (index >= num) {
                index = num-1;
            }
            match_len = arr[index];
            break;
        case RESPLIT_CANDIDATE:
        case NORMAL_CANDIDATE:
            index = cursor-1;
            match_len = arr[index];
        default:
            break;
        }

        // printf("DEBUG: match_len=%i\n", match_len);
        PyObject *ob_match_len = NULL;
        ob_match_len = Py_BuildValue("i", match_len);
        PyList_SetItem(match_len_list, i, ob_match_len);

        pinyin_clear_constraint(self->instance, 0);
        // printf("DEBUG: %s %d\n", word, arr[cursor-1]);
    }

    PyMem_Del(arr);
    pinyin_reset(self->instance);

    PyObject *ob_pair = NULL;
    ob_pair = Py_BuildValue("(O,O)", candidate_list, match_len_list);

    return ob_pair;
}
Esempio n. 2
0
/*
 * Replace a trailing '\n' in `buf` with '\0'.
 *
 * `len` is the byte count returned by getline(). Using it instead of
 * strlen() avoids indexing buf[-1] when the line starts with a NUL byte.
 */
static void
chomp_line(char *buf, ssize_t len)
{
    if (len > 0 && '\n' == buf[len - 1])
        buf[len - 1] = '\0';
}

/*
 * Interactive driver: repeatedly reads a prefix line and a pinyin line from
 * stdin, prints the full-pinyin candidates for them on one tab-separated
 * line, then trains, resets and saves. Stops on end of input or when the
 * pinyin line is "quit".
 */
int main(int argc, char * argv[]){
    pinyin_context_t * context =
        pinyin_init("../data", "../data");

    pinyin_option_t options =
        PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE |
        DYNAMIC_ADJUST;
    pinyin_set_options(context, options);

    pinyin_instance_t * instance = pinyin_alloc_instance(context);
    CandidateVector candidates = g_array_new
        (FALSE, FALSE, sizeof(lookup_candidate_t));

    /* getline() allocates and grows these buffers; they are freed at exit. */
    char * prefixbuf = NULL; size_t prefixsize = 0;
    char * linebuf = NULL; size_t linesize = 0;
    ssize_t nread;

    while( TRUE ){
        fprintf(stdout, "prefix:");
        fflush(stdout);

        nread = getline(&prefixbuf, &prefixsize, stdin);
        if (nread == -1)
            break;
        chomp_line(prefixbuf, nread);

        fprintf(stdout, "pinyin:");
        fflush(stdout);

        nread = getline(&linebuf, &linesize, stdin);
        if (nread == -1)
            break;
        chomp_line(linebuf, nread);

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        pinyin_parse_more_full_pinyins(instance, linebuf);
        pinyin_guess_sentence_with_prefix(instance, prefixbuf);

        pinyin_get_full_pinyin_candidates(instance, 0, candidates);
        for (size_t i = 0; i < candidates->len; ++i) {
            lookup_candidate_t * candidate = &g_array_index
                (candidates, lookup_candidate_t, i);
            const char * pinyins = candidate->m_new_pinyins;
            const char * word = candidate->m_phrase_string;

            /* Candidates that carry new pinyins are printed with them. */
            if (pinyins)
                printf("%s %s\t", pinyins, word);
            else
                printf("%s\t", word);
        }
        printf("\n");

        pinyin_train(instance);
        pinyin_reset(instance);
        pinyin_save(context);
    }

    pinyin_free_candidates(instance, candidates);
    g_array_free(candidates, TRUE);
    pinyin_free_instance(instance);
    pinyin_fini(context);
    free(prefixbuf); free(linebuf);
    return 0;
}