/*
 * SimplePinyin.convert(pinyin, prefix="")
 *
 * Parses `pinyin`, guesses a sentence using `prefix`, and collects the full
 * pinyin candidates starting at offset 0.
 *
 * Returns a new reference to a 2-tuple (candidates, match_lengths):
 *   - candidates:    list of candidate strings (None where the library
 *                    reports no string for a candidate);
 *   - match_lengths: list of ints, one per candidate.  For a best-match
 *                    candidate this is strlen(pinyin); otherwise it is looked
 *                    up in the cumulative key-rest length table built below
 *                    (presumably the number of input characters the candidate
 *                    covers -- confirm against libpinyin).
 *
 * Returns NULL with a Python exception set on argument or allocation errors.
 * The pinyin instance is reset before returning on every path that got past
 * argument parsing.
 */
static PyObject *
SimplePinyin_convert(SimplePinyin* self, PyObject *args, PyObject *kwds)
{
    const char *pinyin = "";
    const char *prefix = "";
    static char *kwlist[] = {"pinyin", "prefix", NULL};

    /* Everything released at `done` is declared (and NULL-initialised)
     * up front so that no goto jumps over an initialisation. */
    PyObject *result = NULL;
    PyObject *candidate_list = NULL;
    PyObject *match_len_list = NULL;
    guint16 *key_ends = NULL;   /* key_ends[i] = summed length of keys 0..i */
    guint num_keys = 0;
    guint num_candidates = 0;
    int full_len = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", kwlist,
                                     &pinyin, &prefix))
        return NULL;

    /* Loop-invariant: used for every BEST_MATCH_CANDIDATE. */
    full_len = (int) strlen(pinyin);

    pinyin_parse_more_full_pinyins(self->instance, pinyin);
    pinyin_guess_sentence_with_prefix(self->instance, prefix);
    pinyin_guess_full_pinyin_candidates(self->instance, 0);

    /* Build the running total of key-rest lengths. */
    pinyin_get_n_pinyin(self->instance, &num_keys);
    key_ends = PyMem_New(guint16, num_keys);
    if (key_ends == NULL) {
        PyErr_NoMemory();
        goto done;
    }
    for (size_t i = 0; i < num_keys; ++i) {
        ChewingKeyRest *key_rest = NULL;

        /* PyMem_New does not zero memory; make sure the slot is defined
         * even if the library leaves the out-parameter untouched. */
        key_ends[i] = 0;
        pinyin_get_pinyin_key_rest(self->instance, i, &key_rest);
        pinyin_get_pinyin_key_rest_length(self->instance, key_rest,
                                          &key_ends[i]);
        if (i > 0) {
            key_ends[i] += key_ends[i - 1];
        }
    }

    pinyin_get_n_candidate(self->instance, &num_candidates);
    candidate_list = PyList_New(num_candidates);
    if (candidate_list == NULL)
        goto done;
    match_len_list = PyList_New(num_candidates);
    if (match_len_list == NULL)
        goto done;

    for (size_t i = 0; i < num_candidates; ++i) {
        lookup_candidate_t *candidate = NULL;
        const char *word = NULL;
        lookup_candidate_type_t type;
        PyObject *item = NULL;
        Py_ssize_t index = 0;
        int cursor = 0;
        int match_len = 0;

        pinyin_get_candidate(self->instance, i, &candidate);
        pinyin_get_candidate_string(self->instance, candidate, &word);

        /* "s" with a NULL pointer yields None. */
        item = Py_BuildValue("s", word);
        if (item == NULL)
            goto done;
        /* PyList_SetItem steals the reference to item, also on failure. */
        if (PyList_SetItem(candidate_list, i, item) < 0)
            goto done;

        pinyin_get_candidate_type(self->instance, candidate, &type);

        /* Temporarily choose the candidate to learn where the cursor lands;
         * the constraint is cleared again right after the lookup below. */
        cursor = pinyin_choose_candidate(self->instance, 0, candidate);

        switch (type) {
        case BEST_MATCH_CANDIDATE:
            match_len = full_len;
            break;
        case DIVIDED_CANDIDATE:
            /* FIXME: we assume that only one key gets divided. */
            index = (Py_ssize_t) cursor - 2;
            /* FIXME: remove the clamp below if possible.  Out-of-range
             * indices (either side) fall back to the last key, matching
             * the previous unsigned-comparison behaviour. */
            if (index < 0 || index >= (Py_ssize_t) num_keys) {
                index = (Py_ssize_t) num_keys - 1;
            }
            if (index >= 0) {           /* still -1 when there are no keys */
                match_len = key_ends[index];
            }
            break;
        case RESPLIT_CANDIDATE:
        case NORMAL_CANDIDATE:
            index = (Py_ssize_t) cursor - 1;
            /* Never read outside the table; report 0 when out of range. */
            if (index >= 0 && index < (Py_ssize_t) num_keys) {
                match_len = key_ends[index];
            }
            break;
        default:
            break;
        }

        pinyin_clear_constraint(self->instance, 0);

        item = Py_BuildValue("i", match_len);
        if (item == NULL)
            goto done;
        if (PyList_SetItem(match_len_list, i, item) < 0)
            goto done;
    }

    /* "O" takes its own reference to each list; ours are dropped below,
     * leaving the tuple as the sole owner. */
    result = Py_BuildValue("(OO)", candidate_list, match_len_list);

done:
    Py_XDECREF(candidate_list);
    Py_XDECREF(match_len_list);
    PyMem_Del(key_ends);            /* no-op on NULL */
    pinyin_reset(self->instance);
    return result;
}
int main(int argc, char * argv[]){ pinyin_context_t * context = pinyin_init("../data", "../data"); pinyin_option_t options = PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE | DYNAMIC_ADJUST; pinyin_set_options(context, options); pinyin_instance_t * instance = pinyin_alloc_instance(context); CandidateVector candidates = g_array_new (FALSE, FALSE, sizeof(lookup_candidate_t)); char * prefixbuf = NULL; size_t prefixsize = 0; char * linebuf = NULL; size_t linesize = 0; ssize_t read; while( TRUE ){ fprintf(stdout, "prefix:"); fflush(stdout); if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1) break; if ( '\n' == prefixbuf[strlen(prefixbuf) - 1] ) { prefixbuf[strlen(prefixbuf) - 1] = '\0'; } fprintf(stdout, "pinyin:"); fflush(stdout); if ((read = getline(&linebuf, &linesize, stdin)) == -1) break; if ( '\n' == linebuf[strlen(linebuf) - 1] ) { linebuf[strlen(linebuf) - 1] = '\0'; } if ( strcmp ( linebuf, "quit" ) == 0) break; pinyin_parse_more_full_pinyins(instance, linebuf); pinyin_guess_sentence_with_prefix(instance, prefixbuf); pinyin_get_full_pinyin_candidates(instance, 0, candidates); for (size_t i = 0; i < candidates->len; ++i) { lookup_candidate_t * candidate = &g_array_index (candidates, lookup_candidate_t, i); const char * pinyins = candidate->m_new_pinyins; const char * word = candidate->m_phrase_string; if (pinyins) printf("%s %s\t", pinyins, word); else printf("%s\t", word); } printf("\n"); pinyin_train(instance); pinyin_reset(instance); pinyin_save(context); } pinyin_free_candidates(instance, candidates); g_array_free(candidates, TRUE); pinyin_free_instance(instance); pinyin_fini(context); free(prefixbuf); free(linebuf); return 0; }