Exemplo n.º 1
0
/**
 * Look up a chewing input string in chewing_index.
 *
 * @param options  option flags passed to check_chewing_options().
 * @param chewing  the input string to search for.
 * @param key      receives the m_chewing_key of the matching content_table
 *                 entry; left untouched when the function returns false.
 * @return true when exactly one index entry compares equal to the input and
 *         check_chewing_options() accepts it, false otherwise.
 *
 * NOTE(review): equal_range presumably requires chewing_index to be ordered
 * by compare_chewing_less_than -- confirm against the table definition.
 */
static inline bool search_chewing_index(pinyin_option_t options,
                                        const char * chewing,
                                        ChewingKey & key){
    /* Zeroed probe item; only the input string is filled in. */
    chewing_index_item_t probe;
    memset(&probe, 0, sizeof(probe));
    probe.m_chewing_input = chewing;

    std_lite::pair<const chewing_index_item_t *,
                   const chewing_index_item_t *> matches;
    matches = std_lite::equal_range
        (chewing_index, chewing_index + G_N_ELEMENTS(chewing_index),
         probe, compare_chewing_less_than);

    /* The code asserts that an input matches at most one index entry. */
    guint16 num_matches = matches.second - matches.first;
    assert (num_matches <= 1);

    if (num_matches != 1)
        return false;

    const chewing_index_item_t * found = matches.first;
    if (!check_chewing_options(options, found))
        return false;

    key = content_table[found->m_table_index].m_chewing_key;
    /* The key stored in the table is asserted to point back at its own row. */
    assert(key.get_table_index() == found->m_table_index);
    return true;
}
Exemplo n.º 2
0
int main(int argc, char * argv[]) {
    GError * error = NULL;
    GOptionContext * context;

    context = g_option_context_new("- test pinyin parser");
    g_option_context_add_main_entries(context, entries, NULL);
    if (!g_option_context_parse(context, &argc, &argv, &error)) {
        g_print("option parsing failed:%s\n", error->message);
        exit(EINVAL);
    }

    pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE | USE_RESPLIT_TABLE;
    if (incomplete)
        options |= PINYIN_INCOMPLETE | ZHUYIN_INCOMPLETE;

    PhoneticParser2 * parser = NULL;
    ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
    ChewingKeyRestVector key_rests =
        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));

    /* create the parser */
    if (strcmp("fullpinyin", parsername) == 0) {
        parser = new FullPinyinParser2();
    } else if (strcmp("doublepinyin", parsername) == 0) {
        parser = new DoublePinyinParser2();
    } else if (strcmp("zhuyin", parsername) == 0) {
        if (strcmp("standard", schemename) == 0) {
            parser = new ZhuyinSimpleParser2();
        } else if (strcmp("hsu", schemename) == 0) {
            parser = new ZhuyinDiscreteParser2();
        } else if (strcmp("dachen26", schemename) == 0) {
            parser = new ZhuyinDaChenCP26Parser2();
        }
    } else if (strcmp("pinyindirect", parsername) == 0) {
        parser = new PinyinDirectParser2();
    } else if (strcmp("zhuyindirect", parsername) == 0) {
        parser = new ZhuyinDirectParser2();
    }


    if (!parser)
        parser = new FullPinyinParser2();

    char* linebuf = NULL; size_t size = 0; ssize_t read;
    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
            linebuf[strlen(linebuf) - 1] = '\0';
        }

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

#if 0
        ChewingKey key;
        bool success = parser->parse_one_key(options, key,
                                             linebuf, strlen(linebuf));
        if (success) {
            gchar * pinyins = key.get_pinyin_string();
            printf("pinyin:%s\n", pinyins);
            g_free(pinyins);
        }
#endif

#if 1
        int len = 0;
        guint32 start_time = record_time();
        for ( size_t i = 0; i < bench_times; ++i)
            len = parser->parse(options, keys, key_rests,
                                linebuf, strlen(linebuf));

        print_time(start_time, bench_times);

        printf("parsed %d chars, %d keys.\n", len, keys->len);

        assert(keys->len == key_rests->len);

        for (size_t i = 0; i < keys->len; ++i) {
            ChewingKey * key =
                &g_array_index(keys, ChewingKey, i);
            ChewingKeyRest * key_rest =
                &g_array_index(key_rests, ChewingKeyRest, i);

            gchar * pinyins = key->get_pinyin_string();
            printf("%s %d %d\t", pinyins,
                   key_rest->m_raw_begin, key_rest->m_raw_end);
            g_free(pinyins);
        }
        printf("\n");
#endif

    }

    if (linebuf)
        free(linebuf);

    delete parser;

    g_array_free(key_rests, TRUE);
    g_array_free(keys, TRUE);

    return 0;
}
Exemplo n.º 3
0
void gen_phrase_file(const char * outputfile, int phrase_index){
    FILE * outfile = fopen(outputfile, "w");
    if (NULL == outfile ) {
        fprintf(stderr, "Can't write file %s.\n", outputfile);
        exit(ENOENT);
    }

    phrase_token_t token = 1;

    /* phrase length index */
    for (size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i) {
        GArray * item_array = g_item_array[i];

        /* item array index */
        for (size_t m = 0; m < item_array->len; ++m) {
            phrase_and_array_item * item = &g_array_index
                (item_array, phrase_and_array_item, m);
            phrase_item phrase = item->phrase;
            GArray * chewing_and_freqs = item->chewing_and_freq_array;

            gchar * phrase_str = g_ucs4_to_utf8
                (phrase.uniphrase, phrase.length, NULL, NULL, NULL);

            /* iterate each pinyin */
            for (size_t n = 0; n < chewing_and_freqs->len; ++n) {
                chewing_and_freq_item * chewing_and_freq =
                    &g_array_index
                    (chewing_and_freqs, chewing_and_freq_item, n);

                ChewingKeyVector keys = chewing_and_freq->keys;
                ChewingKeyRestVector key_rests = chewing_and_freq->key_rests;

                GArray * pinyins = g_array_new(TRUE, FALSE, sizeof(gchar *));
                gchar * pinyin = NULL;

                size_t k;
                for (k = 0; k < keys->len; ++k) {
                    ChewingKey key = g_array_index(keys, ChewingKey, k);
                    ChewingKeyRest key_rest = g_array_index
                        (key_rests, ChewingKeyRest, k);

                    //assert (CHEWING_ZERO_TONE != key.m_tone);
                    pinyin = key.get_bopomofo_string();
                    g_array_append_val(pinyins, pinyin);
                }
                gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data);

                for (k = 0; k < pinyins->len; ++k) {
                    g_free(g_array_index(pinyins, gchar *, k));
                }
                g_array_free(pinyins, TRUE);

                guint32 freq = chewing_and_freq->freq;

                /* avoid zero freq */
                if (freq < 3) freq = 3;

		fprintf(outfile, "%s\t%s\t%d\t%d\n",
                        pinyin_str, phrase_str,
                        PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq);

                g_free(pinyin_str);
            }
            g_free(phrase_str);
            token++;
        }
    }

    fclose(outfile);
}