/**
 * parse_body:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already, as it is parsed before any new
 *         line is fetched.
 * @phrase_table: forwarded unchanged to the section parsers.
 * @phrase_index: forwarded unchanged to the section parsers.
 * @bigram: forwarded unchanged to the section parsers.
 *
 * Dispatches the "\1-gram" and "\2-gram" sections to parse_unigram() and
 * parse_bigram() until an "\end" line or the end of the input is reached.
 * The tag state pushed on entry is always popped before returning.
 *
 * Returns: true on success; false when a tag can not be registered, a line
 * can not be parsed, or a section parser fails (debug builds assert first).
 */
bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
                FacadePhraseIndex * phrase_index,
                KMixtureModelBigram * bigram){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(END_LINE, "\\end", 0, "", "") ||
         !taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "") ||
         !taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "") ) {
        assert(false);
        goto end;
    }

    for (;;) {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch(line_type) {
        case END_LINE:
            success = true;
            goto end;
        case GRAM_1_LINE:
            /* step over the section header; at the end of the input there
               is no section left, so stop instead of handing the stale
               header line to the section parser. */
            if ( my_getline(input) == -1 ) {
                success = true;
                goto end;
            }
            if ( !parse_unigram(input, phrase_table, phrase_index, bigram) )
                goto end;
            /* the section parser stops on the line which ended the section,
               so that line is parsed next without fetching a new one. */
            continue;
        case GRAM_2_LINE:
            if ( my_getline(input) == -1 ) {
                success = true;
                goto end;
            }
            if ( !parse_bigram(input, phrase_table, phrase_index, bigram) )
                goto end;
            continue;
        default:
            /* a line type which does not belong to the body. */
            assert(false);
            goto end;
        }
    }

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_unigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @phrases: the phrase table used to convert the phrase string to a token.
 * @phrase_index: receives the uni-gram frequency of every item.
 *
 * Reads the "\item" lines of a "\1-gram" section and adds the "count" value
 * of each item to @phrase_index.  Parsing stops on an END_LINE, GRAM_1_LINE
 * or GRAM_2_LINE line, or at the end of the input.  The tag state pushed on
 * entry is always popped before returning.
 *
 * Returns: true on success; false when the tag can not be registered, a line
 * can not be parsed or the "count" value is missing (debug builds assert
 * first).
 */
bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
                   FacadePhraseIndex * phrase_index){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 1, "count", "") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch (line_type) {
        case GRAM_1_ITEM_LINE:{
            /* handle \item in \1-gram */
            const char * string = (const char *) g_ptr_array_index(values, 0);
            phrase_token_t token = taglib_string_to_token(phrases, string);

            /* tag: count */
            gpointer value = NULL;
            if ( !g_hash_table_lookup_extended
                 (required, "count", NULL, &value) ) {
                assert(false);
                goto end;
            }
            glong count = atol((const char *)value);

            phrase_index->add_unigram_frequency(token, count);
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_body:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already, as it is parsed before any new
 *         line is fetched.
 * @output: the stream which receives the converted model.
 *
 * Copies the "\1-gram", "\2-gram" and "\end" section markers to @output and
 * lets parse_unigram() and parse_bigram() convert the items of each section,
 * until an "\end" line or the end of the input is reached.  The tag state
 * pushed on entry is always popped before returning.
 *
 * Returns: true on success; false when a tag can not be registered, a line
 * can not be parsed, or a section parser fails (debug builds assert first).
 */
bool parse_body(FILE * input, FILE * output){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(END_LINE, "\\end", 0, "", "") ||
         !taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "") ||
         !taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "") ) {
        assert(false);
        goto end;
    }

    for (;;) {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch(line_type) {
        case END_LINE:
            fprintf(output, "\\end\n");
            success = true;
            goto end;
        case GRAM_1_LINE:
            fprintf(output, "\\1-gram\n");
            /* step over the section header; at the end of the input there
               is no section left, so stop instead of handing the stale
               header line to the section parser. */
            if ( my_getline(input) == -1 ) {
                success = true;
                goto end;
            }
            if ( !parse_unigram(input, output) )
                goto end;
            /* the section parser stops on the line which ended the section,
               so that line is parsed next without fetching a new one. */
            continue;
        case GRAM_2_LINE:
            fprintf(output, "\\2-gram\n");
            if ( my_getline(input) == -1 ) {
                success = true;
                goto end;
            }
            if ( !parse_bigram(input, output) )
                goto end;
            continue;
        default:
            /* a line type which does not belong to the body. */
            assert(false);
            goto end;
        }
    }

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_unigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @phrase_table: not used directly in this function body.
 *                NOTE(review): the TAGLIB_GET_TOKEN macro presumably refers
 *                to it by name -- confirm before renaming.
 * @phrase_index: used to validate each token against its phrase string.
 * @bigram: receives one array header per item.
 *
 * Reads the "\item" lines of a "\1-gram" section; the "count" and "freq"
 * values of each item are stored as the m_WC and m_freq fields of the array
 * header of the token.  Parsing stops on an END_LINE, GRAM_1_LINE or
 * GRAM_2_LINE line, or at the end of the input.  The tag state pushed on
 * entry is always popped before returning.
 *
 * Returns: true on success; false when the tag can not be registered, a line
 * can not be parsed or a token does not match its phrase string (debug
 * builds assert first).
 */
bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
                   FacadePhraseIndex * phrase_index,
                   KMixtureModelBigram * bigram){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", "") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch (line_type) {
        case GRAM_1_ITEM_LINE:{
            /* handle \item in \1-gram */
            TAGLIB_GET_TOKEN(token, 0);
            TAGLIB_GET_PHRASE_STRING(word, 1);
            /* the validation is enforced in every build type. */
            if ( !taglib_validate_token_with_string
                 (phrase_index, token, word) ) {
                assert(false);
                goto end;
            }

            TAGLIB_GET_TAGVALUE(glong, count, atol);
            TAGLIB_GET_TAGVALUE(glong, freq, atol);

            /* all fields which are not set below stay zero. */
            KMixtureModelArrayHeader array_header;
            memset(&array_header, 0, sizeof(KMixtureModelArrayHeader));
            array_header.m_WC = count;
            array_header.m_freq = freq;
            bigram->set_array_header(token, array_header);
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_unigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @output: the stream which receives the converted items.
 *
 * Reads the "\item" lines of a "\1-gram" section and writes each item to
 * @output with its "freq" value emitted as the "count" value.  The item of
 * the sentence_start token and the items with a zero "freq" are dropped.
 * Parsing stops on an END_LINE, GRAM_1_LINE or GRAM_2_LINE line, or at the
 * end of the input.  The tag state pushed on entry is always popped before
 * returning.
 *
 * Returns: true on success; false when the tag can not be registered or a
 * line can not be parsed (debug builds assert first).
 */
bool parse_unigram(FILE * input, FILE * output){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch(line_type) {
        case GRAM_1_ITEM_LINE: {
            /* handle \item in \1-gram */
            TAGLIB_GET_TOKEN(token, 0);
            TAGLIB_GET_PHRASE_STRING(word, 1);

            /* remove the "<start>" in the uni-gram of interpolation model */
            if ( sentence_start == token )
                break;

            TAGLIB_GET_TAGVALUE(glong, freq, atol);

            /* ignore zero unigram freq item */
            if ( 0 != freq )
                fprintf(output, "\\item %d %s count %ld\n", token, word, freq);
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_bigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @output: the stream which receives the converted items.
 *
 * Reads the "\item" lines of a "\2-gram" section and writes each item to
 * @output with both token/phrase pairs and its "count" value; the
 * "T:N_n_0:n_1:Mr" names passed as the last taglib_add_tag() argument are
 * not written.  Parsing stops on an END_LINE, GRAM_1_LINE or GRAM_2_LINE
 * line, or at the end of the input.  The tag state pushed on entry is always
 * popped before returning.
 *
 * Returns: true on success; false when the tag can not be registered or a
 * line can not be parsed (debug builds assert first).
 */
bool parse_bigram(FILE * input, FILE * output){
    bool success = false;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
                         "count", "T:N_n_0:n_1:Mr") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch (line_type) {
        case GRAM_2_ITEM_LINE:{
            /* handle \item in \2-gram */
            /* two strings */
            TAGLIB_GET_TOKEN(token1, 0);
            TAGLIB_GET_PHRASE_STRING(word1, 1);

            TAGLIB_GET_TOKEN(token2, 2);
            TAGLIB_GET_PHRASE_STRING(word2, 3);

            TAGLIB_GET_TAGVALUE(glong, count, atol);
            fprintf(output, "\\item %d %s %d %s count %ld\n",
                    token1, word1, token2, word2, count);
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    taglib_pop_state();
    return success;
}
/**
 * parse_bigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @phrase_table: not used directly in this function body.
 *                NOTE(review): the TAGLIB_GET_TOKEN macro presumably refers
 *                to it by name -- confirm before renaming.
 * @phrase_index: used to validate each token against its phrase string.
 * @bigram: the k mixture model which the items are loaded from and stored to.
 *
 * Reads the "\item" lines of a "\2-gram" section.  The items which follow
 * each other with the same first token are collected in one single gram,
 * which is stored into @bigram when the first token changes and when the
 * parsing ends.  Parsing stops on an END_LINE, GRAM_1_LINE or GRAM_2_LINE
 * line, or at the end of the input.  The tag state pushed on entry is always
 * popped before returning.
 *
 * Returns: true on success; false when the tag can not be registered, a line
 * can not be parsed, a token does not match its phrase string or an item can
 * not be inserted (debug builds assert first).  The single gram collected so
 * far is stored and freed in the failure case too.
 */
bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
                  FacadePhraseIndex * phrase_index,
                  KMixtureModelBigram * bigram){
    /* declared before the first "goto end", as the cleanup code uses them. */
    bool success = false;
    phrase_token_t last_token = null_token;
    KMixtureModelSingleGram * last_single_gram = NULL;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
                         "count:T:N_n_0:n_1:Mr", "") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch (line_type) {
        case GRAM_2_ITEM_LINE:{
            /* handle \item in \2-gram */
            /* two tokens */
            TAGLIB_GET_TOKEN(token1, 0);
            TAGLIB_GET_PHRASE_STRING(word1, 1);
            /* the validation is enforced in every build type. */
            if ( !taglib_validate_token_with_string
                 (phrase_index, token1, word1) ) {
                assert(false);
                goto end;
            }

            TAGLIB_GET_TOKEN(token2, 2);
            TAGLIB_GET_PHRASE_STRING(word2, 3);
            if ( !taglib_validate_token_with_string
                 (phrase_index, token2, word2) ) {
                assert(false);
                goto end;
            }

            TAGLIB_GET_TAGVALUE(glong, count, atol);
            TAGLIB_GET_TAGVALUE(glong, T, atol);
            /* a pure consistency check without side effects. */
            assert(count == T);
            TAGLIB_GET_TAGVALUE(glong, N_n_0, atol);
            TAGLIB_GET_TAGVALUE(glong, n_1, atol);
            TAGLIB_GET_TAGVALUE(glong, Mr, atol);

            /* all fields which are not set below stay zero. */
            KMixtureModelArrayItem array_item;
            memset(&array_item, 0, sizeof(KMixtureModelArrayItem));
            array_item.m_WC = count;
            array_item.m_N_n_0 = N_n_0;
            array_item.m_n_1 = n_1;
            array_item.m_Mr = Mr;

            if ( last_token != token1 ) {
                /* the first token changed, flush the collected single gram. */
                if ( last_token && last_single_gram ) {
                    bigram->store(last_token, last_single_gram);
                    delete last_single_gram;
                    /* safe guard */
                    last_token = null_token;
                    last_single_gram = NULL;
                }

                KMixtureModelSingleGram * single_gram = NULL;
                bigram->load(token1, single_gram);

                /* create the new single gram */
                if ( single_gram == NULL )
                    single_gram = new KMixtureModelSingleGram;
                last_token = token1;
                last_single_gram = single_gram;
            }

            assert(NULL != last_single_gram);
            if ( !last_single_gram->insert_array_item(token2, array_item) ) {
                assert(false);
                goto end;
            }
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    /* flush the single gram which is still pending. */
    if ( last_token && last_single_gram ) {
        bigram->store(last_token, last_single_gram);
        delete last_single_gram;
        /* safe guard */
        last_token = null_token;
        last_single_gram = NULL;
    }

    taglib_pop_state();
    return success;
}
/**
 * parse_bigram:
 * @input: the text model being read; the current line is expected to be
 *         available in linebuf already.
 * @phrases: the phrase table used to convert the phrase strings to tokens.
 * @phrase_index: not used in this function body.
 * @bigram: the bi-gram which the items are loaded from and stored to.
 *
 * Reads the "\item" lines of a "\2-gram" section.  The items which follow
 * each other with the same first token are collected in one single gram:
 * the "count" value is inserted as the frequency of the second token and
 * added to the total frequency.  The single gram is stored into @bigram when
 * the first token changes and when the parsing ends.  Parsing stops on an
 * END_LINE, GRAM_1_LINE or GRAM_2_LINE line, or at the end of the input.
 * The tag state pushed on entry is always popped before returning.
 *
 * Returns: true on success; false when the tag can not be registered, a line
 * can not be parsed, the "count" value is missing or the single gram can not
 * be updated (debug builds assert first).  The single gram collected so far
 * is stored and freed in the failure case too.
 */
bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
                  FacadePhraseIndex * phrase_index,
                  Bigram * bigram){
    /* declared before the first "goto end", as the cleanup code uses them. */
    bool success = false;
    phrase_token_t last_token = 0;
    SingleGram * last_single_gram = NULL;

    taglib_push_state();

    /* Calls with side effects are kept outside of assert(), so that they
       are still executed when NDEBUG is defined. */
    if ( !taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 2, "count", "") ) {
        assert(false);
        goto end;
    }

    do {
        if ( !taglib_read(linebuf, line_type, values, required) ) {
            assert(false);
            goto end;
        }

        switch (line_type) {
        case GRAM_2_ITEM_LINE:{
            /* handle \item in \2-gram */
            /* two tokens */
            const char * string = (const char *) g_ptr_array_index(values, 0);
            phrase_token_t token1 = taglib_string_to_token(phrases, string);
            string = (const char *) g_ptr_array_index(values, 1);
            phrase_token_t token2 = taglib_string_to_token(phrases, string);

            /* tag: count */
            gpointer value = NULL;
            if ( !g_hash_table_lookup_extended
                 (required, "count", NULL, &value) ) {
                assert(false);
                goto end;
            }
            glong count = atol((const char *)value);

            if ( last_token != token1 ) {
                /* the first token changed, flush the collected single gram. */
                if ( last_token && last_single_gram ) {
                    bigram->store(last_token, last_single_gram);
                    delete last_single_gram;
                    /* safe guard */
                    last_token = 0;
                    last_single_gram = NULL;
                }

                SingleGram * single_gram = NULL;
                bigram->load(token1, single_gram);

                /* create the new single gram */
                if ( single_gram == NULL )
                    single_gram = new SingleGram;
                last_token = token1;
                last_single_gram = single_gram;
            }

            /* save the freq */
            guint32 total_freq = 0;
            if ( !last_single_gram->get_total_freq(total_freq) ||
                 !last_single_gram->insert_freq(token2, count) ) {
                assert(false);
                goto end;
            }
            total_freq += count;
            if ( !last_single_gram->set_total_freq(total_freq) ) {
                assert(false);
                goto end;
            }
            break;
        }
        case END_LINE:
        case GRAM_1_LINE:
        case GRAM_2_LINE:
            /* the section is finished, the caller handles this line. */
            success = true;
            goto end;
        default:
            assert(false);
            goto end;
        }
    } while (my_getline(input) != -1);

    /* the end of the input also finishes the section. */
    success = true;

 end:
    /* flush the single gram which is still pending. */
    if ( last_token && last_single_gram ) {
        bigram->store(last_token, last_single_gram);
        delete last_single_gram;
        /* safe guard */
        last_token = 0;
        last_single_gram = NULL;
    }

    taglib_pop_state();
    return success;
}