Beispiel #1
0
static void compute_case_change(MVMThreadContext *tc, MVMGrapheme32 synth_g, MVMNFGSynthetic *synth_info, MVMint32 case_) {
    MVMint32 num_result_graphs;
    MVMGrapheme32          *result = NULL;
    const MVMCodepoint *result_cps = NULL;
    /* Transform the base character. */
    MVMuint32 num_result_cps = MVM_unicode_get_case_change(tc,
        synth_info->codes[synth_info->base_index], case_, &result_cps);
    if (num_result_cps == 0 || (num_result_cps == 1 && result_cps[0] == synth_info->codes[synth_info->base_index])) {
        /* Base character does not change, so grapheme stays the same. We
         * install a non-null sentinel for this case, and set the result
         * grapheme count to zero, which indicates no change. */
        result = CASE_UNCHANGED;
        num_result_graphs = 0;
    }
    else {
        /* We can potentially get multiple graphemes back. We may also get
         * into situations where we case change the base and suddenly we
         * can normalize the whole thing to a non-synthetic. So, we take
         * a trip through the normalizer. We push any codepoints before the
         * base in the synthetic (only happens with Prepend codepoints).
          * We then push the first codepoint we get back from the case change
         * then the codeponits after the base characters (generally Extend
         * codepoints).
         * Finally we push anything else the case change produced. This should
         * do about the right thing for both case changes that produce a
         * base and a combiner, and those that produce a base and a base,
         * since the normalizer applies canonical combining class sorting. */
        MVMNormalizer norm;
        MVMint32 i;
        MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFG);
        if (0 < synth_info->base_index)
            MVM_unicode_normalizer_push_codepoints(tc, &norm,
                synth_info->codes,
                synth_info->base_index);
        /* Push the first result on */
        MVM_unicode_normalizer_push_codepoints(tc, &norm, result_cps, 1);
        /* Push any combiners after that codepoint so the combiners attach to the
         * first codepoint of the casechange not the second or more */
        MVM_unicode_normalizer_push_codepoints(tc, &norm,
            synth_info->codes     + synth_info->base_index + 1,
            synth_info->num_codes - synth_info->base_index - 1);
        if (1 < num_result_cps)
            MVM_unicode_normalizer_push_codepoints(tc, &norm,
                result_cps     + 1,
                num_result_cps - 1);
        MVM_unicode_normalizer_eof(tc, &norm);

        num_result_graphs = MVM_unicode_normalizer_available(tc, &norm);
        result = MVM_malloc(num_result_graphs * sizeof(MVMGrapheme32));
        for (i = 0; i < num_result_graphs; i++)
            result[i] = MVM_unicode_normalizer_get_grapheme(tc, &norm);
        MVM_unicode_normalizer_cleanup(tc, &norm);
    }

    switch (case_) {
        case MVM_unicode_case_change_type_upper:
            synth_info->case_uc        = result;
            synth_info->case_uc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_lower:
            synth_info->case_lc        = result;
            synth_info->case_lc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_title:
            synth_info->case_tc        = result;
            synth_info->case_tc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_fold:
            synth_info->case_fc        = result;
            synth_info->case_fc_graphs = num_result_graphs;
            break;
        default:
            MVM_panic(1, "NFG: invalid case change %d", case_);
    }
}
Beispiel #2
0
static void compute_case_change(MVMThreadContext *tc, MVMGrapheme32 synth, MVMNFGSynthetic *synth_info, MVMint32 case_) {
    MVMGrapheme32 *result;
    MVMint32 num_result_graphs;

    /* Transform the base character. */
    const MVMCodepoint *result_cps;
    MVMuint32     num_result_cps = MVM_unicode_get_case_change(tc, synth_info->base,
        case_, &result_cps);
    if (num_result_cps == 0 || *result_cps == synth_info->base) {
        /* Base character does not change, so grapheme stays the same. We
         * install a non-null sentinel for this case, and set the result
         * grapheme count to zero, which indicates no change. */
        result = CASE_UNCHANGED;
        num_result_graphs = 0;
    }
    else {
        /* We can potentially get multiple graphemes back. We may also get
         * into situations where we case change the base and suddenly we
         * can normalize the whole thing to a non-synthetic. So, we take
         * a trip through the normalizer. Note we push the first thing
         * we get back from the case change, then our combiners, and
         * finally anything else the case change produced. This should
         * do about the right thing for both case changes that produce a
         * base and a combiner, and those that produce a base and a base,
         * since the normalizer applies Unicode canonical sorting. */
        MVMNormalizer norm;
        MVMint32 i;
        MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFG);
        MVM_unicode_normalizer_push_codepoints(tc, &norm, result_cps, 1);
        MVM_unicode_normalizer_push_codepoints(tc, &norm, synth_info->combs,
            synth_info->num_combs);
        if (num_result_cps > 1)
            MVM_unicode_normalizer_push_codepoints(tc, &norm, result_cps + 1,
                num_result_cps - 1);
        MVM_unicode_normalizer_eof(tc, &norm);

        num_result_graphs = MVM_unicode_normalizer_available(tc, &norm);
        result = MVM_malloc(num_result_graphs * sizeof(MVMGrapheme32));
        for (i = 0; i < num_result_graphs; i++)
            result[i] = MVM_unicode_normalizer_get_grapheme(tc, &norm);
        MVM_unicode_normalizer_cleanup(tc, &norm);
    }

    switch (case_) {
    case MVM_unicode_case_change_type_upper:
        synth_info->case_uc = result;
        synth_info->case_uc_graphs = num_result_graphs;
        break;
    case MVM_unicode_case_change_type_lower:
        synth_info->case_lc = result;
        synth_info->case_lc_graphs = num_result_graphs;
        break;
    case MVM_unicode_case_change_type_title:
        synth_info->case_tc = result;
        synth_info->case_tc_graphs = num_result_graphs;
        break;
    case MVM_unicode_case_change_type_fold:
        synth_info->case_fc = result;
        synth_info->case_fc_graphs = num_result_graphs;
        break;
    default:
        MVM_panic(1, "NFG: invalid case change %d", case_);
    }
}