/* Works out what a synthetic grapheme turns into under one kind of case
 * change (upper, lower, title or fold, chosen by case_) and caches the
 * answer in the matching case_* / case_*_graphs fields of synth_info.
 * The synth_g argument is not consulted by this routine. An unrecognised
 * case_ value ends in a panic. */
static void compute_case_change(MVMThreadContext *tc, MVMGrapheme32 synth_g, MVMNFGSynthetic *synth_info, MVMint32 case_) {
    MVMGrapheme32      *graphs    = NULL;
    MVMint32            graph_count;
    const MVMCodepoint *cased_cps = NULL;

    /* Only the base codepoint of the synthetic is case changed. */
    MVMuint32 cased_count = MVM_unicode_get_case_change(tc,
        synth_info->codes[synth_info->base_index], case_, &cased_cps);

    /* The base counts as untouched when nothing came back, or when exactly
     * one codepoint came back and it is the base itself. */
    int base_unchanged = cased_count == 0
        || (cased_count == 1
            && cased_cps[0] == synth_info->codes[synth_info->base_index]);

    if (!base_unchanged) {
        /* The case change may yield several graphemes, and it may also make
         * the whole sequence normalizable to a non-synthetic, so everything
         * is fed back through the normalizer in this order:
         *   1. codepoints sitting before the base (only happens with
         *      Prepend codepoints),
         *   2. the first codepoint of the case change result,
         *   3. codepoints following the base (generally Extend codepoints),
         *      so that they attach to the first case changed codepoint
         *      rather than a later one,
         *   4. whatever else the case change produced.
         * Since the normalizer applies canonical combining class sorting,
         * this should do about the right thing both for results made of a
         * base plus a combiner and for results made of two bases. */
        MVMNormalizer nfg;
        MVMint32      filled = 0;

        MVM_unicode_normalizer_init(tc, &nfg, MVM_NORMALIZE_NFG);

        if (synth_info->base_index > 0)
            MVM_unicode_normalizer_push_codepoints(tc, &nfg,
                synth_info->codes, synth_info->base_index);

        MVM_unicode_normalizer_push_codepoints(tc, &nfg, cased_cps, 1);

        MVM_unicode_normalizer_push_codepoints(tc, &nfg,
            synth_info->codes + synth_info->base_index + 1,
            synth_info->num_codes - synth_info->base_index - 1);

        if (cased_count > 1)
            MVM_unicode_normalizer_push_codepoints(tc, &nfg,
                cased_cps + 1, cased_count - 1);

        MVM_unicode_normalizer_eof(tc, &nfg);

        /* Drain every grapheme the normalizer has ready into a fresh buffer,
         * which is handed over to synth_info below. */
        graph_count = MVM_unicode_normalizer_available(tc, &nfg);
        graphs      = MVM_malloc(graph_count * sizeof(MVMGrapheme32));
        while (filled < graph_count) {
            graphs[filled] = MVM_unicode_normalizer_get_grapheme(tc, &nfg);
            filled++;
        }

        MVM_unicode_normalizer_cleanup(tc, &nfg);
    }
    else {
        /* Nothing changes: store the non-null sentinel together with a
         * grapheme count of zero, which signals "no change". */
        graphs      = CASE_UNCHANGED;
        graph_count = 0;
    }

    /* Record the outcome in the slot belonging to the requested case change. */
    switch (case_) {
        case MVM_unicode_case_change_type_upper:
            synth_info->case_uc_graphs = graph_count;
            synth_info->case_uc        = graphs;
            break;
        case MVM_unicode_case_change_type_lower:
            synth_info->case_lc_graphs = graph_count;
            synth_info->case_lc        = graphs;
            break;
        case MVM_unicode_case_change_type_title:
            synth_info->case_tc_graphs = graph_count;
            synth_info->case_tc        = graphs;
            break;
        case MVM_unicode_case_change_type_fold:
            synth_info->case_fc_graphs = graph_count;
            synth_info->case_fc        = graphs;
            break;
        default:
            MVM_panic(1, "NFG: invalid case change %d", case_);
    }
}
/* Computes the result of applying one kind of case change (upper, lower,
 * title or fold, selected by case_) to a synthetic grapheme and caches it
 * in the matching case_* / case_*_graphs fields of synth_info. The synth
 * argument is not used by this routine. An unrecognised case_ value ends
 * in a panic. */
static void compute_case_change(MVMThreadContext *tc, MVMGrapheme32 synth, MVMNFGSynthetic *synth_info, MVMint32 case_) {
    MVMGrapheme32      *result     = NULL;
    MVMint32            num_result_graphs;
    const MVMCodepoint *result_cps = NULL;

    /* Transform the base character. */
    MVMuint32 num_result_cps = MVM_unicode_get_case_change(tc, synth_info->base,
        case_, &result_cps);

    /* The grapheme is only unchanged when the case change yields nothing, or
     * yields exactly one codepoint equal to the base. A multi-codepoint
     * result must go through the normalizer even if its first codepoint
     * happens to equal the base, otherwise the trailing codepoints would be
     * silently dropped. */
    if (num_result_cps == 0 || (num_result_cps == 1 && result_cps[0] == synth_info->base)) {
        /* Base character does not change, so grapheme stays the same. We
         * install a non-null sentinel for this case, and set the result
         * grapheme count to zero, which indicates no change. */
        result = CASE_UNCHANGED;
        num_result_graphs = 0;
    }
    else {
        /* We can potentially get multiple graphemes back. We may also get
         * into situations where we case change the base and suddenly we
         * can normalize the whole thing to a non-synthetic. So, we take
         * a trip through the normalizer. Note we push the first thing
         * we get back from the case change, then our combiners, and
         * finally anything else the case change produced. This should
         * do about the right thing for both case changes that produce a
         * base and a combiner, and those that produce a base and a base,
         * since the normalizer applies Unicode canonical sorting. */
        MVMNormalizer norm;
        MVMint32 i;

        MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFG);
        MVM_unicode_normalizer_push_codepoints(tc, &norm, result_cps, 1);
        MVM_unicode_normalizer_push_codepoints(tc, &norm, synth_info->combs, synth_info->num_combs);
        if (num_result_cps > 1) {
            MVM_unicode_normalizer_push_codepoints(tc, &norm, result_cps + 1, num_result_cps - 1);
        }
        MVM_unicode_normalizer_eof(tc, &norm);

        /* Collect every grapheme the normalizer has available into a newly
         * allocated buffer; the buffer is stored into synth_info below. */
        num_result_graphs = MVM_unicode_normalizer_available(tc, &norm);
        result = MVM_malloc(num_result_graphs * sizeof(MVMGrapheme32));
        for (i = 0; i < num_result_graphs; i++) {
            result[i] = MVM_unicode_normalizer_get_grapheme(tc, &norm);
        }
        MVM_unicode_normalizer_cleanup(tc, &norm);
    }

    /* Store the result in the slot for the requested kind of case change. */
    switch (case_) {
        case MVM_unicode_case_change_type_upper:
            synth_info->case_uc = result;
            synth_info->case_uc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_lower:
            synth_info->case_lc = result;
            synth_info->case_lc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_title:
            synth_info->case_tc = result;
            synth_info->case_tc_graphs = num_result_graphs;
            break;
        case MVM_unicode_case_change_type_fold:
            synth_info->case_fc = result;
            synth_info->case_fc_graphs = num_result_graphs;
            break;
        default:
            MVM_panic(1, "NFG: invalid case change %d", case_);
    }
}