size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors) { CaseMappingState state; size_t bytes_written = 0; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, bytes_written); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr)) { UTF8_SET_ERROR(NONE); return bytes_written; } /* Execute case mapping as long as input remains */ while (state.src_size > 0) { size_t result = casemapping_execute(&state); if (!result) { UTF8_SET_ERROR(NOT_ENOUGH_SPACE); return bytes_written; } /* The first letter of every word should be titlecase, the rest lowercase */ if (state.property_data == TitlecaseDataPtr) { if ((state.last_general_category & GeneralCategory_Letter) != 0) { state.property_index1 = LowercaseIndex1Ptr; state.property_index2 = LowercaseIndex2Ptr; state.property_data = LowercaseDataPtr; } } else if ( (state.last_general_category & GeneralCategory_Letter) == 0) { state.property_index1 = TitlecaseIndex1Ptr; state.property_index2 = TitlecaseIndex2Ptr; state.property_data = TitlecaseDataPtr; } bytes_written += result; } UTF8_SET_ERROR(NONE); return bytes_written; }
/*
 * Initializing with the titlecase tables and the default locale must succeed,
 * store every argument in the state and leave the running fields at zero.
 */
TEST(CaseMappingInitialize, Initialize)
{
	CaseMappingState state;
	const char* text = "Greetings";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_TRUE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr,
		QuickCheckCaseMapped_Titlecase,
		UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	/* Source and destination cursors mirror the arguments */
	EXPECT_EQ(text, state.src);
	EXPECT_EQ(text_size, state.src_size);
	EXPECT_EQ(buffer, state.dst);
	EXPECT_EQ(buffer_size, state.dst_size);

	/* Property tables, locale and quick-check flags are stored as passed */
	EXPECT_EQ(TitlecaseIndex1Ptr, state.property_index1);
	EXPECT_EQ(TitlecaseIndex2Ptr, state.property_index2);
	EXPECT_EQ(TitlecaseDataPtr, state.property_data);
	EXPECT_LOCALE_EQ(UTF8_LOCALE_DEFAULT, state.locale);
	EXPECT_EQ(QuickCheckCaseMapped_Titlecase, state.quickcheck_flags);

	/* Nothing has been processed yet */
	EXPECT_EQ(0, state.total_bytes_needed);
	EXPECT_EQ(0, state.last_code_point);
	EXPECT_EQ(0, state.last_code_point_size);
	EXPECT_EQ(0, state.last_general_category);
	EXPECT_EQ(0, state.last_canonical_combining_class);
}
/*
 * Passing the locale value 312 must make initialization fail with
 * UTF8_ERR_INVALID_LOCALE and leave the state's locale at 0.
 */
TEST(CaseMappingInitialize, LocaleInvalid)
{
	CaseMappingState state;
	const char* text = "Universal";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_FALSE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr,
		QuickCheckCaseMapped_Uppercase,
		312,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_INVALID_LOCALE, errors);

	EXPECT_LOCALE_EQ(0, state.locale);
}
/*
 * Initialization with the Turkish and Azeri (Latin) locale must succeed and
 * record that locale in the state.
 */
TEST(CaseMappingInitialize, LocaleTurkishAndAzeriLatin)
{
	CaseMappingState state;
	const char* text = "I welcome our Turkish friends.";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_TRUE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr,
		QuickCheckCaseMapped_Uppercase,
		UTF8_LOCALE_TURKISH_AND_AZERI_LATIN,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	EXPECT_LOCALE_EQ(UTF8_LOCALE_TURKISH_AND_AZERI_LATIN, state.locale);
}
/*
 * Initialization with the Lithuanian locale must succeed and record that
 * locale in the state.
 */
TEST(CaseMappingInitialize, LocaleLithuanian)
{
	CaseMappingState state;
	const char* text = "Cuisine";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_TRUE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr,
		QuickCheckCaseMapped_Uppercase,
		UTF8_LOCALE_LITHUANIAN,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	EXPECT_LOCALE_EQ(UTF8_LOCALE_LITHUANIAN, state.locale);
}
/*
 * Initialization with the case-folding tables must succeed and store those
 * tables plus the casefolded quick-check flag in the state.
 */
TEST(CaseMappingInitialize, Casefold)
{
	CaseMappingState state;
	const char* text = "Darkness";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_TRUE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr,
		QuickCheckCaseMapped_Casefolded,
		UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	EXPECT_EQ(CaseFoldingIndex1Ptr, state.property_index1);
	EXPECT_EQ(CaseFoldingIndex2Ptr, state.property_index2);
	EXPECT_EQ(CaseFoldingDataPtr, state.property_data);
	EXPECT_EQ(QuickCheckCaseMapped_Casefolded, state.quickcheck_flags);
}
/*
 * Initialization with the titlecase tables must succeed and store those
 * tables plus the titlecase quick-check flag in the state.
 */
TEST(CaseMappingInitialize, Titlecase)
{
	CaseMappingState state;
	const char* text = "farMING";
	size_t text_size = strlen(text);
	char buffer[256] = { 0 };
	size_t buffer_size = 255;
	int32_t errors = UTF8_ERR_NONE;

	EXPECT_TRUE(casemapping_initialize(
		&state,
		text, text_size,
		buffer, buffer_size,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr,
		QuickCheckCaseMapped_Titlecase,
		UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	EXPECT_EQ(TitlecaseIndex1Ptr, state.property_index1);
	EXPECT_EQ(TitlecaseIndex2Ptr, state.property_index2);
	EXPECT_EQ(TitlecaseDataPtr, state.property_data);
	EXPECT_EQ(QuickCheckCaseMapped_Titlecase, state.quickcheck_flags);
}
size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors) { CaseMappingState state; size_t bytes_written = 0; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, bytes_written); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr)) { UTF8_SET_ERROR(NONE); return bytes_written; } /* Execute case mapping as long as input remains */ while (state.src_size > 0) { size_t result = casemapping_execute(&state); if (!result) { UTF8_SET_ERROR(NOT_ENOUGH_SPACE); return bytes_written; } bytes_written += result; } UTF8_SET_ERROR(NONE); return bytes_written; }
size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors) { CaseMappingState state; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, 0); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr, QuickCheckCaseMapped_Lowercase, locale, errors)) { return state.total_bytes_needed; } /* Execute case mapping as long as input remains */ while (state.src_size > 0) { size_t converted; if ((converted = casemapping_execute(&state, errors)) == 0) { return state.total_bytes_needed; } state.total_bytes_needed += converted; } UTF8_SET_ERROR(NONE); return state.total_bytes_needed; }
size_t utf8casefold(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors) { CaseMappingState state; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, 0); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr, QuickCheckCaseMapped_Casefolded, locale, errors)) { return state.total_bytes_needed; } if (state.locale == UTF8_LOCALE_TURKISH_AND_AZERI_LATIN) { /* Exceptional behavior for Turkish and Azerbaijani (Latin) locales */ while (state.src_size > 0) { const char* resolved = 0; uint8_t bytes_needed = 0; /* Read next code point */ if (!(state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point))) { goto invaliddata; } /* Move source cursor */ if (state.src_size >= state.last_code_point_size) { state.src += state.last_code_point_size; state.src_size -= state.last_code_point_size; } else { state.src_size = 0; } /* Resolve case folding */ if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0) { if (state.last_code_point == CP_LATIN_CAPITAL_LETTER_I) { resolved = "\xC4\xB1"; bytes_needed = 2; } else if ( state.last_code_point == CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { resolved = "i"; bytes_needed = 1; } else { resolved = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &bytes_needed); } } /* Write to output */ if (resolved != 0) { /* Write resolved string to output */ if (state.dst != 0) { if (state.dst_size < bytes_needed) { goto outofspace; } memcpy(state.dst, resolved, bytes_needed); state.dst += bytes_needed; state.dst_size -= bytes_needed; } } else { /* Write code point unchanged to output */ if (!(bytes_needed = codepoint_write(state.last_code_point, &state.dst, &state.dst_size))) { goto outofspace; } } state.total_bytes_needed += bytes_needed; } } else { /* 
Execute case mapping as long as input remains */ while (state.src_size > 0) { const char* resolved = 0; uint8_t bytes_needed = 0; /* Read next code point */ if (!(state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point))) { goto invaliddata; } /* Move source cursor */ if (state.src_size >= state.last_code_point_size) { state.src += state.last_code_point_size; state.src_size -= state.last_code_point_size; } else { state.src_size = 0; } /* Resolve case folding */ if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0) { resolved = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &bytes_needed); } if (resolved != 0) { /* Write resolved string to output */ if (state.dst != 0) { if (state.dst_size < bytes_needed) { goto outofspace; } memcpy(state.dst, resolved, bytes_needed); state.dst += bytes_needed; state.dst_size -= bytes_needed; } } else { /* Write code point unchanged to output */ if (!(bytes_needed = codepoint_write(state.last_code_point, &state.dst, &state.dst_size))) { goto outofspace; } } state.total_bytes_needed += bytes_needed; } } UTF8_SET_ERROR(NONE); return state.total_bytes_needed; invaliddata: UTF8_SET_ERROR(INVALID_DATA); return state.total_bytes_needed; outofspace: UTF8_SET_ERROR(NOT_ENOUGH_SPACE); return state.total_bytes_needed; }
size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors) { CaseMappingState state; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, 0); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr, QuickCheckCaseMapped_Titlecase, locale, errors)) { return state.total_bytes_needed; } /* Execute case mapping as long as input remains */ while (state.src_size > 0) { size_t converted; if ((converted = casemapping_execute(&state, errors)) == 0) { return state.total_bytes_needed; } /* The first letter of every word should be titlecase, the rest should be converted to lowercase. */ if (state.last_canonical_combining_class == CCC_NOT_REORDERED) { if (state.property_data == TitlecaseDataPtr) { if ((state.last_general_category & UTF8_CATEGORY_LETTER) != 0) { state.property_index1 = LowercaseIndex1Ptr; state.property_index2 = LowercaseIndex2Ptr; state.property_data = LowercaseDataPtr; state.quickcheck_flags = QuickCheckCaseMapped_Lowercase; } } else if ( (state.last_general_category & UTF8_CATEGORY_LETTER) == 0) { state.property_index1 = TitlecaseIndex1Ptr; state.property_index2 = TitlecaseIndex2Ptr; state.property_data = TitlecaseDataPtr; state.quickcheck_flags = QuickCheckCaseMapped_Titlecase; } } state.total_bytes_needed += converted; } UTF8_SET_ERROR(NONE); return state.total_bytes_needed; }