size_t utf8casefold(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors) { CaseMappingState state; /* Validate parameters */ UTF8_VALIDATE_PARAMETERS_CHAR(char, 0); /* Initialize case mapping */ if (!casemapping_initialize( &state, input, inputSize, target, targetSize, CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr, QuickCheckCaseMapped_Casefolded, locale, errors)) { return state.total_bytes_needed; } if (state.locale == UTF8_LOCALE_TURKISH_AND_AZERI_LATIN) { /* Exceptional behavior for Turkish and Azerbaijani (Latin) locales */ while (state.src_size > 0) { const char* resolved = 0; uint8_t bytes_needed = 0; /* Read next code point */ if (!(state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point))) { goto invaliddata; } /* Move source cursor */ if (state.src_size >= state.last_code_point_size) { state.src += state.last_code_point_size; state.src_size -= state.last_code_point_size; } else { state.src_size = 0; } /* Resolve case folding */ if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0) { if (state.last_code_point == CP_LATIN_CAPITAL_LETTER_I) { resolved = "\xC4\xB1"; bytes_needed = 2; } else if ( state.last_code_point == CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { resolved = "i"; bytes_needed = 1; } else { resolved = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &bytes_needed); } } /* Write to output */ if (resolved != 0) { /* Write resolved string to output */ if (state.dst != 0) { if (state.dst_size < bytes_needed) { goto outofspace; } memcpy(state.dst, resolved, bytes_needed); state.dst += bytes_needed; state.dst_size -= bytes_needed; } } else { /* Write code point unchanged to output */ if (!(bytes_needed = codepoint_write(state.last_code_point, &state.dst, &state.dst_size))) { goto outofspace; } } state.total_bytes_needed += bytes_needed; } } else { /* 
Execute case mapping as long as input remains */ while (state.src_size > 0) { const char* resolved = 0; uint8_t bytes_needed = 0; /* Read next code point */ if (!(state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point))) { goto invaliddata; } /* Move source cursor */ if (state.src_size >= state.last_code_point_size) { state.src += state.last_code_point_size; state.src_size -= state.last_code_point_size; } else { state.src_size = 0; } /* Resolve case folding */ if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0) { resolved = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &bytes_needed); } if (resolved != 0) { /* Write resolved string to output */ if (state.dst != 0) { if (state.dst_size < bytes_needed) { goto outofspace; } memcpy(state.dst, resolved, bytes_needed); state.dst += bytes_needed; state.dst_size -= bytes_needed; } } else { /* Write code point unchanged to output */ if (!(bytes_needed = codepoint_write(state.last_code_point, &state.dst, &state.dst_size))) { goto outofspace; } } state.total_bytes_needed += bytes_needed; } } UTF8_SET_ERROR(NONE); return state.total_bytes_needed; invaliddata: UTF8_SET_ERROR(INVALID_DATA); return state.total_bytes_needed; outofspace: UTF8_SET_ERROR(NOT_ENOUGH_SPACE); return state.total_bytes_needed; }
// Looking up U+0393 in the lowercase tables must yield the UTF-8 bytes
// CE B3 (U+03B3).
TEST(QueryDecompositionLowercase, Found)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x00000393, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xCE\xB3", mapped, length);
}
// Looking up U+00C0 in the lowercase tables must yield the UTF-8 bytes
// C3 A0 (U+00E0). Presumably U+00C0 is the first entry in those tables,
// going by the test name.
TEST(QueryDecompositionLowercase, FoundFirst)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x000000C0, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xC3\xA0", mapped, length);
}
// Looking up U+118DF in the uppercase tables must yield the UTF-8 bytes
// F0 91 A2 BF (U+118BF). Presumably U+118DF is the last entry in those
// tables, going by the test name.
TEST(QueryDecompositionUppercase, FoundLast)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x000118DF, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xF0\x91\xA2\xBF", mapped, length);
}
// Looking up U+2BAD in the uppercase tables must return a null pointer and
// report a length of zero.
TEST(QueryDecompositionUppercase, Missing)
{
    uint8_t length = 0;
    const char* mapped = database_querydecomposition(0x00002BAD, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

    EXPECT_EQ(nullptr, mapped);
    EXPECT_EQ(0, length);
}
// Looking up U+1E0B in the uppercase tables must yield the UTF-8 bytes
// E1 B8 8A (U+1E0A).
TEST(QueryDecompositionUppercase, Found)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x00001E0B, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xE1\xB8\x8A", mapped, length);
}
// Looking up U+00B5 in the uppercase tables must yield the UTF-8 bytes
// CE 9C (U+039C). Presumably U+00B5 is the first entry in those tables,
// going by the test name.
TEST(QueryDecompositionUppercase, FoundFirst)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x000000B5, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xCE\x9C", mapped, length);
}
// Looking up U+2FA1D in the NFKD tables must yield the UTF-8 bytes
// F0 AA 98 80 (U+2A600). Presumably U+2FA1D is the last entry in those
// tables, going by the test name.
TEST(QueryDecompositionCompatibilityDecomposed, FoundLast)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* decomposed = database_querydecomposition(0x0002FA1D, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xF0\xAA\x98\x80", decomposed, length);
}
// Looking up U+1A2AF in the NFKD tables must return a null pointer and
// report a length of zero.
TEST(QueryDecompositionCompatibilityDecomposed, Missing)
{
    uint8_t length = 0;
    const char* decomposed = database_querydecomposition(0x0001A2AF, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

    EXPECT_EQ(nullptr, decomposed);
    EXPECT_EQ(0, length);
}
// Looking up U+00A0 in the NFKD tables must yield a single space (U+0020).
// Presumably U+00A0 is the first entry in those tables, going by the test
// name.
TEST(QueryDecompositionCompatibilityDecomposed, FoundFirst)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* decomposed = database_querydecomposition(0x000000A0, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

    EXPECT_UTF8LENGTHEQ(" ", decomposed, length);
}
// Looking up U+04E4 in the NFKD tables must yield the UTF-8 bytes
// D0 98 CC 88 (U+0418 followed by U+0308).
TEST(QueryDecompositionCompatibilityDecomposed, Found)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* decomposed = database_querydecomposition(0x000004E4, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xD0\x98\xCC\x88", decomposed, length);
}
// Looking up U+1FFFF in the NFD tables must return a null pointer and
// report a length of zero.
TEST(QueryDecompositionDecomposed, Missing)
{
    uint8_t length = 0;
    const char* decomposed = database_querydecomposition(0x0001FFFF, NFDIndex1Ptr, NFDIndex2Ptr, NFDDataPtr, &length);

    EXPECT_EQ(nullptr, decomposed);
    EXPECT_EQ(0, length);
}
// Looking up U+00C0 in the NFD tables must yield "A" followed by the UTF-8
// bytes CC 80 (U+0300). Presumably U+00C0 is the first entry in those
// tables, going by the test name.
TEST(QueryDecompositionDecomposed, FoundFirst)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* decomposed = database_querydecomposition(0x000000C0, NFDIndex1Ptr, NFDIndex2Ptr, NFDDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("A\xCC\x80", decomposed, length);
}
// Looking up U+0450 in the titlecase tables must yield the UTF-8 bytes
// D0 80 (U+0400).
TEST(QueryDecompositionTitlecase, Found)
{
    uint8_t length = 0;

    // Run the lookup as its own statement so `length` is written before it
    // is read. EXPECT_UTF8LENGTHEQ is defined outside this chunk, and C++
    // does not fix the order in which sibling call arguments are evaluated.
    const char* mapped = database_querydecomposition(0x00000450, TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr, &length);

    EXPECT_UTF8LENGTHEQ("\xD0\x80", mapped, length);
}