/* Example #1 */
/* 0 */
/*
	Case folds the UTF-8 text in `input` and writes the result to `target`.

	Each code point is read from the source, checked against the case folding
	quick-check property and, when flagged, replaced by the string found in the
	case folding tables. Code points without a replacement are written back
	unchanged. For the Turkish and Azerbaijani (Latin) locale, LATIN CAPITAL
	LETTER I folds to the bytes C4 B1 (U+0131) and LATIN CAPITAL LETTER I WITH
	DOT ABOVE folds to "i" instead of using the table entry.

	When the state's destination pointer is null, folded replacements are only
	counted and nothing is copied.

	The return value is always `state.total_bytes_needed`. On the out-of-space
	and invalid-data paths it excludes the code point that triggered the error.
	The outcome is reported through UTF8_SET_ERROR (NONE, INVALID_DATA or
	NOT_ENOUGH_SPACE); if initialization fails, this function sets no error
	itself and leaves reporting to casemapping_initialize, which receives
	`errors`.
*/
size_t utf8casefold(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors)
{
	CaseMappingState state;
	int turkic_locale;

	/* Check the caller's arguments */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, 0);

	/* Set up the case mapping state for case folding */

	if (!casemapping_initialize(
		&state,
		input, inputSize,
		target, targetSize,
		CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr,
		QuickCheckCaseMapped_Casefolded, locale,
		errors))
	{
		return state.total_bytes_needed;
	}

	/* The locale is fixed for the whole run, so decide once whether the
	   Turkish and Azerbaijani (Latin) exceptions apply */

	turkic_locale = (state.locale == UTF8_LOCALE_TURKISH_AND_AZERI_LATIN);

	/* Fold one code point per iteration until the input is used up */

	while (state.src_size > 0)
	{
		const char* folded = 0;
		uint8_t folded_size = 0;

		/* Decode the code point at the source cursor */

		state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point);
		if (state.last_code_point_size == 0)
		{
			goto invaliddata;
		}

		/* Advance past it; a reported size larger than the remaining input
		   simply ends the loop */

		if (state.src_size < state.last_code_point_size)
		{
			state.src_size = 0;
		}
		else
		{
			state.src += state.last_code_point_size;
			state.src_size -= state.last_code_point_size;
		}

		/* Look for a replacement, but only for code points flagged by the
		   quick-check property */

		if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0)
		{
			if (turkic_locale &&
				state.last_code_point == CP_LATIN_CAPITAL_LETTER_I)
			{
				folded = "\xC4\xB1";
				folded_size = 2;
			}
			else if (
				turkic_locale &&
				state.last_code_point == CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
			{
				folded = "i";
				folded_size = 1;
			}
			else
			{
				folded = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &folded_size);
			}
		}

		/* Emit the result */

		if (folded == 0)
		{
			/* No replacement: write the code point as it was read */

			folded_size = codepoint_write(state.last_code_point, &state.dst, &state.dst_size);
			if (folded_size == 0)
			{
				goto outofspace;
			}
		}
		else if (state.dst != 0)
		{
			/* Replacement found and an output buffer is present: copy it */

			if (state.dst_size < folded_size)
			{
				goto outofspace;
			}

			memcpy(state.dst, folded, folded_size);

			state.dst += folded_size;
			state.dst_size -= folded_size;
		}

		state.total_bytes_needed += folded_size;
	}

	UTF8_SET_ERROR(NONE);

	return state.total_bytes_needed;

invaliddata:
	UTF8_SET_ERROR(INVALID_DATA);

	return state.total_bytes_needed;

outofspace:
	UTF8_SET_ERROR(NOT_ENOUGH_SPACE);

	return state.total_bytes_needed;
}
TEST(QueryDecompositionLowercase, Found)
{
	// Looks up U+0393 in the lowercase tables and expects the UTF-8 bytes
	// CE B3 (U+03B3).
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x00000393, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xCE\xB3", decomposition, length);
}
TEST(QueryDecompositionLowercase, FoundFirst)
{
	// Looks up U+00C0 in the lowercase tables and expects the UTF-8 bytes
	// C3 A0 (U+00E0). Presumably the first entry of that table, going by the
	// test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000000C0, LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xC3\xA0", decomposition, length);
}
TEST(QueryDecompositionUppercase, FoundLast)
{
	// Looks up U+118DF in the uppercase tables and expects the UTF-8 bytes
	// F0 91 A2 BF (U+118BF). Presumably the last entry of that table, going by
	// the test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000118DF, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xF0\x91\xA2\xBF", decomposition, length);
}
TEST(QueryDecompositionUppercase, Missing)
{
	// Looks up U+2BAD in the uppercase tables and expects no entry, i.e. a
	// null result.
	uint8_t length = 0;
	EXPECT_EQ(nullptr, database_querydecomposition(0x00002BAD, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length));
	// `length` started at 0, so this holds whether the failed lookup leaves it
	// untouched or resets it to 0.
	EXPECT_EQ(0, length);
}
TEST(QueryDecompositionUppercase, Found)
{
	// Looks up U+1E0B in the uppercase tables and expects the UTF-8 bytes
	// E1 B8 8A (U+1E0A).
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x00001E0B, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xE1\xB8\x8A", decomposition, length);
}
TEST(QueryDecompositionUppercase, FoundFirst)
{
	// Looks up U+00B5 in the uppercase tables and expects the UTF-8 bytes
	// CE 9C (U+039C). Presumably the first entry of that table, going by the
	// test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000000B5, UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xCE\x9C", decomposition, length);
}
TEST(QueryDecompositionCompatibilityDecomposed, FoundLast)
{
	// Looks up U+2FA1D in the NFKD tables and expects the UTF-8 bytes
	// F0 AA 98 80 (U+2A600). Presumably the last entry of that table, going by
	// the test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x0002FA1D, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xF0\xAA\x98\x80", decomposition, length);
}
TEST(QueryDecompositionCompatibilityDecomposed, Missing)
{
	// Looks up U+1A2AF in the NFKD tables and expects no entry, i.e. a null
	// result.
	uint8_t length = 0;
	EXPECT_EQ(nullptr, database_querydecomposition(0x0001A2AF, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length));
	// `length` started at 0, so this holds whether the failed lookup leaves it
	// untouched or resets it to 0.
	EXPECT_EQ(0, length);
}
TEST(QueryDecompositionCompatibilityDecomposed, FoundFirst)
{
	// Looks up U+00A0 in the NFKD tables and expects a single space (U+0020).
	// Presumably the first entry of that table, going by the test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000000A0, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

	EXPECT_UTF8LENGTHEQ(" ", decomposition, length);
}
TEST(QueryDecompositionCompatibilityDecomposed, Found)
{
	// Looks up U+04E4 in the NFKD tables and expects the UTF-8 bytes
	// D0 98 CC 88 (U+0418 followed by U+0308).
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000004E4, NFKDIndex1Ptr, NFKDIndex2Ptr, NFKDDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xD0\x98\xCC\x88", decomposition, length);
}
TEST(QueryDecompositionDecomposed, Missing)
{
	// Looks up U+1FFFF in the NFD tables and expects no entry, i.e. a null
	// result.
	uint8_t length = 0;
	EXPECT_EQ(nullptr, database_querydecomposition(0x0001FFFF, NFDIndex1Ptr, NFDIndex2Ptr, NFDDataPtr, &length));
	// `length` started at 0, so this holds whether the failed lookup leaves it
	// untouched or resets it to 0.
	EXPECT_EQ(0, length);
}
TEST(QueryDecompositionDecomposed, FoundFirst)
{
	// Looks up U+00C0 in the NFD tables and expects "A" followed by the UTF-8
	// bytes CC 80 (U+0300). Presumably the first entry of that table, going by
	// the test name.
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x000000C0, NFDIndex1Ptr, NFDIndex2Ptr, NFDDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("A\xCC\x80", decomposition, length);
}
TEST(QueryDecompositionTitlecase, Found)
{
	// Looks up U+0450 in the titlecase tables and expects the UTF-8 bytes
	// D0 80 (U+0400).
	uint8_t length = 0;

	// Run the query as its own statement so `length` is written before the
	// assertion reads it. With the call and `length` in one argument list, the
	// order of the write and the read is unspecified if the macro (defined
	// outside this file) forwards its arguments to a function.
	const char* decomposition = database_querydecomposition(0x00000450, TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr, &length);

	EXPECT_UTF8LENGTHEQ("\xD0\x80", decomposition, length);
}