Esempio n. 1
0
/*
	Legacy titlecase conversion of UTF-8 text.

	Repeatedly calls casemapping_execute until the mapping state reports no
	remaining source bytes, starting out with the titlecase tables. After
	every step the active tables may be swapped:

	- titlecase tables active and the last code point was a letter:
	  switch to the lowercase tables;
	- other tables active and the last code point was not a letter:
	  switch back to the titlecase tables.

	Returns the sum of the values returned by casemapping_execute. If a
	step returns zero, the sum gathered so far is returned and the error is
	set to NOT_ENOUGH_SPACE; in every other visible exit the error is set
	to NONE.

	NOTE(review): UTF8_VALIDATE_PARAMETERS_CHAR and UTF8_SET_ERROR are
	defined outside this snippet; presumably they inspect the arguments and
	write through `errors` - confirm against their definitions.
*/
size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors)
{
	CaseMappingState mapping;
	size_t total = 0;

	/* Argument checks (the macro may return `total` early) */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, total);

	/* Set up the state with the titlecase tables */

	if (!casemapping_initialize(
		&mapping,
		input, inputSize,
		target, targetSize,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr))
	{
		/* A failed initialization is not reported as an error here */

		UTF8_SET_ERROR(NONE);

		return total;
	}

	/* Convert one step at a time until the source is exhausted */

	while (mapping.src_size > 0)
	{
		size_t step = casemapping_execute(&mapping);
		int titlecase_active;
		int last_was_letter;

		if (step == 0)
		{
			UTF8_SET_ERROR(NOT_ENOUGH_SPACE);

			return total;
		}

		titlecase_active = (mapping.property_data == TitlecaseDataPtr);
		last_was_letter = ((mapping.last_general_category & GeneralCategory_Letter) != 0);

		if (titlecase_active && last_was_letter)
		{
			/* Word has started: the remaining letters become lowercase */

			mapping.property_index1 = LowercaseIndex1Ptr;
			mapping.property_index2 = LowercaseIndex2Ptr;
			mapping.property_data = LowercaseDataPtr;
		}
		else if (!titlecase_active && !last_was_letter)
		{
			/* Word has ended: the next letter becomes titlecase again */

			mapping.property_index1 = TitlecaseIndex1Ptr;
			mapping.property_index2 = TitlecaseIndex2Ptr;
			mapping.property_data = TitlecaseDataPtr;
		}

		total += step;
	}

	UTF8_SET_ERROR(NONE);

	return total;
}
/*
	Initializing with the titlecase tables and the default locale must
	succeed, copy every argument into the state and leave all running
	counters and "last code point" fields at zero.
*/
TEST(CaseMappingInitialize, Initialize)
{
	const char* text = "Greetings";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_TRUE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr,
		QuickCheckCaseMapped_Titlecase, UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);

	/* Source and destination cursors mirror the arguments */

	EXPECT_EQ(text, cms.src);
	EXPECT_EQ(text_size, cms.src_size);
	EXPECT_EQ(buffer, cms.dst);
	EXPECT_EQ(buffer_size, cms.dst_size);

	/* Property tables, locale and quick-check flags are stored verbatim */

	EXPECT_EQ(TitlecaseIndex1Ptr, cms.property_index1);
	EXPECT_EQ(TitlecaseIndex2Ptr, cms.property_index2);
	EXPECT_EQ(TitlecaseDataPtr, cms.property_data);
	EXPECT_LOCALE_EQ(UTF8_LOCALE_DEFAULT, cms.locale);
	EXPECT_EQ(QuickCheckCaseMapped_Titlecase, cms.quickcheck_flags);

	/* Nothing has been processed yet */

	EXPECT_EQ(0, cms.total_bytes_needed);
	EXPECT_EQ(0, cms.last_code_point);
	EXPECT_EQ(0, cms.last_code_point_size);
	EXPECT_EQ(0, cms.last_general_category);
	EXPECT_EQ(0, cms.last_canonical_combining_class);
}
/*
	Passing the locale value 312 must make initialization fail with
	UTF8_ERR_INVALID_LOCALE and leave the state's locale at zero.
*/
TEST(CaseMappingInitialize, LocaleInvalid)
{
	const char* text = "Universal";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_FALSE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr,
		QuickCheckCaseMapped_Uppercase, 312,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_INVALID_LOCALE, errors);
	EXPECT_LOCALE_EQ(0, cms.locale);
}
/*
	Initializing with UTF8_LOCALE_TURKISH_AND_AZERI_LATIN must succeed
	without an error and store that locale in the state.
*/
TEST(CaseMappingInitialize, LocaleTurkishAndAzeriLatin)
{
	const char* text = "I welcome our Turkish friends.";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_TRUE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr,
		QuickCheckCaseMapped_Uppercase, UTF8_LOCALE_TURKISH_AND_AZERI_LATIN,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);
	EXPECT_LOCALE_EQ(UTF8_LOCALE_TURKISH_AND_AZERI_LATIN, cms.locale);
}
/*
	Initializing with UTF8_LOCALE_LITHUANIAN must succeed without an error
	and store that locale in the state.
*/
TEST(CaseMappingInitialize, LocaleLithuanian)
{
	const char* text = "Cuisine";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_TRUE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		UppercaseIndex1Ptr, UppercaseIndex2Ptr, UppercaseDataPtr,
		QuickCheckCaseMapped_Uppercase, UTF8_LOCALE_LITHUANIAN,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);
	EXPECT_LOCALE_EQ(UTF8_LOCALE_LITHUANIAN, cms.locale);
}
/*
	Initializing for case folding must succeed and store the case folding
	tables together with the matching quick-check flag in the state.
*/
TEST(CaseMappingInitialize, Casefold)
{
	const char* text = "Darkness";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_TRUE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr,
		QuickCheckCaseMapped_Casefolded, UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);
	EXPECT_EQ(CaseFoldingIndex1Ptr, cms.property_index1);
	EXPECT_EQ(CaseFoldingIndex2Ptr, cms.property_index2);
	EXPECT_EQ(CaseFoldingDataPtr, cms.property_data);
	EXPECT_EQ(QuickCheckCaseMapped_Casefolded, cms.quickcheck_flags);
}
/*
	Initializing for titlecase must succeed and store the titlecase tables
	together with the matching quick-check flag in the state.
*/
TEST(CaseMappingInitialize, Titlecase)
{
	const char* text = "farMING";
	char buffer[256] = { 0 };
	size_t text_size = strlen(text);
	size_t buffer_size = sizeof(buffer) - 1; /* one byte of the buffer is held back */
	int32_t errors = UTF8_ERR_NONE;
	CaseMappingState cms;

	EXPECT_TRUE(casemapping_initialize(
		&cms,
		text, text_size,
		buffer, buffer_size,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr,
		QuickCheckCaseMapped_Titlecase, UTF8_LOCALE_DEFAULT,
		&errors));
	EXPECT_ERROREQ(UTF8_ERR_NONE, errors);
	EXPECT_EQ(TitlecaseIndex1Ptr, cms.property_index1);
	EXPECT_EQ(TitlecaseIndex2Ptr, cms.property_index2);
	EXPECT_EQ(TitlecaseDataPtr, cms.property_data);
	EXPECT_EQ(QuickCheckCaseMapped_Titlecase, cms.quickcheck_flags);
}
Esempio n. 8
0
/*
	Legacy lowercase conversion of UTF-8 text.

	Sets up a case mapping state with the lowercase tables and calls
	casemapping_execute until the state reports no remaining source bytes.

	Returns the sum of the values returned by casemapping_execute. If a
	step returns zero, the sum gathered so far is returned and the error is
	set to NOT_ENOUGH_SPACE. A failed initialization and a completed
	conversion both set the error to NONE.

	NOTE(review): UTF8_VALIDATE_PARAMETERS_CHAR and UTF8_SET_ERROR are
	defined outside this snippet; presumably they inspect the arguments and
	write through `errors` - confirm against their definitions.
*/
size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors)
{
	CaseMappingState mapper;
	size_t written = 0;

	/* Argument checks (the macro may return `written` early) */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, written);

	/* Only convert when the state could be set up with the lowercase tables */

	if (casemapping_initialize(
		&mapper,
		input, inputSize,
		target, targetSize,
		LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr))
	{
		size_t chunk;

		/* The running total grows only after a step has succeeded */

		for (; mapper.src_size > 0; written += chunk)
		{
			chunk = casemapping_execute(&mapper);
			if (chunk == 0)
			{
				UTF8_SET_ERROR(NOT_ENOUGH_SPACE);

				return written;
			}
		}
	}

	/* Reached after a failed initialization as well as after a full conversion */

	UTF8_SET_ERROR(NONE);

	return written;
}
Esempio n. 9
0
/*
	Locale-aware lowercase conversion of UTF-8 text.

	Sets up a case mapping state with the lowercase tables, the lowercase
	quick-check flag and the requested locale, then calls
	casemapping_execute until the state reports no remaining source bytes,
	adding each step's return value to state.total_bytes_needed.

	Returns state.total_bytes_needed. The error is set to NONE only after
	the whole input was processed; when initialization fails or a step
	returns zero this function does not touch the error itself.

	NOTE(review): casemapping_initialize and casemapping_execute both
	receive `errors`, so they presumably report their own failure codes -
	confirm. The early return after a failed initialization also relies on
	casemapping_initialize having written total_bytes_needed - confirm.
*/
size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors)
{
	CaseMappingState ctx;

	/* Argument checks (the macro may return 0 early) */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, 0);

	/* Convert only when the state could be initialized */

	if (casemapping_initialize(
		&ctx,
		input, inputSize,
		target, targetSize,
		LowercaseIndex1Ptr, LowercaseIndex2Ptr, LowercaseDataPtr,
		QuickCheckCaseMapped_Lowercase, locale,
		errors))
	{
		while (ctx.src_size > 0)
		{
			size_t step = casemapping_execute(&ctx, errors);

			if (step == 0)
			{
				/* Stop without overriding whatever was reported through `errors` */

				return ctx.total_bytes_needed;
			}

			ctx.total_bytes_needed += step;
		}

		UTF8_SET_ERROR(NONE);
	}

	return ctx.total_bytes_needed;
}
Esempio n. 10
0
/*
	Case folds UTF-8 text.

	Reads the input one code point at a time. A code point whose case
	mapping property carries the QuickCheckCaseMapped_Casefolded flag is
	replaced by the string looked up in the case folding tables; every other
	code point is written back unchanged.

	When the state's locale is UTF8_LOCALE_TURKISH_AND_AZERI_LATIN, two
	flagged code points bypass the table lookup:

	- CP_LATIN_CAPITAL_LETTER_I folds to the two bytes C4 B1
	  (U+0131 encoded as UTF-8);
	- CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE folds to "i".

	If state.dst is null, resolved replacements are only counted and not
	copied.

	Returns state.total_bytes_needed, the running sum of the bytes of every
	code point handled so far. The error is set to
	- NONE after the whole input was processed,
	- INVALID_DATA when codepoint_read returns zero,
	- NOT_ENOUGH_SPACE when a replacement does not fit in the remaining
	  destination or codepoint_write returns zero.
	When initialization fails this function does not set the error itself.

	NOTE(review): UTF8_VALIDATE_PARAMETERS_CHAR and UTF8_SET_ERROR are
	defined outside this snippet; presumably they inspect the arguments and
	write through `errors`. casemapping_initialize receives `errors` and
	presumably reports its own failure code - confirm.
*/
size_t utf8casefold(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors)
{
	CaseMappingState state;
	int turkic;

	/* Validate parameters */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, 0);

	/* Initialize case mapping */

	if (!casemapping_initialize(
		&state,
		input, inputSize,
		target, targetSize,
		CaseFoldingIndex1Ptr, CaseFoldingIndex2Ptr, CaseFoldingDataPtr,
		QuickCheckCaseMapped_Casefolded, locale,
		errors))
	{
		return state.total_bytes_needed;
	}

	/*
		The locale is not modified inside the loop, so the check for the
		exceptional Turkish and Azerbaijani (Latin) behavior is done once.
	*/

	turkic = (state.locale == UTF8_LOCALE_TURKISH_AND_AZERI_LATIN);

	/* Execute case folding as long as input remains */

	while (state.src_size > 0)
	{
		const char* resolved = 0;
		uint8_t bytes_needed = 0;

		/* Read next code point */

		if (!(state.last_code_point_size = codepoint_read(state.src, state.src_size, &state.last_code_point)))
		{
			goto invaliddata;
		}

		/* Move source cursor, never past the end of the input */

		if (state.src_size >= state.last_code_point_size)
		{
			state.src += state.last_code_point_size;
			state.src_size -= state.last_code_point_size;
		}
		else
		{
			state.src_size = 0;
		}

		/* Resolve case folding */

		if ((PROPERTY_GET_CM(state.last_code_point) & QuickCheckCaseMapped_Casefolded) != 0)
		{
			if (turkic &&
				state.last_code_point == CP_LATIN_CAPITAL_LETTER_I)
			{
				resolved = "\xC4\xB1";
				bytes_needed = 2;
			}
			else if (
				turkic &&
				state.last_code_point == CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
			{
				resolved = "i";
				bytes_needed = 1;
			}
			else
			{
				resolved = database_querydecomposition(state.last_code_point, state.property_index1, state.property_index2, state.property_data, &bytes_needed);
			}
		}

		/* Write to output */

		if (resolved != 0)
		{
			/* Write resolved string to output; without a destination it is only counted */

			if (state.dst != 0)
			{
				if (state.dst_size < bytes_needed)
				{
					goto outofspace;
				}

				memcpy(state.dst, resolved, bytes_needed);

				state.dst += bytes_needed;
				state.dst_size -= bytes_needed;
			}
		}
		else
		{
			/* Write code point unchanged to output */

			if (!(bytes_needed = codepoint_write(state.last_code_point, &state.dst, &state.dst_size)))
			{
				goto outofspace;
			}
		}

		state.total_bytes_needed += bytes_needed;
	}

	UTF8_SET_ERROR(NONE);

	return state.total_bytes_needed;

invaliddata:
	UTF8_SET_ERROR(INVALID_DATA);

	return state.total_bytes_needed;

outofspace:
	UTF8_SET_ERROR(NOT_ENOUGH_SPACE);

	return state.total_bytes_needed;
}
Esempio n. 11
0
/*
	Locale-aware titlecase conversion of UTF-8 text.

	Sets up a case mapping state with the titlecase tables, the titlecase
	quick-check flag and the requested locale, then calls
	casemapping_execute until the state reports no remaining source bytes,
	adding each step's return value to state.total_bytes_needed.

	After every step whose last canonical combining class equals
	CCC_NOT_REORDERED the active tables and quick-check flag may be swapped:

	- titlecase tables active and the last code point was a letter:
	  switch to lowercase;
	- other tables active and the last code point was not a letter:
	  switch back to titlecase.

	Steps with any other combining class leave the tables untouched.

	Returns state.total_bytes_needed. The error is set to NONE only after
	the whole input was processed; when initialization fails or a step
	returns zero this function does not touch the error itself.

	NOTE(review): casemapping_initialize and casemapping_execute both
	receive `errors`, so they presumably report their own failure codes -
	confirm.
*/
size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, size_t locale, int32_t* errors)
{
	CaseMappingState ctx;

	/* Argument checks (the macro may return 0 early) */

	UTF8_VALIDATE_PARAMETERS_CHAR(char, 0);

	/* Start out with the titlecase tables */

	if (!casemapping_initialize(
		&ctx,
		input, inputSize,
		target, targetSize,
		TitlecaseIndex1Ptr, TitlecaseIndex2Ptr, TitlecaseDataPtr,
		QuickCheckCaseMapped_Titlecase, locale,
		errors))
	{
		return ctx.total_bytes_needed;
	}

	/* Convert one step at a time until the source is exhausted */

	while (ctx.src_size > 0)
	{
		size_t step = casemapping_execute(&ctx, errors);

		if (step == 0)
		{
			return ctx.total_bytes_needed;
		}

		/*
			First letter of a word is titlecased, the letters after it are
			lowercased. The decision is only revisited for code points whose
			combining class is CCC_NOT_REORDERED.
		*/

		if (ctx.last_canonical_combining_class == CCC_NOT_REORDERED)
		{
			int titlecase_active = (ctx.property_data == TitlecaseDataPtr);
			int last_was_letter = ((ctx.last_general_category & UTF8_CATEGORY_LETTER) != 0);

			if (titlecase_active && last_was_letter)
			{
				ctx.property_index1 = LowercaseIndex1Ptr;
				ctx.property_index2 = LowercaseIndex2Ptr;
				ctx.property_data = LowercaseDataPtr;

				ctx.quickcheck_flags = QuickCheckCaseMapped_Lowercase;
			}
			else if (!titlecase_active && !last_was_letter)
			{
				ctx.property_index1 = TitlecaseIndex1Ptr;
				ctx.property_index2 = TitlecaseIndex2Ptr;
				ctx.property_data = TitlecaseDataPtr;

				ctx.quickcheck_flags = QuickCheckCaseMapped_Titlecase;
			}
		}

		ctx.total_bytes_needed += step;
	}

	UTF8_SET_ERROR(NONE);

	return ctx.total_bytes_needed;
}