/* Case-insensitive equality test for two NUL-terminated UTF-8 strings.
 *
 * The language of the text is not known here, so NULL is passed as the
 * language argument of u8_casecmp() and we hope for the best.
 *
 * Returns EOK when the strings compare equal, ENOMATCH when they differ,
 * and the current errno value when u8_casecmp() reports a failure.
 */
errno_t sss_utf8_case_eq(const uint8_t *s1, const uint8_t *s2)
{
    int cmp_result;
    size_t len1;
    size_t len2;

    errno = 0;
    len1 = u8_strlen(s1);
    len2 = u8_strlen(s2);

    if (u8_casecmp(s1, len1, s2, len2, NULL, NULL, &cmp_result) < 0) {
        /* The comparison itself failed. */
        return errno;
    }

    return (cmp_result == 0) ? EOK : ENOMATCH;
}
/* Checks u8_strlen() on an empty string and on a string that mixes
   one-, two- and three-byte UTF-8 sequences.  */
int
main ()
{
  /* Empty string.  */
  static const uint8_t empty[] = { 0 };

  /* Simple string:
     "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
  static const uint8_t sample[] =
    { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ',
      0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81,
      0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5,
      '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(',
      'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')',
      ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',',
      0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', 0xED, 0x95, 0x9C, 0xEA, 0xB8,
      0x80, '\0'
    };

  ASSERT (u8_strlen (empty) == 0);

  /* The length excludes the terminating NUL unit.  */
  ASSERT (u8_strlen (sample) == SIZEOF (sample) - 1);

  return 0;
}
/* Returns 1 if the leading bytes of `string` equal `prefix`, 0 otherwise.
   A positive `casefold` selects strncasecmp(); zero or a negative value
   selects the exact strncmp() comparison. */
U8_EXPORT int u8_has_prefix(u8_string string,u8_string prefix,int casefold)
{
  int matched;
  if (casefold>0)
    matched=(strncasecmp(string,prefix,u8_strlen(prefix))==0);
  else
    matched=(strncmp(string,prefix,u8_strlen(prefix))==0);
  return (matched)?(1):(0);
}
/* Returns 1 if `string` ends with `suffix`, 0 otherwise (including when
   `suffix` is longer than `string`).  A positive `casefold` selects
   strcasecmp(); zero or a negative value selects strcmp(). */
U8_EXPORT int u8_has_suffix(u8_string string,u8_string suffix,int casefold)
{
  int string_len=u8_strlen(string);
  int suffix_len=u8_strlen(suffix);
  u8_string tail;
  if (suffix_len>string_len) return 0;
  /* Compare only the last suffix_len bytes of string */
  tail=string+(string_len-suffix_len);
  if (casefold>0)
    return (strcasecmp(tail,suffix)==0)?(1):(0);
  return (strcmp(tail,suffix)==0)?(1):(0);
}
/* Exercises the 's' conversion specifier of an asnprintf-style function that
   produces UTF-8 output.

   MY_ASNPRINTF is always called with a NULL result buffer, so each returned
   string is released with free().  Every call deliberately passes more
   arguments (44, 55) than the format string consumes.

   The padded expectations assume that the field width is counted in
   characters, as the libunistring unistdio functions do: "Ärger" is 5
   characters (6 bytes), so a width of 10 requires 5 padding spaces.  */
static void
test_function (uint8_t * (*my_asnprintf) (uint8_t *, size_t *, const char *, ...))
{
  /* Test the support of the 's' conversion specifier for strings.  */
  {
    const char *locale_string = "\303\204rger"; /* Ärger */
    {
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "\303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* Width: right-aligned, padded on the left with 5 spaces.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%10s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "     \303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* FLAG_LEFT: 5 padding spaces follow the string, then the separator.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%-10s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "\303\204rger      33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* FLAG_ZERO: no effect, the field is still padded with spaces.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%010s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "     \303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
  }
}
char * u8_strconv_to_encoding (const uint8_t *string, const char *tocode, enum iconv_ilseq_handler handler) { char *result; size_t length; if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0)) { /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). */ length = u8_strlen (string) + 1; #if CONFIG_UNICODE_SAFETY if (u8_check (string, length)) { errno = EILSEQ; return NULL; } #endif result = (char *) malloc (length); if (result == NULL) { errno = ENOMEM; return NULL; } memcpy (result, (const char *) string, length); return result; } else { result = NULL; length = 0; if (mem_iconveha ((const char *) string, u8_strlen (string) + 1, "UTF-8", tocode, handler == iconveh_question_mark, handler, NULL, &result, &length) < 0) return NULL; /* Verify the result has exactly one NUL byte, at the end. */ if (!(length > 0 && result[length-1] == '\0' && strlen (result) == length-1)) { free (result); errno = EILSEQ; return NULL; } return result; } }
bool isInFont (char * theText) { if (! fontTableSize) return 0; if (! theText[0]) return 0; // We don't want to compare strings. Only single characters allowed! if (u8_strlen (theText) > 1) return false; int i=0; uint32_t c = u8_nextchar(theText, &i); return u8_strchr(fontOrderString, c, &i); }
/* Formats FORMAT and its arguments through u8_vasnprintf() into a newly
   allocated string.  When a string is produced, asserts that the reported
   length equals its u8_strlen().  Returns the string, or NULL on failure.  */
static uint8_t *
my_xasprintf (const char *format, ...)
{
  va_list ap;
  size_t reported_len;
  uint8_t *text;

  va_start (ap, format);
  text = u8_vasnprintf (NULL, &reported_len, format, ap);
  va_end (ap);

  if (text == NULL)
    return NULL;

  ASSERT (reported_len == u8_strlen (text));
  return text;
}
/* Formats FORMAT and its arguments through u8_vsnprintf() into a 1000-unit
   stack buffer and returns a heap copy made with XNMALLOC.  Returns NULL
   when formatting fails or the output does not fit in the buffer.  */
static uint8_t *
my_xasprintf (const char *format, ...)
{
  uint8_t scratch[1000];
  va_list ap;
  int written;

  va_start (ap, format);
  written = u8_vsnprintf (scratch, sizeof (scratch), format, ap);
  va_end (ap);

  if (written >= 0 && written < (int) sizeof (scratch))
    {
      /* Copy the string together with its terminating NUL unit.  */
      size_t nunits = u8_strlen (scratch);
      uint8_t *copy = XNMALLOC (nunits + 1, uint8_t);

      u8_cpy (copy, scratch, nunits + 1);
      return copy;
    }

  return NULL;
}
/*
 * If the label of entry 'e' is not plain ASCII, lowercase it (NFKC), convert
 * it with idn2_lookup_u8() and, when the converted name differs from the
 * original label, append an extra entry holding the converted name to
 * vector 'v'. The new entry inherits the wildcard flag of 'e'.
 *
 * Every failure (lowercasing, memory allocation, IDNA lookup, vector insert)
 * is silently ignored: the vector is simply left without the extra entry.
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len] = 0;
	} else {
		/* result did not fit into resbuf: make a terminated heap copy */
		uint8_t *tmp = lower;

		lower = (uint8_t *)strndup((char *)tmp, len);
		free(tmp);

		if (!lower)
			return; /* out of memory */
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;

			/* NOTE(review): assumes _vector_get() returns NULL when _vector_add() failed - confirm */
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			if (suffixp)
				suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */

		/* the stored entry refers to its own label_buf, so the lookup result can be released */
		free(lookupname);
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
/* Tests u8_conv_from_encoding() for every ilseq handler, once without and
   once with an offsets array.  For each pass the converted text is compared
   with the expected UTF-8 bytes; when offsets are requested, each input
   position is checked against its expected output offset and the slot after
   the last input byte must still hold MAGIC.  */
int
main ()
{
  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t hnum;
  size_t pass;
  size_t pos;

#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  for (hnum = 0; hnum < SIZEOF (handlers); hnum++)
    {
      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      static const uint8_t expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      enum iconv_ilseq_handler handler = handlers[hnum];

      for (pass = 0; pass < 2; pass++)
        {
          size_t *offsets = NULL;
          size_t length;
          uint8_t *result;

          if (pass)
            offsets = new_offsets (strlen (input));
          result = u8_conv_from_encoding ("ISO-8859-1", handler,
                                          input, strlen (input),
                                          offsets,
                                          NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (pass)
            {
              for (pos = 0; pos < 37; pos++)
                {
                  /* Each two-byte output character shifts later offsets by one.  */
                  size_t want;

                  if (pos < 1)
                    want = pos;
                  else if (pos < 12)
                    want = pos + 1;
                  else if (pos < 18)
                    want = pos + 2;
                  else
                    want = pos + 3;
                  ASSERT (offsets[pos] == want);
                }
              ASSERT (offsets[37] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from ISO-8859-2 to UTF-8 with no errors.  */
  for (hnum = 0; hnum < SIZEOF (handlers); hnum++)
    {
      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
      static const uint8_t expected[] = "Rafa\305\202 Maszkowski";
      enum iconv_ilseq_handler handler = handlers[hnum];

      for (pass = 0; pass < 2; pass++)
        {
          size_t *offsets = NULL;
          size_t length;
          uint8_t *result;

          if (pass)
            offsets = new_offsets (strlen (input));
          result = u8_conv_from_encoding ("ISO-8859-2", handler,
                                          input, strlen (input),
                                          offsets,
                                          NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (pass)
            {
              for (pos = 0; pos < 16; pos++)
                {
                  size_t want;

                  if (pos < 5)
                    want = pos;
                  else
                    want = pos + 1;
                  ASSERT (offsets[pos] == want);
                }
              ASSERT (offsets[16] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* autodetect_jp is only supported when iconv() support ISO-2022-JP-2.  */
# if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
  /* Test conversions from autodetect_jp to UTF-8.  */
  for (hnum = 0; hnum < SIZEOF (handlers); hnum++)
    {
      static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      enum iconv_ilseq_handler handler = handlers[hnum];

      for (pass = 0; pass < 2; pass++)
        {
          size_t *offsets = NULL;
          size_t length;
          uint8_t *result;

          if (pass)
            offsets = new_offsets (strlen (input));
          result = u8_conv_from_encoding ("autodetect_jp", handler,
                                          input, strlen (input),
                                          offsets,
                                          NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (pass)
            {
              for (pos = 0; pos < 10; pos++)
                {
                  /* Only the first byte of each two-byte input character
                     has an offset; the second byte is marked (size_t)(-1).  */
                  size_t want;

                  if ((pos % 2) == 0)
                    want = (pos / 2) * 3;
                  else
                    want = (size_t)(-1);
                  ASSERT (offsets[pos] == want);
                }
              ASSERT (offsets[10] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
  for (hnum = 0; hnum < SIZEOF (handlers); hnum++)
    {
      static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      enum iconv_ilseq_handler handler = handlers[hnum];

      for (pass = 0; pass < 2; pass++)
        {
          size_t *offsets = NULL;
          size_t length;
          uint8_t *result;

          if (pass)
            offsets = new_offsets (strlen (input));
          result = u8_conv_from_encoding ("autodetect_jp", handler,
                                          input, strlen (input),
                                          offsets,
                                          NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (pass)
            {
              for (pos = 0; pos < 10; pos++)
                {
                  size_t want;

                  if ((pos % 2) == 0)
                    want = (pos / 2) * 3;
                  else
                    want = (size_t)(-1);
                  ASSERT (offsets[pos] == want);
                }
              ASSERT (offsets[10] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
  for (hnum = 0; hnum < SIZEOF (handlers); hnum++)
    {
      static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      enum iconv_ilseq_handler handler = handlers[hnum];

      for (pass = 0; pass < 2; pass++)
        {
          size_t *offsets = NULL;
          size_t length;
          uint8_t *result;

          if (pass)
            offsets = new_offsets (strlen (input));
          result = u8_conv_from_encoding ("autodetect_jp", handler,
                                          input, strlen (input),
                                          offsets,
                                          NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (pass)
            {
              for (pos = 0; pos < 16; pos++)
                {
                  /* Only these input positions carry an output offset.  */
                  size_t want;

                  switch (pos)
                    {
                    case 0:
                      want = 0;
                      break;
                    case 5:
                      want = 3;
                      break;
                    case 7:
                      want = 6;
                      break;
                    case 9:
                      want = 9;
                      break;
                    case 11:
                      want = 12;
                      break;
                    case 13:
                      want = 15;
                      break;
                    default:
                      want = (size_t)(-1);
                      break;
                    }
                  ASSERT (offsets[pos] == want);
                }
              ASSERT (offsets[16] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
# endif

#endif

  return 0;
}
// Converts a host name to its ASCII (IDNA) form when it needs encoding.
//
// Returns 'src' itself when no encoding is needed, when no IDN library is
// compiled in, or when the conversion fails (an error is printed in the
// latter cases). Otherwise returns the string allocated by the IDN library,
// so a caller can detect a conversion by comparing the result with 'src'.
const char *wget_str_to_ascii(const char *src)
{
#ifdef WITH_LIBIDN2
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;
#ifdef WITH_LIBUNISTRING
		uint8_t *lower, resbuf[256];
		size_t len = sizeof(resbuf) - 1; // leave space for additional \0 byte

		// we need a conversion to lowercase
		lower = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, resbuf, &len);
		if (!lower) {
			error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
			return src;
		}

		// u8_tolower() does not terminate the result string
		if (lower == resbuf) {
			lower[len]=0;
		} else {
			// result did not fit into resbuf: replace it by a terminated copy
			uint8_t *tmp = lower;
			lower = (uint8_t *)wget_strmemdup((char *)lower, len);
			xfree(tmp);
		}

		if ((rc = idn2_lookup_u8(lower, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), lower, rc, idn2_strerror(rc));

		if (lower != resbuf)
			xfree(lower);
#else
		if ((rc = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), src, rc, idn2_strerror(rc));
#endif
	}
#elif WITH_LIBIDN
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;

		if (_utf8_is_valid(src)) {
			// idna_to_ascii_8z() automatically converts UTF-8 to lowercase

			if ((rc = idna_to_ascii_8z(src, &asc, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
				// debug_printf("toASCII '%s' -> '%s'\n", src, asc);
				src = asc;
			} else
				error_printf(_("toASCII failed (%d): %s\n"), rc, idna_strerror(rc));
		}
		else
			error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	}
#else
	if (wget_str_needs_encoding(src)) {
		error_printf(_("toASCII not available: '%s'\n"), src);
	}
#endif

	return src;
}
/** * psl_str_to_utf8lower: * @str: string to convert * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL * @lower: return value containing the converted string * * This helper function converts a string to lowercase UTF-8 representation. * Lowercase UTF-8 is needed as input to the domain checking functions. * * @lower is set to %NULL on error. * * The return value 'lower' must be freed after usage. * * Returns: psl_error_t value. * PSL_SUCCESS: Success * PSL_ERR_INVALID_ARG: @str is a %NULL value. * PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding * PSL_ERR_TO_UTF16: Failed to convert @str to unicode * PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase * PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8 * * Since: 0.4 */ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower) { int ret = PSL_ERR_INVALID_ARG; if (lower) *lower = NULL; if (!str) return PSL_ERR_INVALID_ARG; /* shortcut to avoid costly conversion */ if (_str_is_ascii(str)) { if (lower) { char *p; *lower = strdup(str); /* convert ASCII string to lowercase */ for (p = *lower; *p; p++) if (isupper(*p)) *p = tolower(*p); } return PSL_SUCCESS; } #ifdef WITH_LIBICU do { size_t str_length = strlen(str); UErrorCode status = 0; UChar *utf16_dst, *utf16_lower; int32_t utf16_dst_length; char *utf8_lower; UConverter *uconv; /* C89 allocation */ utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1)); utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1)); utf8_lower = alloca(str_length * 2 + 1); uconv = ucnv_open(encoding, &status); if (U_SUCCESS(status)) { utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status); ucnv_close(uconv); if (U_SUCCESS(status)) { int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status); if (U_SUCCESS(status)) 
{ u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status); if (U_SUCCESS(status)) { if (lower) *lower = strdup(utf8_lower); ret = PSL_SUCCESS; } else { ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */ } } else { ret = PSL_ERR_TO_UTF16; /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */ } } else { ret = PSL_ERR_CONVERTER; /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } } while (0); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) do { /* find out local charset encoding */ if (!encoding) { encoding = nl_langinfo(CODESET); if (!encoding || !*encoding) encoding = "ASCII"; } /* convert to UTF-8 */ if (strcasecmp(encoding, "utf-8")) { iconv_t cd = iconv_open("utf-8", encoding); if (cd != (iconv_t)-1) { char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */ size_t tmp_len = strlen(str); size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len; char *dst = malloc(dst_len + 1), *dst_tmp = dst; if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { uint8_t *resbuf = malloc(dst_len * 2 + 1); size_t len = dst_len * 2; /* leave space for additional \0 byte */ if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ if (lower) *lower = strndup((char *)dst, len); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ } if (lower) *lower = strndup(dst, dst_len - dst_len_tmp); ret = PSL_SUCCESS; } else { ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ } free(dst); iconv_close(cd); } else { ret = 
PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ } } else ret = PSL_SUCCESS; /* convert to lowercase */ if (ret == PSL_SUCCESS) { uint8_t *dst, resbuf[256]; size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ /* we need a conversion to lowercase */ if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ if (lower) *lower = strndup((char *)dst, len); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ } } } while (0); #endif return ret; }
/* Returns the result of u8_width() for the whole NUL-terminated UTF-8
   string S; ENCODING is passed through to u8_width() unchanged.  */
int
u8_strwidth (const uint8_t *s, const char *encoding)
{
  size_t nunits = u8_strlen (s);

  return u8_width (s, nunits, encoding);
}
// Returns the length of theText as reported by u8_strlen().
int stringLength (char * theText) {
	int count = u8_strlen (theText);
	return count;
}
/* Tests u8_conv_to_encoding() for every ilseq handler, once without and once
   with an offsets array:
     - a conversion to ISO-8859-1 that succeeds,
     - a conversion containing a character that ISO-8859-1 cannot represent
       (the outcome depends on the handler),
     - an input that ends in an incomplete UTF-8 sequence.
   When offsets are requested, each input position is checked against its
   expected output offset and the slot after the last input unit must still
   hold MAGIC.  */
int
main ()
{
  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;
  size_t o;
  size_t i;

#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == strlen (expected));
          ASSERT (memcmp (result, expected, length) == 0);
          if (o)
            {
              /* The second byte of every two-byte input character has no
                 offset of its own and is marked (size_t)(-1).  */
              for (i = 0; i < 41; i++)
                ASSERT (offsets[i] == (i < 1 ? i :
                                       i == 1 ? (size_t)(-1) :
                                       i < 13 ? i - 1 :
                                       i == 13 ? (size_t)(-1) :
                                       i < 20 ? i - 2 :
                                       i == 20 ? (size_t)(-1) :
                                       i < 40 ? i - 3 :
                                       i == 40 ? (size_t)(-1) :
                                       i - 4));
              ASSERT (offsets[41] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length = 0xdead;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          switch (handler)
            {
            case iconveh_error:
              /* The conversion must fail and leave LENGTH untouched.  */
              ASSERT (result == NULL);
              ASSERT (errno == EILSEQ);
              ASSERT (length == 0xdead);
              /* Release the offsets array here too (it is NULL when o == 0).  */
              free (offsets);
              break;
            case iconveh_question_mark:
              {
                static const char expected[] = "Rafa? Maszkowski";
                static const char expected_translit[] = "Rafal Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                /* Either a '?' or a transliteration is accepted.  */
                ASSERT (memcmp (result, expected, length) == 0
                        || memcmp (result, expected_translit, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i - 1));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            case iconveh_escape_sequence:
              {
                static const char expected[] = "Rafa\\u0142 Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i + 4));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            }
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "\342";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          /* The incomplete sequence yields an empty result, not a failure.  */
          ASSERT (result != NULL);
          ASSERT (length == strlen (""));
          if (o)
            {
              ASSERT (offsets[0] == 0);
              ASSERT (offsets[1] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

#endif

  return 0;
}
/* Produces an NFD-normalized sort tag.
 *
 * If *sort_tag is already set, it is replaced by its normalized form (the old
 * string is freed). Otherwise a sort tag is derived from src_tag: a leading
 * English article ("a ", "an ", "the ") is skipped and short digit runs are
 * left-padded with zeroes. If src_tag is NULL or empty, *sort_tag is set to
 * NULL.
 *
 * The derived tag is assembled in a 1024 byte buffer; assembly stops early
 * when fewer than 100 bytes remain or when a digit run exceeds 50 bytes.
 * *sort_tag receives whatever u8_normalize() returns.
 */
static void
sort_tag_create(char **sort_tag, char *src_tag)
{
  const uint8_t *i_ptr;
  const uint8_t *n_ptr;
  const uint8_t *number;
  uint8_t out[1024];
  uint8_t *o_ptr;
  int append_number;
  ucs4_t puc;
  int numlen;
  size_t len;
  int charlen;

  /* Note: include terminating NUL in string length for u8_normalize */

  if (*sort_tag)
    {
      DPRINTF(E_DBG, L_LIB, "Existing sort tag will be normalized: %s\n", *sort_tag);
      o_ptr = u8_normalize(UNINORM_NFD, (uint8_t *)*sort_tag, strlen(*sort_tag) + 1, NULL, &len);
      free(*sort_tag);
      *sort_tag = (char *)o_ptr;
      return;
    }

  if (!src_tag || ((len = strlen(src_tag)) == 0))
    {
      *sort_tag = NULL;
      return;
    }

  // Set input pointer past article if present
  if ((strncasecmp(src_tag, "a ", 2) == 0) && (len > 2))
    i_ptr = (uint8_t *)(src_tag + 2);
  else if ((strncasecmp(src_tag, "an ", 3) == 0) && (len > 3))
    i_ptr = (uint8_t *)(src_tag + 3);
  else if ((strncasecmp(src_tag, "the ", 4) == 0) && (len > 4))
    i_ptr = (uint8_t *)(src_tag + 4);
  else
    i_ptr = (uint8_t *)src_tag;

  // Poor man's natural sort. Makes sure we sort like this: a1, a2, a10, a11, a21, a111
  // We do this by padding zeroes to (short) numbers. As an alternative we could have
  // made a proper natural sort algorithm in sqlext.c, but we don't, since we don't
  // want any risk of hurting response times
  memset(&out, 0, sizeof(out));
  o_ptr = (uint8_t *)&out;
  number = NULL;
  append_number = 0;

  do
    {
      // n_ptr becomes NULL when u8_next() cannot advance any further
      n_ptr = u8_next(&puc, i_ptr);

      if (uc_is_digit(puc))
	{
	  if (!number) // We have encountered the beginning of a number
	    number = i_ptr;
	  append_number = (n_ptr == NULL); // If last char in string append number now
	}
      else
	{
	  if (number)
	    append_number = 1; // A number has ended so time to append it
	  else
	    {
	      charlen = u8_strmblen(i_ptr);
	      if (charlen >= 0)
		o_ptr = u8_stpncpy(o_ptr, i_ptr, charlen); // No numbers in sight, just append char
	    }
	}

      // Break if less than 100 bytes remain (prevent buffer overflow)
      if (sizeof(out) - u8_strlen(out) < 100)
	break;

      // Break if number is very large (prevent buffer overflow)
      if (number && (i_ptr - number > 50))
	break;

      if (append_number)
	{
	  numlen = i_ptr - number;
	  if (numlen < 5) // Max pad width
	    {
	      // Write the padding first; the digits copied below overwrite its tail
	      u8_strcpy(o_ptr, (uint8_t *)"00000");
	      o_ptr += (5 - numlen);
	    }
	  // Copy the digits together with the character that ended the number
	  o_ptr = u8_stpncpy(o_ptr, number, numlen + u8_strmblen(i_ptr));

	  number = NULL;
	  append_number = 0;
	}

      i_ptr = n_ptr;
    }
  while (n_ptr);

  *sort_tag = (char *)u8_normalize(UNINORM_NFD, (uint8_t *)&out, u8_strlen(out) + 1, NULL, &len);
}