Beispiel #1
0
errno_t sss_utf8_case_eq(const uint8_t *s1, const uint8_t *s2)
{

    /* Do a case-insensitive comparison.
     * The input must be encoded in UTF8.
     * We have no way of knowing the language,
     * so we'll pass NULL for the language and
     * hope for the best.
     */
    int ret;
    int resultp;
    size_t n1, n2;
    errno = 0;

    n1 = u8_strlen(s1);
    n2 = u8_strlen(s2);

    ret = u8_casecmp(s1, n1,
                     s2, n2,
                     NULL, NULL,
                     &resultp);
    if (ret < 0) {
        /* An error occurred */
        return errno;
    }

    if (resultp == 0) {
        return EOK;
    }
    return ENOMATCH;
}
Beispiel #2
0
int
main ()
{
  /* Empty string.  */
  {
    static const uint8_t input[] = { 0 };
    ASSERT (u8_strlen (input) == 0);
  }

  /* Simple string.  */
  { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
    static const uint8_t input[] =
      { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ',
        0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81,
        0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5,
        '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(',
        'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')',
        ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',',
        0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\0'
      };
    ASSERT (u8_strlen (input) == SIZEOF (input) - 1);
  }

  return 0;
}
Beispiel #3
0
U8_EXPORT int u8_has_prefix(u8_string string,u8_string prefix,int casefold)
{
    if ((casefold>0)&&(strncasecmp(string,prefix,u8_strlen(prefix))==0))
        return 1;
    else if ((casefold<=0)&&(strncmp(string,prefix,u8_strlen(prefix))==0))
        return 1;
    else return 0;
}
Beispiel #4
0
U8_EXPORT int u8_has_suffix(u8_string string,u8_string suffix,int casefold)
{
    int stringlen=u8_strlen(string), suffixlen=u8_strlen(suffix);
    if (suffixlen>stringlen) return 0;
    else if ((casefold>0)&&(strcasecmp(string+(stringlen-suffixlen),suffix)==0))
        return 1;
    else if ((casefold<=0)&&(strcmp(string+(stringlen-suffixlen),suffix)==0))
        return 1;
    else return 0;
}
Beispiel #5
0
static void
test_function (uint8_t * (*my_asnprintf) (uint8_t *, size_t *, const char *, ...))
{
  /* Test the support of the 's' conversion specifier for strings.  */

  {
    const char *locale_string = "\303\204rger"; /* Ärger */
    {
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "\303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* Width.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%10s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "     \303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* FLAG_LEFT.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%-10s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "\303\204rger      33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
    { /* FLAG_ZERO: no effect.  */
      size_t length;
      uint8_t *result =
        my_asnprintf (NULL, &length, "%010s %d", locale_string, 33, 44, 55);
      static const uint8_t expected[] = "     \303\204rger 33";
      ASSERT (result != NULL);
      ASSERT (u8_strcmp (result, expected) == 0);
      ASSERT (length == u8_strlen (result));
      free (result);
    }
  }
}
char *
u8_strconv_to_encoding (const uint8_t *string,
                        const char *tocode,
                        enum iconv_ilseq_handler handler)
{
  char *result;
  size_t length;

  if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
      /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
      length = u8_strlen (string) + 1;
#if CONFIG_UNICODE_SAFETY
      if (u8_check (string, length))
        {
          errno = EILSEQ;
          return NULL;
        }
#endif
      result = (char *) malloc (length);
      if (result == NULL)
        {
          errno = ENOMEM;
          return NULL;
        }
      memcpy (result, (const char *) string, length);
      return result;
    }
  else
    {
      result = NULL;
      length = 0;
      if (mem_iconveha ((const char *) string, u8_strlen (string) + 1,
                        "UTF-8", tocode,
                        handler == iconveh_question_mark, handler,
                        NULL, &result, &length) < 0)
        return NULL;
      /* Verify the result has exactly one NUL byte, at the end.  */
      if (!(length > 0 && result[length-1] == '\0'
            && strlen (result) == length-1))
        {
          free (result);
          errno = EILSEQ;
          return NULL;
        }
      return result;
    }
}
bool isInFont (char * theText) {
	if (! fontTableSize) return 0;
	if (! theText[0]) return 0;
	
	// We don't want to compare strings. Only single characters allowed!
	if (u8_strlen (theText) > 1) return false;
	
	int i=0;
	uint32_t c = u8_nextchar(theText, &i);
	
	return u8_strchr(fontOrderString, c, &i);
}
static uint8_t *
my_xasprintf (const char *format, ...)
{
  va_list args;
  size_t length;
  uint8_t *ret;

  va_start (args, format);
  ret = u8_vasnprintf (NULL, &length, format, args);
  va_end (args);
  if (ret != NULL)
    ASSERT (length == u8_strlen (ret));
  return ret;
}
Beispiel #9
0
static uint8_t *
my_xasprintf (const char *format, ...)
{
  va_list args;
  uint8_t buf[1000];
  int retval;
  size_t length;
  uint8_t *result;

  va_start (args, format);
  retval = u8_vsnprintf (buf, sizeof (buf), format, args);
  va_end (args);
  if (retval < 0 || retval >= (int) sizeof (buf))
    return NULL;
  length = u8_strlen (buf);
  result = XNMALLOC (length + 1, uint8_t);
  u8_cpy (result, buf, length + 1);
  return result;
}
Beispiel #10
0
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len]=0;
	} else {
		uint8_t *tmp = lower;
		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
int
main ()
{
  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;
  size_t o;
  size_t i;

#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      static const uint8_t expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
          size_t length;
          uint8_t *result = u8_conv_from_encoding ("ISO-8859-1", handler,
                                                   input, strlen (input),
                                                   offsets,
                                                   NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (o)
            {
              for (i = 0; i < 37; i++)
                ASSERT (offsets[i] == (i < 1 ? i :
                                       i < 12 ? i + 1 :
                                       i < 18 ? i + 2 :
                                       i + 3));
              ASSERT (offsets[37] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from ISO-8859-2 to UTF-8 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
      static const uint8_t expected[] = "Rafa\305\202 Maszkowski";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
          size_t length;
          uint8_t *result = u8_conv_from_encoding ("ISO-8859-2", handler,
                                                   input, strlen (input),
                                                   offsets,
                                                   NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (o)
            {
              for (i = 0; i < 16; i++)
                ASSERT (offsets[i] == (i < 5 ? i :
                                       i + 1));
              ASSERT (offsets[16] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* autodetect_jp is only supported when iconv() support ISO-2022-JP-2.  */
# if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
  /* Test conversions from autodetect_jp to UTF-8.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
          size_t length;
          uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
                                                   input, strlen (input),
                                                   offsets,
                                                   NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (o)
            {
              for (i = 0; i < 10; i++)
                ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
              ASSERT (offsets[10] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
          size_t length;
          uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
                                                   input, strlen (input),
                                                   offsets,
                                                   NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (o)
            {
              for (i = 0; i < 10; i++)
                ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
              ASSERT (offsets[10] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
      static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
          size_t length;
          uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
                                                   input, strlen (input),
                                                   offsets,
                                                   NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == u8_strlen (expected));
          ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
          if (o)
            {
              for (i = 0; i < 16; i++)
                ASSERT (offsets[i] == (i == 0 ? 0 :
                                       i == 5 ? 3 :
                                       i == 7 ? 6 :
                                       i == 9 ? 9 :
                                       i == 11 ? 12 :
                                       i == 13 ? 15 :
                                       (size_t)(-1)));
              ASSERT (offsets[16] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
# endif

#endif

  return 0;
}
Beispiel #12
0
const char *wget_str_to_ascii(const char *src)
{
#ifdef WITH_LIBIDN2
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;
#ifdef WITH_LIBUNISTRING
		uint8_t *lower, resbuf[256];
		size_t len = sizeof(resbuf) - 1; // leave space for additional \0 byte

		// we need a conversion to lowercase
		lower = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, resbuf, &len);
		if (!lower) {
			error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
			return src;
		}

		// u8_tolower() does not terminate the result string
		if (lower == resbuf) {
			lower[len]=0;
		} else {
			uint8_t *tmp = lower;
			lower = (uint8_t *)wget_strmemdup((char *)lower, len);
			xfree(tmp);
		}

		if ((rc = idn2_lookup_u8(lower, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), lower, rc, idn2_strerror(rc));

		if (lower != resbuf)
			xfree(lower);
#else
		if ((rc = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), src, rc, idn2_strerror(rc));
#endif
	}
#elif WITH_LIBIDN
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;

		if (_utf8_is_valid(src)) {
			// idna_to_ascii_8z() automatically converts UTF-8 to lowercase

			if ((rc = idna_to_ascii_8z(src, &asc, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
				// debug_printf("toASCII '%s' -> '%s'\n", src, asc);
				src = asc;
			} else
				error_printf(_("toASCII failed (%d): %s\n"), rc, idna_strerror(rc));
		}
		else
			error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	}
#else
	if (wget_str_needs_encoding(src)) {
		error_printf(_("toASCII not available: '%s'\n"), src);
	}
#endif

	return src;
}
Beispiel #13
0
/**
 * psl_str_to_utf8lower:
 * @str: string to convert
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
 * @lower: return value containing the converted string
 *
 * This helper function converts a string to lowercase UTF-8 representation.
 * Lowercase UTF-8 is needed as input to the domain checking functions.
 *
 * @lower is set to %NULL on error.
 *
 * The return value 'lower' must be freed after usage.
 *
 * Returns: psl_error_t value.
 *   PSL_SUCCESS: Success
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
 *
 * Since: 0.4
 */
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
{
	int ret = PSL_ERR_INVALID_ARG;

	if (lower)
		*lower = NULL;

	if (!str)
		return PSL_ERR_INVALID_ARG;

	/* shortcut to avoid costly conversion */
	if (_str_is_ascii(str)) {
		if (lower) {
			char *p;

			*lower = strdup(str);

			/* convert ASCII string to lowercase */
			for (p = *lower; *p; p++)
				if (isupper(*p))
					*p = tolower(*p);
		}
		return PSL_SUCCESS;
	}

#ifdef WITH_LIBICU
	do {
	size_t str_length = strlen(str);
	UErrorCode status = 0;
	UChar *utf16_dst, *utf16_lower;
	int32_t utf16_dst_length;
	char *utf8_lower;
	UConverter *uconv;

	/* C89 allocation */
	utf16_dst   = alloca(sizeof(UChar) * (str_length * 2 + 1));
	utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
	utf8_lower  = alloca(str_length * 2 + 1);

	uconv = ucnv_open(encoding, &status);
	if (U_SUCCESS(status)) {
		utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
		ucnv_close(uconv);

		if (U_SUCCESS(status)) {
			int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
			if (U_SUCCESS(status)) {
				u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status);
				if (U_SUCCESS(status)) {
					if (lower)
						*lower = strdup(utf8_lower);
					ret = PSL_SUCCESS;
				} else {
					ret = PSL_ERR_TO_UTF8;
					/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
				}
			} else {
				ret = PSL_ERR_TO_LOWER;
				/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
			}
		} else {
			ret = PSL_ERR_TO_UTF16;
			/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
		}
	} else {
		ret = PSL_ERR_CONVERTER;
		/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
	}
	} while (0);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
	do {
		/* find out local charset encoding */
		if (!encoding) {
			encoding = nl_langinfo(CODESET);

			if (!encoding || !*encoding)
				encoding = "ASCII";
		}

		/* convert to UTF-8 */
		if (strcasecmp(encoding, "utf-8")) {
			iconv_t cd = iconv_open("utf-8", encoding);

			if (cd != (iconv_t)-1) {
				char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
				size_t tmp_len = strlen(str);
				size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
				char *dst = malloc(dst_len + 1), *dst_tmp = dst;

				if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) {
					uint8_t *resbuf = malloc(dst_len * 2 + 1);
					size_t len = dst_len * 2; /* leave space for additional \0 byte */

					if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
						/* u8_tolower() does not terminate the result string */
						if (lower)
							*lower = strndup((char *)dst, len);
					} else {
						ret = PSL_ERR_TO_LOWER;
						/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
					}

					if (lower)
						*lower = strndup(dst, dst_len - dst_len_tmp);
					ret = PSL_SUCCESS;
				} else {
					ret = PSL_ERR_TO_UTF8;
					/* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
				}

				free(dst);
				iconv_close(cd);
			} else {
				ret = PSL_ERR_TO_UTF8;
				/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
			}
		} else
			ret = PSL_SUCCESS;

		/* convert to lowercase */
		if (ret == PSL_SUCCESS) {
			uint8_t *dst, resbuf[256];
			size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

			/* we need a conversion to lowercase */
			if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
				/* u8_tolower() does not terminate the result string */
				if (lower)
					*lower = strndup((char *)dst, len);
			} else {
				ret = PSL_ERR_TO_LOWER;
				/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
			}
		}

	} while (0);
#endif

	return ret;
}
Beispiel #14
0
int
u8_strwidth (const uint8_t *s, const char *encoding)
{
  return u8_width (s, u8_strlen (s), encoding);
}
Beispiel #15
0
int stringLength (char * theText) {
	return u8_strlen (theText);
}
Beispiel #16
0
int
main ()
{
  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;
  size_t o;
  size_t i;

#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == strlen (expected));
          ASSERT (memcmp (result, expected, length) == 0);
          if (o)
            {
              for (i = 0; i < 41; i++)
                ASSERT (offsets[i] == (i < 1 ? i :
                                       i == 1 ? (size_t)(-1) :
                                       i < 13 ? i - 1 :
                                       i == 13 ? (size_t)(-1) :
                                       i < 20 ? i - 2 :
                                       i == 20 ? (size_t)(-1) :
                                       i < 40 ? i - 3 :
                                       i == 40 ? (size_t)(-1) :
                                       i - 4));
              ASSERT (offsets[41] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length = 0xdead;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          switch (handler)
            {
            case iconveh_error:
              ASSERT (result == NULL);
              ASSERT (errno == EILSEQ);
              ASSERT (length == 0xdead);
              break;
            case iconveh_question_mark:
              {
                static const char expected[] = "Rafa? Maszkowski";
                static const char expected_translit[] = "Rafal Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0
                        || memcmp (result, expected_translit, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i - 1));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            case iconveh_escape_sequence:
              {
                static const char expected[] = "Rafa\\u0142 Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i + 4));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            }
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "\342";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == strlen (""));
          if (o)
            {
              ASSERT (offsets[0] == 0);
              ASSERT (offsets[1] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

#endif

  return 0;
}
Beispiel #17
0
static void
sort_tag_create(char **sort_tag, char *src_tag)
{
  const uint8_t *i_ptr;
  const uint8_t *n_ptr;
  const uint8_t *number;
  uint8_t out[1024];
  uint8_t *o_ptr;
  int append_number;
  ucs4_t puc;
  int numlen;
  size_t len;
  int charlen;

  /* Note: include terminating NUL in string length for u8_normalize */

  if (*sort_tag)
    {
      DPRINTF(E_DBG, L_LIB, "Existing sort tag will be normalized: %s\n", *sort_tag);
      o_ptr = u8_normalize(UNINORM_NFD, (uint8_t *)*sort_tag, strlen(*sort_tag) + 1, NULL, &len);
      free(*sort_tag);
      *sort_tag = (char *)o_ptr;
      return;
    }

  if (!src_tag || ((len = strlen(src_tag)) == 0))
    {
      *sort_tag = NULL;
      return;
    }

  // Set input pointer past article if present
  if ((strncasecmp(src_tag, "a ", 2) == 0) && (len > 2))
    i_ptr = (uint8_t *)(src_tag + 2);
  else if ((strncasecmp(src_tag, "an ", 3) == 0) && (len > 3))
    i_ptr = (uint8_t *)(src_tag + 3);
  else if ((strncasecmp(src_tag, "the ", 4) == 0) && (len > 4))
    i_ptr = (uint8_t *)(src_tag + 4);
  else
    i_ptr = (uint8_t *)src_tag;

  // Poor man's natural sort. Makes sure we sort like this: a1, a2, a10, a11, a21, a111
  // We do this by padding zeroes to (short) numbers. As an alternative we could have
  // made a proper natural sort algorithm in sqlext.c, but we don't, since we don't
  // want any risk of hurting response times
  memset(&out, 0, sizeof(out));
  o_ptr = (uint8_t *)&out;
  number = NULL;
  append_number = 0;

  do
    {
      n_ptr = u8_next(&puc, i_ptr);

      if (uc_is_digit(puc))
	{
	  if (!number) // We have encountered the beginning of a number
	    number = i_ptr;
	  append_number = (n_ptr == NULL); // If last char in string append number now
	}
      else
	{
	  if (number)
	    append_number = 1; // A number has ended so time to append it
	  else
	    {
              charlen = u8_strmblen(i_ptr);
              if (charlen >= 0)
	    	o_ptr = u8_stpncpy(o_ptr, i_ptr, charlen); // No numbers in sight, just append char
	    }
	}

      // Break if less than 100 bytes remain (prevent buffer overflow)
      if (sizeof(out) - u8_strlen(out) < 100)
	break;

      // Break if number is very large (prevent buffer overflow)
      if (number && (i_ptr - number > 50))
	break;

      if (append_number)
	{
	  numlen = i_ptr - number;
	  if (numlen < 5) // Max pad width
	    {
	      u8_strcpy(o_ptr, (uint8_t *)"00000");
	      o_ptr += (5 - numlen);
	    }
	  o_ptr = u8_stpncpy(o_ptr, number, numlen + u8_strmblen(i_ptr));

	  number = NULL;
	  append_number = 0;
	}

      i_ptr = n_ptr;
    }
  while (n_ptr);

  *sort_tag = (char *)u8_normalize(UNINORM_NFD, (uint8_t *)&out, u8_strlen(out) + 1, NULL, &len);
}