Esempio n. 1
0
static VALUE decode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t len;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");

  len = RSTRING_LEN(str);
  ustr = malloc(len * sizeof(punycode_uint));

  if (ustr == NULL) {
    rb_raise(rb_eNoMemError, "cannot allocate memory (%d bytes)", (uint32_t)len);
    return Qnil;
  }

  rc = punycode_decode(RSTRING_LEN(str), RSTRING_PTR(str),
                       &len, ustr, NULL);

  if (rc != PUNYCODE_SUCCESS) {
    xfree(ustr);
    rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
    return Qnil;
  }

  buf = stringprep_ucs4_to_utf8(ustr, len, NULL, &len);
  retv = rb_enc_str_new(buf, len, rb_utf8_encoding());
  xfree(ustr);
  xfree(buf);
  return retv;
}
Esempio n. 2
0
uschar *
string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
{
size_t p_len = Ustrlen(alabel);
punycode_uint * p;
uschar * s;
uschar * res;
int rc;

if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
  {
  if (err) *err = US"bad alabel prefix";
  return NULL;
  }

p_len -= 4;
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
res = string_copyn(s, p_len);
free(s);
return res;
}
Esempio n. 3
0
/**
 * stringprep - prepare internationalized string
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 * @flags: a #Stringprep_profile_flags value, or 0.
 * @profile: pointer to #Stringprep_profile to use.
 *
 * Prepare the input zero terminated UTF-8 string according to the
 * stringprep profile, and write back the result to the input string.
 *
 * Note that you must convert strings entered in the systems locale
 * into UTF-8 before using this function, see
 * stringprep_locale_to_utf8().
 *
 * Since the stringprep operation can expand the string, @maxlen
 * indicate how large the buffer holding the string is.  This function
 * will not read or write to characters outside that size.
 *
 * The @flags are one of #Stringprep_profile_flags values, or 0.
 *
 * The @profile contain the #Stringprep_profile instructions to
 * perform.  Your application can define new profiles, possibly
 * re-using the generic stringprep tables that always will be part of
 * the library, or use one of the currently supported profiles.
 *
 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
 **/
int
stringprep (char *in,
	    size_t maxlen,
	    Stringprep_profile_flags flags,
	    const Stringprep_profile * profile)
{
  int rc;
  char *utf8 = NULL;
  uint32_t *ucs4 = NULL;
  size_t ucs4len, maxucs4len, adducs4len = 50;

  do
    {
      uint32_t *newp;

      if (ucs4)
	free (ucs4);
      ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
      maxucs4len = ucs4len + adducs4len;
      newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
      if (!newp)
	{
	  free (ucs4);
	  return STRINGPREP_MALLOC_ERROR;
	}
      ucs4 = newp;

      rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
      adducs4len += 50;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
  if (rc != STRINGPREP_OK)
    {
      free (ucs4);
      return rc;
    }

  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
  free (ucs4);
  if (!utf8)
    return STRINGPREP_MALLOC_ERROR;

  if (strlen (utf8) >= maxlen)
    {
      free (utf8);
      return STRINGPREP_TOO_SMALL_BUFFER;
    }

  strcpy (in, utf8);		/* flawfinder: ignore */

  free (utf8);

  return STRINGPREP_OK;
}
Esempio n. 4
0
File: nfkc.c Progetto: AlekSi/Jabbin
/**
 * stringprep_ucs4_nfkc_normalize:
 * @str: a Unicode string.
 * @len: length of @str array, or -1 if @str is nul-terminated.
 *
 * Converts UCS4 string into UTF-8 and runs
 * stringprep_utf8_nfkc_normalize().
 *
 * Return value: a newly allocated Unicode string, that is the NFKC
 *   normalized form of @str.
 **/
my_uint32_t *
stringprep_ucs4_nfkc_normalize (my_uint32_t * str, ssize_t len)
{
  char *p;
  my_uint32_t *result_wc;

  p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
  result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
  free (p);

  return result_wc;
}
Esempio n. 5
0
/**
 * idna_to_unicode_8z8z:
 * @input: zero-terminated UTF-8 string.
 * @output: pointer to newly allocated output UTF-8 string.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * Convert possibly ACE encoded domain name in UTF-8 format into a
 * UTF-8 string.  The domain name may contain several labels,
 * separated by dots.  The output buffer must be deallocated by the
 * caller.
 *
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 **/
int
idna_to_unicode_8z8z (const char *input, char **output, int flags)
{
  uint32_t *ucs4;
  int rc;

  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
  free (ucs4);

  if (!*output)
    return IDNA_ICONV_ERROR;

  return rc;
}
int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
#ifdef HAVE_LIBIDN
        size_t input_size, output_size;
        _cleanup_free_ uint32_t *input = NULL;
        _cleanup_free_ char *result = NULL;
        uint32_t *output = NULL;
        size_t w;

        /* To be invoked after unescaping */

        assert(encoded);
        assert(decoded);

        if (encoded_size < sizeof(IDNA_ACE_PREFIX)-1)
                return 0;

        if (memcmp(encoded, IDNA_ACE_PREFIX, sizeof(IDNA_ACE_PREFIX) -1) != 0)
                return 0;

        input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
        if (!input)
                return -ENOMEM;

        output_size = input_size;
        output = newa(uint32_t, output_size);

        idna_to_unicode_44i(input, input_size, output, &output_size, 0);

        result = stringprep_ucs4_to_utf8(output, output_size, NULL, &w);
        if (!result)
                return -ENOMEM;
        if (w <= 0)
                return 0;
        if (w+1 > decoded_max)
                return -EINVAL;

        memcpy(decoded, result, w+1);
        return w;
#else
        return 0;
#endif
}
Esempio n. 7
0
/**
 * idna_to_unicode_44i
 * @in: input array with unicode code points.
 * @inlen: length of input array with unicode code points.
 * @out: output array with unicode code points.
 * @outlen: on input, maximum size of output array with unicode code points,
 *          on exit, actual size of output array with unicode code points.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * The ToUnicode operation takes a sequence of Unicode code points
 * that make up one label and returns a sequence of Unicode code
 * points. If the input sequence is a label in ACE form, then the
 * result is an equivalent internationalized label that is not in ACE
 * form, otherwise the original sequence is returned unaltered.
 *
 * ToUnicode never fails. If any step fails, then the original input
 * sequence is returned immediately in that step.
 *
 * The Punycode decoder can never output more code points than it
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
 * the number of octets needed to represent a sequence of code points
 * depends on the particular character encoding used.
 *
 * The inputs to ToUnicode are a sequence of code points, the
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
 * ToUnicode is always a sequence of Unicode code points.
 *
 * Return value: Returns error condition, but it must only be used for
 *               debugging purposes.  The output buffer is always
 *               guaranteed to contain the correct data according to
 *               the specification (sans malloc induced errors).  NB!
 *               This means that you normally ignore the return code
 *               from this function, as checking it means breaking the
 *               standard.
 */
int
idna_to_unicode_44i (const uint32_t * in, size_t inlen,
		     uint32_t * out, size_t * outlen, int flags)
{
  int rc;
  size_t outlensave = *outlen;
  char *p;

  p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
  if (p == NULL)
    return IDNA_MALLOC_ERROR;

  rc = idna_to_unicode_internal (p, out, outlen, flags);
  if (rc != IDNA_SUCCESS)
    {
      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
					 inlen : outlensave));
      *outlen = inlen;
    }

  /* p is freed in idna_to_unicode_internal.  */

  return rc;
}
Esempio n. 8
0
File: utf8.c Progetto: Exim/exim
static uschar *
string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
{
size_t p_len;
punycode_uint * p;
int rc;
uschar * s, * res;

DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
alabel += 4;
p_len = Ustrlen(alabel);
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
res = string_copyn(s, p_len);
free(s);
return res;
}
Esempio n. 9
0
/**
 * idna_to_ascii_4i
 * @in: input array with unicode code points.
 * @inlen: length of input array with unicode code points.
 * @out: output zero terminated string that must have room for at
 *       least 63 characters plus the terminating zero.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * The ToASCII operation takes a sequence of Unicode code points that make
 * up one label and transforms it into a sequence of code points in the
 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
 * resulting sequence are equivalent labels.
 *
 * It is important to note that the ToASCII operation can fail. ToASCII
 * fails if any step of it fails. If any step of the ToASCII operation
 * fails on any label in a domain name, that domain name MUST NOT be used
 * as an internationalized domain name. The method for deadling with this
 * failure is application-specific.
 *
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
 * sequence of ASCII code points or a failure condition.
 *
 * ToASCII never alters a sequence of code points that are all in the ASCII
 * range to begin with (although it could fail). Applying the ToASCII
 * operation multiple times has exactly the same effect as applying it just
 * once.
 *
 * Return value: Returns 0 on success, or an error code.
 */
int
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
{
  size_t len, outlen;
  uint32_t *src;		/* XXX don't need to copy data? */
  int rc;

  /*
   * ToASCII consists of the following steps:
   *
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
   * then skip to step 3.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; i < inlen; i++)
      if (in[i] > 0x7F)
	inasciirange = 0;
    if (inasciirange)
      {
	src = malloc (sizeof (in[0]) * (inlen + 1));
	if (src == NULL)
	  return IDNA_MALLOC_ERROR;

	memcpy (src, in, sizeof (in[0]) * inlen);
	src[inlen] = 0;

	goto step3;
      }
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
   */

  {
    char *p;

    p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
    if (p == NULL)
      return IDNA_MALLOC_ERROR;

    len = strlen (p);
    do
      {
	char *newp;

	len = 2 * len + 10;	/* XXX better guess? */
	newp = realloc (p, len);
	if (newp == NULL)
	  {
	    free (p);
	    return IDNA_MALLOC_ERROR;
	  }
	p = newp;

	if (flags & IDNA_ALLOW_UNASSIGNED)
	  rc = stringprep_nameprep (p, len);
	else
	  rc = stringprep_nameprep_no_unassigned (p, len);
      }
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);

    if (rc != STRINGPREP_OK)
      {
	free (p);
	return IDNA_STRINGPREP_ERROR;
      }

    src = stringprep_utf8_to_ucs4 (p, -1, NULL);

    free (p);
  }

step3:
  /*
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
   *
   * (a) Verify the absence of non-LDH ASCII code points; that is,
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   *
   * (b) Verify the absence of leading and trailing hyphen-minus;
   * that is, the absence of U+002D at the beginning and end of
   * the sequence.
   */

  if (flags & IDNA_USE_STD3_ASCII_RULES)
    {
      size_t i;

      for (i = 0; src[i]; i++)
	if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
	    (src[i] >= 0x3A && src[i] <= 0x40) ||
	    (src[i] >= 0x5B && src[i] <= 0x60) ||
	    (src[i] >= 0x7B && src[i] <= 0x7F))
	  {
	    free (src);
	    return IDNA_CONTAINS_NON_LDH;
	  }

      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
	{
	  free (src);
	  return IDNA_CONTAINS_MINUS;
	}
    }

  /*
   * 4. If all code points in the sequence are in the ASCII range
   * (0..7F), then skip to step 8.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; src[i]; i++)
      {
	if (src[i] > 0x7F)
	  inasciirange = 0;
	/* copy string to output buffer if we are about to skip to step8 */
	if (i < 64)
	  out[i] = src[i];
      }
    if (i < 64)
      out[i] = '\0';
    if (inasciirange)
      goto step8;
  }

  /*
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
   *
   */

  {
    size_t i;
    int match;

    match = 1;
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
	match = 0;
    if (match)
      {
	free (src);
	return IDNA_CONTAINS_ACE_PREFIX;
      }
  }

  /*
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
   * and fail if there is an error.
   */
  for (len = 0; src[len]; len++)
    ;
  src[len] = '\0';
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
  rc = punycode_encode (len, src, NULL,
			&outlen, &out[strlen (IDNA_ACE_PREFIX)]);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (src);
      return IDNA_PUNYCODE_ERROR;
    }
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';

  /*
   * 7. Prepend the ACE prefix.
   */

  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));

  /*
   * 8. Verify that the number of code points is in the range 1 to 63
   * inclusive (0 is excluded).
   */

step8:
  free (src);
  if (strlen (out) < 1 || strlen (out) > 63)
    return IDNA_INVALID_LENGTH;

  return IDNA_SUCCESS;
}
Esempio n. 10
0
void
doit (void)
{
  char *p;
  int rc;
  size_t i;

  if (!stringprep_check_version (STRINGPREP_VERSION))
    fail ("stringprep_check_version() failed\n");

  for (i = 0; i < sizeof (strprep) / sizeof (strprep[0]); i++)
    {
      if (debug)
	printf ("STRINGPREP entry %d\n", i);

      if (debug)
	{
	  printf ("flags: %d\n", strprep[i].flags);

	  printf ("in: ");
	  escapeprint (strprep[i].in, strlen (strprep[i].in));
	  hexprint (strprep[i].in, strlen (strprep[i].in));
	  binprint (strprep[i].in, strlen (strprep[i].in));
	}

      {
	uint32_t *l;
	char *x;
	l = stringprep_utf8_to_ucs4 (strprep[i].in, -1, NULL);
	x = stringprep_ucs4_to_utf8 (l, -1, NULL, NULL);
	free (l);

	if (strcmp (strprep[i].in, x) != 0)
	  {
	    fail ("bad UTF-8 in entry %d\n", i);
	    if (debug)
	      {
		puts ("expected:");
		escapeprint (strprep[i].in, strlen (strprep[i].in));
		hexprint (strprep[i].in, strlen (strprep[i].in));
		puts ("computed:");
		escapeprint (x, strlen (x));
		hexprint (x, strlen (x));
	      }
	  }

	free (x);
      }
      rc = stringprep_profile (strprep[i].in, &p,
			       strprep[i].profile ?
			       strprep[i].profile :
			       "Nameprep", strprep[i].flags);
      if (rc != strprep[i].rc)
	{
	  fail ("stringprep() entry %d failed: %d\n", i, rc);
	  if (debug)
	    printf ("FATAL\n");
	  if (rc == STRINGPREP_OK)
	    free (p);
	  continue;
	}

      if (debug && rc == STRINGPREP_OK)
	{
	  printf ("out: ");
	  escapeprint (p, strlen (p));
	  hexprint (p, strlen (p));
	  binprint (p, strlen (p));

	  printf ("expected out: ");
	  escapeprint (strprep[i].out, strlen (strprep[i].out));
	  hexprint (strprep[i].out, strlen (strprep[i].out));
	  binprint (strprep[i].out, strlen (strprep[i].out));
	}
      else if (debug)
	printf ("returned %d expected %d\n", rc, strprep[i].rc);

      if (rc == STRINGPREP_OK)
	{
	  if (strlen (strprep[i].out) != strlen (p) ||
	      memcmp (strprep[i].out, p, strlen (p)) != 0)
	    {
	      fail ("stringprep() entry %d failed\n", i);
	      if (debug)
		printf ("ERROR\n");
	    }
	  else if (debug)
	    printf ("OK\n\n");

	  free (p);
	}
      else if (debug)
	printf ("OK\n\n");
    }

#if 0
  {
    char p[20];
    memset (p, 0, 10);
    stringprep_unichar_to_utf8 (0x00DF, p);
    hexprint (p, strlen (p));
    puts ("");
  }
#endif
}
Esempio n. 11
0
int
main (int argc, char *argv[])
{
  struct gengetopt_args_info args_info;
  char readbuf[BUFSIZ];
  char *p, *r;
  uint32_t *q;
  unsigned cmdn = 0;
  int rc;

  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  if (cmdline_parser (argc, argv, &args_info) != 0)
    return 1;

  if (!args_info.stringprep_given &&
      !args_info.punycode_encode_given && !args_info.punycode_decode_given &&
      !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given)
    args_info.idna_to_ascii_given = 1;

  if ((args_info.stringprep_given ? 1 : 0) +
      (args_info.punycode_encode_given ? 1 : 0) +
      (args_info.punycode_decode_given ? 1 : 0) +
      (args_info.idna_to_ascii_given ? 1 : 0) +
      (args_info.idna_to_unicode_given ? 1 : 0) != 1)
    {
      fprintf (stderr,
	       _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"),
	       argv[0]);
      cmdline_parser_print_help ();
      return 1;
    }

  if (!args_info.quiet_given)
    fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION);

  if (args_info.debug_given)
    fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ());

  if (!args_info.quiet_given && args_info.inputs_num == 0)
    fprintf (stderr, _("Type each input string on a line by itself, "
		       "terminated by a newline character.\n"));

  do
    {
      if (cmdn < args_info.inputs_num)
	{
	  strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1);
	  readbuf[BUFSIZ - 1] = '\0';
	}
      else if (fgets (readbuf, BUFSIZ, stdin) == NULL)
	{
	  sprintf (readbuf, _("%s: fgets() failed: "), argv[0]);
	  if (!feof (stdin))
	    perror (readbuf);
	  return 1;
	}

      if (readbuf[strlen (readbuf) - 1] == '\n')
	readbuf[strlen (readbuf) - 1] = '\0';

      if (args_info.stringprep_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  if (!q)
	    {
	      free (p);
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  rc = stringprep_profile (p, &r,
				   args_info.profile_given ?
				   args_info.profile_arg : "Nameprep", 0);
	  free (p);
	  if (rc != STRINGPREP_OK)
	    {
	      fprintf (stderr,
		       _("%s: stringprep_profile() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (r, -1, NULL);
	  if (!q)
	    {
	      free (r);
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.punycode_encode_given)
	{
	  size_t len, len2;

	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (p, -1, &len);
	  free (p);
	  if (!q)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; i < len; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }

	  len2 = BUFSIZ;
	  rc = punycode_encode (len, q, NULL, &len2, readbuf);
	  free (q);
	  if (rc != PUNYCODE_SUCCESS)
	    {
	      fprintf (stderr,
		       _("%s: punycode_encode() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  readbuf[len2] = '\0';

	  p = stringprep_utf8_to_locale (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.punycode_decode_given)
	{
	  size_t len;

	  len = BUFSIZ;
	  q = (uint32_t *) malloc (len * sizeof (q[0]));
	  if (!q)
	    {
	      sprintf (readbuf, _("%s: malloc() failed: "), argv[0]);
	      perror (readbuf);
	      return 1;

	    }

	  rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL);
	  if (rc != PUNYCODE_SUCCESS)
	    {
	      free (q);
	      fprintf (stderr,
		       _("%s: punycode_decode() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; i < len; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }

	  q[len] = 0;
	  r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
	  free (q);
	  if (!r)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UCS-4 to UTF-8.\n"),
		       argv[0]);
	      return 1;
	    }

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  if (!r)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.idna_to_ascii_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }
	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  free (p);
	  if (!q)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UCS-4 to UTF-8.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }

	  rc = idna_to_ascii_4z (q, &p,
				 (args_info.allow_unassigned_given ?
				  IDNA_ALLOW_UNASSIGNED : 0) |
				 (args_info.usestd3asciirules_given ?
				  IDNA_USE_STD3_ASCII_RULES : 0));
	  free (q);
	  if (rc != IDNA_SUCCESS)
	    {
	      fprintf (stderr, _("%s: idna_to_ascii_4z() failed "
				 "with error %d.\n"), argv[0], rc);
	      return 1;
	    }

#ifdef WITH_TLD
	  if (args_info.tld_flag)
	    {
	      size_t errpos;

	      rc = idna_to_unicode_8z4z (p, &q,
					 (args_info.allow_unassigned_given ?
					  IDNA_ALLOW_UNASSIGNED : 0) |
					 (args_info.usestd3asciirules_given ?
					  IDNA_USE_STD3_ASCII_RULES : 0));
	      if (rc != IDNA_SUCCESS)
		{
		  fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed "
				     "with error %d.\n"), argv[0], rc);
		  return 1;
		}

	      if (args_info.debug_given)
		{
		  size_t i;
		  for (i = 0; q[i]; i++)
		    fprintf (stderr, _("tld[%d] = U+%04x\n"), i, q[i]);
		}

	      rc = tld_check_4z (q, &errpos, NULL);
	      if (rc == TLD_INVALID)
		{
		  fprintf (stderr, _("%s: string rejected by TLD test "
				     "(Unicode position %d)\n"), argv[0],
			   errpos);
		  free (q);
		  return 1;
		}
	      if (rc != TLD_SUCCESS)
		{
		  fprintf (stderr,
			   _("%s: tld_check_4z failed with error %d.\n"),
			   argv[0], rc);
		  free (q);
		  return 1;
		}

	      free (r);
	    }
#endif

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; p[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, p[i]);
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.idna_to_unicode_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  if (!q)
	    {
	      free (p);
	      fprintf (stderr,
		       _("%s: could not convert from UCS-4 to UTF-8.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  rc = idna_to_unicode_8z4z (p, &q,
				     (args_info.allow_unassigned_given ?
				      IDNA_ALLOW_UNASSIGNED : 0) |
				     (args_info.usestd3asciirules_given ?
				      IDNA_USE_STD3_ASCII_RULES : 0));
	  free (p);
	  if (rc != IDNA_SUCCESS)
	    {
	      fprintf (stderr, _("%s: idna_to_unicode_8z4z() "
				 "failed with error %d.\n"), argv[0], rc);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }

#ifdef WITH_TLD
	  if (args_info.tld_flag)
	    {
	      size_t errpos;

	      rc = tld_check_4z (q, &errpos, NULL);
	      if (rc == TLD_INVALID)
		{
		  fprintf (stderr, _("%s: string rejected by TLD test "
				     "(Unicode position %d)\n"), argv[0],
			   errpos);
		  free (q);
		  return 1;
		}
	      if (rc != TLD_SUCCESS)
		{
		  fprintf (stderr,
			   _("%s: tld_check_4z failed with error %d.\n"),
			   argv[0], rc);
		  free (q);
		  return 1;
		}
	    }
#endif

	  r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
	  free (q);
	  if (!r)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  if (!r)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}
    }
  while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 ||
					      cmdn < args_info.inputs_num));

  return 0;
}
Esempio n. 12
0
void
doit (void)
{
  size_t i;
  int rc;

  for (i = 0; i < sizeof (tv) / sizeof (tv[0]); i++)
    {
      if (debug)
	{
	  uint32_t *p, *q;

	  printf ("PR29 entry %ld: %s\n", i, tv[i].name);

	  printf ("in:\n");
	  ucs4print (tv[i].in, tv[i].inlen);

	  printf ("nfkc:\n");
	  p = stringprep_ucs4_nfkc_normalize (tv[i].in, tv[i].inlen);
	  ucs4print (p, -1);

	  printf ("second nfkc:\n");
	  q = stringprep_ucs4_nfkc_normalize (p, -1);
	  ucs4print (q, -1);

	  free (p);
	  free (q);
	}

      rc = pr29_4 (tv[i].in, tv[i].inlen);
      if (rc != tv[i].rc)
	{
	  fail ("PR29 entry %ld failed (expected %d): %d\n", i, tv[i].rc, rc);
	  if (debug)
	    printf ("FATAL\n");
	  continue;
	}

      rc = pr29_4z (tv[i].in);
      if (rc != tv[i].rc)
	{
	  fail ("PR29 entry %ld failed (expected %d): %d\n", i, tv[i].rc, rc);
	  if (debug)
	    printf ("FATAL\n");
	  continue;
	}

      {
	char *p;
	size_t items_read, items_written;

	p = stringprep_ucs4_to_utf8 (tv[i].in, (ssize_t) tv[i].inlen,
				     &items_read, &items_written);
	if (p == NULL)
	  fail ("FAIL: stringprep_ucs4_to_utf8(tv[%ld]) == NULL\n", i);
	if (debug)
	  hexprint (p, strlen (p));

	rc = pr29_8z (p);
	free (p);
	if (rc != tv[i].rc)
	  {
	    fail ("PR29 entry %ld failed (expected %d): %d\n",
		  i, tv[i].rc, rc);
	    if (debug)
	      printf ("FATAL\n");
	    continue;
	  }
      }

      if (debug)
	{
	  if (tv[i].rc != PR29_SUCCESS)
	    printf ("EXPECTED FAIL\n");
	  else
	    printf ("OK\n");
	}
    }
}