Ejemplo n.º 1
0
/*
 * Decode a Punycode string into a UTF-8 encoded Ruby String.
 *
 * self - receiver (not used by this function).
 * str  - object converted to a String through to_s; its bytes are handed to
 *        punycode_decode() as the encoded input.
 *
 * Returns a new String tagged with the UTF-8 encoding.
 * Raises NoMemError when a working buffer cannot be obtained, and
 * ePunycodeError when the argument cannot be converted or when
 * punycode_decode() reports a failure.
 */
static VALUE decode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t len;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");
  if (str == Qnil) {
    /* The conversion can yield nil; reading a length from nil is invalid. */
    rb_raise(ePunycodeError, "cannot convert argument to String");
    return Qnil;
  }

  len = RSTRING_LEN(str);
  /* Ask for at least one element: malloc(0) may legitimately return NULL,
   * which would otherwise be reported as an out-of-memory condition. */
  ustr = malloc((len > 0 ? len : 1) * sizeof(punycode_uint));

  if (ustr == NULL) {
    rb_raise(rb_eNoMemError, "cannot allocate memory (%d bytes)", (uint32_t)len);
    return Qnil;
  }

  /* On input len is the capacity of ustr (in code points); on success it is
   * overwritten with the number of code points produced. */
  rc = punycode_decode(RSTRING_LEN(str), RSTRING_PTR(str),
                       &len, ustr, NULL);

  if (rc != PUNYCODE_SUCCESS) {
    /* ustr came from malloc(), so it must be released with free(). */
    free(ustr);
    rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
    return Qnil;
  }

  /* len is reused once more: code point count in, UTF-8 byte count out. */
  buf = stringprep_ucs4_to_utf8(ustr, len, NULL, &len);
  free(ustr);

  if (buf == NULL) {
    rb_raise(rb_eNoMemError, "cannot allocate memory for UTF-8 conversion");
    return Qnil;
  }

  retv = rb_enc_str_new(buf, len, rb_utf8_encoding());
  /* buf is allocated by libidn, not by Ruby's allocator: use free(). */
  free(buf);
  return retv;
}
Ejemplo n.º 2
0
/*
Convert a single A-label ("xn--" followed by Punycode) to UTF-8.

Arguments:
  alabel   NUL-terminated label; must start with the literal "xn--"
  err      if non-NULL, receives a message describing the failure

Returns:   the decoded label as produced by string_copyn(), or NULL on
           failure (bad prefix, Punycode decode error, or failed UCS-4 to
           UTF-8 conversion)
*/
uschar *
string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
{
size_t p_len = Ustrlen(alabel);
punycode_uint * p;
uschar * s;
uschar * res;
int rc;

/* The || chain stops at the first mismatch, so a short string is never read
past its terminating NUL. */
if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
  {
  if (err) *err = US"bad alabel prefix";
  return NULL;
  }

/* Skip the prefix; one code point per remaining input byte (plus one spare)
is reserved for the decoder output. */
p_len -= 4;
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

/* p_len is passed by value as the input length and by address as the output
capacity; on success it holds the number of code points decoded. */
if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
if (!s)
  {
  /* The converter can fail; do not hand a NULL source to string_copyn(). */
  if (err) *err = US"ucs4 to utf8 conversion failed";
  return NULL;
  }

res = string_copyn(s, p_len);
free(s);
return res;
}
Ejemplo n.º 3
0
static int32_t convertFromPuny(  const UChar* src, int32_t srcLength,
								 UChar* dest, int32_t destCapacity,
                                 UErrorCode& status){
    char b1Stack[MAX_LABEL_BUFFER_SIZE];
    char* b1 = b1Stack;
    int32_t destLen =0;

    convertUCharsToASCII(src, b1,srcLength);

    uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
    uint32_t* b2 = b2Stack;
    int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
    unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
    punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
    status = getError(error);
    if(status == U_BUFFER_OVERFLOW_ERROR){
        b2 =  (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
        if(b2 == NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
        status = getError(error);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);

CLEANUP:
    if(b1Stack != b1){
        uprv_free(b1);
    }
    if(b2Stack != b2){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

    return destLen;   
}
Ejemplo n.º 4
0
// Decodes one ACE label into UTF-8.
//
// in              label to decode; copied to |out| unchanged when IsACE()
//                 reports that it is not an ACE label.
// out             receives the UTF-8 result.
// allowUnassigned forwarded to UTF8toACE() for the round-trip validation.
//
// Returns NS_OK on success; NS_ERROR_OUT_OF_MEMORY when the code point buffer
// is null; NS_ERROR_FAILURE when decoding fails, the result contains
// characters rejected by isOnlySafeChars(), or re-encoding does not give back
// the input; otherwise the failure code from UTF8toACE().
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
                                 bool allowUnassigned)
{
  bool hasAcePrefix;
  IsACE(in, &hasAcePrefix);
  if (!hasAcePrefix) {
    out.Assign(in);
    return NS_OK;
  }

  // RFC 3490 - 4.2 ToUnicode
  // The ToUnicode output never contains more code points than its input,
  // so the encoded length plus one slot for the terminator is enough.
  punycode_uint codePointCount = in.Length() - kACEPrefixLen + 1;
  punycode_uint *codePoints = new punycode_uint[codePointCount];
  NS_ENSURE_TRUE(codePoints, NS_ERROR_OUT_OF_MEMORY);

  enum punycode_status decodeStatus =
    punycode_decode(in.Length() - kACEPrefixLen,
                    PromiseFlatCString(in).get() + kACEPrefixLen,
                    &codePointCount,
                    codePoints,
                    nullptr);
  if (decodeStatus != punycode_success) {
    delete [] codePoints;
    return NS_ERROR_FAILURE;
  }

  // UCS4 -> UTF16 -> UTF8; the buffer is zero-terminated for ucs4toUtf16().
  codePoints[codePointCount] = 0;
  nsAutoString wide;
  ucs4toUtf16(codePoints, wide);
  delete [] codePoints;

  if (!isOnlySafeChars(wide, mIDNBlacklist)) {
    return NS_ERROR_FAILURE;
  }
  CopyUTF16toUTF8(wide, out);

  // Validation: encode the result back to ACE and compare with the input.
  nsAutoCString roundTrip;
  nsresult rv = UTF8toACE(out, roundTrip, allowUnassigned);
  NS_ENSURE_SUCCESS(rv, rv);

  return roundTrip.Equals(in, nsCaseInsensitiveCStringComparator())
           ? NS_OK
           : NS_ERROR_FAILURE;
}
Ejemplo n.º 5
0
/**
 * g_hostname_to_unicode:
 * @hostname: a valid UTF-8 or ASCII hostname
 *
 * Produces the canonical presentation form of @hostname: a UTF-8
 * string in Unicode normalization form C with no uppercase letters,
 * no forbidden characters, no ASCII-encoded segments and no trailing
 * dot.
 *
 * For a hostname that is not internationalized the result is plain
 * ASCII.
 *
 * The name is processed label by label: labels carrying the ACE
 * prefix are Punycode-decoded, all others are passed through
 * nameprep.
 *
 * Return value: a UTF-8 hostname, which must be freed, or %NULL if
 * @hostname is in some way invalid.
 *
 * Since: 2.22
 **/
gchar *
g_hostname_to_unicode (const gchar *hostname)
{
  GString *result = g_string_new (NULL);
  const gchar *pos = hostname;

  do
    {
      gssize label_len = idna_end_of_label (pos) - pos;

      if (g_ascii_strncasecmp (pos, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN) == 0)
        {
          /* ACE label: drop the prefix and decode what follows it. */
          pos += IDNA_ACE_PREFIX_LEN;
          label_len -= IDNA_ACE_PREFIX_LEN;
          if (!punycode_decode (pos, label_len, result))
            goto invalid;
        }
      else
        {
          gboolean unicode;
          gchar *prepped = nameprep (pos, label_len, &unicode);

          if (prepped == NULL)
            goto invalid;
          g_string_append (result, prepped);
          g_free (prepped);
        }

      /* Step over the label, then over the separator character (if any).
       * A dot is emitted only when another label follows, which is what
       * drops a trailing dot. */
      pos += label_len;
      if (*pos != '\0')
        pos = g_utf8_next_char (pos);
      if (*pos != '\0')
        g_string_append_c (result, '.');
    }
  while (*pos != '\0');

  return g_string_free (result, FALSE);

invalid:
  g_string_free (result, TRUE);
  return NULL;
}
Ejemplo n.º 6
0
/**
 * g_hostname_to_unicode:
 * @hostname: a valid UTF-8 or ASCII hostname
 *
 * Produces the canonical presentation form of @hostname: a UTF-8
 * string in Unicode normalization form C with no uppercase letters,
 * no forbidden characters, no ASCII-encoded segments and no trailing
 * dot.
 *
 * For a hostname that is not internationalized the result is plain
 * ASCII.
 *
 * The name is split into labels at each '.'; labels carrying the ACE
 * prefix are Punycode-decoded, all others are passed through
 * nameprep.
 *
 * Return value: a UTF-8 hostname, which must be freed, or %NULL if
 * @hostname is in some way invalid.
 *
 * Since: 2.22
 **/
gchar *
g_hostname_to_unicode (const gchar *hostname)
{
  GString *result = g_string_new (NULL);
  const gchar *pos = hostname;

  do
    {
      gssize label_len = strcspn (pos, ".");

      if (g_ascii_strncasecmp (pos, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN) == 0)
        {
          /* ACE label: drop the prefix and decode what follows it. */
          pos += IDNA_ACE_PREFIX_LEN;
          label_len -= IDNA_ACE_PREFIX_LEN;
          if (!punycode_decode (pos, label_len, result))
            goto invalid;
        }
      else
        {
          gchar *prepped = nameprep (pos, label_len);

          if (prepped == NULL)
            goto invalid;
          g_string_append (result, prepped);
          g_free (prepped);
        }

      /* Step over the label and its '.' separator.  A dot is emitted only
       * when another label follows, which is what drops a trailing dot. */
      pos += label_len;
      if (*pos != '\0')
        {
          pos++;
          if (*pos != '\0')
            g_string_append_c (result, '.');
        }
    }
  while (*pos != '\0');

  return g_string_free (result, FALSE);

invalid:
  g_string_free (result, TRUE);
  return NULL;
}
Ejemplo n.º 7
0
/*
 * Convert a single ACE encoded label to native encoding
 * u+XXXX is used to signify a lowercase character.
 * U+XXXX is used to signify a uppercase character.
 * Normally only lowercase should be expected here.
 *
 * name: NUL-terminated label handed to punycode_decode() as-is.
 *
 * Returns a pointer to a function-static buffer holding the converted label,
 * or NULL when punycode_decode() fails. Because the buffer is static, the
 * result is overwritten by the next call.
 */
static char *convert_from_ACE (const char *name)
{
  static char out_buf [MAX_HOST_LEN];
  DWORD  ucs_output [MAX_HOST_LEN];
  BYTE   ucs_case  [MAX_HOST_LEN];
  size_t ucs_len, i, j;

  memset (&ucs_case, 0, sizeof(ucs_case));
  /* The decoder capacity is a number of code points, not a number of bytes:
   * passing sizeof(ucs_output) would overstate the room in both ucs_output
   * and ucs_case.
   */
  ucs_len = _countof(ucs_output);
  const punycode_status status = punycode_decode (strlen(name), name, &ucs_len, ucs_output, ucs_case);

  if (status != punycode_success)
  {
#ifdef IDNA_DEBUG_ENABLED
    _idna_errno = IDNAERR_PUNYCODE_BASE + status;
#endif
	dcassert(0);
	ucs_len = 0;   /* nothing to convert; the loop below is skipped */
  }

  /* Convert code point by code point; the "-4" keeps headroom in out_buf for
   * the bytes written by one conv_to_ascii() call plus the terminator.
   */
  for (i = j = 0; i < ucs_len && j < _countof(out_buf)-4; i++)
  {
    wchar_t ucs = (wchar_t)ucs_output[i];
    int     len = 0; /* pre-set so it is defined even if conv_to_ascii() does not write it */
    if (!conv_to_ascii(ucs, out_buf+j, &len))
       break;
#ifdef IDNA_DEBUG_ENABLED
    IDNA_DEBUG ("%c+%04X -> %.*s\n",
                ucs_case[i] ? 'U' : 'u', ucs, len, out_buf+j);
#endif
    j += len;
  }
  out_buf[j] = '\0';
#ifdef IDNA_DEBUG_ENABLED
  IDNA_DEBUG ("punycode_decode: status %d, out_len %d, out_buf '%s'\n",
              int(status), int(ucs_len), out_buf);
#endif
 return (status == punycode_success ? out_buf : NULL);
}
Ejemplo n.º 8
0
Archivo: utf8.c Proyecto: Exim/exim
/*
Convert a single A-label to UTF-8, without checking its prefix.

Arguments:
  alabel   NUL-terminated label; the first four bytes are skipped
           unconditionally (presumably the "xn--" prefix, already checked by
           the caller - confirm)
  err      if non-NULL, receives a message describing the failure

Returns:   the decoded label as produced by string_copyn(), or NULL on
           failure (Punycode decode error or failed UCS-4 to UTF-8 conversion)
*/
static uschar *
string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
{
size_t p_len;
punycode_uint * p;
int rc;
uschar * s, * res;

DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
alabel += 4;
p_len = Ustrlen(alabel);

/* One code point per input byte, plus one spare, for the decoder output. */
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

/* p_len is passed by value as the input length and by address as the output
capacity; on success it holds the number of code points decoded. */
if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
if (!s)
  {
  /* The converter can fail; do not hand a NULL source to string_copyn(). */
  if (err) *err = US"ucs4 to utf8 conversion failed";
  return NULL;
  }

res = string_copyn(s, p_len);
free(s);
return res;
}
Ejemplo n.º 9
0
/* ToUnicode().  May realloc() utf8in.
 *
 * utf8in: heap-allocated, NUL-terminated UTF-8 label.  Ownership passes to
 *         this function: the buffer is freed on every return path.
 * out:    receives the decoded code points followed by a terminating zero.
 * outlen: on entry the capacity of out in code points (including room for
 *         the zero); on success the number of code points decoded.
 * flags:  IDNA_* flags; IDNA_ALLOW_UNASSIGNED selects the nameprep variant
 *         and the flags are also forwarded to idna_to_ascii_4i().
 *
 * Returns IDNA_SUCCESS, or an IDNA_* error code identifying the failing step.
 */
static int
idna_to_unicode_internal (char *utf8in,
			  uint32_t * out, size_t * outlen, int flags)
{
  int rc;
  char tmpout[64];
  size_t utf8len = strlen (utf8in) + 1;
  size_t addlen = 0;

  /*
   * ToUnicode consists of the following steps:
   *
   * 1. If the sequence contains any code points outside the ASCII range
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; utf8in[i]; i++)
      if (utf8in[i] & ~0x7F)
	inasciirange = 0;
    if (inasciirange)
      goto step3;
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
   * error. (If step 3 of ToASCII is also performed here, it will not
   * affect the overall behavior of ToUnicode, but it is not
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
   */
  do
    {
      /* Nameprep works in place; grow the buffer by one byte per attempt
       * until the prepared string fits. */
      char *newp = realloc (utf8in, utf8len + addlen);
      if (newp == NULL)
	{
	  free (utf8in);
	  return IDNA_MALLOC_ERROR;
	}
      utf8in = newp;
      if (flags & IDNA_ALLOW_UNASSIGNED)
	rc = stringprep_nameprep (utf8in, utf8len + addlen);
      else
	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
      addlen += 1;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);

  if (rc != STRINGPREP_OK)
    {
      free (utf8in);
      return IDNA_STRINGPREP_ERROR;
    }

  /* 3. Verify that the sequence begins with the ACE prefix, and save a
   * copy of the sequence.
   */

step3:
  if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
    {
      free (utf8in);
      return IDNA_NO_ACE_PREFIX;
    }

  /* 4. Remove the ACE prefix.
   */

  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
	   strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);

  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
   * and fail if there is an error. Save a copy of the result of
   * this step.
   */

  if (*outlen == 0)
    {
      /* No room even for the terminating zero.  Decrementing would wrap
       * around and tell the decoder that the buffer is practically
       * unlimited, so report the decode as failed instead. */
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }

  (*outlen)--;			/* reserve one for the zero */

  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }

  out[*outlen] = 0;		/* add zero */

  /* 6. Apply ToASCII.
   */

  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  if (rc != IDNA_SUCCESS)
    {
      free (utf8in);
      return rc;
    }

  /* 7. Verify that the result of step 6 matches the saved copy from
   * step 3, using a case-insensitive ASCII comparison.
   */

  /* utf8in no longer carries the prefix (step 4), so skip it in tmpout. */
  if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
    {
      free (utf8in);
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
    }

  /* 8. Return the saved copy from step 5.
   */

  free (utf8in);
  return IDNA_SUCCESS;
}
Ejemplo n.º 10
0
void charsetConverter_idna::convert(const string& in, string& out, status* st)
{
	if (st)
		new (st) status();

	out.clear();

	if (m_dest == "idna")
	{
		if (utility::stringUtils::is7bit(in))
		{
			if (st)
			{
				st->inputBytesRead = in.length();
				st->outputBytesWritten = in.length();
			}

			// No need to encode as Punycode
			out = in;
			return;
		}

		string inUTF8;
		charset::convert(in, inUTF8, m_source, vmime::charsets::UTF_8);

		const char* ch = inUTF8.c_str();
		const char* end = inUTF8.c_str() + inUTF8.length();

		std::vector <punycode_uint> unichars;
		unichars.reserve(inUTF8.length());

		while (ch < end)
		{
			const utf8::uint32_t uc = utf8::unchecked::next(ch);
			unichars.push_back(uc);
		}

		if (st)
			st->inputBytesRead = in.length();

		punycode_uint inputLen = static_cast <punycode_uint>(unichars.size());

		std::vector <char> output(inUTF8.length() * 2);
		punycode_uint outputLen = static_cast <punycode_uint>(output.size());

		const punycode_status status = punycode_encode
			(inputLen, &unichars[0], /* case_flags */ NULL, &outputLen, &output[0]);

		if (status == punycode_success)
		{
			out = string("xn--") + string(output.begin(), output.begin() + outputLen);

			if (st)
				st->outputBytesWritten = out.length();
		}
		else
		{
			// TODO
		}
	}
	else if (m_source == "idna")
	{
		if (in.length() < 5 || in.substr(0, 4) != "xn--")
		{
			if (st)
			{
				st->inputBytesRead = in.length();
				st->outputBytesWritten = in.length();
			}

			// Not an IDNA string
			out = in;
			return;
		}

		punycode_uint inputLen = static_cast <punycode_uint>(in.length() - 4);

		std::vector <punycode_uint> output(in.length() - 4);
		punycode_uint outputLen = static_cast <punycode_uint>(output.size());

		const punycode_status status = punycode_decode
			(inputLen, &in[4], &outputLen, &output[0], /* case_flags */ NULL);

		if (st)
			st->inputBytesRead = in.length();

		if (status == punycode_success)
		{
			std::vector <char> outUTF8Bytes(outputLen * 4);
			char* p = &outUTF8Bytes[0];

			for (std::vector <punycode_uint>::const_iterator it = output.begin() ;
			     it != output.begin() + outputLen ; ++it)
			{
				p = utf8::unchecked::append(*it, p);
			}

			string outUTF8(&outUTF8Bytes[0], p);
			charset::convert(outUTF8, out, vmime::charsets::UTF_8, m_dest);

			if (st)
				st->outputBytesWritten = out.length();
		}
		else
		{
			// TODO
		}
	}
}
Ejemplo n.º 11
0
/*
 * Command-line driver for the Punycode encoder and decoder.
 *
 * Takes exactly one argument, "-e" or "-d":
 *   -e  reads code points written as u+XXXX / U+XXXX from stdin (the letter
 *       case sets the case flag) and prints the encoded string;
 *   -d  reads one line of Punycode from stdin and prints one u+XXXX / U+XXXX
 *       line per decoded code point.
 *
 * Any other invocation goes to usage().  Errors go through fail(); the code
 * relies on usage() and fail() not returning.
 */
int
main (int argc, char **argv)
{
  enum punycode_status status;
  int r;
  size_t input_length, output_length, j;
  unsigned char case_flags[unicode_max_length];

  setlocale (LC_ALL, "");

  if (argc != 2)
    usage (argv);
  /* The argument must be exactly two characters: '-' plus one letter.
   * Checking argv[1][1] before argv[1][2] keeps the test inside the string
   * when the argument is just "-". */
  if (argv[1][0] != '-' || argv[1][1] == 0 || argv[1][2] != 0)
    usage (argv);

  if (argv[1][1] == 'e')
    {
      uint32_t input[unicode_max_length];
      unsigned long codept;
      char output[ace_max_length + 1], uplus[3];
      int c;

      /* Read the input code points: */

      input_length = 0;

      for (;;)
	{
	  r = scanf ("%2s%lx", uplus, &codept);
	  if (ferror (stdin))
	    fail (io_error);
	  if (r == EOF || r == 0)
	    break;

	  if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
	    {
	      fail (invalid_input);
	    }

	  if (input_length == unicode_max_length)
	    fail (too_big);

	  if (uplus[0] == 'u')
	    case_flags[input_length] = 0;
	  else if (uplus[0] == 'U')
	    case_flags[input_length] = 1;
	  else
	    fail (invalid_input);

	  input[input_length++] = codept;
	}

      /* Encode: */

      output_length = ace_max_length;
      status = punycode_encode (input_length, input, case_flags,
				&output_length, output);
      if (status == punycode_bad_input)
	fail (invalid_input);
      if (status == punycode_big_output)
	fail (too_big);
      if (status == punycode_overflow)
	fail (overflow);
      assert (status == punycode_success);

      /* Convert to native charset and output: */

      for (j = 0; j < output_length; ++j)
	{
	  c = output[j];
	  assert (c >= 0 && c <= 127);
	  if (print_ascii[c] == 0)
	    fail (invalid_input);
	  output[j] = print_ascii[c];
	}

      /* output has ace_max_length + 1 bytes, so the terminator always fits. */
      output[j] = 0;
      r = puts (output);
      if (r == EOF)
	fail (io_error);
      return EXIT_SUCCESS;
    }

  if (argv[1][1] == 'd')
    {
      char input[ace_max_length + 2], *p, *pp;
      uint32_t output[unicode_max_length];

      /* Read the Punycode input string and convert to ASCII: */

      if (!fgets (input, ace_max_length + 2, stdin))
	fail (io_error);
      if (ferror (stdin))
	fail (io_error);
      if (feof (stdin))
	fail (invalid_input);
      input_length = strlen (input);
      /* An empty buffer (e.g. a line starting with a NUL byte) would make
       * "length - 1" wrap around and index far outside the array. */
      if (input_length == 0)
	fail (invalid_input);
      input_length--;
      /* A missing newline means the line did not fit into the buffer. */
      if (input[input_length] != '\n')
	fail (too_big);
      input[input_length] = 0;

      for (p = input; *p != 0; ++p)
	{
	  pp = strchr (print_ascii, *p);
	  if (pp == 0)
	    fail (invalid_input);
	  *p = pp - print_ascii;
	}

      /* Decode: */

      output_length = unicode_max_length;
      status = punycode_decode (input_length, input, &output_length,
				output, case_flags);
      if (status == punycode_bad_input)
	fail (invalid_input);
      if (status == punycode_big_output)
	fail (too_big);
      if (status == punycode_overflow)
	fail (overflow);
      assert (status == punycode_success);

      /* Output the result: */

      for (j = 0; j < output_length; ++j)
	{
	  r = printf ("%s+%04lX\n",
		      case_flags[j] ? "U" : "u", (unsigned long) output[j]);
	  if (r < 0)
	    fail (io_error);
	}

      return EXIT_SUCCESS;
    }

  usage (argv);
  return EXIT_SUCCESS;		/* not reached, but quiets compiler warning */
}
Ejemplo n.º 12
0
/*
 * Command-line front end for stringprep, Punycode and IDNA conversions.
 *
 * Exactly one mode is active per run (IDNA ToASCII when none is given):
 * stringprep, Punycode encode, Punycode decode, IDNA ToASCII or IDNA
 * ToUnicode.  Input strings come from the command line when any were given,
 * otherwise from stdin, one per line.  Each converted string is printed on
 * its own line to stdout; diagnostics and debug output go to stderr.
 *
 * Returns 0 when all inputs were processed, 1 on the first error.
 */
int
main (int argc, char *argv[])
{
  struct gengetopt_args_info args_info;
  char readbuf[BUFSIZ];
  char *p, *r;
  uint32_t *q;
  unsigned cmdn = 0;
  int rc;

  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  if (cmdline_parser (argc, argv, &args_info) != 0)
    return 1;

  /* No mode selected: default to IDNA ToASCII. */
  if (!args_info.stringprep_given &&
      !args_info.punycode_encode_given && !args_info.punycode_decode_given &&
      !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given)
    args_info.idna_to_ascii_given = 1;

  if ((args_info.stringprep_given ? 1 : 0) +
      (args_info.punycode_encode_given ? 1 : 0) +
      (args_info.punycode_decode_given ? 1 : 0) +
      (args_info.idna_to_ascii_given ? 1 : 0) +
      (args_info.idna_to_unicode_given ? 1 : 0) != 1)
    {
      fprintf (stderr,
	       _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"),
	       argv[0]);
      cmdline_parser_print_help ();
      return 1;
    }

  if (!args_info.quiet_given)
    fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION);

  if (args_info.debug_given)
    fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ());

  if (!args_info.quiet_given && args_info.inputs_num == 0)
    fprintf (stderr, _("Type each input string on a line by itself, "
		       "terminated by a newline character.\n"));

  do
    {
      /* Fetch the next input: command-line strings first, stdin otherwise. */
      if (cmdn < args_info.inputs_num)
	{
	  strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1);
	  readbuf[BUFSIZ - 1] = '\0';
	}
      else if (fgets (readbuf, BUFSIZ, stdin) == NULL)
	{
	  sprintf (readbuf, _("%s: fgets() failed: "), argv[0]);
	  if (!feof (stdin))
	    perror (readbuf);
	  return 1;
	}

      /* Strip one trailing newline.  The length is checked first so that an
       * empty string does not lead to an access before the buffer. */
      {
	size_t readlen = strlen (readbuf);

	if (readlen > 0 && readbuf[readlen - 1] == '\n')
	  readbuf[readlen - 1] = '\0';
      }

      if (args_info.stringprep_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  /* The UCS-4 form is only needed for the debug dump. */
	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  if (!q)
	    {
	      free (p);
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  rc = stringprep_profile (p, &r,
				   args_info.profile_given ?
				   args_info.profile_arg : "Nameprep", 0);
	  free (p);
	  if (rc != STRINGPREP_OK)
	    {
	      fprintf (stderr,
		       _("%s: stringprep_profile() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (r, -1, NULL);
	  if (!q)
	    {
	      free (r);
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.punycode_encode_given)
	{
	  size_t len, len2;

	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  q = stringprep_utf8_to_ucs4 (p, -1, &len);
	  free (p);
	  if (!q)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; i < len; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }

	  /* The encoded form is written back into readbuf. */
	  len2 = BUFSIZ;
	  rc = punycode_encode (len, q, NULL, &len2, readbuf);
	  free (q);
	  if (rc != PUNYCODE_SUCCESS)
	    {
	      fprintf (stderr,
		       _("%s: punycode_encode() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  readbuf[len2] = '\0';

	  p = stringprep_utf8_to_locale (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.punycode_decode_given)
	{
	  size_t len;

	  len = BUFSIZ;
	  q = (uint32_t *) malloc (len * sizeof (q[0]));
	  if (!q)
	    {
	      sprintf (readbuf, _("%s: malloc() failed: "), argv[0]);
	      perror (readbuf);
	      return 1;

	    }

	  rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL);
	  if (rc != PUNYCODE_SUCCESS)
	    {
	      free (q);
	      fprintf (stderr,
		       _("%s: punycode_decode() failed with error %d.\n"),
		       argv[0], rc);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; i < len; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }

	  /* Zero-terminate so the converter can be called with length -1. */
	  q[len] = 0;
	  r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
	  free (q);
	  if (!r)
	    {
	      fprintf (stderr,
		       _("%s: could not convert from UCS-4 to UTF-8.\n"),
		       argv[0]);
	      return 1;
	    }

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  /* Check the conversion result p, not the already freed r. */
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.idna_to_ascii_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }
	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  free (p);
	  if (!q)
	    {
	      /* The failing call converts UTF-8 to UCS-4. */
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }

	  rc = idna_to_ascii_4z (q, &p,
				 (args_info.allow_unassigned_given ?
				  IDNA_ALLOW_UNASSIGNED : 0) |
				 (args_info.usestd3asciirules_given ?
				  IDNA_USE_STD3_ASCII_RULES : 0));
	  free (q);
	  if (rc != IDNA_SUCCESS)
	    {
	      fprintf (stderr, _("%s: idna_to_ascii_4z() failed "
				 "with error %d.\n"), argv[0], rc);
	      return 1;
	    }

#ifdef WITH_TLD
	  if (args_info.tld_flag)
	    {
	      size_t errpos;

	      /* The TLD check works on code points, so convert the ASCII
	       * result back to Unicode first. */
	      rc = idna_to_unicode_8z4z (p, &q,
					 (args_info.allow_unassigned_given ?
					  IDNA_ALLOW_UNASSIGNED : 0) |
					 (args_info.usestd3asciirules_given ?
					  IDNA_USE_STD3_ASCII_RULES : 0));
	      if (rc != IDNA_SUCCESS)
		{
		  fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed "
				     "with error %d.\n"), argv[0], rc);
		  return 1;
		}

	      if (args_info.debug_given)
		{
		  size_t i;
		  for (i = 0; q[i]; i++)
		    fprintf (stderr, _("tld[%d] = U+%04x\n"), i, q[i]);
		}

	      rc = tld_check_4z (q, &errpos, NULL);
	      if (rc == TLD_INVALID)
		{
		  fprintf (stderr, _("%s: string rejected by TLD test "
				     "(Unicode position %d)\n"), argv[0],
			   errpos);
		  free (q);
		  return 1;
		}
	      if (rc != TLD_SUCCESS)
		{
		  fprintf (stderr,
			   _("%s: tld_check_4z failed with error %d.\n"),
			   argv[0], rc);
		  free (q);
		  return 1;
		}

	      /* q is the buffer obtained in this branch; r is not assigned
	       * here and must not be freed. */
	      free (q);
	    }
#endif

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; p[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, p[i]);
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}

      if (args_info.idna_to_unicode_given)
	{
	  p = stringprep_locale_to_utf8 (readbuf);
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  /* The UCS-4 form of the input is only needed for the debug dump. */
	  q = stringprep_utf8_to_ucs4 (p, -1, NULL);
	  if (!q)
	    {
	      free (p);
	      /* The failing call converts UTF-8 to UCS-4. */
	      fprintf (stderr,
		       _("%s: could not convert from UTF-8 to UCS-4.\n"),
		       argv[0]);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]);
	    }
	  free (q);

	  rc = idna_to_unicode_8z4z (p, &q,
				     (args_info.allow_unassigned_given ?
				      IDNA_ALLOW_UNASSIGNED : 0) |
				     (args_info.usestd3asciirules_given ?
				      IDNA_USE_STD3_ASCII_RULES : 0));
	  free (p);
	  if (rc != IDNA_SUCCESS)
	    {
	      fprintf (stderr, _("%s: idna_to_unicode_8z4z() "
				 "failed with error %d.\n"), argv[0], rc);
	      return 1;
	    }

	  if (args_info.debug_given)
	    {
	      size_t i;
	      for (i = 0; q[i]; i++)
		fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]);
	    }

#ifdef WITH_TLD
	  if (args_info.tld_flag)
	    {
	      size_t errpos;

	      rc = tld_check_4z (q, &errpos, NULL);
	      if (rc == TLD_INVALID)
		{
		  fprintf (stderr, _("%s: string rejected by TLD test "
				     "(Unicode position %d)\n"), argv[0],
			   errpos);
		  free (q);
		  return 1;
		}
	      if (rc != TLD_SUCCESS)
		{
		  fprintf (stderr,
			   _("%s: tld_check_4z failed with error %d.\n"),
			   argv[0], rc);
		  free (q);
		  return 1;
		}
	    }
#endif

	  r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
	  free (q);
	  if (!r)
	    {
	      /* The failing call converts UCS-4 to UTF-8. */
	      fprintf (stderr,
		       _("%s: could not convert from UCS-4 to UTF-8.\n"),
		       argv[0]);
	      return 1;
	    }

	  p = stringprep_utf8_to_locale (r);
	  free (r);
	  /* Check the conversion result p, not the already freed r. */
	  if (!p)
	    {
	      fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
		       argv[0], stringprep_locale_charset ());
	      return 1;
	    }

	  fprintf (stdout, "%s\n", p);

	  free (p);
	}
    }
  /* Continue while stdin is usable and, when inputs were given on the
   * command line, until all of them have been consumed. */
  while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 ||
					      cmdn < args_info.inputs_num));

  return 0;
}
Ejemplo n.º 13
0
/*
 * Decode a single label from its ACE (Punycode) form into UTF-16.
 *
 * pzInputString    Input label bytes; iInputSize of them are read.
 * iInputSize       Number of input bytes; must be at least 1.
 * puzOutputString  Destination buffer for the UTF-16 result.
 * piOutputSize     In: capacity of puzOutputString in UTF16CHAR units.
 *                  Out: number of units written.
 *
 * If the input does not start with ACE_PREFIX it is not decoded: each
 * input byte is widened to one UTF16CHAR and copied through unchanged
 * (no terminator is written on that path).
 *
 * Returns XCODE_SUCCESS on success, XCODE_BAD_ARGUMENT_ERROR when
 * iInputSize < 1, XCODE_BUFFER_OVERFLOW_ERROR when the input does not fit
 * the internal label buffer or the output does not fit the caller's
 * buffer, or the status propagated from punycode_decode() /
 * Xcode_convert32BitToUTF16().
 */
int Xcode_puny_decodeString( const UCHAR8 *     pzInputString,
                             const int          iInputSize,
                             UTF16CHAR *        puzOutputString,
                             int *              piOutputSize )
{
  int status;
  int offset          = 0;
  int input_offset    = 0;

  unsigned int punycode_input_length, punycode_output_length;
  char punycode_input[MAX_LABEL_SIZE_8];
  DWORD punycode_output[MAX_LABEL_SIZE_32];

  if ( iInputSize < 1 ) {return XCODE_BAD_ARGUMENT_ERROR;}

  /* Make sure we have a punycode encoded label here, otherwise, just
  return the string untouched. */

  if ( !starts_with_ignore_case( pzInputString, iInputSize, (const unsigned char *)ACE_PREFIX, strlen(ACE_PREFIX) ) )
  {
    for( offset = 0; offset < iInputSize; offset++ )
    {
      if ( offset >= *piOutputSize ) return XCODE_BUFFER_OVERFLOW_ERROR;
      *(puzOutputString + offset) = (UTF16CHAR)pzInputString[offset];
    }
    *piOutputSize = iInputSize;
    return XCODE_SUCCESS;
  }

  /* copy the input to punycode input ignoring the prefix */

  input_offset = strlen(ACE_PREFIX);

  /* The payload is staged in a fixed-size stack buffer; refuse labels
  that would not fit instead of writing past the end of it. */

  if ( iInputSize - input_offset > (int)sizeof(punycode_input) )
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  punycode_input_length = 0;

  for(offset = 0; input_offset < iInputSize; offset++, input_offset++)
  {
    punycode_input[offset] = (char)pzInputString[input_offset];
    punycode_input_length++;
  }

  /* lowercase it */

  lower_case( (unsigned char *)punycode_input, punycode_input_length );

  /* tell the decoder how many code points the output buffer can hold */

  punycode_output_length = MAX_LABEL_SIZE_32;

  /* decode the input */

  status = punycode_decode(punycode_input_length, punycode_input,
                            &punycode_output_length, punycode_output);

  /* check the status */

  if (status != XCODE_SUCCESS) { return status; }

  /* only convert if the caller's buffer has room for every decoded
  code point */

  if ((int)punycode_output_length > *piOutputSize)
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* Convert result to UTF16 */

  status = Xcode_convert32BitToUTF16( punycode_output, punycode_output_length,
                                      puzOutputString, piOutputSize );

  if ( status != XCODE_SUCCESS ) return status;

  /* terminate the string */
  /* NOTE(review): this writes at index *piOutputSize as updated by the
  conversion above; it presumably relies on the caller's buffer having
  one element of slack beyond the reported capacity - confirm. */

  *(puzOutputString + *piOutputSize) = 0;

  return XCODE_SUCCESS;
}
Ejemplo n.º 14
0
/*
 * Parse and validate a single label, producing both its A-label
 * (ASCII) and U-label (record) representations where applicable.
 *
 * processing  Bit set of parser/validation options, tested with '&'.
 * input       Input label as an array of ucd_record.
 * input_l     Number of records in input.
 * u_label     Output buffer receiving the U-label records.
 * u_label_l   Receives the number of records stored in u_label.
 * a_label     Output buffer receiving the ASCII form; ASCII code points
 *             are stored at the same index they have in the input.
 * a_label_l   Receives the A-label length; only set when the label
 *             consists solely of ASCII code points.
 * results     Passed through to the __report_*_result helpers.
 *
 * Returns the accumulated xn_result.  Fatal conditions (empty input,
 * non-ASCII code point inside an ACE label, failed Punycode decoding,
 * normalization failure, ...) return immediately with the value produced
 * by the corresponding report helper; validation findings are OR-ed into
 * the result and processing continues.
 */
xn_result
xn_parse_label(
  xn_parser processing,
  const ucd_record input[],
  size_t input_l,
  ucd_record u_label[],
  size_t *u_label_l,
  char a_label[],
  size_t *a_label_l,
  result_list *results)
{
  xn_result res = xn_result_OK;
  bool_t is_hyphen34, is_ascii, is_xn, nfc_applied = 0;
  const ucd_record *input_p = input, *up;
  size_t i;

prepare_input:

  /* check for empty label */
  if (input_l == 0)
    return  __report_string_result(
      xn_error__Input_Empty,results);

  /* hyphen in 3rd and 4th position */
  is_hyphen34 = has_hyphen34(input_p,input_l);

  /* check for ACE prefix, case insensitive */
  is_xn = is_hyphen34 && starts_with_xn(input_p);

  /* check whether it's all ASCII and report non-LDH */
  for (is_ascii = 1, i = 0, up = input_p; i < input_l; i++, up++)
  {
    if (is_ASCII(up->cp))
    {
      if ((processing & xn_validate_ASCII_LDH) && is_non_LDH(up->cp))
      {     /* non-LDH ASCII */
        res = __report_position_result(
            xn_invalid__ASCII_Non_LDH,up->cp,i,results);
      }

      /* copy to alabel; ACE labels are always lower-cased */
      if (is_xn || (processing & xn_parser_ASCII_To_Lower))
        a_label[i] = (char)ascii_to_lower(up->cp);
      else
        a_label[i] = (char)up->cp;
    }
    else
    {
      if (is_xn)  /* fatal: non-ASCII ACE string */
        return  __report_position_result(
          xn_error__ACE_Non_ASCII,up->cp,i,results);
      is_ascii = 0;
    }
  }

  if (is_ascii)
  {
    *a_label_l = input_l;
    goto process_ascii; /* go on with A-label */
  }

  /* U-label pre-processing */
  memcpy(u_label,input_p,input_l*sizeof(ucd_record));
  *u_label_l = input_l;

  /* UTS46 mapping */
  if (processing & xn_parser_UTS46_Map)
  {
    bool_t uts46_changed;

    if (xn_result_ABORT & (res = uts46_map_ucd(
      processing,
      u_label,
      u_label_l,
      XN_BUFSZ,
      &uts46_changed,
      &is_ascii,
      results)))
    {  /* stop processing */
      return res;
    }

    if (is_ascii)
    { /* string has been changed to ASCII during UTS46 mapping:
         restart from the top with the mapped label as the input. */
      input_p = u_label;
      input_l = *u_label_l;
      goto prepare_input; /* loop-safe: ASCII input never appears here. */
    }
  }

  /* normalization NFC */
  if (processing & xn_process_NFC)
  {
    ucnf_normalization_result nfc_res;

    if (ucnf_OK != (nfc_res = ucnf_normalize(
        u_label,u_label_l,XN_BUFSZ,xn_ucnf_form_C)))
    {  /* normalization failed */
      if (nfc_res == ucnf_Buffer_Exceeded)
        return  __report_string_result(
          xn_fatal__Buffer_Exceeded,results);
      else
        return  __report_string_result(
          xn_fatal__Unknown_Error,results);
    }

    nfc_applied = 1; /* remember input has been normalized */
  }

  /* U-label validation */
  goto validate_ulabel;

process_ascii:
  /* A-label */

  if (is_xn)
  {
    punycode_status puny_result;
    codepoint_t puny[XN_BUFSZ];
    size_t puny_l;

    /* punycode decoding; the payload starts after the 4-char prefix */
    if (*a_label_l > 4)
    {
      if (punycode_success != (puny_result = punycode_decode(
          a_label+4,*a_label_l-4,puny,&puny_l)))
      {   /* fatal: punycode decoding failed */
        return  __report_punycode_result(
          xn_error__Puncode_Decoding_Failure,puny_result,results);
      }
    }
    else
    {   /* invalid ACE: nothing follows the prefix */
      return __report_string_result(
        xn_error__ACE_Invalid,results);
    }

    /* punycode decoding was successful */
    if (!ucd_get_record_string(puny_l,puny,u_label))
      return  __report_string_result( /* out of range */
        xn_error__Input_Out_Of_Range,results);
    *u_label_l = puny_l;

    goto validate_ulabel;
  }
  else
  {
    /* copy to u-label */
    for (i = 0; i < *a_label_l; ++i) {
      if (!ucd_get_record((codepoint_t)a_label[i], &u_label[i]))
        return  __report_string_result( /* cannot actually happen */
          xn_fatal__Unknown_Error,results);
    }
    *u_label_l = *a_label_l;
  }

  /* non-XN A-label validation */

  /* The label must not contain a U+002D HYPHEN-MINUS character
   in both the third position and fourth positions */
  if ((processing & xn_validate__Hyphen34) && is_hyphen34)
    res |=  __report_string_result(
      xn_invalid__Hyphen34,results);

  /* The label must not begin with a U+002D HYPHEN-MINUS character. */
  if ((processing & xn_validate__Leading_Hyphen) &&
      leading_hyphen(a_label))
    res |=  __report_string_result(
      xn_invalid__Leading_Hyphen,results);

  /* The label must not end with a U+002D HYPHEN-MINUS character.
     The check is gated by the xn_validate__ option flag (the same one
     the U-label branch uses) and reported with the trailing-hyphen
     result code. */
  if ((processing & xn_validate__Trailing_Hyphen) &&
      trailing_hyphen(a_label,*a_label_l))
    res |=  __report_string_result(
      xn_invalid__Trailing_Hyphen,results);

  goto finish;

validate_ulabel:
  /* U-label validation */

  /* TR#46
  Unicode IDNA Compatibility Processing
  http://unicode.org/reports/tr46/#Validity_Criteria
  4.1 Validity Criteria
    1. The label must be in Unicode Normalization Form NFC.
    2. The label must not contain a U+002D HYPHEN-MINUS character in both the third position and fourth positions.
    3. The label must neither begin nor end with a U+002D HYPHEN-MINUS character.
    4. The label must not contain a U+002E ( . ) FULL STOP.
    5. The label must not begin with a combining mark, that is: General_Category=Mark.
    6. Each code point in the label must only have certain status values according to Section 5, IDNA Mapping Table:
        6.1 For Transitional Processing, each value must be valid.
        6.2 For Nontransitional Processing, each value must be either valid or deviation.
  */

  /* The label must be in Unicode Normalization Form NFC.
     Skipped when NFC was already applied above. */
  if (!nfc_applied && (processing & xn_validate__NFC))
  {
    if (!ucnf_is_normalized
        (u_label,*u_label_l,xn_ucnf_form_C))
      res |= __report_string_result(
        xn_invalid__non_NFC,results);
  }

  /* The label must not contain a U+002D HYPHEN-MINUS character
   in both the third position and fourth positions */
  if ((processing & xn_validate__Hyphen34) &&
      has_hyphen34(u_label,*u_label_l))
    res |=  __report_string_result(
      xn_invalid__Hyphen34,results);

  /* The label must not begin with a U+002D HYPHEN-MINUS character. */
  if ((processing & xn_validate__Leading_Hyphen) &&
      leading_hyphen__rec(u_label))
    res |=  __report_string_result(
      xn_invalid__Leading_Hyphen,results);

  /* The label must not end with a U+002D HYPHEN-MINUS character. */
  if ((processing & xn_validate__Trailing_Hyphen) &&
      trailing_hyphen__rec(u_label,*u_label_l))
    res |=  __report_string_result(
      xn_invalid__Trailing_Hyphen,results);

  /* The label must not begin with a combining mark, that is:
  General_Category=Mark.*/
  if ((processing & xn_validate__Leading_Combining_Marks) &&
      is_gc_mark(u_label->general_category))
    res |=  __report_codepoint_result(
      xn_invalid__Leading_Combining_Mark,u_label->cp,results);

finish:

  return res;
}