示例#1
0
文件: utf8.c 项目: Exim/exim
/* Convert a local part to its A-label form: "xn--" followed by the punycode
encoding of the whole string.

Arguments:
  utf8   NUL-terminated input string
  err    if non-NULL, set to an error message when NULL is returned

Returns: a copy of the input when string_is_utf8() rejects it (nothing to
         encode); otherwise a freshly store_get()-allocated, NUL-terminated
         "xn--..." string; NULL when the UCS-4 conversion or the punycode
         encoding fails.
*/
uschar *
string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
size_t ucs4_len = 0;
punycode_uint * p;
size_t p_len;
uschar * res;
int rc;

if (!string_is_utf8(utf8)) return string_copy(utf8);

/* The converter hands back a malloc'd array, or NULL if it could not
convert the input; never pass NULL on to the encoder. */

p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
if (!p)
  {
  DEBUG(D_expand) debug_printf("l_u2a: UTF-8 to UCS-4 conversion failed\n");
  if (err) *err = US"l_u2a: bad UTF-8 input";
  return NULL;
  }

p_len = ucs4_len*4;	/* this multiplier is pure guesswork */
res = store_get(p_len+5);	/* 4 for the prefix, 1 for the terminator */

res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';

/* punycode_encode() is told it has p_len bytes after the prefix and
updates p_len to the number actually written. */

if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
  {
  DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
  free(p);
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }
p_len += 4;
free(p);
res[p_len] = '\0';
return res;
}
// Punycode-encodes |in| and assigns |prefix| followed by the encoded text to
// |out|. Returns NS_ERROR_FAILURE when the encoder reports anything other than
// success, or when the encoded text leaves no room for the terminator;
// returns NS_OK otherwise.
static nsresult punycode(const char* prefix, const nsAString& in, nsACString& out)
{
  // At most 20 bits are needed to encode a 16 bit Unicode character;
  // the extra bytes include the null terminator.
  const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;

  PRUint32 codePoints[kMaxDNSNodeLen + 1];
  PRUint32 codePointCount;
  utf16ToUcs4(in, codePoints, kMaxDNSNodeLen, &codePointCount);

  char ace[kEncodedBufSize];
  punycode_uint aceLength = kEncodedBufSize;

  enum punycode_status rv =
    punycode_encode(codePointCount, codePoints, nsnull, &aceLength, ace);

  if (rv != punycode_success)
    return NS_ERROR_FAILURE;

  // The terminator written below must still fit inside the buffer.
  if (aceLength >= kEncodedBufSize)
    return NS_ERROR_FAILURE;

  ace[aceLength] = '\0';
  out.Assign(nsDependentCString(prefix) + nsDependentCString(ace));

  return NS_OK;
}
示例#3
0
/**
 * g_hostname_to_ascii:
 * @hostname: a valid UTF-8 or ASCII hostname
 *
 * Produces the canonical ASCII form of @hostname: an ASCII-only
 * string with no uppercase letters and no trailing dot. Labels that
 * contain non-ASCII characters are replaced by the ACE prefix
 * followed by their Punycode encoding; other labels are copied
 * unchanged.
 *
 * Return value: a newly allocated ASCII hostname that the caller
 * must free, or %NULL if @hostname is in some way invalid (including
 * any output label longer than 63 bytes).
 *
 * Since: 2.22
 **/
gchar *
g_hostname_to_ascii (const gchar *hostname)
{
  gchar *prepped, *cursor, *scan;
  GString *ascii;
  gssize label_bytes, start_len;
  gboolean has_unicode;

  prepped = nameprep (hostname, -1, &has_unicode);
  if (prepped == NULL || !has_unicode)
    return prepped;

  cursor = prepped;
  ascii = g_string_new (NULL);

  do
    {
      /* Find the end of the current label, noting any non-ASCII byte. */
      has_unicode = FALSE;
      scan = cursor;
      while (*scan != '\0' && !idna_is_dot (scan))
        {
          if ((guchar) *scan > 0x80)
            has_unicode = TRUE;
          scan++;
        }

      start_len = ascii->len;
      label_bytes = scan - cursor;

      if (!has_unicode)
        {
          g_string_append_len (ascii, cursor, label_bytes);
        }
      else
        {
          /* A label that needs encoding must not already carry the prefix. */
          if (strncmp (cursor, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN) == 0)
            goto fail;

          g_string_append (ascii, IDNA_ACE_PREFIX);
          if (!punycode_encode (cursor, label_bytes, ascii))
            goto fail;
        }

      /* No single output label may exceed 63 bytes. */
      if (ascii->len - start_len > 63)
        goto fail;

      /* Step over the label, then over the separator if there is one. */
      cursor += label_bytes;
      if (*cursor != '\0')
        cursor = g_utf8_next_char (cursor);
      if (*cursor != '\0')
        g_string_append_c (ascii, '.');
    }
  while (*cursor != '\0');

  g_free (prepped);
  return g_string_free (ascii, FALSE);

 fail:
  g_free (prepped);
  g_string_free (ascii, TRUE);
  return NULL;
}
示例#4
0
/*
 * Punycode-encode a Ruby string.
 *
 * str is converted with to_s, turned into UCS-4 and passed to
 * punycode_encode(); the output buffer grows in 0x100-byte steps for as long
 * as the encoder reports PUNYCODE_BIG_OUTPUT.
 *
 * Returns a new Ruby String holding the encoded bytes.
 * Raises TypeError when str cannot be converted to a String, NoMemoryError
 * when the buffer cannot be grown, and ePunycodeError when the UCS-4
 * conversion or the encoder fails.
 */
static VALUE encode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t len;
  size_t buflen = 0x100;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");
  if (NIL_P(str)) {
    /* rb_check_convert_type() yields nil instead of raising */
    rb_raise(rb_eTypeError, "can't convert argument into String");
    return Qnil;
  }

  ustr = stringprep_utf8_to_ucs4(RSTRING_PTR(str), RSTRING_LEN(str), &len);
  if (ustr == NULL) {
    rb_raise(ePunycodeError, "cannot convert input from UTF-8 to UCS-4");
    return Qnil;
  }

  while (1) {
    /* Keep the old pointer until realloc() succeeds so that it can still be
     * released on failure. */
    char *grown = realloc(buf, buflen);

    if (grown == NULL) {
      /* ustr and buf come from malloc/realloc, so free() is the matching
       * deallocator; release them before rb_raise() unwinds. */
      free(ustr);
      free(buf);
      rb_raise(rb_eNoMemError, "cannot allocate memory (%d bytes)", (uint32_t)buflen);
      return Qnil;
    }
    buf = grown;

    rc = punycode_encode(len, ustr, NULL, &buflen, buf);

    if (rc == PUNYCODE_SUCCESS) {
      break;
    } else if (rc == PUNYCODE_BIG_OUTPUT) {
      buflen += 0x100;
    } else {
      free(ustr);
      free(buf);
      rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
      return Qnil;
    }
  }

  /* On success buflen holds the number of bytes written. */
  retv = rb_str_new(buf, buflen);
  free(ustr);
  free(buf);
  return retv;
}
示例#5
0
// wrapper around the reference Punycode implementation 
static int32_t convertToPuny(const UChar* src, int32_t srcLength, 
							 UChar* dest, int32_t destCapacity,
                             UErrorCode& status){
    uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
    int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
    uint32_t* b1 = b1Stack;
    char b2Stack[MAX_LABEL_BUFFER_SIZE];
    char* b2 = b2Stack;
    int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
    punycode_status error;
    unsigned char* caseFlags = NULL;

    u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 =  (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
        if(b1==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error
        
        u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
    }
    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));

    error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
    status = getError(error);
    
    if(status == U_BUFFER_OVERFLOW_ERROR){
        /* we do not have enough room so grow the buffer*/
        b2 = (char*) uprv_malloc( b2Len * sizeof(char));
        if(b2==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
        status = getError(error);
    }
    if(U_FAILURE(status)){
        goto CLEANUP;
    }
    
    if(b2Len < destCapacity){
          convertASCIIToUChars(b2,dest,b2Len);
    }else{
        status =U_BUFFER_OVERFLOW_ERROR;
    }

CLEANUP:
    if(b1Stack != b1){
        uprv_free(b1);
    }
    if(b2Stack != b2){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

    return b2Len;
}
示例#6
0
文件: idna.c 项目: AubrCool/glibc
/**
 * idna_to_ascii_4i
 * @in: input array with unicode code points.
 * @inlen: length of input array with unicode code points.
 * @out: output zero terminated string that must have room for at
 *       least 63 characters plus the terminating zero.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * The ToASCII operation takes a sequence of Unicode code points that make
 * up one label and transforms it into a sequence of code points in the
 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
 * resulting sequence are equivalent labels.
 *
 * It is important to note that the ToASCII operation can fail. ToASCII
 * fails if any step of it fails. If any step of the ToASCII operation
 * fails on any label in a domain name, that domain name MUST NOT be used
 * as an internationalized domain name. The method for dealing with this
 * failure is application-specific.
 *
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
 * sequence of ASCII code points or a failure condition.
 *
 * ToASCII never alters a sequence of code points that are all in the ASCII
 * range to begin with (although it could fail). Applying the ToASCII
 * operation multiple times has exactly the same effect as applying it just
 * once.
 *
 * On failure the contents of @out are unspecified.
 *
 * Return value: Returns 0 on success, or an error code.
 */
int
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
{
  size_t len, outlen;
  uint32_t *src;		/* XXX don't need to copy data? */
  int rc;

  /*
   * ToASCII consists of the following steps:
   *
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
   * then skip to step 3.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; i < inlen; i++)
      if (in[i] > 0x7F)
        inasciirange = 0;
    if (inasciirange)
      {
        /* Work on a zero terminated private copy; later steps rely on
           the terminator and free src on every exit path. */
        src = malloc (sizeof (in[0]) * (inlen + 1));
        if (src == NULL)
          return IDNA_MALLOC_ERROR;

        memcpy (src, in, sizeof (in[0]) * inlen);
        src[inlen] = 0;

        goto step3;
      }
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
   */

  {
    char *p;

    p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
    if (p == NULL)
      return IDNA_MALLOC_ERROR;

    len = strlen (p);
    do
      {
        char *newp;

        /* Nameprep works in place and may expand the string, so grow
           the buffer and retry until it is large enough. */
        len = 2 * len + 10;	/* XXX better guess? */
        newp = realloc (p, len);
        if (newp == NULL)
          {
            free (p);
            return IDNA_MALLOC_ERROR;
          }
        p = newp;

        if (flags & IDNA_ALLOW_UNASSIGNED)
          rc = stringprep_nameprep (p, len);
        else
          rc = stringprep_nameprep_no_unassigned (p, len);
      }
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);

    if (rc != STRINGPREP_OK)
      {
        free (p);
        return IDNA_STRINGPREP_ERROR;
      }

    src = stringprep_utf8_to_ucs4 (p, -1, NULL);

    free (p);

    /* Everything below dereferences src, so a failed conversion must
       stop here. */
    if (src == NULL)
      return IDNA_MALLOC_ERROR;
  }

step3:
  /*
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
   *
   * (a) Verify the absence of non-LDH ASCII code points; that is,
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   *
   * (b) Verify the absence of leading and trailing hyphen-minus;
   * that is, the absence of U+002D at the beginning and end of
   * the sequence.
   */

  if (flags & IDNA_USE_STD3_ASCII_RULES)
    {
      size_t i;

      for (i = 0; src[i]; i++)
        if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
            (src[i] >= 0x3A && src[i] <= 0x40) ||
            (src[i] >= 0x5B && src[i] <= 0x60) ||
            (src[i] >= 0x7B && src[i] <= 0x7F))
          {
            free (src);
            return IDNA_CONTAINS_NON_LDH;
          }

      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
        {
          free (src);
          return IDNA_CONTAINS_MINUS;
        }
    }

  /*
   * 4. If all code points in the sequence are in the ASCII range
   * (0..7F), then skip to step 8.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; src[i]; i++)
      {
        if (src[i] > 0x7F)
          inasciirange = 0;
        /* copy string to output buffer if we are about to skip to step8 */
        if (i < 64)
          out[i] = src[i];
      }
    if (i < 64)
      out[i] = '\0';
    else if (inasciirange)
      {
        /* 64 or more ASCII code points: out is completely filled with
           no terminator, so step 8 must not run strlen() on it.  The
           label is too long in any case. */
        free (src);
        return IDNA_INVALID_LENGTH;
      }
    if (inasciirange)
      goto step8;
  }

  /*
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
   *
   */

  {
    size_t i;
    int match;

    /* A src shorter than the prefix stops the loop at its terminating
       zero, which never equals a prefix character. */
    match = 1;
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
        match = 0;
    if (match)
      {
        free (src);
        return IDNA_CONTAINS_ACE_PREFIX;
      }
  }

  /*
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
   * and fail if there is an error.
   */
  for (len = 0; src[len]; len++)
    ;
  src[len] = '\0';
  /* Leave room in the 63 character label for the prefix added in step 7. */
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
  rc = punycode_encode (len, src, NULL,
                        &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (src);
      return IDNA_PUNYCODE_ERROR;
    }
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';

  /*
   * 7. Prepend the ACE prefix.
   */

  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));

  /*
   * 8. Verify that the number of code points is in the range 1 to 63
   * inclusive (0 is excluded).
   */

step8:
  free (src);
  if (strlen (out) < 1 || strlen (out) > 63)
    return IDNA_INVALID_LENGTH;

  return IDNA_SUCCESS;
}
void charsetConverter_idna::convert(const string& in, string& out, status* st)
{
	if (st)
		new (st) status();

	out.clear();

	if (m_dest == "idna")
	{
		if (utility::stringUtils::is7bit(in))
		{
			if (st)
			{
				st->inputBytesRead = in.length();
				st->outputBytesWritten = in.length();
			}

			// No need to encode as Punycode
			out = in;
			return;
		}

		string inUTF8;
		charset::convert(in, inUTF8, m_source, vmime::charsets::UTF_8);

		const char* ch = inUTF8.c_str();
		const char* end = inUTF8.c_str() + inUTF8.length();

		std::vector <punycode_uint> unichars;
		unichars.reserve(inUTF8.length());

		while (ch < end)
		{
			const utf8::uint32_t uc = utf8::unchecked::next(ch);
			unichars.push_back(uc);
		}

		if (st)
			st->inputBytesRead = in.length();

		punycode_uint inputLen = static_cast <punycode_uint>(unichars.size());

		std::vector <char> output(inUTF8.length() * 2);
		punycode_uint outputLen = static_cast <punycode_uint>(output.size());

		const punycode_status status = punycode_encode
			(inputLen, &unichars[0], /* case_flags */ NULL, &outputLen, &output[0]);

		if (status == punycode_success)
		{
			out = string("xn--") + string(output.begin(), output.begin() + outputLen);

			if (st)
				st->outputBytesWritten = out.length();
		}
		else
		{
			// TODO
		}
	}
	else if (m_source == "idna")
	{
		if (in.length() < 5 || in.substr(0, 4) != "xn--")
		{
			if (st)
			{
				st->inputBytesRead = in.length();
				st->outputBytesWritten = in.length();
			}

			// Not an IDNA string
			out = in;
			return;
		}

		punycode_uint inputLen = static_cast <punycode_uint>(in.length() - 4);

		std::vector <punycode_uint> output(in.length() - 4);
		punycode_uint outputLen = static_cast <punycode_uint>(output.size());

		const punycode_status status = punycode_decode
			(inputLen, &in[4], &outputLen, &output[0], /* case_flags */ NULL);

		if (st)
			st->inputBytesRead = in.length();

		if (status == punycode_success)
		{
			std::vector <char> outUTF8Bytes(outputLen * 4);
			char* p = &outUTF8Bytes[0];

			for (std::vector <punycode_uint>::const_iterator it = output.begin() ;
			     it != output.begin() + outputLen ; ++it)
			{
				p = utf8::unchecked::append(*it, p);
			}

			string outUTF8(&outUTF8Bytes[0], p);
			charset::convert(outUTF8, out, vmime::charsets::UTF_8, m_dest);

			if (st)
				st->outputBytesWritten = out.length();
		}
		else
		{
			// TODO
		}
	}
}
示例#8
0
/*
 * Command-line driver for the Punycode codec.
 *
 * Takes exactly one argument, "-e" or "-d".
 *   -e  reads "u+XXXX" / "U+XXXX" code points from stdin (an upper-case 'U'
 *       sets the case flag), encodes them and prints the result.
 *   -d  reads one newline-terminated Punycode line from stdin, decodes it
 *       and prints one "u+XXXX" / "U+XXXX" line per code point.
 * Any other invocation ends in usage(); input and I/O problems end in
 * fail().  Both are relied upon not to return.
 */
int
main (int argc, char **argv)
{
  enum punycode_status status;
  int r;
  size_t input_length, output_length, j;
  unsigned char case_flags[unicode_max_length];

  setlocale (LC_ALL, "");

  if (argc != 2)
    usage (argv);
  if (argv[1][0] != '-')
    usage (argv);
  /* Make sure the option letter exists before looking at the byte after
     it; for a bare "-" that byte lies beyond the terminator. */
  if (argv[1][1] == 0 || argv[1][2] != 0)
    usage (argv);

  if (argv[1][1] == 'e')
    {
      uint32_t input[unicode_max_length];
      unsigned long codept;
      char output[ace_max_length + 1], uplus[3];
      int c;

      /* Read the input code points: */

      input_length = 0;

      for (;;)
        {
          r = scanf ("%2s%lx", uplus, &codept);
          if (ferror (stdin))
            fail (io_error);
          if (r == EOF || r == 0)
            break;

          if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
            {
              fail (invalid_input);
            }

          if (input_length == unicode_max_length)
            fail (too_big);

          if (uplus[0] == 'u')
            case_flags[input_length] = 0;
          else if (uplus[0] == 'U')
            case_flags[input_length] = 1;
          else
            fail (invalid_input);

          input[input_length++] = codept;
        }

      /* Encode: */

      output_length = ace_max_length;
      status = punycode_encode (input_length, input, case_flags,
                                &output_length, output);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Convert to native charset and output: */

      for (j = 0; j < output_length; ++j)
        {
          c = output[j];
          assert (c >= 0 && c <= 127);
          if (print_ascii[c] == 0)
            fail (invalid_input);
          output[j] = print_ascii[c];
        }

      /* output has ace_max_length + 1 bytes, so the terminator fits. */
      output[j] = 0;
      r = puts (output);
      if (r == EOF)
        fail (io_error);
      return EXIT_SUCCESS;
    }

  if (argv[1][1] == 'd')
    {
      char input[ace_max_length + 2], *p, *pp;
      uint32_t output[unicode_max_length];

      /* Read the Punycode input string and convert to ASCII: */

      if (!fgets (input, ace_max_length + 2, stdin))
        fail (io_error);
      if (ferror (stdin))
        fail (io_error);
      if (feof (stdin))
        fail (invalid_input);
      input_length = strlen (input);
      /* A line that starts with a NUL byte yields an empty string;
         subtracting one from its length would wrap around. */
      if (input_length == 0)
        fail (invalid_input);
      input_length -= 1;
      /* No newline means the line did not fit in the buffer. */
      if (input[input_length] != '\n')
        fail (too_big);
      input[input_length] = 0;

      for (p = input; *p != 0; ++p)
        {
          pp = strchr (print_ascii, *p);
          if (pp == 0)
            fail (invalid_input);
          *p = pp - print_ascii;
        }

      /* Decode: */

      output_length = unicode_max_length;
      status = punycode_decode (input_length, input, &output_length,
                                output, case_flags);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Output the result: */

      for (j = 0; j < output_length; ++j)
        {
          r = printf ("%s+%04lX\n",
                      case_flags[j] ? "U" : "u", (unsigned long) output[j]);
          if (r < 0)
            fail (io_error);
        }

      return EXIT_SUCCESS;
    }

  usage (argv);
  return EXIT_SUCCESS;		/* not reached, but quiets compiler warning */
}
示例#9
0
文件: idn.c 项目: bdw0920/uClinux
/*
 * Command-line front end for stringprep, Punycode and IDNA conversions.
 *
 * Exactly one operation is selected through the parsed options (stringprep,
 * punycode encode, punycode decode, IDNA ToASCII, IDNA ToUnicode); when none
 * is given, IDNA ToASCII is used.  Inputs are taken from the command line
 * if any were supplied, otherwise one per line from stdin.  Each result is
 * printed on stdout; diagnostics go to stderr.
 *
 * Returns 0 when all inputs were processed, 1 on the first failure.
 */
int
main (int argc, char *argv[])
{
  struct gengetopt_args_info args_info;
  char readbuf[BUFSIZ];
  char *p, *r;
  uint32_t *q;
  unsigned cmdn = 0;
  int rc;

  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  if (cmdline_parser (argc, argv, &args_info) != 0)
    return 1;

  if (!args_info.stringprep_given &&
      !args_info.punycode_encode_given && !args_info.punycode_decode_given &&
      !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given)
    args_info.idna_to_ascii_given = 1;

  if ((args_info.stringprep_given ? 1 : 0) +
      (args_info.punycode_encode_given ? 1 : 0) +
      (args_info.punycode_decode_given ? 1 : 0) +
      (args_info.idna_to_ascii_given ? 1 : 0) +
      (args_info.idna_to_unicode_given ? 1 : 0) != 1)
    {
      fprintf (stderr,
               _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"),
               argv[0]);
      cmdline_parser_print_help ();
      return 1;
    }

  if (!args_info.quiet_given)
    fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION);

  if (args_info.debug_given)
    fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ());

  if (!args_info.quiet_given && args_info.inputs_num == 0)
    fprintf (stderr, _("Type each input string on a line by itself, "
                       "terminated by a newline character.\n"));

  do
    {
      if (cmdn < args_info.inputs_num)
        {
          strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1);
          readbuf[BUFSIZ - 1] = '\0';
        }
      else if (fgets (readbuf, BUFSIZ, stdin) == NULL)
        {
          /* Bounded: argv[0] is not under our control. */
          snprintf (readbuf, sizeof readbuf, _("%s: fgets() failed: "),
                    argv[0]);
          if (!feof (stdin))
            perror (readbuf);
          return 1;
        }

      /* Strip one trailing newline; an empty string has none, and must
         not be indexed at -1. */
      {
        size_t rlen = strlen (readbuf);

        if (rlen > 0 && readbuf[rlen - 1] == '\n')
          readbuf[rlen - 1] = '\0';
      }

      if (args_info.stringprep_given)
        {
          p = stringprep_locale_to_utf8 (readbuf);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          q = stringprep_utf8_to_ucs4 (p, -1, NULL);
          if (!q)
            {
              free (p);
              fprintf (stderr,
                       _("%s: could not convert from UTF-8 to UCS-4.\n"),
                       argv[0]);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              /* %d expects an int; the index is cast so that the
                 translated format string can stay as it is. */
              for (i = 0; q[i]; i++)
                fprintf (stderr, _("input[%d] = U+%04x\n"), (int) i, q[i]);
            }
          free (q);

          rc = stringprep_profile (p, &r,
                                   args_info.profile_given ?
                                   args_info.profile_arg : "Nameprep", 0);
          free (p);
          if (rc != STRINGPREP_OK)
            {
              fprintf (stderr,
                       _("%s: stringprep_profile() failed with error %d.\n"),
                       argv[0], rc);
              return 1;
            }

          q = stringprep_utf8_to_ucs4 (r, -1, NULL);
          if (!q)
            {
              free (r);
              fprintf (stderr,
                       _("%s: could not convert from UTF-8 to UCS-4.\n"),
                       argv[0]);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; q[i]; i++)
                fprintf (stderr, _("output[%d] = U+%04x\n"), (int) i, q[i]);
            }
          free (q);

          p = stringprep_utf8_to_locale (r);
          free (r);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          fprintf (stdout, "%s\n", p);

          free (p);
        }

      if (args_info.punycode_encode_given)
        {
          size_t len, len2;

          p = stringprep_locale_to_utf8 (readbuf);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          q = stringprep_utf8_to_ucs4 (p, -1, &len);
          free (p);
          if (!q)
            {
              fprintf (stderr,
                       _("%s: could not convert from UTF-8 to UCS-4.\n"),
                       argv[0]);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; i < len; i++)
                fprintf (stderr, _("input[%d] = U+%04x\n"), (int) i, q[i]);
            }

          /* Keep one byte of readbuf back for the terminator written
             after encoding. */
          len2 = BUFSIZ - 1;
          rc = punycode_encode (len, q, NULL, &len2, readbuf);
          free (q);
          if (rc != PUNYCODE_SUCCESS)
            {
              fprintf (stderr,
                       _("%s: punycode_encode() failed with error %d.\n"),
                       argv[0], rc);
              return 1;
            }

          readbuf[len2] = '\0';

          p = stringprep_utf8_to_locale (readbuf);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          fprintf (stdout, "%s\n", p);

          free (p);
        }

      if (args_info.punycode_decode_given)
        {
          size_t len;

          /* One element more than the decoder may fill, for the
             terminating zero stored at q[len] below. */
          len = BUFSIZ;
          q = malloc ((len + 1) * sizeof (q[0]));
          if (!q)
            {
              snprintf (readbuf, sizeof readbuf, _("%s: malloc() failed: "),
                        argv[0]);
              perror (readbuf);
              return 1;

            }

          rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL);
          if (rc != PUNYCODE_SUCCESS)
            {
              free (q);
              fprintf (stderr,
                       _("%s: punycode_decode() failed with error %d.\n"),
                       argv[0], rc);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; i < len; i++)
                fprintf (stderr, _("output[%d] = U+%04x\n"), (int) i, q[i]);
            }

          q[len] = 0;
          r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
          free (q);
          if (!r)
            {
              fprintf (stderr,
                       _("%s: could not convert from UCS-4 to UTF-8.\n"),
                       argv[0]);
              return 1;
            }

          p = stringprep_utf8_to_locale (r);
          free (r);
          /* It is the locale conversion result that has to be checked,
             not the pointer that was just freed. */
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          fprintf (stdout, "%s\n", p);

          free (p);
        }

      if (args_info.idna_to_ascii_given)
        {
          p = stringprep_locale_to_utf8 (readbuf);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }
          q = stringprep_utf8_to_ucs4 (p, -1, NULL);
          free (p);
          if (!q)
            {
              fprintf (stderr,
                       _("%s: could not convert from UTF-8 to UCS-4.\n"),
                       argv[0]);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; q[i]; i++)
                fprintf (stderr, _("input[%d] = U+%04x\n"), (int) i, q[i]);
            }

          rc = idna_to_ascii_4z (q, &p,
                                 (args_info.allow_unassigned_given ?
                                  IDNA_ALLOW_UNASSIGNED : 0) |
                                 (args_info.usestd3asciirules_given ?
                                  IDNA_USE_STD3_ASCII_RULES : 0));
          free (q);
          if (rc != IDNA_SUCCESS)
            {
              fprintf (stderr, _("%s: idna_to_ascii_4z() failed "
                                 "with error %d.\n"), argv[0], rc);
              return 1;
            }

#ifdef WITH_TLD
          if (args_info.tld_flag)
            {
              size_t errpos;

              /* The TLD check works on code points, so convert the
                 ASCII result back first. */
              rc = idna_to_unicode_8z4z (p, &q,
                                         (args_info.allow_unassigned_given ?
                                          IDNA_ALLOW_UNASSIGNED : 0) |
                                         (args_info.usestd3asciirules_given ?
                                          IDNA_USE_STD3_ASCII_RULES : 0));
              if (rc != IDNA_SUCCESS)
                {
                  fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed "
                                     "with error %d.\n"), argv[0], rc);
                  return 1;
                }

              if (args_info.debug_given)
                {
                  size_t i;
                  for (i = 0; q[i]; i++)
                    fprintf (stderr, _("tld[%d] = U+%04x\n"), (int) i, q[i]);
                }

              rc = tld_check_4z (q, &errpos, NULL);
              if (rc == TLD_INVALID)
                {
                  fprintf (stderr, _("%s: string rejected by TLD test "
                                     "(Unicode position %d)\n"), argv[0],
                           (int) errpos);
                  free (q);
                  return 1;
                }
              if (rc != TLD_SUCCESS)
                {
                  fprintf (stderr,
                           _("%s: tld_check_4z failed with error %d.\n"),
                           argv[0], rc);
                  free (q);
                  return 1;
                }

              /* q is the allocation made in this block; r holds nothing
                 live in this branch. */
              free (q);
            }
#endif

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; p[i]; i++)
                fprintf (stderr, _("output[%d] = U+%04x\n"), (int) i, p[i]);
            }

          fprintf (stdout, "%s\n", p);

          free (p);
        }

      if (args_info.idna_to_unicode_given)
        {
          p = stringprep_locale_to_utf8 (readbuf);
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          q = stringprep_utf8_to_ucs4 (p, -1, NULL);
          if (!q)
            {
              free (p);
              fprintf (stderr,
                       _("%s: could not convert from UTF-8 to UCS-4.\n"),
                       argv[0]);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; q[i]; i++)
                fprintf (stderr, _("input[%d] = U+%04x\n"), (int) i, q[i]);
            }
          free (q);

          rc = idna_to_unicode_8z4z (p, &q,
                                     (args_info.allow_unassigned_given ?
                                      IDNA_ALLOW_UNASSIGNED : 0) |
                                     (args_info.usestd3asciirules_given ?
                                      IDNA_USE_STD3_ASCII_RULES : 0));
          free (p);
          if (rc != IDNA_SUCCESS)
            {
              fprintf (stderr, _("%s: idna_to_unicode_8z4z() "
                                 "failed with error %d.\n"), argv[0], rc);
              return 1;
            }

          if (args_info.debug_given)
            {
              size_t i;
              for (i = 0; q[i]; i++)
                fprintf (stderr, _("output[%d] = U+%04x\n"), (int) i, q[i]);
            }

#ifdef WITH_TLD
          if (args_info.tld_flag)
            {
              size_t errpos;

              rc = tld_check_4z (q, &errpos, NULL);
              if (rc == TLD_INVALID)
                {
                  fprintf (stderr, _("%s: string rejected by TLD test "
                                     "(Unicode position %d)\n"), argv[0],
                           (int) errpos);
                  free (q);
                  return 1;
                }
              if (rc != TLD_SUCCESS)
                {
                  fprintf (stderr,
                           _("%s: tld_check_4z failed with error %d.\n"),
                           argv[0], rc);
                  free (q);
                  return 1;
                }
            }
#endif

          r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
          free (q);
          if (!r)
            {
              fprintf (stderr,
                       _("%s: could not convert from UCS-4 to UTF-8.\n"),
                       argv[0]);
              return 1;
            }

          p = stringprep_utf8_to_locale (r);
          free (r);
          /* Check the conversion result, not the freed input. */
          if (!p)
            {
              fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"),
                       argv[0], stringprep_locale_charset ());
              return 1;
            }

          fprintf (stdout, "%s\n", p);

          free (p);
        }
    }
  while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 ||
                                              cmdn < args_info.inputs_num));

  return 0;
}
示例#10
0
/*
 * Encode one label as Punycode with the ACE prefix.
 *
 * pdwzInputString  input code points
 * iInputSize       number of input code points (must be >= 1)
 * pzOutputString   output buffer; cleared before anything is written
 * piOutputSize     in: size of the output buffer in bytes;
 *                  out: number of bytes produced (terminator not counted)
 *
 * Input made only of basic code points is copied through unchanged and is
 * not given the prefix.  Otherwise the output is ACE_PREFIX followed by the
 * encoded label and a terminating NUL.
 *
 * Returns XCODE_SUCCESS, XCODE_BAD_ARGUMENT_ERROR for invalid arguments,
 * XCODE_BUFFER_OVERFLOW_ERROR when the input or the result does not fit,
 * or the status reported by punycode_encode().
 */
int Xcode_puny_encodeString( const DWORD *  pdwzInputString,
                             const int      iInputSize,
                             UCHAR8 *       pzOutputString,
                             int *          piOutputSize )
{
  int status;
  int offset        = 0;
  int output_offset = 0;
  int output_capacity;
  size_t prefix_length;

  unsigned int punycode_input_length;
  DWORD punycode_input[MAX_LABEL_SIZE_32];

  unsigned int encoded_string_length;
  char encoded_string[MAX_LABEL_SIZE_8];

  /* Every pointer is dereferenced below, and a negative size would turn
     into a huge length once handed to memset. */

  if ( iInputSize < 1 || pdwzInputString == 0 || pzOutputString == 0 ||
       piOutputSize == 0 || *piOutputSize < 0 )
  {
    return XCODE_BAD_ARGUMENT_ERROR;
  }

  output_capacity = *piOutputSize;

  memset( pzOutputString, 0, (size_t)output_capacity );

  if (iInputSize > MAX_LABEL_SIZE_32) 
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* copy the input to punycode input */

  punycode_input_length = 0;

  for( offset = 0; offset < iInputSize; offset++ ) 
  {
    punycode_input[offset] = pdwzInputString[offset];
    punycode_input_length++;
  }

  /* check if the input contains all basic code points
  if so just copy the input to output. no need to encode
  otherwise try to encode it */

  if( is_all_basic(punycode_input_length, punycode_input) == 1 ) 
  {
    /* the copy must stay inside the caller's buffer */
    if ( iInputSize > output_capacity )
    {
      return XCODE_BUFFER_OVERFLOW_ERROR;
    }

    /* copy the input to output */
    for (offset = 0; offset < (int)punycode_input_length; offset++)
    {
      *(pzOutputString + offset) = (char)*(punycode_input + offset);
    }
    *piOutputSize = punycode_input_length;
    return XCODE_SUCCESS;
  }

  /* encode the input */

  encoded_string_length = MAX_LABEL_SIZE_8;

  status = punycode_encode(
    punycode_input_length,
    punycode_input,
    &encoded_string_length,
    encoded_string );

  /* check the status */

  if (status != XCODE_SUCCESS) 
  {
    return status;
  }

  /* copy the prefix and the encoded string to the output */

  prefix_length = strlen( ACE_PREFIX );

  if( ( prefix_length + encoded_string_length ) > MAX_LABEL_SIZE_8 ) 
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* prefix + encoded label + terminating NUL must fit the caller's buffer */

  if( ( prefix_length + encoded_string_length + 1 ) > (size_t)output_capacity )
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  output_offset = (int)prefix_length;

  /* the buffer was zeroed above, so this writes the prefix at the start */

  strncat( (char*)pzOutputString, ACE_PREFIX, prefix_length );

  for ( offset = 0; offset < (int)encoded_string_length; offset++ )
  {
    *(pzOutputString + output_offset++) = *(encoded_string + offset);
  }

  *piOutputSize = (int)( prefix_length + encoded_string_length );

  /* terminate the string */

  *(pzOutputString + output_offset) = '\0';

  return XCODE_SUCCESS;
}
示例#11
0
/*
 * Convert a single label to ACE form.
 *
 * Each byte of 'name' is mapped to a code point with conv_to_unicode();
 * conversion stops at the first byte that cannot be mapped and only the
 * characters before it are encoded.  The code points are Punycode encoded
 * and each output character is mapped through g_print_ascii.
 *
 * Returns a pointer to a static buffer, overwritten by the next call, or
 * NULL when the label has more than MAX_HOST_LEN characters, when encoding
 * fails, or when the encoder output cannot be mapped.
 */
static char *convert_to_ACE (const char *name)
{
  static char out_buf [2*MAX_HOST_LEN];  /* A conservative guess */
  DWORD  ucs_input [MAX_HOST_LEN];
  BYTE   ucs_case [MAX_HOST_LEN];
  const  char *p;
  size_t in_len, out_len;
  int    i, c;

  for (i = 0, p = name; *p; i++)
  {
    wchar_t ucs = 0;

    c = *p++;
    if (!conv_to_unicode ((char)c, &ucs))
       break;

    /* Both arrays hold MAX_HOST_LEN entries; refuse a longer label
     * instead of writing past them or silently truncating it.
     */
    if (i >= MAX_HOST_LEN)
       return NULL;

    ucs_input[i] = ucs;
    ucs_case[i]  = 0;
#ifdef IDNA_DEBUG_ENABLED
    IDNA_DEBUG ("%c -> u+%04X\n", c, ucs);
#endif
  }
  in_len  = i;

  /* Hold one byte back for the terminator stored at out_buf[out_len]. */
  out_len = sizeof(out_buf) - 1;
  const punycode_status status = punycode_encode (in_len, ucs_input, ucs_case, &out_len, out_buf);

  if (status != punycode_success)
  {
#ifdef IDNA_DEBUG_ENABLED
    _idna_errno = IDNAERR_PUNYCODE_BASE + status;
#endif
    out_len = 0;
  }

  /* Map each encoder output character to the native character set. */
  for (i = 0; i < (int)out_len; i++)
  {
    c = out_buf[i];
    if (c < 0 || c > 127)
    {
#ifdef IDNA_DEBUG_ENABLED
      _idna_errno = IDNAERR_PUNY_ENCODE;
      IDNA_DEBUG ("illegal Punycode result: %c (%d)\n", c, c);
#endif
      dcassert(0);
      break;
    }
    if (!g_print_ascii[c])
    {
#ifdef IDNA_DEBUG_ENABLED
      _idna_errno = IDNAERR_PUNY_ENCODE;
      IDNA_DEBUG ("Punycode not ASCII: %c (%d)\n", c, c);
#endif
      dcassert(0);
      break;
    }
    out_buf[i] = g_print_ascii[c];
  }
  out_buf[i] = '\0';
#ifdef IDNA_DEBUG_ENABLED
  IDNA_DEBUG ("punycode_encode: status %d, out_len %d, out_buf '%s'\n",
              int(status), int(out_len), out_buf);
#endif
  if (status == punycode_success && i == (int)out_len)   /* encoding and ASCII conversion okay */
     return (out_buf);

  return NULL;
}