/*
 * Convert a local part to its "xn--"-prefixed punycode form.
 *
 * Arguments:
 *   utf8   NUL-terminated input string
 *   err    if not NULL, receives an error message when the conversion fails
 *
 * Returns: a string_copy() of the input when string_is_utf8() is false;
 *          otherwise a store_get() buffer holding "xn--" followed by the
 *          punycode encoding of the input; NULL on failure.
 */
uschar *
string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
size_t ucs4_len;
punycode_uint * p;
size_t p_len;
uschar * res;
int rc;

if (!string_is_utf8(utf8)) return string_copy(utf8);

p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
if (!p)
  {
  /* The converter gave us nothing to encode; do not hand NULL (and an
  unset length) to punycode_encode(). */
  DEBUG(D_expand) debug_printf("l_u2a: bad UTF-8 input\n");
  if (err) *err = US"l_u2a: bad UTF-8 input";
  return NULL;
  }

p_len = ucs4_len*4;	/* this multiplier is pure guesswork */
res = store_get(p_len+5);	/* 4 prefix bytes + encoding + terminator */

res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';

if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
  {
  DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
  free(p);
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

/* p_len now holds the encoded length; account for the prefix. */
p_len += 4;
free(p);
res[p_len] = '\0';
return res;
}
// Punycode-encode |in| and store |prefix| followed by the encoded text in
// |out|. Returns NS_ERROR_FAILURE when the encoder reports anything other
// than success or the result leaves no room for the terminator.
static nsresult punycode(const char* prefix, const nsAString& in, nsACString& out)
{
  // Expand the UTF-16 input into one code point per array slot.
  PRUint32 codePoints[kMaxDNSNodeLen + 1];
  PRUint32 codePointCount;
  utf16ToUcs4(in, codePoints, kMaxDNSNodeLen, &codePointCount);

  // Output sizing follows the original estimate: at most 20 bits of output
  // per 16-bit input character, plus a slot for the null terminator.
  const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
  char ace[kEncodedBufSize];
  punycode_uint aceLength = kEncodedBufSize;

  enum punycode_status rc =
    punycode_encode(codePointCount, codePoints, nsnull, &aceLength, ace);

  // Success additionally requires a free byte for the terminator below.
  if (!(rc == punycode_success && aceLength < kEncodedBufSize))
    return NS_ERROR_FAILURE;

  ace[aceLength] = '\0';
  out.Assign(nsDependentCString(prefix) + nsDependentCString(ace));
  return NS_OK;
}
/** * g_hostname_to_ascii: * @hostname: a valid UTF-8 or ASCII hostname * * Converts @hostname to its canonical ASCII form; an ASCII-only * string containing no uppercase letters and not ending with a * trailing dot. * * Return value: an ASCII hostname, which must be freed, or %NULL if * @hostname is in some way invalid. * * Since: 2.22 **/ gchar * g_hostname_to_ascii (const gchar *hostname) { gchar *name, *label, *p; GString *out; gssize llen, oldlen; gboolean unicode; label = name = nameprep (hostname, -1, &unicode); if (!name || !unicode) return name; out = g_string_new (NULL); do { unicode = FALSE; for (p = label; *p && !idna_is_dot (p); p++) { if ((guchar)*p > 0x80) unicode = TRUE; } oldlen = out->len; llen = p - label; if (unicode) { if (!strncmp (label, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) goto fail; g_string_append (out, IDNA_ACE_PREFIX); if (!punycode_encode (label, llen, out)) goto fail; } else g_string_append_len (out, label, llen); if (out->len - oldlen > 63) goto fail; label += llen; if (*label) label = g_utf8_next_char (label); if (*label) g_string_append_c (out, '.'); } while (*label); g_free (name); return g_string_free (out, FALSE); fail: g_free (name); g_string_free (out, TRUE); return NULL; }
/*
 * Punycode-encode a string.
 *
 * str is coerced with rb_check_convert_type(..., "to_s"), converted from
 * UTF-8 to UCS-4 and passed to punycode_encode(). The output buffer starts
 * at 0x100 bytes and grows by 0x100 each time the encoder reports
 * PUNYCODE_BIG_OUTPUT.
 *
 * Returns a new Ruby string holding the encoded bytes.
 * Raises TypeError when str cannot be converted to a String,
 * ePunycodeError when the input cannot be converted to UCS-4 or the encoder
 * fails, and NoMemoryError when the output buffer cannot be grown.
 */
static VALUE encode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t len;
  size_t buflen = 0x100;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");
  if (NIL_P(str)) {
    /* rb_check_convert_type() yields nil instead of raising. */
    rb_raise(rb_eTypeError, "can't convert argument into String");
    return Qnil;
  }

  ustr = stringprep_utf8_to_ucs4(RSTRING_PTR(str), RSTRING_LEN(str), &len);
  if (ustr == NULL) {
    /* Without this check a NULL pointer and an unset length would be
       handed to punycode_encode(). */
    rb_raise(ePunycodeError, "could not convert input from UTF-8 to UCS-4");
    return Qnil;
  }

  while (1) {
    /* Keep the old pointer until realloc() is known to have succeeded so
       it can still be released on failure. */
    char *grown = realloc(buf, buflen);

    if (grown == NULL) {
      free(buf);
      free(ustr);
      rb_raise(rb_eNoMemError, "cannot allocate memory (%d bytes)", (uint32_t)buflen);
      return Qnil;
    }
    buf = grown;

    rc = punycode_encode(len, ustr, NULL, &buflen, buf);

    if (rc == PUNYCODE_SUCCESS) {
      break;
    } else if (rc == PUNYCODE_BIG_OUTPUT) {
      buflen += 0x100;
    } else {
      /* buf comes from realloc() and ustr from libidn, so both are released
         with free() rather than Ruby's xfree(). */
      free(ustr);
      free(buf);
      rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
      return Qnil;
    }
  }

  /* On success buflen has been updated to the encoded length. */
  retv = rb_str_new(buf, buflen);
  free(ustr);
  free(buf);
  return retv;
}
// wrapper around the reference Punycode implementation static int32_t convertToPuny(const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, UErrorCode& status){ uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; uint32_t* b1 = b1Stack; char b2Stack[MAX_LABEL_BUFFER_SIZE]; char* b2 = b2Stack; int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; punycode_status error; unsigned char* caseFlags = NULL; u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); if(status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/ b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); if(b1==NULL){ status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } status = U_ZERO_ERROR; // reset error u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); } if(U_FAILURE(status)){ goto CLEANUP; } //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); status = getError(error); if(status == U_BUFFER_OVERFLOW_ERROR){ /* we do not have enough room so grow the buffer*/ b2 = (char*) uprv_malloc( b2Len * sizeof(char)); if(b2==NULL){ status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } status = U_ZERO_ERROR; // reset error punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); status = getError(error); } if(U_FAILURE(status)){ goto CLEANUP; } if(b2Len < destCapacity){ convertASCIIToUChars(b2,dest,b2Len); }else{ status =U_BUFFER_OVERFLOW_ERROR; } CLEANUP: if(b1Stack != b1){ uprv_free(b1); } if(b2Stack != b2){ uprv_free(b2); } uprv_free(caseFlags); return b2Len; }
/** * idna_to_ascii_4i * @in: input array with unicode code points. * @inlen: length of input array with unicode code points. * @out: output zero terminated string that must have room for at * least 63 characters plus the terminating zero. * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. * * The ToASCII operation takes a sequence of Unicode code points that make * up one label and transforms it into a sequence of code points in the * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the * resulting sequence are equivalent labels. * * It is important to note that the ToASCII operation can fail. ToASCII * fails if any step of it fails. If any step of the ToASCII operation * fails on any label in a domain name, that domain name MUST NOT be used * as an internationalized domain name. The method for deadling with this * failure is application-specific. * * The inputs to ToASCII are a sequence of code points, the AllowUnassigned * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a * sequence of ASCII code points or a failure condition. * * ToASCII never alters a sequence of code points that are all in the ASCII * range to begin with (although it could fail). Applying the ToASCII * operation multiple times has exactly the same effect as applying it just * once. * * Return value: Returns 0 on success, or an error code. */ int idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) { size_t len, outlen; uint32_t *src; /* XXX don't need to copy data? */ int rc; /* * ToASCII consists of the following steps: * * 1. If all code points in the sequence are in the ASCII range (0..7F) * then skip to step 3. 
*/ { size_t i; int inasciirange; inasciirange = 1; for (i = 0; i < inlen; i++) if (in[i] > 0x7F) inasciirange = 0; if (inasciirange) { src = malloc (sizeof (in[0]) * (inlen + 1)); if (src == NULL) return IDNA_MALLOC_ERROR; memcpy (src, in, sizeof (in[0]) * inlen); src[inlen] = 0; goto step3; } } /* * 2. Perform the steps specified in [NAMEPREP] and fail if there is * an error. The AllowUnassigned flag is used in [NAMEPREP]. */ { char *p; p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); if (p == NULL) return IDNA_MALLOC_ERROR; len = strlen (p); do { char *newp; len = 2 * len + 10; /* XXX better guess? */ newp = realloc (p, len); if (newp == NULL) { free (p); return IDNA_MALLOC_ERROR; } p = newp; if (flags & IDNA_ALLOW_UNASSIGNED) rc = stringprep_nameprep (p, len); else rc = stringprep_nameprep_no_unassigned (p, len); } while (rc == STRINGPREP_TOO_SMALL_BUFFER); if (rc != STRINGPREP_OK) { free (p); return IDNA_STRINGPREP_ERROR; } src = stringprep_utf8_to_ucs4 (p, -1, NULL); free (p); } step3: /* * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: * * (a) Verify the absence of non-LDH ASCII code points; that is, * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. * * (b) Verify the absence of leading and trailing hyphen-minus; * that is, the absence of U+002D at the beginning and end of * the sequence. */ if (flags & IDNA_USE_STD3_ASCII_RULES) { size_t i; for (i = 0; src[i]; i++) if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || (src[i] >= 0x3A && src[i] <= 0x40) || (src[i] >= 0x5B && src[i] <= 0x60) || (src[i] >= 0x7B && src[i] <= 0x7F)) { free (src); return IDNA_CONTAINS_NON_LDH; } if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) { free (src); return IDNA_CONTAINS_MINUS; } } /* * 4. If all code points in the sequence are in the ASCII range * (0..7F), then skip to step 8. 
*/ { size_t i; int inasciirange; inasciirange = 1; for (i = 0; src[i]; i++) { if (src[i] > 0x7F) inasciirange = 0; /* copy string to output buffer if we are about to skip to step8 */ if (i < 64) out[i] = src[i]; } if (i < 64) out[i] = '\0'; if (inasciirange) goto step8; } /* * 5. Verify that the sequence does NOT begin with the ACE prefix. * */ { size_t i; int match; match = 1; for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) match = 0; if (match) { free (src); return IDNA_CONTAINS_ACE_PREFIX; } } /* * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] * and fail if there is an error. */ for (len = 0; src[len]; len++) ; src[len] = '\0'; outlen = 63 - strlen (IDNA_ACE_PREFIX); rc = punycode_encode (len, src, NULL, &outlen, &out[strlen (IDNA_ACE_PREFIX)]); if (rc != PUNYCODE_SUCCESS) { free (src); return IDNA_PUNYCODE_ERROR; } out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; /* * 7. Prepend the ACE prefix. */ memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); /* * 8. Verify that the number of code points is in the range 1 to 63 * inclusive (0 is excluded). */ step8: free (src); if (strlen (out) < 1 || strlen (out) > 63) return IDNA_INVALID_LENGTH; return IDNA_SUCCESS; }
void charsetConverter_idna::convert(const string& in, string& out, status* st) { if (st) new (st) status(); out.clear(); if (m_dest == "idna") { if (utility::stringUtils::is7bit(in)) { if (st) { st->inputBytesRead = in.length(); st->outputBytesWritten = in.length(); } // No need to encode as Punycode out = in; return; } string inUTF8; charset::convert(in, inUTF8, m_source, vmime::charsets::UTF_8); const char* ch = inUTF8.c_str(); const char* end = inUTF8.c_str() + inUTF8.length(); std::vector <punycode_uint> unichars; unichars.reserve(inUTF8.length()); while (ch < end) { const utf8::uint32_t uc = utf8::unchecked::next(ch); unichars.push_back(uc); } if (st) st->inputBytesRead = in.length(); punycode_uint inputLen = static_cast <punycode_uint>(unichars.size()); std::vector <char> output(inUTF8.length() * 2); punycode_uint outputLen = static_cast <punycode_uint>(output.size()); const punycode_status status = punycode_encode (inputLen, &unichars[0], /* case_flags */ NULL, &outputLen, &output[0]); if (status == punycode_success) { out = string("xn--") + string(output.begin(), output.begin() + outputLen); if (st) st->outputBytesWritten = out.length(); } else { // TODO } } else if (m_source == "idna") { if (in.length() < 5 || in.substr(0, 4) != "xn--") { if (st) { st->inputBytesRead = in.length(); st->outputBytesWritten = in.length(); } // Not an IDNA string out = in; return; } punycode_uint inputLen = static_cast <punycode_uint>(in.length() - 4); std::vector <punycode_uint> output(in.length() - 4); punycode_uint outputLen = static_cast <punycode_uint>(output.size()); const punycode_status status = punycode_decode (inputLen, &in[4], &outputLen, &output[0], /* case_flags */ NULL); if (st) st->inputBytesRead = in.length(); if (status == punycode_success) { std::vector <char> outUTF8Bytes(outputLen * 4); char* p = &outUTF8Bytes[0]; for (std::vector <punycode_uint>::const_iterator it = output.begin() ; it != output.begin() + outputLen ; ++it) { p = 
utf8::unchecked::append(*it, p); } string outUTF8(&outUTF8Bytes[0], p); charset::convert(outUTF8, out, vmime::charsets::UTF_8, m_dest); if (st) st->outputBytesWritten = out.length(); } else { // TODO } } }
int main (int argc, char **argv) { enum punycode_status status; int r; size_t input_length, output_length, j; unsigned char case_flags[unicode_max_length]; setlocale (LC_ALL, ""); if (argc != 2) usage (argv); if (argv[1][0] != '-') usage (argv); if (argv[1][2] != 0) usage (argv); if (argv[1][1] == 'e') { uint32_t input[unicode_max_length]; unsigned long codept; char output[ace_max_length + 1], uplus[3]; int c; /* Read the input code points: */ input_length = 0; for (;;) { r = scanf ("%2s%lx", uplus, &codept); if (ferror (stdin)) fail (io_error); if (r == EOF || r == 0) break; if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1) { fail (invalid_input); } if (input_length == unicode_max_length) fail (too_big); if (uplus[0] == 'u') case_flags[input_length] = 0; else if (uplus[0] == 'U') case_flags[input_length] = 1; else fail (invalid_input); input[input_length++] = codept; } /* Encode: */ output_length = ace_max_length; status = punycode_encode (input_length, input, case_flags, &output_length, output); if (status == punycode_bad_input) fail (invalid_input); if (status == punycode_big_output) fail (too_big); if (status == punycode_overflow) fail (overflow); assert (status == punycode_success); /* Convert to native charset and output: */ for (j = 0; j < output_length; ++j) { c = output[j]; assert (c >= 0 && c <= 127); if (print_ascii[c] == 0) fail (invalid_input); output[j] = print_ascii[c]; } output[j] = 0; r = puts (output); if (r == EOF) fail (io_error); return EXIT_SUCCESS; } if (argv[1][1] == 'd') { char input[ace_max_length + 2], *p, *pp; uint32_t output[unicode_max_length]; /* Read the Punycode input string and convert to ASCII: */ if (!fgets (input, ace_max_length + 2, stdin)) fail (io_error); if (ferror (stdin)) fail (io_error); if (feof (stdin)) fail (invalid_input); input_length = strlen (input) - 1; if (input[input_length] != '\n') fail (too_big); input[input_length] = 0; for (p = input; *p != 0; ++p) { pp = strchr (print_ascii, *p); if (pp == 0) fail 
(invalid_input); *p = pp - print_ascii; } /* Decode: */ output_length = unicode_max_length; status = punycode_decode (input_length, input, &output_length, output, case_flags); if (status == punycode_bad_input) fail (invalid_input); if (status == punycode_big_output) fail (too_big); if (status == punycode_overflow) fail (overflow); assert (status == punycode_success); /* Output the result: */ for (j = 0; j < output_length; ++j) { r = printf ("%s+%04lX\n", case_flags[j] ? "U" : "u", (unsigned long) output[j]); if (r < 0) fail (io_error); } return EXIT_SUCCESS; } usage (argv); return EXIT_SUCCESS; /* not reached, but quiets compiler warning */ }
int main (int argc, char *argv[]) { struct gengetopt_args_info args_info; char readbuf[BUFSIZ]; char *p, *r; uint32_t *q; unsigned cmdn = 0; int rc; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); if (cmdline_parser (argc, argv, &args_info) != 0) return 1; if (!args_info.stringprep_given && !args_info.punycode_encode_given && !args_info.punycode_decode_given && !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given) args_info.idna_to_ascii_given = 1; if ((args_info.stringprep_given ? 1 : 0) + (args_info.punycode_encode_given ? 1 : 0) + (args_info.punycode_decode_given ? 1 : 0) + (args_info.idna_to_ascii_given ? 1 : 0) + (args_info.idna_to_unicode_given ? 1 : 0) != 1) { fprintf (stderr, _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"), argv[0]); cmdline_parser_print_help (); return 1; } if (!args_info.quiet_given) fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION); if (args_info.debug_given) fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ()); if (!args_info.quiet_given && args_info.inputs_num == 0) fprintf (stderr, _("Type each input string on a line by itself, " "terminated by a newline character.\n")); do { if (cmdn < args_info.inputs_num) { strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1); readbuf[BUFSIZ - 1] = '\0'; } else if (fgets (readbuf, BUFSIZ, stdin) == NULL) { sprintf (readbuf, _("%s: fgets() failed: "), argv[0]); if (!feof (stdin)) perror (readbuf); return 1; } if (readbuf[strlen (readbuf) - 1] == '\n') readbuf[strlen (readbuf) - 1] = '\0'; if (args_info.stringprep_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, 
_("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = stringprep_profile (p, &r, args_info.profile_given ? args_info.profile_arg : "Nameprep", 0); free (p); if (rc != STRINGPREP_OK) { fprintf (stderr, _("%s: stringprep_profile() failed with error %d.\n"), argv[0], rc); return 1; } q = stringprep_utf8_to_ucs4 (r, -1, NULL); if (!q) { free (r); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } free (q); p = stringprep_utf8_to_locale (r); free (r); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_encode_given) { size_t len, len2; p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, &len); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } len2 = BUFSIZ; rc = punycode_encode (len, q, NULL, &len2, readbuf); free (q); if (rc != PUNYCODE_SUCCESS) { fprintf (stderr, _("%s: punycode_encode() failed with error %d.\n"), argv[0], rc); return 1; } readbuf[len2] = '\0'; p = stringprep_utf8_to_locale (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_decode_given) { size_t len; len = BUFSIZ; q = (uint32_t *) malloc (len * sizeof (q[0])); if (!q) { sprintf (readbuf, _("%s: malloc() failed: "), argv[0]); perror (readbuf); return 1; } rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL); if (rc != 
PUNYCODE_SUCCESS) { free (q); fprintf (stderr, _("%s: punycode_decode() failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } q[len] = 0; r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_ascii_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } rc = idna_to_ascii_4z (q, &p, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? IDNA_USE_STD3_ASCII_RULES : 0)); free (q); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_ascii_4z() failed " "with error %d.\n"), argv[0], rc); return 1; } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed " "with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("tld[%d] = U+%04x\n"), i, q[i]); } rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } free (r); } #endif if (args_info.debug_given) { size_t i; for (i = 0; p[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, p[i]); } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_unicode_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); free (p); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_unicode_8z4z() " "failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } } #endif r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } } while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 || cmdn < args_info.inputs_num)); return 0; }
/*
 * Punycode-encode one label.
 *
 * pdwzInputString  input code points
 * iInputSize       number of input code points (at least 1, at most
 *                  MAX_LABEL_SIZE_32)
 * pzOutputString   output buffer
 * piOutputSize     in: capacity of pzOutputString in bytes (the buffer is
 *                  zero-filled over this many bytes); out: number of bytes
 *                  written, not counting the terminator
 *
 * Input made only of basic code points is copied to the output unchanged.
 * Anything else is encoded and written as ACE_PREFIX followed by the encoded
 * text and a terminating '\0'.
 *
 * Returns XCODE_SUCCESS, XCODE_BAD_ARGUMENT_ERROR for missing or
 * out-of-range arguments, XCODE_BUFFER_OVERFLOW_ERROR when the input or the
 * result does not fit, or the status reported by punycode_encode().
 */
int Xcode_puny_encodeString( const DWORD * pdwzInputString,
                             const int iInputSize,
                             UCHAR8 * pzOutputString,
                             int * piOutputSize )
{
  int status;
  int offset = 0;
  int capacity;
  size_t prefix_length;
  size_t total_length;
  unsigned int punycode_input_length;
  DWORD punycode_input[MAX_LABEL_SIZE_32];
  unsigned int encoded_string_length;
  char encoded_string[MAX_LABEL_SIZE_8];

  if ( iInputSize < 1 || pdwzInputString == 0 || pzOutputString == 0 ||
       piOutputSize == 0 || *piOutputSize < 0 )
  {
    return XCODE_BAD_ARGUMENT_ERROR;
  }

  /* Remember the caller's capacity; *piOutputSize is overwritten later. */
  capacity = *piOutputSize;
  memset( pzOutputString, 0, (size_t)capacity );

  if (iInputSize > MAX_LABEL_SIZE_32)
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* copy the input to punycode input */

  punycode_input_length = 0;

  for( offset = 0; offset < iInputSize; offset++ )
  {
    punycode_input[offset] = pdwzInputString[offset];
    punycode_input_length++;
  }

  /* check if the input contains all basic code points
     if so just copy the input to output. no need to encode
     otherwise try to encode it */

  if( is_all_basic(punycode_input_length, punycode_input) == 1 )
  {
    /* The copy must stay within the caller's buffer. */
    if ( (int)punycode_input_length > capacity )
    {
      return XCODE_BUFFER_OVERFLOW_ERROR;
    }

    /* copy the input to output */

    for (offset = 0; offset < (int)punycode_input_length; offset++)
    {
      *(pzOutputString + offset) = (char)*(punycode_input + offset);
    }

    *piOutputSize = punycode_input_length;

    return XCODE_SUCCESS;
  }

  /* encode the input */

  encoded_string_length = MAX_LABEL_SIZE_8;

  status = punycode_encode( punycode_input_length,
                            punycode_input,
                            &encoded_string_length,
                            encoded_string );

  /* check the status */

  if (status != XCODE_SUCCESS)
  {
    return status;
  }

  /* copy the prefix and the encoded string to the output */

  prefix_length = strlen( ACE_PREFIX );
  total_length = prefix_length + encoded_string_length;

  if( total_length > MAX_LABEL_SIZE_8 )
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* Prefix, encoded text and terminator must all fit in the caller's
     buffer. */
  if( total_length + 1 > (size_t)capacity )
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* Written at explicit offsets, so the result does not depend on the
     buffer's previous contents. */
  memcpy( pzOutputString, ACE_PREFIX, prefix_length );
  memcpy( pzOutputString + prefix_length, encoded_string,
          encoded_string_length );

  *piOutputSize = (int)total_length;

  /* terminate the string */

  *(pzOutputString + total_length) = '\0';

  return XCODE_SUCCESS;
}
/* * Convert a single label to ACE form */ static char *convert_to_ACE (const char *name) { static char out_buf [2*MAX_HOST_LEN]; /* A conservative guess */ DWORD ucs_input [MAX_HOST_LEN]; BYTE ucs_case [MAX_HOST_LEN]; const char *p; size_t in_len, out_len; int i, c; for (i = 0, p = name; *p; i++) { wchar_t ucs = 0; c = *p++; if (!conv_to_unicode ((char)c, &ucs)) break; ucs_input[i] = ucs; ucs_case[i] = 0; #ifdef IDNA_DEBUG_ENABLED IDNA_DEBUG ("%c -> u+%04X\n", c, ucs); #endif } in_len = i; out_len = sizeof(out_buf); const punycode_status status = punycode_encode (in_len, ucs_input, ucs_case, &out_len, out_buf); if (status != punycode_success) { #ifdef IDNA_DEBUG_ENABLED _idna_errno = IDNAERR_PUNYCODE_BASE + status; #endif out_len = 0; } for (i = 0; i < (int)out_len; i++) { c = out_buf[i]; if (c < 0 || c > 127) { #ifdef IDNA_DEBUG_ENABLED _idna_errno = IDNAERR_PUNY_ENCODE; IDNA_DEBUG ("illegal Punycode result: %c (%d)\n", c, c); #endif dcassert(0); break; } if (!g_print_ascii[c]) { #ifdef IDNA_DEBUG_ENABLED _idna_errno = IDNAERR_PUNY_ENCODE; IDNA_DEBUG ("Punycode not ASCII: %c (%d)\n", c, c); #endif dcassert(0); break; } out_buf[i] = g_print_ascii[c]; } out_buf[i] = '\0'; #ifdef IDNA_DEBUG_ENABLED IDNA_DEBUG ("punycode_encode: status %d, out_len %d, out_buf '%s'\n", int(status), int(out_len), out_buf); #endif if (status == punycode_success && i == (int)out_len) /* encoding and ASCII conversion okay */ return (out_buf); return NULL; }