/*
 * Decode a Punycode string into a UTF-8 encoded Ruby String.
 *
 * str is coerced with rb_check_convert_type(..., "to_s") before use.
 *
 * Raises NoMemoryError when the code point buffer cannot be allocated and
 * ePunycodeError when punycode_decode() rejects the input or the decoded
 * code points cannot be turned into UTF-8.
 *
 * Both working buffers come from the C allocator (malloc() here, and the
 * buffer handed back by stringprep_ucs4_to_utf8()), so they are released
 * with free() rather than Ruby's xfree().  Every buffer is released before
 * rb_raise() is called so nothing is left behind when it raises.
 */
static VALUE decode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t len;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");

  len = RSTRING_LEN(str);
  /* Always request at least one element: malloc(0) may legitimately return
     NULL, which would otherwise be reported as an out-of-memory error for
     an empty input string. */
  ustr = malloc((len > 0 ? len : 1) * sizeof(punycode_uint));
  if (ustr == NULL) {
    rb_raise(rb_eNoMemError, "cannot allocate memory (%d bytes)", (uint32_t)len);
    return Qnil;
  }

  /* On entry len is the capacity of ustr, on return the number of code
     points written. */
  rc = punycode_decode(RSTRING_LEN(str), RSTRING_PTR(str), &len, ustr, NULL);
  if (rc != PUNYCODE_SUCCESS) {
    free(ustr);
    rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
    return Qnil;
  }

  buf = stringprep_ucs4_to_utf8(ustr, len, NULL, &len);
  free(ustr);
  if (buf == NULL) {
    rb_raise(ePunycodeError, "cannot convert decoded code points to UTF-8");
    return Qnil;
  }

  retv = rb_enc_str_new(buf, len, rb_utf8_encoding());
  free(buf);

  return retv;
}
/*
 * Convert the A-label form of a localpart ("xn--" followed by punycode)
 * to UTF-8.
 *
 * Arguments:
 *   alabel   NUL-terminated A-label, including the "xn--" prefix
 *   err      if non-NULL, receives an error string on failure
 *
 * Returns:   a copy of the UTF-8 text made with string_copyn(), or NULL
 *            (with *err set when err is non-NULL) if the prefix is missing,
 *            punycode decoding fails, or the UCS-4 to UTF-8 conversion
 *            fails.
 */
uschar *
string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
{
size_t p_len = Ustrlen(alabel);
punycode_uint * p;
uschar * s;
uschar * res;
int rc;

/* The || chain short-circuits on the terminating NUL of a short string,
so no byte beyond the terminator is examined. */
if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
  {
  if (err) *err = US"bad alabel prefix";
  return NULL;
  }
p_len -= 4;

p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

/* p_len is the capacity of p going in and the decoded count coming out */
if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
if (!s)
  {
  /* Conversion failed: do not hand a NULL pointer to string_copyn() */
  if (err) *err = US"UCS-4 to UTF-8 conversion failed";
  return NULL;
  }
res = string_copyn(s, p_len);
free(s);
return res;
}
static int32_t convertFromPuny( const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, UErrorCode& status){ char b1Stack[MAX_LABEL_BUFFER_SIZE]; char* b1 = b1Stack; int32_t destLen =0; convertUCharsToASCII(src, b1,srcLength); uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; uint32_t* b2 = b2Stack; int32_t b2Len =MAX_LABEL_BUFFER_SIZE; unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); status = getError(error); if(status == U_BUFFER_OVERFLOW_ERROR){ b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); if(b2 == NULL){ status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); status = getError(error); } if(U_FAILURE(status)){ goto CLEANUP; } u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); CLEANUP: if(b1Stack != b1){ uprv_free(b1); } if(b2Stack != b2){ uprv_free(b2); } uprv_free(caseFlags); return destLen; }
// Decode one ACE (punycode) label into UTF-8.
//
// Input that IsACE() does not recognise is copied to |out| unchanged.
// Otherwise the part after the ACE prefix is punycode-decoded, checked with
// isOnlySafeChars() against mIDNBlacklist, written to |out| as UTF-8, and
// finally re-encoded with UTF8toACE(); the call only succeeds when that
// re-encoding matches |in| case-insensitively.
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
                                 bool allowUnassigned)
{
  bool hasAcePrefix;
  IsACE(in, &hasAcePrefix);
  if (!hasAcePrefix) {
    out.Assign(in);
    return NS_OK;
  }

  // RFC 3490 - 4.2 ToUnicode
  // The ToUnicode output never contains more code points than its input,
  // so "encoded length + 1" leaves room for the terminating zero below.
  const punycode_uint encodedLength = in.Length() - kACEPrefixLen;
  punycode_uint codePointCount = encodedLength + 1;
  punycode_uint* codePoints = new punycode_uint[codePointCount];
  NS_ENSURE_TRUE(codePoints, NS_ERROR_OUT_OF_MEMORY);

  enum punycode_status decodeStatus =
    punycode_decode(encodedLength,
                    PromiseFlatCString(in).get() + kACEPrefixLen,
                    &codePointCount,
                    codePoints,
                    nullptr);
  if (decodeStatus != punycode_success) {
    delete [] codePoints;
    return NS_ERROR_FAILURE;
  }

  // UCS4 -> UTF-16 -> UTF8; ucs4toUtf16() is given a zero-terminated array.
  codePoints[codePointCount] = 0;
  nsAutoString wide;
  ucs4toUtf16(codePoints, wide);
  delete [] codePoints;

  if (!isOnlySafeChars(wide, mIDNBlacklist)) {
    return NS_ERROR_FAILURE;
  }
  CopyUTF16toUTF8(wide, out);

  // Validation: encode back to ACE and compare the strings
  nsAutoCString roundTrip;
  nsresult rv = UTF8toACE(out, roundTrip, allowUnassigned);
  NS_ENSURE_SUCCESS(rv, rv);

  if (roundTrip.Equals(in, nsCaseInsensitiveCStringComparator())) {
    return NS_OK;
  }
  return NS_ERROR_FAILURE;
}
/** * g_hostname_to_unicode: * @hostname: a valid UTF-8 or ASCII hostname * * Converts @hostname to its canonical presentation form; a UTF-8 * string in Unicode normalization form C, containing no uppercase * letters, no forbidden characters, and no ASCII-encoded segments, * and not ending with a trailing dot. * * Of course if @hostname is not an internationalized hostname, then * the canonical presentation form will be entirely ASCII. * * Return value: a UTF-8 hostname, which must be freed, or %NULL if * @hostname is in some way invalid. * * Since: 2.22 **/ gchar * g_hostname_to_unicode (const gchar *hostname) { GString *out; gssize llen; out = g_string_new (NULL); do { llen = idna_end_of_label (hostname) - hostname; if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) { hostname += IDNA_ACE_PREFIX_LEN; llen -= IDNA_ACE_PREFIX_LEN; if (!punycode_decode (hostname, llen, out)) { g_string_free (out, TRUE); return NULL; } } else { gboolean unicode; gchar *canonicalized = nameprep (hostname, llen, &unicode); if (!canonicalized) { g_string_free (out, TRUE); return NULL; } g_string_append (out, canonicalized); g_free (canonicalized); } hostname += llen; if (*hostname) hostname = g_utf8_next_char (hostname); if (*hostname) g_string_append_c (out, '.'); } while (*hostname); return g_string_free (out, FALSE); }
/** * g_hostname_to_unicode: * @hostname: a valid UTF-8 or ASCII hostname * * Converts @hostname to its canonical presentation form; a UTF-8 * string in Unicode normalization form C, containing no uppercase * letters, no forbidden characters, and no ASCII-encoded segments, * and not ending with a trailing dot. * * Of course if @hostname is not an internationalized hostname, then * the canonical presentation form will be entirely ASCII. * * Return value: a UTF-8 hostname, which must be freed, or %NULL if * @hostname is in some way invalid. * * Since: 2.22 **/ gchar * g_hostname_to_unicode (const gchar *hostname) { GString *out; gssize llen; out = g_string_new (NULL); do { llen = strcspn (hostname, "."); if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) { hostname += IDNA_ACE_PREFIX_LEN; llen -= IDNA_ACE_PREFIX_LEN; if (!punycode_decode (hostname, llen, out)) { g_string_free (out, TRUE); return NULL; } } else { gchar *canonicalized = nameprep (hostname, llen); if (!canonicalized) { g_string_free (out, TRUE); return NULL; } g_string_append (out, canonicalized); g_free (canonicalized); } hostname += llen; if (*hostname && *++hostname) g_string_append_c (out, '.'); } while (*hostname); return g_string_free (out, FALSE); }
/* * Convert a single ACE encoded label to native encoding * u+XXXX is used to signify a lowercase character. * U+XXXX is used to signify a uppercase character. * Normally only lowercase should be expected here. */ static char *convert_from_ACE (const char *name) { static char out_buf [MAX_HOST_LEN]; DWORD ucs_output [MAX_HOST_LEN]; BYTE ucs_case [MAX_HOST_LEN]; size_t ucs_len, i, j; memset (&ucs_case, 0, sizeof(ucs_case)); ucs_len = sizeof(ucs_output); const punycode_status status = punycode_decode (strlen(name), name, &ucs_len, ucs_output, ucs_case); if (status != punycode_success) { #ifdef IDNA_DEBUG_ENABLED _idna_errno = IDNAERR_PUNYCODE_BASE + status; #endif dcassert(0); ucs_len = 0; } for (i = j = 0; i < ucs_len && j < _countof(out_buf)-4; i++) { wchar_t ucs = (wchar_t)ucs_output[i]; int len =0; /* [-] please see conv_to_ascii function = 0*/; //[+] idna.cpp(358): error #12144: "len" is possibly uninitialized if (!conv_to_ascii(ucs, out_buf+j, &len)) break; #ifdef IDNA_DEBUG_ENABLED IDNA_DEBUG ("%c+%04X -> %.*s\n", ucs_case[i] ? 'U' : 'u', ucs, len, out_buf+j); #endif j += len; } out_buf[j] = '\0'; #ifdef IDNA_DEBUG_ENABLED IDNA_DEBUG ("punycode_decode: status %d, out_len %d, out_buf '%s'\n", int(status), int(ucs_len), out_buf); #endif return (status == punycode_success ? out_buf : NULL); }
/*
 * Worker for converting a localpart A-label to UTF-8.
 *
 * The first four bytes of alabel are skipped without being examined, so
 * the caller is expected to have verified the "xn--" prefix already.
 *
 * Arguments:
 *   alabel   NUL-terminated A-label, including its 4-byte prefix
 *   err      if non-NULL, receives an error string on failure
 *
 * Returns:   a copy of the UTF-8 text made with string_copyn(), or NULL
 *            (with *err set when err is non-NULL) if punycode decoding or
 *            the UCS-4 to UTF-8 conversion fails.
 */
static uschar *
string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
{
size_t p_len;
punycode_uint * p;
int rc;
uschar * s, * res;

DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
alabel += 4;
p_len = Ustrlen(alabel);
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

/* p_len is the capacity of p going in and the decoded count coming out */
if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
  {
  if (err) *err = US punycode_strerror(rc);
  return NULL;
  }

s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
if (!s)
  {
  /* Conversion failed: do not hand a NULL pointer to string_copyn() */
  if (err) *err = US"UCS-4 to UTF-8 conversion failed";
  return NULL;
  }
res = string_copyn(s, p_len);
free(s);
return res;
}
/* ToUnicode(). May realloc() utf8in. */ static int idna_to_unicode_internal (char *utf8in, uint32_t * out, size_t * outlen, int flags) { int rc; char tmpout[64]; size_t utf8len = strlen (utf8in) + 1; size_t addlen = 0; /* * ToUnicode consists of the following steps: * * 1. If the sequence contains any code points outside the ASCII range * (0..7F) then proceed to step 2, otherwise skip to step 3. */ { size_t i; int inasciirange; inasciirange = 1; for (i = 0; utf8in[i]; i++) if (utf8in[i] & ~0x7F) inasciirange = 0; if (inasciirange) goto step3; } /* * 2. Perform the steps specified in [NAMEPREP] and fail if there is an * error. (If step 3 of ToASCII is also performed here, it will not * affect the overall behavior of ToUnicode, but it is not * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. */ do { char *newp = realloc (utf8in, utf8len + addlen); if (newp == NULL) { free (utf8in); return IDNA_MALLOC_ERROR; } utf8in = newp; if (flags & IDNA_ALLOW_UNASSIGNED) rc = stringprep_nameprep (utf8in, utf8len + addlen); else rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen); addlen += 1; } while (rc == STRINGPREP_TOO_SMALL_BUFFER); if (rc != STRINGPREP_OK) { free (utf8in); return IDNA_STRINGPREP_ERROR; } /* 3. Verify that the sequence begins with the ACE prefix, and save a * copy of the sequence. */ step3: if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0) { free (utf8in); return IDNA_NO_ACE_PREFIX; } /* 4. Remove the ACE prefix. */ memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)], strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1); /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] * and fail if there is an error. Save a copy of the result of * this step. */ (*outlen)--; /* reserve one for the zero */ rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); if (rc != PUNYCODE_SUCCESS) { free (utf8in); return IDNA_PUNYCODE_ERROR; } out[*outlen] = 0; /* add zero */ /* 6. Apply ToASCII. 
*/ rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); if (rc != IDNA_SUCCESS) { free (utf8in); return rc; } /* 7. Verify that the result of step 6 matches the saved copy from * step 3, using a case-insensitive ASCII comparison. */ if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) { free (utf8in); return IDNA_ROUNDTRIP_VERIFY_ERROR; } /* 8. Return the saved copy from step 5. */ free (utf8in); return IDNA_SUCCESS; }
void charsetConverter_idna::convert(const string& in, string& out, status* st) { if (st) new (st) status(); out.clear(); if (m_dest == "idna") { if (utility::stringUtils::is7bit(in)) { if (st) { st->inputBytesRead = in.length(); st->outputBytesWritten = in.length(); } // No need to encode as Punycode out = in; return; } string inUTF8; charset::convert(in, inUTF8, m_source, vmime::charsets::UTF_8); const char* ch = inUTF8.c_str(); const char* end = inUTF8.c_str() + inUTF8.length(); std::vector <punycode_uint> unichars; unichars.reserve(inUTF8.length()); while (ch < end) { const utf8::uint32_t uc = utf8::unchecked::next(ch); unichars.push_back(uc); } if (st) st->inputBytesRead = in.length(); punycode_uint inputLen = static_cast <punycode_uint>(unichars.size()); std::vector <char> output(inUTF8.length() * 2); punycode_uint outputLen = static_cast <punycode_uint>(output.size()); const punycode_status status = punycode_encode (inputLen, &unichars[0], /* case_flags */ NULL, &outputLen, &output[0]); if (status == punycode_success) { out = string("xn--") + string(output.begin(), output.begin() + outputLen); if (st) st->outputBytesWritten = out.length(); } else { // TODO } } else if (m_source == "idna") { if (in.length() < 5 || in.substr(0, 4) != "xn--") { if (st) { st->inputBytesRead = in.length(); st->outputBytesWritten = in.length(); } // Not an IDNA string out = in; return; } punycode_uint inputLen = static_cast <punycode_uint>(in.length() - 4); std::vector <punycode_uint> output(in.length() - 4); punycode_uint outputLen = static_cast <punycode_uint>(output.size()); const punycode_status status = punycode_decode (inputLen, &in[4], &outputLen, &output[0], /* case_flags */ NULL); if (st) st->inputBytesRead = in.length(); if (status == punycode_success) { std::vector <char> outUTF8Bytes(outputLen * 4); char* p = &outUTF8Bytes[0]; for (std::vector <punycode_uint>::const_iterator it = output.begin() ; it != output.begin() + outputLen ; ++it) { p = 
utf8::unchecked::append(*it, p); } string outUTF8(&outUTF8Bytes[0], p); charset::convert(outUTF8, out, vmime::charsets::UTF_8, m_dest); if (st) st->outputBytesWritten = out.length(); } else { // TODO } } }
int main (int argc, char **argv) { enum punycode_status status; int r; size_t input_length, output_length, j; unsigned char case_flags[unicode_max_length]; setlocale (LC_ALL, ""); if (argc != 2) usage (argv); if (argv[1][0] != '-') usage (argv); if (argv[1][2] != 0) usage (argv); if (argv[1][1] == 'e') { uint32_t input[unicode_max_length]; unsigned long codept; char output[ace_max_length + 1], uplus[3]; int c; /* Read the input code points: */ input_length = 0; for (;;) { r = scanf ("%2s%lx", uplus, &codept); if (ferror (stdin)) fail (io_error); if (r == EOF || r == 0) break; if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1) { fail (invalid_input); } if (input_length == unicode_max_length) fail (too_big); if (uplus[0] == 'u') case_flags[input_length] = 0; else if (uplus[0] == 'U') case_flags[input_length] = 1; else fail (invalid_input); input[input_length++] = codept; } /* Encode: */ output_length = ace_max_length; status = punycode_encode (input_length, input, case_flags, &output_length, output); if (status == punycode_bad_input) fail (invalid_input); if (status == punycode_big_output) fail (too_big); if (status == punycode_overflow) fail (overflow); assert (status == punycode_success); /* Convert to native charset and output: */ for (j = 0; j < output_length; ++j) { c = output[j]; assert (c >= 0 && c <= 127); if (print_ascii[c] == 0) fail (invalid_input); output[j] = print_ascii[c]; } output[j] = 0; r = puts (output); if (r == EOF) fail (io_error); return EXIT_SUCCESS; } if (argv[1][1] == 'd') { char input[ace_max_length + 2], *p, *pp; uint32_t output[unicode_max_length]; /* Read the Punycode input string and convert to ASCII: */ if (!fgets (input, ace_max_length + 2, stdin)) fail (io_error); if (ferror (stdin)) fail (io_error); if (feof (stdin)) fail (invalid_input); input_length = strlen (input) - 1; if (input[input_length] != '\n') fail (too_big); input[input_length] = 0; for (p = input; *p != 0; ++p) { pp = strchr (print_ascii, *p); if (pp == 0) fail 
(invalid_input); *p = pp - print_ascii; } /* Decode: */ output_length = unicode_max_length; status = punycode_decode (input_length, input, &output_length, output, case_flags); if (status == punycode_bad_input) fail (invalid_input); if (status == punycode_big_output) fail (too_big); if (status == punycode_overflow) fail (overflow); assert (status == punycode_success); /* Output the result: */ for (j = 0; j < output_length; ++j) { r = printf ("%s+%04lX\n", case_flags[j] ? "U" : "u", (unsigned long) output[j]); if (r < 0) fail (io_error); } return EXIT_SUCCESS; } usage (argv); return EXIT_SUCCESS; /* not reached, but quiets compiler warning */ }
int main (int argc, char *argv[]) { struct gengetopt_args_info args_info; char readbuf[BUFSIZ]; char *p, *r; uint32_t *q; unsigned cmdn = 0; int rc; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); if (cmdline_parser (argc, argv, &args_info) != 0) return 1; if (!args_info.stringprep_given && !args_info.punycode_encode_given && !args_info.punycode_decode_given && !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given) args_info.idna_to_ascii_given = 1; if ((args_info.stringprep_given ? 1 : 0) + (args_info.punycode_encode_given ? 1 : 0) + (args_info.punycode_decode_given ? 1 : 0) + (args_info.idna_to_ascii_given ? 1 : 0) + (args_info.idna_to_unicode_given ? 1 : 0) != 1) { fprintf (stderr, _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"), argv[0]); cmdline_parser_print_help (); return 1; } if (!args_info.quiet_given) fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION); if (args_info.debug_given) fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ()); if (!args_info.quiet_given && args_info.inputs_num == 0) fprintf (stderr, _("Type each input string on a line by itself, " "terminated by a newline character.\n")); do { if (cmdn < args_info.inputs_num) { strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1); readbuf[BUFSIZ - 1] = '\0'; } else if (fgets (readbuf, BUFSIZ, stdin) == NULL) { sprintf (readbuf, _("%s: fgets() failed: "), argv[0]); if (!feof (stdin)) perror (readbuf); return 1; } if (readbuf[strlen (readbuf) - 1] == '\n') readbuf[strlen (readbuf) - 1] = '\0'; if (args_info.stringprep_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, 
_("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = stringprep_profile (p, &r, args_info.profile_given ? args_info.profile_arg : "Nameprep", 0); free (p); if (rc != STRINGPREP_OK) { fprintf (stderr, _("%s: stringprep_profile() failed with error %d.\n"), argv[0], rc); return 1; } q = stringprep_utf8_to_ucs4 (r, -1, NULL); if (!q) { free (r); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } free (q); p = stringprep_utf8_to_locale (r); free (r); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_encode_given) { size_t len, len2; p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, &len); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } len2 = BUFSIZ; rc = punycode_encode (len, q, NULL, &len2, readbuf); free (q); if (rc != PUNYCODE_SUCCESS) { fprintf (stderr, _("%s: punycode_encode() failed with error %d.\n"), argv[0], rc); return 1; } readbuf[len2] = '\0'; p = stringprep_utf8_to_locale (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_decode_given) { size_t len; len = BUFSIZ; q = (uint32_t *) malloc (len * sizeof (q[0])); if (!q) { sprintf (readbuf, _("%s: malloc() failed: "), argv[0]); perror (readbuf); return 1; } rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL); if (rc != 
PUNYCODE_SUCCESS) { free (q); fprintf (stderr, _("%s: punycode_decode() failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } q[len] = 0; r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_ascii_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } rc = idna_to_ascii_4z (q, &p, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? IDNA_USE_STD3_ASCII_RULES : 0)); free (q); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_ascii_4z() failed " "with error %d.\n"), argv[0], rc); return 1; } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed " "with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("tld[%d] = U+%04x\n"), i, q[i]); } rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } free (r); } #endif if (args_info.debug_given) { size_t i; for (i = 0; p[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, p[i]); } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_unicode_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); free (p); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_unicode_8z4z() " "failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } } #endif r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } } while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 || cmdn < args_info.inputs_num)); return 0; }
/*
 * Decode one label from Punycode into UTF-16.
 *
 * pzInputString    the label, iInputSize bytes long
 * iInputSize       length of the label; must be at least 1
 * puzOutputString  receives the UTF-16 result
 * piOutputSize     on entry the capacity of puzOutputString, on success
 *                  the number of UTF16CHARs written
 *
 * A label that does not start with ACE_PREFIX (compared ignoring case) is
 * widened byte by byte and copied to the output unchanged, without a
 * terminator.  Otherwise the part after the prefix is lower-cased,
 * decoded, converted to UTF-16 and zero-terminated.
 *
 * Returns XCODE_SUCCESS, XCODE_BAD_ARGUMENT_ERROR for an empty input,
 * XCODE_BUFFER_OVERFLOW_ERROR when the input does not fit the internal
 * label buffer or the result does not fit the output, or the status
 * reported by punycode_decode() / Xcode_convert32BitToUTF16().
 */
int Xcode_puny_decodeString( const UCHAR8 *  pzInputString,
                             const int       iInputSize,
                             UTF16CHAR *     puzOutputString,
                             int *           piOutputSize )
{
  int status;
  int offset = 0;
  int input_offset = 0;
  int output_offset = 0;
  unsigned int punycode_input_length, punycode_output_length;
  char punycode_input[MAX_LABEL_SIZE_8];
  DWORD punycode_output[MAX_LABEL_SIZE_32];

  if ( iInputSize < 1 ) {return XCODE_BAD_ARGUMENT_ERROR;}

  /* Make sure we have a punycode encoded label here, otherwise, just
     return the string untouched. */

  if ( !starts_with_ignore_case( pzInputString, iInputSize,
                                 (const unsigned char *)ACE_PREFIX,
                                 strlen(ACE_PREFIX) ) )
  {
    for( offset = 0; offset < iInputSize; offset++ )
    {
      if ( offset >= *piOutputSize ) return XCODE_BUFFER_OVERFLOW_ERROR;
      *(puzOutputString + offset) = (UTF16CHAR)pzInputString[offset];
    }
    *piOutputSize = iInputSize;
    return XCODE_SUCCESS;
  }

  /* copy the input to punycode input ignoring the prefix */

  input_offset = strlen(ACE_PREFIX);

  /* refuse input whose encoded part would not fit in punycode_input[]
     instead of copying past the end of the array */

  if ( iInputSize - input_offset > (int)sizeof(punycode_input) )
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  punycode_input_length = 0;
  for( offset = 0; input_offset < iInputSize; offset++, input_offset++ )
  {
    punycode_input[offset] = (char)pzInputString[input_offset];
    punycode_input_length++;
  }

  /* lowercase it */

  lower_case( (unsigned char *)punycode_input, punycode_input_length );

  punycode_output_length = MAX_LABEL_SIZE_32;

  /* decode the input */

  status = punycode_decode( punycode_input_length, punycode_input,
                            &punycode_output_length, punycode_output );

  /* check the status */

  if (status != XCODE_SUCCESS) { return status; }

  /* copy the punycode output to the output if there is room */

  output_offset = 0;

  if ((int)output_offset > *piOutputSize - (int)punycode_output_length)
  {
    return XCODE_BUFFER_OVERFLOW_ERROR;
  }

  /* Convert result to UTF16 */

  status = Xcode_convert32BitToUTF16( punycode_output, punycode_output_length,
                                      puzOutputString, piOutputSize );

  if ( status != XCODE_SUCCESS ) return status;

  /* terminate the string */
  /* NOTE(review): this writes at index *piOutputSize, which presumably
     requires the caller's buffer to hold one element more than the
     capacity it passed in - confirm against the callers. */

  *(puzOutputString + *piOutputSize) = 0;

  return XCODE_SUCCESS;
}
/*
 * Parse and validate a single label, producing both its U-label and,
 * for all-ASCII input, its A-label form.
 *
 * processing   bit set of xn_parser_*, xn_process_* and xn_validate_*
 *              flags selecting the mapping and validation steps
 * input        the label as UCD records, input_l records long
 * u_label      receives the U-label records; *u_label_l their count
 * a_label      receives the ASCII characters of the input; *a_label_l is
 *              only set when the (possibly mapped) label is all ASCII
 * results      collects the individual findings reported along the way
 *
 * Fatal problems return immediately with the value produced by the
 * corresponding __report_*_result() call.  Validation findings are OR-ed
 * into the returned result and processing continues.
 */
xn_result xn_parse_label(
    xn_parser processing,
    const ucd_record input[], size_t input_l,
    ucd_record u_label[], size_t *u_label_l,
    char a_label[], size_t *a_label_l,
    result_list *results)
{
    xn_result res = xn_result_OK;
    bool_t is_hyphen34, is_ascii, is_xn, nfc_applied = 0;
    const ucd_record *input_p = input, *up;
    size_t i;

prepare_input:
    /* check for empty label */
    if (input_l == 0)
        return __report_string_result(
            xn_error__Input_Empty,results);

    /* hyphen in 3rd and 4th position */
    is_hyphen34 = has_hyphen34(input_p,input_l);

    /* check for ACE prefix, case insensitive */
    is_xn = is_hyphen34 && starts_with_xn(input_p);

    /* check whether it's all ASCII and report non-LDH */
    for (is_ascii = 1, i = 0, up = input_p; i < input_l; i++, up++) {
        if (is_ASCII(up->cp)) {
            if ((processing & xn_validate_ASCII_LDH) && is_non_LDH(up->cp)) {
                /* non-LDH ASCII; accumulated like every other validation
                   finding so that earlier findings are not overwritten */
                res |= __report_position_result(
                    xn_invalid__ASCII_Non_LDH,up->cp,i,results);
            }
            /* copy to alabel */
            if (is_xn || (processing & xn_parser_ASCII_To_Lower))
                a_label[i] = (char)ascii_to_lower(up->cp);
            else
                a_label[i] = (char)up->cp;
        } else {
            if (is_xn)
                /* fatal: non-ASCII ACE string */
                return __report_position_result(
                    xn_error__ACE_Non_ASCII,up->cp,i,results);
            is_ascii = 0;
        }
    }

    if (is_ascii) {
        *a_label_l = input_l;
        goto process_ascii; /* go on with A-label */
    }

    /* U-label pre-processing */
    memcpy(u_label,input_p,input_l*sizeof(ucd_record));
    *u_label_l = input_l;

    /* UTS46 mapping */
    if (processing & xn_parser_UTS46_Map) {
        bool_t uts46_changed;
        if (xn_result_ABORT & (res = uts46_map_ucd(
                processing,
                u_label, u_label_l, XN_BUFSZ,
                &uts46_changed, &is_ascii, results))) {
            /* stop processing */
            return res;
        }
        if (is_ascii) {
            /* string has been changed to ASCII during UTS46 mapping. */
            input_p = u_label, input_l = *u_label_l;
            goto prepare_input; /* loop-safe: this point is only reached
                                   for non-ASCII input, and the restarted
                                   pass takes the all-ASCII branch */
        }
    }

    /* normalization NFC */
    if (processing & xn_process_NFC) {
        ucnf_normalization_result nfc_res;
        if (ucnf_OK != (nfc_res = ucnf_normalize(
                u_label,u_label_l,XN_BUFSZ,xn_ucnf_form_C))) {
            /* normalization failed */
            if (nfc_res == ucnf_Buffer_Exceeded)
                return __report_string_result(
                    xn_fatal__Buffer_Exceeded,results);
            else
                return __report_string_result(
                    xn_fatal__Unknown_Error,results);
        }
        nfc_applied = 1; /* remember input has been normalized */
    }

    /* U-label validation */
    goto validate_ulabel;

process_ascii:
    /* A-label */
    if (is_xn) {
        punycode_status puny_result;
        codepoint_t puny[XN_BUFSZ];
        size_t puny_l;

        /* punycode decoding; there must be something after "xn--" */
        if (*a_label_l > 4) {
            if (punycode_success != (puny_result = punycode_decode(
                    a_label+4,*a_label_l-4,puny,&puny_l))) {
                /* fatal: punycode decoding failed */
                return __report_punycode_result(
                    xn_error__Puncode_Decoding_Failure,puny_result,results);
            }
        } else {
            /* invalid ACE */
            return __report_string_result(
                xn_error__ACE_Invalid,results);
        }

        /* punycode decoding was successful */
        if (!ucd_get_record_string(puny_l,puny,u_label))
            return __report_string_result( /* out of range */
                xn_error__Input_Out_Of_Range,results);
        *u_label_l = puny_l;

        goto validate_ulabel;

    } else {
        /* copy to u-label */
        for (i = 0; i < *a_label_l; ++i) {
            if (!ucd_get_record((codepoint_t)a_label[i], &u_label[i]))
                return __report_string_result( /* cannot actually happen */
                    xn_fatal__Unknown_Error,results);
        }
        *u_label_l = *a_label_l;
    }

    /* non-XN A-label validation */

    /* The label must not contain a U+002D HYPHEN-MINUS character in both the
        third position and fourth positions */
    if ((processing & xn_validate__Hyphen34) && is_hyphen34)
        res |= __report_string_result(
            xn_invalid__Hyphen34,results);

    /* The label must not begin with a U+002D HYPHEN-MINUS character. */
    if ((processing & xn_validate__Leading_Hyphen)
            && leading_hyphen(a_label))
        res |= __report_string_result(
            xn_invalid__Leading_Hyphen,results);

    /* The label must not end with a U+002D HYPHEN-MINUS character.
       The check is selected by the xn_validate__ flag (as in the U-label
       validation below) and reported as a trailing hyphen. */
    if ((processing & xn_validate__Trailing_Hyphen)
            && trailing_hyphen(a_label,*a_label_l))
        res |= __report_string_result(
            xn_invalid__Trailing_Hyphen,results);

    goto finish;

validate_ulabel:
    /* U-label validation */

    /*  TR#46 Unicode IDNA Compatibility Processing
        http://unicode.org/reports/tr46/#Validity_Criteria

        4.1 Validity Criteria

        1. The label must be in Unicode Normalization Form NFC.
        2. The label must not contain a U+002D HYPHEN-MINUS character in both
            the third position and fourth positions.
        3. The label must neither begin nor end with a U+002D HYPHEN-MINUS
            character.
        4. The label must not contain a U+002E ( . ) FULL STOP.
        5. The label must not begin with a combining mark, that is:
            General_Category=Mark.
        6. Each code point in the label must only have certain status values
            according to Section 5, IDNA Mapping Table:
            6.1 For Transitional Processing, each value must be valid.
            6.2 For Nontransitional Processing, each value must be either
                valid or deviation.
    */

    /* The label must be in Unicode Normalization Form NFC.
       Skipped when the label was normalized above. */
    if (!nfc_applied && (processing & xn_validate__NFC)) {
        if (!ucnf_is_normalized
                (u_label,*u_label_l,xn_ucnf_form_C))
            res |= __report_string_result(
                xn_invalid__non_NFC,results);
    }

    /* The label must not contain a U+002D HYPHEN-MINUS character in both the
        third position and fourth positions */
    if ((processing & xn_validate__Hyphen34)
            && has_hyphen34(u_label,*u_label_l))
        res |= __report_string_result(
            xn_invalid__Hyphen34,results);

    /* The label must not begin with a U+002D HYPHEN-MINUS character. */
    if ((processing & xn_validate__Leading_Hyphen)
            && leading_hyphen__rec(u_label))
        res |= __report_string_result(
            xn_invalid__Leading_Hyphen,results);

    /* The label must not end with a U+002D HYPHEN-MINUS character. */
    if ((processing & xn_validate__Trailing_Hyphen)
            && trailing_hyphen__rec(u_label,*u_label_l))
        res |= __report_string_result(
            xn_invalid__Trailing_Hyphen,results);

    /* The label must not begin with a combining mark, that is:
        General_Category=Mark.*/
    if ((processing & xn_validate__Leading_Combining_Marks)
            && is_gc_mark(u_label->general_category))
        res |= __report_codepoint_result(
            xn_invalid__Leading_Combining_Mark,u_label->cp,results);

finish:
    return res;
}