/*
 * Decode a Punycode string into a UTF-8 encoded Ruby String.
 *
 * self: receiver (not used).
 * str:  object converted to a String through "to_s"; its bytes are handed
 *       to punycode_decode().
 *
 * Returns a new String tagged with the UTF-8 encoding.
 *
 * Raises NoMemoryError when the code point buffer cannot be allocated, and
 * ePunycodeError when the argument cannot be converted to a String, when
 * punycode_decode() fails, or when the decoded code points cannot be
 * converted to UTF-8.
 */
static VALUE decode(VALUE self, VALUE str)
{
  int rc;
  punycode_uint *ustr;
  size_t inlen, ulen, u8len = 0, nalloc;
  char *buf = NULL;
  VALUE retv;

  str = rb_check_convert_type(str, T_STRING, "String", "to_s");
  if (str == Qnil) {
    rb_raise(ePunycodeError, "argument cannot be converted to String");
    return Qnil;
  }

  inlen = RSTRING_LEN(str);

  /* Allocate at least one element so that an empty input does not turn
   * malloc(0) == NULL into a bogus out-of-memory error. */
  nalloc = inlen > 0 ? inlen : 1;
  if (nalloc > ((size_t)-1) / sizeof(punycode_uint)) {
    rb_raise(rb_eNoMemError, "cannot allocate memory (input too long)");
    return Qnil;
  }

  ustr = malloc(nalloc * sizeof(punycode_uint));
  if (ustr == NULL) {
    rb_raise(rb_eNoMemError, "cannot allocate memory (%lu bytes)",
             (unsigned long)(nalloc * sizeof(punycode_uint)));
    return Qnil;
  }

  /* On input ulen is the capacity of ustr, on output the number of code
   * points written. */
  ulen = inlen;
  rc = punycode_decode(inlen, RSTRING_PTR(str), &ulen, ustr, NULL);
  if (rc != PUNYCODE_SUCCESS) {
    /* ustr came from malloc(), so it must be released with free(), not
     * with Ruby's xfree(). */
    free(ustr);
    rb_raise(ePunycodeError, "%s (%d)", punycode_strerror(rc), rc);
    return Qnil;
  }

  buf = stringprep_ucs4_to_utf8(ustr, ulen, NULL, &u8len);
  free(ustr);
  if (buf == NULL) {
    rb_raise(ePunycodeError, "cannot convert decoded code points to UTF-8");
    return Qnil;
  }

  retv = rb_enc_str_new(buf, u8len, rb_utf8_encoding());
  /* buf is released with free() to match how this function treats other
   * non-Ruby allocations. */
  free(buf);

  return retv;
}
/*
 * Convert a punycode A-label local part (one carrying the "xn--" prefix)
 * into UTF-8.
 *
 * Arguments:
 *   alabel   NUL-terminated A-label; must start with "xn--"
 *   err      if non-NULL, receives an error message on failure
 *
 * Returns:   a copy of the UTF-8 text made with string_copyn(), or NULL when
 *            the prefix is wrong, punycode decoding fails, or the decoded
 *            code points cannot be converted to UTF-8.
 */
uschar *
string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
{
  size_t p_len = Ustrlen(alabel);
  punycode_uint * p;
  uschar * s;
  uschar * res;
  int rc;

  /* The comparisons short-circuit, so a label shorter than the prefix stops
     at its terminating NUL and is never read past the end. */
  if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
    {
    if (err) *err = US"bad alabel prefix";
    return NULL;
    }

  p_len -= 4;
  p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

  if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
    {
    if (err) *err = US punycode_strerror(rc);
    return NULL;
    }

  s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
  if (!s)
    {
    /* Without this check a failed conversion would hand NULL to
       string_copyn(). */
    if (err) *err = US"conversion of decoded alabel to utf8 failed";
    return NULL;
    }

  res = string_copyn(s, p_len);
  free(s);      /* s was allocated by the stringprep library, not by store_get() */
  return res;
}
/**
 * stringprep - prepare internationalized string
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 * @flags: a #Stringprep_profile_flags value, or 0.
 * @profile: pointer to #Stringprep_profile to use.
 *
 * Prepare the input zero terminated UTF-8 string according to the
 * stringprep profile, and write back the result to the input string.
 *
 * Note that you must convert strings entered in the systems locale
 * into UTF-8 before using this function, see
 * stringprep_locale_to_utf8().
 *
 * Since the stringprep operation can expand the string, @maxlen
 * indicate how large the buffer holding the string is.  This function
 * will not read or write to characters outside that size.
 *
 * The @flags are one of #Stringprep_profile_flags values, or 0.
 *
 * The @profile contain the #Stringprep_profile instructions to
 * perform.  Your application can define new profiles, possibly
 * re-using the generic stringprep tables that always will be part of
 * the library, or use one of the currently supported profiles.
 *
 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
 * %STRINGPREP_MALLOC_ERROR is returned when a conversion or allocation
 * fails, and %STRINGPREP_TOO_SMALL_BUFFER when the result (including its
 * terminating zero) does not fit in @maxlen bytes; @in is left unmodified
 * in every error case.
 **/
int
stringprep (char *in,
	    size_t maxlen,
	    Stringprep_profile_flags flags,
	    const Stringprep_profile * profile)
{
  int rc;
  char *utf8 = NULL;
  uint32_t *ucs4 = NULL;
  size_t ucs4len, maxucs4len, adducs4len = 50;

  /* Retry with 50 more spare code points each round until the UCS-4
     working buffer is large enough for stringprep_4i(). */
  do
    {
      uint32_t *newp;

      free (ucs4);
      ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
      if (!ucs4)
	/* Without this check ucs4len would be read uninitialized and
	   stringprep_4i() would run on a freshly realloc'ed, garbage
	   buffer. */
	return STRINGPREP_MALLOC_ERROR;

      maxucs4len = ucs4len + adducs4len;
      newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
      if (!newp)
	{
	  free (ucs4);
	  return STRINGPREP_MALLOC_ERROR;
	}
      ucs4 = newp;

      rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
      adducs4len += 50;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);

  if (rc != STRINGPREP_OK)
    {
      free (ucs4);
      return rc;
    }

  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
  free (ucs4);
  if (!utf8)
    return STRINGPREP_MALLOC_ERROR;

  /* ">=" leaves room for the terminating zero copied by strcpy. */
  if (strlen (utf8) >= maxlen)
    {
      free (utf8);
      return STRINGPREP_TOO_SMALL_BUFFER;
    }

  strcpy (in, utf8);		/* flawfinder: ignore */

  free (utf8);

  return STRINGPREP_OK;
}
/**
 * stringprep_ucs4_nfkc_normalize:
 * @str: a Unicode string.
 * @len: length of @str array, or -1 if @str is nul-terminated.
 *
 * Converts UCS4 string into UTF-8 and runs
 * stringprep_utf8_nfkc_normalize().
 *
 * Return value: a newly allocated Unicode string, that is the NFKC
 *   normalized form of @str, or %NULL if @str could not be converted
 *   to UTF-8.
 **/
my_uint32_t *
stringprep_ucs4_nfkc_normalize (my_uint32_t * str, ssize_t len)
{
  char *p;
  my_uint32_t *result_wc;

  p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
  if (p == NULL)
    /* Do not hand a NULL string to the normalizer. */
    return NULL;

  result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
  free (p);

  return result_wc;
}
/**
 * idna_to_unicode_8z8z:
 * @input: zero-terminated UTF-8 string.
 * @output: pointer to newly allocated output UTF-8 string.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * Convert possibly ACE encoded domain name in UTF-8 format into a
 * UTF-8 string.  The domain name may contain several labels,
 * separated by dots.  The output buffer must be deallocated by the
 * caller.
 *
 * If idna_to_unicode_8z4z() produces no UCS-4 string, @output is set to
 * %NULL and its return code is passed through.
 *
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 **/
int
idna_to_unicode_8z8z (const char *input, char **output, int flags)
{
  /* Initialized so that a call which fails before storing a result can be
     told apart from one that produced a string; the pointer used to be
     read and freed uninitialized in that case. */
  uint32_t *ucs4 = NULL;
  int rc;

  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
  if (ucs4 == NULL)
    {
      *output = NULL;
      return rc;
    }

  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
  free (ucs4);

  if (!*output)
    return IDNA_ICONV_ERROR;

  return rc;
}
/* Reverses IDNA (ACE) encoding of one DNS label. To be invoked after unescaping.
 *
 * Reads encoded_size bytes from encoded and, if they start with IDNA_ACE_PREFIX, writes the
 * NUL-terminated UTF-8 form into decoded, which has room for decoded_max bytes.
 *
 * Returns the number of UTF-8 bytes written (not counting the trailing NUL), 0 if the label does
 * not carry the ACE prefix or decodes to nothing, -ENOMEM if a conversion buffer could not be
 * obtained, and -EINVAL if the result plus its trailing NUL does not fit into decoded_max bytes.
 * Always returns 0 when built without HAVE_LIBIDN. */
int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
#ifdef HAVE_LIBIDN
        _cleanup_free_ uint32_t *ucs4_in = NULL;
        _cleanup_free_ char *utf8 = NULL;
        uint32_t *ucs4_out = NULL;
        size_t n_in, n_out, n_bytes;

        assert(encoded);
        assert(decoded);

        /* Labels without the ACE prefix are not IDNA encoded, nothing to undo */
        if (encoded_size < sizeof(IDNA_ACE_PREFIX)-1 ||
            memcmp(encoded, IDNA_ACE_PREFIX, sizeof(IDNA_ACE_PREFIX)-1) != 0)
                return 0;

        ucs4_in = stringprep_utf8_to_ucs4(encoded, encoded_size, &n_in);
        if (!ucs4_in)
                return -ENOMEM;

        /* The output array gets as many slots as the input has code points */
        n_out = n_in;
        ucs4_out = newa(uint32_t, n_out);

        /* The return value is deliberately not checked here */
        idna_to_unicode_44i(ucs4_in, n_in, ucs4_out, &n_out, 0);

        utf8 = stringprep_ucs4_to_utf8(ucs4_out, n_out, NULL, &n_bytes);
        if (!utf8)
                return -ENOMEM;

        if (n_bytes == 0)
                return 0;

        /* One extra byte is needed for the trailing NUL, which is copied along */
        if (n_bytes + 1 > decoded_max)
                return -EINVAL;

        memcpy(decoded, utf8, n_bytes + 1);
        return n_bytes;
#else
        return 0;
#endif
}
/**
 * idna_to_unicode_44i
 * @in: input array with unicode code points.
 * @inlen: length of input array with unicode code points.
 * @out: output array with unicode code points.
 * @outlen: on input, maximum size of output array with unicode code points,
 *          on exit, actual size of output array with unicode code points.
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 *
 * Implements the ToUnicode operation: one label, given as a sequence of
 * Unicode code points, is turned into another sequence of Unicode code
 * points.  A label in ACE form yields the equivalent internationalized
 * label that is not in ACE form; any other label is handed back
 * unaltered.
 *
 * ToUnicode never fails.  Whenever a step fails, the original input
 * sequence is what gets returned.  In this implementation that means
 * the first min(@inlen, input value of @outlen) code points of @in are
 * copied to @out, and @outlen is set to @inlen.
 *
 * The Punycode decoder can never output more code points than it
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
 * the number of octets needed to represent a sequence of code points
 * depends on the particular character encoding used.
 *
 * The inputs to ToUnicode are a sequence of code points, the
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag.  The output of
 * ToUnicode is always a sequence of Unicode code points.
 *
 * Return value: Returns error condition, but it must only be used for
 *               debugging purposes.  The output buffer is always
 *               guaranteed to contain the correct data according to
 *               the specification (sans malloc induced errors).  NB!
 *               This means that you normally ignore the return code
 *               from this function, as checking it means breaking the
 *               standard.
 */
int
idna_to_unicode_44i (const uint32_t * in, size_t inlen,
		     uint32_t * out, size_t * outlen, int flags)
{
  /* Remember the caller's capacity; *outlen is rewritten further down. */
  const size_t capacity = *outlen;
  size_t ncopy;
  char *utf8;
  int rc;

  utf8 = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
  if (!utf8)
    return IDNA_MALLOC_ERROR;

  /* utf8 is freed in idna_to_unicode_internal. */
  rc = idna_to_unicode_internal (utf8, out, outlen, flags);
  if (rc == IDNA_SUCCESS)
    return rc;

  /* Conversion failed: hand back the input, truncated to what fits. */
  ncopy = inlen;
  if (capacity <= ncopy)
    ncopy = capacity;
  memcpy (out, in, sizeof (in[0]) * ncopy);
  *outlen = inlen;

  return rc;
}
/*
 * Worker that decodes the punycode part of an A-label into UTF-8.
 *
 * The first four characters of alabel are skipped without being inspected;
 * presumably the caller has already verified the "xn--" prefix (TODO confirm
 * against the callers).
 *
 * Arguments:
 *   alabel   NUL-terminated A-label, including its four-character prefix
 *   err      if non-NULL, receives an error message on failure
 *
 * Returns:   a copy of the UTF-8 text made with string_copyn(), or NULL when
 *            punycode decoding fails or the decoded code points cannot be
 *            converted to UTF-8.
 */
static uschar *
string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
{
  size_t p_len;
  punycode_uint * p;
  int rc;
  uschar * s, * res;

  DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
  alabel += 4;
  p_len = Ustrlen(alabel);
  p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));

  if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
    {
    if (err) *err = US punycode_strerror(rc);
    return NULL;
    }

  s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
  if (!s)
    {
    /* Without this check a failed conversion would hand NULL to
       string_copyn(). */
    if (err) *err = US"conversion of decoded alabel to utf8 failed";
    return NULL;
    }

  res = string_copyn(s, p_len);
  free(s);      /* s was allocated by the stringprep library, not by store_get() */
  return res;
}
/** * idna_to_ascii_4i * @in: input array with unicode code points. * @inlen: length of input array with unicode code points. * @out: output zero terminated string that must have room for at * least 63 characters plus the terminating zero. * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. * * The ToASCII operation takes a sequence of Unicode code points that make * up one label and transforms it into a sequence of code points in the * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the * resulting sequence are equivalent labels. * * It is important to note that the ToASCII operation can fail. ToASCII * fails if any step of it fails. If any step of the ToASCII operation * fails on any label in a domain name, that domain name MUST NOT be used * as an internationalized domain name. The method for deadling with this * failure is application-specific. * * The inputs to ToASCII are a sequence of code points, the AllowUnassigned * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a * sequence of ASCII code points or a failure condition. * * ToASCII never alters a sequence of code points that are all in the ASCII * range to begin with (although it could fail). Applying the ToASCII * operation multiple times has exactly the same effect as applying it just * once. * * Return value: Returns 0 on success, or an error code. */ int idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) { size_t len, outlen; uint32_t *src; /* XXX don't need to copy data? */ int rc; /* * ToASCII consists of the following steps: * * 1. If all code points in the sequence are in the ASCII range (0..7F) * then skip to step 3. 
*/ { size_t i; int inasciirange; inasciirange = 1; for (i = 0; i < inlen; i++) if (in[i] > 0x7F) inasciirange = 0; if (inasciirange) { src = malloc (sizeof (in[0]) * (inlen + 1)); if (src == NULL) return IDNA_MALLOC_ERROR; memcpy (src, in, sizeof (in[0]) * inlen); src[inlen] = 0; goto step3; } } /* * 2. Perform the steps specified in [NAMEPREP] and fail if there is * an error. The AllowUnassigned flag is used in [NAMEPREP]. */ { char *p; p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); if (p == NULL) return IDNA_MALLOC_ERROR; len = strlen (p); do { char *newp; len = 2 * len + 10; /* XXX better guess? */ newp = realloc (p, len); if (newp == NULL) { free (p); return IDNA_MALLOC_ERROR; } p = newp; if (flags & IDNA_ALLOW_UNASSIGNED) rc = stringprep_nameprep (p, len); else rc = stringprep_nameprep_no_unassigned (p, len); } while (rc == STRINGPREP_TOO_SMALL_BUFFER); if (rc != STRINGPREP_OK) { free (p); return IDNA_STRINGPREP_ERROR; } src = stringprep_utf8_to_ucs4 (p, -1, NULL); free (p); } step3: /* * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: * * (a) Verify the absence of non-LDH ASCII code points; that is, * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. * * (b) Verify the absence of leading and trailing hyphen-minus; * that is, the absence of U+002D at the beginning and end of * the sequence. */ if (flags & IDNA_USE_STD3_ASCII_RULES) { size_t i; for (i = 0; src[i]; i++) if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || (src[i] >= 0x3A && src[i] <= 0x40) || (src[i] >= 0x5B && src[i] <= 0x60) || (src[i] >= 0x7B && src[i] <= 0x7F)) { free (src); return IDNA_CONTAINS_NON_LDH; } if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) { free (src); return IDNA_CONTAINS_MINUS; } } /* * 4. If all code points in the sequence are in the ASCII range * (0..7F), then skip to step 8. 
*/ { size_t i; int inasciirange; inasciirange = 1; for (i = 0; src[i]; i++) { if (src[i] > 0x7F) inasciirange = 0; /* copy string to output buffer if we are about to skip to step8 */ if (i < 64) out[i] = src[i]; } if (i < 64) out[i] = '\0'; if (inasciirange) goto step8; } /* * 5. Verify that the sequence does NOT begin with the ACE prefix. * */ { size_t i; int match; match = 1; for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) match = 0; if (match) { free (src); return IDNA_CONTAINS_ACE_PREFIX; } } /* * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] * and fail if there is an error. */ for (len = 0; src[len]; len++) ; src[len] = '\0'; outlen = 63 - strlen (IDNA_ACE_PREFIX); rc = punycode_encode (len, src, NULL, &outlen, &out[strlen (IDNA_ACE_PREFIX)]); if (rc != PUNYCODE_SUCCESS) { free (src); return IDNA_PUNYCODE_ERROR; } out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; /* * 7. Prepend the ACE prefix. */ memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); /* * 8. Verify that the number of code points is in the range 1 to 63 * inclusive (0 is excluded). */ step8: free (src); if (strlen (out) < 1 || strlen (out) > 63) return IDNA_INVALID_LENGTH; return IDNA_SUCCESS; }
void doit (void) { char *p; int rc; size_t i; if (!stringprep_check_version (STRINGPREP_VERSION)) fail ("stringprep_check_version() failed\n"); for (i = 0; i < sizeof (strprep) / sizeof (strprep[0]); i++) { if (debug) printf ("STRINGPREP entry %d\n", i); if (debug) { printf ("flags: %d\n", strprep[i].flags); printf ("in: "); escapeprint (strprep[i].in, strlen (strprep[i].in)); hexprint (strprep[i].in, strlen (strprep[i].in)); binprint (strprep[i].in, strlen (strprep[i].in)); } { uint32_t *l; char *x; l = stringprep_utf8_to_ucs4 (strprep[i].in, -1, NULL); x = stringprep_ucs4_to_utf8 (l, -1, NULL, NULL); free (l); if (strcmp (strprep[i].in, x) != 0) { fail ("bad UTF-8 in entry %d\n", i); if (debug) { puts ("expected:"); escapeprint (strprep[i].in, strlen (strprep[i].in)); hexprint (strprep[i].in, strlen (strprep[i].in)); puts ("computed:"); escapeprint (x, strlen (x)); hexprint (x, strlen (x)); } } free (x); } rc = stringprep_profile (strprep[i].in, &p, strprep[i].profile ? strprep[i].profile : "Nameprep", strprep[i].flags); if (rc != strprep[i].rc) { fail ("stringprep() entry %d failed: %d\n", i, rc); if (debug) printf ("FATAL\n"); if (rc == STRINGPREP_OK) free (p); continue; } if (debug && rc == STRINGPREP_OK) { printf ("out: "); escapeprint (p, strlen (p)); hexprint (p, strlen (p)); binprint (p, strlen (p)); printf ("expected out: "); escapeprint (strprep[i].out, strlen (strprep[i].out)); hexprint (strprep[i].out, strlen (strprep[i].out)); binprint (strprep[i].out, strlen (strprep[i].out)); } else if (debug) printf ("returned %d expected %d\n", rc, strprep[i].rc); if (rc == STRINGPREP_OK) { if (strlen (strprep[i].out) != strlen (p) || memcmp (strprep[i].out, p, strlen (p)) != 0) { fail ("stringprep() entry %d failed\n", i); if (debug) printf ("ERROR\n"); } else if (debug) printf ("OK\n\n"); free (p); } else if (debug) printf ("OK\n\n"); } #if 0 { char p[20]; memset (p, 0, 10); stringprep_unichar_to_utf8 (0x00DF, p); hexprint (p, strlen (p)); puts (""); } #endif }
int main (int argc, char *argv[]) { struct gengetopt_args_info args_info; char readbuf[BUFSIZ]; char *p, *r; uint32_t *q; unsigned cmdn = 0; int rc; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); if (cmdline_parser (argc, argv, &args_info) != 0) return 1; if (!args_info.stringprep_given && !args_info.punycode_encode_given && !args_info.punycode_decode_given && !args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given) args_info.idna_to_ascii_given = 1; if ((args_info.stringprep_given ? 1 : 0) + (args_info.punycode_encode_given ? 1 : 0) + (args_info.punycode_decode_given ? 1 : 0) + (args_info.idna_to_ascii_given ? 1 : 0) + (args_info.idna_to_unicode_given ? 1 : 0) != 1) { fprintf (stderr, _("%s: Only one of -s, -e, -d, -a or -u can be specified.\n"), argv[0]); cmdline_parser_print_help (); return 1; } if (!args_info.quiet_given) fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION); if (args_info.debug_given) fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ()); if (!args_info.quiet_given && args_info.inputs_num == 0) fprintf (stderr, _("Type each input string on a line by itself, " "terminated by a newline character.\n")); do { if (cmdn < args_info.inputs_num) { strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1); readbuf[BUFSIZ - 1] = '\0'; } else if (fgets (readbuf, BUFSIZ, stdin) == NULL) { sprintf (readbuf, _("%s: fgets() failed: "), argv[0]); if (!feof (stdin)) perror (readbuf); return 1; } if (readbuf[strlen (readbuf) - 1] == '\n') readbuf[strlen (readbuf) - 1] = '\0'; if (args_info.stringprep_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, 
_("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = stringprep_profile (p, &r, args_info.profile_given ? args_info.profile_arg : "Nameprep", 0); free (p); if (rc != STRINGPREP_OK) { fprintf (stderr, _("%s: stringprep_profile() failed with error %d.\n"), argv[0], rc); return 1; } q = stringprep_utf8_to_ucs4 (r, -1, NULL); if (!q) { free (r); fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } free (q); p = stringprep_utf8_to_locale (r); free (r); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_encode_given) { size_t len, len2; p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, &len); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } len2 = BUFSIZ; rc = punycode_encode (len, q, NULL, &len2, readbuf); free (q); if (rc != PUNYCODE_SUCCESS) { fprintf (stderr, _("%s: punycode_encode() failed with error %d.\n"), argv[0], rc); return 1; } readbuf[len2] = '\0'; p = stringprep_utf8_to_locale (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.punycode_decode_given) { size_t len; len = BUFSIZ; q = (uint32_t *) malloc (len * sizeof (q[0])); if (!q) { sprintf (readbuf, _("%s: malloc() failed: "), argv[0]); perror (readbuf); return 1; } rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL); if (rc != 
PUNYCODE_SUCCESS) { free (q); fprintf (stderr, _("%s: punycode_decode() failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; i < len; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } q[len] = 0; r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_ascii_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); free (p); if (!q) { fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } rc = idna_to_ascii_4z (q, &p, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? IDNA_USE_STD3_ASCII_RULES : 0)); free (q); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_ascii_4z() failed " "with error %d.\n"), argv[0], rc); return 1; } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: TLD idna_to_unicode_8z8z() failed " "with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("tld[%d] = U+%04x\n"), i, q[i]); } rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } free (r); } #endif if (args_info.debug_given) { size_t i; for (i = 0; p[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, p[i]); } fprintf (stdout, "%s\n", p); free (p); } if (args_info.idna_to_unicode_given) { p = stringprep_locale_to_utf8 (readbuf); if (!p) { fprintf (stderr, _("%s: could not convert from %s to UTF-8.\n"), argv[0], stringprep_locale_charset ()); return 1; } q = stringprep_utf8_to_ucs4 (p, -1, NULL); if (!q) { free (p); fprintf (stderr, _("%s: could not convert from UCS-4 to UTF-8.\n"), argv[0]); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("input[%d] = U+%04x\n"), i, q[i]); } free (q); rc = idna_to_unicode_8z4z (p, &q, (args_info.allow_unassigned_given ? IDNA_ALLOW_UNASSIGNED : 0) | (args_info.usestd3asciirules_given ? 
IDNA_USE_STD3_ASCII_RULES : 0)); free (p); if (rc != IDNA_SUCCESS) { fprintf (stderr, _("%s: idna_to_unicode_8z4z() " "failed with error %d.\n"), argv[0], rc); return 1; } if (args_info.debug_given) { size_t i; for (i = 0; q[i]; i++) fprintf (stderr, _("output[%d] = U+%04x\n"), i, q[i]); } #ifdef WITH_TLD if (args_info.tld_flag) { size_t errpos; rc = tld_check_4z (q, &errpos, NULL); if (rc == TLD_INVALID) { fprintf (stderr, _("%s: string rejected by TLD test " "(Unicode position %d)\n"), argv[0], errpos); free (q); return 1; } if (rc != TLD_SUCCESS) { fprintf (stderr, _("%s: tld_check_4z failed with error %d.\n"), argv[0], rc); free (q); return 1; } } #endif r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL); free (q); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to UCS-4.\n"), argv[0]); return 1; } p = stringprep_utf8_to_locale (r); free (r); if (!r) { fprintf (stderr, _("%s: could not convert from UTF-8 to %s.\n"), argv[0], stringprep_locale_charset ()); return 1; } fprintf (stdout, "%s\n", p); free (p); } } while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 || cmdn < args_info.inputs_num)); return 0; }
void doit (void) { size_t i; int rc; for (i = 0; i < sizeof (tv) / sizeof (tv[0]); i++) { if (debug) { uint32_t *p, *q; printf ("PR29 entry %ld: %s\n", i, tv[i].name); printf ("in:\n"); ucs4print (tv[i].in, tv[i].inlen); printf ("nfkc:\n"); p = stringprep_ucs4_nfkc_normalize (tv[i].in, tv[i].inlen); ucs4print (p, -1); printf ("second nfkc:\n"); q = stringprep_ucs4_nfkc_normalize (p, -1); ucs4print (q, -1); free (p); free (q); } rc = pr29_4 (tv[i].in, tv[i].inlen); if (rc != tv[i].rc) { fail ("PR29 entry %ld failed (expected %d): %d\n", i, tv[i].rc, rc); if (debug) printf ("FATAL\n"); continue; } rc = pr29_4z (tv[i].in); if (rc != tv[i].rc) { fail ("PR29 entry %ld failed (expected %d): %d\n", i, tv[i].rc, rc); if (debug) printf ("FATAL\n"); continue; } { char *p; size_t items_read, items_written; p = stringprep_ucs4_to_utf8 (tv[i].in, (ssize_t) tv[i].inlen, &items_read, &items_written); if (p == NULL) fail ("FAIL: stringprep_ucs4_to_utf8(tv[%ld]) == NULL\n", i); if (debug) hexprint (p, strlen (p)); rc = pr29_8z (p); free (p); if (rc != tv[i].rc) { fail ("PR29 entry %ld failed (expected %d): %d\n", i, tv[i].rc, rc); if (debug) printf ("FATAL\n"); continue; } } if (debug) { if (tv[i].rc != PR29_SUCCESS) printf ("EXPECTED FAIL\n"); else printf ("OK\n"); } } }