/*
 * Re-flow the text in BUF and return the result in a newly created
 * STRBUF.  BUF itself is not modified.
 *
 * width == -1 disables wrapping: the result is a plain copy of BUF.
 *
 * Otherwise:
 *  - the output starts with one line feed;
 *  - every '\n' of the input is copied through, and blanks that
 *    directly follow a run of newlines are dropped;
 *  - once more than WIDTH positions have been counted on the current
 *    line and a blank has been seen, the line is broken at the most
 *    recent blank;
 *  - one '\n' is appended after the scan.
 *
 * Input text that follows the final '\n' and has not been flushed by a
 * width-triggered break is not copied to the output.
 * NOTE(review): presumably callers always pass newline-terminated text
 * -- confirm.
 *
 * All reads are bounded by strbuf_len(buf); the scan does not rely on a
 * terminator being stored behind the text.
 */
STRBUF *wrap(STRBUF *buf, int width)
{
	const char *lf = "\n";
	const size_t lflen = strlen(lf);
	const char *bufp;
	const char *end;
	const char *last;              /* start of the text not yet copied */
	const char *lastspace = NULL;  /* most recent blank on this line */
	size_t linelen = 0;
	STRBUF *out = strbuf_new();

	bufp = strbuf_get(buf);
	end = bufp + strbuf_len(buf);
	last = bufp;

	if (width == -1) {
		strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
		return out;
	}

	strbuf_append_n(out, lf, lflen);
	while (bufp < end) {
		if (*bufp == ' ') {
			lastspace = bufp;
		} else if (*bufp == '\n') {
			/* flush the pending line, then one line feed per '\n' */
			strbuf_append_n(out, last, (size_t)(bufp - last));
			do {
				strbuf_append_n(out, lf, lflen);
			} while (++bufp < end && *bufp == '\n');
			lastspace = NULL;

			/* drop blanks at the start of the next line */
			while (bufp < end && *bufp == ' ')
				bufp++;
			last = bufp;
			linelen = 0;
		}

		if (NULL != lastspace && (int)linelen > width) {
			/* break the line at the most recent blank */
			strbuf_append_n(out, last, (size_t)(lastspace - last));
			strbuf_append_n(out, lf, lflen);
			last = lastspace;
			lastspace = NULL;
			linelen = (size_t)(bufp - last);

			while (last < end && *last == ' ')
				last++;
			if (last > bufp)
				bufp = last;
		}

		/* nothing left to look at: do not step or read past the end */
		if (bufp >= end)
			break;

		bufp++;
		linelen++;

		/*
		 * Skip ahead by the table entry for bytes above 0x80.
		 * NOTE(review): utf8_length is defined elsewhere; presumably it
		 * holds the number of continuation bytes so that a multi-byte
		 * character counts as one position -- confirm.
		 */
		if (bufp < end && (unsigned char)*bufp > 0x80) {
			size_t step = (size_t)utf8_length[(unsigned char)*bufp - 0x80];
			size_t room = (size_t)(end - bufp);

			bufp += (step < room) ? step : room;
		}
	}
	strbuf_append_n(out, "\n", 1);
	return out;
}
/*
 * Convert the string held in FROM and store the result in TO.
 *
 * The FLAG_REVERSE and FLAG_SELECTIVE bits of FLAGS select the routine:
 *   neither        -> idn_res_encodename()
 *   FLAG_REVERSE   -> idn_res_decodename()
 *   FLAG_SELECTIVE -> selective_encode()
 *   both           -> selective_decode()
 *
 * Whenever the routine reports idn_buffer_overflow, TO is enlarged with
 * strbuf_double() and the conversion is tried again.
 *
 * Returns the result of the last conversion attempt, or idn_nomemory
 * when TO could not be enlarged.
 */
static idn_result_t
convert_line (idnconv_strbuf_t * from, idnconv_strbuf_t * to,
	      idn_resconf_t conf, idn_action_t actions, int flags)
{
  idn_result_t result = idn_success;
  char *src = strbuf_get (from);
  const int mode = flags & (FLAG_REVERSE | FLAG_SELECTIVE);

  do
    {
      /* TO may have been reallocated by the previous round. */
      char *dst = strbuf_get (to);
      size_t dst_size = strbuf_size (to);

      if (mode == 0)
	result = idn_res_encodename (conf, actions, src, dst, dst_size);
      else if (mode == FLAG_REVERSE)
	result = idn_res_decodename (conf, actions, src, dst, dst_size);
      else if (mode == FLAG_SELECTIVE)
	result = selective_encode (conf, actions, src, dst, dst_size);
      else if (mode == (FLAG_REVERSE | FLAG_SELECTIVE))
	result = selective_decode (conf, actions, src, dst, dst_size);

      /*
       * The target buffer was too small: double its size and retry.
       */
      if (result == idn_buffer_overflow && strbuf_double (to) == NULL)
	{
	  /* oops. allocation failed. */
	  return (idn_nomemory);
	}
    }
  while (result == idn_buffer_overflow);

  return (result);
}
/*
 * Pass-through variant of conv(): no character set conversion is done.
 * Returns a newly created STRBUF holding a copy of the strbuf_len(buf)
 * bytes of BUF.  The converter handle IC is not used.
 */
static STRBUF *conv(iconv_t ic, STRBUF *buf)
{
	STRBUF *copy = strbuf_new();

	(void)ic;
	strbuf_append_n(copy, strbuf_get(buf), strbuf_len(buf));

	return copy;
}
/*
 * If the string in BUF ends with a newline, trim it and
 * return 1.  Otherwise, just return 0 without modifying BUF.
 *
 * An empty string has no last character and is left untouched; the
 * length check keeps the function from looking at the byte in front
 * of the buffer.
 */
static int trim_newline(idnconv_strbuf_t *buf)
{
	char *s = strbuf_get(buf);
	size_t len = strlen(s);

	if (len > 0 && s[len - 1] == '\n') {
		s[len - 1] = '\0';
		return (1);
	}

	return (0);
}
/*
 * Write the strbuf_len(outbuf) bytes of OUTBUF to the file FILENAME.
 *
 * The file is created if necessary (mode 0644) and truncated.  Any
 * failure to open, write or close the file prints a message to stderr
 * and terminates the program with EXIT_FAILURE.
 */
static void write_to_file(STRBUF *outbuf, const char *filename)
{
	const char *data = strbuf_get(outbuf);
	size_t remaining = strbuf_len(outbuf);
	int fd;

	fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1) {
		fprintf(stderr, "Can't open %s: %s\n",
			filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	/*
	 * write() may transfer fewer bytes than requested, so keep going
	 * until everything is on its way; an interrupted call is retried.
	 */
	while (remaining > 0) {
		ssize_t len = write(fd, data, remaining);

		if (len == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "Can't write to %s: %s\n",
				filename, strerror(errno));
			exit(EXIT_FAILURE);
		}
		data += len;
		remaining -= (size_t)len;
	}

	/* deferred write errors can surface only when the file is closed */
	if (close(fd) == -1) {
		fprintf(stderr, "Can't write to %s: %s\n",
			filename, strerror(errno));
		exit(EXIT_FAILURE);
	}
}
/*
 * Decode the internationalized domain names read from FP, one line at
 * a time, and write every converted line to stdout.
 *
 * CONF1 drives the input conversion and the decoding step; CONF2 is
 * used for the final step only when the local converter is ASCII
 * compatible and FLAG_SELECTIVE is set.  FLAGS selects the checks and
 * mappings to apply.
 *
 * The global line_number is set to 1 and advanced after every line
 * that was converted successfully.
 *
 * Returns 0 when all lines were processed, 1 after the first failure
 * (which is reported through errormsg()).
 */
static int
decode_file (idn_resconf_t conf1, idn_resconf_t conf2, FILE * fp, int flags)
{
  idnconv_strbuf_t buf1, buf2;
  idn_action_t actions1, actions2;
  idn_converter_t converter;
  idn_result_t r;
  int idn_ace_hack, local_ace_hack;
  int nl_trimmed;
  int status = 1;
  const int selective = (flags & FLAG_SELECTIVE) != 0;

  /*
   * See if the input codeset is an ACE.
   */
  converter = idn_resconf_getidnconverter (conf1);
  idn_ace_hack = (converter != NULL
		  && idn_converter_isasciicompatible (converter)
		  && selective);
  if (converter != NULL)
    idn_converter_destroy (converter);

  converter = idn_resconf_getlocalconverter (conf1);
  local_ace_hack = (converter != NULL
		    && idn_converter_isasciicompatible (converter)
		    && selective);
  if (converter != NULL)
    idn_converter_destroy (converter);

  /* Actions for the decoding step. */
  actions1 = IDN_IDNCONV;
  if (flags & FLAG_DELIMMAP)
    actions1 |= IDN_DELIMMAP;
  if (flags & FLAG_MAP)
    actions1 |= IDN_MAP;
  if (flags & FLAG_NORMALIZE)
    actions1 |= IDN_NORMALIZE;
  if (flags & FLAG_PROHIBITCHECK)
    actions1 |= IDN_PROHCHECK;
  if (flags & FLAG_UNASSIGNCHECK)
    actions1 |= IDN_UNASCHECK;
  if (flags & FLAG_BIDICHECK)
    actions1 |= IDN_BIDICHECK;
  if (flags & FLAG_ASCIICHECK)
    actions1 |= IDN_ASCCHECK;
  if (flags & FLAG_ROUNDTRIPCHECK)
    actions1 |= IDN_RTCHECK;

  /* Actions for the final step. */
  if (!local_ace_hack)
    {
      actions2 = IDN_LOCALCONV;
    }
  else
    {
      actions2 = IDN_IDNCONV;
      if (flags & FLAG_MAP)
	actions2 |= IDN_MAP;
      if (flags & FLAG_NORMALIZE)
	actions2 |= IDN_NORMALIZE;
      if (flags & FLAG_PROHIBITCHECK)
	actions2 |= IDN_PROHCHECK;
      if (flags & FLAG_UNASSIGNCHECK)
	actions2 |= IDN_UNASCHECK;
      if (flags & FLAG_BIDICHECK)
	actions2 |= IDN_BIDICHECK;
      if (flags & FLAG_ASCIICHECK)
	actions2 |= IDN_ASCCHECK;
      if (flags & FLAG_LENGTHCHECK)
	actions2 |= IDN_LENCHECK;
    }

  strbuf_init (&buf1);
  strbuf_init (&buf2);

  for (line_number = 1; strbuf_getline (&buf1, fp) != NULL; line_number++)
    {
      /*
       * Trim newline at the end.  This is needed for
       * those ascii-compatible encodings such as UTF-5 or RACE
       * not to try converting newlines, which will result
       * in `invalid encoding' error.
       */
      nl_trimmed = trim_newline (&buf1);

      /*
       * Treat input line as the string encoded in local
       * encoding and convert it to UTF-8 encoded string.
       */
      if (local_ace_hack)
	r = (strbuf_copy (&buf2, strbuf_get (&buf1)) == NULL)
	  ? idn_nomemory : idn_success;
      else
	r = convert_line (&buf1, &buf2, conf1, IDN_LOCALCONV, 0);

      if (r != idn_success)
	{
	  errormsg ("conversion failed at line %d: %s\n",
		    line_number, idn_result_tostring (r));
	  goto cleanup;
	}

      /*
       * Convert internationalized domain names in the line.
       */
      r = convert_line (&buf2, &buf1, conf1, actions1,
			idn_ace_hack
			? (FLAG_REVERSE | FLAG_SELECTIVE) : FLAG_REVERSE);
      if (r != idn_success)
	{
	  errormsg ("conversion failed at line %d: %s\n",
		    line_number, idn_result_tostring (r));
	  goto cleanup;
	}

      if (!idn_utf8_isvalidstring (strbuf_get (&buf1)))
	{
	  errormsg ("conversion to utf-8 failed at line %d\n", line_number);
	  goto cleanup;
	}

      /*
       * Perform round trip check and convert to the output
       * codeset.
       */
      r = local_ace_hack
	? convert_line (&buf1, &buf2, conf2, actions2, FLAG_SELECTIVE)
	: convert_line (&buf1, &buf2, conf1, actions2, FLAG_REVERSE);
      if (r != idn_success)
	{
	  errormsg ("error in nameprep or output conversion "
		    "at line %d: %s\n", line_number, idn_result_tostring (r));
	  goto cleanup;
	}

      fputs (strbuf_get (&buf2), stdout);
      /* put back the newline that was trimmed from the input line */
      if (nl_trimmed)
	putc ('\n', stdout);

      if (flush_every_line)
	fflush (stdout);
    }

  /* The input was consumed without an error. */
  status = 0;

cleanup:
  strbuf_reset (&buf1);
  strbuf_reset (&buf2);
  return (status);
}
int main(int argc, const char **argv) { struct stat st; iconv_t ic; STRBUF *wbuf; STRBUF *docbuf; STRBUF *outbuf; int i = 1; (void)setlocale(LC_ALL, ""); while (argv[i]) { if (!strcmp(argv[i], "--raw")) { opt_raw = 1; i++; continue; } else if (!strcmp(argv[i], "--raw-input")) { opt_raw_input = 1; i++; continue; } else if (!strncmp(argv[i], "--encoding=", 11)) { size_t arglen = strlen(argv[i]) - 10; #ifdef iconvlist if (!strcmp(argv[i] + 11, "list")) { show_iconvlist(); } #endif opt_encoding = ymalloc(arglen); memcpy(opt_encoding, argv[i] + 11, arglen); i++; continue; } else if (!strncmp(argv[i], "--width=", 8)) { opt_width = atoi(argv[i] + 8); if(opt_width < 3 && opt_width != -1) { fprintf(stderr, "Invalid value for width: %s\n", argv[i] + 8); exit(EXIT_FAILURE); } i++; continue; } else if (!strcmp(argv[i], "--force")) { // ignore this setting i++; continue; } else if (!strncmp(argv[i], "--output=", 9)) { if (*(argv[i] + 9) != '-') { size_t arglen = strlen(argv[i]) - 8; opt_output = ymalloc(arglen); memcpy(opt_output, argv[i] + 9, arglen); } i++; continue; } else if (!strncmp(argv[i], "--subst=", 8)) { if (!strcmp(argv[i] + 8, "none")) opt_subst = SUBST_NONE; else if (!strcmp(argv[i] + 8, "some")) opt_subst = SUBST_SOME; else if (!strcmp(argv[i] + 8, "all")) opt_subst = SUBST_ALL; else { fprintf(stderr, "Invalid value for --subst: %s\n", argv[i] + 8); exit(EXIT_FAILURE); } i++; continue; } else if (!strcmp(argv[i], "--help")) { usage(); } else if (!strcmp(argv[i], "--version") || !strcmp(argv[i], "-v")) { version_info(); } else if (!strcmp(argv[i], "-")) { usage(); } else { if(opt_filename) usage(); opt_filename = argv[i]; i++; continue; } } if(opt_encoding && !strcmp("show", opt_encoding)) { yfree(opt_encoding); opt_encoding = guess_encoding(); printf("%s\n", opt_encoding); yfree(opt_encoding); exit(EXIT_SUCCESS); } if(opt_raw) opt_width = -1; if(!opt_filename) usage(); if(!opt_encoding) { opt_encoding = guess_encoding(); } ic = init_conv("UTF-8", opt_encoding); 
if (0 != stat(opt_filename, &st)) { fprintf(stderr, "%s: %s\n", opt_filename, strerror(errno)); exit(EXIT_FAILURE); } /* read content.xml */ docbuf = opt_raw_input ? read_from_xml(opt_filename, "content.xml") : read_from_zip(opt_filename, "content.xml"); if (!opt_raw) { subst_doc(ic, docbuf); format_doc(docbuf, opt_raw_input); } wbuf = wrap(docbuf, opt_width); /* remove all trailing whitespace */ (void) regex_subst(wbuf, " +\n", _REG_GLOBAL, "\n"); outbuf = conv(ic, wbuf); if (opt_output) write_to_file(outbuf, opt_output); else fwrite(strbuf_get(outbuf), strbuf_len(outbuf), 1, stdout); finish_conv(ic); strbuf_free(wbuf); strbuf_free(docbuf); strbuf_free(outbuf); #ifndef NO_ICONV yfree(opt_encoding); #endif if (opt_output) yfree(opt_output); return EXIT_SUCCESS; }
/*
 * Run the contents of BUF through the iconv converter IC and return
 * the converted text in a newly created STRBUF.
 *
 * The output buffer starts at 4096 bytes and grows in steps of the
 * same size.  Input that iconv rejects (EILSEQ, EINVAL) is skipped and
 * replaced by a single '?' in the output.  If iconv keeps asking for
 * more room until the output exceeds eight times the input length, or
 * if it fails with any other error, a message is printed to stderr and
 * the program exits with EXIT_FAILURE.
 */
static STRBUF *conv(iconv_t ic, STRBUF *buf)
{
	/* FIXME: This functionality belongs into strbuf.c */
	ICONV_CHAR *doc;
	char *out, *outbuf;
	size_t inleft, outleft;
	size_t r;
	size_t outlen;
	const size_t alloc_step = 4096;
	STRBUF *output;

	inleft = strbuf_len(buf);
	doc = (ICONV_CHAR*)strbuf_get(buf);
	outlen = alloc_step;
	outleft = alloc_step;
	outbuf = ymalloc(alloc_step);
	out = outbuf;

	do {
		if (!outleft) {
			outlen += alloc_step;
			outleft += alloc_step;
			yrealloc_buf(&outbuf, &out, outlen);
		}
		r = iconv(ic, &doc, &inleft, &out, &outleft);
		if (r == (size_t)-1) {
			if (errno == E2BIG) {
				outlen += alloc_step;
				outleft += alloc_step;
				if (outlen > (strbuf_len(buf) << 3)) {
					fprintf(stderr, "Buffer grew to much. "
						"Corrupted document?\n");
					exit(EXIT_FAILURE);
				}
				yrealloc_buf(&outbuf, &out, outlen);
				continue;
			} else if ((errno == EILSEQ) || (errno == EINVAL)) {
				size_t skip = 1;

				/* advance in source buffer */
				if (inleft > 0 && (unsigned char)*doc > 0x80)
					skip += (size_t)utf8_length[(unsigned char)*doc - 0x80];
				/*
				 * A sequence cut off at the end of the input
				 * is shorter than the table says; never skip
				 * more than what is left, or inleft would wrap
				 * around.
				 */
				if (skip > inleft)
					skip = inleft;
				doc += skip;
				inleft -= skip;

				/*
				 * advance in output buffer; iconv may have
				 * filled it completely, so make room for the
				 * replacement character first
				 */
				if (!outleft) {
					outlen += alloc_step;
					outleft += alloc_step;
					yrealloc_buf(&outbuf, &out, outlen);
				}
				*out = '?';
				out++;
				outleft--;
				continue;
			}
			fprintf(stderr, "iconv returned: %s\n", strerror(errno));
			exit(EXIT_FAILURE);
		}
	} while (inleft != 0);

	if (!outleft) {
		/*
		 * Room for the terminator is needed.  The block may move, so
		 * re-derive the write position from its offset instead of
		 * using the old pointer.
		 */
		size_t used = (size_t)(out - outbuf);

		outbuf = yrealloc(outbuf, outlen + 1);
		out = outbuf + used;
	}
	*out = '\0';

	output = strbuf_slurp_n(outbuf, (size_t)(out - outbuf));
	strbuf_setopt(output, STRBUF_NULLOK);
	return output;
}
/*
 * Replace matches of the extended regular expression REGEX in BUF.
 *
 * REGOPT is a bit set:
 *   _REG_GLOBAL  keep searching after the first match;
 *   _REG_EXEC    SUBST is a pointer to a function that is called with
 *                the buffer text, the match array, its size and the
 *                current offset; the string it returns is used as the
 *                replacement and released with yfree() afterwards.
 * Without _REG_EXEC, SUBST is the replacement string itself.
 *
 * Returns the number of replacements made.  A REGEX that does not
 * compile is reported through print_regexp_err() and terminates the
 * program with EXIT_FAILURE.
 */
int regex_subst(STRBUF *buf, const char *regex, int regopt, const void *subst)
{
	/* shape of the callback passed in SUBST when _REG_EXEC is set */
	typedef char *(*subst_fn)(const char *buf, regmatch_t matches[],
				  size_t nmatch, size_t off);

	regmatch_t matches[10];
	const size_t nmatches = 10;
	regex_t rx;
	size_t off = 0;		/* where in BUF the next search starts */
	int match_count = 0;
	int rc;

	rc = regcomp(&rx, regex, REG_EXTENDED);
	if (rc) {
		print_regexp_err(rc, &rx);
		exit(EXIT_FAILURE);
	}

	for (;;) {
		const char *bufp;
		int eflags = 0;

		if (off > strbuf_len(buf))
			break;

		bufp = strbuf_get(buf) + off;

#ifdef REG_STARTEND
		/* hand the remaining length to regexec() explicitly */
		matches[0].rm_so = 0;
		matches[0].rm_eo = strbuf_len(buf) - off;
		eflags = REG_STARTEND;
#endif
		if (regexec(&rx, bufp, nmatches, matches, eflags) != 0)
			break;

		if (matches[0].rm_so != -1) {
			char *replacement;
			int subst_ret;

			if (regopt & _REG_EXEC)
				replacement = ((subst_fn)subst)(strbuf_get(buf),
								matches,
								nmatches, off);
			else
				replacement = (char*)subst;

			/* match offsets are relative to bufp; make them absolute */
			subst_ret = strbuf_subst(buf,
						 matches[0].rm_so + off,
						 matches[0].rm_eo + off,
						 replacement);
			match_count++;

			if (regopt & _REG_EXEC)
				yfree(replacement);

			/*
			 * Continue behind the start of the match; a
			 * non-negative strbuf_subst() result moves the offset
			 * on by that value plus one.
			 */
			off += matches[0].rm_so;
			if (subst_ret >= 0)
				off += subst_ret + 1;
		}

		if (!(regopt & _REG_GLOBAL))
			break;
	}

	regfree(&rx);
	return match_count;
}