Exemplo n.º 1
0
/*
 * Re-flow the text held in buf into a newly created STRBUF and return it.
 *
 * width == -1: the content of buf is copied unchanged.
 * otherwise:   the output starts with a line feed; every '\n' found in the
 *              input is copied through (spaces directly following a run of
 *              newlines are skipped); and once the running line length
 *              exceeds width, the line is broken at the most recently seen
 *              space.  A final "\n" is always appended.
 *
 * buf is accessed only through strbuf_get() and strbuf_len().
 *
 * NOTE(review): nothing after the loop appends the text between `last` and
 * the end of the input, so a trailing fragment that is not followed by '\n'
 * does not reach the output -- presumably callers always pass
 * newline-terminated text; confirm.
 * NOTE(review): the inner loops test *bufp / *last without a length check
 * and so appear to rely on the buffer being NUL-terminated -- confirm
 * against strbuf_get().
 */
STRBUF *wrap(STRBUF *buf, int width)
{
	const char *lf = "\n";
	const size_t lflen = strlen(lf);
	const char *bufp;                 /* current read position */
	const char *last;                 /* start of the not-yet-emitted text */
	const char *lastspace = 0;        /* latest break candidate on this line */
	size_t linelen = 0;               /* loop steps taken on the current line */
	STRBUF *out = strbuf_new();

	bufp = strbuf_get(buf);
	last = bufp;

	/* -1 disables wrapping: hand back a plain copy. */
	if (width == -1) {
		strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
		return out;
	}

	strbuf_append_n(out, lf, lflen);
	while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
		if (*bufp == ' ')
			lastspace = bufp;
		else if (*bufp == '\n') {
			/* Flush the pending line, then one line feed per '\n'. */
			strbuf_append_n(out, last, (size_t)(bufp - last));
			do {
				strbuf_append_n(out, lf, lflen);
			} while (*++bufp == '\n');
			lastspace = NULL;

			/* Drop leading spaces of the next line. */
			while(*bufp == ' ') {
				bufp++;
			}
			last = bufp;
			linelen = 0;
		}

		/* Line too long and a break candidate exists: break there. */
		if (NULL != lastspace && (int)linelen > width) {
			strbuf_append_n(out, last, (size_t)(lastspace - last));
			strbuf_append_n(out, lf, lflen);
			last = lastspace;
			lastspace = NULL;
			/* Text already scanned past the break belongs to the new line. */
			linelen = (size_t)(bufp - last);

			/* The new line must not start with spaces. */
			while(*last == ' ') {
				last++;
			}
			if(last > bufp)
				bufp = last;
		}

		bufp++;
		linelen++;
		/*
		 * Bytes above 0x80 advance bufp by an extra amount taken from
		 * utf8_length (defined elsewhere) -- presumably the number of
		 * remaining bytes of a multi-byte UTF-8 sequence, so that such a
		 * sequence counts as one step of linelen; TODO confirm.
		 */
		if ((unsigned char)*bufp > 0x80)
			bufp += utf8_length[(unsigned char)*bufp - 0x80];
	}
	strbuf_append_n(out, "\n", 1);
	return out;
}
Exemplo n.º 2
0
/*
 * Convert the string stored in `from` and place the result in `to`.
 *
 * The FLAG_REVERSE / FLAG_SELECTIVE bits of `flags` pick the routine:
 *   neither   -> idn_res_encodename
 *   REVERSE   -> idn_res_decodename
 *   SELECTIVE -> selective_encode
 *   both      -> selective_decode
 *
 * Whenever the routine reports idn_buffer_overflow, `to` is doubled with
 * strbuf_double() and the conversion is attempted again.
 *
 * Returns the result of the last conversion attempt, or idn_nomemory when
 * strbuf_double() returns NULL.
 */
static idn_result_t
convert_line (idnconv_strbuf_t * from, idnconv_strbuf_t * to, idn_resconf_t conf, idn_action_t actions, int flags)
{
    const int mode = flags & (FLAG_REVERSE | FLAG_SELECTIVE);
    char *src = strbuf_get (from);
    idn_result_t status = idn_success;

    do
    {
        /* Re-read the target on every pass: it may have been enlarged. */
        char *dst = strbuf_get (to);
        size_t dst_size = strbuf_size (to);

        if (mode == 0)
            status = idn_res_encodename (conf, actions, src, dst, dst_size);
        else if (mode == FLAG_REVERSE)
            status = idn_res_decodename (conf, actions, src, dst, dst_size);
        else if (mode == FLAG_SELECTIVE)
            status = selective_encode (conf, actions, src, dst, dst_size);
        else if (mode == (FLAG_REVERSE | FLAG_SELECTIVE))
            status = selective_decode (conf, actions, src, dst, dst_size);

        /* Anything other than "target too small" is final. */
        if (status != idn_buffer_overflow)
            return (status);
    }
    while (strbuf_double (to) != NULL);

    /* Enlarging the target buffer failed. */
    return (idn_nomemory);
}
Exemplo n.º 3
0
/*
 * Pass-through variant of the conversion step: returns a newly created
 * STRBUF holding a copy of the bytes in buf.  The descriptor `ic` is not
 * used by this variant.
 */
static STRBUF *conv(iconv_t ic, STRBUF *buf) {
	STRBUF *copy = strbuf_new();

	strbuf_append_n(copy, strbuf_get(buf), strbuf_len(buf));
	return copy;
}
Exemplo n.º 4
0
static int
trim_newline(idnconv_strbuf_t *buf) {
	/*
	 * If the string in BUF ends with a newline, trim it and
	 * return 1.  Otherwise, just return 0 without modifying BUF.
	 *
	 * An empty string is left alone as well: without the length
	 * check, `len - 1` would wrap around and s[len - 1] would read
	 * (and possibly write) outside the buffer.
	 */
	char *s = strbuf_get(buf);
	size_t len = strlen(s);

	if (len > 0 && s[len - 1] == '\n') {
		s[len - 1] = '\0';
		return (1);
	}

	return (0);
}
Exemplo n.º 5
0
/*
 * Write the whole content of outbuf to `filename`, creating the file with
 * mode 0644 or truncating it when it already exists.
 *
 * Any failure to open, write or close the file prints a message to stderr
 * and terminates the process with EXIT_FAILURE.
 */
static void write_to_file(STRBUF *outbuf, const char *filename)
{
	const char *data;
	size_t remaining;
	int fd;

	fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1) {
		fprintf(stderr, "Can't open %s: %s\n", filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	data = strbuf_get(outbuf);
	remaining = strbuf_len(outbuf);

	/*
	 * write() may transfer fewer bytes than requested, so keep going
	 * until everything has been handed to the kernel.
	 */
	while (remaining > 0) {
		ssize_t written = write(fd, data, remaining);

		if (written == -1) {
			/* Interrupted before anything was written: retry. */
			if (errno == EINTR)
				continue;
			fprintf(stderr, "Can't write to %s: %s\n", filename, strerror(errno));
			exit(EXIT_FAILURE);
		}

		data += written;
		remaining -= (size_t)written;
	}

	/* Deferred write errors can be reported by close(). */
	if (close(fd) == -1) {
		fprintf(stderr, "Can't write to %s: %s\n", filename, strerror(errno));
		exit(EXIT_FAILURE);
	}
}
Exemplo n.º 6
0
/*
 * Read fp line by line, run every line through three conversion steps and
 * write the result to stdout.
 *
 * conf1 is used for all steps except the final one when local_ace_hack is
 * set, which uses conf2.  flags selects the actions (FLAG_* bits) passed
 * to the conversion steps.
 *
 * Returns 0 once strbuf_getline() reports no more lines, or 1 as soon as a
 * step fails (the failure is reported through errormsg()).  Both work
 * buffers are passed to strbuf_reset() before returning.
 *
 * Also updates line_number and consults flush_every_line, both of which
 * are declared outside this function.
 */
static int decode_file (idn_resconf_t conf1, idn_resconf_t conf2, FILE * fp, int flags)
{
    idn_result_t r;

    idnconv_strbuf_t buf1, buf2;

    idn_action_t actions1, actions2;

    int nl_trimmed;

    int local_ace_hack, idn_ace_hack;

    idn_converter_t conv;

    /*
     * See if the input codeset is an ACE.
     * The hack is only enabled together with FLAG_SELECTIVE.
     */
    conv = idn_resconf_getidnconverter (conf1);
    if (conv != NULL && idn_converter_isasciicompatible (conv) && (flags & FLAG_SELECTIVE))
        idn_ace_hack = 1;
    else
        idn_ace_hack = 0;
    if (conv != NULL)
        idn_converter_destroy (conv);

    /* Same test for the local converter of conf1. */
    conv = idn_resconf_getlocalconverter (conf1);
    if (conv != NULL && idn_converter_isasciicompatible (conv) && (flags & FLAG_SELECTIVE))
        local_ace_hack = 1;
    else
        local_ace_hack = 0;
    if (conv != NULL)
        idn_converter_destroy (conv);

    actions1 = IDN_IDNCONV;

    /*
     * actions2 drives the final step: with local_ace_hack it carries the
     * nameprep-related actions requested in flags, otherwise it is just
     * the local conversion.
     */
    if (local_ace_hack)
    {
        actions2 = IDN_IDNCONV;
        if (flags & FLAG_MAP)
            actions2 |= IDN_MAP;
        if (flags & FLAG_NORMALIZE)
            actions2 |= IDN_NORMALIZE;
        if (flags & FLAG_PROHIBITCHECK)
            actions2 |= IDN_PROHCHECK;
        if (flags & FLAG_UNASSIGNCHECK)
            actions2 |= IDN_UNASCHECK;
        if (flags & FLAG_BIDICHECK)
            actions2 |= IDN_BIDICHECK;
        if (flags & FLAG_ASCIICHECK)
            actions2 |= IDN_ASCCHECK;
        if (flags & FLAG_LENGTHCHECK)
            actions2 |= IDN_LENCHECK;
    }
    else
    {
        actions2 = IDN_LOCALCONV;
    }

    /* actions1 drives the second step (the reverse conversion). */
    if (flags & FLAG_DELIMMAP)
        actions1 |= IDN_DELIMMAP;
    if (flags & FLAG_MAP)
        actions1 |= IDN_MAP;
    if (flags & FLAG_NORMALIZE)
        actions1 |= IDN_NORMALIZE;
    /* NOTE(review): exact repeat of the previous test; it has no effect. */
    if (flags & FLAG_NORMALIZE)
        actions1 |= IDN_NORMALIZE;
    if (flags & FLAG_PROHIBITCHECK)
        actions1 |= IDN_PROHCHECK;
    if (flags & FLAG_UNASSIGNCHECK)
        actions1 |= IDN_UNASCHECK;
    if (flags & FLAG_BIDICHECK)
        actions1 |= IDN_BIDICHECK;
    if (flags & FLAG_ASCIICHECK)
        actions1 |= IDN_ASCCHECK;
    if (flags & FLAG_ROUNDTRIPCHECK)
        actions1 |= IDN_RTCHECK;

    strbuf_init (&buf1);
    strbuf_init (&buf2);
    line_number = 1;
    while (strbuf_getline (&buf1, fp) != NULL)
    {
        /*
         * Trim newline at the end.  This is needed for
         * those ascii-compatible encodings such as UTF-5 or RACE
         * not to try converting newlines, which will result
         * in `invalid encoding' error.
         */
        nl_trimmed = trim_newline (&buf1);

        /*
         * Treat input line as the string encoded in local
         * encoding and convert it to UTF-8 encoded string.
         * With local_ace_hack the line is copied verbatim instead.
         */
        if (local_ace_hack)
        {
            if (strbuf_copy (&buf2, strbuf_get (&buf1)) == NULL)
                r = idn_nomemory;
            else
                r = idn_success;
        }
        else
        {
            r = convert_line (&buf1, &buf2, conf1, IDN_LOCALCONV, 0);
        }
        if (r != idn_success)
        {
            errormsg ("conversion failed at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }

        /*
         * Convert internationalized domain names in the line.
         * The result goes back into buf1.
         */
        if (idn_ace_hack)
        {
            r = convert_line (&buf2, &buf1, conf1, actions1, FLAG_REVERSE | FLAG_SELECTIVE);
        }
        else
        {
            r = convert_line (&buf2, &buf1, conf1, actions1, FLAG_REVERSE);
        }
        if (r != idn_success)
        {
            errormsg ("conversion failed at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }
        if (!idn_utf8_isvalidstring (strbuf_get (&buf1)))
        {
            errormsg ("conversion to utf-8 failed at line %d\n", line_number);
            goto error;
        }

        /*
         * Perform round trip check and convert to the output
         * codeset.
         */
        if (local_ace_hack)
        {
            r = convert_line (&buf1, &buf2, conf2, actions2, FLAG_SELECTIVE);
        }
        else
        {
            r = convert_line (&buf1, &buf2, conf1, actions2, FLAG_REVERSE);
        }

        if (r != idn_success)
        {
            errormsg ("error in nameprep or output conversion "
                      "at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }

        /* Emit the converted line, restoring the newline trimmed above. */
        fputs (strbuf_get (&buf2), stdout);
        if (nl_trimmed)
            putc ('\n', stdout);

        if (flush_every_line)
            fflush (stdout);

        line_number++;
    }
    strbuf_reset (&buf1);
    strbuf_reset (&buf2);
    return (0);

  error:
    /* Same cleanup as the success path, different return value. */
    strbuf_reset (&buf1);
    strbuf_reset (&buf2);
    return (1);
}
Exemplo n.º 7
0
/*
 * Program entry point.
 *
 * Parses the command line into the opt_* settings, reads content.xml from
 * the input file, optionally substitutes and formats it, wraps the text,
 * strips trailing blanks, converts the result with conv() and writes it to
 * the --output file or to stdout.
 *
 * Returns EXIT_SUCCESS; error paths terminate through exit().
 */
int main(int argc, const char **argv)
{
	struct stat file_info;
	iconv_t cd;
	STRBUF *document;
	STRBUF *wrapped;
	STRBUF *converted;
	int idx = 1;

	(void)setlocale(LC_ALL, "");

	/* argv is scanned up to its terminating NULL entry. */
	while (argv[idx] != NULL) {
		const char *arg = argv[idx];

		if (strcmp(arg, "--raw") == 0) {
			opt_raw = 1;
		} else if (strcmp(arg, "--raw-input") == 0) {
			opt_raw_input = 1;
		} else if (strncmp(arg, "--encoding=", 11) == 0) {
			const char *value = arg + 11;
			/* Copy the value including its terminating NUL. */
			size_t nbytes = strlen(value) + 1;
#ifdef iconvlist
			if (strcmp(value, "list") == 0) {
				show_iconvlist();
			}
#endif
			opt_encoding = ymalloc(nbytes);
			memcpy(opt_encoding, value, nbytes);
		} else if (strncmp(arg, "--width=", 8) == 0) {
			const char *value = arg + 8;

			opt_width = atoi(value);
			/* Only -1 and values from 3 upwards are accepted. */
			if (opt_width != -1 && opt_width < 3) {
				fprintf(stderr, "Invalid value for width: %s\n",
					value);
				exit(EXIT_FAILURE);
			}
		} else if (strcmp(arg, "--force") == 0) {
			/* accepted, but has no effect */
		} else if (strncmp(arg, "--output=", 9) == 0) {
			const char *value = arg + 9;

			/* A value starting with '-' leaves opt_output untouched. */
			if (value[0] != '-') {
				size_t nbytes = strlen(value) + 1;

				opt_output = ymalloc(nbytes);
				memcpy(opt_output, value, nbytes);
			}
		} else if (strncmp(arg, "--subst=", 8) == 0) {
			const char *value = arg + 8;

			if (strcmp(value, "none") == 0) {
				opt_subst = SUBST_NONE;
			} else if (strcmp(value, "some") == 0) {
				opt_subst = SUBST_SOME;
			} else if (strcmp(value, "all") == 0) {
				opt_subst = SUBST_ALL;
			} else {
				fprintf(stderr, "Invalid value for --subst: %s\n",
					value);
				exit(EXIT_FAILURE);
			}
		} else if (strcmp(arg, "--help") == 0) {
			/*
			 * The index is not advanced for this and the next two
			 * cases -- presumably usage()/version_info() do not
			 * return; confirm.
			 */
			usage();
			continue;
		} else if (strcmp(arg, "--version") == 0
			   || strcmp(arg, "-v") == 0) {
			version_info();
			continue;
		} else if (strcmp(arg, "-") == 0) {
			usage();
			continue;
		} else {
			/* Positional argument: the input file, given once. */
			if (opt_filename)
				usage();
			opt_filename = arg;
		}

		idx++;
	}

	/* "--encoding=show": print the guessed encoding and stop. */
	if (opt_encoding && strcmp("show", opt_encoding) == 0) {
		yfree(opt_encoding);
		opt_encoding = guess_encoding();
		printf("%s\n", opt_encoding);
		yfree(opt_encoding);
		exit(EXIT_SUCCESS);
	}

	/* Raw output is never wrapped. */
	if (opt_raw)
		opt_width = -1;

	if (!opt_filename)
		usage();

	if (!opt_encoding)
		opt_encoding = guess_encoding();

	cd = init_conv("UTF-8", opt_encoding);

	if (stat(opt_filename, &file_info) != 0) {
		fprintf(stderr, "%s: %s\n",
			opt_filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* read content.xml */
	if (opt_raw_input)
		document = read_from_xml(opt_filename, "content.xml");
	else
		document = read_from_zip(opt_filename, "content.xml");

	if (!opt_raw) {
		subst_doc(cd, document);
		format_doc(document, opt_raw_input);
	}

	wrapped = wrap(document, opt_width);

	/* remove all trailing whitespace */
	(void) regex_subst(wrapped, " +\n", _REG_GLOBAL, "\n");

	converted = conv(cd, wrapped);

	if (opt_output)
		write_to_file(converted, opt_output);
	else
		fwrite(strbuf_get(converted), strbuf_len(converted), 1, stdout);

	finish_conv(cd);
	strbuf_free(wrapped);
	strbuf_free(document);
	strbuf_free(converted);
#ifndef NO_ICONV
	yfree(opt_encoding);
#endif
	if (opt_output)
		yfree(opt_output);

	return EXIT_SUCCESS;
}
Exemplo n.º 8
0
/*
 * Convert the content of buf with the iconv descriptor `ic` and return the
 * result as a new STRBUF created by strbuf_slurp_n() from the converted
 * bytes.
 *
 * The output buffer starts at alloc_step bytes and grows in alloc_step
 * increments.  Input that iconv rejects (EILSEQ / EINVAL) is skipped and
 * replaced by a single '?'.  The process exits with EXIT_FAILURE when the
 * output grows beyond eight times the input length or when iconv reports
 * any other error.
 */
static STRBUF *conv(iconv_t ic, STRBUF *buf)
{
	/* FIXME: This functionality belongs into strbuf.c */
	ICONV_CHAR *doc;
	char *out, *outbuf;
	size_t inleft, outleft;
	size_t r;
	size_t outlen;
	const size_t alloc_step = 4096;
	STRBUF *output;

	inleft = strbuf_len(buf);
	doc = (ICONV_CHAR*)strbuf_get(buf);
	outlen = alloc_step;
	outleft = alloc_step;
	outbuf = ymalloc(alloc_step);
	out = outbuf;

	do {
		if (!outleft) {
			outlen += alloc_step; outleft += alloc_step;
			yrealloc_buf(&outbuf, &out, outlen);
		}
		r = iconv(ic, &doc, &inleft, &out, &outleft);
		if (r == (size_t)-1) {
			if(errno == E2BIG) {
				outlen += alloc_step; outleft += alloc_step;
				if (outlen > (strbuf_len(buf) << 3)) {
					fprintf(stderr, "Buffer grew to much. "
						"Corrupted document?\n");
					exit(EXIT_FAILURE);
				}
				yrealloc_buf(&outbuf, &out, outlen);
				continue;
			} else if ((errno == EILSEQ) || (errno == EINVAL)) {
				size_t skip = 1;

				/* advance in source buffer */
				if ((unsigned char)*doc > 0x80)
					skip += utf8_length[(unsigned char)*doc - 0x80];
				/*
				 * A truncated sequence at the end of the input
				 * must not move past the data that is left,
				 * otherwise inleft would wrap around.
				 */
				if (skip > inleft)
					skip = inleft;
				doc += skip;
				inleft -= skip;

				/*
				 * iconv may have filled the output completely
				 * before hitting the bad input; make room so
				 * the '?' stays inside the buffer and outleft
				 * does not wrap around.
				 */
				if (!outleft) {
					outlen += alloc_step; outleft += alloc_step;
					yrealloc_buf(&outbuf, &out, outlen);
				}

				/* advance in output buffer */
				*out = '?';
				out++;
				outleft--;

				continue;
			}
			fprintf(stderr, "iconv returned: %s\n", strerror(errno));
			exit(EXIT_FAILURE);
		}
	} while(inleft != 0);

	/*
	 * Make room for the terminating NUL.  yrealloc_buf() is used, as in
	 * the loop above, so that `out` follows the buffer if the allocation
	 * moves -- a plain realloc of outbuf would leave `out` pointing into
	 * the old block.
	 */
	if (!outleft) {
		yrealloc_buf(&outbuf, &out, outlen + 1);
	}
	*out = '\0';

	output = strbuf_slurp_n(outbuf, (size_t)(out - outbuf));
	strbuf_setopt(output, STRBUF_NULLOK);
	return output;
}
Exemplo n.º 9
0
/*
 * Replace text in buf that matches the extended regular expression
 * `regex`.
 *
 * regopt:
 *   _REG_GLOBAL  keep searching after each replacement instead of stopping
 *                after the first attempt.
 *   _REG_EXEC    `subst` is a callback invoked with the buffer text, the
 *                match array, its size and the current offset; the string
 *                it returns is used as the replacement and then released
 *                with yfree().  Without this flag `subst` is used as the
 *                replacement string itself.
 *
 * Returns the number of replacements made.  A pattern that fails to
 * compile is reported through print_regexp_err() and ends the process.
 */
int regex_subst(STRBUF *buf,
		const char *regex, int regopt,
		const void *subst)
{
	typedef char *(*subst_fn)(const char *, regmatch_t [],
				  size_t, size_t);
	enum { MAX_GROUPS = 10 };

	regex_t compiled;
	regmatch_t groups[MAX_GROUPS];
	size_t pos = 0;		/* offset in buf where the next search starts */
	int replaced = 0;
	int rc;

	rc = regcomp(&compiled, regex, REG_EXTENDED);
	if (rc != 0) {
		print_regexp_err(rc, &compiled);
		exit(EXIT_FAILURE);
	}

	for (;;) {
		const char *start;
		int exec_flags = 0;

		if (pos > strbuf_len(buf))
			break;

		start = strbuf_get(buf) + pos;

#ifdef REG_STARTEND
		/* Limit the search to the bytes that remain in buf. */
		groups[0].rm_so = 0;
		groups[0].rm_eo = strbuf_len(buf) - pos;
		exec_flags = REG_STARTEND;
#endif
		if (regexec(&compiled, start, MAX_GROUPS, groups, exec_flags) != 0)
			break;

		if (groups[0].rm_so != -1) {
			char *replacement;
			int inserted;

			if (regopt & _REG_EXEC) {
				subst_fn make = (subst_fn)subst;

				replacement = make(strbuf_get(buf), groups,
						   MAX_GROUPS, pos);
			} else {
				replacement = (char*)subst;
			}

			/* Match offsets are relative to `start`; rebase them. */
			inserted = strbuf_subst(buf,
						groups[0].rm_so + pos,
						groups[0].rm_eo + pos,
						replacement);
			replaced++;

			if (regopt & _REG_EXEC)
				yfree(replacement);

			/* Resume one position past the text just inserted. */
			pos += groups[0].rm_so;
			if (inserted >= 0)
				pos += inserted + 1;
		}

		if (!(regopt & _REG_GLOBAL))
			break;
	}

	regfree(&compiled);
	return replaced;
}