/*
 * Build a freshly allocated, NUL-terminated wide-character copy of the
 * multibyte string ``mbsarg'' (the argument of a %s conversion).
 *
 * A negative ``prec'' means the input is NUL-terminated and is converted in
 * full.  A non-negative ``prec'' caps the number of multibyte characters
 * taken from the input; in that case the input is not assumed to be
 * NUL-terminated, so its byte length is measured first by walking at most
 * ``prec'' characters.
 *
 * Returns NULL when ``mbsarg'' is NULL, when the input holds an invalid or
 * incomplete multibyte sequence, or when the allocation fails.  On success
 * the result comes from malloc() and is released by the caller.
 */
static wchar_t *
__mbsconv(char *mbsarg, int prec)
{
	mbstate_t mbs;
	wchar_t *convbuf, *out;
	const char *src;
	size_t nbytes, seen, step;

	if (mbsarg == NULL)
		return (NULL);

	if (prec < 0) {
		/* NUL-terminated input: every byte up to the NUL is used. */
		nbytes = strlen(mbsarg);
		step = 0;
	} else {
		/*
		 * Measure how many bytes the first ``prec'' characters
		 * occupy, stopping early at a NUL or a bad sequence.
		 */
		src = mbsarg;
		nbytes = 0;
		step = 0;
		mbs = initial_mbs;
		for (seen = 0; seen != (size_t)prec; seen++) {
			step = mbrlen(src, MB_CUR_MAX, &mbs);
			if (step == 0 || step == (size_t)-1 ||
			    step == (size_t)-2)
				break;
			src += step;
			nbytes += step;
		}
		if (step == (size_t)-1 || step == (size_t)-2)
			return (NULL);
	}

	/*
	 * One wide character per input byte is always enough room, plus
	 * one slot for the terminator.
	 */
	convbuf = malloc((nbytes + 1) * sizeof(*convbuf));
	if (convbuf == NULL)
		return (NULL);

	/* Convert exactly the ``nbytes'' bytes measured above. */
	out = convbuf;
	src = mbsarg;
	mbs = initial_mbs;
	while (nbytes != 0) {
		step = mbrtowc(out, src, nbytes, &mbs);
		if (step == 0 || step == (size_t)-1 || step == (size_t)-2)
			break;
		out++;
		src += step;
		nbytes -= step;
	}
	if (step == (size_t)-1 || step == (size_t)-2) {
		free(convbuf);
		return (NULL);
	}
	*out = L'\0';

	return (convbuf);
}
// Exercises mbrtowc(): first with whatever locale is active on entry
// (zero-length input, NULL destination, NULL source), then after switching
// LC_CTYPE to "C.UTF-8" with 1- to 4-byte UTF-8 sequences and two malformed
// inputs. Every call passes a NULL state pointer, so the assertions run in
// sequence against the library's internal conversion state.
TEST(wchar, mbrtowc) {
  wchar_t out[8];

  // With n == 0 the call is expected to return 0 and leave out[0] untouched.
  out[0] = 'x';
  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
  ASSERT_EQ('x', out[0]);

  // Zero-length and one-byte reads with a real destination buffer.
  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
  ASSERT_EQ(0U, mbrtowc(out, "", 0, NULL));
  ASSERT_EQ(1U, mbrtowc(out, "hello", 1, NULL));
  ASSERT_EQ(L'h', out[0]);

  // Same calls with a NULL destination: only the return value is checked.
  ASSERT_EQ(0U, mbrtowc(NULL, "hello", 0, NULL));
  ASSERT_EQ(0U, mbrtowc(NULL, "", 0, NULL));
  ASSERT_EQ(1U, mbrtowc(NULL, "hello", 1, NULL));

  // NULL source string.
  ASSERT_EQ(0U, mbrtowc(NULL, NULL, 0, NULL));

  // Switch to a UTF-8 locale for the multibyte cases below.
  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // 1-byte UTF-8.
  ASSERT_EQ(1U, mbrtowc(out, "abcdef", 6, NULL));
  ASSERT_EQ(L'a', out[0]);
  // 2-byte UTF-8.
  ASSERT_EQ(2U, mbrtowc(out, "\xc2\xa2" "cdef", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x00a2), out[0]);
  // 3-byte UTF-8.
  ASSERT_EQ(3U, mbrtowc(out, "\xe2\x82\xac" "def", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x20ac), out[0]);
  // 4-byte UTF-8.
  ASSERT_EQ(4U, mbrtowc(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x24b62), out[0]);
#if defined(__BIONIC__) // glibc allows this.
  // Illegal 5-byte UTF-8.
  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL));
  ASSERT_EQ(EILSEQ, errno);
#endif
  // Illegal over-long sequence.
  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf0\x82\x82\xac" "ef", 6, NULL));
  ASSERT_EQ(EILSEQ, errno);
}
Exemple #3
0
/*
 * Return the accumulated output text of `ms` in printable form.
 *
 * Returns NULL if an error was already recorded (ms->haderr), if there is
 * no output buffer, or if the printable buffer cannot be (re)allocated.
 * With R_MAGIC_RAW set the raw buffer ms->o.buf is returned unchanged.
 * Otherwise ms->o.pbuf is resized to hold the escaped copy and returned;
 * it stays owned by `ms`.
 *
 * The text is first decoded as a multibyte string: printable wide
 * characters are copied verbatim and every byte of a non-printable one is
 * written through OCTALIFY.  If decoding fails anywhere, the whole buffer
 * is redone byte by byte using isprint().
 */
const char *file_getbuffer(RMagic *ms) {
	char *pbuf, *op, *np;
	size_t psize, len;

	if (ms->haderr)
		return NULL;

	if (ms->flags & R_MAGIC_RAW)
		return ms->o.buf;

	if (ms->o.buf == NULL) {
		eprintf ("ms->o.buf = NULL\n");
		return NULL;
	}

	/* * 4 is for octal representation, + 1 is for NUL */
	len = strlen (ms->o.buf);
	if (len > (SIZE_MAX - 1) / 4) {
		file_oomem (ms, len);
		return NULL;
	}
	psize = len * 4 + 1;
	/* Keep the old pbuf reachable until realloc is known to have worked. */
	if ((pbuf = realloc (ms->o.pbuf, psize)) == NULL) {
		file_oomem (ms, psize);
		return NULL;
	}
	ms->o.pbuf = pbuf;

#if 1
//defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	{
		mbstate_t state;
		wchar_t nextchar;
		int mb_conv = 1;
		size_t bytesconsumed;
		char *eop;
		(void)memset(&state, 0, sizeof(mbstate_t));

		np = ms->o.pbuf;
		op = ms->o.buf;
		eop = op + len;

		while (op < eop) {
			bytesconsumed = mbrtowc(&nextchar, op,
			    (size_t)(eop - op), &state);
			if (bytesconsumed == (size_t)(-1) ||
			    bytesconsumed == (size_t)(-2)) {
				/* Not valid multibyte text: use the byte-wise path. */
				mb_conv = 0;
				break;
			}

			if (iswprint(nextchar)) {
				(void)memcpy(np, op, bytesconsumed);
				op += bytesconsumed;
				np += bytesconsumed;
			} else {
				/*
				 * This loop only makes progress if OCTALIFY
				 * (defined outside this block) advances both
				 * np and op by itself.
				 */
				while (bytesconsumed-- > 0)
					OCTALIFY(np, op);
			}
		}
		*np = '\0';

		/* Parsing succeeded as a multi-byte sequence */
		if (mb_conv != 0)
			return ms->o.pbuf;
	}
#endif
	/*
	 * Byte-wise fallback.  OCTALIFY already steps op past the byte it
	 * escapes (the multibyte loop above depends on that), so the loop
	 * header must not advance op again: doing so skipped the byte after
	 * every escaped one and could step over the terminating NUL.
	 */
	for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
		if (isprint ((ut8)*op)) {
			*np++ = *op++;
		} else {
			OCTALIFY (np, op);
		}
	}
	*np = '\0';
	return ms->o.pbuf;
}
Exemple #4
0
/*
 * Entry point: parse the command line, check that the selected options are
 * compatible, choose the cutting routine and apply it to every operand
 * ("-" or no operand at all means standard input).
 *
 * The process exits with 0 on success and non-zero if any input could not
 * be opened or the cutting routine reported a failure for any input.
 */
int
main(int argc, char *argv[])
{
    FILE *fp;
    int (*fcn)(FILE *, const char *);
    int ch, rval;
    size_t n;

    setlocale(LC_ALL, "");

    fcn = NULL;
    dchar = '\t';			/* default delimiter is \t */
    strcpy(dcharmb, "\t");

    while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
        switch(ch) {
        case 'b':
            get_list(optarg);
            bflag = 1;
            break;
        case 'c':
            get_list(optarg);
            cflag = 1;
            break;
        case 'd':
            /*
             * The delimiter must be exactly one non-NUL multibyte
             * character: a failed conversion yields (size_t)-1 or -2,
             * neither of which can equal strlen(optarg).
             */
            n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
            if (dchar == '\0' || n != strlen(optarg))
                errx(1, "bad delimiter");
            strcpy(dcharmb, optarg);
            dflag = 1;
            break;
        case 'f':
            get_list(optarg);
            fflag = 1;
            break;
        case 's':
            sflag = 1;
            break;
        case 'n':
            nflag = 1;
            break;
        case 'w':
            wflag = 1;
            break;
        case '?':
        default:
            usage();
        }
    argc -= optind;
    argv += optind;

    /* Reject option combinations that make no sense together. */
    if (fflag) {
        if (bflag || cflag || nflag || (wflag && dflag))
            usage();
    } else if (!(bflag || cflag) || dflag || sflag || wflag)
        usage();
    else if (!bflag && nflag)
        usage();

    /* Character-based routines are only needed in multibyte locales. */
    if (fflag)
        fcn = f_cut;
    else if (cflag)
        fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
    else if (bflag)
        fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;

    rval = 0;
    if (*argv)
        for (; *argv; ++argv) {
            if (strcmp(*argv, "-") == 0)
                rval |= fcn(stdin, "stdin");
            else {
                if (!(fp = fopen(*argv, "r"))) {
                    warn("%s", *argv);
                    rval = 1;
                    continue;
                }
                /*
                 * Fold the result into the exit status, as is done
                 * for stdin; it used to be dropped for named files.
                 */
                rval |= fcn(fp, *argv);
                (void)fclose(fp);
            }
        }
    else
        rval = fcn(stdin, "stdin");
    exit(rval);
}
Exemple #5
0
/*
 * Split `text` into lines of at most `linelen` display columns and hand
 * each line to `line_cb(arg, start, len)`, where `start` points into `text`
 * and `len` is the line's length in bytes.
 *
 * Lines end at '\n' (not included in the reported line), at the last space
 * seen on the current line when the column limit is exceeded, or in the
 * middle of a word when the line holds no usable space.  A non-zero return
 * from `line_cb` stops the folding.  The remaining text is always reported
 * once at the end of input (possibly with length 0), and the callback's
 * result for that final call is ignored.  Folding also ends, after
 * reporting the current line, at the first invalid multibyte sequence.
 */
void fold_text(const char *text,
		int linelen,
		int line_cb(void *arg, const char *start, int len),
		void *arg)
{
	const char *start, *end, *sep;
	size_t sep_bytes, len;
	int col, rc = 0;
	mbstate_t ps;

	/* start, end and sep are byte-positions in the string, and should always
	 * lie on the start of a multibyte sequence */
	start = end = sep = text;
	sep_bytes = 0;
	col = 0;
	len = strlen(text);
	memset(&ps, 0, sizeof(ps));

	while (!rc) {
		size_t bytes;
		wchar_t wc;
		int width;

		bytes = mbrtowc(&wc, end, len - (end - text), &ps);

		/* a zero size means a nul terminator, (size_t) -2 means we've
		 * reached the end of the buffer, and (size_t) -1 is a decoding
		 * error; in every case flush what we have and stop. Malformed
		 * input is data, not a programming error, so it is handled
		 * here rather than asserted on. */
		if (!bytes || bytes == (size_t) -2 || bytes == (size_t) -1) {
			line_cb(arg, start, end - start);
			break;
		}

		if (wc == L'\n') {
			rc = line_cb(arg, start, end - start);
			start = sep = end += bytes;
			sep_bytes = 0;
			col = 0;
			continue;
		}

		width = wcwidth(wc);

		/* zero-width (e.g. combining) and unprintable characters take
		 * no columns: just add them to the current line. The nul
		 * terminator never gets here, it is caught by the !bytes
		 * check above. */
		if (width <= 0) {
			end += bytes;
			continue;
		}

		col += width;

		if (col > linelen) {
			if (sep != start) {
				/* split on a previous word boundary, if
				 * possible */
				rc = line_cb(arg, start, sep - start);
				end = sep + sep_bytes;
			} else {
				/* otherwise, break the word. If this single
				 * character is already wider than the line,
				 * emit it on a line of its own so that we
				 * always make progress. */
				if (end == start)
					end += bytes;
				rc = line_cb(arg, start, end - start);
			}
			sep_bytes = 0;
			start = sep = end;
			col = 0;

		} else {
			/* record our last separator */
			if (wc == L' ') {
				sep = end;
				sep_bytes = bytes;
			}
			end += bytes;
		}
	}
}
Exemple #6
0
/**
   Return the next input character.

   Characters queued in the lookahead buffer are served first. When the
   call is not timed, WEOF entries at the top of the lookahead are
   discarded beforehand, and if that empties the buffer the function starts
   over with an untimed read.

   With an empty lookahead, a timed call first waits for file descriptor 0
   to become readable, using a timeout derived from WAIT_ON_ESCAPE, and
   returns WEOF if select() reports a timeout or an error. Bytes from
   readb() are then fed one at a time to mbrtowc() until a complete wide
   character is decoded. Values in the range [R_NULL, R_NULL + 1000) coming
   from readb() are passed through without decoding; an invalid byte
   sequence yields R_NULL.
*/
wchar_t input_common_readch(int timed)
{
    if (has_lookahead())
    {
        if (!timed)
        {
            /* Skip over stale WEOF markers. */
            while (has_lookahead() && lookahead_top() == WEOF)
                lookahead_pop();
            if (!has_lookahead())
                return input_common_readch(0);
        }

        return lookahead_pop();
    }

    if (timed)
    {
        struct timeval timeout = {0, 1000 * WAIT_ON_ESCAPE};
        fd_set readable;

        FD_ZERO(&readable);
        FD_SET(0, &readable);
        int ready = select(1, &readable, 0, 0, &timeout);

        /* Timeout and error are both reported as "nothing available". */
        if (ready == 0 || ready == -1)
            return WEOF;
    }

    wchar_t decoded;
    mbstate_t state = {};

    for (;;)
    {
        wint_t raw = readb();

        if ((raw >= R_NULL) && (raw < R_NULL + 1000))
            return raw;

        char byte = raw;
        size_t consumed = mbrtowc(&decoded, &byte, 1, &state);

        if (consumed == (size_t)(-1))
        {
            memset(&state, '\0', sizeof(state));
            debug(2, L"Illegal input");
            return R_NULL;
        }
        if (consumed == (size_t)(-2))
        {
            /* Sequence still incomplete: fetch the next byte. */
            continue;
        }
        if (consumed == 0)
            return 0;

        return decoded;
    }
}
Exemple #7
0
/*
 * Get a logical line.
 *
 * Reads characters from f with Getc() into the buffer Line until a newline,
 * end of file, a nearly full buffer, or -- when fold_opt is set -- until
 * the display column reaches Mcol.  Tabs, backspaces, carriage returns and
 * form feeds adjust the column as coded below.  With HAVE_WIDECHAR, fold_opt
 * and a multibyte locale, bytes are assembled in mbc[] and decoded with
 * mbrtowc() so that columns are counted with wcwidth().
 *
 * The number of bytes stored is written to *length.  The return value is
 * the display column reached, or EOF when end of file is hit before any
 * byte was stored.  The static colflg remembers whether the previous call
 * ended exactly at column Mcol while folding; if so, a newline that
 * immediately follows is swallowed at the start of this call.
 */
static int get_line(register FILE *f, int *length)
{
	int c;
	char *p;
	int column;
	static int colflg;

#ifdef HAVE_WIDECHAR
	size_t i;
	wchar_t wc;
	int wc_width;
	mbstate_t state, state_bak;	/* Current status of the stream. */
	char mbc[MB_LEN_MAX];		/* Buffer for one multibyte char. */
	size_t mblength;		/* Byte length of multibyte char. */
	size_t mbc_pos = 0;		/* Position of the MBC. */
	int use_mbc_buffer_flag = 0;	/* If 1, mbc has data. */
	int break_flag = 0;		/* If 1, exit while(). */
	long file_pos_bak = Ftell(f);

	memset(&state, '\0', sizeof(mbstate_t));
#endif

	prepare_line_buffer();

	p = Line;
	column = 0;
	c = Getc(f);
	/* Previous line was folded exactly at Mcol: drop the newline after it. */
	if (colflg && c == '\n') {
		Currline++;
		c = Getc(f);
	}
	while (p < &Line[LineLen - 1]) {
#ifdef HAVE_WIDECHAR
		/* Continue a multibyte character started on an earlier pass. */
		if (fold_opt && use_mbc_buffer_flag && MB_CUR_MAX > 1) {
			use_mbc_buffer_flag = 0;
			state_bak = state;
			mbc[mbc_pos++] = c;
 process_mbc:
			mblength = mbrtowc(&wc, mbc, mbc_pos, &state);

			switch (mblength) {
			case (size_t)-2:	/* Incomplete multibyte character. */
				use_mbc_buffer_flag = 1;
				state = state_bak;
				break;

			case (size_t)-1:	/* Invalid as a multibyte character. */
				/* Emit the first buffered byte as is and retry the rest. */
				*p++ = mbc[0];
				state = state_bak;
				column++;
				file_pos_bak++;

				if (column >= Mcol) {
					Fseek(f, file_pos_bak);
				} else {
					memmove(mbc, mbc + 1, --mbc_pos);
					if (mbc_pos > 0) {
						mbc[mbc_pos] = '\0';
						goto process_mbc;
					}
				}
				break;

			default:
				wc_width = wcwidth(wc);

				if (column + wc_width > Mcol) {
					/* Character does not fit: rewind so the next call rereads it. */
					Fseek(f, file_pos_bak);
					break_flag = 1;
				} else {
					for (i = 0; p < &Line[LineLen - 1] &&
						    i < mbc_pos; i++)
						*p++ = mbc[i];
					if (wc_width > 0)
						column += wc_width;
				}
			}

			if (break_flag || column >= Mcol)
				break;

			c = Getc(f);
			continue;
		}
#endif	/* HAVE_WIDECHAR */
		if (c == EOF) {
			if (p > Line) {
				*p = '\0';
				*length = p - Line;
				return (column);
			}
			*length = p - Line;
			return (EOF);
		}
		if (c == '\n') {
			Currline++;
			break;
		}

		*p++ = c;
#if 0
		if (c == '\033') {	/* ESC */
			c = Getc(f);
			while (c > ' ' && c < '0' && p < &Line[LineLen - 1]) {
				*p++ = c;
				c = Getc(f);
			}
			if (c >= '0' && c < '\177' && p < &Line[LineLen - 1]) {
				*p++ = c;
				c = Getc(f);
				continue;
			}
		}
#endif	/* 0 */
		if (c == '\t') {
			if (!hardtabs || (column < promptlen && !hard)) {
				if (hardtabs && eraseln && !dumb) {
					column = 1 + (column | 7);
					putstring(eraseln);
					promptlen = 0;
				} else {
					/* Replace the stored tab with spaces up to the next multiple of 8. */
					for (--p; p < &Line[LineLen - 1];) {
						*p++ = ' ';
						if ((++column & 7) == 0)
							break;
					}
					if (column >= promptlen)
						promptlen = 0;
				}
			} else
				column = 1 + (column | 7);
		} else if (c == '\b' && column > 0) {
			column--;
		} else if (c == '\r') {
			int next = Getc(f);
			if (next == '\n') {
				/* CR LF ends the line; the stored CR is dropped. */
				p--;
				Currline++;
				break;
			}
			Ungetc(next, f);
			column = 0;
		} else if (c == '\f' && stop_opt) {
			/* Show the form feed as "^L" and bump Pause. */
			p[-1] = '^';
			*p++ = 'L';
			column += 2;
			Pause++;
		} else if (c == EOF) {
			*length = p - Line;
			return (column);
		} else {
#ifdef HAVE_WIDECHAR
			if (fold_opt && MB_CUR_MAX > 1) {
				memset(mbc, '\0', MB_LEN_MAX);
				mbc_pos = 0;
				mbc[mbc_pos++] = c;
				state_bak = state;

				mblength = mbrtowc(&wc, mbc, mbc_pos, &state);
				/* The value of mblength is always less than 2 here. */
				switch (mblength) {
				case (size_t)-2:
					/* First byte of a longer sequence: take it back out of
					 * Line and finish it through the mbc buffer path. */
					p--;
					file_pos_bak = Ftell(f) - 1;
					state = state_bak;
					use_mbc_buffer_flag = 1;
					break;

				case (size_t)-1:
					state = state_bak;
					column++;
					break;

				default:
					wc_width = wcwidth(wc);
					if (wc_width > 0)
						column += wc_width;
				}
			} else
#endif	/* HAVE_WIDECHAR */
			{
				if (isprint(c))
					column++;
			}
		}

		if (column >= Mcol && fold_opt)
			break;
#ifdef HAVE_WIDECHAR
		if (use_mbc_buffer_flag == 0 && p >= &Line[LineLen - 1 - 4])
			/* don't read another char if there is no space for
			 * whole multibyte sequence */
			break;
#endif
		c = Getc(f);
	}
	if (column >= Mcol && Mcol > 0) {
		if (!Wrap) {
			*p++ = '\n';
		}
	}
	/* Remember for the next call whether this line ended exactly at Mcol. */
	colflg = column == Mcol && fold_opt;
	if (colflg && eatnl && Wrap) {
		*p++ = '\n';	/* simulate normal wrap */
	}
	*length = p - Line;
	*p = 0;
	return (column);
}
Exemple #8
0
/*
 * Convert "byteCount" bytes at "bytes", encoded in the current locale's
 * multibyte character set, into 16-bit units stored at "chars".
 *
 * "charCount" is the capacity of "chars" in 16-bit units.  Returns the
 * number of units written, or -1 if "chars" is too small to hold the
 * complete result.  Bytes that cannot be decoded are skipped one at a
 * time without producing output; an embedded NUL produces a single zero
 * unit.  With IL_CONFIG_LATIN1 every byte maps directly to one unit.
 */
long ILAnsiGetChars(const unsigned char *bytes, unsigned long byteCount,
					unsigned short *chars, unsigned long charCount)
{
#ifdef IL_CONFIG_LATIN1
	unsigned long len;

	/* Check for enough space in the output buffer */
	if(byteCount > charCount)
	{
		return -1;
	}

	/* Convert the bytes */
	len = byteCount;
	while(len > 0)
	{
		*chars++ = (unsigned short)(*bytes++);
		--len;
	}
	return (long)byteCount;
#else
#if HAVE_MBRTOWC
	/* Use the re-entrant function to perform the conversion */
	mbstate_t state;
	size_t chlen;
	unsigned long len = 0;
	wchar_t ch;
	int wrlen;
	ILMemZero(&state, sizeof(state));
	mbrtowc((wchar_t *)0, (char *)0, 0, &state);
	while(byteCount > 0)
	{
		chlen = mbrtowc(&ch, (char *)bytes, (size_t)byteCount, &state);
		if(chlen == (size_t)(-1) || chlen == (size_t)(-2))
		{
			/* Invalid character */
			++bytes;
			--byteCount;
		}
		else if(chlen != 0)
		{
			/* Ordinary character: ask for its length first, then
			   write it only if it fits in the remaining space */
			wrlen = ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			if(charCount < (unsigned long)wrlen)
			{
				return -1;
			}
			ILUTF16WriteChar(chars, (unsigned long)ch);
			chars += wrlen;
			len += wrlen;
			/* Track the space left; without this the check above
			   only ever protected the first character */
			charCount -= (unsigned long)wrlen;
			bytes += chlen;
			byteCount -= (unsigned long)chlen;
		}
		else
		{
			/* Embedded NUL character */
			if(charCount == 0)
			{
				return -1;
			}
			*chars++ = '\0';
			++len;
			--charCount;
			++bytes;
			--byteCount;
		}
	}
	return (long)len;
#else
	/* Use the non re-entrant function to perform the conversion
	   and just hope that the underlying libc takes care of the
	   thread-safety issues for us */
	int chlen;
	unsigned long len = 0;
	wchar_t ch;
	int wrlen;
	mbtowc((wchar_t *)0, (char *)0, 0);
	while(byteCount > 0)
	{
		chlen = mbtowc(&ch, (char *)bytes, (size_t)byteCount);
		if(chlen > 0)
		{
			/* Ordinary character: ask for its length first, then
			   write it only if it fits in the remaining space */
			wrlen = ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			if(charCount < (unsigned long)wrlen)
			{
				return -1;
			}
			ILUTF16WriteChar(chars, (unsigned long)ch);
			chars += wrlen;
			len += wrlen;
			/* Track the space left; without this the check above
			   only ever protected the first character */
			charCount -= (unsigned long)wrlen;
			bytes += chlen;
			byteCount -= (unsigned long)chlen;
		}
		else if(!chlen)
		{
			/* Embedded NUL character */
			if(charCount == 0)
			{
				return -1;
			}
			*chars++ = '\0';
			++len;
			--charCount;
			++bytes;
			--byteCount;
		}
		else
		{
			/* Invalid character */
			++bytes;
			--byteCount;
		}
	}
	return (long)len;
#endif
#endif
}
Exemple #9
0
int
main(int argc, char *argv[])
{
    INPUT *F1, *F2;
    int aflag, ch, cval, vflag;
    char *end;

    setlocale(LC_ALL, "");

    F1 = &input1;
    F2 = &input2;

    aflag = vflag = 0;
    obsolete(argv);
    while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) {
        switch (ch) {
        case '\01':		/* See comment in obsolete(). */
            aflag = 1;
            F1->unpair = F2->unpair = 1;
            break;
        case '1':
            if ((F1->joinf = strtol(optarg, &end, 10)) < 1)
                errx(1, "-1 option field number less than 1");
            if (*end)
                errx(1, "illegal field number -- %s", optarg);
            --F1->joinf;
            break;
        case '2':
            if ((F2->joinf = strtol(optarg, &end, 10)) < 1)
                errx(1, "-2 option field number less than 1");
            if (*end)
                errx(1, "illegal field number -- %s", optarg);
            --F2->joinf;
            break;
        case 'a':
            aflag = 1;
            switch(strtol(optarg, &end, 10)) {
            case 1:
                F1->unpair = 1;
                break;
            case 2:
                F2->unpair = 1;
                break;
            default:
                errx(1, "-a option file number not 1 or 2");
                break;
            }
            if (*end)
                errx(1, "illegal file number -- %s", optarg);
            break;
        case 'e':
            empty = optarg;
            break;
        case 'j':
            if ((F1->joinf = F2->joinf =
                                 strtol(optarg, &end, 10)) < 1)
                errx(1, "-j option field number less than 1");
            if (*end)
                errx(1, "illegal field number -- %s", optarg);
            --F1->joinf;
            --F2->joinf;
            break;
        case 'o':
            fieldarg(optarg);
            break;
        case 't':
            spans = 0;
            if (mbrtowc(&tabchar[0], optarg, MB_LEN_MAX, NULL) !=
                    strlen(optarg))
                errx(1, "illegal tab character specification");
            tabchar[1] = L'\0';
            break;
        case 'v':
            vflag = 1;
            joinout = 0;
            switch (strtol(optarg, &end, 10)) {
            case 1:
                F1->unpair = 1;
                break;
            case 2:
                F2->unpair = 1;
                break;
            default:
                errx(1, "-v option file number not 1 or 2");
                break;
            }
            if (*end)
                errx(1, "illegal file number -- %s", optarg);
            break;
        case '?':
        default:
            usage();
        }
    }
    argc -= optind;
    argv += optind;

    if (aflag && vflag)
        errx(1, "the -a and -v options are mutually exclusive");

    if (argc != 2)
        usage();

    /* Open the files; "-" means stdin. */
    if (!strcmp(*argv, "-"))
        F1->fp = stdin;
    else if ((F1->fp = fopen(*argv, "r")) == NULL)
        err(1, "%s", *argv);
    ++argv;
    if (!strcmp(*argv, "-"))
        F2->fp = stdin;
    else if ((F2->fp = fopen(*argv, "r")) == NULL)
        err(1, "%s", *argv);
    if (F1->fp == stdin && F2->fp == stdin)
        errx(1, "only one input file may be stdin");

    slurp(F1);
    slurp(F2);
    while (F1->setcnt && F2->setcnt) {
        cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf);
        if (cval == 0) {
            /* Oh joy, oh rapture, oh beauty divine! */
            if (joinout)
                joinlines(F1, F2);
            slurp(F1);
            slurp(F2);
        } else if (cval < 0) {
            /* File 1 takes the lead... */
            if (F1->unpair)
                joinlines(F1, NULL);
            slurp(F1);
        } else {
            /* File 2 takes the lead... */
            if (F2->unpair)
                joinlines(F2, NULL);
            slurp(F2);
        }
    }

    /*
     * Now that one of the files is used up, optionally output any
     * remaining lines from the other file.
     */
    if (F1->unpair)
        while (F1->setcnt) {
            joinlines(F1, NULL);
            slurp(F1);
        }
    if (F2->unpair)
        while (F2->setcnt) {
            joinlines(F2, NULL);
            slurp(F2);
        }
    exit(0);
}
Exemple #10
0
/*
 * __svfscanf_unlocked - non-MT-safe version of __svfscanf
 */
int
__svfscanf_unlocked(FILE *fp, const char *fmt0, va_list ap)
{
	const u_char *fmt = (const u_char *)fmt0;
	int c;			/* character from format, or conversion */
	size_t width;		/* field width, or 0 */
	char *p;		/* points into all kinds of strings */
	size_t n;		/* handy size_t */
	int flags;		/* flags as defined above */
	char *p0;		/* saves original value of p when necessary */
	int nassigned;		/* number of fields assigned */
	int nconversions;	/* number of conversions */
	int nread;		/* number of characters consumed from fp */
	int base;		/* base argument to conversion function */
	char ccltab[256];	/* character class table for %[...] */
	char buf[BUF];		/* buffer for numeric and mb conversions */
	wchar_t *wcp;		/* handy wide-character pointer */
	size_t nconv;		/* length of multibyte sequence converted */
	static const mbstate_t initial;
	mbstate_t mbs;

	/* `basefix' is used to avoid `if' tests in the integer scanner */
	static const short basefix[17] =
		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

	_DIAGASSERT(fp != NULL);
	_DIAGASSERT(fmt0 != NULL);

	_SET_ORIENTATION(fp, -1);

	nassigned = 0;
	nconversions = 0;
	nread = 0;
	base = 0;
	for (;;) {
		c = (unsigned char)*fmt++;
		if (c == 0)
			return (nassigned);
		if (isspace(c)) {
			while ((fp->_r > 0 || __srefill(fp) == 0) &&
			    isspace(*fp->_p))
				nread++, fp->_r--, fp->_p++;
			continue;
		}
		if (c != '%')
			goto literal;
		width = 0;
		flags = 0;
		/*
		 * switch on the format.  continue if done;
		 * break once format type is derived.
		 */
again:		c = *fmt++;
		switch (c) {
		case '%':
			SCANF_SKIP_SPACE();
literal:
			if (fp->_r <= 0 && __srefill(fp))
				goto input_failure;
			if (*fp->_p != c)
				goto match_failure;
			fp->_r--, fp->_p++;
			nread++;
			continue;

		case '*':
			flags |= SUPPRESS;
			goto again;
		case 'j':
			flags |= INTMAXT;
			goto again;
		case 'l':
			if (flags & LONG) {
				flags &= ~LONG;
				flags |= LONGLONG;
			} else
				flags |= LONG;
			goto again;
		case 'q':
			flags |= LONGLONG;	/* not quite */
			goto again;
		case 't':
			flags |= PTRDIFFT;
			goto again;
		case 'z':
			flags |= SIZET;
			goto again;
		case 'L':
			flags |= LONGDBL;
			goto again;
		case 'h':
			if (flags & SHORT) {
				flags &= ~SHORT;
				flags |= SHORTSHORT;
			} else
				flags |= SHORT;
			goto again;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			width = width * 10 + c - '0';
			goto again;

		/*
		 * Conversions.
		 */
		case 'd':
			c = CT_INT;
			base = 10;
			break;

		case 'i':
			c = CT_INT;
			base = 0;
			break;

		case 'o':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 8;
			break;

		case 'u':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 10;
			break;

		case 'X':
		case 'x':
			flags |= PFXOK;	/* enable 0x prefixing */
			c = CT_INT;
			flags |= UNSIGNED;
			base = 16;
			break;

#ifndef NO_FLOATING_POINT
		case 'A': case 'E': case 'F': case 'G':
		case 'a': case 'e': case 'f': case 'g':
			c = CT_FLOAT;
			break;
#endif

		case 'S':
			flags |= LONG;
			/* FALLTHROUGH */
		case 's':
			c = CT_STRING;
			break;

		case '[':
			fmt = __sccl(ccltab, fmt);
			flags |= NOSKIP;
			c = CT_CCL;
			break;

		case 'C':
			flags |= LONG;
			/* FALLTHROUGH */
		case 'c':
			flags |= NOSKIP;
			c = CT_CHAR;
			break;

		case 'p':	/* pointer format is like hex */
			flags |= POINTER | PFXOK;
			c = CT_INT;		/* assumes sizeof(uintmax_t) */
			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
			base = 16;
			break;

		case 'n':
			nconversions++;
			if (flags & SUPPRESS)	/* ??? */
				continue;
			if (flags & SHORTSHORT)
				*va_arg(ap, char *) = nread;
			else if (flags & SHORT)
				*va_arg(ap, short *) = nread;
			else if (flags & LONG)
				*va_arg(ap, long *) = nread;
			else if (flags & LONGLONG)
				*va_arg(ap, long long *) = nread;
			else if (flags & INTMAXT)
				*va_arg(ap, intmax_t *) = nread;
			else if (flags & SIZET)
				*va_arg(ap, size_t *) = nread;
			else if (flags & PTRDIFFT)
				*va_arg(ap, ptrdiff_t *) = nread;
			else
				*va_arg(ap, int *) = nread;
			continue;

		default:
			goto match_failure;

		/*
		 * Disgusting backwards compatibility hack.	XXX
		 */
		case '\0':	/* compat */
			return (EOF);
		}

		/*
		 * We have a conversion that requires input.
		 */
		if (fp->_r <= 0 && __srefill(fp))
			goto input_failure;

		/*
		 * Consume leading white space, except for formats
		 * that suppress this.
		 */
		if ((flags & NOSKIP) == 0) {
			while (isspace(*fp->_p)) {
				nread++;
				if (--fp->_r > 0)
					fp->_p++;
				else if (__srefill(fp))
					goto input_failure;
			}
			/*
			 * Note that there is at least one character in
			 * the buffer, so conversions that do not set NOSKIP
			 * can no longer result in an input failure.
			 */
		}

		/*
		 * Do the conversion.
		 */
		switch (c) {

		case CT_CHAR:
			/* scan arbitrary characters (sets NOSKIP) */
			if (width == 0)
				width = 1;
			if (flags & LONG) {
				if ((flags & SUPPRESS) == 0)
					wcp = va_arg(ap, wchar_t *);
				else
					wcp = NULL;
				n = 0;
				while (width != 0) {
					if (n == MB_CUR_MAX) {
						fp->_flags |= __SERR;
						goto input_failure;
					}
					buf[n++] = *fp->_p;
					fp->_p++;
					fp->_r--;
					mbs = initial;
					nconv = mbrtowc(wcp, buf, n, &mbs);
					if (nconv == (size_t)-1) {
						fp->_flags |= __SERR;
						goto input_failure;
					}
					if (nconv == 0 && !(flags & SUPPRESS))
						*wcp = L'\0';
					if (nconv != (size_t)-2) {
						nread += n;
						width--;
						if (!(flags & SUPPRESS))
							wcp++;
						n = 0;
					}
					if (fp->_r <= 0 && __srefill(fp)) {
						if (n != 0) {
							fp->_flags |= __SERR;
							goto input_failure;
						}
						break;
					}
				}
				if (!(flags & SUPPRESS))
					nassigned++;
			} else if (flags & SUPPRESS) {
				size_t sum = 0;
				for (;;) {
					if ((n = fp->_r) < width) {
						sum += n;
						width -= n;
						fp->_p += n;
						if (__srefill(fp)) {
							if (sum == 0)
							    goto input_failure;
							break;
						}
					} else {
						sum += width;
						fp->_r -= width;
						fp->_p += width;
						break;
					}
				}
				nread += sum;
			} else {
				size_t r = fread(va_arg(ap, char *), 1,
				    width, fp);

				if (r == 0)
					goto input_failure;
				nread += r;
				nassigned++;
			}
			nconversions++;
			break;

		case CT_CCL:
			/* scan a (nonempty) character class (sets NOSKIP) */
			if (width == 0)
				width = (size_t)~0;	/* `infinity' */
			/* take only those things in the class */
			if (flags & LONG) {
				wchar_t twc;
				int nchars;

				if ((flags & SUPPRESS) == 0)
					wcp = va_arg(ap, wchar_t *);
				else
Exemple #11
0
/*
 * Count the 16-bit units needed to hold the conversion of "count" bytes at
 * "bytes" from the current locale's multibyte character set.
 *
 * Undecodable bytes are skipped one at a time and contribute nothing; an
 * embedded NUL counts as one unit.  With IL_CONFIG_LATIN1 the answer is
 * simply "count".
 */
unsigned long ILAnsiGetCharCount(const unsigned char *bytes,
								 unsigned long count)
{
#ifdef IL_CONFIG_LATIN1
	return count;
#else
#if HAVE_MBRTOWC
	/* Re-entrant conversion with an explicit shift state */
	mbstate_t state;
	size_t consumed;
	unsigned long total = 0;
	wchar_t wc;
	ILMemZero(&state, sizeof(state));
	mbrtowc((wchar_t *)0, (char *)0, 0, &state);
	while(count > 0)
	{
		consumed = mbrtowc(&wc, (char *)bytes, (size_t)count, &state);
		if(consumed != 0 && consumed != (size_t)(-1) &&
		   consumed != (size_t)(-2))
		{
			/* Ordinary character: add its UTF-16 length */
			total += ILUTF16WriteChar((unsigned short *)0, (unsigned long)wc);
			bytes += consumed;
			count -= (unsigned long)consumed;
			continue;
		}

		/* Embedded NUL (counted) or invalid byte (not counted):
		   either way exactly one input byte is stepped over */
		if(consumed == 0)
		{
			++total;
		}
		++bytes;
		--count;
	}
	return total;
#else
	/* Non re-entrant conversion; thread-safety is left to the
	   underlying libc */
	int consumed;
	unsigned long total = 0;
	wchar_t wc;
	mbtowc((wchar_t *)0, (char *)0, 0);
	while(count > 0)
	{
		consumed = mbtowc(&wc, (char *)bytes, (size_t)count);
		if(consumed > 0)
		{
			/* Ordinary character: add its UTF-16 length */
			total += ILUTF16WriteChar((unsigned short *)0, (unsigned long)wc);
			bytes += consumed;
			count -= (unsigned long)consumed;
			continue;
		}

		/* Embedded NUL (counted) or invalid byte (not counted):
		   either way exactly one input byte is stepped over */
		if(consumed == 0)
		{
			++total;
		}
		++bytes;
		--count;
	}
	return total;
#endif
#endif
}
Exemple #12
0
/* Replacement for mbrtowc() that papers over the platform defects selected
   by the MBRTOWC_*_BUG macros.  When none of them is set this is a plain
   call to mbrtowc().  Arguments and return value are those of mbrtowc().  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
# if MBRTOWC_NULL_ARG_BUG || MBRTOWC_RETVAL_BUG
  /* A null S is turned into the equivalent call on an empty string.  */
  if (s == NULL)
    {
      n = 1;
      s = "";
      pwc = NULL;
    }
# endif

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t fallback_state;

    /* Substitute our own state for mbrtowc's hidden one, since mbsinit()
       can only be applied to a state object we can name.  */
    if (ps == NULL)
      ps = &fallback_state;

    if (!mbsinit (ps))
      {
        /* A character is pending: finish it one byte at a time and count
           the bytes ourselves.  */
        size_t consumed = 0;
        while (n > 0)
          {
            wchar_t decoded;
            size_t rc = mbrtowc (&decoded, s, 1, ps);

            if (rc == (size_t)(-1))
              return (size_t)(-1);
            consumed++;
            if (rc != (size_t)(-2))
              {
                /* The pending character is now complete.  */
                if (pwc != NULL)
                  *pwc = decoded;
                return (decoded == 0 ? 0 : consumed);
              }
            s++;
            n--;
          }
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  {
    wchar_t decoded;
    size_t rc = mbrtowc (&decoded, s, n, ps);

    if (rc == (size_t)(-1) || rc == (size_t)(-2))
      return rc;

    if (pwc != NULL)
      *pwc = decoded;
    /* A decoded NUL must be reported as 0.  */
    return (decoded == 0 ? 0 : rc);
  }
# else
  return mbrtowc (pwc, s, n, ps);
# endif
}
Exemple #13
0
// Compile-time check of the return types of the wide-character library
// functions.  Every check is a static_assert over decltype(call-expression),
// so nothing below is executed at run time; the program only has to compile.
int main()
{
// mbstate_t comes from the underlying C library; it is defined (in C99) as:
//    a complete object type other than an array type that can hold the conversion
//    state information necessary to convert between sequences of multibyte
//    characters and wide characters
// Its layout is therefore unknown here, and "= {0}" may trigger clang's
// -Wmissing-braces; the warning is silenced just for this one declaration.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-braces"
#endif
    mbstate_t mb = {0};
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

    // Placeholder arguments for the calls below.  They are only used inside
    // decltype (an unevaluated operand), so the null pointers are never
    // dereferenced and `va` is never read.
    size_t s = 0;
    tm *tm = 0;
    wint_t w = 0;
    ::FILE* fp = 0;
#ifdef __APPLE__
    __darwin_va_list va;
#else
    __builtin_va_list va;
#endif
    char* ns = 0;
    wchar_t* ws = 0;
    // Formatted wide-character input/output.
    static_assert((std::is_same<decltype(fwprintf(fp, L"")), int>::value), "");
    static_assert((std::is_same<decltype(fwscanf(fp, L"")), int>::value), "");
    static_assert((std::is_same<decltype(swprintf(ws, s, L"")), int>::value), "");
    static_assert((std::is_same<decltype(swscanf(L"", L"")), int>::value), "");
    static_assert((std::is_same<decltype(vfwprintf(fp, L"", va)), int>::value), "");
    static_assert((std::is_same<decltype(vfwscanf(fp, L"", va)), int>::value), "");
    static_assert((std::is_same<decltype(vswprintf(ws, s, L"", va)), int>::value), "");
    static_assert((std::is_same<decltype(vswscanf(L"", L"", va)), int>::value), "");
    // Wide-character stream input/output.
    static_assert((std::is_same<decltype(fgetwc(fp)), wint_t>::value), "");
    static_assert((std::is_same<decltype(fgetws(ws, 0, fp)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(fputwc(L' ', fp)), wint_t>::value), "");
    static_assert((std::is_same<decltype(fputws(L"", fp)), int>::value), "");
    static_assert((std::is_same<decltype(fwide(fp, 0)), int>::value), "");
    static_assert((std::is_same<decltype(getwc(fp)), wint_t>::value), "");
    static_assert((std::is_same<decltype(putwc(L' ', fp)), wint_t>::value), "");
    static_assert((std::is_same<decltype(ungetwc(L' ', fp)), wint_t>::value), "");
    // Wide-string to number conversions.
    static_assert((std::is_same<decltype(wcstod(L"", (wchar_t**)0)), double>::value), "");
    static_assert((std::is_same<decltype(wcstof(L"", (wchar_t**)0)), float>::value), "");
    static_assert((std::is_same<decltype(wcstold(L"", (wchar_t**)0)), long double>::value), "");
    static_assert((std::is_same<decltype(wcstol(L"", (wchar_t**)0, 0)), long>::value), "");
    static_assert((std::is_same<decltype(wcstoll(L"", (wchar_t**)0, 0)), long long>::value), "");
    static_assert((std::is_same<decltype(wcstoul(L"", (wchar_t**)0, 0)), unsigned long>::value), "");
    static_assert((std::is_same<decltype(wcstoull(L"", (wchar_t**)0, 0)), unsigned long long>::value), "");
    // Wide-string and wide-memory utilities.  The searching functions are
    // checked here with a non-const argument only; the const overloads are
    // checked separately further down.
    static_assert((std::is_same<decltype(wcscpy(ws, L"")), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsncpy(ws, L"", s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcscat(ws, L"")), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsncat(ws, L"", s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcscmp(L"", L"")), int>::value), "");
    static_assert((std::is_same<decltype(wcscoll(L"", L"")), int>::value), "");
    static_assert((std::is_same<decltype(wcsncmp(L"", L"", s)), int>::value), "");
    static_assert((std::is_same<decltype(wcsxfrm(ws, L"", s)), size_t>::value), "");
    static_assert((std::is_same<decltype(wcschr((wchar_t*)0, L' ')), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcscspn(L"", L"")), size_t>::value), "");
    static_assert((std::is_same<decltype(wcslen(L"")), size_t>::value), "");
    static_assert((std::is_same<decltype(wcspbrk((wchar_t*)0, L"")), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsrchr((wchar_t*)0, L' ')), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsspn(L"", L"")), size_t>::value), "");
    static_assert((std::is_same<decltype(wcsstr((wchar_t*)0, L"")), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcstok(ws, L"", (wchar_t**)0)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wmemchr((wchar_t*)0, L' ', s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wmemcmp(L"", L"", s)), int>::value), "");
    static_assert((std::is_same<decltype(wmemcpy(ws, L"", s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wmemmove(ws, L"", s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wmemset(ws, L' ', s)), wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsftime(ws, s, L"", tm)), size_t>::value), "");
    // Single-byte / multibyte / wide conversions.
    static_assert((std::is_same<decltype(btowc(0)), wint_t>::value), "");
    static_assert((std::is_same<decltype(wctob(w)), int>::value), "");
    static_assert((std::is_same<decltype(mbsinit(&mb)), int>::value), "");
    static_assert((std::is_same<decltype(mbrlen("", s, &mb)), size_t>::value), "");
    static_assert((std::is_same<decltype(mbrtowc(ws, "", s, &mb)), size_t>::value), "");
    static_assert((std::is_same<decltype(wcrtomb(ns, L' ', &mb)), size_t>::value), "");
    static_assert((std::is_same<decltype(mbsrtowcs(ws, (const char**)0, s, &mb)), size_t>::value), "");
    static_assert((std::is_same<decltype(wcsrtombs(ns, (const wchar_t**)0, s, &mb)), size_t>::value), "");

    // These tests fail on systems whose C library doesn't provide a correct overload
    // set for wcschr, wcspbrk, wcsrchr, wcsstr, and wmemchr, unless the compiler is
    // a suitably recent version of Clang.
    // (A const input pointer must yield a const result pointer.)
#if !defined(__APPLE__) || defined(_LIBCPP_PREFERRED_OVERLOAD)
    static_assert((std::is_same<decltype(wcschr((const wchar_t*)0, L' ')), const wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcspbrk((const wchar_t*)0, L"")), const wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsrchr((const wchar_t*)0, L' ')), const wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wcsstr((const wchar_t*)0, L"")), const wchar_t*>::value), "");
    static_assert((std::is_same<decltype(wmemchr((const wchar_t*)0, L' ', s)), const wchar_t*>::value), "");
#endif

    // Functions that read the standard input stream; skipped when the
    // configuration macro says stdin is unavailable.
#ifndef _LIBCPP_HAS_NO_STDIN
    static_assert((std::is_same<decltype(getwchar()), wint_t>::value), "");
    static_assert((std::is_same<decltype(vwscanf(L"", va)), int>::value), "");
    static_assert((std::is_same<decltype(wscanf(L"")), int>::value), "");
#endif

    // Functions that write the standard output stream; skipped when the
    // configuration macro says stdout is unavailable.
#ifndef _LIBCPP_HAS_NO_STDOUT
    static_assert((std::is_same<decltype(putwchar(L' ')), wint_t>::value), "");
    static_assert((std::is_same<decltype(vwprintf(L"", va)), int>::value), "");
    static_assert((std::is_same<decltype(wprintf(L"")), int>::value), "");
#endif
}
Exemple #14
0
/*
 * Entry point: parse the -d, -f and -t options, build the search key from
 * the first operand, then run look() over each file operand (or over the
 * default word list when no file is given).  The process exit status is the
 * product of the look() results for all non-empty files.
 */
int
main(int argc, char *argv[])
{
	struct stat sb;
	unsigned const char *file = _path_words;
	unsigned char *front, *back;
	wchar_t termchar = L'\0';
	wchar_t *key;
	int opt, fd, match;

	(void) setlocale(LC_CTYPE, "");

	while ((opt = getopt(argc, argv, "dft:")) != -1) {
		if (opt == 'd') {
			dflag = 1;
		} else if (opt == 'f') {
			fflag = 1;
		} else if (opt == 't') {
			/*
			 * The option argument must decode to exactly one
			 * character that spans the whole argument.
			 */
			size_t consumed = mbrtowc(&termchar, optarg,
			    MB_LEN_MAX, NULL);

			if (consumed != strlen(optarg))
				errx(2, "invalid termination character");
		} else {
			/* '?' and anything unexpected. */
			usage();
		}
	}
	argv += optind;
	argc -= optind;

	if (argc == 0)
		usage();
	/* With only a key and no file, -d and -f are switched on. */
	if (argc == 1)
		dflag = fflag = 1;
	key = prepkey(*argv++, termchar);
	if (argc >= 2)
		file = *argv++;

	match = 1;

	do {
		fd = open(file, O_RDONLY, 0);
		if (fd < 0 || fstat(fd, &sb))
			err(2, "%s", file);
		if ((uintmax_t)sb.st_size > (uintmax_t)SIZE_T_MAX)
			errx(2, "%s: %s", file, strerror(EFBIG));

		/* Empty files are not mapped or searched; just close them. */
		if (sb.st_size != 0) {
			front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
			    MAP_SHARED, fd, (off_t)0);
			if (front == MAP_FAILED)
				err(2, "%s", file);
			back = front + sb.st_size;
			match *= (look(key, front, back));
		}
		close(fd);
	} while (argc-- > 2 && (file = *argv++));

	exit(match);
}
Exemple #15
0
/*
 * Helper for the multibyte conversion tests.
 *
 * Switches LC_CTYPE to t->locale, then decodes t->data twice:
 *   1. with mbrtowc(), once for every chunk size n from 9 down to 1, checking
 *      each decoded character against t->wchars[] and the number of bytes it
 *      consumed against t->widths[];
 *   2. with a single mbsrtowcs() call, checking the returned length and the
 *      converted characters.
 *
 * t            test vector (locale, data, wchars, widths, length are read)
 * use_mbstate  when true a local, zeroed mbstate_t is passed to the
 *              conversion functions; when false a null pointer is passed
 */
static void
h_ctype2(const struct test *t, bool use_mbstate)
{
	mbstate_t *stp;
	mbstate_t st;
	char buf[SIZE];
	char *str;
	size_t n;

	ATF_REQUIRE_STREQ(setlocale(LC_ALL, "C"), "C");
#if defined(__NetBSD__)
	ATF_REQUIRE(setlocale(LC_CTYPE, t->locale) != NULL);
#else
	/* Elsewhere a missing locale is reported and the check is skipped. */
	if (setlocale(LC_CTYPE, t->locale) == NULL) {
		fprintf(stderr, "Locale %s not found.\n", t->locale);
		return;
	}
#endif

	/* Print an escaped, printable rendering of the input bytes. */
	(void)strvis(buf, t->data, VIS_WHITE | VIS_OCTAL);
	(void)printf("Checking string: \"%s\"\n", buf);

	ATF_REQUIRE((str = setlocale(LC_ALL, NULL)) != NULL);
	(void)printf("Using locale: %s\n", str);

	(void)printf("Using mbstate: %s\n", use_mbstate ? "yes" : "no");

	(void)memset(&st, 0, sizeof(st));
//	mbrtowc(0, 0, 0, &st); /* XXX for ISO2022-JP */
	stp = use_mbstate ? &st : 0;

	/* n is the maximum number of bytes offered to each mbrtowc() call. */
	for (n = 9; n > 0; n--) {
		const char *src = t->data;
		wchar_t dst;
		size_t nchar = 0;
		int width = 0;

		/* Each pass must start from the initial conversion state. */
		ATF_REQUIRE(mbsinit(stp) != 0);

		for (;;) {
			size_t rv = mbrtowc(&dst, src, n, stp);

			/* 0 means the terminating null character was decoded. */
			if (rv == 0)
				break;

			/*
			 * Incomplete character: all n bytes were absorbed
			 * into the state, so advance and keep accumulating
			 * the width of the character in progress.
			 */
			if (rv == (size_t)-2) {
				src += n;
				width += n;

				continue;
			}
			if (rv == (size_t)-1) {
				ATF_REQUIRE_EQ(errno, EILSEQ);
				atf_tc_fail("Invalid sequence");
				/* NOTREACHED */
			}

			width += rv;
			src += rv;

			if (dst != t->wchars[nchar] ||
			    width != t->widths[nchar]) {
				(void)printf("At position %zd:\n", nchar);
				(void)printf("  expected: 0x%04X (%u)\n",
					t->wchars[nchar], t->widths[nchar]);
				(void)printf("  got     : 0x%04X (%u)\n",
					dst, width);
				atf_tc_fail("Test failed");
			}

			/* Character complete: restart the byte count. */
			nchar++;
			width = 0;
		}

		ATF_REQUIRE_EQ_MSG(dst, 0, "Incorrect terminating character: "
			"0x%04X (expected: 0x00)", dst);

		ATF_REQUIRE_EQ_MSG(nchar, t->length, "Incorrect length: "
			"%zd (expected: %zd)", nchar, t->length);
	}

	/* Second check: convert the whole string in one mbsrtowcs() call. */
	{
		wchar_t wbuf[SIZE];
		size_t rv;
		char const *src = t->data;
		int i;

		/* Pre-fill with non-zero bytes so the terminator written by
		 * the conversion is distinguishable from untouched storage. */
		(void)memset(wbuf, 0xFF, sizeof(wbuf));

		rv = mbsrtowcs(wbuf, &src, SIZE, stp);

		ATF_REQUIRE_EQ_MSG(rv, t->length, "Incorrect length: %zd "
			"(expected: %zd)", rv, t->length);
		/* src is required to have been set to NULL by the call. */
		ATF_REQUIRE_EQ(src, NULL);

		for (i = 0; wbuf[i] != 0; ++i) {
			if (wbuf[i] == t->wchars[i])
				continue;

			(void)printf("At position %d:\n", i);
			(void)printf("  expected: 0x%04X\n", t->wchars[i]);
			(void)printf("  got     : 0x%04X\n", wbuf[i]);
			atf_tc_fail("Test failed");
		}

		ATF_REQUIRE_EQ_MSG((size_t)i, t->length, "Incorrect length: "
			"%d (expected: %zd)", i, t->length);
	}

	(void)printf("Ok.\n");
}
Exemple #16
0
/* Decode one history item stored in the fish 1.x format (adapted from fish
   1.x's item_get()).  The bytes in [begin, begin + length) are decoded one
   character at a time; a line whose first character is '#' is treated as a
   timestamp line, and the item ends at the first newline that is not
   preceded by an unescaped backslash. */
history_item_t history_t::decode_item_fish_1_x(const char *begin, size_t length) {

    const char *const end = begin + length;
    const char *pos = begin;

    wcstring out;
    time_t timestamp = 0;
    bool was_backslash = false;
    bool first_char = true;
    bool timestamp_mode = false;

    for (;;)
    {
        wchar_t c;
        mbstate_t state;

        /* Every character is decoded from a fresh conversion state. */
        memset(&state, 0, sizeof(state));
        const size_t res = mbrtowc(&c, pos, end - pos, &state);

        /* Incomplete sequence (including running out of input): stop. */
        if (res == (size_t)-2)
            break;

        /* Invalid byte or embedded NUL: skip a single byte and retry. */
        if (res == (size_t)-1 || res == (size_t)0)
        {
            pos++;
            continue;
        }
        pos += res;

        if (c == L'\n')
        {
            if (timestamp_mode)
            {
                /* End of a timestamp line: parse the first run of digits. */
                const wchar_t *digits = out.c_str();
                while (*digits && !iswdigit(*digits))
                    digits++;
                errno = 0;

                if (*digits)
                {
                    wchar_t *num_end;

                    errno = 0;
                    const time_t parsed = (time_t)wcstol(digits, &num_end, 10);

                    /* Accept only a non-zero value that consumed the rest
                       of the line without a range error. */
                    if (parsed && !errno && !*num_end)
                        timestamp = parsed;
                }

                out.clear();
                timestamp_mode = false;
                continue;
            }

            /* An unescaped newline terminates the item. */
            if (!was_backslash)
                break;
        }

        if (first_char && c == L'#')
            timestamp_mode = true;
        first_char = false;

        out.push_back(c);

        /* A backslash escapes the next character unless it is itself escaped. */
        was_backslash = (c == L'\\') && !was_backslash;
    }

    out = history_unescape_newlines_fish_1_x(out);
    return history_item_t(out, timestamp);
}
Exemple #17
0
/**
 * Convert the NUL-terminated multibyte string `in` to wide characters and
 * store the result, NUL-terminated, in the caller-supplied buffer `out`.
 *
 * Bytes that cannot be decoded are stored as ENCODE_DIRECT_BASE + byte value
 * and decoding restarts from the initial state at the next byte.  The same
 * byte-wise encoding is applied when a successfully decoded character falls
 * in [ENCODE_DIRECT_BASE, ENCODE_DIRECT_BASE+256) or equals
 * INTERNAL_SEPARATOR.
 *
 * `out` is written with at most one wide character per input byte plus the
 * terminator; the caller must provide a buffer of at least that size.
 *
 * Returns `out`.  NOTE(review): CHECK is defined elsewhere; it presumably
 * returns 0 when its first argument is null -- confirm against its
 * definition.
 */
wchar_t *str2wcs_internal( const char *in, wchar_t *out )
{
	size_t res=0;
	size_t in_pos=0;
	size_t out_pos = 0;
	mbstate_t state;
	size_t len;

	CHECK( in, 0 );
	CHECK( out, 0 );

	len = strlen(in);

	memset( &state, 0, sizeof(state) );

	while( in[in_pos] )
	{
		int encode_directly;

		res = mbrtowc( &out[out_pos], &in[in_pos], len-in_pos, &state );

		if( res == (size_t)(-1) || res == (size_t)(-2) )
		{
			/*
			  On failure mbrtowc stores nothing, so out[out_pos]
			  must not be inspected: decide from the return value
			  alone.
			*/
			encode_directly = 1;
		}
		else
		{
			/*
			  A decoded character that collides with the values
			  reserved for directly encoded bytes (or with the
			  internal separator) is re-encoded byte-wise.
			*/
			encode_directly =
				( ( out[out_pos] >= ENCODE_DIRECT_BASE) &&
				  ( out[out_pos] < ENCODE_DIRECT_BASE+256)) ||
				( out[out_pos] == INTERNAL_SEPARATOR );
		}

		if( encode_directly )
		{
			out[out_pos] = ENCODE_DIRECT_BASE + (unsigned char)in[in_pos];
			in_pos++;
			/* Restart decoding from the initial shift state. */
			memset( &state, 0, sizeof(state) );
		}
		else if( res == 0 )
		{
			/* mbrtowc stored the terminating L'\0' itself. */
			return out;
		}
		else
		{
			in_pos += res;
		}
		out_pos++;
	}
	out[out_pos] = 0;

	return out;
}
Exemple #18
0
/*
 * Given the first byte of an input character, read any further bytes
 * needed to complete a multibyte character and return the resulting wide
 * character.  The result is also stored in lastchar_wide, and
 * lastchar_wide_valid is always set: the value may be WEOF, which is
 * technically not a wide character at all.
 *
 * The conversion state is kept between calls and is cleared whenever
 * input ends, times out or turns out to be invalid.
 */
mod_export ZLE_INT_T
getrestchar(int inchar)
{
    static mbstate_t mbs;
    wchar_t outchar;
    char c = inchar;
    int timeout;

    lastchar_wide_valid = 1;

    if (inchar == EOF) {
	/* End of input, so reset the shift state. */
	memset(&mbs, 0, sizeof mbs);
	return lastchar_wide = WEOF;
    }

    for (;;) {
	/*
	 * Feed one byte at a time.  A zero return (null character) is
	 * handled like any other completed character.
	 */
	size_t cnt = mbrtowc(&outchar, &c, 1, &mbs);

	if (cnt == MB_INVALID) {
	    /* Invalid input: drop the partial state and report WEOF. */
	    memset(&mbs, 0, sizeof mbs);
	    return lastchar_wide = WEOF;
	}
	if (cnt != MB_INCOMPLETE)
	    return lastchar_wide = (ZLE_INT_T)outchar;

	/*
	 * More bytes are needed.  KEYTIMEOUT is always applied to the
	 * remainder of the character: its parts should arrive together,
	 * and without the timeout an invalid byte sequence could leave
	 * the input stuck.
	 */
	inchar = getbyte(1L, &timeout);
	/* getbyte deliberately resets lastchar_wide_valid */
	lastchar_wide_valid = 1;

	if (inchar != EOF) {
	    c = inchar;
	    continue;
	}

	memset(&mbs, 0, sizeof mbs);
	if (!timeout)
	    return lastchar_wide = WEOF;

	/*
	 * The initial byte was accepted (EOF was tested for above) but
	 * the follow-up timed out, which probably indicates a duff
	 * character.  Return a '?'.
	 */
	lastchar = '?';
	return lastchar_wide = L'?';
    }
}
Exemple #19
0
/*
 * Convert `len` bytes at `str` to wide characters using mbrtowc().
 *
 * The output is written to the cw->bp1 buffer, which is grown through
 * BINC_RETW as needed.  On return *dst points at that buffer and *tolen
 * holds the number of wide characters produced.  When USE_ICONV is defined
 * and `enc` differs from the locale codeset, the input is first passed
 * through iconv in chunks (see the CONVERT macro).
 *
 * Returns 0 on the normal path; the `err` label returns `error`, which is 1
 * unless an incomplete trailing sequence was seen, in which case it is the
 * negated count of unconverted bytes.
 *
 * NOTE(review): BINC_RETW, CONVERT and HANDLE_MBR_ERROR are macros defined
 * elsewhere; they presumably contain the jumps to `err` / early returns that
 * are not visible here -- confirm against their definitions.
 */
static int
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
                 size_t *tolen, const CHAR_T **dst, const char *enc)
{
    int j;
    size_t i = 0;
    CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
    size_t  *blen = &cw->blen1;
    mbstate_t mbs;
    size_t   n;
    ssize_t  nlen = len;
    const char *src = (const char *)str;
    iconv_t	id = (iconv_t)-1;
    char	buffer[CONV_BUFFER_SIZE];
    size_t	left = len;
    int		error = 1;

    /* NOTE(review): MEMSET's third argument is 1; presumably the macro
       scales it by the element size so this clears one mbstate_t --
       confirm against the macro definition. */
    MEMSET(&mbs, 0, 1);
    BINC_RETW(NULL, *tostr, *blen, nlen);

#ifdef USE_ICONV
    /* Only set up iconv when the source encoding is not the locale's. */
    if (strcmp(nl_langinfo(CODESET), enc)) {
        id = iconv_open(nl_langinfo(CODESET), enc);
        if (id == (iconv_t)-1)
            goto err;
        CONVERT(str, left, src, len);
    }
#endif

    /* i indexes the wide output, j the byte input. */
    for (i = 0, j = 0; j < len; ) {
        n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
        /* (size_t)-2: the remaining bytes form an incomplete character;
           remember how many bytes were left, as a negative value. */
        if (n == (size_t)-2) error = -(len-j);
        if (n == (size_t)-1 || n == (size_t)-2)
            HANDLE_MBR_ERROR(n, mbs, (*tostr)[i], src[j]);
        /* A converted null character reports 0 but still occupies one byte. */
        if (n == 0) n = 1;
        j += n;
        /* Grow the output buffer once it is full. */
        if (++i >= *blen) {
            nlen += 256;
            BINC_RETW(NULL, *tostr, *blen, nlen);
        }
        /* Current iconv chunk exhausted but input remains: convert the
           next chunk and restart the byte index. */
        if (id != (iconv_t)-1 && j == len && left) {
            CONVERT(str, left, src, len);
            j = 0;
        }
    }
    *tolen = i;

    if (id != (iconv_t)-1)
        iconv_close(id);

    *dst = cw->bp1;

    return 0;
err:
    /* Failure path: still report what was converted so far. */
    *tolen = i;
    if (id != (iconv_t)-1)
        iconv_close(id);
    *dst = cw->bp1;

    return error;
}
Exemple #20
0
/* Test driver for mbsnrtowcs().

   The first character of argv[1] selects which locale encoding the test
   data assumes: '1' ISO-8859-1/15, '2' UTF-8, '3' EUC-JP, '4' GB18030.
   Each case is run twice, once with an output limit smaller than the input
   ("limited") and once with room for the whole result ("unlimited").

   Returns 0 when a known case was selected and ran to completion; returns 1
   when the locale cannot be set, no argument was given, or the selector is
   unknown.  ASSERT is defined elsewhere; presumably it terminates the
   program on failure.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  {
    const char *src;

    memset (&state, '\0', sizeof (mbstate_t));

    /* NULL destination, output limit 0.  */
    src = "";
    ret = mbsnrtowcs (NULL, &src, 1, 0, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* NULL destination, output limit 1.  */
    src = "";
    ret = mbsnrtowcs (NULL, &src, 1, 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* Real destination but output limit 0: nothing may be stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs (&wc, &src, 1, 0, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));

    /* Real destination, output limit 1: the terminator is stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs (&wc, &src, 1, 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
  }

  if (argc > 1)
    {
      int unlimited;

      for (unlimited = 0; unlimited < 2; unlimited++)
        {
          #define BUFSIZE 10
          wchar_t buf[BUFSIZE];
          const char *src;
          mbstate_t temp_state;

          /* Fill the buffer with a sentinel so untouched slots are
             detectable.  */
          {
            size_t i;
            for (i = 0; i < BUFSIZE; i++)
              buf[i] = (wchar_t) 0xBADFACE;
          }

          /* In every case below, input bytes are overwritten with '\0'
             after they have been passed to mbrtowc.  */
          switch (argv[1][0])
            {
            case '1':
              /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
              {
                char input[] = "B\374\337er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wctob (wc) == (unsigned char) '\374');
                ASSERT (mbsinit (&state));
                input[1] = '\0';

                /* Counting pass: NULL destination, run on a copy of the
                   state; src must be left unchanged.  */
                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 4, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 2);
                ASSERT (mbsinit (&state));

                /* Converting pass.  */
                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 4, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 3));
                ASSERT (wctob (buf[0]) == (unsigned char) '\337');
                if (unlimited)
                  {
                    ASSERT (buf[1] == 'e');
                    ASSERT (buf[2] == 'r');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '2':
              /* Locale encoding is UTF-8.  */
              {
                char input[] = "B\303\274\303\237er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of a two-byte character so the
                   state is left mid-character.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* Counting pass on a copy of the non-initial state.  */
                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 6, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Converting pass, resuming from the partial character.  */
                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 6, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 5));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '3':
              /* Locale encoding is EUC-JP.  */
              {
                char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == '<');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 2, &state);
                ASSERT (ret == 2);
                ASSERT (wctob (wc) == EOF);
                ASSERT (mbsinit (&state));
                input[1] = '\0';
                input[2] = '\0';

                /* Feed only the first byte of the next two-byte character.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 3, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[3] = '\0';

                /* Counting pass on a copy of the non-initial state.  */
                src = input + 4;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 5, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 4);
                ASSERT (!mbsinit (&state));

                /* Converting pass, resuming from the partial character.  */
                src = input + 4;
                ret = mbsnrtowcs (buf, &src, 5, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 3 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == '>');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '4':
              /* Locale encoding is GB18030.  */
              {
                char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of a two-byte character.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* Counting pass on a copy of the non-initial state.  */
                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 8, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Converting pass, resuming from the partial character.  */
                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 8, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            default:
              /* Unknown encoding selector.  */
              return 1;
            }
        }

      return 0;
    }

  /* No encoding selector was given.  */
  return 1;
}
Exemple #21
0
/*
 * Read a line of input from the terminal into buf, echoing as it goes.
 *
 * buf    destination buffer
 * nmax   maximum number of bytes stored in buf
 * pchar  prompt character, reprinted after the kill character is typed
 *
 * Input ends at a newline or ESC, or when nmax bytes have been stored.  The
 * terminal's erase and kill characters are honoured unless preceded by a
 * backslash.  Control characters are echoed in caret notation.  The last
 * byte stored is overwritten with the terminating '\0'.  Erasing at the
 * start of the line jumps back through siglongjmp(restore, 1).
 */
static void ttyin(char buf[], register int nmax, char pchar)
{
	char *sp;
	int c;
	int slash = 0;
	int maxlen;

	sp = buf;
	maxlen = 0;
	while (sp - buf < nmax) {
		if (promptlen > maxlen)
			maxlen = promptlen;
		c = readch();
		if (c == '\\') {
			slash++;
		} else if (((cc_t) c == otty.c_cc[VERASE]) && !slash) {
			if (sp > buf) {
#ifdef HAVE_WIDECHAR
				if (MB_CUR_MAX > 1) {
					wchar_t wc;
					size_t pos = 0, mblength;
					mbstate_t state, state_bak;

					memset(&state, '\0', sizeof(mbstate_t));

					/*
					 * Walk the line from the start to find
					 * the last character and its length in
					 * bytes.
					 */
					while (1) {
						state_bak = state;
						/*
						 * Only the bytes between the
						 * current position and sp are
						 * valid input; do not let
						 * mbrtowc look beyond sp.
						 */
						mblength =
						    mbrtowc(&wc, buf + pos,
							    sp - (buf + pos),
							    &state);

						if (mblength == (size_t)-2
						    || mblength == (size_t)-1) {
							/*
							 * Undecodable: undo the
							 * state change and count
							 * it as a single byte.
							 */
							state = state_bak;
							mblength = 1;
						} else if (mblength == 0) {
							/* NUL also occupies one byte. */
							mblength = 1;
						}

						if (buf + pos + mblength >= sp)
							break;

						pos += mblength;
					}

					if (mblength == 1) {
						ERASEONECOLUMN(docrterase);
					} else {
						/* Erase as many columns as the
						 * character occupies, at least one. */
						int wc_width;
						wc_width = wcwidth(wc);
						wc_width =
						    (wc_width <
						     1) ? 1 : wc_width;
						while (wc_width--) {
							ERASEONECOLUMN(docrterase);
						}
					}

					/* Drop every byte of the erased character. */
					while (mblength--) {
						--promptlen;
						--sp;
					}
				} else
#endif	/* HAVE_WIDECHAR */
				{
					--promptlen;
					ERASEONECOLUMN(docrterase);
					--sp;
				}

				/* Control characters were echoed as two
				 * columns (caret + letter): erase one more. */
				if ((*sp < ' ' && *sp != '\n') || *sp == RUBOUT) {
					--promptlen;
					ERASEONECOLUMN(docrterase);
				}
				continue;
			} else {
				/* Erase on an empty line aborts the input. */
				if (!eraseln)
					promptlen = maxlen;
				siglongjmp(restore, 1);
			}
		} else if (((cc_t) c == otty.c_cc[VKILL]) && !slash) {
			if (hard) {
				show(c);
				putchar('\n');
				putchar(pchar);
			} else {
				putchar('\r');
				putchar(pchar);
				if (eraseln)
					erasep(1);
				else if (docrtkill)
					while (promptlen-- > 1)
						putserr(BSB);
				promptlen = 1;
			}
			sp = buf;
			fflush(stdout);
			continue;
		}
		/* An escaped erase/kill character replaces the backslash
		 * that was stored and echoed just before it. */
		if (slash && ((cc_t) c == otty.c_cc[VKILL]
			      || (cc_t) c == otty.c_cc[VERASE])) {
			ERASEONECOLUMN(docrterase);
			--sp;
		}
		if (c != '\\')
			slash = 0;
		*sp++ = c;
		/* Echo control characters in caret notation. */
		if ((c < ' ' && c != '\n' && c != ESC) || c == RUBOUT) {
			c += (c == RUBOUT) ? -0100 : 0100;
			putserr(CARAT);
			promptlen++;
		}
		if (c != '\n' && c != ESC) {
			putcerr(c);
			promptlen++;
		} else
			break;
	}
	/* Overwrite the last stored byte with the terminator. */
	*--sp = '\0';
	if (!eraseln)
		promptlen = maxlen;
	if (sp - buf >= nmax - 1)
		more_error(_("Line too long"));
}
Exemple #22
0
/*
 * Public entry point for pattern expansion.
 *
 * Initialises *pglob (unless GLOB_APPEND is set), converts the multibyte
 * `pattern` into the internal Char representation in a local buffer, and
 * hands it to globexp1() when GLOB_BRACE is set or to glob0() otherwise.
 * Unless GLOB_NOESCAPE is set, a character preceded by QUOTE is stored
 * with the M_PROTECT bit.  Returns GLOB_NOMATCH if the pattern contains
 * an invalid or incomplete multibyte sequence; otherwise returns whatever
 * globexp1()/glob0() returns.
 */
int
glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
{
	Char patbuf[MAXPATHLEN];
	Char *dst, *dst_end, prot;
	const char *src;
	mbstate_t mbs;
	wchar_t wc;
	size_t clen, limit;

	src = pattern;
	if ((flags & GLOB_APPEND) == 0) {
		pglob->gl_pathc = 0;
		pglob->gl_pathv = NULL;
		if ((flags & GLOB_DOOFFS) == 0)
			pglob->gl_offs = 0;
	}

	limit = 0;

	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
	pglob->gl_errfunc = errfunc;
	pglob->gl_matchc = 0;

	dst = patbuf;
	dst_end = dst + MAXPATHLEN - 1;
	memset(&mbs, 0, sizeof(mbs));

	if (flags & GLOB_NOESCAPE) {
		/* No quoting: copy characters through unchanged. */
		for (; dst_end - dst >= MB_CUR_MAX; src += clen) {
			clen = mbrtowc(&wc, src, MB_LEN_MAX, &mbs);
			if (clen == (size_t)-1 || clen == (size_t)-2)
				return (GLOB_NOMATCH);
			if (clen == 0)
				break;
			*dst++ = wc;
		}
	} else {
		/* Protect the quoted characters. */
		while (dst_end - dst >= MB_CUR_MAX) {
			prot = 0;
			if (*src == QUOTE) {
				/* A trailing QUOTE stands for itself. */
				if (*++src == EOS) {
					*dst++ = QUOTE | M_PROTECT;
					continue;
				}
				prot = M_PROTECT;
			}
			clen = mbrtowc(&wc, src, MB_LEN_MAX, &mbs);
			if (clen == (size_t)-1 || clen == (size_t)-2)
				return (GLOB_NOMATCH);
			if (clen == 0)
				break;
			*dst++ = wc | prot;
			src += clen;
		}
	}
	*dst = EOS;

	return (flags & GLOB_BRACE) ?
	    globexp1(patbuf, pglob, &limit) :
	    glob0(patbuf, pglob, &limit);
}
Exemple #23
0
/* Decode the byte string `arg` from the current locale encoding into a
   newly allocated wide character string.

   arg              NUL-terminated input bytes
   wstr             receives the allocated result (PyMem_RawMalloc) on success
   wlen             if not NULL: receives the number of wide characters on
                    success, or the byte offset of the failure on a decoding
                    error
   reason           if not NULL: set to a static message on a decoding error
   surrogateescape  if non-zero, undecodable bytes (and decoded surrogate
                    code points) are escaped as 0xdc00 + byte instead of
                    being reported as an error

   Returns 0 on success, -1 on memory allocation failure or size overflow,
   and -2 on a decoding error.  */
static int
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, int surrogateescape)
{
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

    /* First attempt: convert the whole string at once with mbstowcs(). */
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    argsize = mbstowcs(NULL, arg, 0);
#endif
    if (argsize != (size_t)-1) {
        /* Guard the (argsize + 1) * sizeof(wchar_t) computation below. */
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
            return -1;
        }
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
        if (!res) {
            return -1;
        }

        count = mbstowcs(res, arg, argsize + 1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
                ;
            if (*tmp == 0) {
                if (wlen != NULL) {
                    *wlen = count;
                }
                *wstr = res;
                return 0;
            }
        }
        /* Either the conversion failed or a surrogate was found: discard
           this buffer and fall through to the slow path. */
        PyMem_RawFree(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
        return -1;
    }
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res) {
        return -1;
    }

    /* From here on argsize counts the input bytes still to be consumed,
       including the terminating NUL. */
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }

        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            goto decode_error;
        }

        if (converted == (size_t)-1) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }

        if (Py_UNICODE_IS_SURROGATE(*out)) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--) {
                *out++ = 0xdc00 + *in++;
            }
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    if (wlen != NULL) {
        *wlen = out - res;
    }
    *wstr = res;
    return 0;

decode_error:
    /* Report the byte offset at which decoding stopped. */
    PyMem_RawFree(res);
    if (wlen) {
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes > 128. This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
#endif   /* HAVE_MBRTOWC */
}
Exemple #24
0
/*
 * Expand one directory level of a glob pattern.
 *
 * Opens the directory named by pathbuf (terminated here at pathend),
 * converts every entry name to Chars stored at pathend, and for each entry
 * accepted by match(pathend, pattern, restpattern) continues the expansion
 * through glob2() with the remainder of the pattern.
 *
 * Returns GLOB_ABORTED if pathend lies beyond pathend_last, or if the
 * directory cannot be opened and the error callback / GLOB_ERR asks for an
 * abort (or the path cannot be converted for the callback).  Returns 0 if
 * the directory cannot be opened but the error is ignored.  Otherwise
 * returns 0 or the first non-zero value handed back by glob2().
 */
static int
glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
      Char *pattern, Char *restpattern,
      glob_t *pglob, size_t *limit)
{
	struct dirent *dp;
	DIR *dirp;
	int err;
	char buf[MAXPATHLEN];

	/*
	 * The readdirfunc declaration can't be prototyped, because it is
	 * assigned, below, to two functions which are prototyped in glob.h
	 * and dirent.h as taking pointers to differently typed opaque
	 * structures.
	 */
	struct dirent *(*readdirfunc)();

	if (pathend > pathend_last)
		return (GLOB_ABORTED);
	*pathend = EOS;
	errno = 0;

	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
		/* TODO: don't call for ENOENT or ENOTDIR? */
		if (pglob->gl_errfunc) {
			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
				return (GLOB_ABORTED);
			if (pglob->gl_errfunc(buf, errno) ||
			    pglob->gl_flags & GLOB_ERR)
				return (GLOB_ABORTED);
		}
		return(0);
	}

	err = 0;

	/* Search directory for matching names. */
	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
		readdirfunc = pglob->gl_readdir;
	else
		readdirfunc = readdir;
	while ((dp = (*readdirfunc)(dirp))) {
		char *sc;
		Char *dc;
		wchar_t wc;
		size_t clen;
		mbstate_t mbs;
		int terminated;

		/* Initial DOT must be matched literally. */
		if (dp->d_name[0] == DOT && *pattern != DOT)
			continue;
		memset(&mbs, 0, sizeof(mbs));
		dc = pathend;
		sc = dp->d_name;
		terminated = 0;
		while (dc < pathend_last) {
			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
			if (clen == (size_t)-1 || clen == (size_t)-2) {
				/*
				 * Invalid or incomplete sequence: pass the
				 * byte through by itself and restart in the
				 * initial shift state.  Go through unsigned
				 * char so that bytes >= 0x80 are not
				 * sign-extended where plain char is signed.
				 */
				wc = (unsigned char)*sc;
				clen = 1;
				memset(&mbs, 0, sizeof(mbs));
			}
			if ((*dc++ = wc) == EOS) {
				terminated = 1;
				break;
			}
			sc += clen;
		}
		/*
		 * If the copy stopped because the buffer filled up, no EOS
		 * was stored and the name is unusable: neither match() nor
		 * the --dc below (which expects dc to sit one past the EOS)
		 * would see a terminated string.  Skip such entries.
		 */
		if (!terminated || !match(pathend, pattern, restpattern)) {
			*pathend = EOS;
			continue;
		}
		err = glob2(pathbuf, --dc, pathend_last, restpattern,
		    pglob, limit);
		if (err)
			break;
	}

	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
		(*pglob->gl_closedir)(dirp);
	else
		closedir(dirp);
	return(err);
}
// Exercises wcstombs() and wcsrtombs(): counting with a NULL destination,
// failure on an unrepresentable wide character, truncated and complete
// conversions into a buffer, and rejection of a non-initial mbstate_t.
TEST(wchar, wcstombs_wcrtombs) {
  const wchar_t chars[] = { L'h', L'e', L'l', L'l', L'o', 0 };
  // "hi" followed by the wide character the tests below expect to fail on.
  const wchar_t bad_chars[] = { L'h', L'i', static_cast<wchar_t>(0xffffffff), 0 };
  const wchar_t* src;
  char bytes[BUFSIZ];

  // Given a NULL destination, these functions count valid characters.
  // The length argument makes no difference to the expected count, and
  // wcsrtombs is expected to leave src untouched.
  EXPECT_EQ(5U, wcstombs(NULL, chars, 0));
  EXPECT_EQ(5U, wcstombs(NULL, chars, 4));
  EXPECT_EQ(5U, wcstombs(NULL, chars, 256));
  src = chars;
  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 0, NULL));
  EXPECT_EQ(&chars[0], src);
  src = chars;
  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 4, NULL));
  EXPECT_EQ(&chars[0], src);
  src = chars;
  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 256, NULL));
  EXPECT_EQ(&chars[0], src);

  // An unrepresentable char just returns an error from wcstombs...
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(NULL, bad_chars, 0));
  EXPECT_EQ(EILSEQ, errno);
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(NULL, bad_chars, 256));
  EXPECT_EQ(EILSEQ, errno);

  // And wcsrtombs doesn't tell us where it got stuck because we didn't ask it
  // to actually convert anything...
  errno = 0;
  src = bad_chars;
  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(NULL, &src, 0, NULL));
  EXPECT_EQ(&bad_chars[0], src);
  EXPECT_EQ(EILSEQ, errno);
  errno = 0;
  src = bad_chars;
  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(NULL, &src, 256, NULL));
  EXPECT_EQ(&bad_chars[0], src);
  EXPECT_EQ(EILSEQ, errno);

  // Okay, now let's test actually converting something...
  // The buffer is pre-filled with 'x' so the checks can see exactly which
  // bytes were written (and whether a terminator was stored).
  memset(bytes, 'x', sizeof(bytes));
  EXPECT_EQ(0U, wcstombs(bytes, chars, 0));
  memset(bytes, 'x', sizeof(bytes));
  EXPECT_EQ(4U, wcstombs(bytes, chars, 4));
  bytes[5] = 0;
  EXPECT_STREQ("hellx", bytes);
  memset(bytes, 'x', sizeof(bytes));
  EXPECT_EQ(5U, wcstombs(bytes, chars, 256));
  EXPECT_STREQ("hello", bytes);
  memset(bytes, 'x', sizeof(bytes));
  EXPECT_EQ(5U, wcstombs(bytes, chars, 6));
  EXPECT_STREQ("hello", bytes);
  errno = 0;
  memset(bytes, 'x', sizeof(bytes));
  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(bytes, bad_chars, 256));
  EXPECT_EQ(EILSEQ, errno);
  bytes[3] = 0;
  EXPECT_STREQ("hix", bytes);

  // wcsrtombs is a bit more informative...
  // NOTE(review): errno is never reset from here to the end of the test, so
  // every EILSEQ check below observes the value left behind by the failing
  // wcstombs call above, not something set by the call it follows. Confirm
  // whether that is intended.
  memset(bytes, 'x', sizeof(bytes));
  src = chars;
  EXPECT_EQ(0U, wcsrtombs(bytes, &src, 0, NULL));
  EXPECT_EQ(&chars[0], src); // No input consumed.
  EXPECT_EQ(EILSEQ, errno);

  memset(bytes, 'x', sizeof(bytes));
  src = chars;
  EXPECT_EQ(4U, wcsrtombs(bytes, &src, 4, NULL));
  EXPECT_EQ(&chars[4], src); // Some input consumed.
  EXPECT_EQ(EILSEQ, errno);
  bytes[5] = 0;
  EXPECT_STREQ("hellx", bytes);

  memset(bytes, 'x', sizeof(bytes));
  src = chars;
  EXPECT_EQ(5U, wcsrtombs(bytes, &src, 256, NULL));
  EXPECT_EQ(NULL, src); // All input consumed!
  EXPECT_EQ(EILSEQ, errno);
  EXPECT_STREQ("hello", bytes);

  memset(bytes, 'x', sizeof(bytes));
  src = chars;
  EXPECT_EQ(5U, wcsrtombs(bytes, &src, 6, NULL));
  EXPECT_EQ(NULL, src); // All input consumed.
  EXPECT_EQ(EILSEQ, errno);
  EXPECT_STREQ("hello", bytes);

  // On failure src is expected to point at the offending wide character.
  memset(bytes, 'x', sizeof(bytes));
  src = bad_chars;
  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(bytes, &src, 256, NULL));
  EXPECT_EQ(&bad_chars[2], src);
  EXPECT_EQ(EILSEQ, errno);
  bytes[3] = 0;
  EXPECT_STREQ("hix", bytes);

  // Any non-initial state is invalid when calling wcsrtombs.
  // The mbrtowc call feeds a lone "\xc2" byte and must report an incomplete
  // character (-2), which leaves ps in a non-initial state.
  mbstate_t ps;
  src = chars;
  memset(&ps, 0, sizeof(ps));
  ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(NULL, "\xc2", 1, &ps));
  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(NULL, &src, 0, &ps));
  EXPECT_EQ(EILSEQ, errno);
}
Exemple #26
0
/*
 * Lay out one row for display.
 *
 * Composes the logical row (vrow->row) into a byte string, passes it
 * through vtlayout->transform() to obtain the visual byte string plus the
 * in2out/out2in character index maps, then walks the visual string one
 * multibyte character at a time: each character is added to vrow->vrow
 * (or marked as not drawn) and the visual<->logical cell index tables in
 * vrow are filled in.
 */
static inline void do_layout(VisualRow *vrow, VTLayout *vtlayout)
{
#define SIZE 4096
  size_t logical_char2cell_index[SIZE];
  size_t in2out[SIZE];
  size_t out2in[SIZE];
  char logical[SIZE]; /* is this enough? */
  int logical_width, logical_length;
  char visual[SIZE];  /* is this enough? */
  size_t visual_length = SIZE;
  wchar_t wc;
  mbstate_t ps;
  size_t mb_len;
  size_t consumed = 0;   /* bytes of visual[] already processed */
  int cursor_pos = 0;    /* next cell index in the visual row */
  int current_char = 0;  /* index of the current character in visual[] */
  int width;
  int logical_cell_index;
  Char mbchar;

  /* Clear the whole of each array.  The index tables hold size_t
     elements, so their size in bytes is SIZE * sizeof(size_t), not SIZE. */
  memset(logical_char2cell_index,0,sizeof(logical_char2cell_index));
  memset(logical,0,sizeof(logical));
  memset(visual,0,sizeof(visual));
  memset(in2out,0,sizeof(in2out));
  memset(out2in,0,sizeof(out2in));
  memset(&ps,0,sizeof(ps));

  construct_char2cell_index(vrow->row,logical_char2cell_index);

  Row_compose_bytes(vrow->row,
                    0,vrow->row->num_columns,
                    logical,SIZE,
                    &logical_length,&logical_width,1);
#undef SIZE
#if  debug
  printf("logical: [%s]\n",logical);
  printf("logical_length = %d\n",logical_length);
#endif

  vtlayout->transform(vtlayout,
                     logical, logical_length,
                     visual, &visual_length,
                     in2out,out2in,NULL);
#if debug
  printf("visual : [%s]\n",visual);
  printf("visual_length  = %lu\n\n",(unsigned long)visual_length);
#endif

      /* Well off course, I need to refine this code ;-) */
  while(consumed < visual_length)
  {
    mb_len = mbrtowc(&wc,visual+consumed,visual_length-consumed,&ps);
    if(mb_len == 0 || mb_len == (size_t)-1 || mb_len == (size_t)-2 )
    {
      /* NUL, invalid or incomplete sequence: stop laying out this row */
#if debug
      printf("found invalid or NULL character(%lu)\n",(unsigned long)mb_len);
#endif
      break;
    }
    else
    {
      /* visual char index -> logical char index -> logical cell index */
      logical_cell_index = logical_char2cell_index[out2in[current_char]];
#if debug
      printf("logical char index[%lu] <-> visual char index[%d]\n",
             (unsigned long)out2in[current_char],current_char);
#endif
      if(Row_is_char_drawn(vrow->row,logical_cell_index))
      {
        /* NOTE(review): wcwidth() returns -1 for non-printable characters,
           which would move cursor_pos backwards below - confirm that such
           characters cannot reach this point. */
        width = wcwidth(wc);
        Char_init(mbchar,visual+consumed,mb_len,width,(width>0));
        Row_add_char(vrow->vrow,
                     cursor_pos,
                     &mbchar,
                     Row_is_bold(vrow->row,logical_cell_index),
                     Row_is_blink(vrow->row,logical_cell_index),
                     Row_is_inverse(vrow->row,logical_cell_index),
                     Row_is_underline(vrow->row,logical_cell_index),
                     Row_get_foreground(vrow->row,logical_cell_index),
                     Row_get_background(vrow->row,logical_cell_index),
                     Row_get_charset(vrow->row,logical_cell_index));
      }
      else
      {
        /* undrawn cell: occupies exactly one column in the visual row */
        Row_set_char_drawn(vrow->vrow,cursor_pos,0);
        width = 1;
      }

      vrow->visual2logical_index[cursor_pos] = logical_cell_index;
      vrow->logical2visual_index[logical_cell_index] = cursor_pos;
#if debug
      printf("logical cell index[%d] <-> visual  cell index[%d]\n",
             logical_cell_index,cursor_pos);
#endif

    }
        /* process for multi column character */
    if(width > 1 && Row_is_char_drawn(vrow->row,logical_cell_index) &&
       width == Row_get_cell_width(vrow->row,logical_cell_index) )
    {
      int i;
      for(i=1;i<width;i++)
      {
        vrow->visual2logical_index[cursor_pos+i] = logical_cell_index+i;
        vrow->logical2visual_index[logical_cell_index+i] = cursor_pos+i;
      }
    }
    cursor_pos+=width;
    current_char++;
    consumed+=mb_len;
  }
      /* The visual row possibly has ligatures */
  if(cursor_pos < vrow->row->num_columns &&
     !vtlayout->is_direction_LTR(vtlayout))
      Row_insert_cells(vrow->vrow,0,vrow->row->num_columns-cursor_pos);

}
Exemple #27
0
/* Convert the multibyte string *SRCP to wide characters.

   With a non-NULL DEST, at most LEN wide characters are stored.  *SRCP is
   set to NULL when the terminating NUL was converted, and otherwise to the
   first byte that was not converted.  The result is the number of wide
   characters stored, not counting a terminating NUL.

   With a NULL DEST, LEN is ignored, *SRCP and *PS are left alone, and the
   result is the number of wide characters the string would convert to.

   An invalid sequence sets errno to EILSEQ and yields (size_t)(-1); in the
   storing case *SRCP then points at the offending sequence.  A NULL PS
   selects the function's own private state.  */
size_t
mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps)
{
  const char *cursor;

  if (ps == NULL)
    ps = &_gl_mbsrtowcs_state;
  cursor = *srcp;

  if (dest == NULL)
    {
      /* Counting only: work on a copy so that *ps is not clobbered.  */
      mbstate_t scratch = *ps;
      size_t count = 0;

      for (;;)
        {
          /* Number of bytes up to and including the next NUL, capped at
             MB_LEN_MAX; bytes past a NUL are never examined.  */
          size_t avail =
            (cursor[0] == '\0' ? 1
             : cursor[1] == '\0' ? 2
             : cursor[2] == '\0' ? 3
             : (MB_LEN_MAX <= 4 || cursor[3] == '\0') ? 4
             : 4 + strnlen1 (cursor + 4, MB_LEN_MAX - 4));
          size_t n = mbrtowc (NULL, cursor, avail, &scratch);

          if (n == (size_t)(-2))
            /* A character extending past a '\0' byte or longer than
               MB_LEN_MAX bytes.  Cannot happen.  */
            abort ();
          if (n == (size_t)(-1))
            {
              errno = EILSEQ;
              return (size_t)(-1);
            }
          if (n == 0)
            /* Here mbsinit (&scratch).  */
            return count;

          cursor += n;
          count++;
        }
    }
  else
    {
      wchar_t *out = dest;

      while (len > 0)
        {
          /* Same bounded look-ahead as in the counting case.  */
          size_t avail =
            (cursor[0] == '\0' ? 1
             : cursor[1] == '\0' ? 2
             : cursor[2] == '\0' ? 3
             : (MB_LEN_MAX <= 4 || cursor[3] == '\0') ? 4
             : 4 + strnlen1 (cursor + 4, MB_LEN_MAX - 4));
          size_t n = mbrtowc (out, cursor, avail, ps);

          if (n == (size_t)(-2))
            /* A character extending past a '\0' byte or longer than
               MB_LEN_MAX bytes.  Cannot happen.  */
            abort ();
          if (n == (size_t)(-1))
            {
              /* Report where conversion stopped.  */
              *srcp = cursor;
              errno = EILSEQ;
              return (size_t)(-1);
            }
          if (n == 0)
            {
              /* Terminating NUL stored but not counted.
                 Here mbsinit (ps).  */
              cursor = NULL;
              break;
            }

          cursor += n;
          out++;
          len--;
        }

      *srcp = cursor;
      return out - dest;
    }
}
Exemple #28
0
/*
 * Print the character at p for a character-style conversion.
 *
 * Continuation bytes of a multibyte character that was already printed
 * (tracked in pr->mbleft) come out as "**".  NUL, BEL, BS, FF, NL, CR, TAB
 * and VT come out as their backslash escapes.  Anything else is printed
 * as a character when iswprint() accepts it, and as three octal digits
 * otherwise.  Multibyte decoding is attempted only when odmode is set and
 * MB_CUR_MAX > 1; bufsize is the number of bytes available at p.
 */
void
conv_c(PR *pr, u_char *p, size_t bufsize)
{
	static const struct {
		u_char byte;
		const char *text;
	} escapes[] = {
		{ '\0', "\\0" },
		{ '\007', "\\a" },	/* '\a' */
		{ '\b', "\\b" },
		{ '\f', "\\f" },
		{ '\n', "\\n" },
		{ '\r', "\\r" },
		{ '\t', "\\t" },
		{ '\v', "\\v" },
	};
	char buf[10];
	char const *str;
	wchar_t wc;
	size_t clen, oclen, i;
	int converr, pad, width;
	u_char peekbuf[MB_LEN_MAX];

	/* str stays NULL until we know the output is a plain string. */
	str = NULL;
	if (pr->mbleft > 0) {
		str = "**";
		pr->mbleft--;
	} else {
		for (i = 0; i < sizeof(escapes) / sizeof(escapes[0]); i++) {
			if (*p == escapes[i].byte) {
				str = escapes[i].text;
				break;
			}
		}
	}

	if (str == NULL) {
		/*
		 * Multibyte characters are disabled for hexdump(1) for
		 * backwards compatibility and consistency (none of its other
		 * output formats recognize them correctly).
		 */
		converr = 0;
		if (odmode && MB_CUR_MAX > 1) {
			oclen = 0;
			for (;;) {
				clen = mbrtowc(&wc, (const char *)p, bufsize,
				    &pr->mbstate);
				if (clen != (size_t)-2 || p == peekbuf)
					break;
				/*
				 * Incomplete character; peek ahead and see
				 * if we can complete it.
				 */
				oclen = bufsize;
				bufsize = peek(p = peekbuf, MB_CUR_MAX);
			}
			if (clen == 0) {
				clen = 1;
			} else if (clen == (size_t)-1 ||
			    clen == (size_t)-2) {
				/*
				 * Invalid, or still incomplete after peeking:
				 * reset the state and treat as a single byte.
				 */
				memset(&pr->mbstate, 0, sizeof(pr->mbstate));
				wc = *p;
				clen = 1;
				converr = 1;
			}
			clen += oclen;
		} else {
			wc = *p;
			clen = 1;
		}

		if (!converr && iswprint(wc)) {
			if (odmode) {
				*pr->cchar = 'C';
				assert(strcmp(pr->fmt, "%3C") == 0);
				width = wcwidth(wc);
				assert(width >= 0);
				pad = 3 - width;
				if (pad < 0)
					pad = 0;
				(void)printf("%*s%C", pad, "", wc);
				/* Remaining bytes will be shown as "**". */
				pr->mbleft = clen - 1;
			} else {
				*pr->cchar = 'c';
				(void)printf(pr->fmt, (int)wc);
			}
			return;
		}

		/* Unprintable or undecodable: fall back to octal. */
		(void)sprintf(buf, "%03o", (int)*p);
		str = buf;
	}

	*pr->cchar = 's';
	(void)printf(pr->fmt, str);
}
Exemple #29
0
/* Exercise mbrtowc() in the locale NAME with code page CODEPAGE.

   First runs checks that do not depend on the encoding (zero-length input,
   NUL byte, the ISO C basic character set, NULL input pointer), then the
   checks specific to CODEPAGE.  In the per-code-page checks, input bytes
   are overwritten with '\0' once they have been consumed, so that a later
   call cannot succeed by re-reading them.

   Returns 77 if the locale cannot be set, 0 after the checks for a known
   code page have run, and 1 for a code page without specific checks.
   Failed expectations are reported through ASSERT.  */
static int
test_one_locale (const char *name, int codepage)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

# if 1
  /* Portable code to set the locale.  */
  {
    char name_with_codepage[1024];

    sprintf (name_with_codepage, "%s.%d", name, codepage);

    /* Set the locale.  */
    if (setlocale (LC_ALL, name_with_codepage) == NULL)
      return 77;
  }
# else
  /* Hacky way to set a locale.codepage combination that setlocale() refuses
     to set.  */
  {
    /* Codepage of the current locale, set with setlocale().
       Not necessarily the same as GetACP().  */
    extern __declspec(dllimport) unsigned int __lc_codepage;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name) == NULL)
      return 77;

    /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
    __lc_codepage = codepage;
    switch (codepage)
      {
      case 1252:
      case 1256:
        MB_CUR_MAX = 1;
        break;
      case 932:
      case 950:
      case 936:
        MB_CUR_MAX = 2;
        break;
      case 54936:
      case 65001:
        MB_CUR_MAX = 4;
        break;
      }

    /* Test whether the codepage is really available.  */
    memset (&state, '\0', sizeof (mbstate_t));
    if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
      return 77;
  }
# endif

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == c);
          ASSERT (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  switch (codepage)
    {
    case 1252:
      /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
      {
        char input[] = "B\374\337er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\374');
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\337');
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 1256:
      /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
      {
        char input[] = "x\302\341\346y"; /* "xآلوy" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'x');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\302');
        ASSERT (wc == 0x0622);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\341');
        ASSERT (wc == 0x0644);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\346');
        ASSERT (wc == 0x0648);
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'y');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 932:
      /* Locale encoding is CP932, similar to Shift_JIS.  */
      {
        char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Feed only the first byte of a two-byte character: the call must
           report "incomplete" and keep the byte in the state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 950:
      /* Locale encoding is CP950, similar to Big5.  */
      {
        char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 936:
      /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
      {
        char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 54936:
      /* Locale encoding is CP54936 = GB18030.  */
      {
        char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 7, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        /* Clear the byte that was just consumed ('e' sits at index 7 in
           this encoding).  */
        input[7] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 8, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\045", 2, &state); /* 0x81 0x25 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 65001:
      /* Locale encoding is CP65001 = UTF-8.  */
      {
        char input[] = "B\303\274\303\237er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 5, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[5] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 6, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\303\300", 2, &state); /* 0xC3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300", 2, &state); /* 0xE3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300", 2, &state); /* 0xF3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    default:
      return 1;
    }
}
Exemple #30
0
/*
 * Count lines, words, characters and the longest line of a single input
 * and report them through show_cnt().
 *
 * file: path of the file to open, or NULL to read standard input.
 *
 * Which counters are actually needed is decided by the file-level option
 * flags (doline, doword, dochar, domulti, dolongline).  Depending on them
 * one of three strategies is used:
 *
 *   1. a line-only scan that just looks for '\n' bytes;
 *   2. a stat(2) shortcut when only the size of a regular file is needed;
 *   3. the general scanner ("the hard way"), which decodes the input one
 *      (possibly multibyte) character at a time and tracks words as well.
 *
 * Strategies 1 and 2 are only attempted for named files; standard input
 * always goes through the general scanner.
 *
 * The per-file results are added to the running totals tlinect, twordct,
 * tcharct and tlongline.
 *
 * Returns 0 on success, or 1 after printing a warning if the file cannot
 * be opened, stat'ed or read.  The descriptor is closed before returning
 * on every path that opened or used it.
 */
static int
cnt(const char *file)
{
	struct stat sb;
	uintmax_t linect, wordct, charct, llct, tmpll;
	int fd, len, warned;
	size_t clen;
	short gotsp;
	u_char *p;
	u_char buf[MAXBSIZE];
	wchar_t wch;
	mbstate_t mbs;

	linect = wordct = charct = llct = tmpll = 0;
	if (file == NULL)
		fd = STDIN_FILENO;
	else {
		if ((fd = open(file, O_RDONLY, 0)) < 0) {
			warn("%s: open", file);
			return (1);
		}
		/* Words or real multibyte decoding need the full scanner. */
		if (doword || (domulti && MB_CUR_MAX != 1))
			goto word;
		/*
		 * Line counting is split out because it's a lot faster to get
		 * lines than to get words, since the word count requires some
		 * logic.
		 */
		if (doline) {
			while ((len = read(fd, buf, MAXBSIZE))) {
				if (len == -1) {
					warn("%s: read", file);
					(void)close(fd);
					return (1);
				}
				if (siginfo) {
					show_cnt(file, linect, wordct, charct,
					    llct);
				}
				charct += len;
				for (p = buf; len--; ++p) {
					if (*p == '\n') {
						if (tmpll > llct)
							llct = tmpll;
						tmpll = 0;
						++linect;
					} else {
						tmpll++;
					}
				}
			}
			reset_siginfo();
			tlinect += linect;
			/*
			 * domulti can only get here when MB_CUR_MAX == 1, in
			 * which case every byte is one character; account for
			 * it in the total exactly like the other two paths do.
			 */
			if (dochar || domulti)
				tcharct += charct;
			if (dolongline) {
				if (llct > tlongline)
					tlongline = llct;
			}
			show_cnt(file, linect, wordct, charct, llct);
			(void)close(fd);
			return (0);
		}
		/*
		 * If all we need is the number of characters and it's a
		 * regular file, just stat the puppy.  This is not possible
		 * when the longest line is requested too, because that needs
		 * the file contents.
		 */
		if ((dochar || domulti) && !dolongline) {
			if (fstat(fd, &sb)) {
				warn("%s: fstat", file);
				(void)close(fd);
				return (1);
			}
			/*
			 * Pseudo-filesystems may advertise a zero size for
			 * files that do have contents, so only trust a
			 * non-zero size; a genuinely empty file yields the
			 * same counts when read below.
			 */
			if (S_ISREG(sb.st_mode) && sb.st_size > 0) {
				reset_siginfo();
				charct = sb.st_size;
				show_cnt(file, linect, wordct, charct, llct);
				tcharct += charct;
				(void)close(fd);
				return (0);
			}
		}
	}

	/* Do it the hard way... */
word:	gotsp = 1;		/* previous character was whitespace */
	warned = 0;		/* EILSEQ is reported once per input */
	memset(&mbs, 0, sizeof(mbs));
	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
		if (len == -1) {
			warn("%s: read", file != NULL ? file : "stdin");
			(void)close(fd);
			return (1);
		}
		p = buf;
		while (len > 0) {
			if (siginfo)
				show_cnt(file, linect, wordct, charct, llct);
			if (!domulti || MB_CUR_MAX == 1) {
				/* Byte mode: every byte is one character. */
				clen = 1;
				wch = (unsigned char)*p;
			} else if ((clen = mbrtowc(&wch, (const char *)p,
			    (size_t)len, &mbs)) == (size_t)-1) {
				/*
				 * Invalid sequence: warn once, reset the
				 * conversion state and count the offending
				 * byte as a single character.
				 */
				if (!warned) {
					errno = EILSEQ;
					warn("%s",
					    file != NULL ? file : "stdin");
					warned = 1;
				}
				memset(&mbs, 0, sizeof(mbs));
				clen = 1;
				wch = (unsigned char)*p;
			} else if (clen == (size_t)-2) {
				/*
				 * Character is split across two reads; the
				 * partial bytes are remembered in mbs and
				 * the next buffer completes it.
				 */
				break;
			} else if (clen == 0) {
				/* A NUL character still occupies one byte. */
				clen = 1;
			}
			charct++;
			if (wch != L'\n')
				tmpll++;
			len -= clen;
			p += clen;
			if (wch == L'\n') {
				if (tmpll > llct)
					llct = tmpll;
				tmpll = 0;
				++linect;
			}
			/* A word starts at the first non-space after space. */
			if (iswspace(wch))
				gotsp = 1;
			else if (gotsp) {
				gotsp = 0;
				++wordct;
			}
		}
	}
	reset_siginfo();
	/* Input ended in the middle of a multibyte character? */
	if (domulti && MB_CUR_MAX > 1) {
		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
			warn("%s", file != NULL ? file : "stdin");
	}
	if (doline)
		tlinect += linect;
	if (doword)
		twordct += wordct;
	if (dochar || domulti)
		tcharct += charct;
	if (dolongline) {
		if (llct > tlongline)
			tlongline = llct;
	}
	show_cnt(file, linect, wordct, charct, llct);
	(void)close(fd);
	return (0);
}