/*
 * Produce a newly malloc'ed wide-character copy of the multibyte string
 * given to a %s conversion.  When ``prec'' is non-negative, at most that
 * many multibyte characters are taken from the argument, which then need
 * not be NUL-terminated; a negative ``prec'' means the whole
 * NUL-terminated string is converted.  NULL is returned for a NULL
 * argument, for an invalid or incomplete multibyte sequence, and when the
 * allocation fails.
 */
static wchar_t *
__mbsconv(char *mbsarg, int prec)
{
	mbstate_t mbs;
	wchar_t *wide, *out;
	const char *in;
	size_t nbytes, nchars, nconv;

	if (mbsarg == NULL)
		return (NULL);

	if (prec < 0) {
		/* No limit: the argument is an ordinary C string. */
		nbytes = strlen(mbsarg);
		nconv = 0;
	} else {
		/*
		 * The argument may lack a terminating NUL, so step through
		 * it one character at a time to learn how many bytes the
		 * first ``prec'' characters occupy.
		 */
		mbs = initial_mbs;
		nbytes = 0;
		nconv = 0;
		in = mbsarg;
		for (nchars = 0; nchars != (size_t)prec; nchars++) {
			nconv = mbrlen(in, MB_CUR_MAX, &mbs);
			/* (size_t)-1 and (size_t)-2 are the two largest values. */
			if (nconv == 0 || nconv >= (size_t)-2)
				break;
			nbytes += nconv;
			in += nconv;
		}
		if (nconv >= (size_t)-2)
			return (NULL);
	}

	/*
	 * One wide character per input byte is an upper bound; the extra
	 * slot holds the terminator.
	 */
	wide = malloc((nbytes + 1) * sizeof(*wide));
	if (wide == NULL)
		return (NULL);

	/* Convert at most `nbytes' bytes of the input, restarting the state. */
	mbs = initial_mbs;
	for (out = wide, in = mbsarg; nbytes != 0;
	    out++, in += nconv, nbytes -= nconv) {
		nconv = mbrtowc(out, in, nbytes, &mbs);
		if (nconv == 0 || nconv >= (size_t)-2)
			break;
	}
	if (nconv >= (size_t)-2) {
		free(wide);
		return (NULL);
	}
	*out = L'\0';
	return (wide);
}
// Exercises mbrtowc(): zero-length and one-byte reads before any locale
// change, then 1- to 4-byte UTF-8 decoding and rejection of malformed
// sequences under C.UTF-8.
TEST(wchar, mbrtowc) {
  wchar_t out[8];

  // A zero-length read must leave the output untouched.
  out[0] = 'x';
  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
  ASSERT_EQ('x', out[0]);

  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
  ASSERT_EQ(0U, mbrtowc(out, "", 0, NULL));
  ASSERT_EQ(1U, mbrtowc(out, "hello", 1, NULL));
  ASSERT_EQ(L'h', out[0]);

  // Same calls with a null output pointer.
  ASSERT_EQ(0U, mbrtowc(NULL, "hello", 0, NULL));
  ASSERT_EQ(0U, mbrtowc(NULL, "", 0, NULL));
  ASSERT_EQ(1U, mbrtowc(NULL, "hello", 1, NULL));

  // Null input.
  ASSERT_EQ(0U, mbrtowc(NULL, NULL, 0, NULL));

  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // 1-byte UTF-8.
  ASSERT_EQ(1U, mbrtowc(out, "abcdef", 6, NULL));
  ASSERT_EQ(L'a', out[0]);
  // 2-byte UTF-8.
  ASSERT_EQ(2U, mbrtowc(out, "\xc2\xa2" "cdef", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x00a2), out[0]);
  // 3-byte UTF-8.
  ASSERT_EQ(3U, mbrtowc(out, "\xe2\x82\xac" "def", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x20ac), out[0]);
  // 4-byte UTF-8.
  ASSERT_EQ(4U, mbrtowc(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL));
  ASSERT_EQ(static_cast<wchar_t>(0x24b62), out[0]);
#if defined(__BIONIC__) // glibc allows this.
  // Illegal 5-byte UTF-8.
  // Clear errno first so the EILSEQ check cannot be satisfied by a value
  // left behind by an earlier call.
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL));
  ASSERT_EQ(EILSEQ, errno);
#endif
  // Illegal over-long sequence.
  // Clear errno again: the previous failure may already have stored EILSEQ.
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf0\x82\x82\xac" "ef", 6, NULL));
  ASSERT_EQ(EILSEQ, errno);
}
/*
 * Return the accumulated output text of `ms' in printable form.
 *
 * Returns NULL if an error was recorded earlier (ms->haderr), if there is
 * no output buffer, or if memory cannot be obtained.  With R_MAGIC_RAW
 * set the raw buffer ms->o.buf is returned unchanged.  Otherwise the text
 * is copied into ms->o.pbuf (grown with realloc) with every non-printable
 * character replaced through OCTALIFY, and ms->o.pbuf is returned.
 */
const char *file_getbuffer(RMagic *ms) {
	char *pbuf, *op, *np;
	size_t psize, len;

	if (ms->haderr)
		return NULL;

	if (ms->flags & R_MAGIC_RAW)
		return ms->o.buf;

	if (ms->o.buf == NULL) {
		eprintf ("ms->o.buf = NULL\n");
		return NULL;
	}

	/*
	 * 4 is for octal representation, + 1 is for NUL.
	 * Reject lengths for which len * 4 + 1 would overflow.
	 */
	len = strlen (ms->o.buf);
	if (len > (SIZE_MAX - 1) / 4) {
		file_oomem (ms, len);
		return NULL;
	}
	psize = len * 4 + 1;
	if ((pbuf = realloc (ms->o.pbuf, psize)) == NULL) {
		file_oomem (ms, psize);
		return NULL;
	}
	ms->o.pbuf = pbuf;

#if 1 //defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	{
		mbstate_t state;
		wchar_t nextchar;
		int mb_conv = 1;
		size_t bytesconsumed;
		char *eop;

		/* First try to interpret the text as multibyte characters. */
		(void)memset(&state, 0, sizeof(mbstate_t));

		np = ms->o.pbuf;
		op = ms->o.buf;
		eop = op + len;

		while (op < eop) {
			bytesconsumed = mbrtowc(&nextchar, op,
			    (size_t)(eop - op), &state);
			if (bytesconsumed == (size_t)(-1) ||
			    bytesconsumed == (size_t)(-2)) {
				/* Not valid multibyte text; use the fallback. */
				mb_conv = 0;
				break;
			}

			if (iswprint(nextchar)) {
				(void)memcpy(np, op, bytesconsumed);
				op += bytesconsumed;
				np += bytesconsumed;
			} else {
				/*
				 * Escape every byte of the character.  This
				 * loop only makes progress because OCTALIFY
				 * advances `op' as well as `np'.
				 */
				while (bytesconsumed-- > 0)
					OCTALIFY(np, op);
			}
		}
		*np = '\0';

		/* Parsing succeeded as a multi-byte sequence */
		if (mb_conv != 0)
			return ms->o.pbuf;
	}
#endif

	/*
	 * Byte-wise fallback.  The loop header advances `op' once per input
	 * byte, so OCTALIFY is handed a scratch copy of the pointer: letting
	 * it advance `op' too (as the multibyte loop above requires it to)
	 * skipped the byte following every escaped one and could step over
	 * the terminating NUL.
	 */
	for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
		if (isprint ((ut8)*op)) {
			*np++ = *op;
		} else {
			char *src = op;
			OCTALIFY (np, src);
		}
	}
	*np = '\0';
	return ms->o.pbuf;
}
int main(int argc, char *argv[]) { FILE *fp; int (*fcn)(FILE *, const char *); int ch, rval; size_t n; setlocale(LC_ALL, ""); fcn = NULL; dchar = '\t'; /* default delimiter is \t */ strcpy(dcharmb, "\t"); while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1) switch(ch) { case 'b': get_list(optarg); bflag = 1; break; case 'c': get_list(optarg); cflag = 1; break; case 'd': n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL); if (dchar == '\0' || n != strlen(optarg)) errx(1, "bad delimiter"); strcpy(dcharmb, optarg); dflag = 1; break; case 'f': get_list(optarg); fflag = 1; break; case 's': sflag = 1; break; case 'n': nflag = 1; break; case 'w': wflag = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; if (fflag) { if (bflag || cflag || nflag || (wflag && dflag)) usage(); } else if (!(bflag || cflag) || dflag || sflag || wflag) usage(); else if (!bflag && nflag) usage(); if (fflag) fcn = f_cut; else if (cflag) fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; else if (bflag) fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; rval = 0; if (*argv) for (; *argv; ++argv) { if (strcmp(*argv, "-") == 0) rval |= fcn(stdin, "stdin"); else { if (!(fp = fopen(*argv, "r"))) { warn("%s", *argv); rval = 1; continue; } fcn(fp, *argv); (void)fclose(fp); } } else rval = fcn(stdin, "stdin"); exit(rval); }
/*
 * Split `text' into lines of at most `linelen' display columns and hand
 * each line to `line_cb' as a (start, byte length) pair together with
 * `arg'.
 *
 * Lines are broken at '\n', otherwise at the last space seen on the
 * current line (the space itself is dropped), otherwise in the middle of
 * the word.  Folding stops early when a callback invoked for a newline or
 * a fold returns non-zero; the result of the callback for the final line
 * is ignored.  Characters that occupy no columns or are unprintable are
 * kept on the current line without counting towards its width.  A single
 * character that is wider than `linelen' is emitted on a line of its own
 * so that folding always makes progress.
 */
void fold_text(const char *text, int linelen,
		int line_cb(void *arg, const char *start, int len),
		void *arg)
{
	const char *start, *end, *sep;
	size_t sep_bytes, len;
	int col, rc = 0;
	mbstate_t ps;

	/* start, end and sep are byte-positions in the string, and should always
	 * lie on the start of a multibyte sequence */
	start = end = sep = text;
	sep_bytes = 0;
	col = 0;
	len = strlen(text);
	memset(&ps, 0, sizeof(ps));

	while (!rc) {
		size_t bytes;
		wchar_t wc;
		int width;

		bytes = mbrtowc(&wc, end, len - (end - text), &ps);

		assert(bytes != (size_t)-1);

		/* we'll get a zero size for the nul terminator, (size_t) -2
		 * if we've reached the end of the buffer, or (size_t) -1 on
		 * error */
		if (!bytes || bytes == (size_t) -2 || bytes == (size_t) -1) {
			line_cb(arg, start, end - start);
			break;
		}

		if (wc == L'\n') {
			rc = line_cb(arg, start, end - start);
			end += bytes;
			start = sep = end;
			sep_bytes = 0;
			col = 0;
			continue;
		}

		width = wcwidth(wc);

		/* Zero-width (e.g. combining) and unprintable characters take
		 * no columns: just add them to the current line.  The nul
		 * terminator never gets here, it is caught by the !bytes
		 * check above. */
		if (width <= 0) {
			end += bytes;
			continue;
		}

		col += width;

		if (col > linelen) {
			if (sep != start) {
				/* split on a previous word boundary, if
				 * possible */
				rc = line_cb(arg, start, sep - start);
				end = sep + sep_bytes;
			} else {
				/* otherwise, break the word.  If not even one
				 * character fits, take it anyway: emitting an
				 * empty line would leave us at the same
				 * position forever. */
				if (end == start)
					end += bytes;
				rc = line_cb(arg, start, end - start);
			}
			sep_bytes = 0;
			start = sep = end;
			col = 0;

		} else {
			/* record our last separator */
			if (wc == L' ') {
				sep = end;
				sep_bytes = bytes;
			}
			end += bytes;
		}
	}
}
wchar_t input_common_readch(int timed) { if (! has_lookahead()) { if (timed) { int count; fd_set fds; struct timeval tm= { 0, 1000 * WAIT_ON_ESCAPE } ; FD_ZERO(&fds); FD_SET(0, &fds); count = select(1, &fds, 0, 0, &tm); switch (count) { case 0: return WEOF; case -1: return WEOF; break; default: break; } } wchar_t res; mbstate_t state = {}; while (1) { wint_t b = readb(); char bb; size_t sz; if ((b >= R_NULL) && (b < R_NULL + 1000)) return b; bb=b; sz = mbrtowc(&res, &bb, 1, &state); switch (sz) { case (size_t)(-1): memset(&state, '\0', sizeof(state)); debug(2, L"Illegal input"); return R_NULL; case (size_t)(-2): break; case 0: return 0; default: return res; } } } else { if (!timed) { while (has_lookahead() && lookahead_top() == WEOF) lookahead_pop(); if (! has_lookahead()) return input_common_readch(0); } return lookahead_pop(); } }
/*
 * Get a logical line.
 *
 * Reads characters from f into the global Line buffer until a newline,
 * end of file, a nearly full buffer, or (when fold_opt is set) until the
 * display column reaches Mcol.  The number of bytes stored is written to
 * *length.  Returns the display column reached, or EOF when end of file
 * is hit before anything was stored.  On the regular exit path at the
 * bottom of the function the buffer is NUL-terminated.
 *
 * The static colflg remembers that the previous call ended exactly at
 * column Mcol while folding; in that case a newline that immediately
 * follows is skipped at the start of this call.
 */
static int get_line(register FILE *f, int *length)
{
	int c;
	char *p;
	int column;
	static int colflg;

#ifdef HAVE_WIDECHAR
	size_t i;
	wchar_t wc;
	int wc_width;
	mbstate_t state, state_bak;	/* Current status of the stream. */
	char mbc[MB_LEN_MAX];		/* Buffer for one multibyte char. */
	size_t mblength;		/* Byte length of multibyte char. */
	size_t mbc_pos = 0;		/* Position of the MBC. */
	int use_mbc_buffer_flag = 0;	/* If 1, mbc has data. */
	int break_flag = 0;		/* If 1, exit while(). */
	long file_pos_bak = Ftell(f);	/* Where to seek back to on overflow. */

	memset(&state, '\0', sizeof(mbstate_t));
#endif

	prepare_line_buffer();

	p = Line;
	column = 0;
	c = Getc(f);
	/* Skip the newline that directly follows a line folded at Mcol. */
	if (colflg && c == '\n') {
		Currline++;
		c = Getc(f);
	}
	while (p < &Line[LineLen - 1]) {
#ifdef HAVE_WIDECHAR
		/*
		 * Continuation of a multibyte character: mbc already holds
		 * its leading byte(s); append the new byte and try again.
		 */
		if (fold_opt && use_mbc_buffer_flag && MB_CUR_MAX > 1) {
			use_mbc_buffer_flag = 0;
			state_bak = state;
			mbc[mbc_pos++] = c;
 process_mbc:
			mblength = mbrtowc(&wc, mbc, mbc_pos, &state);

			switch (mblength) {
			case (size_t)-2:	/* Incomplete multibyte character. */
				/* Keep collecting bytes on the next iteration. */
				use_mbc_buffer_flag = 1;
				state = state_bak;
				break;

			case (size_t)-1:	/* Invalid as a multibyte character. */
				/*
				 * Emit the first buffered byte as a one-column
				 * character, then either stop at the fold
				 * column (seeking back so the remaining bytes
				 * are read again) or retry the conversion with
				 * the remaining bytes.
				 */
				*p++ = mbc[0];
				state = state_bak;
				column++;
				file_pos_bak++;

				if (column >= Mcol) {
					Fseek(f, file_pos_bak);
				} else {
					memmove(mbc, mbc + 1, --mbc_pos);
					if (mbc_pos > 0) {
						mbc[mbc_pos] = '\0';
						goto process_mbc;
					}
				}
				break;

			default:
				/*
				 * Complete character: copy its bytes if it
				 * still fits within Mcol columns, otherwise
				 * seek back to file_pos_bak and leave the loop
				 * so that it is read again by the next call.
				 */
				wc_width = wcwidth(wc);

				if (column + wc_width > Mcol) {
					Fseek(f, file_pos_bak);
					break_flag = 1;
				} else {
					for (i = 0; p < &Line[LineLen - 1] && i < mbc_pos; i++)
						*p++ = mbc[i];
					if (wc_width > 0)
						column += wc_width;
				}
			}

			if (break_flag || column >= Mcol)
				break;

			c = Getc(f);
			continue;
		}
#endif /* HAVE_WIDECHAR */
		if (c == EOF) {
			/* Return what was collected; plain EOF only if nothing was. */
			if (p > Line) {
				*p = '\0';
				*length = p - Line;
				return (column);
			}
			*length = p - Line;
			return (EOF);
		}
		if (c == '\n') {
			Currline++;
			break;
		}

		/* Store the byte first; the branches below may undo or rewrite it. */
		*p++ = c;
#if 0
		if (c == '\033') {	/* ESC */
			c = Getc(f);
			while (c > ' ' && c < '0' && p < &Line[LineLen - 1]) {
				*p++ = c;
				c = Getc(f);
			}
			if (c >= '0' && c < '\177' && p < &Line[LineLen - 1]) {
				*p++ = c;
				c = Getc(f);
				continue;
			}
		}
#endif /* 0 */
		if (c == '\t') {
			if (!hardtabs || (column < promptlen && !hard)) {
				if (hardtabs && eraseln && !dumb) {
					/* Advance to the next multiple of 8 and erase the line. */
					column = 1 + (column | 7);
					putstring(eraseln);
					promptlen = 0;
				} else {
					/*
					 * Replace the stored tab by spaces up
					 * to the next multiple of 8 columns.
					 */
					for (--p; p < &Line[LineLen - 1];) {
						*p++ = ' ';
						if ((++column & 7) == 0)
							break;
					}
					if (column >= promptlen)
						promptlen = 0;
				}
			} else
				/* Tab kept in the buffer; only the column advances. */
				column = 1 + (column | 7);
		} else if (c == '\b' && column > 0) {
			column--;
		} else if (c == '\r') {
			int next = Getc(f);
			if (next == '\n') {
				/* CR LF: drop the stored CR and end the line. */
				p--;
				Currline++;
				break;
			}
			Ungetc(next, f);
			column = 0;
		} else if (c == '\f' && stop_opt) {
			/* Store the form feed as "^L" and bump Pause. */
			p[-1] = '^';
			*p++ = 'L';
			column += 2;
			Pause++;
		} else if (c == EOF) {
			*length = p - Line;
			return (column);
		} else {
#ifdef HAVE_WIDECHAR
			if (fold_opt && MB_CUR_MAX > 1) {
				/* Start a fresh multibyte character with this byte. */
				memset(mbc, '\0', MB_LEN_MAX);
				mbc_pos = 0;
				mbc[mbc_pos++] = c;
				state_bak = state;

				mblength = mbrtowc(&wc, mbc, mbc_pos, &state);
				/* The value of mblength is always less than 2 here. */
				switch (mblength) {
				case (size_t)-2:
					/*
					 * Lead byte of a longer character:
					 * take it back out of Line, remember
					 * its file position and collect the
					 * rest through the mbc buffer.
					 */
					p--;
					file_pos_bak = Ftell(f) - 1;
					state = state_bak;
					use_mbc_buffer_flag = 1;
					break;

				case (size_t)-1:
					/* Invalid byte: counted as one column. */
					state = state_bak;
					column++;
					break;

				default:
					wc_width = wcwidth(wc);
					if (wc_width > 0)
						column += wc_width;
				}
			} else
#endif /* HAVE_WIDECHAR */
			{
				if (isprint(c))
					column++;
			}
		}

		if (column >= Mcol && fold_opt)
			break;
#ifdef HAVE_WIDECHAR
		if (use_mbc_buffer_flag == 0 && p >= &Line[LineLen - 1 - 4])
			/* don't read another char if there is no space for
			 * whole multibyte sequence */
			break;
#endif
		c = Getc(f);
	}
	if (column >= Mcol && Mcol > 0) {
		if (!Wrap) {
			*p++ = '\n';
		}
	}
	/* Remember for the next call whether this line ended exactly at Mcol. */
	colflg = column == Mcol && fold_opt;
	if (colflg && eatnl && Wrap) {
		*p++ = '\n';	/* simulate normal wrap */
	}
	*length = p - Line;
	*p = 0;
	return (column);
}
/*
 * Convert "byteCount" bytes at "bytes" into 16-bit units stored at
 * "chars", which has room for "charCount" units.
 *
 * Returns the number of units written, or -1 if the output buffer is too
 * small.  With IL_CONFIG_LATIN1 every byte becomes one unit.  Otherwise
 * the bytes are decoded with mbrtowc() (or mbtowc() when HAVE_MBRTOWC is
 * not set) and each character is written with ILUTF16WriteChar(); bytes
 * that do not decode are skipped one at a time and an embedded NUL is
 * copied through as a single unit.
 */
long ILAnsiGetChars(const unsigned char *bytes, unsigned long byteCount,
					unsigned short *chars, unsigned long charCount)
{
#ifdef IL_CONFIG_LATIN1

	unsigned long len;

	/* Check for enough space in the output buffer */
	if(byteCount > charCount)
	{
		return -1;
	}

	/* Convert the bytes */
	len = byteCount;
	while(len > 0)
	{
		*chars++ = (unsigned short)(*bytes++);
		--len;
	}
	return (long)byteCount;

#else
#if HAVE_MBRTOWC

	/* Use the re-entrant function to perform the conversion */
	mbstate_t state;
	size_t chlen;
	unsigned long len = 0;
	wchar_t ch;
	int wrlen;
	ILMemZero(&state, sizeof(state));
	mbrtowc((wchar_t *)0, (char *)0, 0, &state);
	while(byteCount > 0)
	{
		chlen = mbrtowc(&ch, (char *)bytes, (size_t)byteCount, &state);
		if(chlen == (size_t)(-1) || chlen == (size_t)(-2))
		{
			/* Invalid character */
			++bytes;
			--byteCount;
		}
		else if(chlen != 0)
		{
			/* Ordinary character: ask how many units it needs first */
			wrlen = ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			if(charCount < (unsigned long)wrlen)
			{
				return -1;
			}
			ILUTF16WriteChar(chars, (unsigned long)ch);
			chars += wrlen;
			/* Track the space that is left, so that the check above
			   is made against the remaining room and not against
			   the original capacity */
			charCount -= (unsigned long)wrlen;
			len += wrlen;
			bytes += chlen;
			byteCount -= (unsigned long)chlen;
		}
		else
		{
			/* Embedded NUL character */
			if(charCount <= 0)
			{
				return -1;
			}
			*chars++ = '\0';
			--charCount;
			++len;
			++bytes;
			--byteCount;
		}
	}
	return (long)len;

#else

	/* Use the non re-entrant function to perform the conversion
	   and just hope that the underlying libc takes care of the
	   thread-safety issues for us */
	int chlen;
	unsigned long len = 0;
	wchar_t ch;
	int wrlen;
	mbtowc((wchar_t *)0, (char *)0, 0);
	while(byteCount > 0)
	{
		chlen = mbtowc(&ch, (char *)bytes, (size_t)byteCount);
		if(chlen > 0)
		{
			/* Ordinary character: ask how many units it needs first */
			wrlen = ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			if(charCount < (unsigned long)wrlen)
			{
				return -1;
			}
			ILUTF16WriteChar(chars, (unsigned long)ch);
			chars += wrlen;
			/* Track the space that is left in the output buffer */
			charCount -= (unsigned long)wrlen;
			len += wrlen;
			bytes += chlen;
			byteCount -= (unsigned long)chlen;
		}
		else if(!chlen)
		{
			/* Embedded NUL character */
			if(charCount <= 0)
			{
				return -1;
			}
			*chars++ = '\0';
			--charCount;
			++len;
			++bytes;
			--byteCount;
		}
		else
		{
			/* Invalid character */
			++bytes;
			--byteCount;
		}
	}
	return (long)len;

#endif
#endif
}
int main(int argc, char *argv[]) { INPUT *F1, *F2; int aflag, ch, cval, vflag; char *end; setlocale(LC_ALL, ""); F1 = &input1; F2 = &input2; aflag = vflag = 0; obsolete(argv); while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) { switch (ch) { case '\01': /* See comment in obsolete(). */ aflag = 1; F1->unpair = F2->unpair = 1; break; case '1': if ((F1->joinf = strtol(optarg, &end, 10)) < 1) errx(1, "-1 option field number less than 1"); if (*end) errx(1, "illegal field number -- %s", optarg); --F1->joinf; break; case '2': if ((F2->joinf = strtol(optarg, &end, 10)) < 1) errx(1, "-2 option field number less than 1"); if (*end) errx(1, "illegal field number -- %s", optarg); --F2->joinf; break; case 'a': aflag = 1; switch(strtol(optarg, &end, 10)) { case 1: F1->unpair = 1; break; case 2: F2->unpair = 1; break; default: errx(1, "-a option file number not 1 or 2"); break; } if (*end) errx(1, "illegal file number -- %s", optarg); break; case 'e': empty = optarg; break; case 'j': if ((F1->joinf = F2->joinf = strtol(optarg, &end, 10)) < 1) errx(1, "-j option field number less than 1"); if (*end) errx(1, "illegal field number -- %s", optarg); --F1->joinf; --F2->joinf; break; case 'o': fieldarg(optarg); break; case 't': spans = 0; if (mbrtowc(&tabchar[0], optarg, MB_LEN_MAX, NULL) != strlen(optarg)) errx(1, "illegal tab character specification"); tabchar[1] = L'\0'; break; case 'v': vflag = 1; joinout = 0; switch (strtol(optarg, &end, 10)) { case 1: F1->unpair = 1; break; case 2: F2->unpair = 1; break; default: errx(1, "-v option file number not 1 or 2"); break; } if (*end) errx(1, "illegal file number -- %s", optarg); break; case '?': default: usage(); } } argc -= optind; argv += optind; if (aflag && vflag) errx(1, "the -a and -v options are mutually exclusive"); if (argc != 2) usage(); /* Open the files; "-" means stdin. 
*/ if (!strcmp(*argv, "-")) F1->fp = stdin; else if ((F1->fp = fopen(*argv, "r")) == NULL) err(1, "%s", *argv); ++argv; if (!strcmp(*argv, "-")) F2->fp = stdin; else if ((F2->fp = fopen(*argv, "r")) == NULL) err(1, "%s", *argv); if (F1->fp == stdin && F2->fp == stdin) errx(1, "only one input file may be stdin"); slurp(F1); slurp(F2); while (F1->setcnt && F2->setcnt) { cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf); if (cval == 0) { /* Oh joy, oh rapture, oh beauty divine! */ if (joinout) joinlines(F1, F2); slurp(F1); slurp(F2); } else if (cval < 0) { /* File 1 takes the lead... */ if (F1->unpair) joinlines(F1, NULL); slurp(F1); } else { /* File 2 takes the lead... */ if (F2->unpair) joinlines(F2, NULL); slurp(F2); } } /* * Now that one of the files is used up, optionally output any * remaining lines from the other file. */ if (F1->unpair) while (F1->setcnt) { joinlines(F1, NULL); slurp(F1); } if (F2->unpair) while (F2->setcnt) { joinlines(F2, NULL); slurp(F2); } exit(0); }
/* * __svfscanf_unlocked - non-MT-safe version of __svfscanf */ int __svfscanf_unlocked(FILE *fp, const char *fmt0, va_list ap) { const u_char *fmt = (const u_char *)fmt0; int c; /* character from format, or conversion */ size_t width; /* field width, or 0 */ char *p; /* points into all kinds of strings */ size_t n; /* handy size_t */ int flags; /* flags as defined above */ char *p0; /* saves original value of p when necessary */ int nassigned; /* number of fields assigned */ int nconversions; /* number of conversions */ int nread; /* number of characters consumed from fp */ int base; /* base argument to conversion function */ char ccltab[256]; /* character class table for %[...] */ char buf[BUF]; /* buffer for numeric and mb conversions */ wchar_t *wcp; /* handy wide-character pointer */ size_t nconv; /* length of multibyte sequence converted */ static const mbstate_t initial; mbstate_t mbs; /* `basefix' is used to avoid `if' tests in the integer scanner */ static const short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; _DIAGASSERT(fp != NULL); _DIAGASSERT(fmt0 != NULL); _SET_ORIENTATION(fp, -1); nassigned = 0; nconversions = 0; nread = 0; base = 0; for (;;) { c = (unsigned char)*fmt++; if (c == 0) return (nassigned); if (isspace(c)) { while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) nread++, fp->_r--, fp->_p++; continue; } if (c != '%') goto literal; width = 0; flags = 0; /* * switch on the format. continue if done; * break once format type is derived. 
*/ again: c = *fmt++; switch (c) { case '%': SCANF_SKIP_SPACE(); literal: if (fp->_r <= 0 && __srefill(fp)) goto input_failure; if (*fp->_p != c) goto match_failure; fp->_r--, fp->_p++; nread++; continue; case '*': flags |= SUPPRESS; goto again; case 'j': flags |= INTMAXT; goto again; case 'l': if (flags & LONG) { flags &= ~LONG; flags |= LONGLONG; } else flags |= LONG; goto again; case 'q': flags |= LONGLONG; /* not quite */ goto again; case 't': flags |= PTRDIFFT; goto again; case 'z': flags |= SIZET; goto again; case 'L': flags |= LONGDBL; goto again; case 'h': if (flags & SHORT) { flags &= ~SHORT; flags |= SHORTSHORT; } else flags |= SHORT; goto again; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': width = width * 10 + c - '0'; goto again; /* * Conversions. */ case 'd': c = CT_INT; base = 10; break; case 'i': c = CT_INT; base = 0; break; case 'o': c = CT_INT; flags |= UNSIGNED; base = 8; break; case 'u': c = CT_INT; flags |= UNSIGNED; base = 10; break; case 'X': case 'x': flags |= PFXOK; /* enable 0x prefixing */ c = CT_INT; flags |= UNSIGNED; base = 16; break; #ifndef NO_FLOATING_POINT case 'A': case 'E': case 'F': case 'G': case 'a': case 'e': case 'f': case 'g': c = CT_FLOAT; break; #endif case 'S': flags |= LONG; /* FALLTHROUGH */ case 's': c = CT_STRING; break; case '[': fmt = __sccl(ccltab, fmt); flags |= NOSKIP; c = CT_CCL; break; case 'C': flags |= LONG; /* FALLTHROUGH */ case 'c': flags |= NOSKIP; c = CT_CHAR; break; case 'p': /* pointer format is like hex */ flags |= POINTER | PFXOK; c = CT_INT; /* assumes sizeof(uintmax_t) */ flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ base = 16; break; case 'n': nconversions++; if (flags & SUPPRESS) /* ??? 
*/ continue; if (flags & SHORTSHORT) *va_arg(ap, char *) = nread; else if (flags & SHORT) *va_arg(ap, short *) = nread; else if (flags & LONG) *va_arg(ap, long *) = nread; else if (flags & LONGLONG) *va_arg(ap, long long *) = nread; else if (flags & INTMAXT) *va_arg(ap, intmax_t *) = nread; else if (flags & SIZET) *va_arg(ap, size_t *) = nread; else if (flags & PTRDIFFT) *va_arg(ap, ptrdiff_t *) = nread; else *va_arg(ap, int *) = nread; continue; default: goto match_failure; /* * Disgusting backwards compatibility hack. XXX */ case '\0': /* compat */ return (EOF); } /* * We have a conversion that requires input. */ if (fp->_r <= 0 && __srefill(fp)) goto input_failure; /* * Consume leading white space, except for formats * that suppress this. */ if ((flags & NOSKIP) == 0) { while (isspace(*fp->_p)) { nread++; if (--fp->_r > 0) fp->_p++; else if (__srefill(fp)) goto input_failure; } /* * Note that there is at least one character in * the buffer, so conversions that do not set NOSKIP * ca no longer result in an input failure. */ } /* * Do the conversion. 
*/ switch (c) { case CT_CHAR: /* scan arbitrary characters (sets NOSKIP) */ if (width == 0) width = 1; if (flags & LONG) { if ((flags & SUPPRESS) == 0) wcp = va_arg(ap, wchar_t *); else wcp = NULL; n = 0; while (width != 0) { if (n == MB_CUR_MAX) { fp->_flags |= __SERR; goto input_failure; } buf[n++] = *fp->_p; fp->_p++; fp->_r--; mbs = initial; nconv = mbrtowc(wcp, buf, n, &mbs); if (nconv == (size_t)-1) { fp->_flags |= __SERR; goto input_failure; } if (nconv == 0 && !(flags & SUPPRESS)) *wcp = L'\0'; if (nconv != (size_t)-2) { nread += n; width--; if (!(flags & SUPPRESS)) wcp++; n = 0; } if (fp->_r <= 0 && __srefill(fp)) { if (n != 0) { fp->_flags |= __SERR; goto input_failure; } break; } } if (!(flags & SUPPRESS)) nassigned++; } else if (flags & SUPPRESS) { size_t sum = 0; for (;;) { if ((n = fp->_r) < width) { sum += n; width -= n; fp->_p += n; if (__srefill(fp)) { if (sum == 0) goto input_failure; break; } } else { sum += width; fp->_r -= width; fp->_p += width; break; } } nread += sum; } else { size_t r = fread(va_arg(ap, char *), 1, width, fp); if (r == 0) goto input_failure; nread += r; nassigned++; } nconversions++; break; case CT_CCL: /* scan a (nonempty) character class (sets NOSKIP) */ if (width == 0) width = (size_t)~0; /* `infinity' */ /* take only those things in the class */ if (flags & LONG) { wchar_t twc; int nchars; if ((flags & SUPPRESS) == 0) wcp = va_arg(ap, wchar_t *); else
/*
 * Count the 16-bit units needed to hold the conversion of the "count"
 * bytes at "bytes".  With IL_CONFIG_LATIN1 this is simply "count".
 * Otherwise each decoded character contributes the length reported by
 * ILUTF16WriteChar() for a null destination, an embedded NUL contributes
 * one unit, and bytes that do not decode are skipped one at a time.
 */
unsigned long ILAnsiGetCharCount(const unsigned char *bytes,
								 unsigned long count)
{
#ifdef IL_CONFIG_LATIN1

	return count;

#else
#if HAVE_MBRTOWC

	/* Use the re-entrant function to perform the conversion */
	mbstate_t state;
	size_t chlen;
	unsigned long total = 0;
	unsigned long step;
	wchar_t ch;
	ILMemZero(&state, sizeof(state));
	mbrtowc((wchar_t *)0, (char *)0, 0, &state);
	for(; count > 0; bytes += step, count -= step)
	{
		/* Invalid bytes and embedded NULs advance by a single byte */
		step = 1;
		chlen = mbrtowc(&ch, (char *)bytes, (size_t)count, &state);
		if(chlen == 0)
		{
			/* Embedded NUL character */
			++total;
		}
		else if(chlen != (size_t)(-1) && chlen != (size_t)(-2))
		{
			/* Ordinary character */
			total += ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			step = (unsigned long)chlen;
		}
	}
	return total;

#else

	/* Use the non re-entrant function to perform the conversion
	   and just hope that the underlying libc takes care of the
	   thread-safety issues for us */
	int chlen;
	unsigned long total = 0;
	unsigned long step;
	wchar_t ch;
	mbtowc((wchar_t *)0, (char *)0, 0);
	for(; count > 0; bytes += step, count -= step)
	{
		/* Invalid bytes and embedded NULs advance by a single byte */
		step = 1;
		chlen = mbtowc(&ch, (char *)bytes, (size_t)count);
		if(chlen == 0)
		{
			/* Embedded NUL character */
			++total;
		}
		else if(chlen > 0)
		{
			/* Ordinary character */
			total += ILUTF16WriteChar((unsigned short *)0, (unsigned long)ch);
			step = (unsigned long)chlen;
		}
	}
	return total;

#endif
#endif
}
/* Wrapper around mbrtowc() that compensates for the platform defects
   selected by the MBRTOWC_*_BUG macros.  When none of them is set it
   simply forwards to mbrtowc().  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
# if MBRTOWC_NULL_ARG_BUG || MBRTOWC_RETVAL_BUG
  /* Treat a null S as a request to convert the empty string without
     storing a result.  */
  if (s == NULL)
    {
      n = 1;
      s = "";
      pwc = NULL;
    }
# endif

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Substitute our own state object for mbrtowc's hidden one, because
       mbsinit() can only be applied to a state we can name.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* A character is partially consumed: finish it one byte at a
           time so that the number of bytes used is known exactly.  */
        size_t consumed = 0;

        while (consumed < n)
          {
            wchar_t unit;
            size_t rc = mbrtowc (&unit, s + consumed, 1, ps);

            if (rc == (size_t)(-1))
              return rc;
            consumed++;
            if (rc == (size_t)(-2))
              continue;

            /* The multibyte character has been completed.  */
            if (pwc != NULL)
              *pwc = unit;
            return unit == 0 ? 0 : consumed;
          }
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  {
    wchar_t unit;
    size_t rc = mbrtowc (&unit, s, n, ps);

    if (rc == (size_t)(-1) || rc == (size_t)(-2))
      return rc;

    if (pwc != NULL)
      *pwc = unit;
    /* A converted NUL must be reported as 0.  */
    return unit == 0 ? 0 : rc;
  }
# else
  return mbrtowc (pwc, s, n, ps);
# endif
}
int main()
{
// Compile-time check that every function of the wide-character header
// can be called with the expected argument types and yields the expected
// result type.  Nothing is executed at run time.

// The result type of the call expression given after T must be exactly T.
#define CWCHAR_RESULT_IS(T, ...) \
    static_assert((std::is_same<decltype(__VA_ARGS__), T>::value), "")

// mbstate_t is supplied by the underlying C library.  C99 describes it as
// a complete object type other than an array type that can hold the
// conversion state information necessary to convert between sequences of
// multibyte characters and wide characters.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-braces"
#endif
    mbstate_t mb = {0};
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
    size_t s = 0;
    tm *tm = 0;
    wint_t w = 0;
    ::FILE* fp = 0;
#ifdef __APPLE__
    __darwin_va_list va;
#else
    __builtin_va_list va;
#endif
    char* ns = 0;
    wchar_t* ws = 0;

    // Formatted input/output.
    CWCHAR_RESULT_IS(int, fwprintf(fp, L""));
    CWCHAR_RESULT_IS(int, fwscanf(fp, L""));
    CWCHAR_RESULT_IS(int, swprintf(ws, s, L""));
    CWCHAR_RESULT_IS(int, swscanf(L"", L""));
    CWCHAR_RESULT_IS(int, vfwprintf(fp, L"", va));
    CWCHAR_RESULT_IS(int, vfwscanf(fp, L"", va));
    CWCHAR_RESULT_IS(int, vswprintf(ws, s, L"", va));
    CWCHAR_RESULT_IS(int, vswscanf(L"", L"", va));

    // Character and string input/output.
    CWCHAR_RESULT_IS(wint_t, fgetwc(fp));
    CWCHAR_RESULT_IS(wchar_t*, fgetws(ws, 0, fp));
    CWCHAR_RESULT_IS(wint_t, fputwc(L' ', fp));
    CWCHAR_RESULT_IS(int, fputws(L"", fp));
    CWCHAR_RESULT_IS(int, fwide(fp, 0));
    CWCHAR_RESULT_IS(wint_t, getwc(fp));
    CWCHAR_RESULT_IS(wint_t, putwc(L' ', fp));
    CWCHAR_RESULT_IS(wint_t, ungetwc(L' ', fp));

    // Numeric conversions.
    CWCHAR_RESULT_IS(double, wcstod(L"", (wchar_t**)0));
    CWCHAR_RESULT_IS(float, wcstof(L"", (wchar_t**)0));
    CWCHAR_RESULT_IS(long double, wcstold(L"", (wchar_t**)0));
    CWCHAR_RESULT_IS(long, wcstol(L"", (wchar_t**)0, 0));
    CWCHAR_RESULT_IS(long long, wcstoll(L"", (wchar_t**)0, 0));
    CWCHAR_RESULT_IS(unsigned long, wcstoul(L"", (wchar_t**)0, 0));
    CWCHAR_RESULT_IS(unsigned long long, wcstoull(L"", (wchar_t**)0, 0));

    // String and memory utilities.
    CWCHAR_RESULT_IS(wchar_t*, wcscpy(ws, L""));
    CWCHAR_RESULT_IS(wchar_t*, wcsncpy(ws, L"", s));
    CWCHAR_RESULT_IS(wchar_t*, wcscat(ws, L""));
    CWCHAR_RESULT_IS(wchar_t*, wcsncat(ws, L"", s));
    CWCHAR_RESULT_IS(int, wcscmp(L"", L""));
    CWCHAR_RESULT_IS(int, wcscoll(L"", L""));
    CWCHAR_RESULT_IS(int, wcsncmp(L"", L"", s));
    CWCHAR_RESULT_IS(size_t, wcsxfrm(ws, L"", s));
    CWCHAR_RESULT_IS(wchar_t*, wcschr((wchar_t*)0, L' '));
    CWCHAR_RESULT_IS(size_t, wcscspn(L"", L""));
    CWCHAR_RESULT_IS(size_t, wcslen(L""));
    CWCHAR_RESULT_IS(wchar_t*, wcspbrk((wchar_t*)0, L""));
    CWCHAR_RESULT_IS(wchar_t*, wcsrchr((wchar_t*)0, L' '));
    CWCHAR_RESULT_IS(size_t, wcsspn(L"", L""));
    CWCHAR_RESULT_IS(wchar_t*, wcsstr((wchar_t*)0, L""));
    CWCHAR_RESULT_IS(wchar_t*, wcstok(ws, L"", (wchar_t**)0));
    CWCHAR_RESULT_IS(wchar_t*, wmemchr((wchar_t*)0, L' ', s));
    CWCHAR_RESULT_IS(int, wmemcmp(L"", L"", s));
    CWCHAR_RESULT_IS(wchar_t*, wmemcpy(ws, L"", s));
    CWCHAR_RESULT_IS(wchar_t*, wmemmove(ws, L"", s));
    CWCHAR_RESULT_IS(wchar_t*, wmemset(ws, L' ', s));
    CWCHAR_RESULT_IS(size_t, wcsftime(ws, s, L"", tm));

    // Multibyte / wide character conversions.
    CWCHAR_RESULT_IS(wint_t, btowc(0));
    CWCHAR_RESULT_IS(int, wctob(w));
    CWCHAR_RESULT_IS(int, mbsinit(&mb));
    CWCHAR_RESULT_IS(size_t, mbrlen("", s, &mb));
    CWCHAR_RESULT_IS(size_t, mbrtowc(ws, "", s, &mb));
    CWCHAR_RESULT_IS(size_t, wcrtomb(ns, L' ', &mb));
    CWCHAR_RESULT_IS(size_t, mbsrtowcs(ws, (const char**)0, s, &mb));
    CWCHAR_RESULT_IS(size_t, wcsrtombs(ns, (const wchar_t**)0, s, &mb));

// The const overloads below are not checked on every platform: these
// tests fail on systems whose C library doesn't provide a correct overload
// set for wcschr, wcspbrk, wcsrchr, wcsstr, and wmemchr, unless the
// compiler is a suitably recent version of Clang.
#if !defined(__APPLE__) || defined(_LIBCPP_PREFERRED_OVERLOAD)
    CWCHAR_RESULT_IS(const wchar_t*, wcschr((const wchar_t*)0, L' '));
    CWCHAR_RESULT_IS(const wchar_t*, wcspbrk((const wchar_t*)0, L""));
    CWCHAR_RESULT_IS(const wchar_t*, wcsrchr((const wchar_t*)0, L' '));
    CWCHAR_RESULT_IS(const wchar_t*, wcsstr((const wchar_t*)0, L""));
    CWCHAR_RESULT_IS(const wchar_t*, wmemchr((const wchar_t*)0, L' ', s));
#endif

#ifndef _LIBCPP_HAS_NO_STDIN
    CWCHAR_RESULT_IS(wint_t, getwchar());
    CWCHAR_RESULT_IS(int, vwscanf(L"", va));
    CWCHAR_RESULT_IS(int, wscanf(L""));
#endif

#ifndef _LIBCPP_HAS_NO_STDOUT
    CWCHAR_RESULT_IS(wint_t, putwchar(L' '));
    CWCHAR_RESULT_IS(int, vwprintf(L"", va));
    CWCHAR_RESULT_IS(int, wprintf(L""));
#endif

#undef CWCHAR_RESULT_IS
}
int main(int argc, char *argv[]) { struct stat sb; int ch, fd, match; wchar_t termchar; unsigned char *back, *front; unsigned const char *file; wchar_t *key; (void) setlocale(LC_CTYPE, ""); file = _path_words; termchar = L'\0'; while ((ch = getopt(argc, argv, "dft:")) != -1) switch(ch) { case 'd': dflag = 1; break; case 'f': fflag = 1; break; case 't': if (mbrtowc(&termchar, optarg, MB_LEN_MAX, NULL) != strlen(optarg)) errx(2, "invalid termination character"); break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc == 0) usage(); if (argc == 1) /* But set -df by default. */ dflag = fflag = 1; key = prepkey(*argv++, termchar); if (argc >= 2) file = *argv++; match = 1; do { if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) err(2, "%s", file); if ((uintmax_t)sb.st_size > (uintmax_t)SIZE_T_MAX) errx(2, "%s: %s", file, strerror(EFBIG)); if (sb.st_size == 0) { close(fd); continue; } if ((front = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_SHARED, fd, (off_t)0)) == MAP_FAILED) err(2, "%s", file); back = front + sb.st_size; match *= (look(key, front, back)); close(fd); } while (argc-- > 2 && (file = *argv++)); exit(match); }
static void h_ctype2(const struct test *t, bool use_mbstate) { mbstate_t *stp; mbstate_t st; char buf[SIZE]; char *str; size_t n; ATF_REQUIRE_STREQ(setlocale(LC_ALL, "C"), "C"); #if defined(__NetBSD__) ATF_REQUIRE(setlocale(LC_CTYPE, t->locale) != NULL); #else if (setlocale(LC_CTYPE, t->locale) == NULL) { fprintf(stderr, "Locale %s not found.\n", t->locale); return; } #endif (void)strvis(buf, t->data, VIS_WHITE | VIS_OCTAL); (void)printf("Checking string: \"%s\"\n", buf); ATF_REQUIRE((str = setlocale(LC_ALL, NULL)) != NULL); (void)printf("Using locale: %s\n", str); (void)printf("Using mbstate: %s\n", use_mbstate ? "yes" : "no"); (void)memset(&st, 0, sizeof(st)); // mbrtowc(0, 0, 0, &st); /* XXX for ISO2022-JP */ stp = use_mbstate ? &st : 0; for (n = 9; n > 0; n--) { const char *src = t->data; wchar_t dst; size_t nchar = 0; int width = 0; ATF_REQUIRE(mbsinit(stp) != 0); for (;;) { size_t rv = mbrtowc(&dst, src, n, stp); if (rv == 0) break; if (rv == (size_t)-2) { src += n; width += n; continue; } if (rv == (size_t)-1) { ATF_REQUIRE_EQ(errno, EILSEQ); atf_tc_fail("Invalid sequence"); /* NOTREACHED */ } width += rv; src += rv; if (dst != t->wchars[nchar] || width != t->widths[nchar]) { (void)printf("At position %zd:\n", nchar); (void)printf(" expected: 0x%04X (%u)\n", t->wchars[nchar], t->widths[nchar]); (void)printf(" got : 0x%04X (%u)\n", dst, width); atf_tc_fail("Test failed"); } nchar++; width = 0; } ATF_REQUIRE_EQ_MSG(dst, 0, "Incorrect terminating character: " "0x%04X (expected: 0x00)", dst); ATF_REQUIRE_EQ_MSG(nchar, t->length, "Incorrect length: " "%zd (expected: %zd)", nchar, t->length); } { wchar_t wbuf[SIZE]; size_t rv; char const *src = t->data; int i; (void)memset(wbuf, 0xFF, sizeof(wbuf)); rv = mbsrtowcs(wbuf, &src, SIZE, stp); ATF_REQUIRE_EQ_MSG(rv, t->length, "Incorrect length: %zd " "(expected: %zd)", rv, t->length); ATF_REQUIRE_EQ(src, NULL); for (i = 0; wbuf[i] != 0; ++i) { if (wbuf[i] == t->wchars[i]) continue; (void)printf("At position %d:\n", i); 
(void)printf(" expected: 0x%04X\n", t->wchars[i]); (void)printf(" got : 0x%04X\n", wbuf[i]); atf_tc_fail("Test failed"); } ATF_REQUIRE_EQ_MSG((size_t)i, t->length, "Incorrect length: " "%d (expected: %zd)", i, t->length); } (void)printf("Ok.\n"); }
/* Decode an item via the fish 1.x format. Adapted from fish 1.x's item_get().

   The `length` bytes starting at `begin` are decoded one multibyte character
   at a time, each from a fresh shift state.  Undecodable bytes and NUL bytes
   are skipped.  A line whose first character is '#' is a timestamp line: the
   digits on it are parsed with wcstol() and remembered when the value is
   non-zero and the number runs to the end of the line.  The item text ends
   at the first newline that is not preceded by an unescaped backslash, or
   when the input runs out or ends in an incomplete character.  */
history_item_t history_t::decode_item_fish_1_x(const char *begin, size_t length)
{
    const char *const end = begin + length;
    const char *cursor = begin;

    wcstring text;
    time_t when = 0;
    bool escaped = false;
    bool at_start = true;
    bool reading_timestamp = false;

    for (;;)
    {
        /* Stop once every byte has been consumed.  Calling mbrtowc() with a
           zero length is not portable: implementations differ on whether it
           returns (size_t)-2, (size_t)-1 or 0, and a 0 would make the
           "skip NUL" path below step past `end`. */
        if (cursor >= end)
            break;

        mbstate_t state;
        memset(&state, 0, sizeof state);

        wchar_t wc;
        const size_t consumed = mbrtowc(&wc, cursor, end - cursor, &state);

        if (consumed == (size_t)-2)
        {
            /* Truncated character at the end of the input. */
            break;
        }
        if (consumed == (size_t)-1 || consumed == (size_t)0)
        {
            /* Invalid byte or embedded NUL: skip one byte and resync. */
            cursor++;
            continue;
        }
        cursor += consumed;

        if (wc == L'\n')
        {
            if (reading_timestamp)
            {
                /* Everything collected so far is the timestamp line; find
                   its first digit and parse from there. */
                const wchar_t *digits = text.c_str();
                while (*digits && !iswdigit(*digits))
                    digits++;

                errno = 0;
                if (*digits)
                {
                    wchar_t *tail;
                    errno = 0;
                    const time_t parsed = (time_t)wcstol(digits, &tail, 10);
                    if (parsed && !errno && !*tail)
                        when = parsed;
                }

                text.clear();
                reading_timestamp = false;
                continue;
            }

            /* An unescaped newline terminates the item. */
            if (!escaped)
                break;
        }

        if (at_start && wc == L'#')
            reading_timestamp = true;
        at_start = false;

        text.push_back(wc);

        /* A backslash escapes the next character unless it is itself
           escaped. */
        escaped = (wc == L'\\') && !escaped;
    }

    text = history_unescape_newlines_fish_1_x(text);
    return history_item_t(text, when);
}
/**
   Convert the NUL-terminated multibyte string \c in to wide characters,
   storing the result in \c out.

   Every loop iteration consumes at least one input byte and writes exactly
   one wide character, so \c out must have room for strlen(in)+1 elements.

   Bytes that cannot be decoded (invalid or incomplete sequences) are stored
   as ENCODE_DIRECT_BASE + byte, one byte at a time, and decoding restarts
   in the initial shift state.  The same is done when a successful decode
   yields a value inside the ENCODE_DIRECT_BASE..ENCODE_DIRECT_BASE+255
   range or equal to INTERNAL_SEPARATOR, so those values only ever appear in
   the output as the result of this encoding.

   \return \c out.  NOTE(review): the CHECK() macro is defined elsewhere and
   presumably makes the function return 0 when an argument is null - confirm.
*/
wchar_t *str2wcs_internal( const char *in, wchar_t *out )
{
	size_t res=0;
	size_t in_pos=0;
	size_t out_pos = 0;
	mbstate_t state;
	size_t len;

	CHECK( in, 0 );
	CHECK( out, 0 );

	len = strlen(in);

	memset( &state, 0, sizeof(state) );

	while( in[in_pos] )
	{
		int encode_direct;

		res = mbrtowc( &out[out_pos], &in[in_pos], len-in_pos, &state );

		/*
		  Look at the return value first: on failure mbrtowc() stores
		  nothing, so out[out_pos] must not be inspected in that case.
		*/
		if( res == (size_t)(-1) || res == (size_t)(-2) )
		{
			encode_direct = 1;
		}
		else
		{
			encode_direct =
				( ( out[out_pos] >= ENCODE_DIRECT_BASE) &&
				  ( out[out_pos] < ENCODE_DIRECT_BASE+256)) ||
				( out[out_pos] == INTERNAL_SEPARATOR );
		}

		if( encode_direct )
		{
			out[out_pos] = ENCODE_DIRECT_BASE + (unsigned char)in[in_pos];
			in_pos++;
			memset( &state, 0, sizeof(state) );
			out_pos++;
		}
		else if( res == 0 )
		{
			/*
			  A null wide character was decoded and stored, so the
			  output is already terminated.
			*/
			return out;
		}
		else
		{
			in_pos += res;
			out_pos++;
		}
	}
	out[out_pos] = 0;

	return out;
}
/*
 * Given the first byte of an input character, read as many further bytes
 * as are needed to complete a multibyte character, and return the result
 * as a wide character.  The result is also left in lastchar_wide, and
 * lastchar_wide_valid is set.
 *
 * The conversion state is kept between calls.  It is reset to the initial
 * state on EOF and on an invalid sequence, both of which return WEOF.
 * If a follow-up byte does not arrive before getbyte() reports a timeout,
 * '?' is returned instead.
 */
mod_export ZLE_INT_T
getrestchar(int inchar)
{
    static mbstate_t shift_state;
    char byte = inchar;
    wchar_t wc;
    size_t status;
    int timed_out;

    /*
     * Whatever happens below, a value is stored in lastchar_wide,
     * even if that value is WEOF.
     */
    lastchar_wide_valid = 1;

    if (inchar == EOF) {
	/* No more input: forget any partially read character. */
	memset(&shift_state, 0, sizeof shift_state);
	return lastchar_wide = WEOF;
    }

    /*
     * Feed one byte at a time for as long as the character is incomplete.
     * A zero return (null character) ends the loop like any other
     * completed character.
     */
    while ((status = mbrtowc(&wc, &byte, 1, &shift_state)) == MB_INCOMPLETE) {
	/*
	 * The remaining bytes of a character should arrive together,
	 * so the key timeout is always applied here; without it a
	 * stray lead byte could block input.
	 */
	inchar = getbyte(1L, &timed_out);
	/* getbyte deliberately resets lastchar_wide_valid */
	lastchar_wide_valid = 1;

	if (inchar == EOF) {
	    memset(&shift_state, 0, sizeof shift_state);
	    if (timed_out) {
		/*
		 * The lead byte was accepted but nothing followed
		 * in time; most likely a bogus character, so
		 * substitute '?'.
		 */
		lastchar = '?';
		return lastchar_wide = L'?';
	    } else
		return lastchar_wide = WEOF;
	}
	byte = inchar;
    }

    if (status == MB_INVALID) {
	/* Invalid sequence: start again from the initial state. */
	memset(&shift_state, 0, sizeof shift_state);
	return lastchar_wide = WEOF;
    }

    return lastchar_wide = (ZLE_INT_T)wc;
}
/*
 * default_char2int --
 *	Convert the `len' bytes at `str' to wide characters with mbrtowc(3),
 *	storing them in the conversion window buffer cw->bp1 (whose size is
 *	tracked in cw->blen1).
 *
 *	On every exit *tolen is set to the number of wide characters written
 *	and *dst to cw->bp1.  The normal exit returns 0.  The `err' exit
 *	returns `error', which starts as 1 and is changed to -(len - j) when
 *	mbrtowc() reports an incomplete sequence.
 *
 *	When built with USE_ICONV and `enc' differs from the locale codeset,
 *	an iconv descriptor is opened and the input is passed through the
 *	CONVERT macro before decoding.
 *
 *	`sp' is not referenced directly in this function.
 *
 * NOTE(review): BINC_RETW, CONVERT, MEMSET and HANDLE_MBR_ERROR are macros
 * defined elsewhere.  They presumably use locals of this function by name
 * (e.g. `buffer', `id') and may transfer control to `err' -- confirm before
 * renaming or reordering anything here.
 */
static int
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
    size_t *tolen, const CHAR_T **dst, const char *enc)
{
	int j;
	size_t i = 0;
	CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
	size_t *blen = &cw->blen1;
	mbstate_t mbs;
	size_t n;
	ssize_t nlen = len;
	const char *src = (const char *)str;
	iconv_t id = (iconv_t)-1;
	char buffer[CONV_BUFFER_SIZE];
	size_t left = len;
	int error = 1;

	MEMSET(&mbs, 0, 1);
	/* Make sure the output buffer can hold nlen elements to start with. */
	BINC_RETW(NULL, *tostr, *blen, nlen);

#ifdef USE_ICONV
	/* Only set up iconv when the requested encoding is not the locale's. */
	if (strcmp(nl_langinfo(CODESET), enc)) {
		id = iconv_open(nl_langinfo(CODESET), enc);
		if (id == (iconv_t)-1)
			goto err;
		CONVERT(str, left, src, len);
	}
#endif

	/* i counts wide characters written, j counts input bytes consumed. */
	for (i = 0, j = 0; j < len; ) {
		n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
		/* Incomplete sequence: report the unconsumed byte count. */
		if (n == (size_t)-2)
			error = -(len-j);
		if (n == (size_t)-1 || n == (size_t)-2)
			HANDLE_MBR_ERROR(n, mbs, (*tostr)[i], src[j]);
		/* NULL character converted: it still occupies one input byte. */
		if (n == 0)
			n = 1;
		j += n;
		/* Grow the output buffer in steps of 256 elements when full. */
		if (++i >= *blen) {
			nlen += 256;
			BINC_RETW(NULL, *tostr, *blen, nlen);
		}
		/*
		 * Current chunk exhausted but iconv input remains: convert
		 * the next chunk and restart the byte index.
		 */
		if (id != (iconv_t)-1 && j == len && left) {
			CONVERT(str, left, src, len);
			j = 0;
		}
	}
	*tolen = i;

	if (id != (iconv_t)-1)
		iconv_close(id);

	*dst = cw->bp1;

	return 0;
err:
	/* Hand back whatever was converted before the failure. */
	*tolen = i;

	if (id != (iconv_t)-1)
		iconv_close(id);

	*dst = cw->bp1;

	return error;
}
/* Test driver for mbsnrtowcs().

   The NUL-byte checks always run.  The encoding-specific checks run only
   when an argument is given; its first character selects the encoding the
   current locale is expected to use:
     '1'  ISO-8859-1 or ISO-8859-15
     '2'  UTF-8
     '3'  EUC-JP
     '4'  GB18030
   Each encoding is exercised twice: with an output limit smaller than the
   converted string (unlimited == 0) and with a limit of BUFSIZE
   (unlimited == 1).

   Returns 0 when the selected checks complete, and 1 if the locale cannot
   be set, no argument is given, or the selector is not recognised.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  {
    const char *src;

    memset (&state, '\0', sizeof (mbstate_t));

    /* Counting only (dest == NULL), with len 0 and len 1.  */
    src = "";
    ret = mbsnrtowcs (NULL, &src, 1, 0, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    src = "";
    ret = mbsnrtowcs (NULL, &src, 1, 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* With len 0 nothing may be stored, so the sentinel must survive.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs (&wc, &src, 1, 0, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));

    /* With len 1 the terminating null wide character is stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs (&wc, &src, 1, 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
  }

  if (argc > 1)
    {
      int unlimited;

      for (unlimited = 0; unlimited < 2; unlimited++)
        {
          #define BUFSIZE 10
          wchar_t buf[BUFSIZE];
          const char *src;
          mbstate_t temp_state;

          /* Fill the output with a sentinel so untouched slots can be
             detected afterwards.  */
          {
            size_t i;
            for (i = 0; i < BUFSIZE; i++)
              buf[i] = (wchar_t) 0xBADFACE;
          }

          /* In each case the leading bytes are consumed with mbrtowc() and
             then overwritten with NUL, so the later mbsnrtowcs() calls
             cannot succeed by re-reading them.  */
          switch (argv[1][0])
            {
            case '1':
              /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
              {
                char input[] = "B\374\337er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wctob (wc) == (unsigned char) '\374');
                ASSERT (mbsinit (&state));
                input[1] = '\0';

                /* Counting pass on a copy of the state: the full length is
                   reported and neither src nor state changes.  */
                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 4, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 2);
                ASSERT (mbsinit (&state));

                /* Converting pass: src ends up NULL only when the whole
                   string, terminator included, was converted.  */
                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 4, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 3));
                ASSERT (wctob (buf[0]) == (unsigned char) '\337');
                if (unlimited)
                  {
                    ASSERT (buf[1] == 'e');
                    ASSERT (buf[2] == 'r');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '2':
              /* Locale encoding is UTF-8.  */
              {
                char input[] = "B\303\274\303\237er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Only the first byte of a two-byte character is fed, so
                   state is left holding a partial character.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 6, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 6, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 5));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '3':
              /* Locale encoding is EUC-JP.  */
              {
                char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == '<');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 2, &state);
                ASSERT (ret == 2);
                ASSERT (wctob (wc) == EOF);
                ASSERT (mbsinit (&state));
                input[1] = '\0';
                input[2] = '\0';

                /* First half of the next character only.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 3, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[3] = '\0';

                src = input + 4;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 5, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 4);
                ASSERT (!mbsinit (&state));

                src = input + 4;
                ret = mbsnrtowcs (buf, &src, 5, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 3 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == '>');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '4':
              /* Locale encoding is GB18030.  */
              {
                char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* First byte of a two-byte character only.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                src = input + 2;
                temp_state = state;
                ret = mbsnrtowcs (NULL, &src, 8, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                src = input + 2;
                ret = mbsnrtowcs (buf, &src, 8, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            default:
              /* Unknown encoding selector.  */
              return 1;
            }
        }

      return 0;
    }

  /* No encoding selector was given.  */
  return 1;
}
static void ttyin(char buf[], register int nmax, char pchar) { char *sp; int c; int slash = 0; int maxlen; sp = buf; maxlen = 0; while (sp - buf < nmax) { if (promptlen > maxlen) maxlen = promptlen; c = readch(); if (c == '\\') { slash++; } else if (((cc_t) c == otty.c_cc[VERASE]) && !slash) { if (sp > buf) { #ifdef HAVE_WIDECHAR if (MB_CUR_MAX > 1) { wchar_t wc; size_t pos = 0, mblength; mbstate_t state, state_bak; memset(&state, '\0', sizeof(mbstate_t)); while (1) { state_bak = state; mblength = mbrtowc(&wc, buf + pos, sp - buf, &state); state = (mblength == (size_t)-2 || mblength == (size_t)-1) ? state_bak : state; mblength = (mblength == (size_t)-2 || mblength == (size_t)-1 || mblength == 0) ? 1 : mblength; if (buf + pos + mblength >= sp) break; pos += mblength; } if (mblength == 1) { ERASEONECOLUMN(docrterase); } else { int wc_width; wc_width = wcwidth(wc); wc_width = (wc_width < 1) ? 1 : wc_width; while (wc_width--) { ERASEONECOLUMN(docrterase); } } while (mblength--) { --promptlen; --sp; } } else #endif /* HAVE_WIDECHAR */ { --promptlen; ERASEONECOLUMN(docrterase); --sp; } if ((*sp < ' ' && *sp != '\n') || *sp == RUBOUT) { --promptlen; ERASEONECOLUMN(docrterase); } continue; } else { if (!eraseln) promptlen = maxlen; siglongjmp(restore, 1); } } else if (((cc_t) c == otty.c_cc[VKILL]) && !slash) { if (hard) { show(c); putchar('\n'); putchar(pchar); } else { putchar('\r'); putchar(pchar); if (eraseln) erasep(1); else if (docrtkill) while (promptlen-- > 1) putserr(BSB); promptlen = 1; } sp = buf; fflush(stdout); continue; } if (slash && ((cc_t) c == otty.c_cc[VKILL] || (cc_t) c == otty.c_cc[VERASE])) { ERASEONECOLUMN(docrterase); --sp; } if (c != '\\') slash = 0; *sp++ = c; if ((c < ' ' && c != '\n' && c != ESC) || c == RUBOUT) { c += (c == RUBOUT) ? 
-0100 : 0100; putserr(CARAT); promptlen++; } if (c != '\n' && c != ESC) { putcerr(c); promptlen++; } else break; } *--sp = '\0'; if (!eraseln) promptlen = maxlen; if (sp - buf >= nmax - 1) more_error(_("Line too long")); }
int glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob) { const char *patnext; size_t limit; Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; mbstate_t mbs; wchar_t wc; size_t clen; patnext = pattern; if (!(flags & GLOB_APPEND)) { pglob->gl_pathc = 0; pglob->gl_pathv = NULL; if (!(flags & GLOB_DOOFFS)) pglob->gl_offs = 0; } limit = 0; pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_errfunc = errfunc; pglob->gl_matchc = 0; bufnext = patbuf; bufend = bufnext + MAXPATHLEN - 1; if (flags & GLOB_NOESCAPE) { memset(&mbs, 0, sizeof(mbs)); while (bufend - bufnext >= MB_CUR_MAX) { clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (GLOB_NOMATCH); else if (clen == 0) break; *bufnext++ = wc; patnext += clen; } } else { /* Protect the quoted characters. */ memset(&mbs, 0, sizeof(mbs)); while (bufend - bufnext >= MB_CUR_MAX) { if (*patnext == QUOTE) { if (*++patnext == EOS) { *bufnext++ = QUOTE | M_PROTECT; continue; } prot = M_PROTECT; } else prot = 0; clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (GLOB_NOMATCH); else if (clen == 0) break; *bufnext++ = wc | prot; patnext += clen; } } *bufnext = EOS; if (flags & GLOB_BRACE) return globexp1(patbuf, pglob, &limit); else return glob0(patbuf, pglob, &limit); }
static int decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen, const char **reason, int surrogateescape) { wchar_t *res; size_t argsize; size_t count; #ifdef HAVE_MBRTOWC unsigned char *in; wchar_t *out; mbstate_t mbs; #endif #ifdef HAVE_BROKEN_MBSTOWCS /* Some platforms have a broken implementation of * mbstowcs which does not count the characters that * would result from conversion. Use an upper bound. */ argsize = strlen(arg); #else argsize = mbstowcs(NULL, arg, 0); #endif if (argsize != (size_t)-1) { if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { return -1; } res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t)); if (!res) { return -1; } count = mbstowcs(res, arg, argsize + 1); if (count != (size_t)-1) { wchar_t *tmp; /* Only use the result if it contains no surrogate characters. */ for (tmp = res; *tmp != 0 && !Py_UNICODE_IS_SURROGATE(*tmp); tmp++) ; if (*tmp == 0) { if (wlen != NULL) { *wlen = count; } *wstr = res; return 0; } } PyMem_RawFree(res); } /* Conversion failed. Fall back to escaping with surrogateescape. */ #ifdef HAVE_MBRTOWC /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */ /* Overallocate; as multi-byte characters are in the argument, the actual output could use less memory. */ argsize = strlen(arg) + 1; if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) { return -1; } res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t)); if (!res) { return -1; } in = (unsigned char*)arg; out = res; memset(&mbs, 0, sizeof mbs); while (argsize) { size_t converted = mbrtowc(out, (char*)in, argsize, &mbs); if (converted == 0) { /* Reached end of string; null char stored. */ break; } if (converted == (size_t)-2) { /* Incomplete character. This should never happen, since we provide everything that we have - unless there is a bug in the C library, or I misunderstood how mbrtowc works. */ goto decode_error; } if (converted == (size_t)-1) { if (!surrogateescape) { goto decode_error; } /* Conversion error. 
Escape as UTF-8b, and start over in the initial shift state. */ *out++ = 0xdc00 + *in++; argsize--; memset(&mbs, 0, sizeof mbs); continue; } if (Py_UNICODE_IS_SURROGATE(*out)) { if (!surrogateescape) { goto decode_error; } /* Surrogate character. Escape the original byte sequence with surrogateescape. */ argsize -= converted; while (converted--) { *out++ = 0xdc00 + *in++; } continue; } /* successfully converted some bytes */ in += converted; argsize -= converted; out++; } if (wlen != NULL) { *wlen = out - res; } *wstr = res; return 0; decode_error: PyMem_RawFree(res); if (wlen) { *wlen = in - (unsigned char*)arg; } if (reason) { *reason = "decoding error"; } return -2; #else /* HAVE_MBRTOWC */ /* Cannot use C locale for escaping; manually escape as if charset is ASCII (i.e. escape all bytes > 128. This will still roundtrip correctly in the locale's charset, which must be an ASCII superset. */ return decode_ascii(arg, wstr, wlen, reason, surrogateescape); #endif /* HAVE_MBRTOWC */ }
/*
 * glob3 --
 *	Match the entries of the directory named by pathbuf against `pattern'.
 *	Each entry name is converted to Char form after pathend; for every
 *	name accepted by match(), glob2() is called to continue with
 *	`restpattern'.
 *
 *	Returns GLOB_ABORTED if pathend lies beyond pathend_last, or if the
 *	directory cannot be opened and either the error callback returns
 *	non-zero (or the path cannot be converted for it) or GLOB_ERR is set.
 *	An unopenable directory is otherwise ignored (returns 0).  In all
 *	other cases the result is 0 or the first non-zero value returned by
 *	glob2().
 */
static int
glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
    Char *pattern, Char *restpattern, glob_t *pglob, size_t *limit)
{
	struct dirent *entry;
	DIR *dirp;
	int err;
	char buf[MAXPATHLEN];

	/*
	 * Deliberately left without a prototype: it is assigned either the
	 * caller-supplied gl_readdir or readdir(), whose declarations take
	 * pointers to different opaque structure types.
	 */
	struct dirent *(*readdirfunc)();

	if (pathend > pathend_last)
		return (GLOB_ABORTED);
	*pathend = EOS;
	errno = 0;

	dirp = g_opendir(pathbuf, pglob);
	if (dirp == NULL) {
		/* TODO: don't call for ENOENT or ENOTDIR? */
		if (pglob->gl_errfunc) {
			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
				return (GLOB_ABORTED);
			if (pglob->gl_errfunc(buf, errno) ||
			    pglob->gl_flags & GLOB_ERR)
				return (GLOB_ABORTED);
		}
		return(0);
	}

	err = 0;

	/* Pick the directory reader, then scan for matching names. */
	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
		readdirfunc = pglob->gl_readdir;
	else
		readdirfunc = readdir;

	while ((entry = (*readdirfunc)(dirp)) != NULL) {
		char *sc;
		Char *dc;
		wchar_t wc;
		size_t clen;
		mbstate_t mbs;

		/* Initial DOT must be matched literally. */
		if (entry->d_name[0] == DOT && *pattern != DOT)
			continue;

		/* Append the entry name, as Chars, after the current path. */
		memset(&mbs, 0, sizeof(mbs));
		for (dc = pathend, sc = entry->d_name; dc < pathend_last;
		    sc += clen) {
			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
			if (clen == (size_t)-1 || clen == (size_t)-2) {
				/* Undecodable: take the raw byte and resync. */
				wc = *sc;
				clen = 1;
				memset(&mbs, 0, sizeof(mbs));
			}
			*dc = wc;
			if (*dc++ == EOS)
				break;
		}

		if (match(pathend, pattern, restpattern)) {
			/* --dc steps back onto the terminator just stored. */
			err = glob2(pathbuf, --dc, pathend_last, restpattern,
			    pglob, limit);
			if (err)
				break;
		} else {
			*pathend = EOS;
		}
	}

	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
		(*pglob->gl_closedir)(dirp);
	else
		closedir(dirp);
	return(err);
}
// Exercises wcstombs() and wcsrtombs(): counting with a NULL destination,
// real conversions with various output limits, an unrepresentable wide
// character, and a non-initial conversion state.
//
// The order of the checks matters: errno is only reset where shown, and
// several later expectations rely on the EILSEQ left behind by an earlier
// failing call.
TEST(wchar, wcstombs_wcrtombs) {
  const wchar_t hello[] = { L'h', L'e', L'l', L'l', L'o', 0 };
  const wchar_t invalid[] = { L'h', L'i', static_cast<wchar_t>(0xffffffff), 0 };
  const size_t kFailure = static_cast<size_t>(-1);
  const size_t kCountLimits[] = { 0, 4, 256 };
  const size_t kErrorLimits[] = { 0, 256 };
  const wchar_t* in;
  char out[BUFSIZ];

  // With a NULL destination the length argument is ignored and the functions
  // just count; wcsrtombs must leave the source pointer alone.
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(5U, wcstombs(NULL, hello, kCountLimits[i]));
  }
  for (size_t i = 0; i < 3; ++i) {
    in = hello;
    EXPECT_EQ(5U, wcsrtombs(NULL, &in, kCountLimits[i], NULL));
    EXPECT_EQ(&hello[0], in);
  }

  // An unrepresentable character makes wcstombs fail with EILSEQ...
  for (size_t i = 0; i < 2; ++i) {
    errno = 0;
    EXPECT_EQ(kFailure, wcstombs(NULL, invalid, kErrorLimits[i]));
    EXPECT_EQ(EILSEQ, errno);
  }

  // ...and wcsrtombs cannot report where it stopped, because no conversion
  // was requested.
  for (size_t i = 0; i < 2; ++i) {
    errno = 0;
    in = invalid;
    EXPECT_EQ(kFailure, wcsrtombs(NULL, &in, kErrorLimits[i], NULL));
    EXPECT_EQ(&invalid[0], in);
    EXPECT_EQ(EILSEQ, errno);
  }

  // Real conversions with wcstombs.  The buffer is pre-filled with 'x' so
  // that untouched bytes are visible.
  memset(out, 'x', sizeof(out));
  EXPECT_EQ(0U, wcstombs(out, hello, 0));

  memset(out, 'x', sizeof(out));
  EXPECT_EQ(4U, wcstombs(out, hello, 4));
  out[5] = 0;
  EXPECT_STREQ("hellx", out);

  memset(out, 'x', sizeof(out));
  EXPECT_EQ(5U, wcstombs(out, hello, 256));
  EXPECT_STREQ("hello", out);

  memset(out, 'x', sizeof(out));
  EXPECT_EQ(5U, wcstombs(out, hello, 6));
  EXPECT_STREQ("hello", out);

  errno = 0;
  memset(out, 'x', sizeof(out));
  EXPECT_EQ(kFailure, wcstombs(out, invalid, 256));
  EXPECT_EQ(EILSEQ, errno);
  out[3] = 0;
  EXPECT_STREQ("hix", out);

  // wcsrtombs additionally reports how far it got through the source.
  // errno is not reset from here on, so it still holds the EILSEQ from the
  // failing call above.
  memset(out, 'x', sizeof(out));
  in = hello;
  EXPECT_EQ(0U, wcsrtombs(out, &in, 0, NULL));
  EXPECT_EQ(&hello[0], in);  // No input consumed.
  EXPECT_EQ(EILSEQ, errno);

  memset(out, 'x', sizeof(out));
  in = hello;
  EXPECT_EQ(4U, wcsrtombs(out, &in, 4, NULL));
  EXPECT_EQ(&hello[4], in);  // Some input consumed.
  EXPECT_EQ(EILSEQ, errno);
  out[5] = 0;
  EXPECT_STREQ("hellx", out);

  memset(out, 'x', sizeof(out));
  in = hello;
  EXPECT_EQ(5U, wcsrtombs(out, &in, 256, NULL));
  EXPECT_EQ(NULL, in);  // All input consumed!
  EXPECT_EQ(EILSEQ, errno);
  EXPECT_STREQ("hello", out);

  memset(out, 'x', sizeof(out));
  in = hello;
  EXPECT_EQ(5U, wcsrtombs(out, &in, 6, NULL));
  EXPECT_EQ(NULL, in);  // All input consumed.
  EXPECT_EQ(EILSEQ, errno);
  EXPECT_STREQ("hello", out);

  memset(out, 'x', sizeof(out));
  in = invalid;
  EXPECT_EQ(kFailure, wcsrtombs(out, &in, 256, NULL));
  EXPECT_EQ(&invalid[2], in);
  EXPECT_EQ(EILSEQ, errno);
  out[3] = 0;
  EXPECT_STREQ("hix", out);

  // A state left mid-character by mbrtowc is not acceptable to wcsrtombs.
  mbstate_t state;
  in = hello;
  memset(&state, 0, sizeof(state));
  ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(NULL, "\xc2", 1, &state));
  EXPECT_EQ(kFailure, wcsrtombs(NULL, &in, 0, &state));
  EXPECT_EQ(EILSEQ, errno);
}
/*
 * Build the visual row of `vrow` from its logical row.
 *
 * The logical row is composed into a byte string, passed through
 * vtlayout->transform() to obtain the visual byte string together with the
 * in2out/out2in character index maps, and the visual string is then decoded
 * one multibyte character at a time.  For each character the corresponding
 * logical cell is looked up; drawn cells are added to vrow->vrow with the
 * attributes of the logical cell, and the visual2logical_index /
 * logical2visual_index tables of vrow are filled in.
 *
 * Decoding stops at the first NUL, invalid or incomplete sequence.  If
 * fewer columns were produced than the logical row has and the layout is
 * not left-to-right, empty cells are inserted at the start of the visual
 * row.
 */
static inline void do_layout(VisualRow *vrow, VTLayout *vtlayout)
{
#define SIZE 4096
    size_t logical_char2cell_index[SIZE];
    size_t in2out[SIZE];
    size_t out2in[SIZE];
    char logical[SIZE]; /* is this enough? */
    int logical_width, logical_length;
    char visual[SIZE];  /* is this enough? */
    size_t visual_length = SIZE;
    wchar_t wc;
    mbstate_t ps;
    size_t mb_len;
    int consumed = 0;
    int cursor_pos = 0;
    int current_char = 0;
    int width;
    int logical_cell_index;
    Char mbchar;

    /*
     * Clear each array completely.  The index arrays hold size_t elements,
     * so their size in bytes is larger than SIZE; sizeof covers all of it.
     */
    memset(logical_char2cell_index,0,sizeof(logical_char2cell_index));
    memset(logical,0,sizeof(logical));
    memset(visual,0,sizeof(visual));
    memset(in2out,0,sizeof(in2out));
    memset(out2in,0,sizeof(out2in));
    memset(&ps,0,sizeof(ps));

    construct_char2cell_index(vrow->row,logical_char2cell_index);
    Row_compose_bytes(vrow->row,
                      0,vrow->row->num_columns,
                      logical,SIZE,
                      &logical_length,&logical_width,1);
#undef SIZE

#if debug
    printf("logical: [%s]\n",logical);
    printf("logical_length = %u\n",logical_length);
#endif

    vtlayout->transform(vtlayout,
                        logical, logical_length,
                        visual, &visual_length,
                        in2out,out2in,NULL);

#if debug
    printf("visual : [%s]\n",visual);
    printf("visual_length = %u\n\n",visual_length);
#endif

    /* Well, of course, I need to refine this code ;-) */
    while(consumed < visual_length)
    {
        mb_len = mbrtowc(&wc,visual+consumed,visual_length-consumed,&ps);
        if(mb_len == 0 || mb_len == (size_t)-1 || mb_len == (size_t)-2 )
        {
#if debug
            printf("found invalid or NULL character(%u)\n",mb_len);
#endif
            break;
        }
        else
        {
            /* visual character -> logical character -> logical cell */
            logical_cell_index =
                logical_char2cell_index[out2in[current_char]];
#if debug
            printf("logical char index[%d] <-> visual char index[%d]\n",
                   out2in[current_char],current_char);
#endif
            if(Row_is_char_drawn(vrow->row,logical_cell_index))
            {
                width = wcwidth(wc);
                Char_init(mbchar,visual+consumed,mb_len,width,(width>0));
                Row_add_char(vrow->vrow, cursor_pos, &mbchar,
                             Row_is_bold(vrow->row,logical_cell_index),
                             Row_is_blink(vrow->row,logical_cell_index),
                             Row_is_inverse(vrow->row,logical_cell_index),
                             Row_is_underline(vrow->row,logical_cell_index),
                             Row_get_foreground(vrow->row,logical_cell_index),
                             Row_get_background(vrow->row,logical_cell_index),
                             Row_get_charset(vrow->row,logical_cell_index));
            }
            else
            {
                /* An undrawn cell still takes one column. */
                Row_set_char_drawn(vrow->vrow,cursor_pos,0);
                width = 1;
            }
            vrow->visual2logical_index[cursor_pos] = logical_cell_index;
            vrow->logical2visual_index[logical_cell_index] = cursor_pos;
#if debug
            printf("logical cell index[%d] <-> visual cell index[%d]\n",
                   logical_cell_index,cursor_pos);
#endif
        }

        /* process for multi column character */
        if(width > 1 &&
           Row_is_char_drawn(vrow->row,logical_cell_index) &&
           width == Row_get_cell_width(vrow->row,logical_cell_index) )
        {
            int i;
            for(i=1;i<width;i++)
            {
                vrow->visual2logical_index[cursor_pos+i] =
                    logical_cell_index+i;
                vrow->logical2visual_index[logical_cell_index+i] =
                    cursor_pos+i;
            }
        }

        cursor_pos+=width;
        current_char++;
        consumed+=mb_len;
    }

    /* The visual row possibly has ligatures */
    if(cursor_pos < vrow->row->num_columns &&
       !vtlayout->is_direction_LTR(vtlayout))
        Row_insert_cells(vrow->vrow,0,vrow->row->num_columns-cursor_pos);
}
/* Return the number of bytes at SRC that may be passed to mbrtowc: the
   bytes up to and including the first NUL, but at most MB_LEN_MAX.
   This is an optimized variant of strnlen1 (src, MB_LEN_MAX).  */
static size_t
mbsrtowcs_src_avail (const char *src)
{
  if (src[0] == '\0')
    return 1;
  if (src[1] == '\0')
    return 2;
  if (src[2] == '\0')
    return 3;
  if (MB_LEN_MAX <= 4 || src[3] == '\0')
    return 4;
  return 4 + strnlen1 (src + 4, MB_LEN_MAX - 4);
}

/* Convert the multibyte string *SRCP to wide characters.

   If DEST is not NULL, store at most LEN wide characters in DEST and return
   the number stored, not counting a terminating null wide character.  *SRCP
   is set to NULL when the terminator was converted, and otherwise to the
   first byte that was not converted.

   If DEST is NULL, LEN is ignored, *SRCP and *PS are left unchanged, and the
   number of wide characters in the string is returned.

   On an invalid multibyte sequence, set errno to EILSEQ and return
   (size_t)(-1); when DEST is not NULL, *SRCP then points at the offending
   sequence.

   If PS is NULL, an internal state object is used.  */
size_t
mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps)
{
  const char *cursor;

  if (ps == NULL)
    ps = &_gl_mbsrtowcs_state;
  cursor = *srcp;

  if (dest == NULL)
    {
      /* Counting only: work on a copy so that *ps is not clobbered.  */
      mbstate_t scratch = *ps;
      size_t count = 0;

      for (;;)
        {
          size_t ret =
            mbrtowc (NULL, cursor, mbsrtowcs_src_avail (cursor), &scratch);

          if (ret == (size_t)(-2))
            /* Encountered a multibyte character that extends past a '\0'
               byte or that is longer than MB_LEN_MAX bytes.  Cannot
               happen.  */
            abort ();
          if (ret == (size_t)(-1))
            {
              errno = EILSEQ;
              return (size_t)(-1);
            }
          if (ret == 0)
            /* Here mbsinit (&scratch).  */
            return count;

          cursor += ret;
          count++;
        }
    }
  else
    {
      wchar_t *destptr = dest;

      while (len > 0)
        {
          size_t ret =
            mbrtowc (destptr, cursor, mbsrtowcs_src_avail (cursor), ps);

          if (ret == (size_t)(-2))
            /* Encountered a multibyte character that extends past a '\0'
               byte or that is longer than MB_LEN_MAX bytes.  Cannot
               happen.  */
            abort ();
          if (ret == (size_t)(-1))
            {
              *srcp = cursor;
              errno = EILSEQ;
              return (size_t)(-1);
            }
          if (ret == 0)
            {
              /* Terminator converted.  Here mbsinit (ps).  */
              cursor = NULL;
              break;
            }

          cursor += ret;
          destptr++;
          len--;
        }

      *srcp = cursor;
      return destptr - dest;
    }
}
/*
 * conv_c --
 *	Print the character at `p' for a character-format conversion.
 *
 *	pr	 print unit; its format string, conversion character slot
 *		 (cchar), multibyte state and `mbleft' counter are used
 *	p	 first byte of the character; `bufsize' bytes are available
 *
 *	The C escapes \0 \a \b \f \n \r \t \v are printed symbolically.
 *	In od mode with a multibyte locale the bytes are decoded with
 *	mbrtowc(); a printable character is printed once and "**" is printed
 *	for each of its remaining bytes on the following calls (tracked in
 *	pr->mbleft).  Anything else is printed as a three-digit octal value
 *	of the byte at the current position.
 */
void
conv_c(PR *pr, u_char *p, size_t bufsize)
{
	char buf[10];
	char const *str;
	wchar_t wc;
	size_t clen, oclen;
	int converr, pad, width;
	u_char peekbuf[MB_LEN_MAX];
	/*
	 * The byte this call is formatting.  `p' may be redirected to
	 * peekbuf below, so keep the original position for the fallbacks.
	 */
	u_char *first = p;

	if (pr->mbleft > 0) {
		str = "**";
		pr->mbleft--;
		goto strpr;
	}

	switch(*p) {
	case '\0':
		str = "\\0";
		goto strpr;
	/* case '\a': */
	case '\007':
		str = "\\a";
		goto strpr;
	case '\b':
		str = "\\b";
		goto strpr;
	case '\f':
		str = "\\f";
		goto strpr;
	case '\n':
		str = "\\n";
		goto strpr;
	case '\r':
		str = "\\r";
		goto strpr;
	case '\t':
		str = "\\t";
		goto strpr;
	case '\v':
		str = "\\v";
		goto strpr;
	default:
		break;
	}
	/*
	 * Multibyte characters are disabled for hexdump(1) for backwards
	 * compatibility and consistency (none of its other output formats
	 * recognize them correctly).
	 */
	converr = 0;
	if (odmode && MB_CUR_MAX > 1) {
		oclen = 0;
retry:
		clen = mbrtowc(&wc, (const char *)p, bufsize, &pr->mbstate);
		if (clen == 0)
			clen = 1;
		else if (clen == (size_t)-1 || (clen == (size_t)-2 &&
		    p == peekbuf)) {
			memset(&pr->mbstate, 0, sizeof(pr->mbstate));
			if (p == peekbuf) {
				/*
				 * We peeked ahead, but that didn't help --
				 * the sequence is illegal or still cannot
				 * be completed; go back to the original
				 * byte.
				 */
				oclen = 0;
				p = first;
			}
			wc = *p;
			clen = 1;
			converr = 1;
		} else if (clen == (size_t)-2) {
			/*
			 * Incomplete character; peek ahead and see if we
			 * can complete it.
			 */
			oclen = bufsize;
			bufsize = peek(p = peekbuf, MB_CUR_MAX);
			goto retry;
		}
		clen += oclen;
	} else {
		wc = *p;
		clen = 1;
	}
	if (!converr && iswprint(wc)) {
		if (!odmode) {
			*pr->cchar = 'c';
			(void)printf(pr->fmt, (int)wc);
		} else {
			*pr->cchar = 'C';
			assert(strcmp(pr->fmt, "%3C") == 0);
			width = wcwidth(wc);
			assert(width >= 0);
			pad = 3 - width;
			if (pad < 0)
				pad = 0;
			(void)printf("%*s%C", pad, "", wc);
			pr->mbleft = clen - 1;
		}
	} else {
		/*
		 * Always report the byte at the current position, never a
		 * look-ahead byte from peekbuf.
		 */
		(void)snprintf(buf, sizeof(buf), "%03o", (int)*first);
		str = buf;
strpr:		*pr->cchar = 's';
		(void)printf(pr->fmt, str);
	}
}
/* Exercise mbrtowc() in the locale NAME.CODEPAGE.

   Returns 77 when the locale cannot be set (presumably the harness's
   "test skipped" code -- confirm against the caller), 1 when CODEPAGE is
   not one of the code pages handled in the switch below, and 0 when every
   check for that code page has run.  Individual checks go through the
   ASSERT macro, which is defined elsewhere.

   The locale-independent checks come first: zero-length input, NUL input,
   each member of the ISO C basic character set, and a NULL input pointer.
   The switch then decodes a code-page-specific sample string step by step.
   After each step the bytes just consumed are overwritten with NUL,
   presumably so that a later call which wrongly re-read earlier input
   would produce a different result.  */
static int
test_one_locale (const char *name, int codepage)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

# if 1
  /* Portable code to set the locale.  */
  {
    char name_with_codepage[1024];

    sprintf (name_with_codepage, "%s.%d", name, codepage);

    /* Set the locale.  */
    if (setlocale (LC_ALL, name_with_codepage) == NULL)
      return 77;
  }
# else
  /* Hacky way to set a locale.codepage combination that setlocale() refuses
     to set.  */
  {
    /* Codepage of the current locale, set with setlocale().
       Not necessarily the same as GetACP().  */
    extern __declspec(dllimport) unsigned int __lc_codepage;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name) == NULL)
      return 77;

    /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
    __lc_codepage = codepage;
    switch (codepage)
      {
      case 1252:
      case 1256:
        MB_CUR_MAX = 1;
        break;
      case 932:
      case 950:
      case 936:
        MB_CUR_MAX = 2;
        break;
      case 54936:
      case 65001:
        MB_CUR_MAX = 4;
        break;
      }

    /* Test whether the codepage is really available.  */
    memset (&state, '\0', sizeof (mbstate_t));
    if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
      return 77;
  }
# endif

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == c);
          ASSERT (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  switch (codepage)
    {
    case 1252:
      /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
      {
        char input[] = "B\374\337er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\374');
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\337');
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 1256:
      /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
      {
        char input[] = "x\302\341\346y"; /* "xآلوy" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'x');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\302');
        ASSERT (wc == 0x0622);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\341');
        ASSERT (wc == 0x0644);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\346');
        ASSERT (wc == 0x0648);
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'y');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 932:
      /* Locale encoding is CP932, similar to Shift_JIS.  */
      {
        char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the lead byte is offered: the character is incomplete.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The trail byte completes it from the saved state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 950:
      /* Locale encoding is CP950, similar to Big5.  */
      {
        char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the lead byte is offered: the character is incomplete.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The trail byte completes it from the saved state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 936:
      /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
      {
        char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the lead byte is offered: the character is incomplete.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The trail byte completes it from the saved state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 54936:
      /* Locale encoding is CP54936 = GB18030.  */
      {
        char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* Only the lead byte is offered: the character is incomplete.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        /* The trail byte completes it from the saved state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 7, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        /* Clear the byte just consumed: 'e' sits at index 7 in this
           string (index 5 only in the UTF-8 variant).  */
        input[7] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 8, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\045", 2, &state); /* 0x81 0x25 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 65001:
      /* Locale encoding is CP65001 = UTF-8.  */
      {
        char input[] = "B\303\274\303\237er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* Only the lead byte is offered: the character is incomplete.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        /* The continuation byte completes it from the saved state.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 5, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[5] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 6, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\303\300", 2, &state); /* 0xC3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300", 2, &state); /* 0xE3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300", 2, &state); /* 0xF3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    default:
      return 1;
    }
}
static int cnt(const char *file) { struct stat sb; uintmax_t linect, wordct, charct, llct, tmpll; int fd, len, warned; size_t clen; short gotsp; u_char *p; u_char buf[MAXBSIZE]; wchar_t wch; mbstate_t mbs; linect = wordct = charct = llct = tmpll = 0; if (file == NULL) fd = STDIN_FILENO; else { if ((fd = open(file, O_RDONLY, 0)) < 0) { warn("%s: open", file); return (1); } if (doword || (domulti && MB_CUR_MAX != 1)) goto word; /* * Line counting is split out because it's a lot faster to get * lines than to get words, since the word count requires some * logic. */ if (doline) { while ((len = read(fd, buf, MAXBSIZE))) { if (len == -1) { warn("%s: read", file); (void)close(fd); return (1); } if (siginfo) { show_cnt(file, linect, wordct, charct, llct); } charct += len; for (p = buf; len--; ++p) if (*p == '\n') { if (tmpll > llct) llct = tmpll; tmpll = 0; ++linect; } else tmpll++; } reset_siginfo(); tlinect += linect; if (dochar) tcharct += charct; if (dolongline) { if (llct > tlongline) tlongline = llct; } show_cnt(file, linect, wordct, charct, llct); (void)close(fd); return (0); } /* * If all we need is the number of characters and it's a * regular file, just stat the puppy. */ if (dochar || domulti) { if (fstat(fd, &sb)) { warn("%s: fstat", file); (void)close(fd); return (1); } if (S_ISREG(sb.st_mode)) { reset_siginfo(); charct = sb.st_size; show_cnt(file, linect, wordct, charct, llct); tcharct += charct; (void)close(fd); return (0); } } } /* Do it the hard way... */ word: gotsp = 1; warned = 0; memset(&mbs, 0, sizeof(mbs)); while ((len = read(fd, buf, MAXBSIZE)) != 0) { if (len == -1) { warn("%s: read", file != NULL ? file : "stdin"); (void)close(fd); return (1); } p = buf; while (len > 0) { if (siginfo) show_cnt(file, linect, wordct, charct, llct); if (!domulti || MB_CUR_MAX == 1) { clen = 1; wch = (unsigned char)*p; } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == (size_t)-1) { if (!warned) { errno = EILSEQ; warn("%s", file != NULL ? 
file : "stdin"); warned = 1; } memset(&mbs, 0, sizeof(mbs)); clen = 1; wch = (unsigned char)*p; } else if (clen == (size_t)-2) break; else if (clen == 0) clen = 1; charct++; if (wch != L'\n') tmpll++; len -= clen; p += clen; if (wch == L'\n') { if (tmpll > llct) llct = tmpll; tmpll = 0; ++linect; } if (iswspace(wch)) gotsp = 1; else if (gotsp) { gotsp = 0; ++wordct; } } } reset_siginfo(); if (domulti && MB_CUR_MAX > 1) if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) warn("%s", file != NULL ? file : "stdin"); if (doline) tlinect += linect; if (doword) twordct += wordct; if (dochar || domulti) tcharct += charct; if (dolongline) { if (llct > tlongline) tlongline = llct; } show_cnt(file, linect, wordct, charct, llct); (void)close(fd); return (0); }