/*
 * Is a UTF-8 character well-formed?
 *
 * s points at the first (lead) byte of the candidate sequence.  The
 * expected sequence length is taken from utf_len() of that byte.
 *
 * Returns 1 if the sequence is accepted, 0 if:
 *   - the first byte is rejected by IS_UTF8_INVALID,
 *   - the sequence is an overlong (not shortest form) encoding, or
 *   - any of the following len-1 bytes is not a trail byte.
 */
int
is_utf8_well_formed(const char *s)
{
	int i;
	int len;
	unsigned char lead;

	if (IS_UTF8_INVALID(s[0]))
		return (0);

	len = utf_len((char)s[0]);
	if (len == 1)
		return (1);

	/*
	 * Plain char may be signed, so every comparison against a byte
	 * value >= 0x80 must go through unsigned char.  Otherwise the
	 * lead byte promotes to a negative int and never matches.
	 */
	lead = (unsigned char)s[0];

	if (len == 2) {
		/* 0xC0 and 0xC1 can only start overlong 2-byte forms. */
		if (lead < 0xC2)
			return (0);
	} else {
		unsigned char mask;

		/* The top len bits set: 0xE0 for len 3, 0xF0 for len 4, ... */
		mask = (~((1 << (8-len)) - 1)) & 0xFF;
		/*
		 * A lead byte with no payload bits followed by a trail
		 * byte whose high payload bits are all zero encodes a
		 * value that would have fit in a shorter sequence.
		 */
		if (lead == mask && ((unsigned char)s[1] & mask) == 0x80)
			return (0);
	}

	/* Every byte after the lead must be a continuation byte. */
	for (i = 1; i < len; i++) {
		if (!IS_UTF8_TRAIL(s[i]))
			return (0);
	}
	return (1);
}
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * c is the next input byte and pos is its file position.
 *
 * Returns 0 if ok.  Otherwise returns a nonzero count of how many chars
 * the caller should back up: 1 when not in utf_mode (and when the
 * pending char could not be appended), or mbc_buf_index in utf_mode.
 *
 * State kept between calls (all defined outside this function):
 *   pendc / pendpos   - a held-back '\r' and its position
 *   mbc_buf           - bytes of the multibyte sequence being collected
 *   mbc_buf_len       - expected length of that sequence, 0 if none
 *   mbc_buf_index     - number of bytes stored in mbc_buf so far
 *   mbc_pos           - position of the lead byte of that sequence
 */
int
pappend(char c, off_t pos)
{
	int r;

	/* First emit any character that was held back by the previous call. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* A nonzero r is treated as failure; report the back-up count. */
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		/* Single-byte mode: every byte is appended directly. */
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/*
			 * No sequence in progress.  This label is also the
			 * target of the goto below, which re-processes c after
			 * a truncated sequence has been flushed successfully.
			 */
retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/* Start collecting; nothing is appended yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			/* Continuation byte of the sequence being collected. */
			mbc_buf[mbc_buf_index++] = c;
			/* Still incomplete: wait for more bytes. */
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char. */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		/* Terminate the buffer at the current end before shifting. */
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}