/*
 * Is a UTF-8 character well-formed?
 *
 * s points at the lead byte of the sequence; utf_len(s[0]) bytes are
 * examined.
 *
 * Returns 0 if IS_UTF8_INVALID rejects the lead byte, if the sequence is
 * not in shortest form (overlong), or if any following byte is not a
 * trail byte.  Returns 1 otherwise.
 */
int
is_utf8_well_formed(const char *s)
{
	/*
	 * Compare the lead byte as an unsigned value.  Plain char may be
	 * signed, in which case a byte such as 0xE0 promotes to a negative
	 * int and could never equal the (positive) mask computed below,
	 * silently disabling the overlong check for 3..6 byte sequences.
	 */
	unsigned char lead = (unsigned char)s[0];
	int i;
	int len;

	if (IS_UTF8_INVALID(s[0]))
		return (0);

	len = utf_len((char)s[0]);
	if (len == 1)
		return (1);
	if (len == 2) {
		/* Lead bytes 0xC0 and 0xC1 can only encode values < 0x80. */
		if (lead < 0xC2)
			return (0);
	} else {
		unsigned char mask;

		/* Marker bits of a lead byte of this length (0xE0 for 3). */
		mask = (~((1 << (8-len)) - 1)) & 0xFF;
		/*
		 * A lead byte with no payload bits set, followed by a byte
		 * whose top payload bits are also clear, encodes a value
		 * that fits in a shorter sequence.
		 */
		if (lead == mask && ((unsigned char)s[1] & mask) == 0x80)
			return (0);
	}

	/* Every byte after the lead byte must be a trail byte. */
	for (i = 1; i < len; i++)
		if (!IS_UTF8_TRAIL(s[i]))
			return (0);
	return (1);
}
/* * Get the value of a UTF-8 character. */ LWCHAR get_wchar(const char *p) { switch (utf_len(p[0])) { case 1: default: /* 0xxxxxxx */ return (LWCHAR) (p[0] & 0xFF); case 2: /* 110xxxxx 10xxxxxx */ return (LWCHAR) ( ((p[0] & 0x1F) << 6) | (p[1] & 0x3F)); case 3: /* 1110xxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( ((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F)); case 4: /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( ((p[0] & 0x07) << 18) | ((p[1] & 0x3F) << 12) | ((p[2] & 0x3F) << 6) | (p[3] & 0x3F)); case 5: /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( ((p[0] & 0x03) << 24) | ((p[1] & 0x3F) << 18) | ((p[2] & 0x3F) << 12) | ((p[3] & 0x3F) << 6) | (p[4] & 0x3F)); case 6: /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ return (LWCHAR) ( ((p[0] & 0x01) << 30) | ((p[1] & 0x3F) << 24) | ((p[2] & 0x3F) << 18) | ((p[3] & 0x3F) << 12) | ((p[4] & 0x3F) << 6) | (p[5] & 0x3F)); } }
/*
 * Store a character, with its attribute, at the end of the line buffer.
 *
 *   ch   the character value
 *   a    its attribute bits; may be overridden (AT_HILITE added, or
 *        replaced by AT_ANSI) before being stored
 *   rep  the bytes to store, utf_len(rep[0]) of them; if NULL, ch
 *        truncated to one char is stored instead
 *   pos  file position of the character, used for the highlight check
 *
 * Returns 1 if the character won't fit on the screen or the line buffer
 * can't be expanded.  Returns 0 otherwise, including the case where an
 * unrecognized escape sequence was removed from the buffer and nothing
 * was stored.
 */
static int
store_char(LWCHAR ch, char a, char *rep, off_t pos)
{
	int overstrike;
	int width;
	int nbytes;
	int ansi = 0;
	int i;
	int matches;
	char single;

	/* Remember the last underline/bold combination that was seen. */
	overstrike = (a & (AT_UNDERLINE|AT_BOLD));
	if (overstrike != AT_NORMAL)
		last_overstrike = overstrike;

	/*
	 * If this position should be highlighted, override the attribute
	 * passed in (except for AT_ANSI).
	 */
	if (is_hilited(pos, pos+1, 0, &matches) && a != AT_ANSI) {
		if (highest_hilite != -1 && pos > highest_hilite)
			highest_hilite = pos;
		a |= AT_HILITE;
	}

	if (ctldisp == OPT_ONPLUS) {
		if (in_ansi_esc_seq()) {
			if (!(is_ansi_end(ch) || is_ansi_middle(ch))) {
				/*
				 * Remove whole unrecognized sequence: step
				 * backwards until its start (or the start of
				 * the buffer) and truncate there.
				 */
				char *p = &linebuf[curr];
				LWCHAR bch;

				for (;;) {
					bch = step_char(&p, -1, linebuf);
					if (p <= linebuf || IS_CSI_START(bch))
						break;
				}
				curr = p - linebuf;
				return (0);
			}
			ansi = 1;
		} else if (IS_CSI_START(ch)) {
			ansi = 1;
		}
	}

	if (ansi) {
		a = AT_ANSI;	/* Will force re-AT_'ing around it. */
		width = 0;
	} else {
		/* The width may depend on the previously stored character. */
		char *p = &linebuf[curr];
		LWCHAR prev_ch = step_char(&p, -1, linebuf);

		width = pwidth(ch, a, prev_ch);
	}

	/* Won't fit on screen. */
	if (ctldisp != OPT_ON && column + width + attr_ewidth(a) > sc_width)
		return (1);

	if (rep != NULL) {
		nbytes = utf_len(rep[0]);
	} else {
		single = (char)ch;
		rep = &single;
		nbytes = 1;
	}

	/* Won't fit in line buffer: try to expand it. */
	if (curr + nbytes >= size_linebuf-6 && expand_linebuf())
		return (1);

	for (i = 0; i < nbytes; i++) {
		linebuf[curr] = rep[i];
		attr[curr] = a;
		curr++;
	}
	column += width;
	return (0);
}
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 *   c    the next input byte
 *   pos  the file position of that byte
 *
 * Returns 0 if ok (including when the byte was only buffered as a pending
 * CR or as part of an incomplete UTF-8 sequence).  Returns nonzero if the
 * character couldn't be appended: 1 if the pending CR could not be
 * appended or when not in utf_mode, otherwise mbc_buf_index, i.e. how
 * many chars the caller should back up.
 *
 * NOTE(review): c is a plain char and is converted with (LWCHAR) c.  If
 * char is signed and LWCHAR is a wider unsigned type, bytes >= 0x80 would
 * convert to very large values -- confirm this is what do_append expects.
 */
int
pappend(char c, off_t pos)
{
	int r;

	/* A CR held back by the previous call is appended first. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/* No sequence in progress: c starts a new character. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/* Start collecting; nothing is appended yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			mbc_buf[mbc_buf_index++] = c;
			/* Wait for the remaining bytes of the sequence. */
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char. */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		/* Terminate the buffer contents before shifting them. */
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 *   shift  the number of columns to discard; clamped below to what is
 *          available after lmargin
 *
 * The buffer is compacted in place: "from" is the read index and "to" the
 * write index, both starting at lmargin.  On return curr is the new end
 * of the buffer, and column and cshift have been adjusted by the number
 * of columns actually shifted.
 */
static void
pshift(int shift)
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;
	int to;
	int from;
	int len;
	int width;
	int prev_attr;
	int next_attr;

	/* Can't shift by more than what lies past the left margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) {
			/* Keep cumulative effect. */
			/*
			 * The escape sequence is copied rather than
			 * discarded, up to and including the first byte
			 * that is not an ANSI "middle" byte.
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		/* Work out the byte length and width of this character. */
		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode) {
			/* Assumes well-formedness validation already done. */
			LWCHAR ch;

			len = utf_len(c);
			/* Sequence runs past the end of the buffer: stop. */
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing/combining chars keep width 0. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else {
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row. */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * A two-column character with only one column left to
		 * shift: replace it by a single space in the output.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all(). */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		/*
		 * NOTE(review): attr_swidth/attr_ewidth presumably give the
		 * columns used by the start/end sequences of an attribute --
		 * confirm against their definitions.
		 */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ?
		    attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			/* This char's attribute differs from both neighbours. */
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Discarding this char would shift too far: stop here. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace (negative width) must not drive this below 0. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move everything that was not discarded down to "to". */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}