Пример #1
0
/*
 * Is a UTF-8 character well-formed?
 *
 * s points at the lead byte of the sequence; the sequence length is
 * taken from utf_len() of that byte and that many bytes are examined.
 *
 * Returns 1 if the sequence is accepted, 0 if it is rejected because
 *  - the lead byte is flagged by IS_UTF8_INVALID,
 *  - the encoding is overlong (not the shortest form), or
 *  - one of the following bytes fails IS_UTF8_TRAIL.
 */
int
is_utf8_well_formed(const char *s)
{
	/*
	 * Plain char may be signed, in which case bytes >= 0x80 promote to
	 * negative ints.  All magnitude/equality tests on raw bytes below
	 * therefore go through this unsigned view.
	 */
	const unsigned char *us = (const unsigned char *)s;
	int i;
	int len;

	if (IS_UTF8_INVALID(s[0]))
		return (0);

	len = utf_len((char)s[0]);
	if (len == 1)
		return (1);
	if (len == 2) {
		/* Lead bytes 0xC0 and 0xC1 can only encode values < 0x80. */
		if (us[0] < 0xC2)
			return (0);
	} else {
		/*
		 * mask has the top len bits set (0xE0 for len 3, 0xF0 for
		 * len 4, ...).  A lead byte equal to mask carries no payload
		 * bits; if the payload bits of the second byte that fall
		 * under mask are zero as well, the value would have fit in
		 * a shorter sequence.
		 */
		unsigned char mask;
		mask = (~((1 << (8-len)) - 1)) & 0xFF;
		if (us[0] == mask && (us[1] & mask) == 0x80)
			return (0);
	}

	/* Every byte after the lead must be a continuation byte. */
	for (i = 1;  i < len;  i++)
		if (!IS_UTF8_TRAIL(s[i]))
			return (0);
	return (1);
}
Пример #2
0
/*
 * Get the value of a UTF-8 character.
 *
 * The length of the sequence is taken from utf_len() of the first byte:
 *	1: 0xxxxxxx
 *	2: 110xxxxx 10xxxxxx
 *	3: 1110xxxx 10xxxxxx 10xxxxxx
 *	4: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *	5: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *	6: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 * Any other length is treated like 1: the first byte is returned as a
 * value in the range 0..255.  No validation is done here.
 */
LWCHAR
get_wchar(const char *p)
{
	int nbytes = utf_len(p[0]);
	int value;
	int i;

	if (nbytes < 2 || nbytes > 6)
		return (LWCHAR) (p[0] & 0xFF);

	/* The lead byte contributes its low (7 - nbytes) bits. */
	value = p[0] & ((1 << (7 - nbytes)) - 1);

	/* Each following byte contributes its low six bits. */
	for (i = 1; i < nbytes; i++)
		value = (value << 6) | (p[i] & 0x3F);

	return (LWCHAR) (value);
}
Пример #3
0
/*
 * Store one character, together with its attribute, at the current end
 * of the line buffer.
 *
 * ch  - character value; drives the width and ANSI-sequence decisions.
 * a   - attribute for the character; may be overridden below.
 * rep - bytes to copy into linebuf (utf_len(rep[0]) of them), or NULL
 *       to store ch itself as a single byte.
 * pos - position of the character, used for the highlight lookup.
 *
 * Returns 1 if the character would not fit on the screen, or if the
 * line buffer is nearly full and expand_linebuf() returns nonzero.
 * Returns 0 otherwise, including when an unrecognized ANSI sequence
 * has just been removed from the buffer.
 */
static int
store_char(LWCHAR ch, char a, char *rep, off_t pos)
{
	int overstrike;
	int width;
	int nbytes;
	int nmatch;
	int i;
	char single;

	/* Remember the latest underline/bold attribute that was seen. */
	overstrike = (a & (AT_UNDERLINE|AT_BOLD));
	if (overstrike != AT_NORMAL)
		last_overstrike = overstrike;

	/*
	 * A highlighted position overrides the attribute passed in,
	 * except for characters that belong to an ANSI sequence.
	 */
	if (is_hilited(pos, pos+1, 0, &nmatch) && a != AT_ANSI) {
		if (highest_hilite != -1 && pos > highest_hilite)
			highest_hilite = pos;
		a |= AT_HILITE;
	}

	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq()) {
		if (!(is_ansi_end(ch) || is_ansi_middle(ch))) {
			/*
			 * ch cannot continue the sequence: walk backwards
			 * to the start of the sequence (or of the buffer)
			 * and drop everything from there on.
			 */
			char *scan = &linebuf[curr];
			LWCHAR back;
			for (;;) {
				back = step_char(&scan, -1, linebuf);
				if (scan <= linebuf || IS_CSI_START(back))
					break;
			}
			curr = scan - linebuf;
			return (0);
		}
		/* Part of the sequence: zero width, tagged AT_ANSI. */
		a = AT_ANSI;
		width = 0;
	} else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)) {
		/* Start of a new sequence: zero width, tagged AT_ANSI. */
		a = AT_ANSI;
		width = 0;
	} else {
		/* Ordinary character: width depends on its predecessor. */
		char *scan = &linebuf[curr];
		LWCHAR before = step_char(&scan, -1, linebuf);
		width = pwidth(ch, a, before);
	}

	/* Won't fit on screen. */
	if (ctldisp != OPT_ON && column + width + attr_ewidth(a) > sc_width)
		return (1);

	if (rep != NULL) {
		nbytes = utf_len(rep[0]);
	} else {
		single = (char)ch;
		rep = &single;
		nbytes = 1;
	}

	/* Won't fit in line buffer: try to expand it. */
	if (curr + nbytes >= size_linebuf-6 && expand_linebuf())
		return (1);

	for (i = 0; i < nbytes; i++) {
		linebuf[curr] = rep[i];
		attr[curr] = a;
		curr++;
	}
	column += width;
	return (0);
}
Пример #4
0
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * c is the next input byte and pos is its position.
 *
 * Returns 0 if ok.  A nonzero return means the character could not be
 * appended; the value tells the caller how many chars to back up:
 * 1 when not in utf_mode (and when a pending character could not be
 * flushed), otherwise the current value of mbc_buf_index.
 */
int
pappend(char c, off_t pos)
{
	int r;

	/* First deal with a character held back by an earlier call. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		/* Single-byte mode: every byte is a character. */
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/*
			 * No sequence in progress: c starts a new one.
			 * Also entered via "goto retry" after a truncated
			 * sequence has been flushed.
			 */
retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/*
				 * Lead byte: record the expected length and
				 * wait for the rest of the sequence.
				 */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			/* Continuation byte of the sequence in progress. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			/* Sequence is complete: validate, then append it. */
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf,
				    mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
Пример #5
0
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is the number of columns to discard; it is clamped so that it
 * never exceeds column - lmargin or curr - lmargin.  Bytes before
 * lmargin are left in place.  On return curr, column and cshift have
 * been adjusted by the amount actually shifted.
 */
static void
pshift(int shift)
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;
	int to;
	int from;
	int len;
	int width;
	int prev_attr;
	int next_attr;

	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	/* "from" reads the old contents, "to" writes what is kept. */
	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) {
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/*
			 * Copy the rest of the sequence, up to and including
			 * the first byte that is not an is_ansi_middle() byte.
			 */
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode) {
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			/* Stop if the sequence would run past the end. */
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing/combining chars occupy no column. */
			if (!is_composing_char(ch) &&
			    !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else {
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ?
				    -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * Only one column is left to shift but the character is
		 * two columns wide: replace it with a single space.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		/*
		 * The extra width is added only when this character's
		 * attribute matches neither the kept neighbour before it
		 * nor the character after it.
		 */
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop once this character no longer fits in the shift. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace (negative width) must not drive this below 0. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move everything that was not discarded down to "to". */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}