Example #1
0
/*
 * Is a UTF-8 character well-formed?
 *
 * s points at the lead byte of the sequence.  The sequence length is
 * taken from utf_len() of that lead byte, and that many bytes are examined.
 *
 * Returns 1 if the sequence is well-formed, 0 otherwise.  A sequence is
 * rejected when the lead byte is flagged by IS_UTF8_INVALID, when it is an
 * overlong (not shortest form) encoding, or when any byte after the lead
 * is not a trail byte.
 */
int
is_utf8_well_formed(const char *s)
{
	int i;
	int len;

	if (IS_UTF8_INVALID(s[0]))
		return (0);

	len = utf_len((char)s[0]);
	if (len == 1)
		return (1);
	if (len == 2) {
		/* Lead bytes 0xC0 and 0xC1 can only start overlong forms. */
		if ((unsigned char)(s[0]) < 0xC2)
			return (0);
	} else {
		unsigned char mask;
		/* Lead-byte prefix for this length: 0xE0 for 3, 0xF0 for 4, ... */
		mask = (~((1 << (8-len)) - 1)) & 0xFF;
		/*
		 * Overlong form: the lead byte carries no payload bits and
		 * the first trail byte has no payload bits above the point
		 * where a shorter encoding would have sufficed.
		 *
		 * The bytes must be compared as unsigned char.  Where plain
		 * char is signed, a lead byte >= 0x80 promotes to a negative
		 * int and would never equal mask, letting overlong sequences
		 * such as E0 80 80 through.
		 */
		if ((unsigned char)s[0] == mask &&
		    ((unsigned char)s[1] & mask) == 0x80)
			return (0);
	}

	/* Every byte after the lead must be a trail byte. */
	for (i = 1;  i < len;  i++)
		if (!IS_UTF8_TRAIL(s[i]))
			return (0);
	return (1);
}
Example #2
0
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * (NOTE(review): that expansion is presumably done inside do_append(),
 * which is not visible here -- confirm.)
 *
 * c is the next input byte and pos its file position.  In utf_mode,
 * bytes of a multibyte sequence are collected in mbc_buf and only
 * passed on once the sequence is complete, so a call may return 0
 * having merely buffered c.
 *
 * Returns 0 if ok.  On failure returns nonzero: 1 when not in utf_mode
 * or when the pending char could not be appended, otherwise
 * mbc_buf_index, which is how many chars the caller should back up.
 */
int
pappend(char c, off_t pos)
{
	int r;

	/* A char held back by an earlier call must go in first. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 * (NOTE(review): the discard itself does not happen in
		 * this function; here the CR is only parked in pendc.)
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		/* Single-byte mode: every byte is a character of its own. */
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/*
			 * No sequence in progress: c starts a new one.
			 * Also entered via "goto retry" after a truncated
			 * sequence has been flushed successfully.
			 */
retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/*
				 * Remember the expected length and start
				 * position, then wait for the trail bytes.
				 */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			/* Sequence in progress and c continues it. */
			mbc_buf[mbc_buf_index++] = c;
			/* Still short of the expected length: keep buffering. */
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf,
				    mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		/* Terminate the buffer at curr before shifting it. */
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}