Ejemplo n.º 1
0
/* Examine the (possibly multibyte) character at the start of buf and
 * return how many bytes it occupies.  When chr is non-NULL, the bytes
 * of the character are copied into it (no terminator is appended).
 * When col is non-NULL, *col is advanced by the character's display
 * width; for a tab, the incoming *col must hold the current column so
 * that the distance to the next tab stop can be computed. */
int parse_mbchar(const char *buf, char *chr, size_t *col)
{
    assert(buf != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Ask the C library how many bytes the character spans. */
	int char_len = mblen(buf, MB_CUR_MAX);

	if (char_len <= 0) {
	    /* A negative result means an invalid sequence, so reset
	     * the conversion state.  In both the invalid case and the
	     * zero case, only the first byte of buf is consumed. */
	    if (char_len < 0) {
		IGNORE_CALL_RESULT(mblen(NULL, 0));
	    }
	    char_len = 1;
	}

	/* Hand the raw bytes of the character back to the caller. */
	if (chr != NULL) {
	    int pos = 0;

	    while (pos < char_len) {
		chr[pos] = buf[pos];
		pos++;
	    }
	}

	/* Advance the caller's column by the character's width. */
	if (col != NULL) {
	    if (*buf == '\t') {
		/* A tab reaches to the next tab stop, which depends
		 * on the column we are currently at. */
		*col += tabsize - *col % tabsize;
	    } else if (!is_cntrl_mbchar(buf)) {
		/* An ordinary character has its own width. */
		*col += mbwidth(buf);
	    } else {
		/* A control character takes one column for the
		 * leading "^", plus the width of the visible
		 * stand-in produced by control_mbrep(). */
		char *shown = charalloc(MB_CUR_MAX);
		int shown_len;

		*col += 1;
		shown = control_mbrep(buf, shown, &shown_len);
		*col += mbwidth(shown);
		free(shown);
	    }
	}

	return char_len;
    }
#endif

    /* Single-byte mode: every character is exactly one byte. */
    if (chr != NULL)
	*chr = *buf;

    if (col != NULL) {
	if (*buf == '\t')
	    /* A tab reaches to the next tab stop, which depends on
	     * the column we are currently at. */
	    *col += tabsize - *col % tabsize;
	else
	    /* A control character needs two columns ("^" plus its
	     * visible stand-in); anything else needs one. */
	    *col += is_cntrl_char((unsigned char)*buf) ? 2 : 1;
    }

    return 1;
}
Ejemplo n.º 2
0
/* Convert buf into a string that can be shown on the screen, starting
 * at display column start_col and covering at most len columns.  Tabs
 * are expanded into spaces, control characters are rendered as "^"
 * followed by their visible representation, and, when the
 * WHITESPACE_DISPLAY flag is set, tabs and spaces are shown using the
 * bytes in whitespace.  If dollars is true and buf extends beyond the
 * requested columns, one column is held back (for the "$" that the
 * comment below says goes at the end of the line).
 *
 * The result is a freshly allocated, null-terminated string; presumably
 * the caller is responsible for freeing it. */
char *display_string(const char *buf, size_t start_col, size_t len, bool dollars)
{
	size_t start_index;
	/* Index in buf of the first character shown. */
	size_t column;
	/* Screen column that start_index corresponds to. */
	size_t alloc_len;
	/* The length of memory allocated for converted. */
	char *converted;
	/* The string we return. */
	size_t index;
	/* Current position in converted. */
	char *buf_mb;
	/* Scratch buffer holding the character currently being processed. */
	int buf_mb_len;
	/* The number of bytes of that character. */

	/* If dollars is true, make room for the "$" at the end of the
	 * line. */
	if (dollars && len > 0 && strlenpt(buf) > start_col + len) {
		len--;
	}

	if (len == 0) {
		return mallocstrcpy(NULL, "");
	}

	/* Reserve one byte beyond the longest possible character, so that
	 * the scratch buffer can always be null-terminated in place and
	 * never needs to be resized while the line is being converted. */
	buf_mb = charalloc(mb_cur_max() + 1);

	start_index = actual_x(buf, start_col);
	column = strnlenpt(buf, start_index);

	assert(column <= start_col);

	/* Make sure there's enough room for the initial character, whether
	 * it's a multibyte control character, a non-control multibyte
	 * character, a tab character, or a null terminator.  Rationale:
	 *
	 * multibyte control character followed by a null terminator:
	 *     1 byte ('^') + mb_cur_max() bytes + 1 byte ('\0')
	 * multibyte non-control character followed by a null terminator:
	 *     mb_cur_max() bytes + 1 byte ('\0')
	 * tab character followed by a null terminator:
	 *     mb_cur_max() bytes + (tabsize - 1) bytes + 1 byte ('\0')
	 *
	 * Since tabsize has a minimum value of 1, it can substitute for 1
	 * byte above. */
	alloc_len = (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE;
	converted = charalloc(alloc_len);

	index = 0;

	if (buf[start_index] != '\0' && buf[start_index] != '\t' && (column < start_col || (dollars && column > 0))) {
		/* We don't display all of buf[start_index] since it starts to
		 * the left of the screen. */
		buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

		if (is_cntrl_mbchar(buf_mb)) {
			if (column < start_col) {
				/* Only the visible representation is emitted here,
				 * without the leading "^". */
				char *ctrl_buf_mb = charalloc(mb_cur_max());
				int ctrl_buf_mb_len, i;

				ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len);

				for (i = 0; i < ctrl_buf_mb_len; i++) {
					converted[index++] = ctrl_buf_mb[i];
				}

				start_col += mbwidth(ctrl_buf_mb);

				free(ctrl_buf_mb);

				start_index += buf_mb_len;
			}
		} else if (using_utf8() && mbwidth(buf_mb) == 2) {
			/* A two-column character is replaced by one space per
			 * column that is shown. */
			if (column >= start_col) {
				converted[index++] = ' ';
				start_col++;
			}

			converted[index++] = ' ';
			start_col++;

			start_index += buf_mb_len;
		}
	}

	while (buf[start_index] != '\0') {
		buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

		/* Make sure there's enough room for the next character, whether
		 * it's a multibyte control character, a non-control multibyte
		 * character, a tab character, or a null terminator. */
		if (index + mb_cur_max() + tabsize + 1 >= alloc_len - 1) {
			alloc_len += (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE;
			converted = charealloc(converted, alloc_len);
		}

		/* If buf contains a tab character, interpret it. */
		if (*buf_mb == '\t') {
			if (ISSET(WHITESPACE_DISPLAY)) {
				int i;

				/* The first whitespace_len[0] bytes of whitespace are
				 * used for a tab. */
				for (i = 0; i < whitespace_len[0]; i++) {
					converted[index++] = whitespace[i];
				}
			} else {
				converted[index++] = ' ';
			}
			start_col++;
			/* Pad with spaces up to the next tab stop. */
			while (start_col % tabsize != 0) {
				converted[index++] = ' ';
				start_col++;
			}
		} else if (is_cntrl_mbchar(buf_mb)) {
			/* If buf contains a control character, interpret it. */
			char *ctrl_buf_mb = charalloc(mb_cur_max());
			int ctrl_buf_mb_len, i;

			converted[index++] = '^';
			start_col++;

			ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len);

			for (i = 0; i < ctrl_buf_mb_len; i++) {
				converted[index++] = ctrl_buf_mb[i];
			}

			start_col += mbwidth(ctrl_buf_mb);

			free(ctrl_buf_mb);
		} else if (*buf_mb == ' ') {
			/* If buf contains a space character, interpret it. */
			if (ISSET(WHITESPACE_DISPLAY)) {
				int i;

				/* The whitespace_len[1] bytes that follow the tab
				 * bytes in whitespace are used for a space. */
				for (i = whitespace_len[0]; i < whitespace_len[0] + whitespace_len[1]; i++) {
					converted[index++] = whitespace[i];
				}
			} else {
				converted[index++] = ' ';
			}
			start_col++;
		} else {
			/* If buf contains a non-control character, interpret it.  If buf
			 * contains an invalid multibyte sequence, display it as such. */
			char *nctrl_buf_mb = charalloc(mb_cur_max());
			int nctrl_buf_mb_len, i;

			/* Make sure an invalid sequence-starter byte is properly
			 * terminated, so that it doesn't pick up lingering bytes
			 * of any previous content.  The terminator is written in
			 * place: buf_mb has mb_cur_max() + 1 bytes, so it fits
			 * as long as buf_mb_len does not exceed mb_cur_max(),
			 * and the buffer keeps its full size for the next
			 * parse_mbchar() call. */
			buf_mb[buf_mb_len] = '\0';

			nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb, &nctrl_buf_mb_len);

			for (i = 0; i < nctrl_buf_mb_len; i++) {
				converted[index++] = nctrl_buf_mb[i];
			}

			start_col += mbwidth(nctrl_buf_mb);

			free(nctrl_buf_mb);
		}

		start_index += buf_mb_len;
	}

	free(buf_mb);

	assert(alloc_len >= index + 1);

	/* Null-terminate converted. */
	converted[index] = '\0';

	/* Make sure converted takes up no more than len columns. */
	index = actual_x(converted, len);
	null_at(&converted, index);

	return converted;
}