/* Parse a multibyte character from buf. Return the number of bytes * used. If chr isn't NULL, store the multibyte character in it. If * col isn't NULL, store the new display width in it. If *buf is '\t', * we expect col to have the current display width. */ int parse_mbchar(const char *buf, char *chr, size_t *col) { int buf_mb_len; assert(buf != NULL); #ifdef ENABLE_UTF8 if (use_utf8) { /* Get the number of bytes in the multibyte character. */ buf_mb_len = mblen(buf, MB_CUR_MAX); /* If buf contains an invalid multibyte character, only * interpret buf's first byte. */ if (buf_mb_len < 0) { IGNORE_CALL_RESULT(mblen(NULL, 0)); buf_mb_len = 1; } else if (buf_mb_len == 0) buf_mb_len++; /* Save the multibyte character in chr. */ if (chr != NULL) { int i; for (i = 0; i < buf_mb_len; i++) chr[i] = buf[i]; } /* Save the column width of the wide character in col. */ if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. */ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, get its width using one * column for the "^" that will be displayed in front of it, * and the width in columns of its visible equivalent as * returned by control_mbrep(). */ else if (is_cntrl_mbchar(buf)) { char *ctrl_buf_mb = charalloc(MB_CUR_MAX); int ctrl_buf_mb_len; (*col)++; ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb, &ctrl_buf_mb_len); *col += mbwidth(ctrl_buf_mb); free(ctrl_buf_mb); /* If we have a normal character, get its width in columns * normally. */ } else *col += mbwidth(buf); } } else { #endif /* Get the number of bytes in the byte character. */ buf_mb_len = 1; /* Save the byte character in chr. */ if (chr != NULL) *chr = *buf; if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. */ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, it's two columns wide: * one column for the "^" that will be displayed in front of * it, and one column for its visible equivalent as returned * by control_mbrep(). */ else if (is_cntrl_char((unsigned char)*buf)) *col += 2; /* If we have a normal character, it's one column wide. */ else (*col)++; } #ifdef ENABLE_UTF8 } #endif return buf_mb_len; }
char *display_string(const char *buf, size_t start_col, size_t len, bool dollars) { size_t start_index; /* Index in buf of the first character shown. */ size_t column; /* Screen column that start_index corresponds to. */ size_t alloc_len; /* The length of memory allocated for converted. */ char *converted; /* The string we return. */ size_t index; /* Current position in converted. */ char *buf_mb; int buf_mb_len; /* If dollars is true, make room for the "$" at the end of the * line. */ if (dollars && len > 0 && strlenpt(buf) > start_col + len) { len--; } if (len == 0) { return mallocstrcpy(NULL, ""); } buf_mb = charalloc(mb_cur_max()); start_index = actual_x(buf, start_col); column = strnlenpt(buf, start_index); assert(column <= start_col); /* Make sure there's enough room for the initial character, whether * it's a multibyte control character, a non-control multibyte * character, a tab character, or a null terminator. Rationale: * * multibyte control character followed by a null terminator: * 1 byte ('^') + mb_cur_max() bytes + 1 byte ('\0') * multibyte non-control character followed by a null terminator: * mb_cur_max() bytes + 1 byte ('\0') * tab character followed by a null terminator: * mb_cur_max() bytes + (tabsize - 1) bytes + 1 byte ('\0') * * Since tabsize has a minimum value of 1, it can substitute for 1 * byte above. */ alloc_len = (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE; converted = charalloc(alloc_len); index = 0; if (buf[start_index] != '\0' && buf[start_index] != '\t' && (column < start_col || (dollars && column > 0))) { /* We don't display all of buf[start_index] since it starts to * the left of the screen. */ buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL); if (is_cntrl_mbchar(buf_mb)) { if (column < start_col) { char *ctrl_buf_mb = charalloc(mb_cur_max()); int ctrl_buf_mb_len, i; ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len); for (i = 0; i < ctrl_buf_mb_len; i++) { converted[index++] = ctrl_buf_mb[i]; } start_col += mbwidth(ctrl_buf_mb); free(ctrl_buf_mb); start_index += buf_mb_len; } } else if (using_utf8() && mbwidth(buf_mb) == 2) { if (column >= start_col) { converted[index++] = ' '; start_col++; } converted[index++] = ' '; start_col++; start_index += buf_mb_len; } } while (buf[start_index] != '\0') { buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL); /* Make sure there's enough room for the next character, whether * it's a multibyte control character, a non-control multibyte * character, a tab character, or a null terminator. */ if (index + mb_cur_max() + tabsize + 1 >= alloc_len - 1) { alloc_len += (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE; converted = charealloc(converted, alloc_len); } /* If buf contains a tab character, interpret it. */ if (*buf_mb == '\t') { if (ISSET(WHITESPACE_DISPLAY)) { int i; for (i = 0; i < whitespace_len[0]; i++) { converted[index++] = whitespace[i]; } } else { converted[index++] = ' '; } start_col++; while (start_col % tabsize != 0) { converted[index++] = ' '; start_col++; } } else if (is_cntrl_mbchar(buf_mb)) { /* If buf contains a control character, interpret it. */ char *ctrl_buf_mb = charalloc(mb_cur_max()); int ctrl_buf_mb_len, i; converted[index++] = '^'; start_col++; ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len); for (i = 0; i < ctrl_buf_mb_len; i++) { converted[index++] = ctrl_buf_mb[i]; } start_col += mbwidth(ctrl_buf_mb); free(ctrl_buf_mb); /* If buf contains a space character, interpret it. */ } else if (*buf_mb == ' ') { if (ISSET(WHITESPACE_DISPLAY)) { int i; for (i = whitespace_len[0]; i < whitespace_len[0] + whitespace_len[1]; i++) { converted[index++] = whitespace[i]; } } else { converted[index++] = ' '; } start_col++; } else { /* If buf contains a non-control character, interpret it. If buf * contains an invalid multibyte sequence, display it as such. */ char *nctrl_buf_mb = charalloc(mb_cur_max()); int nctrl_buf_mb_len, i; /* Make sure an invalid sequence-starter byte is properly * terminated, so that it doesn't pick up lingering bytes * of any previous content. */ null_at(&buf_mb, buf_mb_len); nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb, &nctrl_buf_mb_len); for (i = 0; i < nctrl_buf_mb_len; i++) { converted[index++] = nctrl_buf_mb[i]; } start_col += mbwidth(nctrl_buf_mb); free(nctrl_buf_mb); } start_index += buf_mb_len; } free(buf_mb); assert(alloc_len >= index + 1); /* Null-terminate converted. */ converted[index] = '\0'; /* Make sure converted takes up no more than len columns. */ index = actual_x(converted, len); null_at(&converted, index); return converted; }