/* Convert the Unicode value in chr to a multibyte character, if possible.
 *
 * chr:         the code to convert.  In UTF-8 mode it is treated as a wide
 *              character; otherwise only its low-order byte is used.
 * chr_mb_len:  out parameter that receives the length, in bytes, of the
 *              result, or zero when the conversion is rejected.
 *
 * Returns a dynamically allocated buffer holding the multibyte character.
 * When *chr_mb_len is set to zero, the buffer is still allocated but its
 * contents are undefined. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            /* A failed conversion may leave wctomb() in an unspecified
             * shift state; calling it with NULL puts it back into the
             * initial state. */
            IGNORE_CALL_RESULT(wctomb(NULL, 0));
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Extract the low-order byte by value.  Reading the first byte
         * of the long through a char pointer only yields the low-order
         * byte on little-endian machines; on big-endian ones it would
         * pick up the most significant byte instead. */
        char low_byte = (char)(unsigned char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &low_byte, 1);
    }

    return chr_mb;
}
/* Parse a multibyte character from buf. Return the number of bytes * used. If chr isn't NULL, store the multibyte character in it. If * col isn't NULL, store the new display width in it. If *buf is '\t', * we expect col to have the current display width. */ int parse_mbchar(const char *buf, char *chr, size_t *col) { int buf_mb_len; assert(buf != NULL); #ifdef ENABLE_UTF8 if (use_utf8) { /* Get the number of bytes in the multibyte character. */ buf_mb_len = mblen(buf, MB_CUR_MAX); /* If buf contains an invalid multibyte character, only * interpret buf's first byte. */ if (buf_mb_len < 0) { IGNORE_CALL_RESULT(mblen(NULL, 0)); buf_mb_len = 1; } else if (buf_mb_len == 0) buf_mb_len++; /* Save the multibyte character in chr. */ if (chr != NULL) { int i; for (i = 0; i < buf_mb_len; i++) chr[i] = buf[i]; } /* Save the column width of the wide character in col. */ if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. */ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, get its width using one * column for the "^" that will be displayed in front of it, * and the width in columns of its visible equivalent as * returned by control_mbrep(). */ else if (is_cntrl_mbchar(buf)) { char *ctrl_buf_mb = charalloc(MB_CUR_MAX); int ctrl_buf_mb_len; (*col)++; ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb, &ctrl_buf_mb_len); *col += mbwidth(ctrl_buf_mb); free(ctrl_buf_mb); /* If we have a normal character, get its width in columns * normally. */ } else *col += mbwidth(buf); } } else { #endif /* Get the number of bytes in the byte character. */ buf_mb_len = 1; /* Save the byte character in chr. */ if (chr != NULL) *chr = *buf; if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. 
*/ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, it's two columns wide: * one column for the "^" that will be displayed in front of * it, and one column for its visible equivalent as returned * by control_mbrep(). */ else if (is_cntrl_char((unsigned char)*buf)) *col += 2; /* If we have a normal character, it's one column wide. */ else (*col)++; } #ifdef ENABLE_UTF8 } #endif return buf_mb_len; }
/* Call wctomb() with a null buffer, which per the C standard resets its
 * internal conversion state to the initial state.  The call's result is
 * passed to IGNORE_CALL_RESULT(), which presumably discards it without
 * triggering unused-result warnings (the macro is defined elsewhere --
 * confirm there). */
void wctomb_reset(void) { IGNORE_CALL_RESULT(wctomb(NULL, 0)); }
/* Call mbtowc() with a null input string, which per the C standard resets
 * its internal conversion state to the initial state.  The call's result
 * is passed to IGNORE_CALL_RESULT(), which presumably discards it without
 * triggering unused-result warnings (the macro is defined elsewhere --
 * confirm there). */
void mbtowc_reset(void) { IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0)); }
/* Parse a multibyte character from buf. Return the number of bytes * used. If chr isn't NULL, store the multibyte character in it. If * col isn't NULL, add the character's width (in columns) to it. */ int parse_mbchar(const char *buf, char *chr, size_t *col) { int length; assert(buf != NULL); #ifdef ENABLE_UTF8 if (use_utf8) { /* Get the number of bytes in the multibyte character. */ length = mblen(buf, MB_CUR_MAX); /* When the multibyte sequence is invalid, only take the first byte. */ if (length <= 0) { IGNORE_CALL_RESULT(mblen(NULL, 0)); length = 1; } /* When requested, store the multibyte character in chr. */ if (chr != NULL) { int i; for (i = 0; i < length; i++) chr[i] = buf[i]; } /* When requested, add the width of the character to col. */ if (col != NULL) { /* If we have a tab, compute its width in columns based on the * current value of col. */ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, it's two columns wide: one * column for the "^", and one for the visible character. */ else if (is_cntrl_mbchar(buf)) { *col += 2; /* If we have a normal character, get its width normally. */ } else *col += mbwidth(buf); } } else #endif { /* A byte character is one byte long. */ length = 1; /* When requested, store the byte character in chr. */ if (chr != NULL) *chr = *buf; /* When requested, add the width of the character to col. */ if (col != NULL) { /* If we have a tab, compute its width in columns using the * current value of col. */ if (*buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, it's two columns wide: one * column for the "^", and one for the visible character. */ else if (is_cntrl_char((unsigned char)*buf)) *col += 2; /* If we have a normal character, it's one column wide. */ else (*col)++; } } return length; }