/* Convert the Unicode value in chr to a multibyte character with the
 * same wide character value as chr, if possible.  If the conversion
 * succeeds, return the (dynamically allocated) multibyte character and
 * store its length in *chr_mb_len.  Otherwise, return an undefined
 * (dynamically allocated) multibyte character and store a length of zero.
 *
 * When not in UTF-8 mode, the result is always the single low-order byte
 * of chr, with a length of one.  chr_mb_len must not be NULL. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

    assert(chr_mb_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters.  The conversion state is
         * reset whether wctomb() failed or the codepoint was refused. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            wctomb_reset();
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Take the low-order byte by value.  Reading the first byte at
         * the address of chr would yield the high-order byte on a
         * big-endian machine. */
        char low_byte = (char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &low_byte, 1);
    }

    return chr_mb;
}
/* c points at a multibyte non-control character.  Write that character
 * into crep, store the number of bytes written in *crep_len, and return
 * crep.  If c is an invalid multibyte sequence or decodes to an invalid
 * Unicode codepoint, crep instead receives the bad_mbchar_len bytes of
 * bad_mbchar (intended to be Unicode 0xFFFD, the Replacement Character).
 * If re-encoding the decoded character fails, *crep_len is set to zero.
 *
 * When not in UTF-8 mode, exactly one byte is copied from c to crep.
 * None of the three pointers may be NULL. */
char *mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL && crep != NULL && crep_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;

        /* Accept only sequences that both decode and are valid Unicode. */
        if (mbtowc(&wide, c, MB_CUR_MAX) >= 0 && is_valid_unicode(wide)) {
            *crep_len = wctomb(crep, wide);

            if (*crep_len < 0) {
                wctomb_reset();
                *crep_len = 0;
            }
        } else {
            /* Substitute the replacement sequence for the bad input. */
            mbtowc_reset();
            *crep_len = bad_mbchar_len;
            strncpy(crep, bad_mbchar, *crep_len);
        }

        return crep;
    }
#endif

    /* Single-byte mode: the character is just the first byte of c. */
    *crep_len = 1;
    *crep = *c;

    return crep;
}
/* Measure the (multibyte) character that starts at c.  The return value is:
 *   - the number of bytes the character occupies, when it decodes to a
 *     valid codepoint; *width then receives the number of columns the
 *     character occupies;
 *   - the number of bytes minus 8, when it encodes an invalid codepoint;
 *   - -1, when the byte sequence is invalid;
 *   - 1, when not in UTF-8 mode.
 * Only the first case stores anything in *width; in every other case
 * the caller's value is left untouched.  c must not be NULL. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;
        const int nbytes = mbtowc(&wide, c, MB_CUR_MAX);
        int columns;

        /* The bytes do not form a decodable sequence. */
        if (nbytes < 0) {
            mbtowc_reset();
            return -1;
        }

        /* The sequence decodes, but to a codepoint that is refused. */
        if (!is_valid_unicode(wide))
            return nbytes - 8;

        /* A negative wcwidth() result (e.g. an unassigned codepoint)
         * is treated as a width of one column. */
        columns = wcwidth(wide);
        *width = (columns < 0) ? 1 : columns;

        return nbytes;
    }
#endif

    return 1;
}
/* Convert the Unicode value in chr to a multibyte character, if possible.
 * If the conversion succeeds, return the (dynamically allocated) multibyte
 * character and store its length in *chr_mb_len.  Otherwise, return an
 * undefined (dynamically allocated) multibyte character and store a length
 * of zero.
 *
 * When not in UTF-8 mode, the result is always the single low-order byte
 * of chr, with a length of one. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters.  The wctomb(NULL, 0) call
         * resets the conversion state; its result is of no interest. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            IGNORE_CALL_RESULT(wctomb(NULL, 0));
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Take the low-order byte by value.  Reading the first byte at
         * the address of chr would yield the high-order byte on a
         * big-endian machine. */
        char low_byte = (char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &low_byte, 1);
    }

    return chr_mb;
}