/* Walk over every Unicode code point, classify it by the pair of widths
   that uc_width reports for the "UTF-8" and "GBK" encodings, and feed the
   resulting one-character class to the interval accumulator.

   Classes:
     '0'  width 0 in both encodings
     '1'  width 1 in both encodings
     'A'  width 1 in UTF-8 but width 2 in GBK
     '2'  width 2 in both encodings
   Any other combination is expected to be a control character, for which
   uc_width returns a negative value in both encodings.  */
int
main ()
{
  ucs4_t uc;

  for (uc = 0; uc < 0x110000; uc++)
    {
      int w1 = uc_width (uc, "UTF-8");
      int w2 = uc_width (uc, "GBK");
      char width;

      if (w1 == 0 && w2 == 0)
        width = '0';
      else if (w1 == 1 && w2 == 1)
        width = '1';
      else if (w1 == 1 && w2 == 2)
        width = 'A';
      else if (w1 == 2 && w2 == 2)
        width = '2';
      else
        width = 0;

      if (width != 0)
        {
          add_to_interval (uc, width);
        }
      else
        {
          /* No class matched: uc must be a control character.  */
          ASSERT (w1 < 0 && w2 < 0);
        }
    }

  /* Flush whatever interval is still pending.  */
  finish_interval ();

  return 0;
}
/* Replacement for wcwidth().

   WC is the wide character whose column width is wanted.

   When the locale's character set (as reported by locale_charset) is
   "UTF-8", the result comes from uc_width, on the assumption that in such
   a locale a wide character is the same as a Unicode character.

   In every other locale the result comes from the system's wcwidth when
   HAVE_WCWIDTH is set; otherwise a minimal approximation is used:
   0 for the null wide character, 1 for a printable one, -1 for the rest.  */
int
rpl_wcwidth (wchar_t wc)
{
  const char *encoding = locale_charset ();

  /* In UTF-8 locales, use a Unicode aware width function.  */
  if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
    return uc_width (wc, encoding);

  /* Otherwise, fall back to the system's wcwidth function.
     Each preprocessor directive must begin its own source line; a '#'
     appearing in the middle of a line is not treated as a directive.  */
#if HAVE_WCWIDTH
  return wcwidth (wc);
#else
  if (wc == 0)
    return 0;
  return iswprint (wc) ? 1 : -1;
#endif
}
/* Sum the column widths of the characters in a UTF-8 unit array.

   S         start of the array.
   N         number of uint8_t units available at S.
   ENCODING  passed through unchanged to uc_width.

   Decoding stops after N units or at the first character equal to 0,
   whichever comes first.  Characters for which uc_width returns a
   negative value (control characters) contribute nothing.

   Returns the accumulated width.  */
int
u8_width (const uint8_t *s, size_t n, const char *encoding)
{
  const uint8_t *const limit = s + n;
  const uint8_t *cursor;
  int total = 0;

  for (cursor = s; cursor < limit; )
    {
      ucs4_t uc;
      int cols;

      /* Decode one character and step past the units it occupied.  */
      cursor += u8_mbtouc_unsafe (&uc, cursor, limit - cursor);

      if (uc == 0)
        break; /* end of string reached */

      cols = uc_width (uc, encoding);
      if (cols >= 0) /* ignore control characters in the string */
        total += cols;
    }

  return total;
}
/* Spot checks for uc_width: printable ASCII, non-spacing marks, format
   controls, zero-width characters and a handful of CJK code points.
   Each expectation is a separate ASSERT so that a failure identifies the
   exact code point.  */
int
main ()
{
  ucs4_t uc;

  /* Every printable ASCII character (U+0020..U+007E) occupies one column.  */
  uc = 0x0020;
  while (uc < 0x007F)
    {
      ASSERT (uc_width (uc, "ISO-8859-2") == 1);
      uc++;
    }

  /* Non-spacing characters occupy no column.  */
  ASSERT (uc_width (0x0301, "UTF-8") == 0);
  ASSERT (uc_width (0x05B0, "UTF-8") == 0);

  /* Format control characters occupy no column.  */
  ASSERT (uc_width (0x200E, "UTF-8") == 0);
  ASSERT (uc_width (0x2060, "UTF-8") == 0);
  ASSERT (uc_width (0xE0001, "UTF-8") == 0);
  ASSERT (uc_width (0xE0044, "UTF-8") == 0);

  /* Zero width characters occupy no column.  */
  ASSERT (uc_width (0x200B, "UTF-8") == 0);
  ASSERT (uc_width (0xFEFF, "UTF-8") == 0);

  /* CJK characters occupy two columns.  */
  ASSERT (uc_width (0x3000, "UTF-8") == 2);
  ASSERT (uc_width (0xB250, "UTF-8") == 2);
  ASSERT (uc_width (0xFF1A, "UTF-8") == 2);
  ASSERT (uc_width (0x20369, "UTF-8") == 2);
  ASSERT (uc_width (0x2F876, "UTF-8") == 2);

  return 0;
}
int u32_width_linebreaks (const uint32_t *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p) { const uint32_t *s_end; char *last_p; int last_column; int piece_width; u32_possible_linebreaks (s, n, encoding, p); s_end = s + n; last_p = NULL; last_column = start_column; piece_width = 0; while (s < s_end) { ucs4_t uc = *s; /* Respect the override. */ if (o != NULL && *o != UC_BREAK_UNDEFINED) *p = *o; if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) { /* An atomic piece of text ends here. */ if (last_p != NULL && last_column + piece_width > width) { /* Insert a line break. */ *last_p = UC_BREAK_POSSIBLE; last_column = 0; } } if (*p == UC_BREAK_MANDATORY) { /* uc is a line break character. */ /* Start a new piece at column 0. */ last_p = NULL; last_column = 0; piece_width = 0; } else { /* uc is not a line break character. */ int w; if (*p == UC_BREAK_POSSIBLE) { /* Start a new piece. */ last_p = p; last_column += piece_width; piece_width = 0; /* No line break for the moment, may be turned into UC_BREAK_POSSIBLE later, via last_p. */ } *p = UC_BREAK_PROHIBITED; w = uc_width (uc, encoding); if (w >= 0) /* ignore control characters in the string */ piece_width += w; } s++; p++; if (o != NULL) o++; } /* The last atomic piece of text ends here. */ if (last_p != NULL && last_column + piece_width + at_end_columns > width) { /* Insert a line break. */ *last_p = UC_BREAK_POSSIBLE; last_column = 0; } return last_column + piece_width; }