/*
 * Compute how many columns a character would occupy if it were appended,
 * with attribute `a`, at the current end of the line buffer.
 *
 *   ch       the character being measured.
 *   a        the attribute the character would carry.
 *   prev_ch  the character preceding it (used for backspace and for
 *            combining-character detection).
 *
 * The result also counts the width of any attribute exit/enter sequence
 * that a change of attribute at this position would cause to be emitted.
 * It is negative for a backspace and zero for characters that are treated
 * as not moving the cursor.
 */
static int pwidth(LWCHAR ch, int a, LWCHAR prev_ch)
{
	int cols;

	if (ch == '\b') {
		/*
		 * A backspace steps back over the previous character:
		 * two columns if that was a wide character in UTF-8 mode,
		 * one column otherwise.
		 * XXX - Incorrect if several '\b' in a row.
		 */
		if (utf_mode && is_wide_char(prev_ch))
			return (-2);
		return (-1);
	}

	if (utf_mode && !is_ascii_char(ch)) {
		/*
		 * Composing and combining characters are counted as
		 * occupying no space of their own.  A terminal that fails
		 * to compose one may still spend a column on it; detecting
		 * that would mean replicating the composition process
		 * here, so it is not attempted.
		 */
		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
			return (0);
	} else if (control_char((char)ch)) {
		/*
		 * The effect of a raw control character cannot be
		 * predicted, so report no movement.  This can only
		 * happen if the -r flag is in effect.
		 */
		return (0);
	}

	/* An ordinary character: one column, or two when it is wide. */
	cols = 1;
	if (is_wide_char(ch))
		cols = 2;

	/* Leaving the previous attribute costs its exit sequence. */
	if (curr > 0) {
		if (!is_at_equiv(attr[curr-1], a))
			cols += attr_ewidth(attr[curr-1]);
	}

	/* Entering a non-normal attribute costs its enter sequence. */
	if (apply_at_specials(a) != AT_NORMAL) {
		if (curr == 0 || !is_at_equiv(attr[curr-1], a))
			cols += attr_swidth(a);
	}

	return (cols);
}
/*
 * Smoke test for the character-classification predicates.
 *
 * Two code points are checked against every predicate: 0x666 must be
 * reported as a digit and 0xf40 as a base character; both must be
 * reported as characters and must be rejected by all other predicates.
 * Any mismatch aborts through assert(); on success a single line is
 * printed and the program exits with status 0.
 */
int main(int argc, char **argv)
{
	const unsigned int digit_cp = 0x666;	/* U+0666 ARABIC-INDIC DIGIT SIX */
	const unsigned int base_cp = 0xf40;	/* U+0F40 TIBETAN LETTER KA */

	/* The digit is a digit and a char, and nothing else. */
	assert(is_digit(digit_cp) == true);
	assert(!is_base_char(digit_cp));
	assert(is_char(digit_cp) == true);
	assert(!is_extender(digit_cp));
	assert(!is_combining_char(digit_cp));
	assert(!is_ideographic(digit_cp));

	/* The letter is a base char and a char, and nothing else. */
	assert(!is_digit(base_cp));
	assert(is_base_char(base_cp) == true);
	assert(is_char(base_cp) == true);
	assert(!is_extender(base_cp));
	assert(!is_combining_char(base_cp));
	assert(!is_ideographic(base_cp));

	fputs("The test pass.\n", stdout);

	return 0;
}
/*
 * Test whether the character can be a part of a NCName.
 *
 * The code point is first matched against the NameChar ranges of
 * XML 1.0 (refer http://www.w3.org/TR/REC-xml/ for detail) and then
 * against the is_letter / is_digit / is_combining_char / is_extender
 * classifiers (defined elsewhere; presumably the character classes of
 * the earlier XML 1.0 editions).
 *
 * NOTE(review): ':' is accepted here, as in the XML NameChar production,
 * although a strict NCName excludes it -- confirm that callers deal with
 * the colon separately.
 *
 * Returns true if ch may appear in a name, false otherwise.
 */
static bool is_name_char(uint32_t ch)
{
	/*
	 * Inclusive code point ranges, in the order the specification
	 * lists them.  Entries marked "!start" are valid inside a name
	 * but not as its first character.
	 */
	static const struct {
		uint32_t first;
		uint32_t last;
	} ranges[] = {
		{ 'a', 'z' },
		{ 'A', 'Z' },
		{ '0', '9' },		/* !start */
		{ '_', '_' },
		{ ':', ':' },
		{ '-', '-' },		/* !start */
		{ '.', '.' },		/* !start */
		{ 0xB7, 0xB7 },		/* !start */
		{ 0xC0, 0xD6 },
		{ 0xD8, 0xF6 },
		{ 0xF8, 0x2FF },
		{ 0x300, 0x36F },	/* !start */
		{ 0x370, 0x37D },
		{ 0x37F, 0x1FFF },
		{ 0x200C, 0x200D },
		{ 0x203F, 0x2040 },	/* !start */
		{ 0x2070, 0x218F },
		{ 0x2C00, 0x2FEF },
		{ 0x3001, 0xD7FF },
		{ 0xF900, 0xFDCF },
		{ 0xFDF0, 0xFFFD },
		{ 0x10000, 0xEFFFF }
	};
	unsigned int i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		if (ch >= ranges[i].first && ch <= ranges[i].last)
			return true;
	}

	/*
	 * Not in any listed range: fall back to the class predicates.
	 * '.', '-', '_' and ':' need no further test here because the
	 * table above already accepts each of them.
	 */
	if (is_letter(ch) == true)
		return true;
	if (is_digit(ch) == true)
		return true;
	if (is_combining_char(ch) == true)
		return true;
	if (is_extender(ch) == true)
		return true;

	return false;
}
/*
 * Add one character to the line buffer, interpreting backspaces and
 * overstrike sequences, tabs, control characters and (in UTF-8 mode)
 * binary characters along the way.
 *
 *   ch   the character to add.
 *   rep  handed on to STORE_CHAR; it is redirected to linebuf + curr when
 *        an underscore overstrikes an existing character.
 *        (presumably the byte representation of ch -- confirm with caller)
 *   pos  handed on unchanged to the STORE_* macros.
 *
 * Returns 0 normally, or 1 when the printable form of a UTF-8 binary
 * character would not fit within sc_width.
 *
 * NOTE(review): STORE_CHAR, STORE_PRCHAR and STORE_TAB are macros defined
 * outside this view; they presumably can also return from this function
 * when storing fails -- confirm before restructuring the control flow.
 */
static int do_append(LWCHAR ch, char *rep, off_t pos)
{
	int a;			/* attribute the character will be stored with */
	LWCHAR prev_ch;		/* character currently at linebuf + curr */

	a = AT_NORMAL;

	if (ch == '\b') {
		/*
		 * In BS_CONTROL mode a backspace gets no special treatment;
		 * jump to the control-character branch further down (the
		 * label sits inside an else-if body) and skip all of the
		 * overstrike handling.
		 */
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (curr <= lmargin || column <= lmargin || (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
			/*
			 * Nothing past the left margin to back up over, or
			 * the previous cell belongs to an ANSI sequence or
			 * a binary-character representation.
			 */
			STORE_PRCHAR('\b', pos);
		} else if (bs_mode == BS_NORMAL) {
			/* Store the backspace itself as an ordinary character. */
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		} else if (bs_mode == BS_SPECIAL) {
			/* Back up and remember that the next character overstrikes. */
			overstrike = backc();
		}
		return (0);
	}

	if (overstrike > 0) {
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		/*
		 * In UTF-8 mode leave a negative marker behind so that the
		 * "overstrike < 0" branch below can extend this overstrike to
		 * composing/combining characters that follow.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character. */
		prev_ch = get_wchar(linebuf + curr);
		a = attr[curr];
		if (ch == prev_ch) {
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_') {
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else {
				a |= AT_BOLD;
			}
		} else if (ch == '_') {
			/*
			 * An underscore laid over another character: keep
			 * that character (and its bytes in the buffer) and
			 * underline it.
			 */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_') {
			/* A character laid over an underscore: underline the new one. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes. */
	} else if (overstrike < 0) {
		if (is_composing_char(ch) || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike. */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	/* Note: ch may have been replaced by prev_ch in the overstrike code above. */
	if (ch == '\t') {
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode) {
		case BS_CONTROL:
			/* Tabs get no special treatment in this mode either. */
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch)) {
	do_control_char:
		/* Also reached by goto for '\b' and '\t' when bs_mode is BS_CONTROL. */
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))) {
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else {
			STORE_PRCHAR((char)ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch)) {
		char *s;

		/* Store the string returned by prutfchar() in place of ch itself. */
		s = prutfchar(ch);
		/*
		 * Refuse (return 1) if that string, plus the attribute
		 * enter/exit overhead measured with binattr, would run past
		 * sc_width.
		 */
		if (column + (int)strlen(s) - 1 + pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);
		for (; *s != 0; s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
	} else {
		/* An ordinary character: store it with the attribute worked out above. */
		STORE_CHAR(ch, a, rep, pos);
	}
	return (0);
}
/* * Shift the input line left. * This means discarding N printable chars at the start of the buffer. */ static void pshift(int shift) { LWCHAR prev_ch = 0; unsigned char c; int shifted = 0; int to; int from; int len; int width; int prev_attr; int next_attr; if (shift > column - lmargin) shift = column - lmargin; if (shift > curr - lmargin) shift = curr - lmargin; to = from = lmargin; /* * We keep on going when shifted == shift * to get all combining chars. */ while (shifted <= shift && from < curr) { c = linebuf[from]; if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) { /* Keep cumulative effect. */ linebuf[to] = c; attr[to++] = attr[from++]; while (from < curr && linebuf[from]) { linebuf[to] = linebuf[from]; attr[to++] = attr[from]; if (!is_ansi_middle(linebuf[from++])) break; } continue; } width = 0; if (!IS_ASCII_OCTET(c) && utf_mode) { /* Assumes well-formedness validation already done. */ LWCHAR ch; len = utf_len(c); if (from + len > curr) break; ch = get_wchar(linebuf + from); if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch)) width = is_wide_char(ch) ? 2 : 1; prev_ch = ch; } else { len = 1; if (c == '\b') /* XXX - Incorrect if several '\b' in a row. */ width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1; else if (!control_char(c)) width = 1; prev_ch = 0; } if (width == 2 && shift - shifted == 1) { /* Should never happen when called by pshift_all(). */ attr[to] = attr[from]; /* * Assume a wide_char will never be the first half of a * combining_char pair, so reset prev_ch in case we're * followed by a '\b'. */ prev_ch = linebuf[to++] = ' '; from += len; shifted++; continue; } /* Adjust width for magic cookies. */ prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL; next_attr = (from + len < curr) ? 
attr[from + len] : prev_attr; if (!is_at_equiv(attr[from], prev_attr) && !is_at_equiv(attr[from], next_attr)) { width += attr_swidth(attr[from]); if (from + len < curr) width += attr_ewidth(attr[from]); if (is_at_equiv(prev_attr, next_attr)) { width += attr_ewidth(prev_attr); if (from + len < curr) width += attr_swidth(next_attr); } } if (shift - shifted < width) break; from += len; shifted += width; if (shifted < 0) shifted = 0; } while (from < curr) { linebuf[to] = linebuf[from]; attr[to++] = attr[from++]; } curr = to; column -= shifted; cshift += shifted; }