/*
 * Measure, in bytes, the first character of the multi-byte string `text`
 * interpreted according to `encoding`, and step over it.
 *
 * - `*text` is advanced past the character that was measured.
 * - If `remainder_p` is non-NULL, `*remainder_p` is the number of bytes
 *   that may be consumed on entry and is decreased by the returned length
 *   on exit. If it is NULL, no byte budget is imposed (SIZE_MAX is used
 *   internally), so the caller is expected to pass NUL-terminated text.
 *
 * Returns 0 without touching anything when the byte budget is zero.
 * A sequence that is not valid UTF-8 is consumed as a single raw byte.
 */
int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding)
{
	const char *begin = *text;
	const char *cursor = begin;
	size_t avail = SIZE_MAX;
	/*
	 * One byte is the answer both for undecodable UTF-8 and for every
	 * other encoding.
	 *
	 * TODO use iconv to decode one char and obtain its chrlen
	 * for now, let's treat encodings != UTF-8 as one-byte
	 */
	int len = 1;

	if (remainder_p)
		avail = *remainder_p;
	if (!avail)
		return 0;

	if (is_encoding_utf8(encoding)) {
		/* work on copies so the caller's state is only updated below */
		pick_one_utf8_char(&cursor, &avail);
		if (cursor)
			len = (int)(cursor - begin);
		/* else: not valid UTF-8 -> raw byte sequence, keep len == 1 */
	}

	*text = begin + len;
	if (remainder_p)
		*remainder_p -= len;

	return len;
}
/*
 * Report how many display columns the character at `*start` occupies and
 * move `*start` on to the following character.
 *
 * `remainder_p` is handed straight to pick_one_utf8_char(); when it is not
 * NULL it points at the count of bytes still available for decoding.
 *
 * If pick_one_utf8_char() leaves `*start` NULL (no character could be
 * decoded), the width is reported as 0.
 */
int utf8_width(const char **start, size_t *remainder_p)
{
	ucs_char_t codepoint = pick_one_utf8_char(start, remainder_p);

	return *start ? git_wcwidth(codepoint) : 0;
}
/* * Pick the next char from the stream, folding as an HFS+ filename comparison * would. Note that this is _not_ complete by any means. It's just enough * to make is_hfs_dotgit() work, and should not be used otherwise. */ static ucs_char_t next_hfs_char(const char **in) { while (1) { ucs_char_t out = pick_one_utf8_char(in, NULL); /* * check for malformed utf8. Technically this * gets converted to a percent-sequence, but * returning 0 is good enough for is_hfs_dotgit * to realize it cannot be .git */ if (!*in) return 0; /* these code points are ignored completely */ switch (out) { case 0x200c: /* ZERO WIDTH NON-JOINER */ case 0x200d: /* ZERO WIDTH JOINER */ case 0x200e: /* LEFT-TO-RIGHT MARK */ case 0x200f: /* RIGHT-TO-LEFT MARK */ case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */ case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */ case 0x202c: /* POP DIRECTIONAL FORMATTING */ case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */ case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */ case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */ case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */ case 0x206c: /* INHIBIT ARABIC FORM SHAPING */ case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */ case 0x206e: /* NATIONAL DIGIT SHAPES */ case 0x206f: /* NOMINAL DIGIT SHAPES */ case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */ continue; } /* * there's a great deal of other case-folding that occurs, * but this is enough to catch anything that will convert * to ".git" */ return tolower(out); } }