Example #1
0
static int git_wcwidth(ucs_char_t ch)
{
	/*
	 * The interval tables searched below (zero_width, double_width) are
	 * not declared in this function; presumably the header pulled in
	 * here provides them as sorted, non-overlapping ranges.
	 */
#include "unicode_width.h"

	/* NUL occupies no columns. */
	if (!ch)
		return 0;

	/* C0 controls, DEL and the C1 controls are reported as -1. */
	if (ch < 0x20 || (0x7f <= ch && ch <= 0x9f))
		return -1;

	/* A hit in the non-spacing table means zero columns. */
	if (bisearch(ch, zero_width,
		     sizeof(zero_width) / sizeof(struct interval) - 1))
		return 0;

	/* A hit in the double-width table means two columns, otherwise one. */
	return bisearch(ch, double_width,
			sizeof(double_width) / sizeof(struct interval) - 1)
		? 2 : 1;
}
Example #2
0
int
xcwidth(xchar c)
{
  /* Everything below 0xa0 is decided without a table lookup. */
  if (c < 0xa0) {
    if (c == 0)
      return 0;                         /* null character */
    if (c >= 0x20 && c < 0x7f)
      return 1;                         /* printable ASCII */
    return -1;                          /* control character */
  }

  /* Table lookups: the order matters, the first matching table wins. */
  if (bisearch(c, combining, lengthof(combining)))
    return 0;                           /* non-spacing */
  if (bisearch(c, ambiguous, lengthof(ambiguous)))
    return font_ambig_wide + 1;         /* CJK ambiguous */

  /* Wide characters take two cells, anything else one. */
  return bisearch(c, wide, lengthof(wide)) ? 2 : 1;
}
Example #3
0
TEST(Search_Binary, bisearch)
{
	/*
	 * Searches an ascending array holding 0..size-1 for a value in the
	 * middle, a value that is absent, and the last element.
	 *
	 * The array and the search key live in automatic storage, so there is
	 * no allocation that could fail and nothing left to free on exit.
	 */
	const int size = 100;
	int data[size];
	int target;
	int result;
	int i;

	for (i = 0; i < size; ++i) {
		data[i] = i;
	}

	/* A stored value: the test expects 10 back. */
	target = 10;
	result = bisearch(data, &target, size, sizeof(int), compare);
	EXPECT_EQ(10, result);

	/* A value larger than every element: the test expects -1. */
	target = 101;
	result = bisearch(data, &target, size, sizeof(int), compare);
	EXPECT_EQ(-1, result);

	/* The final element: the test expects 99 back. */
	target = 99;
	result = bisearch(data, &target, size, sizeof(int), compare);
	EXPECT_EQ(99, result);
}
 // Builds a two-element vector: the result of bisearch(nums, target, 0)
 // followed by the result of bisearch(nums, target, 1).
 // NOTE(review): the 0/1 flag presumably selects the first/last occurrence
 // of target - confirm against bisearch.
 vector<int> searchRange(vector<int>& nums, int target) {
     const int first = bisearch(nums, target, 0);
     const int second = bisearch(nums, target, 1);

     vector<int> bounds(2);
     bounds[0] = first;
     bounds[1] = second;
     return bounds;
 }
Example #5
0
static int mk_wcwidth(uint32_t ucs)
{
  /* NUL takes no columns */
  if (ucs == 0)
    return 0;

  /* C0 controls, DEL and the C1 controls are reported as -1 */
  if (ucs < 0x20 || (ucs >= 0x7f && ucs <= 0x9f))
    return -1;

  /* a hit in the table of non-spacing characters means zero columns */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(struct interval) - 1))
    return 0;

  /* from here on ucs is neither combining nor a C0/C1 control character;
   * every wide range listed below starts at 0x1100 or later */
  if (ucs < 0x1100)
    return 1;

  if (ucs <= 0x115f ||                    /* Hangul Jamo init. consonants */
      ucs == 0x2329 || ucs == 0x232a ||
      (ucs >= 0x2e80 && ucs <= 0xa4cf &&
       ucs != 0x303f) ||                  /* CJK ... Yi */
      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
      (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
      (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
      (ucs >= 0xffe0 && ucs <= 0xffe6) ||
      (ucs >= 0x20000 && ucs <= 0x2fffd) ||
      (ucs >= 0x30000 && ucs <= 0x3fffd))
    return 2;

  return 1;
}
Example #6
0
// Looks yindex up via bisearch over [offset, offset+paircnt) and returns the
// p field of the matching postnode entry; when bisearch reports -1 the
// result is MAXPLOG converted to float.
float IVT::get_p(int yindex)
{
	const int pos = bisearch(yindex, offset, offset+paircnt);

	if (pos != -1) {
		return postnode[pos].p;
	}
	return (float) MAXPLOG;
}
Example #7
0
static char is_double_width(unsigned ucs)
{
	/* Ascending, non-overlapping code point ranges treated as double width. */
	static const struct interval double_width[] = {
		{ 0x1100, 0x115F},
		{ 0x2329, 0x232A},
		{ 0x2E80, 0x303E},
		{ 0x3040, 0xA4CF},
		{ 0xAC00, 0xD7A3},
		{ 0xF900, 0xFAFF},
		{ 0xFE10, 0xFE19},
		{ 0xFE30, 0xFE6F},
		{ 0xFF00, 0xFF60},
		{ 0xFFE0, 0xFFE6},
		{ 0x20000, 0x2FFFD},
		{ 0x30000, 0x3FFFD}
	};

	/* The third argument is the index of the last table entry. */
	return bisearch(ucs, double_width,
			sizeof(double_width) / sizeof(struct interval) - 1);
}
Example #8
0
// Finds key via bisearch(key, maxIVT). On a miss (-1) returns 0 and leaves
// index, offset and paircnt untouched; on a hit stores the found position in
// index, copies offset and paircnt from that ivtnode entry, and returns the
// copied paircnt.
int IVT::lookup(int key){
	const int slot = bisearch(key, maxIVT);

	if (slot == -1) {
		return 0;
	}

	index = slot;
	offset = ivtnode[slot].offset;
	paircnt = ivtnode[slot].paircnt;
	return paircnt;
}
/*
 * Reads one integer from standard input and prints the index that bisearch
 * reports for it in a fixed ascending array, or "not found" when bisearch
 * returns -1.
 *
 * Returns 0 after a lookup, 1 when no integer could be read.
 */
int main()
{
	int array[N] = {1,3,4,6,7,8,9,10,12,15};
	int a;
	int index;

	/* If no integer was parsed, a is never written; do not search with it. */
	if (scanf("%d", &a) != 1) {
		fprintf(stderr, "invalid input\n");
		return 1;
	}

	index = bisearch(array, a, N);
	if (index != -1)
		printf("array[%d] is %d\n", index, a);
	else
		printf("not found\n");
	return 0;
}
Example #10
0
/*
 * Reads a test-case count from standard input and runs init(), bfs_init()
 * and bisearch() once per test case.
 *
 * Returns 0 on completion, 1 when the count could not be read.
 */
int main()
{
	int t;

	/* If no integer was parsed, t is never written; do not loop on it. */
	if (scanf("%d", &t) != 1)
		return 1;

	/* "> 0" keeps a negative count from counting down without bound. */
	while (t-- > 0)
	{
		init();
		bfs_init();
		bisearch();
	}
	return 0;
}
Example #11
0
/*
 * The following functions are the same as mk_wcwidth() and
 * mk_wcswidth(), except that spacing characters in the East Asian
 * Ambiguous (A) category as defined in Unicode Technical Report #11
 * have a column width of 2. This variant might be useful for users of
 * CJK legacy encodings who want to migrate to UCS without changing
 * the traditional terminal character-width behaviour. It is not
 * otherwise recommended for general use.
 */
int mk_wcwidth_cjk(int ucs) {
    /* Sorted, non-overlapping intervals of East Asian Ambiguous characters,
     * generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c". */
    static const struct interval ambiguous[] = {
        {0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8},   {0x00AA, 0x00AA},
        {0x00AE, 0x00AE}, {0x00B0, 0x00B4}, {0x00B6, 0x00BA},   {0x00BC, 0x00BF},
        {0x00C6, 0x00C6}, {0x00D0, 0x00D0}, {0x00D7, 0x00D8},   {0x00DE, 0x00E1},
        {0x00E6, 0x00E6}, {0x00E8, 0x00EA}, {0x00EC, 0x00ED},   {0x00F0, 0x00F0},
        {0x00F2, 0x00F3}, {0x00F7, 0x00FA}, {0x00FC, 0x00FC},   {0x00FE, 0x00FE},
        {0x0101, 0x0101}, {0x0111, 0x0111}, {0x0113, 0x0113},   {0x011B, 0x011B},
        {0x0126, 0x0127}, {0x012B, 0x012B}, {0x0131, 0x0133},   {0x0138, 0x0138},
        {0x013F, 0x0142}, {0x0144, 0x0144}, {0x0148, 0x014B},   {0x014D, 0x014D},
        {0x0152, 0x0153}, {0x0166, 0x0167}, {0x016B, 0x016B},   {0x01CE, 0x01CE},
        {0x01D0, 0x01D0}, {0x01D2, 0x01D2}, {0x01D4, 0x01D4},   {0x01D6, 0x01D6},
        {0x01D8, 0x01D8}, {0x01DA, 0x01DA}, {0x01DC, 0x01DC},   {0x0251, 0x0251},
        {0x0261, 0x0261}, {0x02C4, 0x02C4}, {0x02C7, 0x02C7},   {0x02C9, 0x02CB},
        {0x02CD, 0x02CD}, {0x02D0, 0x02D0}, {0x02D8, 0x02DB},   {0x02DD, 0x02DD},
        {0x02DF, 0x02DF}, {0x0391, 0x03A1}, {0x03A3, 0x03A9},   {0x03B1, 0x03C1},
        {0x03C3, 0x03C9}, {0x0401, 0x0401}, {0x0410, 0x044F},   {0x0451, 0x0451},
        {0x2010, 0x2010}, {0x2013, 0x2016}, {0x2018, 0x2019},   {0x201C, 0x201D},
        {0x2020, 0x2022}, {0x2024, 0x2027}, {0x2030, 0x2030},   {0x2032, 0x2033},
        {0x2035, 0x2035}, {0x203B, 0x203B}, {0x203E, 0x203E},   {0x2074, 0x2074},
        {0x207F, 0x207F}, {0x2081, 0x2084}, {0x20AC, 0x20AC},   {0x2103, 0x2103},
        {0x2105, 0x2105}, {0x2109, 0x2109}, {0x2113, 0x2113},   {0x2116, 0x2116},
        {0x2121, 0x2122}, {0x2126, 0x2126}, {0x212B, 0x212B},   {0x2153, 0x2154},
        {0x215B, 0x215E}, {0x2160, 0x216B}, {0x2170, 0x2179},   {0x2190, 0x2199},
        {0x21B8, 0x21B9}, {0x21D2, 0x21D2}, {0x21D4, 0x21D4},   {0x21E7, 0x21E7},
        {0x2200, 0x2200}, {0x2202, 0x2203}, {0x2207, 0x2208},   {0x220B, 0x220B},
        {0x220F, 0x220F}, {0x2211, 0x2211}, {0x2215, 0x2215},   {0x221A, 0x221A},
        {0x221D, 0x2220}, {0x2223, 0x2223}, {0x2225, 0x2225},   {0x2227, 0x222C},
        {0x222E, 0x222E}, {0x2234, 0x2237}, {0x223C, 0x223D},   {0x2248, 0x2248},
        {0x224C, 0x224C}, {0x2252, 0x2252}, {0x2260, 0x2261},   {0x2264, 0x2267},
        {0x226A, 0x226B}, {0x226E, 0x226F}, {0x2282, 0x2283},   {0x2286, 0x2287},
        {0x2295, 0x2295}, {0x2299, 0x2299}, {0x22A5, 0x22A5},   {0x22BF, 0x22BF},
        {0x2312, 0x2312}, {0x2460, 0x24E9}, {0x24EB, 0x254B},   {0x2550, 0x2573},
        {0x2580, 0x258F}, {0x2592, 0x2595}, {0x25A0, 0x25A1},   {0x25A3, 0x25A9},
        {0x25B2, 0x25B3}, {0x25B6, 0x25B7}, {0x25BC, 0x25BD},   {0x25C0, 0x25C1},
        {0x25C6, 0x25C8}, {0x25CB, 0x25CB}, {0x25CE, 0x25D1},   {0x25E2, 0x25E5},
        {0x25EF, 0x25EF}, {0x2605, 0x2606}, {0x2609, 0x2609},   {0x260E, 0x260F},
        {0x2614, 0x2615}, {0x261C, 0x261C}, {0x261E, 0x261E},   {0x2640, 0x2640},
        {0x2642, 0x2642}, {0x2660, 0x2661}, {0x2663, 0x2665},   {0x2667, 0x266A},
        {0x266C, 0x266D}, {0x266F, 0x266F}, {0x273D, 0x273D},   {0x2776, 0x277F},
        {0xE000, 0xF8FF}, {0xFFFD, 0xFFFD}, {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD}
    };

    /* A hit in the East Asian Ambiguous table means two columns; every other
     * code point gets whatever mk_wcwidth() reports for it. */
    return bisearch(ucs, ambiguous,
                    sizeof(ambiguous) / sizeof(struct interval) - 1)
               ? 2
               : mk_wcwidth(ucs);
}
Example #12
0
int spell(char (*dictionary)[SPELL_SIZE], int size, const char *word) {

   /*
    * Look the word up in the dictionary: the result is 1 when bisearch
    * returns a non-negative value and 0 otherwise.
    */
   return bisearch(dictionary, word, size, SPELL_SIZE, compare_str) >= 0;

}
Example #13
0
/*
** Column width of c: 0 for NUL and for anything bisearch() accepts, -1 for
** C0 controls, DEL and C1 controls, 2 for the wide ranges listed below,
** and 1 for everything else.
*/
int				get_display_width(t_utf8 c)
{
	if (c == 0)
		return (0);
	if (c < 0x20 || (c >= 0x7f && c <= 0x9f))
		return (-1);
	if (bisearch(c))
		return (0);
	if (c < 0x1100)
		return (1);
	if (c <= 0x115f || c == 0x2329 || c == 0x232a)
		return (2);
	if (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f)
		return (2);
	if ((c >= 0xac00 && c <= 0xd7a3) || (c >= 0xf900 && c <= 0xfaff))
		return (2);
	if ((c >= 0xfe10 && c <= 0xfe19) || (c >= 0xfe30 && c <= 0xfe6f))
		return (2);
	if ((c >= 0xff00 && c <= 0xff60) || (c >= 0xffe0 && c <= 0xffe6))
		return (2);
	if ((c >= 0x20000 && c <= 0x2fffd) || (c >= 0x30000 && c <= 0x3fffd))
		return (2);
	return (1);
}
Example #14
0
	static int
vterm_is_combining(uint32_t codepoint)
{
  /* index of the final entry in the combining table */
  const int last = sizeof(combining) / sizeof(struct interval) - 1;

  /* hand back bisearch's verdict for codepoint unchanged */
  return bisearch(codepoint, combining, last);
}
Example #15
0
int wcwidth_ucs(wchar_t ucs)
{
  /* Sorted, non-overlapping intervals of non-spacing characters, generated
   * by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c".  One
   * interval (0x3099-0x309a) is left out when building for __APPLE__. */
  static const struct interval combining[] = {
    { 0x0300, 0x036f }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
    { 0x0591, 0x05bd }, { 0x05bf, 0x05bf }, { 0x05c1, 0x05c2 },
    { 0x05c4, 0x05c5 }, { 0x05c7, 0x05c7 }, { 0x0600, 0x0603 },
    { 0x0610, 0x0615 }, { 0x064b, 0x065e }, { 0x0670, 0x0670 },
    { 0x06d6, 0x06e4 }, { 0x06e7, 0x06e8 }, { 0x06ea, 0x06ed },
    { 0x070f, 0x070f }, { 0x0711, 0x0711 }, { 0x0730, 0x074a },
    { 0x07a6, 0x07b0 }, { 0x07eb, 0x07f3 }, { 0x0901, 0x0902 },
    { 0x093c, 0x093c }, { 0x0941, 0x0948 }, { 0x094d, 0x094d },
    { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
    { 0x09bc, 0x09bc }, { 0x09c1, 0x09c4 }, { 0x09cd, 0x09cd },
    { 0x09e2, 0x09e3 }, { 0x0a01, 0x0a02 }, { 0x0a3c, 0x0a3c },
    { 0x0a41, 0x0a42 }, { 0x0a47, 0x0a48 }, { 0x0a4b, 0x0a4d },
    { 0x0a70, 0x0a71 }, { 0x0a81, 0x0a82 }, { 0x0abc, 0x0abc },
    { 0x0ac1, 0x0ac5 }, { 0x0ac7, 0x0ac8 }, { 0x0acd, 0x0acd },
    { 0x0ae2, 0x0ae3 }, { 0x0b01, 0x0b01 }, { 0x0b3c, 0x0b3c },
    { 0x0b3f, 0x0b3f }, { 0x0b41, 0x0b43 }, { 0x0b4d, 0x0b4d },
    { 0x0b56, 0x0b56 }, { 0x0b82, 0x0b82 }, { 0x0bc0, 0x0bc0 },
    { 0x0bcd, 0x0bcd }, { 0x0c3e, 0x0c40 }, { 0x0c46, 0x0c48 },
    { 0x0c4a, 0x0c4d }, { 0x0c55, 0x0c56 }, { 0x0cbc, 0x0cbc },
    { 0x0cbf, 0x0cbf }, { 0x0cc6, 0x0cc6 }, { 0x0ccc, 0x0ccd },
    { 0x0ce2, 0x0ce3 }, { 0x0d41, 0x0d43 }, { 0x0d4d, 0x0d4d },
    { 0x0dca, 0x0dca }, { 0x0dd2, 0x0dd4 }, { 0x0dd6, 0x0dd6 },
    { 0x0e31, 0x0e31 }, { 0x0e34, 0x0e3a }, { 0x0e47, 0x0e4e },
    { 0x0eb1, 0x0eb1 }, { 0x0eb4, 0x0eb9 }, { 0x0ebb, 0x0ebc },
    { 0x0ec8, 0x0ecd }, { 0x0f18, 0x0f19 }, { 0x0f35, 0x0f35 },
    { 0x0f37, 0x0f37 }, { 0x0f39, 0x0f39 }, { 0x0f71, 0x0f7e },
    { 0x0f80, 0x0f84 }, { 0x0f86, 0x0f87 }, { 0x0f90, 0x0f97 },
    { 0x0f99, 0x0fbc }, { 0x0fc6, 0x0fc6 }, { 0x102d, 0x1030 },
    { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
    { 0x1058, 0x1059 }, { 0x1160, 0x11ff }, { 0x135f, 0x135f },
    { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
    { 0x1772, 0x1773 }, { 0x17b4, 0x17b5 }, { 0x17b7, 0x17bd },
    { 0x17c6, 0x17c6 }, { 0x17c9, 0x17d3 }, { 0x17dd, 0x17dd },
    { 0x180b, 0x180d }, { 0x18a9, 0x18a9 }, { 0x1920, 0x1922 },
    { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193b },
    { 0x1a17, 0x1a18 }, { 0x1b00, 0x1b03 }, { 0x1b34, 0x1b34 },
    { 0x1b36, 0x1b3a }, { 0x1b3c, 0x1b3c }, { 0x1b42, 0x1b42 },
    { 0x1b6b, 0x1b73 }, { 0x1dc0, 0x1dca }, { 0x1dfe, 0x1dff },
    { 0x200b, 0x200f }, { 0x202a, 0x202e }, { 0x2060, 0x2063 },
    { 0x206a, 0x206f }, { 0x20d0, 0x20ef }, { 0x302a, 0x302f },
#ifndef __APPLE__
    { 0x3099, 0x309a },
#endif
                        { 0xa806, 0xa806 }, { 0xa80b, 0xa80b },
    { 0xa825, 0xa826 }, { 0xfb1e, 0xfb1e }, { 0xfe00, 0xfe0f },
    { 0xfe20, 0xfe23 }, { 0xfeff, 0xfeff }, { 0xfff9, 0xfffb },
    { 0x10a01, 0x10a03 }, { 0x10a05, 0x10a06 }, { 0x10a0c, 0x10a0f },
    { 0x10a38, 0x10a3a }, { 0x10a3f, 0x10a3f }, { 0x1d167, 0x1d169 },
    { 0x1d173, 0x1d182 }, { 0x1d185, 0x1d18b }, { 0x1d1aa, 0x1d1ad },
    { 0x1d242, 0x1d244 }, { 0xe0001, 0xe0001 }, { 0xe0020, 0xe007f },
    { 0xe0100, 0xe01ef }
  };

  /* NUL takes no columns */
  if (ucs == 0)
    return 0;

  /* C0 controls, DEL and the C1 controls are reported as -1 */
  if (ucs < 0x20 || (ucs >= 0x7f && ucs <= 0x9f))
    return -1;

  /* a hit in the table of non-spacing characters means zero columns */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(struct interval) - 1))
    return 0;

  /* from here on ucs is neither combining nor a C0/C1 control character */

  /* fast exit: every wide range below starts at 0x1100 or later */
  if (ucs < 0x1100)
    return 1;

  if (ucs <= 0x115f ||                    /* Hangul Jamo init. consonants */
      ucs == 0x2329 || ucs == 0x232a ||
      (ucs >= 0x2e80 && ucs <= 0xa4cf &&
       ucs != 0x303f) ||                  /* CJK ... Yi */
      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
      (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
      (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
      (ucs >= 0xffe0 && ucs <= 0xffe6) ||
      (ucs >= 0x20000 && ucs <= 0x2fffd) ||
      (ucs >= 0x30000 && ucs <= 0x3fffd))
    return 2;

  return 1;
}
Example #16
0
bool
indicwide(xchar c)
{
  /* true when bisearch finds c in the indic table */
  const bool listed = bisearch(c, indic, lengthof(indic));

  return listed;
}
Example #17
0
bool
ambigwide(xchar c)
{
  /* not in the ambiguous table: never ambiguous-wide */
  if (!bisearch(c, ambiguous, lengthof(ambiguous)))
    return false;

  /* in the ambiguous table: counts only if the wide table does not list it */
  return !bisearch(c, wide, lengthof(wide));
}
Example #18
0
bool
combiningdouble(xchar c)
{
  /* true when bisearch finds c in the combdouble table */
  const bool listed = bisearch(c, combdouble, lengthof(combdouble));

  return listed;
}
Example #19
0
bool
extrawide(xchar c)
{
  /* true when bisearch finds c in the extra table */
  const bool listed = bisearch(c, extra, lengthof(extra));

  return listed;
}
Example #20
0
int mk_wcwidth(wchar_t ucs)
{
  /* sorted list of non-overlapping intervals of non-spacing characters */
  /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */

  // From https://github.com/jquast/wcwidth/blob/master/wcwidth/table_zero.py
  // at commit 0d7de112202cc8b2ebe9232ff4a5c954f19d561a (2016-07-02):
  // Original source: DerivedGeneralCategory-9.0.0.txt
  static const struct interval combining[] = {
    {0x0300, 0x036f},  // Combining Grave Accent  ..Combining Latin Small Le
    {0x0483, 0x0489},  // Combining Cyrillic Titlo..Combining Cyrillic Milli
    {0x0591, 0x05bd},  // Hebrew Accent Etnahta   ..Hebrew Point Meteg
    {0x05bf, 0x05bf},  // Hebrew Point Rafe       ..Hebrew Point Rafe
    {0x05c1, 0x05c2},  // Hebrew Point Shin Dot   ..Hebrew Point Sin Dot
    {0x05c4, 0x05c5},  // Hebrew Mark Upper Dot   ..Hebrew Mark Lower Dot
    {0x05c7, 0x05c7},  // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
    {0x0610, 0x061a},  // Arabic Sign Sallallahou ..Arabic Small Kasra
    {0x064b, 0x065f},  // Arabic Fathatan         ..Arabic Wavy Hamza Below
    {0x0670, 0x0670},  // Arabic Letter Superscrip..Arabic Letter Superscrip
    {0x06d6, 0x06dc},  // Arabic Small High Ligatu..Arabic Small High Seen
    {0x06df, 0x06e4},  // Arabic Small High Rounde..Arabic Small High Madda
    {0x06e7, 0x06e8},  // Arabic Small High Yeh   ..Arabic Small High Noon
    {0x06ea, 0x06ed},  // Arabic Empty Centre Low ..Arabic Small Low Meem
    {0x0711, 0x0711},  // Syriac Letter Superscrip..Syriac Letter Superscrip
    {0x0730, 0x074a},  // Syriac Pthaha Above     ..Syriac Barrekh
    {0x07a6, 0x07b0},  // Thaana Abafili          ..Thaana Sukun
    {0x07eb, 0x07f3},  // Nko Combining Short High..Nko Combining Double Dot
    {0x0816, 0x0819},  // Samaritan Mark In       ..Samaritan Mark Dagesh
    {0x081b, 0x0823},  // Samaritan Mark Epentheti..Samaritan Vowel Sign A
    {0x0825, 0x0827},  // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
    {0x0829, 0x082d},  // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
    {0x0859, 0x085b},  // Mandaic Affrication Mark..Mandaic Gemination Mark
    {0x08d4, 0x08e1},  // (nil)                   ..
    {0x08e3, 0x0902},  // Arabic Turned Damma Belo..Devanagari Sign Anusvara
    {0x093a, 0x093a},  // Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe
    {0x093c, 0x093c},  // Devanagari Sign Nukta   ..Devanagari Sign Nukta
    {0x0941, 0x0948},  // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai
    {0x094d, 0x094d},  // Devanagari Sign Virama  ..Devanagari Sign Virama
    {0x0951, 0x0957},  // Devanagari Stress Sign U..Devanagari Vowel Sign Uu
    {0x0962, 0x0963},  // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
    {0x0981, 0x0981},  // Bengali Sign Candrabindu..Bengali Sign Candrabindu
    {0x09bc, 0x09bc},  // Bengali Sign Nukta      ..Bengali Sign Nukta
    {0x09c1, 0x09c4},  // Bengali Vowel Sign U    ..Bengali Vowel Sign Vocal
    {0x09cd, 0x09cd},  // Bengali Sign Virama     ..Bengali Sign Virama
    {0x09e2, 0x09e3},  // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
    {0x0a01, 0x0a02},  // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi
    {0x0a3c, 0x0a3c},  // Gurmukhi Sign Nukta     ..Gurmukhi Sign Nukta
    {0x0a41, 0x0a42},  // Gurmukhi Vowel Sign U   ..Gurmukhi Vowel Sign Uu
    {0x0a47, 0x0a48},  // Gurmukhi Vowel Sign Ee  ..Gurmukhi Vowel Sign Ai
    {0x0a4b, 0x0a4d},  // Gurmukhi Vowel Sign Oo  ..Gurmukhi Sign Virama
    {0x0a51, 0x0a51},  // Gurmukhi Sign Udaat     ..Gurmukhi Sign Udaat
    {0x0a70, 0x0a71},  // Gurmukhi Tippi          ..Gurmukhi Addak
    {0x0a75, 0x0a75},  // Gurmukhi Sign Yakash    ..Gurmukhi Sign Yakash
    {0x0a81, 0x0a82},  // Gujarati Sign Candrabind..Gujarati Sign Anusvara
    {0x0abc, 0x0abc},  // Gujarati Sign Nukta     ..Gujarati Sign Nukta
    {0x0ac1, 0x0ac5},  // Gujarati Vowel Sign U   ..Gujarati Vowel Sign Cand
    {0x0ac7, 0x0ac8},  // Gujarati Vowel Sign E   ..Gujarati Vowel Sign Ai
    {0x0acd, 0x0acd},  // Gujarati Sign Virama    ..Gujarati Sign Virama
    {0x0ae2, 0x0ae3},  // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
    {0x0b01, 0x0b01},  // Oriya Sign Candrabindu  ..Oriya Sign Candrabindu
    {0x0b3c, 0x0b3c},  // Oriya Sign Nukta        ..Oriya Sign Nukta
    {0x0b3f, 0x0b3f},  // Oriya Vowel Sign I      ..Oriya Vowel Sign I
    {0x0b41, 0x0b44},  // Oriya Vowel Sign U      ..Oriya Vowel Sign Vocalic
    {0x0b4d, 0x0b4d},  // Oriya Sign Virama       ..Oriya Sign Virama
    {0x0b56, 0x0b56},  // Oriya Ai Length Mark    ..Oriya Ai Length Mark
    {0x0b62, 0x0b63},  // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
    {0x0b82, 0x0b82},  // Tamil Sign Anusvara     ..Tamil Sign Anusvara
    {0x0bc0, 0x0bc0},  // Tamil Vowel Sign Ii     ..Tamil Vowel Sign Ii
    {0x0bcd, 0x0bcd},  // Tamil Sign Virama       ..Tamil Sign Virama
    {0x0c00, 0x0c00},  // Telugu Sign Combining Ca..Telugu Sign Combining Ca
    {0x0c3e, 0x0c40},  // Telugu Vowel Sign Aa    ..Telugu Vowel Sign Ii
    {0x0c46, 0x0c48},  // Telugu Vowel Sign E     ..Telugu Vowel Sign Ai
    {0x0c4a, 0x0c4d},  // Telugu Vowel Sign O     ..Telugu Sign Virama
    {0x0c55, 0x0c56},  // Telugu Length Mark      ..Telugu Ai Length Mark
    {0x0c62, 0x0c63},  // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
    {0x0c81, 0x0c81},  // Kannada Sign Candrabindu..Kannada Sign Candrabindu
    {0x0cbc, 0x0cbc},  // Kannada Sign Nukta      ..Kannada Sign Nukta
    {0x0cbf, 0x0cbf},  // Kannada Vowel Sign I    ..Kannada Vowel Sign I
    {0x0cc6, 0x0cc6},  // Kannada Vowel Sign E    ..Kannada Vowel Sign E
    {0x0ccc, 0x0ccd},  // Kannada Vowel Sign Au   ..Kannada Sign Virama
    {0x0ce2, 0x0ce3},  // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
    {0x0d01, 0x0d01},  // Malayalam Sign Candrabin..Malayalam Sign Candrabin
    {0x0d41, 0x0d44},  // Malayalam Vowel Sign U  ..Malayalam Vowel Sign Voc
    {0x0d4d, 0x0d4d},  // Malayalam Sign Virama   ..Malayalam Sign Virama
    {0x0d62, 0x0d63},  // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
    {0x0dca, 0x0dca},  // Sinhala Sign Al-lakuna  ..Sinhala Sign Al-lakuna
    {0x0dd2, 0x0dd4},  // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti
    {0x0dd6, 0x0dd6},  // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
    {0x0e31, 0x0e31},  // Thai Character Mai Han-a..Thai Character Mai Han-a
    {0x0e34, 0x0e3a},  // Thai Character Sara I   ..Thai Character Phinthu
    {0x0e47, 0x0e4e},  // Thai Character Maitaikhu..Thai Character Yamakkan
    {0x0eb1, 0x0eb1},  // Lao Vowel Sign Mai Kan  ..Lao Vowel Sign Mai Kan
    {0x0eb4, 0x0eb9},  // Lao Vowel Sign I        ..Lao Vowel Sign Uu
    {0x0ebb, 0x0ebc},  // Lao Vowel Sign Mai Kon  ..Lao Semivowel Sign Lo
    {0x0ec8, 0x0ecd},  // Lao Tone Mai Ek         ..Lao Niggahita
    {0x0f18, 0x0f19},  // Tibetan Astrological Sig..Tibetan Astrological Sig
    {0x0f35, 0x0f35},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
    {0x0f37, 0x0f37},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
    {0x0f39, 0x0f39},  // Tibetan Mark Tsa -phru  ..Tibetan Mark Tsa -phru
    {0x0f71, 0x0f7e},  // Tibetan Vowel Sign Aa   ..Tibetan Sign Rjes Su Nga
    {0x0f80, 0x0f84},  // Tibetan Vowel Sign Rever..Tibetan Mark Halanta
    {0x0f86, 0x0f87},  // Tibetan Sign Lci Rtags  ..Tibetan Sign Yang Rtags
    {0x0f8d, 0x0f97},  // Tibetan Subjoined Sign L..Tibetan Subjoined Letter
    {0x0f99, 0x0fbc},  // Tibetan Subjoined Letter..Tibetan Subjoined Letter
    {0x0fc6, 0x0fc6},  // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda
    {0x102d, 0x1030},  // Myanmar Vowel Sign I    ..Myanmar Vowel Sign Uu
    {0x1032, 0x1037},  // Myanmar Vowel Sign Ai   ..Myanmar Sign Dot Below
    {0x1039, 0x103a},  // Myanmar Sign Virama     ..Myanmar Sign Asat
    {0x103d, 0x103e},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
    {0x1058, 0x1059},  // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
    {0x105e, 0x1060},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
    {0x1071, 0x1074},  // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
    {0x1082, 0x1082},  // Myanmar Consonant Sign S..Myanmar Consonant Sign S
    {0x1085, 0x1086},  // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
    {0x108d, 0x108d},  // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci
    {0x109d, 0x109d},  // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton
    {0x135d, 0x135f},  // Ethiopic Combining Gemin..Ethiopic Combining Gemin
    {0x1712, 0x1714},  // Tagalog Vowel Sign I    ..Tagalog Sign Virama
    {0x1732, 0x1734},  // Hanunoo Vowel Sign I    ..Hanunoo Sign Pamudpod
    {0x1752, 0x1753},  // Buhid Vowel Sign I      ..Buhid Vowel Sign U
    {0x1772, 0x1773},  // Tagbanwa Vowel Sign I   ..Tagbanwa Vowel Sign U
    {0x17b4, 0x17b5},  // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa
    {0x17b7, 0x17bd},  // Khmer Vowel Sign I      ..Khmer Vowel Sign Ua
    {0x17c6, 0x17c6},  // Khmer Sign Nikahit      ..Khmer Sign Nikahit
    {0x17c9, 0x17d3},  // Khmer Sign Muusikatoan  ..Khmer Sign Bathamasat
    {0x17dd, 0x17dd},  // Khmer Sign Atthacan     ..Khmer Sign Atthacan
    {0x180b, 0x180d},  // Mongolian Free Variation..Mongolian Free Variation
    {0x1885, 0x1886},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
    {0x18a9, 0x18a9},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
    {0x1920, 0x1922},  // Limbu Vowel Sign A      ..Limbu Vowel Sign U
    {0x1927, 0x1928},  // Limbu Vowel Sign E      ..Limbu Vowel Sign O
    {0x1932, 0x1932},  // Limbu Small Letter Anusv..Limbu Small Letter Anusv
    {0x1939, 0x193b},  // Limbu Sign Mukphreng    ..Limbu Sign Sa-i
    {0x1a17, 0x1a18},  // Buginese Vowel Sign I   ..Buginese Vowel Sign U
    {0x1a1b, 0x1a1b},  // Buginese Vowel Sign Ae  ..Buginese Vowel Sign Ae
    {0x1a56, 0x1a56},  // Tai Tham Consonant Sign ..Tai Tham Consonant Sign
    {0x1a58, 0x1a5e},  // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign
    {0x1a60, 0x1a60},  // Tai Tham Sign Sakot     ..Tai Tham Sign Sakot
    {0x1a62, 0x1a62},  // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai
    {0x1a65, 0x1a6c},  // Tai Tham Vowel Sign I   ..Tai Tham Vowel Sign Oa B
    {0x1a73, 0x1a7c},  // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue
    {0x1a7f, 0x1a7f},  // Tai Tham Combining Crypt..Tai Tham Combining Crypt
    {0x1ab0, 0x1abe},  // Combining Doubled Circum..Combining Parentheses Ov
    {0x1b00, 0x1b03},  // Balinese Sign Ulu Ricem ..Balinese Sign Surang
    {0x1b34, 0x1b34},  // Balinese Sign Rerekan   ..Balinese Sign Rerekan
    {0x1b36, 0x1b3a},  // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R
    {0x1b3c, 0x1b3c},  // Balinese Vowel Sign La L..Balinese Vowel Sign La L
    {0x1b42, 0x1b42},  // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe
    {0x1b6b, 0x1b73},  // Balinese Musical Symbol ..Balinese Musical Symbol
    {0x1b80, 0x1b81},  // Sundanese Sign Panyecek ..Sundanese Sign Panglayar
    {0x1ba2, 0x1ba5},  // Sundanese Consonant Sign..Sundanese Vowel Sign Pan
    {0x1ba8, 0x1ba9},  // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan
    {0x1bab, 0x1bad},  // Sundanese Sign Virama   ..Sundanese Consonant Sign
    {0x1be6, 0x1be6},  // Batak Sign Tompi        ..Batak Sign Tompi
    {0x1be8, 0x1be9},  // Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee
    {0x1bed, 0x1bed},  // Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O
    {0x1bef, 0x1bf1},  // Batak Vowel Sign U For S..Batak Consonant Sign H
    {0x1c2c, 0x1c33},  // Lepcha Vowel Sign E     ..Lepcha Consonant Sign T
    {0x1c36, 0x1c37},  // Lepcha Sign Ran         ..Lepcha Sign Nukta
    {0x1cd0, 0x1cd2},  // Vedic Tone Karshana     ..Vedic Tone Prenkha
    {0x1cd4, 0x1ce0},  // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
    {0x1ce2, 0x1ce8},  // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
    {0x1ced, 0x1ced},  // Vedic Sign Tiryak       ..Vedic Sign Tiryak
    {0x1cf4, 0x1cf4},  // Vedic Tone Candra Above ..Vedic Tone Candra Above
    {0x1cf8, 0x1cf9},  // Vedic Tone Ring Above   ..Vedic Tone Double Ring A
    {0x1dc0, 0x1df5},  // Combining Dotted Grave A..Combining Up Tack Above
    {0x1dfb, 0x1dff},  // (nil)                   ..Combining Right Arrowhea
    {0x20d0, 0x20f0},  // Combining Left Harpoon A..Combining Asterisk Above
    {0x2cef, 0x2cf1},  // Coptic Combining Ni Abov..Coptic Combining Spiritu
    {0x2d7f, 0x2d7f},  // Tifinagh Consonant Joine..Tifinagh Consonant Joine
    {0x2de0, 0x2dff},  // Combining Cyrillic Lette..Combining Cyrillic Lette
    {0x302a, 0x302d},  // Ideographic Level Tone M..Ideographic Entering Ton
    {0x3099, 0x309a},  // Combining Katakana-hirag..Combining Katakana-hirag
    {0xa66f, 0xa672},  // Combining Cyrillic Vzmet..Combining Cyrillic Thous
    {0xa674, 0xa67d},  // Combining Cyrillic Lette..Combining Cyrillic Payer
    {0xa69e, 0xa69f},  // Combining Cyrillic Lette..Combining Cyrillic Lette
    {0xa6f0, 0xa6f1},  // Bamum Combining Mark Koq..Bamum Combining Mark Tuk
    {0xa802, 0xa802},  // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva
    {0xa806, 0xa806},  // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant
    {0xa80b, 0xa80b},  // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva
    {0xa825, 0xa826},  // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
    {0xa8c4, 0xa8c5},  // Saurashtra Sign Virama  ..
    {0xa8e0, 0xa8f1},  // Combining Devanagari Dig..Combining Devanagari Sig
    {0xa926, 0xa92d},  // Kayah Li Vowel Ue       ..Kayah Li Tone Calya Plop
    {0xa947, 0xa951},  // Rejang Vowel Sign I     ..Rejang Consonant Sign R
    {0xa980, 0xa982},  // Javanese Sign Panyangga ..Javanese Sign Layar
    {0xa9b3, 0xa9b3},  // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu
    {0xa9b6, 0xa9b9},  // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku
    {0xa9bc, 0xa9bc},  // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe
    {0xa9e5, 0xa9e5},  // Myanmar Sign Shan Saw   ..Myanmar Sign Shan Saw
    {0xaa29, 0xaa2e},  // Cham Vowel Sign Aa      ..Cham Vowel Sign Oe
    {0xaa31, 0xaa32},  // Cham Vowel Sign Au      ..Cham Vowel Sign Ue
    {0xaa35, 0xaa36},  // Cham Consonant Sign La  ..Cham Consonant Sign Wa
    {0xaa43, 0xaa43},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
    {0xaa4c, 0xaa4c},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
    {0xaa7c, 0xaa7c},  // Myanmar Sign Tai Laing T..Myanmar Sign Tai Laing T
    {0xaab0, 0xaab0},  // Tai Viet Mai Kang       ..Tai Viet Mai Kang
    {0xaab2, 0xaab4},  // Tai Viet Vowel I        ..Tai Viet Vowel U
    {0xaab7, 0xaab8},  // Tai Viet Mai Khit       ..Tai Viet Vowel Ia
    {0xaabe, 0xaabf},  // Tai Viet Vowel Am       ..Tai Viet Tone Mai Ek
    {0xaac1, 0xaac1},  // Tai Viet Tone Mai Tho   ..Tai Viet Tone Mai Tho
    {0xaaec, 0xaaed},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
    {0xaaf6, 0xaaf6},  // Meetei Mayek Virama     ..Meetei Mayek Virama
    {0xabe5, 0xabe5},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
    {0xabe8, 0xabe8},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
    {0xabed, 0xabed},  // Meetei Mayek Apun Iyek  ..Meetei Mayek Apun Iyek
    {0xfb1e, 0xfb1e},  // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani
    {0xfe00, 0xfe0f},  // Variation Selector-1    ..Variation Selector-16
    {0xfe20, 0xfe2f},  // Combining Ligature Left ..Combining Cyrillic Titlo
    {0x101fd, 0x101fd},  // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi
    {0x102e0, 0x102e0},  // Coptic Epact Thousands M..Coptic Epact Thousands M
    {0x10376, 0x1037a},  // Combining Old Permic Let..Combining Old Permic Let
    {0x10a01, 0x10a03},  // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
    {0x10a05, 0x10a06},  // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
    {0x10a0c, 0x10a0f},  // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
    {0x10a38, 0x10a3a},  // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
    {0x10a3f, 0x10a3f},  // Kharoshthi Virama       ..Kharoshthi Virama
    {0x10ae5, 0x10ae6},  // Manichaean Abbreviation ..Manichaean Abbreviation
    {0x11001, 0x11001},  // Brahmi Sign Anusvara    ..Brahmi Sign Anusvara
    {0x11038, 0x11046},  // Brahmi Vowel Sign Aa    ..Brahmi Virama
    {0x1107f, 0x11081},  // Brahmi Number Joiner    ..Kaithi Sign Anusvara
    {0x110b3, 0x110b6},  // Kaithi Vowel Sign U     ..Kaithi Vowel Sign Ai
    {0x110b9, 0x110ba},  // Kaithi Sign Virama      ..Kaithi Sign Nukta
    {0x11100, 0x11102},  // Chakma Sign Candrabindu ..Chakma Sign Visarga
    {0x11127, 0x1112b},  // Chakma Vowel Sign A     ..Chakma Vowel Sign Uu
    {0x1112d, 0x11134},  // Chakma Vowel Sign Ai    ..Chakma Maayyaa
    {0x11173, 0x11173},  // Mahajani Sign Nukta     ..Mahajani Sign Nukta
    {0x11180, 0x11181},  // Sharada Sign Candrabindu..Sharada Sign Anusvara
    {0x111b6, 0x111be},  // Sharada Vowel Sign U    ..Sharada Vowel Sign O
    {0x111ca, 0x111cc},  // Sharada Sign Nukta      ..Sharada Extra Short Vowe
    {0x1122f, 0x11231},  // Khojki Vowel Sign U     ..Khojki Vowel Sign Ai
    {0x11234, 0x11234},  // Khojki Sign Anusvara    ..Khojki Sign Anusvara
    {0x11236, 0x11237},  // Khojki Sign Nukta       ..Khojki Sign Shadda
    {0x1123e, 0x1123e},  // (nil)                   ..
    {0x112df, 0x112df},  // Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara
    {0x112e3, 0x112ea},  // Khudawadi Vowel Sign U  ..Khudawadi Sign Virama
    {0x11300, 0x11301},  // Grantha Sign Combining A..Grantha Sign Candrabindu
    {0x1133c, 0x1133c},  // Grantha Sign Nukta      ..Grantha Sign Nukta
    {0x11340, 0x11340},  // Grantha Vowel Sign Ii   ..Grantha Vowel Sign Ii
    {0x11366, 0x1136c},  // Combining Grantha Digit ..Combining Grantha Digit
    {0x11370, 0x11374},  // Combining Grantha Letter..Combining Grantha Letter
    {0x11438, 0x1143f},  // (nil)                   ..
    {0x11442, 0x11444},  // (nil)                   ..
    {0x11446, 0x11446},  // (nil)                   ..
    {0x114b3, 0x114b8},  // Tirhuta Vowel Sign U    ..Tirhuta Vowel Sign Vocal
    {0x114ba, 0x114ba},  // Tirhuta Vowel Sign Sh||t..Tirhuta Vowel Sign Sh||t
    {0x114bf, 0x114c0},  // Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara
    {0x114c2, 0x114c3},  // Tirhuta Sign Virama     ..Tirhuta Sign Nukta
    {0x115b2, 0x115b5},  // Siddham Vowel Sign U    ..Siddham Vowel Sign Vocal
    {0x115bc, 0x115bd},  // Siddham Sign Candrabindu..Siddham Sign Anusvara
    {0x115bf, 0x115c0},  // Siddham Sign Virama     ..Siddham Sign Nukta
    {0x115dc, 0x115dd},  // Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
    {0x11633, 0x1163a},  // Modi Vowel Sign U       ..Modi Vowel Sign Ai
    {0x1163d, 0x1163d},  // Modi Sign Anusvara      ..Modi Sign Anusvara
    {0x1163f, 0x11640},  // Modi Sign Virama        ..Modi Sign Ardhacandra
    {0x116ab, 0x116ab},  // Takri Sign Anusvara     ..Takri Sign Anusvara
    {0x116ad, 0x116ad},  // Takri Vowel Sign Aa     ..Takri Vowel Sign Aa
    {0x116b0, 0x116b5},  // Takri Vowel Sign U      ..Takri Vowel Sign Au
    {0x116b7, 0x116b7},  // Takri Sign Nukta        ..Takri Sign Nukta
    {0x1171d, 0x1171f},  // Ahom Consonant Sign Medi..Ahom Consonant Sign Medi
    {0x11722, 0x11725},  // Ahom Vowel Sign I       ..Ahom Vowel Sign Uu
    {0x11727, 0x1172b},  // Ahom Vowel Sign Aw      ..Ahom Sign Killer
    {0x11c30, 0x11c36},  // (nil)                   ..
    {0x11c38, 0x11c3d},  // (nil)                   ..
    {0x11c3f, 0x11c3f},  // (nil)                   ..
    {0x11c92, 0x11ca7},  // (nil)                   ..
    {0x11caa, 0x11cb0},  // (nil)                   ..
    {0x11cb2, 0x11cb3},  // (nil)                   ..
    {0x11cb5, 0x11cb6},  // (nil)                   ..
    {0x16af0, 0x16af4},  // Bassa Vah Combining High..Bassa Vah Combining High
    {0x16b30, 0x16b36},  // Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
    {0x16f8f, 0x16f92},  // Miao Tone Right         ..Miao Tone Below
    {0x1bc9d, 0x1bc9e},  // Duployan Thick Letter Se..Duployan Double Mark
    {0x1d167, 0x1d169},  // Musical Symbol Combining..Musical Symbol Combining
    {0x1d17b, 0x1d182},  // Musical Symbol Combining..Musical Symbol Combining
    {0x1d185, 0x1d18b},  // Musical Symbol Combining..Musical Symbol Combining
    {0x1d1aa, 0x1d1ad},  // Musical Symbol Combining..Musical Symbol Combining
    {0x1d242, 0x1d244},  // Combining Greek Musical ..Combining Greek Musical
    {0x1da00, 0x1da36},  // Signwriting Head Rim    ..Signwriting Air Sucking
    {0x1da3b, 0x1da6c},  // Signwriting Mouth Closed..Signwriting Excitement
    {0x1da75, 0x1da75},  // Signwriting Upper Body T..Signwriting Upper Body T
    {0x1da84, 0x1da84},  // Signwriting Location Hea..Signwriting Location Hea
    {0x1da9b, 0x1da9f},  // Signwriting Fill Modifie..Signwriting Fill Modifie
    {0x1daa1, 0x1daaf},  // Signwriting Rotation Mod..Signwriting Rotation Mod
    {0x1e000, 0x1e006},  // (nil)                   ..
    {0x1e008, 0x1e018},  // (nil)                   ..
    {0x1e01b, 0x1e021},  // (nil)                   ..
    {0x1e023, 0x1e024},  // (nil)                   ..
    {0x1e026, 0x1e02a},  // (nil)                   ..
    {0x1e8d0, 0x1e8d6},  // Mende Kikakui Combining ..Mende Kikakui Combining
    {0x1e944, 0x1e94a},  // (nil)                   ..
    {0xe0100, 0xe01ef},  // Variation Select||-17   ..Variation Select||-256
  };

  // https://github.com/jquast/wcwidth/blob/master/wcwidth/table_wide.py
  // at commit 0d7de112202cc8b2ebe9232ff4a5c954f19d561a (2016-07-02):
  // Original source: EastAsianWidth-9.0.0.txt
  static struct interval WIDE_GLYPH[] = {
    {0x1100, 0x115f},  // Hangul Choseong Kiyeok  ..Hangul Choseong Filler
    {0x231a, 0x231b},  // Watch                   ..Hourglass
    {0x2329, 0x232a},  // Left-pointing Angle Brac..Right-pointing Angle Bra
    {0x23e9, 0x23ec},  // Black Right-pointing Dou..Black Down-pointing Doub
    {0x23f0, 0x23f0},  // Alarm Clock             ..Alarm Clock
    {0x23f3, 0x23f3},  // Hourglass With Flowing S..Hourglass With Flowing S
    {0x25fd, 0x25fe},  // White Medium Small Squar..Black Medium Small Squar
    {0x2614, 0x2615},  // Umbrella With Rain Drops..Hot Beverage
    {0x2648, 0x2653},  // Aries                   ..Pisces
    {0x267f, 0x267f},  // Wheelchair Symbol       ..Wheelchair Symbol
    {0x2693, 0x2693},  // Anch||                  ..Anch||
    {0x26a1, 0x26a1},  // High Voltage Sign       ..High Voltage Sign
    {0x26aa, 0x26ab},  // Medium White Circle     ..Medium Black Circle
    {0x26bd, 0x26be},  // Soccer Ball             ..Baseball
    {0x26c4, 0x26c5},  // Snowman Without Snow    ..Sun Behind Cloud
    {0x26ce, 0x26ce},  // Ophiuchus               ..Ophiuchus
    {0x26d4, 0x26d4},  // No Entry                ..No Entry
    {0x26ea, 0x26ea},  // Church                  ..Church
    {0x26f2, 0x26f3},  // Fountain                ..Flag In Hole
    {0x26f5, 0x26f5},  // Sailboat                ..Sailboat
    {0x26fa, 0x26fa},  // Tent                    ..Tent
    {0x26fd, 0x26fd},  // Fuel Pump               ..Fuel Pump
    {0x2705, 0x2705},  // White Heavy Check Mark  ..White Heavy Check Mark
    {0x270a, 0x270b},  // Raised Fist             ..Raised Hand
    {0x2728, 0x2728},  // Sparkles                ..Sparkles
    {0x274c, 0x274c},  // Cross Mark              ..Cross Mark
    {0x274e, 0x274e},  // Negative Squared Cross M..Negative Squared Cross M
    {0x2753, 0x2755},  // Black Question Mark ||na..White Exclamation Mark O
    {0x2757, 0x2757},  // Heavy Exclamation Mark S..Heavy Exclamation Mark S
    {0x2795, 0x2797},  // Heavy Plus Sign         ..Heavy Division Sign
    {0x27b0, 0x27b0},  // Curly Loop              ..Curly Loop
    {0x27bf, 0x27bf},  // Double Curly Loop       ..Double Curly Loop
    {0x2b1b, 0x2b1c},  // Black Large Square      ..White Large Square
    {0x2b50, 0x2b50},  // White Medium Star       ..White Medium Star
    {0x2b55, 0x2b55},  // Heavy Large Circle      ..Heavy Large Circle
    {0x2e80, 0x2e99},  // Cjk Radical Repeat      ..Cjk Radical Rap
    {0x2e9b, 0x2ef3},  // Cjk Radical Choke       ..Cjk Radical C-simplified
    {0x2f00, 0x2fd5},  // Kangxi Radical One      ..Kangxi Radical Flute
    {0x2ff0, 0x2ffb},  // Ideographic Description ..Ideographic Description
    {0x3000, 0x303e},  // Ideographic Space       ..Ideographic Variation In
    {0x3041, 0x3096},  // Hiragana Letter Small A ..Hiragana Letter Small Ke
    {0x3099, 0x30ff},  // Combining Katakana-hirag..Katakana Digraph Koto
    {0x3105, 0x312d},  // Bopomofo Letter B       ..Bopomofo Letter Ih
    {0x3131, 0x318e},  // Hangul Letter Kiyeok    ..Hangul Letter Araeae
    {0x3190, 0x31ba},  // Ideographic Annotation L..Bopomofo Letter Zy
    {0x31c0, 0x31e3},  // Cjk Stroke T            ..Cjk Stroke Q
    {0x31f0, 0x321e},  // Katakana Letter Small Ku..Parenthesized K||ean Cha
    {0x3220, 0x3247},  // Parenthesized Ideograph ..Circled Ideograph Koto
    {0x3250, 0x32fe},  // Partnership Sign        ..Circled Katakana Wo
    {0x3300, 0x4dbf},  // Square Apaato           ..
    {0x4e00, 0xa48c},  // Cjk Unified Ideograph-4e..Yi Syllable Yyr
    {0xa490, 0xa4c6},  // Yi Radical Qot          ..Yi Radical Ke
    {0xa960, 0xa97c},  // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
    {0xac00, 0xd7a3},  // Hangul Syllable Ga      ..Hangul Syllable Hih
    {0xf900, 0xfaff},  // Cjk Compatibility Ideogr..
    {0xfe10, 0xfe19},  // Presentation F||m F|| Ve..Presentation F||m F|| Ve
    {0xfe30, 0xfe52},  // Presentation F||m F|| Ve..Small Full Stop
    {0xfe54, 0xfe66},  // Small Semicolon         ..Small Equals Sign
    {0xfe68, 0xfe6b},  // Small Reverse Solidus   ..Small Commercial At
    {0xff01, 0xff60},  // Fullwidth Exclamation Ma..Fullwidth Right White Pa
    {0xffe0, 0xffe6},  // Fullwidth Cent Sign     ..Fullwidth Won Sign
    {0x16fe0, 0x16fe0},  // (nil)                   ..
    {0x17000, 0x187ec},  // (nil)                   ..
    {0x18800, 0x18af2},  // (nil)                   ..
    {0x1b000, 0x1b001},  // Katakana Letter Archaic ..Hiragana Letter Archaic
    {0x1f004, 0x1f004},  // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon
    {0x1f0cf, 0x1f0cf},  // Playing Card Black Joker..Playing Card Black Joker
    {0x1f18e, 0x1f18e},  // Negative Squared Ab     ..Negative Squared Ab
    {0x1f191, 0x1f19a},  // Squared Cl              ..Squared Vs
    {0x1f200, 0x1f202},  // Square Hiragana Hoka    ..Squared Katakana Sa
    {0x1f210, 0x1f23b},  // Squared Cjk Unified Ideo..
    {0x1f240, 0x1f248},  // T||toise Shell Bracketed..T||toise Shell Bracketed
    {0x1f250, 0x1f251},  // Circled Ideograph Advant..Circled Ideograph Accept
    {0x1f300, 0x1f320},  // Cyclone                 ..Shooting Star
    {0x1f32d, 0x1f335},  // Hot Dog                 ..Cactus
    {0x1f337, 0x1f37c},  // Tulip                   ..Baby Bottle
    {0x1f37e, 0x1f393},  // Bottle With Popping C||k..Graduation Cap
    {0x1f3a0, 0x1f3ca},  // Carousel H||se          ..Swimmer
    {0x1f3cf, 0x1f3d3},  // Cricket Bat And Ball    ..Table Tennis Paddle And
    {0x1f3e0, 0x1f3f0},  // House Building          ..European Castle
    {0x1f3f4, 0x1f3f4},  // Waving Black Flag       ..Waving Black Flag
    {0x1f3f8, 0x1f43e},  // Badminton Racquet And Sh..Paw Prints
    {0x1f440, 0x1f440},  // Eyes                    ..Eyes
    {0x1f442, 0x1f4fc},  // Ear                     ..Videocassette
    {0x1f4ff, 0x1f53d},  // Prayer Beads            ..Down-pointing Small Red
    {0x1f54b, 0x1f54e},  // Kaaba                   ..Men||ah With Nine Branch
    {0x1f550, 0x1f567},  // Clock Face One Oclock   ..Clock Face Twelve-thirty
    {0x1f57a, 0x1f57a},  // (nil)                   ..
    {0x1f595, 0x1f596},  // Reversed Hand With Middl..Raised Hand With Part Be
    {0x1f5a4, 0x1f5a4},  // (nil)                   ..
    {0x1f5fb, 0x1f64f},  // Mount Fuji              ..Person With Folded Hands
    {0x1f680, 0x1f6c5},  // Rocket                  ..Left Luggage
    {0x1f6cc, 0x1f6cc},  // Sleeping Accommodation  ..Sleeping Accommodation
    {0x1f6d0, 0x1f6d2},  // Place Of W||ship        ..
    {0x1f6eb, 0x1f6ec},  // Airplane Departure      ..Airplane Arriving
    {0x1f6f4, 0x1f6f6},  // (nil)                   ..
    {0x1f910, 0x1f91e},  // Zipper-mouth Face       ..
    {0x1f920, 0x1f927},  // (nil)                   ..
    {0x1f930, 0x1f930},  // (nil)                   ..
    {0x1f933, 0x1f93e},  // (nil)                   ..
    {0x1f940, 0x1f94b},  // (nil)                   ..
    {0x1f950, 0x1f95e},  // (nil)                   ..
    {0x1f980, 0x1f991},  // Crab                    ..
    {0x1f9c0, 0x1f9c0},  // Cheese Wedge            ..Cheese Wedge
    {0x20000, 0x2fffd},  // Cjk Unified Ideograph-20..
    {0x30000, 0x3fffd},  // (nil)                   ..
};

  /* test for 8-bit control characters */
  if (ucs == 0)
    return 0;
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
    return -1;

  /* Fast return for latin. */
  if (ucs < 0x300) return 1;

  /* Binary search in table of non-spacing characters. */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(struct interval) - 1))
    return 0;

  /* Other zero-width characters not in table above. */
  if ((0x200B <= ucs && ucs <= 0x200F) ||
      ucs == 0x2028 ||
      ucs == 0x2029 ||
      (0x202A <= ucs && ucs <= 0x202E) ||
      (0x2060 <= ucs && ucs <= 0x2063))
    return 0;

  /* If we arrive here, ucs is not a combining or C0/C1 control character */
  if (bisearch(ucs, WIDE_GLYPH,
               sizeof(WIDE_GLYPH) / sizeof(struct interval) - 1))
    return 2;
  return 1;
}
Example #21
0
File: utf8.c Project: Advael/git
/*
 * Column width of the code point ch.
 *
 * Returns 0 for NUL and for code points found in the combining table,
 * -1 for 8-bit control characters, 2 for the wide ranges tested
 * below, and 1 for everything else.
 */
static int git_wcwidth(ucs_char_t ch)
{
	/*
	 * Non-spacing code points as a sorted list of non-overlapping
	 * intervals, generated by
	 *   "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c".
	 */
	static const struct interval combining[] = {
		{ 0x0300, 0x0357 }, { 0x035D, 0x036F }, { 0x0483, 0x0486 },
		{ 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
		{ 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
		{ 0x05C4, 0x05C4 }, { 0x0600, 0x0603 }, { 0x0610, 0x0615 },
		{ 0x064B, 0x0658 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 },
		{ 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x070F, 0x070F },
		{ 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
		{ 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
		{ 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
		{ 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
		{ 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 },
		{ 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
		{ 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
		{ 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
		{ 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 },
		{ 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 },
		{ 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 },
		{ 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
		{ 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
		{ 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 },
		{ 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
		{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
		{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
		{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
		{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
		{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
		{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
		{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x1712, 0x1714 },
		{ 0x1732, 0x1734 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 },
		{ 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 },
		{ 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D },
		{ 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 },
		{ 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x200B, 0x200F },
		{ 0x202A, 0x202E }, { 0x2060, 0x2063 }, { 0x206A, 0x206F },
		{ 0x20D0, 0x20EA }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
		{ 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 },
		{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, { 0x1D167, 0x1D169 },
		{ 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B },
		{ 0x1D1AA, 0x1D1AD }, { 0xE0001, 0xE0001 },
		{ 0xE0020, 0xE007F }, { 0xE0100, 0xE01EF }
	};

	/* NUL occupies no column */
	if (ch == 0)
		return 0;

	/* 8-bit control characters: below U+0020, and U+007F..U+009F */
	if (ch < 32 || (ch >= 0x7f && ch < 0xa0))
		return -1;

	/* look ch up in the combining table; last index is count - 1 */
	if (bisearch(ch, combining,
		     sizeof(combining) / sizeof(combining[0]) - 1))
		return 0;

	/*
	 * From here on ch is neither a combining nor a C0/C1 control
	 * character, so it is either wide (2) or ordinary (1).
	 */

	/* every wide range starts at or above U+1100 */
	if (ch < 0x1100)
		return 1;

	/* Hangul Jamo init. consonants */
	if (ch <= 0x115f)
		return 2;

	/* the two angle brackets U+2329 and U+232A */
	if (ch == 0x2329 || ch == 0x232a)
		return 2;

	/* CJK ... Yi, with U+303F excluded */
	if (ch >= 0x2e80 && ch <= 0xa4cf && ch != 0x303f)
		return 2;

	/* Hangul Syllables */
	if (ch >= 0xac00 && ch <= 0xd7a3)
		return 2;

	/* CJK Compatibility Ideographs */
	if (ch >= 0xf900 && ch <= 0xfaff)
		return 2;

	/* CJK Compatibility Forms */
	if (ch >= 0xfe30 && ch <= 0xfe6f)
		return 2;

	/* Fullwidth Forms */
	if (ch >= 0xff00 && ch <= 0xff60)
		return 2;
	if (ch >= 0xffe0 && ch <= 0xffe6)
		return 2;

	/* planes 2 and 3 */
	if (ch >= 0x20000 && ch <= 0x2fffd)
		return 2;
	if (ch >= 0x30000 && ch <= 0x3fffd)
		return 2;

	return 1;
}
Example #22
0
/*
 * CJK-legacy variant of mk_wcwidth(): spacing characters in the East
 * Asian Ambiguous (A) category, as defined in Unicode Technical
 * Report #11, are reported with a column width of 2. Every other
 * code point gets whatever mk_wcwidth() returns for it.
 *
 * This variant might be useful for users of CJK legacy encodings who
 * want to migrate to UCS without changing the traditional terminal
 * character-width behaviour. It is not otherwise recommended for
 * general use.
 */
int
mk_wcwidth_cjk(wchar_t ucs)
{
    /* East Asian Ambiguous characters as a sorted list of
     * non-overlapping intervals, generated by
     *
     * uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf \
     *    +E000..F8FF \
     *    +F0000..FFFFD \
     *    +100000..10FFFD  c
     *
     * "WIDTH-A" is a file extracted from EastAsianWidth.txt by selecting
     * only those with width "A", and omitting:
     *
     *    0xAD
     *    all lines with "COMBINING"
     *
     * (uniset does not recognize the range expressions in WIDTH-A).
     */
  /* *INDENT-OFF* */
  static const struct interval ambiguous[] = {
    { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
    { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
    { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
    { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
    { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
    { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
    { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
    { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
    { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
    { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
    { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
    { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
    { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
    { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
    { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
    { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
    { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
    { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
    { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
    { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
    { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
    { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
    { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
    { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
    { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
    { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
    { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
    { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
    { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
    { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
    { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
    { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
    { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
    { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
    { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
    { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
    { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
    { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
    { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
    { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
    { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
    { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
    { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
    { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
  };
  /* *INDENT-ON* */

    /* index of the final entry in the ambiguous table */
    const int last = (int) (sizeof(ambiguous) / sizeof(ambiguous[0]) - 1);

    /* The ambiguous table is consulted first; only on a miss does the
     * plain mk_wcwidth() classification apply. */
    return bisearch((unsigned long) ucs, ambiguous, last)
	? 2
	: mk_wcwidth(ucs);
}
Example #23
0
/*
 * Column width of the code point ucs.
 *
 * Returns 0 for NUL and for code points found in the combining table,
 * -1 for 8-bit control characters, 2 for the wide ranges tested
 * below, and 1 for everything else.
 */
int wcwidth(wchar_t ucs)
{
  /* non-spacing characters: sorted, non-overlapping intervals */
  static const struct interval combining[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
    { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C },
    { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 },
    { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
    { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 },
    { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 },
    { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 },
    { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 },
    { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 },
    { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 },
    { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 },
    { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
    { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
    { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
    { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA },
    { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 },
    { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 },
    { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD },
    { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
    { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 },
    { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC },
    { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 },
    { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 },
    { 0x1160, 0x11FF }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 },
    { 0x17C9, 0x17D3 }, { 0x180B, 0x180E }, { 0x18A9, 0x18A9 },
    { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x206A, 0x206F },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF },
    { 0xFFF9, 0xFFFB }
  };

  /* NUL occupies no column */
  if (ucs == 0)
    return 0;

  /* 8-bit control characters: below U+0020, and U+007F..U+009F */
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
    return -1;

  /* look ucs up in the combining table; last index is count - 1 */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(combining[0]) - 1))
    return 0;

  /* From here on ucs is not a combining or C0/C1 control character,
   * so it is either wide (2) or ordinary (1). */

  /* every wide range starts at or above U+1100 */
  if (ucs < 0x1100)
    return 1;

  /* Hangul Jamo init. consonants */
  if (ucs <= 0x115f)
    return 2;

  /* CJK ... Yi; the mask drops bits 0 and 4, so it excludes
   * U+300A, U+300B, U+301A and U+301B, and U+303F is excluded too */
  if (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
      ucs != 0x303f)
    return 2;

  /* Hangul Syllables */
  if (ucs >= 0xac00 && ucs <= 0xd7a3)
    return 2;

  /* CJK Compatibility Ideographs */
  if (ucs >= 0xf900 && ucs <= 0xfaff)
    return 2;

  /* CJK Compatibility Forms */
  if (ucs >= 0xfe30 && ucs <= 0xfe6f)
    return 2;

  /* Fullwidth Forms */
  if (ucs >= 0xff00 && ucs <= 0xff5f)
    return 2;
  if (ucs >= 0xffe0 && ucs <= 0xffe6)
    return 2;

  /* all of plane 2 */
  if (ucs >= 0x20000 && ucs <= 0x2ffff)
    return 2;

  return 1;
}
Example #24
0
/*
 * Look s up in `set` by handing bisearch() the index range
 * 0 .. count-1, and return bisearch()'s result unchanged.
 * NOTE(review): presumably `set` holds `count` sorted elements --
 * confirm against the definitions of `set` and `bisearch`.
 */
int inSet(int s)
{
	int result = bisearch(set, s, 0, count - 1);

	return result;
}
Example #25
0
// Map `word` to its index by delegating to bisearch(); the value
// bisearch() produces is returned as-is.
int TBL::getindex(char* word)
{
	const int index = bisearch(word);
	return index;
}
Example #26
0
/*
 * CJK variant of the width lookup: code points found in the East
 * Asian Ambiguous table (and, when JA_LEGACY is defined, in the
 * Japanese legacy table) are reported as width 2. Everything else
 * gets whatever wcwidth_ucs() returns for it.
 */
int wcwidth_cjk(wchar_t ucs)
{
  /* East Asian Ambiguous characters as a sorted list of
   * non-overlapping intervals, generated by
   * "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
  static const struct interval ambiguous[] = {
    { 0x00a1, 0x00a1 }, { 0x00a4, 0x00a4 }, { 0x00a7, 0x00a8 },
    { 0x00aa, 0x00aa }, { 0x00ae, 0x00ae }, { 0x00b0, 0x00b4 },
    { 0x00b6, 0x00ba }, { 0x00bc, 0x00bf }, { 0x00c6, 0x00c6 },
    { 0x00d0, 0x00d0 }, { 0x00d7, 0x00d8 }, { 0x00de, 0x00e1 },
    { 0x00e6, 0x00e6 }, { 0x00e8, 0x00ea }, { 0x00ec, 0x00ed },
    { 0x00f0, 0x00f0 }, { 0x00f2, 0x00f3 }, { 0x00f7, 0x00fa },
    { 0x00fc, 0x00fc }, { 0x00fe, 0x00fe }, { 0x0101, 0x0101 },
    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011b, 0x011b },
    { 0x0126, 0x0127 }, { 0x012b, 0x012b }, { 0x0131, 0x0133 },
    { 0x0138, 0x0138 }, { 0x013f, 0x0142 }, { 0x0144, 0x0144 },
    { 0x0148, 0x014b }, { 0x014d, 0x014d }, { 0x0152, 0x0153 },
    { 0x0166, 0x0167 }, { 0x016b, 0x016b }, { 0x01ce, 0x01ce },
    { 0x01d0, 0x01d0 }, { 0x01d2, 0x01d2 }, { 0x01d4, 0x01d4 },
    { 0x01d6, 0x01d6 }, { 0x01d8, 0x01d8 }, { 0x01da, 0x01da },
    { 0x01dc, 0x01dc }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
    { 0x02c4, 0x02c4 }, { 0x02c7, 0x02c7 }, { 0x02c9, 0x02cb },
    { 0x02cd, 0x02cd }, { 0x02d0, 0x02d0 }, { 0x02d8, 0x02db },
    { 0x02dd, 0x02dd }, { 0x02df, 0x02df }, { 0x0391, 0x03a1 },
    { 0x03a3, 0x03a9 }, { 0x03b1, 0x03c1 }, { 0x03c3, 0x03c9 },
    { 0x0401, 0x0401 }, { 0x0410, 0x044f }, { 0x0451, 0x0451 },
    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
    { 0x201c, 0x201d }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
    { 0x203b, 0x203b }, { 0x203e, 0x203e }, { 0x2074, 0x2074 },
    { 0x207f, 0x207f }, { 0x2081, 0x2084 }, { 0x20ac, 0x20ac },
    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
    { 0x2126, 0x2126 }, { 0x212b, 0x212b }, { 0x2153, 0x2154 },
    { 0x215b, 0x215e }, { 0x2160, 0x216b }, { 0x2170, 0x2179 },
    { 0x2190, 0x2199 }, { 0x21b8, 0x21b9 }, { 0x21d2, 0x21d2 },
    { 0x21d4, 0x21d4 }, { 0x21e7, 0x21e7 }, { 0x2200, 0x2200 },
    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220b, 0x220b },
    { 0x220f, 0x220f }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
    { 0x221a, 0x221a }, { 0x221d, 0x2220 }, { 0x2223, 0x2223 },
    { 0x2225, 0x2225 }, { 0x2227, 0x222c }, { 0x222e, 0x222e },
    { 0x2234, 0x2237 }, { 0x223c, 0x223d }, { 0x2248, 0x2248 },
    { 0x224c, 0x224c }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
    { 0x2264, 0x2267 }, { 0x226a, 0x226b }, { 0x226e, 0x226f },
    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
    { 0x2299, 0x2299 }, { 0x22a5, 0x22a5 }, { 0x22bf, 0x22bf },
    { 0x2312, 0x2312 }, { 0x2460, 0x24e9 }, { 0x24eb, 0x254b },
    { 0x2550, 0x2573 }, { 0x2580, 0x258f }, { 0x2592, 0x2595 },
    { 0x25a0, 0x25a1 }, { 0x25a3, 0x25a9 }, { 0x25b2, 0x25b3 },
    { 0x25b6, 0x25b7 }, { 0x25bc, 0x25bd }, { 0x25c0, 0x25c1 },
    { 0x25c6, 0x25c8 }, { 0x25cb, 0x25cb }, { 0x25ce, 0x25d1 },
    { 0x25e2, 0x25e5 }, { 0x25ef, 0x25ef }, { 0x2605, 0x2606 },
    { 0x2609, 0x2609 }, { 0x260e, 0x260f }, { 0x2614, 0x2615 },
    { 0x261c, 0x261c }, { 0x261e, 0x261e }, { 0x2640, 0x2640 },
    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
    { 0x2667, 0x266a }, { 0x266c, 0x266d }, { 0x266f, 0x266f },
    { 0x273d, 0x273d }, { 0x2776, 0x277f }, { 0xe000, 0xf8ff },
    { 0xfffd, 0xfffd }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd }
  };

#ifdef JA_LEGACY
  /* Extra code points treated as width 2 for Japanese legacy
   * encodings. */
  static const struct interval legacy_ja[] = {
    { 0x00A2, 0x00A3 }, { 0x00A5, 0x00A6 }, { 0x00AC, 0x00AC },
    { 0x00AF, 0x00AF }, { 0x2212, 0x2212 }
  };
#endif /* JA_LEGACY */

  /* look ucs up in the ambiguous table; last index is count - 1 */
  if (bisearch(ucs, ambiguous,
               sizeof(ambiguous) / sizeof(ambiguous[0]) - 1))
    return 2;

#ifdef JA_LEGACY
  /* same lookup against the Japanese legacy additions */
  if (bisearch(ucs, legacy_ja,
               sizeof(legacy_ja) / sizeof(legacy_ja[0]) - 1))
    return 2;
#endif /* JA_LEGACY */

  /* found in neither table: defer to the plain classification */
  return wcwidth_ucs(ucs);
}
Example #27
0
/*
 * Column width of the code point ucs.
 *
 * Returns 0 for NUL and for code points found in the combining table,
 * -1 for 8-bit control characters, 2 for the wide ranges tested
 * below, and 1 for everything else.
 */
int mk_wcwidth(wchar_t ucs)
{
  /* non-spacing characters: sorted, non-overlapping intervals,
   * generated by
   * "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
  static const struct interval combining[] = {
    { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
    { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
    { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
    { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
    { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
    { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
    { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
    { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
    { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
    { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
    { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
    { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
    { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
    { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
    { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
    { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
    { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
    { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
    { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
    { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
    { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
    { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
    { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
    { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
    { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
    { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
    { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
    { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
    { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
    { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
    { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
    { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
    { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
    { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
    { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
    { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
    { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
    { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
    { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
    { 0xE0100, 0xE01EF }
  };

  /* NUL occupies no column */
  if (ucs == 0)
    return 0;

  /* 8-bit control characters: below U+0020, and U+007F..U+009F */
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
    return -1;

  /* look ucs up in the combining table; last index is count - 1 */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(combining[0]) - 1))
    return 0;

  /* From here on ucs is not a combining or C0/C1 control character,
   * so it is either wide (2) or ordinary (1). */

  /* every wide range starts at or above U+1100 */
  if (ucs < 0x1100)
    return 1;

  /* Hangul Jamo init. consonants */
  if (ucs <= 0x115f)
    return 2;

  /* the two angle brackets U+2329 and U+232A */
  if (ucs == 0x2329 || ucs == 0x232a)
    return 2;

  /* CJK ... Yi, with U+303F excluded */
  if (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f)
    return 2;

  /* Hangul Syllables */
  if (ucs >= 0xac00 && ucs <= 0xd7a3)
    return 2;

  /* CJK Compatibility Ideographs */
  if (ucs >= 0xf900 && ucs <= 0xfaff)
    return 2;

  /* Vertical forms */
  if (ucs >= 0xfe10 && ucs <= 0xfe19)
    return 2;

  /* CJK Compatibility Forms */
  if (ucs >= 0xfe30 && ucs <= 0xfe6f)
    return 2;

  /* Fullwidth Forms */
  if (ucs >= 0xff00 && ucs <= 0xff60)
    return 2;
  if (ucs >= 0xffe0 && ucs <= 0xffe6)
    return 2;

  /* planes 2 and 3 */
  if (ucs >= 0x20000 && ucs <= 0x2fffd)
    return 2;
  if (ucs >= 0x30000 && ucs <= 0x3fffd)
    return 2;

  return 1;
}
Example #28
0
/*
 * Column width of a single wide character.
 *
 * Returns:
 *    0  for NUL and for any code point found in the combining[] table,
 *   -1  for code points below 32 and for 0x7f..0x9f,
 *    2  for the wide ranges tested at the end of the function,
 *    1  for everything else.
 *
 * The table lookup is delegated to bisearch(), defined elsewhere in this
 * file; a non-zero result from it is treated as "found".
 */
int mk_wcwidth(wchar_t ucs)
{
  /*
   * Sorted, non-overlapping intervals of non-spacing characters.
   * Generator notes carried over from the original source:
   *   "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
   *   "uniset cat:Me + cat:Mn + cat:Cf - U+00AD + U+1160..U+11FF + U+200B"
   */
  static const struct interval combining[] = {
    { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD },
    { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 },
    { 0x05C7, 0x05C7 }, { 0x0600, 0x0604 }, { 0x0610, 0x061A },
    { 0x064B, 0x065F }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DD },
    { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
    { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0816, 0x0819 },
    { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, { 0x0829, 0x082D },
    { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, { 0x0900, 0x0902 },
    { 0x093A, 0x093A }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0957 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 },
    { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
    { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
    { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 }, { 0x0B82, 0x0B82 },
    { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
    { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
    { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0CE2, 0x0CE3 },
    { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, { 0x0D62, 0x0D63 },
    { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
    { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
    { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
    { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
    { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
    { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F8D, 0x0F97 },
    { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
    { 0x1032, 0x1037 }, { 0x1039, 0x103A }, { 0x103D, 0x103E },
    { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, { 0x1071, 0x1074 },
    { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, { 0x108D, 0x108D },
    { 0x109D, 0x109D }, { 0x1160, 0x11FF }, { 0x135D, 0x135F },
    { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
    { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
    { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
    { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
    { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E },
    { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C },
    { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 },
    { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C },
    { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 },
    { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB },
    { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED },
    { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 },
    { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 },
    { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 },
    { 0x1DFC, 0x1DFF }, { 0x200B, 0x200F }, { 0x202A, 0x202E },
    { 0x2060, 0x2064 }, { 0x206A, 0x206F }, { 0x20D0, 0x20F0 },
    { 0x2CEF, 0x2CF1 }, { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF },
    { 0x302A, 0x302D }, { 0x3099, 0x309A }, { 0xA66F, 0xA672 },
    { 0xA674, 0xA67D }, { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 },
    { 0xA802, 0xA802 }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
    { 0xA825, 0xA826 }, { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 },
    { 0xA926, 0xA92D }, { 0xA947, 0xA951 }, { 0xA980, 0xA982 },
    { 0xA9B3, 0xA9B3 }, { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC },
    { 0xAA29, 0xAA2E }, { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 },
    { 0xAA43, 0xAA43 }, { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 },
    { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF },
    { 0xAAC1, 0xAAC1 }, { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 },
    { 0xABE5, 0xABE5 }, { 0xABE8, 0xABE8 }, { 0xABED, 0xABED },
    { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 },
    { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, { 0x101FD, 0x101FD },
    { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
    { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 },
    { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 },
    { 0x110B9, 0x110BA }, { 0x110BD, 0x110BD }, { 0x11100, 0x11102 },
    { 0x11127, 0x1112B }, { 0x1112D, 0x11134 }, { 0x11180, 0x11181 },
    { 0x111B6, 0x111BE }, { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD },
    { 0x116B0, 0x116B5 }, { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 },
    { 0x1D167, 0x1D169 }, { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B },
    { 0x1D1AA, 0x1D1AD }, { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 },
    { 0xE0020, 0xE007F }, { 0xE0100, 0xE01EF },
  };

  /* NUL takes up no columns. */
  if (ucs == 0)
    return 0;

  /* 8-bit control characters: below 32, and 0x7f up to (not including) 0xa0. */
  if (ucs < 32)
    return -1;
  if (ucs >= 0x7f && ucs < 0xa0)
    return -1;

  /* Non-spacing characters; the third argument is the last valid index. */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(combining[0]) - 1))
    return 0;

  /* From here on ucs is neither a combining nor a C0/C1 control character. */

  /* Nothing below 0x1100 is treated as wide. */
  if (ucs < 0x1100)
    return 1;

  if (ucs <= 0x115f)                      /* Hangul Jamo init. consonants */
    return 2;
  if (ucs == 0x2329 || ucs == 0x232a)
    return 2;
  if (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
    return 2;
  if (ucs >= 0xac00 && ucs <= 0xd7a3)     /* Hangul Syllables */
    return 2;
  if (ucs >= 0xf900 && ucs <= 0xfaff)     /* CJK Compatibility Ideographs */
    return 2;
  if (ucs >= 0xfe10 && ucs <= 0xfe19)     /* Vertical forms */
    return 2;
  if (ucs >= 0xfe30 && ucs <= 0xfe6f)     /* CJK Compatibility Forms */
    return 2;
  if (ucs >= 0xff00 && ucs <= 0xff60)     /* Fullwidth Forms */
    return 2;
  if (ucs >= 0xffe0 && ucs <= 0xffe6)
    return 2;
#ifndef CYGWIN
  /* Ranges above 0xFFFF are only tested when CYGWIN is not defined. */
  if (ucs >= 0x20000 && ucs <= 0x2fffd)
    return 2;
  if (ucs >= 0x30000 && ucs <= 0x3fffd)
    return 2;
#endif

  return 1;
}
Example #29
0
/*
 * Column width of a single wide character.
 *
 * With _MB_CAPABLE defined the result is computed from the tables below:
 *    0  for NUL and for code points found in combining[],
 *    1  for 0x20..0x7e and for anything not matched by another rule,
 *   -1  for the remaining code points below 0xa0 and for 0xd800..0xdfff,
 *    2  for code points found in ambiguous[] when __locale_cjk_lang()
 *       is non-zero, and for the wide ranges tested at the end.
 *
 * Without _MB_CAPABLE the answer comes from iswprint()/iswcntrl():
 * 1 if printable, 0 if a control character or NUL, -1 otherwise.
 *
 * bisearch() and __locale_cjk_lang() are defined elsewhere; a non-zero
 * result from bisearch() is treated as "found".
 */
int __wcwidth(__CONST wint_t ucs)
{
#ifdef _MB_CAPABLE
  /* Sorted, non-overlapping intervals of East Asian Ambiguous characters,
   * generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c". */
  static const struct interval ambiguous[] = {
    { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
    { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
    { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
    { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
    { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
    { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
    { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
    { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
    { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
    { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
    { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
    { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
    { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
    { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
    { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
    { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
    { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
    { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
    { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
    { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
    { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
    { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
    { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
    { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
    { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
    { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
    { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
    { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
    { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
    { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
    { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
    { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
    { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
    { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
    { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
    { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
    { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
    { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
    { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
    { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
    { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
    { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
    { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
    { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
  };
  /* Sorted, non-overlapping intervals of non-spacing characters,
   * generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c". */
  static const struct interval combining[] = {
    { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
    { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
    { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
    { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
    { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
    { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
    { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
    { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
    { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
    { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
    { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
    { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
    { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
    { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
    { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
    { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
    { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
    { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
    { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
    { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
    { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
    { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
    { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
    { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
    { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
    { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
    { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
    { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
    { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
    { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
    { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
    { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
    { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
    { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
    { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
    { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
    { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
    { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
    { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
    { 0xE0100, 0xE01EF }
  };

  /* Everything below 0xa0 is decided here: NUL, printable ASCII, or control. */
  if (ucs < 0xa0)
    {
      if (ucs == 0)
        return 0;
      if (ucs >= 0x20 && ucs < 0x7f)
        return 1;
      return -1;
    }

  /* Surrogate pair values are rejected. */
  if (ucs >= 0xd800 && ucs <= 0xdfff)
    return -1;

  /* The ambiguous table is consulted only when __locale_cjk_lang() is
     non-zero; the third bisearch() argument is the last valid index. */
  if (__locale_cjk_lang ()
      && bisearch(ucs, ambiguous,
                  sizeof(ambiguous) / sizeof(ambiguous[0]) - 1))
    return 2;

  /* Non-spacing characters. */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(combining[0]) - 1))
    return 0;

  /* From here on ucs is neither a combining nor a C0/C1 control character. */

  /* Nothing below 0x1100 is treated as wide. */
  if (ucs < 0x1100)
    return 1;

  if (ucs <= 0x115f)                      /* Hangul Jamo init. consonants */
    return 2;
  if (ucs == 0x2329 || ucs == 0x232a)
    return 2;
  if (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
    return 2;
  if (ucs >= 0xac00 && ucs <= 0xd7a3)     /* Hangul Syllables */
    return 2;
  if (ucs >= 0xf900 && ucs <= 0xfaff)     /* CJK Compatibility Ideographs */
    return 2;
  if (ucs >= 0xfe10 && ucs <= 0xfe19)     /* Vertical forms */
    return 2;
  if (ucs >= 0xfe30 && ucs <= 0xfe6f)     /* CJK Compatibility Forms */
    return 2;
  if (ucs >= 0xff00 && ucs <= 0xff60)     /* Fullwidth Forms */
    return 2;
  if (ucs >= 0xffe0 && ucs <= 0xffe6)
    return 2;
  if (ucs >= 0x20000 && ucs <= 0x2fffd)
    return 2;
  if (ucs >= 0x30000 && ucs <= 0x3fffd)
    return 2;

  return 1;
#else /* !_MB_CAPABLE */
  /* No tables available: classify with the wide-character ctype functions. */
  if (iswprint (ucs))
    return 1;
  else if (iswcntrl (ucs) || ucs == L'\0')
    return 0;
  else
    return -1;
#endif /* _MB_CAPABLE */
}
Example #30
0
/*
 * Looks up codepoint in the file-scope combining[] interval table and
 * returns bisearch()'s result unchanged (presumably non-zero when the
 * code point lies inside one of the intervals -- bisearch() is defined
 * elsewhere in this file).
 */
INTERNAL int vterm_unicode_is_combining(int codepoint)
{
  /* The third argument is the index of the last table entry (count - 1). */
  return bisearch(codepoint,
                  combining,
                  sizeof(combining) / sizeof(combining[0]) - 1);
}