Пример #1
0
/* Walks every code point below 0x110000, classifies it by the pair of
   widths uc_width reports for the "UTF-8" and "GBK" encodings, and hands
   each classified code point to add_to_interval.  finish_interval is
   called once after the scan.  Returns 0.  */
int
main ()
{
  ucs4_t code;

  for (code = 0; code < 0x110000; code++)
    {
      int w1 = uc_width (code, "UTF-8");
      int w2 = uc_width (code, "GBK");
      char category;

      /* Map the (UTF-8 width, GBK width) pair to a one-character tag.
         Any pair not listed below gets the tag 0.  */
      if (w1 == 0 && w2 == 0)
        category = '0';
      else if (w1 == 1 && w2 == 1)
        category = '1';
      else if (w1 == 1 && w2 == 2)
        category = 'A';
      else if (w1 == 2 && w2 == 2)
        category = '2';
      else
        category = 0;

      if (category != 0)
        {
          add_to_interval (code, category);
          continue;
        }

      /* No recognized pair: the code point is expected to be a control
         character, for which both widths are negative.  */
      ASSERT (w1 < 0 && w2 < 0);
    }
  finish_interval ();

  return 0;
}
Пример #2
0
/* Replacement for wcwidth.
   When locale_charset () reports "UTF-8", the result comes from uc_width,
   on the assumption that a wide character in such a locale is the same as
   a Unicode character.  Otherwise the result comes from the system's
   wcwidth when HAVE_WCWIDTH is set, or from a minimal fallback: 0 for the
   null wide character, 1 for anything iswprint accepts, -1 for the rest.  */
int
rpl_wcwidth (wchar_t wc)
{
  const char *charset = locale_charset ();

  /* UTF-8 locale: use the Unicode aware width function.  */
  if (STREQ (charset, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
    return uc_width (wc, charset);

  /* Any other locale: defer to the system, or approximate.  */
#if HAVE_WCWIDTH
  return wcwidth (wc);
#else
  if (wc == 0)
    return 0;
  if (iswprint (wc))
    return 1;
  return -1;
#endif
}
Пример #3
0
/* Sums the uc_width results for the characters decoded from the N units
   starting at S, using ENCODING for the width lookup.
   Decoding stops at the end of the buffer or at the first character equal
   to 0, whichever comes first.  Characters for which uc_width returns a
   negative value (control characters) contribute nothing.
   Returns the accumulated width.  */
int
u8_width (const uint8_t *s, size_t n, const char *encoding)
{
  const uint8_t *limit = s + n;
  int total = 0;

  for (;;)
    {
      ucs4_t ch;
      int cols;

      if (!(s < limit))
        break;

      /* Decode one character and step past the units it occupied.  */
      s += u8_mbtouc_unsafe (&ch, s, limit - s);

      /* A zero character marks the end of the string.  */
      if (ch == 0)
        break;

      cols = uc_width (ch, encoding);
      if (cols < 0)
        continue; /* control character: not counted */

      total += cols;
    }

  return total;
}
Пример #4
0
/* Test driver for uc_width.
   Each ASSERT compares the width uc_width reports for one code point with
   the expected value: 1 for printable ASCII, 0 for non-spacing, format
   control and zero-width characters, 2 for the CJK samples.
   Returns 0 when control reaches the end of the function; what ASSERT does
   on a failed check is defined outside this block.  */
int
main ()
{
  ucs4_t uc;

  /* Test width of ASCII characters.  */
  /* Covers 0x0020 through 0x007E (the upper bound 0x007F is excluded).
     This is the only check that passes "ISO-8859-2" as the encoding; all
     checks below pass "UTF-8".  */
  for (uc = 0x0020; uc < 0x007F; uc++)
    ASSERT (uc_width (uc, "ISO-8859-2") == 1);

  /* Test width of some non-spacing characters.  */
  ASSERT (uc_width (0x0301, "UTF-8") == 0);
  ASSERT (uc_width (0x05B0, "UTF-8") == 0);

  /* Test width of some format control characters.  */
  /* The last two samples lie outside the Basic Multilingual Plane.  */
  ASSERT (uc_width (0x200E, "UTF-8") == 0);
  ASSERT (uc_width (0x2060, "UTF-8") == 0);
  ASSERT (uc_width (0xE0001, "UTF-8") == 0);
  ASSERT (uc_width (0xE0044, "UTF-8") == 0);

  /* Test width of some zero width characters.  */
  ASSERT (uc_width (0x200B, "UTF-8") == 0);
  ASSERT (uc_width (0xFEFF, "UTF-8") == 0);

  /* Test width of some CJK characters.  */
  /* Expected width is 2 for each; the last two samples lie outside the
     Basic Multilingual Plane.  */
  ASSERT (uc_width (0x3000, "UTF-8") == 2);
  ASSERT (uc_width (0xB250, "UTF-8") == 2);
  ASSERT (uc_width (0xFF1A, "UTF-8") == 2);
  ASSERT (uc_width (0x20369, "UTF-8") == 2);
  ASSERT (uc_width (0x2F876, "UTF-8") == 2);

  return 0;
}
Пример #5
0
/* Chooses line breaks for the N characters at S so that lines fit in WIDTH
   columns, and records the decision for each character in P[0..N-1].

   P is first filled by u32_possible_linebreaks.  If O is non-NULL, every
   entry of O other than UC_BREAK_UNDEFINED overrides the matching entry
   of P.  The text is then treated as a sequence of atomic pieces, each
   starting at a possible break.  Whenever a piece would extend past WIDTH,
   the possible break at its start is turned into a real one.  All other
   non-mandatory entries end up as UC_BREAK_PROHIBITED.

   START_COLUMN is the column at which the text begins.  AT_END_COLUMNS is
   added to the width of the final piece when deciding whether it fits.
   Character widths come from uc_width with ENCODING; negative widths
   (control characters) are ignored.

   Returns the column reached after the last character.  */
int
u32_width_linebreaks (const uint32_t *s, size_t n,
                      int width, int start_column, int at_end_columns,
                      const char *o, const char *encoding,
                      char *p)
{
  char *pending = NULL;      /* break slot opening the current piece */
  int column = start_column; /* column where the current piece starts */
  int piece = 0;             /* width of the current piece so far */
  size_t i;

  u32_possible_linebreaks (s, n, encoding, p);

  for (i = 0; i < n; i++)
    {
      ucs4_t ch = s[i];
      char *slot = &p[i];
      int cols;

      /* Respect the override.  */
      if (o != NULL && o[i] != UC_BREAK_UNDEFINED)
        *slot = o[i];

      if (*slot == UC_BREAK_MANDATORY)
        {
          /* The current piece ends at a line break character.  Break
             before the piece if it overflowed.  */
          if (pending != NULL && column + piece > width)
            *pending = UC_BREAK_POSSIBLE;

          /* The next piece starts at column 0.  */
          pending = NULL;
          column = 0;
          piece = 0;
          continue;
        }

      if (*slot == UC_BREAK_POSSIBLE)
        {
          /* The current piece ends here.  Break before it if it
             overflowed.  */
          if (pending != NULL && column + piece > width)
            {
              *pending = UC_BREAK_POSSIBLE;
              column = 0;
            }

          /* Open a new piece at this slot.  */
          pending = slot;
          column += piece;
          piece = 0;
        }

      /* No break here for now; the slot may be turned back into
         UC_BREAK_POSSIBLE later through 'pending'.  */
      *slot = UC_BREAK_PROHIBITED;

      cols = uc_width (ch, encoding);
      if (cols >= 0) /* ignore control characters in the string */
        piece += cols;
    }

  /* The final piece also has to leave room for AT_END_COLUMNS.  */
  if (pending != NULL && column + piece + at_end_columns > width)
    {
      *pending = UC_BREAK_POSSIBLE;
      column = 0;
    }

  return column + piece;
}