コード例 #1
0
/* Test of u8_width_linebreaks: an empty input, then a multilingual UTF-8
   sample checked at a width of 25 columns, once with the encoding argument
   "GB18030" and once with "GB2312".  Returns 0 when every assertion holds.  */
int
main (void)
{
  /* Test case n = 0.  */
  u8_width_linebreaks (NULL, 0, 80, 0, 0, NULL, "GB18030", NULL);

  {
    static const uint8_t input[91] =
      /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
      "Gr\303\274\303\237 Gott. \320\227\320\264\321\200\320\260\320\262\321\201\321\202\320\262\321\203\320\271\321\202\320\265! x=(-b\302\261sqrt(b\302\262-4ac))/(2a)  \346\227\245\346\234\254\350\252\236,\344\270\255\346\226\207,\355\225\234\352\270\200\n";

    /* Encoding argument "GB18030": possible breaks are expected at byte
       offsets 39 and 61, a mandatory break at the last byte (offset 90).  */
    {
      char *p = (char *) malloc (SIZEOF (input));
      size_t i;

      /* The result array is written by the callee and read below, so an
         allocation failure must stop the test here.  */
      ASSERT (p != NULL);

      u8_width_linebreaks (input, SIZEOF (input), 25, 0, 0, NULL, "GB18030", p);
      /* Iterate over exactly the number of entries that were allocated.  */
      for (i = 0; i < SIZEOF (input); i++)
        {
          ASSERT (p[i] == (i == 90 ? UC_BREAK_MANDATORY :
                           i == 39 || i == 61 ? UC_BREAK_POSSIBLE :
                           UC_BREAK_PROHIBITED));
        }
      free (p);
    }

    /* Encoding argument "GB2312": same expectations as above, plus one
       additional possible break at byte offset 13.  */
    {
      char *p = (char *) malloc (SIZEOF (input));
      size_t i;

      ASSERT (p != NULL);

      u8_width_linebreaks (input, SIZEOF (input), 25, 0, 0, NULL, "GB2312", p);
      for (i = 0; i < SIZEOF (input); i++)
        {
          ASSERT (p[i] == (i == 90 ? UC_BREAK_MANDATORY :
                           i == 13 || i == 39 || i == 61 ? UC_BREAK_POSSIBLE :
                           UC_BREAK_PROHIBITED));
        }
      free (p);
    }
  }

  return 0;
}
コード例 #2
0
ファイル: u8-width-linebreaks.c プロジェクト: djmitche/gnulib
/* Filter program.  When called with exactly one argument (the line width),
   it takes the text returned by read_file (stdin), asks u8_width_linebreaks
   for the break classification of every byte, and writes the text to stdout
   with a '\n' inserted before each byte classified UC_BREAK_POSSIBLE.

   Returns 0 on success, and 1 when the argument count is wrong or a needed
   buffer is unavailable.  Aborts if an unexpected classification value is
   encountered.  */
int
main (int argc, char * argv[])
{
  if (argc == 2)
    {
      /* Insert line breaks for a given width.  */
      int width = atoi (argv[1]);
      char *input = read_file (stdin);
      size_t length;
      char *breaks;
      size_t i;

      /* NOTE(review): read_file is defined outside this block; guard against
         a NULL result before handing it to strlen.  */
      if (input == NULL)
        return 1;

      /* Keep the length in size_t: narrowing strlen's result to int would
         misbehave for inputs larger than INT_MAX.  */
      length = strlen (input);

      /* Request at least one byte, so that a NULL result always means
         allocation failure, even for empty input.  */
      breaks = malloc (length > 0 ? length : 1);
      if (breaks == NULL)
        return 1;

      u8_width_linebreaks ((const uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);

      for (i = 0; i < length; i++)
        {
          switch (breaks[i])
            {
            case UC_BREAK_POSSIBLE:
              /* Emit the inserted line break before the byte itself.  */
              putc ('\n', stdout);
              break;
            case UC_BREAK_MANDATORY:
              /* Nothing to insert: only the input byte itself is written.  */
              break;
            case UC_BREAK_PROHIBITED:
              break;
            default:
              abort ();
            }
          putc (input[i], stdout);
        }

      free (breaks);
      /* NOTE(review): input is not released here, as in the original;
         whether read_file's result is heap-allocated is not visible from
         this block.  */

      return 0;
    }
  else
    return 1;
}
コード例 #3
0
/* Compute line break classifications for the N bytes at S, which are in the
   given ENCODING, storing one UC_BREAK_* value per input byte into P.

   S, N       the input string and its length in bytes.
   WIDTH, START_COLUMN, AT_END_COLUMNS
              passed through unchanged to u8_width_linebreaks.
   O          optional array of N per-byte overrides, or NULL.
   ENCODING   name of the encoding of S.
   P          output array with room for N bytes.

   Strategy:
     - If ENCODING is UTF-8, delegate directly to u8_width_linebreaks.
     - Otherwise convert S to UTF-8, map the overrides onto the converted
       string, run u8_width_linebreaks on it and map the result back.
     - If the conversion (or an allocation) fails: when C_CTYPE_ASCII is set
       and S is pure ASCII, treat S as UTF-8; otherwise only mark mandatory
       breaks (override UC_BREAK_MANDATORY or a '\n' byte) and prohibit all
       others.

   Returns the value returned by u8_width_linebreaks when it was used;
   returns START_COLUMN when N is 0 or when no widths could be computed.  */
int
ulc_width_linebreaks (const char *s, size_t n,
                      int width, int start_column, int at_end_columns,
                      const char *o, const char *encoding,
                      char *p)
{
  if (n > 0)
    {
      if (is_utf8_encoding (encoding))
        return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
      else
        {
          /* Convert the string to UTF-8 and build a translation table
             from offsets into s to offsets into the translated string.
             Refuse the allocation if n * sizeof (size_t) would wrap around;
             that case is then handled like an allocation failure.  */
          size_t *offsets =
            (n <= (size_t)(-1) / sizeof (size_t)
             ? (size_t *) malloc (n * sizeof (size_t))
             : NULL);

          if (offsets != NULL)
            {
              uint8_t *t;
              size_t m;

              t = u8_conv_from_encoding (encoding, iconveh_question_mark,
                                         s, n, offsets, NULL, &m);
              if (t != NULL)
                {
                  /* One area of m bytes receives the break results; when
                     overrides are given, a second area of m bytes holds the
                     translated overrides.  Guard the doubling against
                     wrap-around.  */
                  int size_ok = (o == NULL || m <= (size_t)(-1) / 2);
                  char *memory =
                    (char *) (m > 0 && size_ok
                              ? malloc (o != NULL ? 2 * m : m)
                              : NULL);

                  if (m == 0 || memory != NULL)
                    {
                      char *q = memory;
                      /* With m == 0 there is no buffer; avoid arithmetic
                         and memset on a null pointer.  */
                      char *o8 = (o != NULL && m > 0 ? q + m : NULL);
                      int res_column;
                      size_t i;

                      /* Translate the overrides to the UTF-8 string.  */
                      if (o8 != NULL)
                        {
                          memset (o8, UC_BREAK_UNDEFINED, m);
                          for (i = 0; i < n; i++)
                            if (offsets[i] != (size_t)(-1))
                              o8[offsets[i]] = o[i];
                        }

                      /* Determine the line breaks of the UTF-8 string.  */
                      res_column =
                        u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q);

                      /* Translate the result back to the original string.
                         Bytes without a corresponding offset stay
                         UC_BREAK_PROHIBITED.  */
                      memset (p, UC_BREAK_PROHIBITED, n);
                      for (i = 0; i < n; i++)
                        if (offsets[i] != (size_t)(-1))
                          p[i] = q[offsets[i]];

                      free (memory);
                      free (t);
                      free (offsets);
                      return res_column;
                    }
                  free (t);
                }
              free (offsets);
            }
          /* Impossible to convert.  */
#if C_CTYPE_ASCII
          if (is_all_ascii (s, n))
            {
              /* ASCII is a subset of UTF-8.  */
              return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
            }
#endif
          /* We have a non-ASCII string and cannot convert it.
             Don't produce line breaks except those already present in the
             input string.  All we assume here is that the encoding is
             minimally ASCII compatible.  */
          {
            const char *s_end = s + n;
            while (s < s_end)
              {
                *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n'
                      ? UC_BREAK_MANDATORY
                      : UC_BREAK_PROHIBITED);
                s++;
                p++;
                if (o != NULL)
                  o++;
              }
            /* We cannot compute widths in this case.  */
          }
        }
    }
  return start_column;
}