Beispiel #1
0
/* A tricky optimization, but probably worth it.  */
unsigned long 
scm_i_utf8_string_hash (const char *str, size_t len)
{
  const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str;
  unsigned long ret;

  /* The length of the string in characters.  This name corresponds to
     Jenkins' original name.  */
  size_t length;

  scm_t_uint32 a, b, c, u32;

  if (len == (size_t) -1)
    len = strlen (str);

  end = ustr + len;

  if (u8_check (ustr, len) != NULL)
    /* Invalid UTF-8; punt.  */
    return scm_i_string_hash (scm_from_utf8_stringn (str, len));

  length = u8_strnlen (ustr, len);

  /* Set up the internal state.  */
  a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47;

  /* Handle most of the key.  */
  while (length > 3)
    {
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      a += u32;
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      b += u32;
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      c += u32;
      mix (a, b, c);
      length -= 3;
    }

  /* Handle the last 3 elements's.  */
  ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
  a += u32;
  if (--length)
    {
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      b += u32;
      if (--length)
        {
          ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
          c += u32;
        }
    }

  final (a, b, c);
Beispiel #2
0
int
u8_width (const uint8_t *s, size_t n, const char *encoding)
{
  const uint8_t *s_end = s + n;
  int width = 0;

  while (s < s_end)
    {
      ucs4_t uc;
      int w;

      s += u8_mbtouc_unsafe (&uc, s, s_end - s);

      if (uc == 0)
        break; /* end of string reached */

      w = uc_width (uc, encoding);
      if (w >= 0) /* ignore control characters in the string */
        width += w;
    }

  return width;
}
Beispiel #3
0
static int
mem_cd_iconveh_internal (const char *src, size_t srclen,
                         iconv_t cd, iconv_t cd1, iconv_t cd2,
                         enum iconv_ilseq_handler handler,
                         size_t extra_alloc,
                         size_t *offsets,
                         char **resultp, size_t *lengthp)
{
  /* When a conversion error occurs, we cannot start using CD1 and CD2 at
     this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
     Instead, we have to start afresh from the beginning of SRC.  */
  /* Use a temporary buffer, so that for small strings, a single malloc()
     call will be sufficient.  */
# define tmpbufsize 4096
  /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
     libiconv's UCS-4-INTERNAL encoding.  */
  union { unsigned int align; char buf[tmpbufsize]; } tmp;
# define tmpbuf tmp.buf

  char *initial_result;
  char *result;
  size_t allocated;
  size_t length;
  size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */

  if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
    {
      initial_result = *resultp;
      allocated = *lengthp;
    }
  else
    {
      initial_result = tmpbuf;
      allocated = sizeof (tmpbuf);
    }
  result = initial_result;

  /* Test whether a direct conversion is possible at all.  */
  if (cd == (iconv_t)(-1))
    goto indirectly;

  if (offsets != NULL)
    {
      size_t i;

      for (i = 0; i < srclen; i++)
        offsets[i] = (size_t)(-1);

      last_length = (size_t)(-1);
    }
  length = 0;

  /* First, try a direct conversion, and see whether a conversion error
     occurs at all.  */
  {
    const char *inptr = src;
    size_t insize = srclen;

    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    /* Set to the initial state.  */
    iconv (cd, NULL, NULL, NULL, NULL);
# endif

    while (insize > 0)
      {
        char *outptr = result + length;
        size_t outsize = allocated - extra_alloc - length;
        bool incremented;
        size_t res;
        bool grow;

        if (offsets != NULL)
          {
            if (length != last_length) /* ensure that offset[] be increasing */
              {
                offsets[inptr - src] = length;
                last_length = length;
              }
            res = iconv_carefully_1 (cd,
                                     &inptr, &insize,
                                     &outptr, &outsize,
                                     &incremented);
          }
        else
          /* Use iconv_carefully instead of iconv here, because:
             - If TO_CODESET is UTF-8, we can do the error handling in this
               loop, no need for a second loop,
             - With iconv() implementations other than GNU libiconv and GNU
               libc, if we use iconv() in a big swoop, checking for an E2BIG
               return, we lose the number of irreversible conversions.  */
          res = iconv_carefully (cd,
                                 &inptr, &insize,
                                 &outptr, &outsize,
                                 &incremented);

        length = outptr - result;
        grow = (length + extra_alloc > allocated / 2);
        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              grow = true;
            else if (errno == EINVAL)
              break;
            else if (errno == EILSEQ && handler != iconveh_error)
              {
                if (cd2 == (iconv_t)(-1))
                  {
                    /* TO_CODESET is UTF-8.  */
                    /* Error handling can produce up to 1 byte of output.  */
                    if (length + 1 + extra_alloc > allocated)
                      {
                        char *memory;

                        allocated = 2 * allocated;
                        if (length + 1 + extra_alloc > allocated)
                          abort ();
                        if (result == initial_result)
                          memory = (char *) malloc (allocated);
                        else
                          memory = (char *) realloc (result, allocated);
                        if (memory == NULL)
                          {
                            if (result != initial_result)
                              free (result);
                            errno = ENOMEM;
                            return -1;
                          }
                        if (result == initial_result)
                          memcpy (memory, initial_result, length);
                        result = memory;
                        grow = false;
                      }
                    /* The input is invalid in FROM_CODESET.  Eat up one byte
                       and emit a question mark.  */
                    if (!incremented)
                      {
                        if (insize == 0)
                          abort ();
                        inptr++;
                        insize--;
                      }
                    result[length] = '?';
                    length++;
                  }
                else
                  goto indirectly;
              }
            else
              {
                if (result != initial_result)
                  {
                    int saved_errno = errno;
                    free (result);
                    errno = saved_errno;
                  }
                return -1;
              }
          }
        if (insize == 0)
          break;
        if (grow)
          {
            char *memory;

            allocated = 2 * allocated;
            if (result == initial_result)
              memory = (char *) malloc (allocated);
            else
              memory = (char *) realloc (result, allocated);
            if (memory == NULL)
              {
                if (result != initial_result)
                  free (result);
                errno = ENOMEM;
                return -1;
              }
            if (result == initial_result)
              memcpy (memory, initial_result, length);
            result = memory;
          }
      }
  }

  /* Now get the conversion state back to the initial state.
     But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
#if defined _LIBICONV_VERSION \
    || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
         || defined __sun)
  for (;;)
    {
      char *outptr = result + length;
      size_t outsize = allocated - extra_alloc - length;
      size_t res;

      res = iconv (cd, NULL, NULL, &outptr, &outsize);
      length = outptr - result;
      if (res == (size_t)(-1))
        {
          if (errno == E2BIG)
            {
              char *memory;

              allocated = 2 * allocated;
              if (result == initial_result)
                memory = (char *) malloc (allocated);
              else
                memory = (char *) realloc (result, allocated);
              if (memory == NULL)
                {
                  if (result != initial_result)
                    free (result);
                  errno = ENOMEM;
                  return -1;
                }
              if (result == initial_result)
                memcpy (memory, initial_result, length);
              result = memory;
            }
          else
            {
              if (result != initial_result)
                {
                  int saved_errno = errno;
                  free (result);
                  errno = saved_errno;
                }
              return -1;
            }
        }
      else
        break;
    }
#endif

  /* The direct conversion succeeded.  */
  goto done;

 indirectly:
  /* The direct conversion failed.
     Use a conversion through UTF-8.  */
  if (offsets != NULL)
    {
      size_t i;

      for (i = 0; i < srclen; i++)
        offsets[i] = (size_t)(-1);

      last_length = (size_t)(-1);
    }
  length = 0;
  {
    const bool slowly = (offsets != NULL || handler == iconveh_error);
# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
    char utf8buf[utf8bufsize + 1];
    size_t utf8len = 0;
    const char *in1ptr = src;
    size_t in1size = srclen;
    bool do_final_flush1 = true;
    bool do_final_flush2 = true;

    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    /* Set to the initial state.  */
    if (cd1 != (iconv_t)(-1))
      iconv (cd1, NULL, NULL, NULL, NULL);
    if (cd2 != (iconv_t)(-1))
      iconv (cd2, NULL, NULL, NULL, NULL);
# endif

    while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
      {
        char *out1ptr = utf8buf + utf8len;
        size_t out1size = utf8bufsize - utf8len;
        bool incremented1;
        size_t res1;
        int errno1;

        /* Conversion step 1: from FROM_CODESET to UTF-8.  */
        if (in1size > 0)
          {
            if (offsets != NULL
                && length != last_length) /* ensure that offset[] be increasing */
              {
                offsets[in1ptr - src] = length;
                last_length = length;
              }
            if (cd1 != (iconv_t)(-1))
              {
                if (slowly)
                  res1 = iconv_carefully_1 (cd1,
                                            &in1ptr, &in1size,
                                            &out1ptr, &out1size,
                                            &incremented1);
                else
                  res1 = iconv_carefully (cd1,
                                          &in1ptr, &in1size,
                                          &out1ptr, &out1size,
                                          &incremented1);
              }
            else
              {
                /* FROM_CODESET is UTF-8.  */
                res1 = utf8conv_carefully (slowly,
                                           &in1ptr, &in1size,
                                           &out1ptr, &out1size,
                                           &incremented1);
              }
          }
        else if (do_final_flush1)
          {
            /* Now get the conversion state of CD1 back to the initial state.
               But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
            if (cd1 != (iconv_t)(-1))
              res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
            else
# endif
              res1 = 0;
            do_final_flush1 = false;
            incremented1 = true;
          }
        else
          {
            res1 = 0;
            incremented1 = true;
          }
        if (res1 == (size_t)(-1)
            && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
          {
            if (result != initial_result)
              {
                int saved_errno = errno;
                free (result);
                errno = saved_errno;
              }
            return -1;
          }
        if (res1 == (size_t)(-1)
            && errno == EILSEQ && handler != iconveh_error)
          {
            /* The input is invalid in FROM_CODESET.  Eat up one byte and
               emit a question mark.  Room for the question mark was allocated
               at the end of utf8buf.  */
            if (!incremented1)
              {
                if (in1size == 0)
                  abort ();
                in1ptr++;
                in1size--;
              }
            *out1ptr++ = '?';
            res1 = 0;
          }
        errno1 = errno;
        utf8len = out1ptr - utf8buf;

        if (offsets != NULL
            || in1size == 0
            || utf8len > utf8bufsize / 2
            || (res1 == (size_t)(-1) && errno1 == E2BIG))
          {
            /* Conversion step 2: from UTF-8 to TO_CODESET.  */
            const char *in2ptr = utf8buf;
            size_t in2size = utf8len;

            while (in2size > 0
                   || (in1size == 0 && !do_final_flush1 && do_final_flush2))
              {
                char *out2ptr = result + length;
                size_t out2size = allocated - extra_alloc - length;
                bool incremented2;
                size_t res2;
                bool grow;

                if (in2size > 0)
                  {
                    if (cd2 != (iconv_t)(-1))
                      res2 = iconv_carefully (cd2,
                                              &in2ptr, &in2size,
                                              &out2ptr, &out2size,
                                              &incremented2);
                    else
                      /* TO_CODESET is UTF-8.  */
                      res2 = utf8conv_carefully (false,
                                                 &in2ptr, &in2size,
                                                 &out2ptr, &out2size,
                                                 &incremented2);
                  }
                else /* in1size == 0 && !do_final_flush1
                        && in2size == 0 && do_final_flush2 */
                  {
                    /* Now get the conversion state of CD1 back to the initial
                       state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
                    if (cd2 != (iconv_t)(-1))
                      res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
                    else
# endif
                      res2 = 0;
                    do_final_flush2 = false;
                    incremented2 = true;
                  }

                length = out2ptr - result;
                grow = (length + extra_alloc > allocated / 2);
                if (res2 == (size_t)(-1))
                  {
                    if (errno == E2BIG)
                      grow = true;
                    else if (errno == EINVAL)
                      break;
                    else if (errno == EILSEQ && handler != iconveh_error)
                      {
                        /* Error handling can produce up to 10 bytes of ASCII
                           output.  But TO_CODESET may be UCS-2, UTF-16 or
                           UCS-4, so use CD2 here as well.  */
                        char scratchbuf[10];
                        size_t scratchlen;
                        ucs4_t uc;
                        const char *inptr;
                        size_t insize;
                        size_t res;

                        if (incremented2)
                          {
                            if (u8_prev (&uc, (const uint8_t *) in2ptr,
                                         (const uint8_t *) utf8buf)
                                == NULL)
                              abort ();
                          }
                        else
                          {
                            int n;
                            if (in2size == 0)
                              abort ();
                            n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
                                                  in2size);
                            in2ptr += n;
                            in2size -= n;
                          }

                        if (handler == iconveh_escape_sequence)
                          {
                            static char hex[16] = "0123456789ABCDEF";
                            scratchlen = 0;
                            scratchbuf[scratchlen++] = '\\';
                            if (uc < 0x10000)
                              scratchbuf[scratchlen++] = 'u';
                            else
                              {
                                scratchbuf[scratchlen++] = 'U';
                                scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
                              }
                            scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
                            scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
                            scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
                            scratchbuf[scratchlen++] = hex[uc & 15];
                          }
                        else
                          {
                            scratchbuf[0] = '?';
                            scratchlen = 1;
                          }

                        inptr = scratchbuf;
                        insize = scratchlen;
                        if (cd2 != (iconv_t)(-1))
                          res = iconv (cd2,
                                       (ICONV_CONST char **) &inptr, &insize,
                                       &out2ptr, &out2size);
                        else
                          {
                            /* TO_CODESET is UTF-8.  */
                            if (out2size >= insize)
                              {
                                memcpy (out2ptr, inptr, insize);
                                out2ptr += insize;
                                out2size -= insize;
                                inptr += insize;
                                insize = 0;
                                res = 0;
                              }
                            else
                              {
                                errno = E2BIG;
                                res = (size_t)(-1);
                              }
                          }
                        length = out2ptr - result;
                        if (res == (size_t)(-1) && errno == E2BIG)
                          {
                            char *memory;

                            allocated = 2 * allocated;
                            if (length + 1 + extra_alloc > allocated)
                              abort ();
                            if (result == initial_result)
                              memory = (char *) malloc (allocated);
                            else
                              memory = (char *) realloc (result, allocated);
                            if (memory == NULL)
                              {
                                if (result != initial_result)
                                  free (result);
                                errno = ENOMEM;
                                return -1;
                              }
                            if (result == initial_result)
                              memcpy (memory, initial_result, length);
                            result = memory;
                            grow = false;

                            out2ptr = result + length;
                            out2size = allocated - extra_alloc - length;
                            if (cd2 != (iconv_t)(-1))
                              res = iconv (cd2,
                                           (ICONV_CONST char **) &inptr,
                                           &insize,
                                           &out2ptr, &out2size);
                            else
                              {
                                /* TO_CODESET is UTF-8.  */
                                if (!(out2size >= insize))
                                  abort ();
                                memcpy (out2ptr, inptr, insize);
                                out2ptr += insize;
                                out2size -= insize;
                                inptr += insize;
                                insize = 0;
                                res = 0;
                              }
                            length = out2ptr - result;
                          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
                        /* Irix iconv() inserts a NUL byte if it cannot convert.
                           NetBSD iconv() inserts a question mark if it cannot
                           convert.
                           Only GNU libiconv and GNU libc are known to prefer
                           to fail rather than doing a lossy conversion.  */
                        if (res != (size_t)(-1) && res > 0)
                          {
                            errno = EILSEQ;
                            res = (size_t)(-1);
                          }
# endif
                        if (res == (size_t)(-1))
                          {
                            /* Failure converting the ASCII replacement.  */
                            if (result != initial_result)
                              {
                                int saved_errno = errno;
                                free (result);
                                errno = saved_errno;
                              }
                            return -1;
                          }
                      }
                    else
                      {
                        if (result != initial_result)
                          {
                            int saved_errno = errno;
                            free (result);
                            errno = saved_errno;
                          }
                        return -1;
                      }
                  }
                if (!(in2size > 0
                      || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
                  break;
                if (grow)
                  {
                    char *memory;

                    allocated = 2 * allocated;
                    if (result == initial_result)
                      memory = (char *) malloc (allocated);
                    else
                      memory = (char *) realloc (result, allocated);
                    if (memory == NULL)
                      {
                        if (result != initial_result)
                          free (result);
                        errno = ENOMEM;
                        return -1;
                      }
                    if (result == initial_result)
                      memcpy (memory, initial_result, length);
                    result = memory;
                  }
              }

            /* Move the remaining bytes to the beginning of utf8buf.  */
            if (in2size > 0)
              memmove (utf8buf, in2ptr, in2size);
            utf8len = in2size;
          }
void
u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *p)
{
  int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL);
  const uint8_t *s_end = s + n;
  int last_prop = LBP_BK; /* line break property of last non-space character */
  char *seen_space = NULL; /* Was a space seen after the last non-space character? */
  char *seen_space2 = NULL; /* At least two spaces after the last non-space? */

  /* Don't break inside multibyte characters.  */
  memset (p, UC_BREAK_PROHIBITED, n);

  while (s < s_end)
    {
      ucs4_t uc;
      int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
      int prop = unilbrkprop_lookup (uc);

      if (prop == LBP_BK)
        {
          /* Mandatory break.  */
          *p = UC_BREAK_MANDATORY;
          last_prop = LBP_BK;
          seen_space = NULL;
          seen_space2 = NULL;
        }
      else
        {
          char *q;

          /* Resolve property values whose behaviour is not fixed.  */
          switch (prop)
            {
            case LBP_AI:
              /* Resolve ambiguous.  */
              prop = LBP_AI_REPLACEMENT;
              break;
            case LBP_CB:
              /* This is arbitrary.  */
              prop = LBP_ID;
              break;
            case LBP_SA:
              /* We don't handle complex scripts yet.
                 Treat LBP_SA like LBP_XX.  */
            case LBP_XX:
              /* This is arbitrary.  */
              prop = LBP_AL;
              break;
            }

          /* Deal with spaces and combining characters.  */
          q = p;
          if (prop == LBP_SP)
            {
              /* Don't break just before a space.  */
              *p = UC_BREAK_PROHIBITED;
              seen_space2 = seen_space;
              seen_space = p;
            }
          else if (prop == LBP_ZW)
            {
              /* Don't break just before a zero-width space.  */
              *p = UC_BREAK_PROHIBITED;
              last_prop = LBP_ZW;
              seen_space = NULL;
              seen_space2 = NULL;
            }
          else if (prop == LBP_CM)
            {
              /* Don't break just before a combining character, except immediately after a
                 zero-width space.  */
              if (last_prop == LBP_ZW)
                {
                  /* Break after zero-width space.  */
                  *p = UC_BREAK_POSSIBLE;
                  /* A combining character turns a preceding space into LBP_ID.  */
                  last_prop = LBP_ID;
                }
              else
                {
                  *p = UC_BREAK_PROHIBITED;
                  /* A combining character turns a preceding space into LBP_ID.  */
                  if (seen_space != NULL)
                    {
                      q = seen_space;
                      seen_space = seen_space2;
                      prop = LBP_ID;
                      goto lookup_via_table;
                    }
                }
            }
          else
            {
             lookup_via_table:
              /* prop must be usable as an index for table 7.3 of UTR #14.  */
              if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
                abort ();

              if (last_prop == LBP_BK)
                {
                  /* Don't break at the beginning of a line.  */
                  *q = UC_BREAK_PROHIBITED;
                }
              else if (last_prop == LBP_ZW)
                {
                  /* Break after zero-width space.  */
                  *q = UC_BREAK_POSSIBLE;
                }
              else
                {
                  switch (unilbrk_table [last_prop] [prop])
                    {
                    case D:
                      *q = UC_BREAK_POSSIBLE;
                      break;
                    case I:
                      *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
                      break;
                    case P:
                      *q = UC_BREAK_PROHIBITED;
                      break;
                    default:
                      abort ();
                    }
                }
              last_prop = prop;
              seen_space = NULL;
              seen_space2 = NULL;
            }
        }

      s += count;
      p += count;
    }
}
Beispiel #5
0
int
u8_width_linebreaks (const uint8_t *s, size_t n,
                     int width, int start_column, int at_end_columns,
                     const char *o, const char *encoding,
                     char *p)
{
  const uint8_t *s_end;
  char *last_p;
  int last_column;
  int piece_width;

  u8_possible_linebreaks (s, n, encoding, p);

  s_end = s + n;
  last_p = NULL;
  last_column = start_column;
  piece_width = 0;
  while (s < s_end)
    {
      ucs4_t uc;
      int count = u8_mbtouc_unsafe (&uc, s, s_end - s);

      /* Respect the override.  */
      if (o != NULL && *o != UC_BREAK_UNDEFINED)
        *p = *o;

      if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
        {
          /* An atomic piece of text ends here.  */
          if (last_p != NULL && last_column + piece_width > width)
            {
              /* Insert a line break.  */
              *last_p = UC_BREAK_POSSIBLE;
              last_column = 0;
            }
        }

      if (*p == UC_BREAK_MANDATORY)
        {
          /* uc is a line break character.  */
          /* Start a new piece at column 0.  */
          last_p = NULL;
          last_column = 0;
          piece_width = 0;
        }
      else
        {
          /* uc is not a line break character.  */
          int w;

          if (*p == UC_BREAK_POSSIBLE)
            {
              /* Start a new piece.  */
              last_p = p;
              last_column += piece_width;
              piece_width = 0;
              /* No line break for the moment, may be turned into
                 UC_BREAK_POSSIBLE later, via last_p. */
            }

          *p = UC_BREAK_PROHIBITED;

          w = uc_width (uc, encoding);
          if (w >= 0) /* ignore control characters in the string */
            piece_width += w;
        }

      s += count;
      p += count;
      if (o != NULL)
        o += count;
    }

  /* The last atomic piece of text ends here.  */
  if (last_p != NULL && last_column + piece_width + at_end_columns > width)
    {
      /* Insert a line break.  */
      *last_p = UC_BREAK_POSSIBLE;
      last_column = 0;
    }

  return last_column + piece_width;
}