/* Run u8_grapheme_breaks on INPUT and compare the result with EXPECTED.

   EXPECTED is a NUL-terminated pattern; position i is expected to be flagged
   as a break exactly when EXPECTED[i] is '#'.  The number of bytes examined
   is strlen (EXPECTED), and that many bytes are read from INPUT, so the
   caller must supply an INPUT at least that long.

   On the first mismatch the input bytes (hex), the expected flags and the
   actual flags are written to stderr and the process aborts.  The process
   also aborts if the result buffer cannot be allocated.  */
static void
test_u8_grapheme_breaks (const char *input, const char *expected)
{
  const uint8_t *bytes = (const uint8_t *) input;
  size_t len = strlen (expected);
  char *actual = malloc (len);
  size_t pos;

  if (actual == NULL)
    abort ();

  /* Pre-fill with a value that is neither 0 nor 1, so that any entry the
     function under test leaves untouched fails the comparison below.  */
  memset (actual, 0xcc, len);

  u8_grapheme_breaks (bytes, len, actual);

  for (pos = 0; pos < len; pos++)
    {
      size_t k;

      if (actual[pos] == (expected[pos] == '#'))
        continue;

      /* Mismatch: dump all three rows, then give up.  */
      fputs ("wrong grapheme breaks:\n", stderr);

      fputs (" input:", stderr);
      for (k = 0; k < len; k++)
        fprintf (stderr, " %02x", bytes[k]);
      fputc ('\n', stderr);

      fputs ("expected:", stderr);
      for (k = 0; k < len; k++)
        fprintf (stderr, " %d", expected[k] == '#');
      fputc ('\n', stderr);

      fputs (" actual:", stderr);
      for (k = 0; k < len; k++)
        fprintf (stderr, " %d", actual[k]);
      fputc ('\n', stderr);

      abort ();
    }

  free (actual);
}
/* Determine the grapheme breaks of the N-byte string S, interpreted in the
   encoding reported by locale_charset (), and store the result in
   P[0..N-1].  Nothing is read or written when N is 0.

   If the locale encoding is UTF-8, the work is delegated directly to
   u8_grapheme_breaks.  Otherwise S is converted to UTF-8, the breaks are
   computed on the converted string, and the result is mapped back to S
   through an offset table: P[i] receives the flag found at the converted
   offset for byte i, and bytes of S with no recorded offset get 0.

   If the conversion path cannot be used (the offset table would be too
   large, an allocation fails, or the conversion itself fails), the function
   falls back to ascii_grapheme_breaks when C_CTYPE_ASCII is true, and
   otherwise marks only P[0] as a break.  */
void
ulc_grapheme_breaks (const char *s, size_t n, char *p)
{
  const char *encoding;
  size_t *offsets;

  if (n == 0)
    return;

  encoding = locale_charset ();
  if (is_utf8_encoding (encoding))
    {
      u8_grapheme_breaks ((const uint8_t *) s, n, p);
      return;
    }

  /* Convert the string to UTF-8 and build a translation table
     from offsets into s to offsets into the translated string.
     Guard the size computation: if n * sizeof (size_t) would wrap around,
     malloc would return a table that is too small for the n entries the
     converter is asked to fill.  Treat that case like an allocation
     failure.  */
  if (n <= (size_t)(-1) / sizeof (size_t))
    offsets = (size_t *) malloc (n * sizeof (size_t));
  else
    offsets = NULL;

  if (offsets != NULL)
    {
      uint8_t *t;
      size_t m;

      /* NOTE(review): iconveh_question_mark presumably makes the converter
         substitute '?' for unconvertible input - confirm against the
         striconveh documentation.  */
      t = u8_conv_from_encoding (encoding, iconveh_question_mark,
                                 s, n, offsets, NULL, &m);
      if (t != NULL)
        {
          /* An empty conversion result needs no break buffer.  */
          char *q = (char *) (m > 0 ? malloc (m) : NULL);

          if (m == 0 || q != NULL)
            {
              size_t i;

              /* Determine the grapheme breaks of the UTF-8 string.  */
              u8_grapheme_breaks (t, m, q);

              /* Translate the result back to the original string.
                 Entries equal to (size_t)(-1) have no counterpart in the
                 converted string and keep the 0 written by memset.  */
              memset (p, 0, n);
              for (i = 0; i < n; i++)
                if (offsets[i] != (size_t)(-1))
                  p[i] = q[offsets[i]];

              free (q);
              free (t);
              free (offsets);
              return;
            }
          free (t);
        }
      free (offsets);
    }

  /* Impossible to convert.  */

#if C_CTYPE_ASCII
  /* Fall back to ASCII as best we can.  */
  ascii_grapheme_breaks (s, n, p);
#else
  /* We cannot make any assumptions.  */
  p[0] = 1;
  memset (p + 1, 0, n - 1);
#endif
}