void ulc_wordbreaks (const char *s, size_t n, char *p) { if (n > 0) { const char *encoding = locale_charset (); if (is_utf8_encoding (encoding)) u8_wordbreaks ((const uint8_t *) s, n, p); else { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); if (offsets != NULL) { uint8_t *t; size_t m; t = u8_conv_from_encoding (encoding, iconveh_question_mark, s, n, offsets, NULL, &m); if (t != NULL) { char *q = (char *) (m > 0 ? malloc (m) : NULL); if (m == 0 || q != NULL) { size_t i; /* Determine the word breaks of the UTF-8 string. */ u8_wordbreaks (t, m, q); /* Translate the result back to the original string. */ memset (p, 0, n); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) p[i] = q[offsets[i]]; free (q); free (t); free (offsets); return; } free (t); } free (offsets); } /* Impossible to convert. */ #if C_CTYPE_ASCII if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ u8_wordbreaks ((const uint8_t *) s, n, p); return; } #endif /* We have a non-ASCII string and cannot convert it. Don't produce any word breaks. */ memset (p, 0, n); } } }
int ulc_width_linebreaks (const char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p) { if (n > 0) { if (is_utf8_encoding (encoding)) return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); else { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); if (offsets != NULL) { uint8_t *t; size_t m; t = u8_conv_from_encoding (encoding, iconveh_question_mark, s, n, offsets, NULL, &m); if (t != NULL) { char *memory = (char *) (m > 0 ? malloc (m + (o != NULL ? m : 0)) : NULL); if (m == 0 || memory != NULL) { char *q = (char *) memory; char *o8 = (o != NULL ? (char *) (q + m) : NULL); int res_column; size_t i; /* Translate the overrides to the UTF-8 string. */ if (o != NULL) { memset (o8, UC_BREAK_UNDEFINED, m); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) o8[offsets[i]] = o[i]; } /* Determine the line breaks of the UTF-8 string. */ res_column = u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q); /* Translate the result back to the original string. */ memset (p, UC_BREAK_PROHIBITED, n); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) p[i] = q[offsets[i]]; free (memory); free (t); free (offsets); return res_column; } free (t); } free (offsets); } /* Impossible to convert. */ #if C_CTYPE_ASCII if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); } #endif /* We have a non-ASCII string and cannot convert it. Don't produce line breaks except those already present in the input string. All we assume here is that the encoding is minimally ASCII compatible. */ { const char *s_end = s + n; while (s < s_end) { *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); s++; p++; if (o != NULL) o++; } /* We cannot compute widths in this case. */ } } } return start_column; }
void ulc_possible_linebreaks (const char *s, size_t n, const char *encoding, char *p) { if (n > 0) { if (is_utf8_encoding (encoding)) u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); else { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); if (offsets != NULL) { uint8_t *t; size_t m; t = u8_conv_from_encoding (encoding, iconveh_question_mark, s, n, offsets, NULL, &m); if (t != NULL) { char *q = (char *) (m > 0 ? malloc (m) : NULL); if (m == 0 || q != NULL) { size_t i; /* Determine the possible line breaks of the UTF-8 string. */ u8_possible_linebreaks (t, m, encoding, q); /* Translate the result back to the original string. */ memset (p, UC_BREAK_PROHIBITED, n); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) p[i] = q[offsets[i]]; free (q); free (t); free (offsets); return; } free (t); } free (offsets); } /* Impossible to convert. */ #if C_CTYPE_ASCII if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); return; } #endif /* We have a non-ASCII string and cannot convert it. Don't produce line breaks except those already present in the input string. All we assume here is that the encoding is minimally ASCII compatible. */ { const char *s_end = s + n; while (s < s_end) { *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); s++; p++; } } } } }
void ulc_grapheme_breaks (const char *s, size_t n, char *p) { if (n > 0) { const char *encoding = locale_charset (); if (is_utf8_encoding (encoding)) u8_grapheme_breaks ((const uint8_t *) s, n, p); else { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); if (offsets != NULL) { uint8_t *t; size_t m; t = u8_conv_from_encoding (encoding, iconveh_question_mark, s, n, offsets, NULL, &m); if (t != NULL) { char *q = (char *) (m > 0 ? malloc (m) : NULL); if (m == 0 || q != NULL) { size_t i; /* Determine the grapheme breaks of the UTF-8 string. */ u8_grapheme_breaks (t, m, q); /* Translate the result back to the original string. */ memset (p, 0, n); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) p[i] = q[offsets[i]]; free (q); free (t); free (offsets); return; } free (t); } free (offsets); } /* Impossible to convert. */ #if C_CTYPE_ASCII /* Fall back to ASCII as best we can. */ ascii_grapheme_breaks (s, n, p); #else /* We cannot make any assumptions. */ p[0] = 1; memset (p + 1, 0, n - 1); #endif } } }