int main () { /* Test case n = 0. */ u8_width_linebreaks (NULL, 0, 80, 0, 0, NULL, "GB18030", NULL); { static const uint8_t input[91] = /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ "Gr\303\274\303\237 Gott. \320\227\320\264\321\200\320\260\320\262\321\201\321\202\320\262\321\203\320\271\321\202\320\265! x=(-b\302\261sqrt(b\302\262-4ac))/(2a) \346\227\245\346\234\254\350\252\236,\344\270\255\346\226\207,\355\225\234\352\270\200\n"; { char *p = (char *) malloc (SIZEOF (input)); size_t i; u8_width_linebreaks (input, SIZEOF (input), 25, 0, 0, NULL, "GB18030", p); for (i = 0; i < 91; i++) { ASSERT (p[i] == (i == 90 ? UC_BREAK_MANDATORY : i == 39 || i == 61 ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED)); } free (p); } { char *p = (char *) malloc (SIZEOF (input)); size_t i; u8_width_linebreaks (input, SIZEOF (input), 25, 0, 0, NULL, "GB2312", p); for (i = 0; i < 91; i++) { ASSERT (p[i] == (i == 90 ? UC_BREAK_MANDATORY : i == 13 || i == 39 || i == 61 ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED)); } free (p); } } return 0; }
/* Filter program: reads the whole of standard input, asks
   u8_width_linebreaks (with "UTF-8" as the encoding argument) where
   lines may be broken for the width given as argv[1], and copies the
   input to standard output with a newline written before every byte
   that was marked UC_BREAK_POSSIBLE.

   Returns 0 on success.  Returns 1 when the number of arguments is not
   exactly one, or when the break array cannot be allocated.  Calls
   abort () if an unexpected break classification is encountered.  */
int
main (int argc, char * argv[])
{
  if (argc == 2)
    {
      /* Insert line breaks for a given width.  */
      int width = atoi (argv[1]);
      char *input = read_file (stdin);
      /* Keep the length as size_t: narrowing strlen's result to int
         would truncate or turn negative for very large inputs.  */
      size_t length = strlen (input);
      char *breaks = malloc (length);
      size_t i;

      /* malloc (0) may legitimately return NULL, so only a failed
         non-empty allocation is an error.  */
      if (breaks == NULL && length > 0)
        {
          fputs ("out of memory\n", stderr);
          return 1;
        }

      u8_width_linebreaks ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);

      for (i = 0; i < length; i++)
        {
          switch (breaks[i])
            {
            case UC_BREAK_POSSIBLE:
              /* Break the line in front of this byte.  */
              putc ('\n', stdout);
              break;
            case UC_BREAK_MANDATORY:
              /* Nothing to insert; the input byte is emitted below.  */
              break;
            case UC_BREAK_PROHIBITED:
              break;
            default:
              abort ();
            }
          putc (input[i], stdout);
        }

      free (breaks);
      /* NOTE(review): input is not released here; how read_file
         allocates its result is not visible from this block.  */

      return 0;
    }
  else
    return 1;
}
int ulc_width_linebreaks (const char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p) { if (n > 0) { if (is_utf8_encoding (encoding)) return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); else { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); if (offsets != NULL) { uint8_t *t; size_t m; t = u8_conv_from_encoding (encoding, iconveh_question_mark, s, n, offsets, NULL, &m); if (t != NULL) { char *memory = (char *) (m > 0 ? malloc (m + (o != NULL ? m : 0)) : NULL); if (m == 0 || memory != NULL) { char *q = (char *) memory; char *o8 = (o != NULL ? (char *) (q + m) : NULL); int res_column; size_t i; /* Translate the overrides to the UTF-8 string. */ if (o != NULL) { memset (o8, UC_BREAK_UNDEFINED, m); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) o8[offsets[i]] = o[i]; } /* Determine the line breaks of the UTF-8 string. */ res_column = u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q); /* Translate the result back to the original string. */ memset (p, UC_BREAK_PROHIBITED, n); for (i = 0; i < n; i++) if (offsets[i] != (size_t)(-1)) p[i] = q[offsets[i]]; free (memory); free (t); free (offsets); return res_column; } free (t); } free (offsets); } /* Impossible to convert. */ #if C_CTYPE_ASCII if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); } #endif /* We have a non-ASCII string and cannot convert it. Don't produce line breaks except those already present in the input string. All we assume here is that the encoding is minimally ASCII compatible. */ { const char *s_end = s + n; while (s < s_end) { *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n' ? 
UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); s++; p++; if (o != NULL) o++; } /* We cannot compute widths in this case. */ } } } return start_column; }