void u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char *p) { int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL); const uint16_t *s_end = s + n; int last_prop = LBP_BK; /* line break property of last non-space character */ char *seen_space = NULL; /* Was a space seen after the last non-space character? */ char *seen_space2 = NULL; /* At least two spaces after the last non-space? */ /* Don't break inside multibyte characters. */ memset (p, UC_BREAK_PROHIBITED, n); while (s < s_end) { ucs4_t uc; int count = u16_mbtouc_unsafe (&uc, s, s_end - s); int prop = unilbrkprop_lookup (uc); if (prop == LBP_BK) { /* Mandatory break. */ *p = UC_BREAK_MANDATORY; last_prop = LBP_BK; seen_space = NULL; seen_space2 = NULL; } else { char *q; /* Resolve property values whose behaviour is not fixed. */ switch (prop) { case LBP_AI: /* Resolve ambiguous. */ prop = LBP_AI_REPLACEMENT; break; case LBP_CB: /* This is arbitrary. */ prop = LBP_ID; break; case LBP_SA: /* We don't handle complex scripts yet. Treat LBP_SA like LBP_XX. */ case LBP_XX: /* This is arbitrary. */ prop = LBP_AL; break; } /* Deal with spaces and combining characters. */ q = p; if (prop == LBP_SP) { /* Don't break just before a space. */ *p = UC_BREAK_PROHIBITED; seen_space2 = seen_space; seen_space = p; } else if (prop == LBP_ZW) { /* Don't break just before a zero-width space. */ *p = UC_BREAK_PROHIBITED; last_prop = LBP_ZW; seen_space = NULL; seen_space2 = NULL; } else if (prop == LBP_CM) { /* Don't break just before a combining character, except immediately after a zero-width space. */ if (last_prop == LBP_ZW) { /* Break after zero-width space. */ *p = UC_BREAK_POSSIBLE; /* A combining character turns a preceding space into LBP_ID. */ last_prop = LBP_ID; } else { *p = UC_BREAK_PROHIBITED; /* A combining character turns a preceding space into LBP_ID. */ if (seen_space != NULL) { q = seen_space; seen_space = seen_space2; prop = LBP_ID; goto lookup_via_table; } } } else { lookup_via_table: /* prop must be usable as an index for table 7.3 of UTR #14. */ if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0]))) abort (); if (last_prop == LBP_BK) { /* Don't break at the beginning of a line. */ *q = UC_BREAK_PROHIBITED; } else if (last_prop == LBP_ZW) { /* Break after zero-width space. */ *q = UC_BREAK_POSSIBLE; } else { switch (unilbrk_table [last_prop] [prop]) { case D: *q = UC_BREAK_POSSIBLE; break; case I: *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED); break; case P: *q = UC_BREAK_PROHIBITED; break; default: abort (); } } last_prop = prop; seen_space = NULL; seen_space2 = NULL; } } s += count; p += count; } }
int u16_width_linebreaks (const uint16_t *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p) { const uint16_t *s_end; char *last_p; int last_column; int piece_width; u16_possible_linebreaks (s, n, encoding, p); s_end = s + n; last_p = NULL; last_column = start_column; piece_width = 0; while (s < s_end) { ucs4_t uc; int count = u16_mbtouc_unsafe (&uc, s, s_end - s); /* Respect the override. */ if (o != NULL && *o != UC_BREAK_UNDEFINED) *p = *o; if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) { /* An atomic piece of text ends here. */ if (last_p != NULL && last_column + piece_width > width) { /* Insert a line break. */ *last_p = UC_BREAK_POSSIBLE; last_column = 0; } } if (*p == UC_BREAK_MANDATORY) { /* uc is a line break character. */ /* Start a new piece at column 0. */ last_p = NULL; last_column = 0; piece_width = 0; } else { /* uc is not a line break character. */ int w; if (*p == UC_BREAK_POSSIBLE) { /* Start a new piece. */ last_p = p; last_column += piece_width; piece_width = 0; /* No line break for the moment, may be turned into UC_BREAK_POSSIBLE later, via last_p. */ } *p = UC_BREAK_PROHIBITED; w = uc_width (uc, encoding); if (w >= 0) /* ignore control characters in the string */ piece_width += w; } s += count; p += count; if (o != NULL) o += count; } /* The last atomic piece of text ends here. */ if (last_p != NULL && last_column + piece_width + at_end_columns > width) { /* Insert a line break. */ *last_p = UC_BREAK_POSSIBLE; last_column = 0; } return last_column + piece_width; }