/**
 * Prepares a line breaking context so that processing can start at the
 * first character of the input.
 *
 * The language and its language-specific properties are stored in the
 * context, the "last" and "new" classes are reset to LBP_Undefined, and
 * the "current" class is set to the resolved class of \a ch before
 * treat_first_char() is applied to the context.
 *
 * @param[in,out] lbpCtx  pointer to the line breaking context
 * @param[in]     ch      the first character to process
 * @param[in]     lang    language of the input
 * @post                  the line breaking context is initialized
 */
void lb_init_break_context(
        struct LineBreakContext* lbpCtx,
        utf32_t ch,
        const char* lang)
{
    enum LineBreakClass first_class;

    /* Language data first: the class lookup below reads lbpLang. */
    lbpCtx->lang = lang;
    lbpCtx->lbpLang = get_lb_prop_lang(lang);

    /* Nothing has been seen before or after the first character yet. */
    lbpCtx->lbcNew = LBP_Undefined;
    lbpCtx->lbcLast = LBP_Undefined;

    first_class = get_char_lb_class_lang(ch, lbpCtx->lbpLang);
    lbpCtx->lbcCur = resolve_lb_class(first_class, lang);

    treat_first_char(lbpCtx);
}
/**
 * Feeds the next code point into the line breaking context and reports
 * the break detected before it.
 *
 * The previous "new" class becomes the "last" class, and the unresolved
 * class of \a ch becomes the "new" class.  get_lb_result_simple() is
 * consulted first:
 *  - on #LINEBREAK_MUSTBREAK the context restarts with the resolved
 *    class of \a ch as the current class (treat_first_char() applied);
 *  - on LINEBREAK_UNDEFINED the new class is resolved and the result
 *    of get_lb_result_lookup() is returned instead;
 *  - any other result is returned unchanged.
 *
 * @param[in,out] lbpCtx  pointer to the line breaking context
 * @param[in]     ch      Unicode code point
 * @return                break result, one of #LINEBREAK_MUSTBREAK,
 *                        #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
 * @post                  the line breaking context is updated
 */
int lb_process_next_char(
        struct LineBreakContext *lbpCtx,
        utf32_t ch )
{
    int result;

    /* Shift the class history by one character. */
    lbpCtx->lbcLast = lbpCtx->lbcNew;
    lbpCtx->lbcNew = get_char_lb_class_lang(ch, lbpCtx->lbpLang);

    result = get_lb_result_simple(lbpCtx);

    if (result == LINEBREAK_MUSTBREAK)
    {
        /* A new line starts with this character. */
        lbpCtx->lbcCur = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
        treat_first_char(lbpCtx);
        return result;
    }

    if (result == LINEBREAK_UNDEFINED)
    {
        /* Not decided by the simple check: resolve, then look it up. */
        lbpCtx->lbcNew = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
        return get_lb_result_lookup(lbpCtx);
    }

    return result;
}
/**
 * Computes the line breaking information for a generic input string.
 *
 * Characters are fetched one at a time through \a get_next_char.  After
 * each fetch, the entry of \a brks just before the new read position
 * receives the break result for that character, and every still-unfilled
 * entry before it is marked #LINEBREAK_INSIDEACHAR.  The entry for the
 * last character fetched is always #LINEBREAK_MUSTBREAK.  If the very
 * first fetch yields EOS, nothing is written.
 *
 * @param[in]  s              input string
 * @param[in]  len            length of the input
 * @param[in]  lang           language of the input
 * @param[out] brks           pointer to the output breaking data,
 *                            containing #LINEBREAK_MUSTBREAK,
 *                            #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
 *                            or #LINEBREAK_INSIDEACHAR
 * @param[in]  get_next_char  function to get the next UTF-32 character
 */
void set_linebreaks(
        const void *s,
        size_t len,
        const char *lang,
        char *brks,
        get_next_char_t get_next_char)
{
    utf32_t cp;
    enum LineBreakClass clsCur;                 /* class on the left of the pair */
    enum LineBreakClass clsNew = LBP_Undefined; /* class of the newest character */
    enum LineBreakClass clsPrev;                /* unresolved class seen before clsNew */
    struct LineBreakProperties *props;
    size_t readPos = 0;
    size_t outPos = (size_t)-1;     /* wraps to 0 on the first increment */
    int lineStart = 1;              /* non-zero: clsCur begins a new line */

    cp = get_next_char(s, len, &readPos);
    if (cp == EOS)
    {
        return;
    }
    props = get_lb_prop_lang(lang);
    clsCur = resolve_lb_class(get_char_lb_class_lang(cp, props), lang);

    /* Process the input till the end of string; lineStart is raised
     * again after every explicit break */
    for (;;)
    {
        if (lineStart)
        {
            /* Special treatment for the first character of a line */
            if (clsCur == LBP_LF || clsCur == LBP_NL)
            {
                clsCur = LBP_BK;
            }
            else if (clsCur == LBP_CB)
            {
                clsCur = LBP_BA;
            }
            else if (clsCur == LBP_SP)
            {
                clsCur = LBP_WJ;
            }
            lineStart = 0;
        }

        /* Everything before the last unit of the pending character is
         * inside that character */
        ++outPos;
        while (outPos < readPos - 1)
        {
            brks[outPos] = LINEBREAK_INSIDEACHAR;
            ++outPos;
        }
        assert(outPos == readPos - 1);

        clsPrev = clsNew;
        cp = get_next_char(s, len, &readPos);
        if (cp == EOS)
        {
            break;
        }
        clsNew = get_char_lb_class_lang(cp, props);

        /* Explicit break: after BK, or after CR not followed by LF */
        if (clsCur == LBP_BK || (clsCur == LBP_CR && clsNew != LBP_LF))
        {
            brks[outPos] = LINEBREAK_MUSTBREAK;
            clsCur = resolve_lb_class(clsNew, lang);
            lineStart = 1;
            continue;
        }

        /* Classes that are decided without the pair table */
        if (clsNew == LBP_SP)
        {
            /* clsCur is deliberately left unchanged across spaces */
            brks[outPos] = LINEBREAK_NOBREAK;
            continue;
        }
        if (clsNew == LBP_BK || clsNew == LBP_LF || clsNew == LBP_NL)
        {
            brks[outPos] = LINEBREAK_NOBREAK;
            clsCur = LBP_BK;
            continue;
        }
        if (clsNew == LBP_CR)
        {
            brks[outPos] = LINEBREAK_NOBREAK;
            clsCur = LBP_CR;
            continue;
        }
        if (clsNew == LBP_CB)
        {
            brks[outPos] = LINEBREAK_ALLOWBREAK;
            clsCur = LBP_BA;
            continue;
        }

        clsNew = resolve_lb_class(clsNew, lang);

        /* Both classes index baTable (offset by one) */
        assert(clsCur <= LBP_JT);
        assert(clsNew <= LBP_JT);
        switch (baTable[clsCur - 1][clsNew - 1])
        {
        case DIR_BRK:
            brks[outPos] = LINEBREAK_ALLOWBREAK;
            break;
        case CMI_BRK:
        case IND_BRK:
            /* A break is allowed only when a space came in between */
            brks[outPos] = (clsPrev == LBP_SP)
                            ? LINEBREAK_ALLOWBREAK
                            : LINEBREAK_NOBREAK;
            break;
        case CMP_BRK:
            brks[outPos] = LINEBREAK_NOBREAK;
            if (clsPrev != LBP_SP)
            {
                /* Keep clsCur: skip the update below */
                continue;
            }
            break;
        case PRH_BRK:
            brks[outPos] = LINEBREAK_NOBREAK;
            break;
        default:
            break;
        }

        clsCur = clsNew;
    }

    assert(outPos == readPos - 1 && readPos <= len);

    /* Break after the last character */
    brks[outPos] = LINEBREAK_MUSTBREAK;

    /* When the input contains incomplete sequences */
    for (; readPos < len; ++readPos)
    {
        brks[readPos] = LINEBREAK_INSIDEACHAR;
    }
}