size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; /* * Account for escaped sequences within string length * calculations. This follows the logic in term_word() as we * must calculate the width of produced strings. */ sz = 0; skip = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) sz += cond_width(p, *cp++, &skip); switch (*cp) { case '\\': cp++; esc = mandoc_escape(&cp, &seq, &ssz); if (ESCAPE_ERROR == esc) continue; rhs = NULL; switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, ssz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, ssz); if (uc < 0) continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { rhs = mchars_spec2str(p->symtab, seq, ssz, &rsz); if (rhs != NULL) break; } else { uc = mchars_spec2cp(p->symtab, seq, ssz); if (uc > 0) sz += cond_width(p, uc, &skip); } continue; case ESCAPE_SKIPCHAR: skip = 1; continue; case ESCAPE_OVERSTRIKE: rsz = 0; rhs = seq + ssz; while (seq < rhs) { if (*seq == '\\') { mandoc_escape(&seq, NULL, NULL); continue; } i = (*p->width)(p, *seq++); if (rsz < i) rsz = i; } sz += rsz; continue; default: continue; } /* * Common handling for Unicode and numbered * character escape sequences. */ if (rhs == NULL) { if (p->enc == TERMENC_ASCII) { rhs = ascii_uc2str(uc); rsz = strlen(rhs); } else { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; sz += cond_width(p, uc, &skip); continue; } } if (skip) { skip = 0; break; } /* * Common handling for all escape sequences * printing more than one character. */ for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case ASCII_NBRSP: sz += cond_width(p, ' ', &skip); cp++; break; case ASCII_HYPH: sz += cond_width(p, '-', &skip); cp++; /* FALLTHROUGH */ case ASCII_BREAK: break; default: break; } } return(sz); }
size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; int ssz, c; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; /* * Account for escaped sequences within string length * calculations. This follows the logic in term_word() as we * must calculate the width of produced strings. */ sz = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) sz += (*p->width)(p, *cp++); c = 0; switch (*cp) { case ('\\'): cp++; esc = mandoc_escape(&cp, &seq, &ssz); if (ESCAPE_ERROR == esc) return(sz); if (TERMENC_ASCII != p->enc) switch (esc) { case (ESCAPE_UNICODE): c = mchars_num2uc (seq + 1, ssz - 1); if ('\0' == c) break; sz += (*p->width)(p, c); continue; case (ESCAPE_SPECIAL): c = mchars_spec2cp (p->symtab, seq, ssz); if (c <= 0) break; sz += (*p->width)(p, c); continue; default: break; } rhs = NULL; switch (esc) { case (ESCAPE_UNICODE): sz += (*p->width)(p, '?'); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, ssz); if ('\0' != c) sz += (*p->width)(p, c); break; case (ESCAPE_SPECIAL): rhs = mchars_spec2str (p->symtab, seq, ssz, &rsz); if (ssz != 1 || rhs) break; rhs = seq; rsz = ssz; break; default: break; } if (NULL == rhs) break; for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case (ASCII_NBRSP): sz += (*p->width)(p, ' '); cp++; break; case (ASCII_HYPH): sz += (*p->width)(p, '-'); cp++; break; default: break; } } return(sz); }
/* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This * handles word styling. */ void term_word(struct termp *p, const char *word) { const char nbrsp[2] = { ASCII_NBRSP, 0 }; const char *seq, *cp; int sz, uc; size_t ssz; enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! (TERMP_KEEP & p->flags)) { bufferc(p, ' '); if (TERMP_SENTENCE & p->flags) bufferc(p, ' '); } else bufferc(p, ASCII_NBRSP); } if (TERMP_PREKEEP & p->flags) p->flags |= TERMP_KEEP; if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; else p->flags |= TERMP_NOSPACE; p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); p->skipvsp = 0; while ('\0' != *word) { if ('\\' != *word) { if (TERMP_SKIPCHAR & p->flags) { p->flags &= ~TERMP_SKIPCHAR; word++; continue; } if (TERMP_NBRWORD & p->flags) { if (' ' == *word) { encode(p, nbrsp, 1); word++; continue; } ssz = strcspn(word, "\\ "); } else ssz = strcspn(word, "\\"); encode(p, word, ssz); word += (int)ssz; continue; } word++; esc = mandoc_escape(&word, &seq, &sz); if (ESCAPE_ERROR == esc) continue; switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); if (uc < 0) continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { cp = mchars_spec2str(p->symtab, seq, sz, &ssz); if (cp != NULL) encode(p, cp, ssz); } else { uc = mchars_spec2cp(p->symtab, seq, sz); if (uc > 0) encode1(p, uc); } continue; case ESCAPE_FONTBOLD: term_fontrepl(p, TERMFONT_BOLD); continue; case ESCAPE_FONTITALIC: term_fontrepl(p, TERMFONT_UNDER); continue; case ESCAPE_FONTBI: term_fontrepl(p, TERMFONT_BI); continue; case ESCAPE_FONT: /* FALLTHROUGH */ case ESCAPE_FONTROMAN: term_fontrepl(p, TERMFONT_NONE); continue; case ESCAPE_FONTPREV: term_fontlast(p); continue; case ESCAPE_NOSPACE: if (TERMP_SKIPCHAR & p->flags) p->flags &= ~TERMP_SKIPCHAR; else if ('\0' == *word) p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); continue; case ESCAPE_SKIPCHAR: p->flags |= TERMP_SKIPCHAR; continue; case ESCAPE_OVERSTRIKE: cp = seq + sz; while (seq < cp) { if (*seq == '\\') { mandoc_escape(&seq, NULL, NULL); continue; } encode1(p, *seq++); if (seq < cp) encode(p, "\b", 1); } default: continue; } /* * Common handling for Unicode and numbered * character escape sequences. */ if (p->enc == TERMENC_ASCII) { cp = ascii_uc2str(uc); encode(p, cp, strlen(cp)); } else { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; encode1(p, uc); } } p->flags &= ~TERMP_NBRWORD; }
/* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This * handles word styling. */ void term_word(struct termp *p, const char *word) { const char *seq, *cp; char c; int sz, uc; size_t ssz; enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! (TERMP_KEEP & p->flags)) { if (TERMP_PREKEEP & p->flags) p->flags |= TERMP_KEEP; bufferc(p, ' '); if (TERMP_SENTENCE & p->flags) bufferc(p, ' '); } else bufferc(p, ASCII_NBRSP); } if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; else p->flags |= TERMP_NOSPACE; p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); while ('\0' != *word) { if ((ssz = strcspn(word, "\\")) > 0) encode(p, word, ssz); word += (int)ssz; if ('\\' != *word) continue; word++; esc = mandoc_escape(&word, &seq, &sz); if (ESCAPE_ERROR == esc) break; if (TERMENC_ASCII != p->enc) switch (esc) { case (ESCAPE_UNICODE): uc = mchars_num2uc(seq + 1, sz - 1); if ('\0' == uc) break; encode1(p, uc); continue; case (ESCAPE_SPECIAL): uc = mchars_spec2cp(p->symtab, seq, sz); if (uc <= 0) break; encode1(p, uc); continue; default: break; } switch (esc) { case (ESCAPE_UNICODE): encode1(p, '?'); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, sz); if ('\0' != c) encode(p, &c, 1); break; case (ESCAPE_SPECIAL): cp = mchars_spec2str(p->symtab, seq, sz, &ssz); if (NULL != cp) encode(p, cp, ssz); else if (1 == ssz) encode(p, seq, sz); break; case (ESCAPE_FONTBOLD): term_fontrepl(p, TERMFONT_BOLD); break; case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; case (ESCAPE_FONT): /* FALLTHROUGH */ case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; case (ESCAPE_FONTPREV): term_fontlast(p); break; case (ESCAPE_NOSPACE): if ('\0' == *word) p->flags |= TERMP_NOSPACE; break; default: break; } } }