size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; /* * Account for escaped sequences within string length * calculations. This follows the logic in term_word() as we * must calculate the width of produced strings. */ sz = 0; skip = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) sz += cond_width(p, *cp++, &skip); switch (*cp) { case '\\': cp++; esc = mandoc_escape(&cp, &seq, &ssz); if (ESCAPE_ERROR == esc) continue; rhs = NULL; switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, ssz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, ssz); if (uc < 0) continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { rhs = mchars_spec2str(p->symtab, seq, ssz, &rsz); if (rhs != NULL) break; } else { uc = mchars_spec2cp(p->symtab, seq, ssz); if (uc > 0) sz += cond_width(p, uc, &skip); } continue; case ESCAPE_SKIPCHAR: skip = 1; continue; case ESCAPE_OVERSTRIKE: rsz = 0; rhs = seq + ssz; while (seq < rhs) { if (*seq == '\\') { mandoc_escape(&seq, NULL, NULL); continue; } i = (*p->width)(p, *seq++); if (rsz < i) rsz = i; } sz += rsz; continue; default: continue; } /* * Common handling for Unicode and numbered * character escape sequences. */ if (rhs == NULL) { if (p->enc == TERMENC_ASCII) { rhs = ascii_uc2str(uc); rsz = strlen(rhs); } else { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; sz += cond_width(p, uc, &skip); continue; } } if (skip) { skip = 0; break; } /* * Common handling for all escape sequences * printing more than one character. */ for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case ASCII_NBRSP: sz += cond_width(p, ' ', &skip); cp++; break; case ASCII_HYPH: sz += cond_width(p, '-', &skip); cp++; /* FALLTHROUGH */ case ASCII_BREAK: break; default: break; } } return(sz); }
static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; int c, len, nospace; const char *seq; enum mandoc_esc esc; static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; nospace = 0; while ('\0' != *p) { if (HTML_SKIPCHAR & h->flags && '\\' != *p) { h->flags &= ~HTML_SKIPCHAR; p++; continue; } sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); p += (int)sz; if ('\0' == *p) break; switch (*p++) { case ('<'): printf("<"); continue; case ('>'): printf(">"); continue; case ('&'): printf("&"); continue; case (ASCII_HYPH): putchar('-'); continue; default: break; } esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == esc) break; switch (esc) { case (ESCAPE_FONT): /* FALLTHROUGH */ case (ESCAPE_FONTPREV): /* FALLTHROUGH */ case (ESCAPE_FONTBOLD): /* FALLTHROUGH */ case (ESCAPE_FONTITALIC): /* FALLTHROUGH */ case (ESCAPE_FONTBI): /* FALLTHROUGH */ case (ESCAPE_FONTROMAN): if (0 == norecurse) print_metaf(h, esc); continue; case (ESCAPE_SKIPCHAR): h->flags |= HTML_SKIPCHAR; continue; default: break; } if (h->flags & HTML_SKIPCHAR) { h->flags &= ~HTML_SKIPCHAR; continue; } switch (esc) { case (ESCAPE_UNICODE): /* Skip passed "u" header. */ c = mchars_num2uc(seq + 1, len - 1); if ('\0' != c) printf("&#x%x;", c); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, len); if ('\0' != c) putchar(c); break; case (ESCAPE_SPECIAL): c = mchars_spec2cp(h->symtab, seq, len); if (c > 0) printf("&#%d;", c); else if (-1 == c && 1 == len) putchar((int)*seq); break; case (ESCAPE_NOSPACE): if ('\0' == *p) nospace = 1; break; default: break; } } return(nospace); }
/* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This * handles word styling. */ void term_word(struct termp *p, const char *word) { const char nbrsp[2] = { ASCII_NBRSP, 0 }; const char *seq, *cp; int sz, uc; size_t ssz; enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! (TERMP_KEEP & p->flags)) { bufferc(p, ' '); if (TERMP_SENTENCE & p->flags) bufferc(p, ' '); } else bufferc(p, ASCII_NBRSP); } if (TERMP_PREKEEP & p->flags) p->flags |= TERMP_KEEP; if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; else p->flags |= TERMP_NOSPACE; p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); p->skipvsp = 0; while ('\0' != *word) { if ('\\' != *word) { if (TERMP_SKIPCHAR & p->flags) { p->flags &= ~TERMP_SKIPCHAR; word++; continue; } if (TERMP_NBRWORD & p->flags) { if (' ' == *word) { encode(p, nbrsp, 1); word++; continue; } ssz = strcspn(word, "\\ "); } else ssz = strcspn(word, "\\"); encode(p, word, ssz); word += (int)ssz; continue; } word++; esc = mandoc_escape(&word, &seq, &sz); if (ESCAPE_ERROR == esc) continue; switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); if (uc < 0) continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { cp = mchars_spec2str(p->symtab, seq, sz, &ssz); if (cp != NULL) encode(p, cp, ssz); } else { uc = mchars_spec2cp(p->symtab, seq, sz); if (uc > 0) encode1(p, uc); } continue; case ESCAPE_FONTBOLD: term_fontrepl(p, TERMFONT_BOLD); continue; case ESCAPE_FONTITALIC: term_fontrepl(p, TERMFONT_UNDER); continue; case ESCAPE_FONTBI: term_fontrepl(p, TERMFONT_BI); continue; case ESCAPE_FONT: /* FALLTHROUGH */ case ESCAPE_FONTROMAN: term_fontrepl(p, TERMFONT_NONE); continue; case ESCAPE_FONTPREV: term_fontlast(p); continue; case ESCAPE_NOSPACE: if (TERMP_SKIPCHAR & p->flags) p->flags &= ~TERMP_SKIPCHAR; else if ('\0' == *word) p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); continue; case ESCAPE_SKIPCHAR: p->flags |= TERMP_SKIPCHAR; continue; case ESCAPE_OVERSTRIKE: cp = seq + sz; while (seq < cp) { if (*seq == '\\') { mandoc_escape(&seq, NULL, NULL); continue; } encode1(p, *seq++); if (seq < cp) encode(p, "\b", 1); } default: continue; } /* * Common handling for Unicode and numbered * character escape sequences. */ if (p->enc == TERMENC_ASCII) { cp = ascii_uc2str(uc); encode(p, cp, strlen(cp)); } else { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; encode1(p, uc); } } p->flags &= ~TERMP_NBRWORD; }
static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; int c, len, nospace; const char *seq; enum mandoc_esc esc; static const char rejs[9] = { '\\', '<', '>', '&', '"', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; nospace = 0; while ('\0' != *p) { if (HTML_SKIPCHAR & h->flags && '\\' != *p) { h->flags &= ~HTML_SKIPCHAR; p++; continue; } sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); p += (int)sz; if ('\0' == *p) break; if (print_escape(*p++)) continue; esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == esc) break; switch (esc) { case ESCAPE_FONT: case ESCAPE_FONTPREV: case ESCAPE_FONTBOLD: case ESCAPE_FONTITALIC: case ESCAPE_FONTBI: case ESCAPE_FONTROMAN: if (0 == norecurse) print_metaf(h, esc); continue; case ESCAPE_SKIPCHAR: h->flags |= HTML_SKIPCHAR; continue; default: break; } if (h->flags & HTML_SKIPCHAR) { h->flags &= ~HTML_SKIPCHAR; continue; } switch (esc) { case ESCAPE_UNICODE: /* Skip past "u" header. */ c = mchars_num2uc(seq + 1, len - 1); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); if (c < 0) continue; break; case ESCAPE_SPECIAL: c = mchars_spec2cp(seq, len); if (c <= 0) continue; break; case ESCAPE_NOSPACE: if ('\0' == *p) nospace = 1; continue; case ESCAPE_OVERSTRIKE: if (len == 0) continue; c = seq[len - 1]; break; default: continue; } if ((c < 0x20 && c != 0x09) || (c > 0x7E && c < 0xA0)) c = 0xFFFD; if (c > 0x7E) printf("&#%d;", c); else if ( ! print_escape(c)) putchar(c); } return nospace; }
size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; int ssz, c; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; /* * Account for escaped sequences within string length * calculations. This follows the logic in term_word() as we * must calculate the width of produced strings. */ sz = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) sz += (*p->width)(p, *cp++); c = 0; switch (*cp) { case ('\\'): cp++; esc = mandoc_escape(&cp, &seq, &ssz); if (ESCAPE_ERROR == esc) return(sz); if (TERMENC_ASCII != p->enc) switch (esc) { case (ESCAPE_UNICODE): c = mchars_num2uc (seq + 1, ssz - 1); if ('\0' == c) break; sz += (*p->width)(p, c); continue; case (ESCAPE_SPECIAL): c = mchars_spec2cp (p->symtab, seq, ssz); if (c <= 0) break; sz += (*p->width)(p, c); continue; default: break; } rhs = NULL; switch (esc) { case (ESCAPE_UNICODE): sz += (*p->width)(p, '?'); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, ssz); if ('\0' != c) sz += (*p->width)(p, c); break; case (ESCAPE_SPECIAL): rhs = mchars_spec2str (p->symtab, seq, ssz, &rsz); if (ssz != 1 || rhs) break; rhs = seq; rsz = ssz; break; default: break; } if (NULL == rhs) break; for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case (ASCII_NBRSP): sz += (*p->width)(p, ' '); cp++; break; case (ASCII_HYPH): sz += (*p->width)(p, '-'); cp++; break; default: break; } } return(sz); }
/* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This * handles word styling. */ void term_word(struct termp *p, const char *word) { const char *seq, *cp; char c; int sz, uc; size_t ssz; enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! (TERMP_KEEP & p->flags)) { if (TERMP_PREKEEP & p->flags) p->flags |= TERMP_KEEP; bufferc(p, ' '); if (TERMP_SENTENCE & p->flags) bufferc(p, ' '); } else bufferc(p, ASCII_NBRSP); } if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; else p->flags |= TERMP_NOSPACE; p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); while ('\0' != *word) { if ((ssz = strcspn(word, "\\")) > 0) encode(p, word, ssz); word += (int)ssz; if ('\\' != *word) continue; word++; esc = mandoc_escape(&word, &seq, &sz); if (ESCAPE_ERROR == esc) break; if (TERMENC_ASCII != p->enc) switch (esc) { case (ESCAPE_UNICODE): uc = mchars_num2uc(seq + 1, sz - 1); if ('\0' == uc) break; encode1(p, uc); continue; case (ESCAPE_SPECIAL): uc = mchars_spec2cp(p->symtab, seq, sz); if (uc <= 0) break; encode1(p, uc); continue; default: break; } switch (esc) { case (ESCAPE_UNICODE): encode1(p, '?'); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, sz); if ('\0' != c) encode(p, &c, 1); break; case (ESCAPE_SPECIAL): cp = mchars_spec2str(p->symtab, seq, sz, &ssz); if (NULL != cp) encode(p, cp, ssz); else if (1 == ssz) encode(p, seq, sz); break; case (ESCAPE_FONTBOLD): term_fontrepl(p, TERMFONT_BOLD); break; case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; case (ESCAPE_FONT): /* FALLTHROUGH */ case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; case (ESCAPE_FONTPREV): term_fontlast(p); break; case (ESCAPE_NOSPACE): if ('\0' == *word) p->flags |= TERMP_NOSPACE; break; default: break; } } }
/* * Print text and mdoc(7) syntax elements. */ static void md_word(const char *s) { const char *seq, *prevfont, *currfont, *nextfont; char c; int bs, sz, uc, breakline; /* No spacing before closing delimiters. */ if (s[0] != '\0' && s[1] == '\0' && strchr("!),.:;?]", s[0]) != NULL && (outflags & MD_spc_force) == 0) outflags &= ~MD_spc; md_preword(); if (*s == '\0') return; /* No spacing after opening delimiters. */ if ((s[0] == '(' || s[0] == '[') && s[1] == '\0') outflags &= ~MD_spc; breakline = 0; prevfont = currfont = ""; while ((c = *s++) != '\0') { bs = 0; switch(c) { case ASCII_NBRSP: if (code_blocks) c = ' '; else { md_named("nbsp"); c = '\0'; } break; case ASCII_HYPH: bs = escflags & ESC_BOL && !code_blocks; c = '-'; break; case ASCII_BREAK: continue; case '#': case '+': case '-': bs = escflags & ESC_BOL && !code_blocks; break; case '(': bs = escflags & ESC_HYP && !code_blocks; break; case ')': bs = escflags & ESC_NUM && !code_blocks; break; case '*': case '[': case '_': case '`': bs = !code_blocks; break; case '.': bs = escflags & ESC_NUM && !code_blocks; break; case '<': if (code_blocks == 0) { md_named("lt"); c = '\0'; } break; case '=': if (escflags & ESC_BOL && !code_blocks) { md_named("equals"); c = '\0'; } break; case '>': if (code_blocks == 0) { md_named("gt"); c = '\0'; } break; case '\\': uc = 0; nextfont = NULL; switch (mandoc_escape(&s, &seq, &sz)) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); break; case ESCAPE_SPECIAL: uc = mchars_spec2cp(seq, sz); break; case ESCAPE_FONTBOLD: nextfont = "**"; break; case ESCAPE_FONTITALIC: nextfont = "*"; break; case ESCAPE_FONTBI: nextfont = "***"; break; case ESCAPE_FONT: case ESCAPE_FONTROMAN: nextfont = ""; break; case ESCAPE_FONTPREV: nextfont = prevfont; break; case ESCAPE_BREAK: breakline = 1; break; case ESCAPE_NOSPACE: case ESCAPE_SKIPCHAR: case ESCAPE_OVERSTRIKE: /* XXX not implemented */ /* FALLTHROUGH */ case ESCAPE_ERROR: default: break; } if (nextfont != NULL && !code_blocks) { if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } prevfont = currfont; currfont = nextfont; if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } } if (uc) { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; if (code_blocks) { seq = mchars_uc2str(uc); fputs(seq, stdout); outcount += strlen(seq); } else { printf("&#%d;", uc); outcount++; } escflags &= ~ESC_FON; } c = '\0'; break; case ']': bs = escflags & ESC_SQU && !code_blocks; escflags |= ESC_HYP; break; default: break; } if (bs) putchar('\\'); md_char(c); if (breakline && (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) { printf(" \n"); breakline = 0; while (*s == ' ' || *s == ASCII_NBRSP) s++; } } if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } else if (s[-2] == ' ') escflags |= ESC_EOL; else escflags &= ~ESC_EOL; }