/*
 * Skip over a recursive numerical expression.
 * This is only called from within character-terminated escapes
 * (e.g., \s[xxxyyy]), so the contents are irrelevant: all that
 * matters is how far the leading expression extends.
 * Returns the number of characters skipped, 0 if "start" begins
 * neither with an escape nor with an opening parenthesis, or -1
 * on a parse error (the caller should bail).
 */
static int
numescape(const char *start)
{
	const char	*next;
	int		 pos;

	/* The expression is a single escape sequence. */

	if (start[0] == '\\') {
		next = start + 1;
		if (mandoc_escape(&next, NULL, NULL) == ESCAPE_ERROR)
			return(-1);
		return((int)(next - start));
	}

	if (start[0] != '(')
		return(0);

	/*
	 * A parenthesised expression.  Hop from one interesting
	 * character to the next: the closing parenthesis ends the
	 * scan, end of input is an error, and a backslash starts a
	 * nested escape that has to be skipped as a unit.
	 */

	pos = 1;
	for (;;) {
		pos += (int)strcspn(start + pos, ")\\");

		switch (start[pos]) {
		case ')':
			/* Count the terminating ')' as well. */
			return(pos + 1);
		case '\0':
			return(-1);
		default:
			break;
		}

		next = start + pos + 1;
		if (mandoc_escape(&next, NULL, NULL) == ESCAPE_ERROR)
			return(-1);
		pos = (int)(next - start);
	}
}
/* * Process text streams: * Convert all breakable hyphens into ASCII_HYPH. * Decrement and spring input line trap. */ static enum rofferr roff_parsetext(char **bufp, size_t *szp, int pos, int *offs) { size_t sz; const char *start; char *p; int isz; enum mandoc_esc esc; start = p = *bufp + pos; while ('\0' != *p) { sz = strcspn(p, "-\\"); p += sz; if ('\0' == *p) break; if ('\\' == *p) { /* Skip over escapes. */ p++; esc = mandoc_escape((const char **)&p, NULL, NULL); if (ESCAPE_ERROR == esc) break; continue; } else if (p == start) { p++; continue; } if (isalpha((unsigned char)p[-1]) && isalpha((unsigned char)p[1])) *p = ASCII_HYPH; p++; } /* Spring the input line trap. */ if (1 == roffit_lines) { isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro); if (-1 == isz) { perror(NULL); exit((int)MANDOCLEVEL_SYSERR); } free(*bufp); *bufp = p; *szp = isz + 1; *offs = 0; free(roffit_macro); roffit_lines = 0; return(ROFF_REPARSE); } else if (1 < roffit_lines) --roffit_lines; return(ROFF_CONT); }
/*
 * Approximate the display width of a string containing escape
 * sequences.  This mirrors the logic of term_strlen(): every plain
 * character and every character-producing escape counts as width 1,
 * which is crude but sufficient for estimating widths.
 * A skip-character escape swallows the width of whatever is printed
 * next.  Counting stops at the first malformed escape.
 */
int
html_strlen(const char *cp)
{
	size_t	 span;
	int	 pending, total;

	total = 0;
	pending = 0;	/* Set while a skip-character escape is in effect. */

	for (;;) {
		/* Count the run of ordinary text before the next escape. */
		span = strcspn(cp, "\\");
		if (span > 0) {
			cp += span;
			if (pending) {
				pending = 0;
				span--;
			}
			total += span;
		}
		if (*cp == '\0')
			return(total);

		cp++;
		switch (mandoc_escape(&cp, NULL, NULL)) {
		case (ESCAPE_ERROR):
			return(total);
		case (ESCAPE_SKIPCHAR):
			pending = 1;
			continue;
		case (ESCAPE_UNICODE):
			/* FALLTHROUGH */
		case (ESCAPE_NUMBERED):
			/* FALLTHROUGH */
		case (ESCAPE_SPECIAL):
			break;
		default:
			continue;
		}

		/* The escape yields one character, unless it is skipped. */
		if (pending)
			pending = 0;
		else
			total++;
	}
}
/*
 * Collect the text below node "n" into the string *dest.
 * Non-text nodes are handled by recursing into their children.
 * For a text node, leading whitespace and escape sequences and
 * trailing whitespace are trimmed; what remains either becomes
 * *dest (if *dest was NULL) or is appended to it after one blank.
 * A previous *dest is freed when it gets replaced.
 */
void
man_deroff(char **dest, const struct man_node *n)
{
	char	*cp;
	size_t	 sz;

	if (MAN_TEXT != n->type) {
		for (n = n->child; n; n = n->next)
			man_deroff(dest, n);
		return;
	}

	/* Skip leading whitespace and escape sequences. */

	cp = n->string;
	while ('\0' != *cp) {
		if ('\\' == *cp) {
			cp++;
			mandoc_escape((const char **)&cp, NULL, NULL);
		} else if (isspace((unsigned char)*cp))
			cp++;
		else
			break;
	}

	/* Skip trailing whitespace. */

	for (sz = strlen(cp); sz; sz--)
		if (0 == isspace((unsigned char)cp[sz-1]))
			break;

	/* Skip empty strings. */

	if (0 == sz)
		return;

	if (NULL == *dest) {
		*dest = mandoc_strndup(cp, sz);
		return;
	}

	/*
	 * Use a precision ("%.*s"), not a field width ("%*s"):
	 * a width only pads and never truncates, so the trailing
	 * whitespace trimmed above would be copied anyway.
	 */

	mandoc_asprintf(&cp, "%s %.*s", *dest, (int)sz, cp);
	free(*dest);
	*dest = cp;
}
static int man_strlen(const char *cp) { size_t rsz; int skip, sz; sz = 0; skip = 0; for (;;) { rsz = strcspn(cp, "\\"); if (rsz) { cp += rsz; if (skip) { skip = 0; rsz--; } sz += rsz; } if ('\0' == *cp) break; cp++; switch (mandoc_escape(&cp, NULL, NULL)) { case ESCAPE_ERROR: return sz; case ESCAPE_UNICODE: case ESCAPE_NUMBERED: case ESCAPE_SPECIAL: case ESCAPE_UNDEF: case ESCAPE_OVERSTRIKE: if (skip) skip = 0; else sz++; break; case ESCAPE_SKIPCHAR: skip = 1; break; default: break; } } return sz; }
/*
 * Process a text line in place: every breakable hyphen, that is,
 * every '-' with a letter on both sides, becomes ASCII_HYPH.
 * Escape sequences are skipped as a unit; scanning stops at the
 * first malformed escape.  Always returns ROFF_CONT.
 */
static enum rofferr
roff_parsetext(char *p)
{
	char	*const first = p;

	for (p += strcspn(p, "-\\"); *p != '\0'; p += strcspn(p, "-\\")) {
		if (*p == '\\') {
			/* Step over the whole escape sequence. */
			p++;
			if (mandoc_escape((const char **)&p, NULL, NULL)
			    == ESCAPE_ERROR)
				break;
			continue;
		}

		/* A hyphen in the first column has no left neighbour. */
		if (p != first &&
		    isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}

	return(ROFF_CONT);
}
/*
 * Approximate the display width of a string containing escape
 * sequences.  This follows the logic in term_strlen() as we must
 * calculate the width of produced strings.
 * Assume that characters are always width of "1".  This is hacky,
 * but it gets the job done for approximation of widths.
 * Counting stops at the first malformed escape.
 */
int
html_strlen(const char *cp)
{
	int		 sz;
	const char	*p;

	sz = 0;
	while (NULL != (p = strchr(cp, '\\'))) {
		/* Plain characters in front of the backslash. */
		sz += (int)(p - cp);

		/*
		 * Resume right behind the backslash that was found.
		 * Merely incrementing cp would hand mandoc_escape()
		 * some arbitrary character of the preceding text.
		 */
		cp = p + 1;

		switch (mandoc_escape(&cp, NULL, NULL)) {
		case (ESCAPE_ERROR):
			return(sz);
		case (ESCAPE_UNICODE):
			/* FALLTHROUGH */
		case (ESCAPE_NUMBERED):
			/* FALLTHROUGH */
		case (ESCAPE_SPECIAL):
			/* These produce one output character. */
			sz++;
			break;
		default:
			break;
		}
	}

	assert(sz >= 0);

	/* Whatever follows the last escape is plain text. */
	return(sz + (int)strlen(cp));
}
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 *
 * First emits the spacing that precedes the word according to the
 * TERMP_* flags in p->flags, then walks the word: runs of ordinary
 * text go to encode(), and each escape sequence is parsed with
 * mandoc_escape() and either rendered or applied as a font/flag change.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
	const char	*seq, *cp;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	/*
	 * Leading space: one blank (two after a sentence end), or a
	 * non-breaking space while in keep mode; nothing if NOSPACE.
	 */
	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}
	if (TERMP_PREKEEP & p->flags)
		p->flags |= TERMP_KEEP;

	/* NOSPACE only survives this word if NONOSPACE is set. */
	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
	p->skipvsp = 0;

	while ('\0' != *word) {
		if ('\\' != *word) {
			/* A pending skip-character request eats one char. */
			if (TERMP_SKIPCHAR & p->flags) {
				p->flags &= ~TERMP_SKIPCHAR;
				word++;
				continue;
			}
			/*
			 * In NBRWORD mode, blanks are emitted one by one
			 * as non-breaking spaces; otherwise copy the whole
			 * run up to the next backslash.
			 */
			if (TERMP_NBRWORD & p->flags) {
				if (' ' == *word) {
					encode(p, nbrsp, 1);
					word++;
					continue;
				}
				ssz = strcspn(word, "\\ ");
			} else
				ssz = strcspn(word, "\\");
			encode(p, word, ssz);
			word += (int)ssz;
			continue;
		}

		/* Step past the backslash and parse the escape. */
		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			continue;

		/*
		 * Cases that "break" leave a code point in uc for the
		 * common handling below; cases that "continue" are
		 * complete and go on with the rest of the word.
		 */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* seq + 1 skips the leading 'u' of the name. */
			uc = mchars_num2uc(seq + 1, sz - 1);
			break;
		case ESCAPE_NUMBERED:
			uc = mchars_num2char(seq, sz);
			if (uc < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			if (p->enc == TERMENC_ASCII) {
				cp = mchars_spec2str(p->symtab,
				    seq, sz, &ssz);
				if (cp != NULL)
					encode(p, cp, ssz);
			} else {
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc > 0)
					encode1(p, uc);
			}
			continue;
		case ESCAPE_FONTBOLD:
			term_fontrepl(p, TERMFONT_BOLD);
			continue;
		case ESCAPE_FONTITALIC:
			term_fontrepl(p, TERMFONT_UNDER);
			continue;
		case ESCAPE_FONTBI:
			term_fontrepl(p, TERMFONT_BI);
			continue;
		case ESCAPE_FONT:
			/* FALLTHROUGH */
		case ESCAPE_FONTROMAN:
			term_fontrepl(p, TERMFONT_NONE);
			continue;
		case ESCAPE_FONTPREV:
			term_fontlast(p);
			continue;
		case ESCAPE_NOSPACE:
			/*
			 * A pending skip-character request consumes the
			 * escape; otherwise it only has an effect at the
			 * very end of the word.
			 */
			if (TERMP_SKIPCHAR & p->flags)
				p->flags &= ~TERMP_SKIPCHAR;
			else if ('\0' == *word)
				p->flags |= (TERMP_NOSPACE |
				    TERMP_NONEWLINE);
			continue;
		case ESCAPE_SKIPCHAR:
			p->flags |= TERMP_SKIPCHAR;
			continue;
		case ESCAPE_OVERSTRIKE:
			/*
			 * Print the characters of the argument on top
			 * of each other, separated by backspaces.
			 * NOTE(review): seq still points at the backslash
			 * when mandoc_escape() is called here, while the
			 * other callers step past it first - confirm that
			 * nested escapes are skipped as intended.
			 */
			cp = seq + sz;
			while (seq < cp) {
				if (*seq == '\\') {
					mandoc_escape(&seq, NULL, NULL);
					continue;
				}
				encode1(p, *seq++);
				if (seq < cp)
					encode(p, "\b", 1);
			}
			/* FALLTHROUGH */
		default:
			continue;
		}

		/*
		 * Common handling for Unicode and numbered
		 * character escape sequences.
		 */

		if (p->enc == TERMENC_ASCII) {
			cp = ascii_uc2str(uc);
			encode(p, cp, strlen(cp));
		} else {
			/* Map C0/C1 control codes (except tab) to U+FFFD. */
			if ((uc < 0x20 && uc != 0x09) ||
			    (uc > 0x7E && uc < 0xA0))
				uc = 0xFFFD;
			encode1(p, uc);
		}
	}
	p->flags &= ~TERMP_NBRWORD;
}
size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; /* * Account for escaped sequences within string length * calculations. This follows the logic in term_word() as we * must calculate the width of produced strings. */ sz = 0; skip = 0; while ('\0' != *cp) { rsz = strcspn(cp, rej); for (i = 0; i < rsz; i++) sz += cond_width(p, *cp++, &skip); switch (*cp) { case '\\': cp++; esc = mandoc_escape(&cp, &seq, &ssz); if (ESCAPE_ERROR == esc) continue; rhs = NULL; switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, ssz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, ssz); if (uc < 0) continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { rhs = mchars_spec2str(p->symtab, seq, ssz, &rsz); if (rhs != NULL) break; } else { uc = mchars_spec2cp(p->symtab, seq, ssz); if (uc > 0) sz += cond_width(p, uc, &skip); } continue; case ESCAPE_SKIPCHAR: skip = 1; continue; case ESCAPE_OVERSTRIKE: rsz = 0; rhs = seq + ssz; while (seq < rhs) { if (*seq == '\\') { mandoc_escape(&seq, NULL, NULL); continue; } i = (*p->width)(p, *seq++); if (rsz < i) rsz = i; } sz += rsz; continue; default: continue; } /* * Common handling for Unicode and numbered * character escape sequences. */ if (rhs == NULL) { if (p->enc == TERMENC_ASCII) { rhs = ascii_uc2str(uc); rsz = strlen(rhs); } else { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; sz += cond_width(p, uc, &skip); continue; } } if (skip) { skip = 0; break; } /* * Common handling for all escape sequences * printing more than one character. */ for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; case ASCII_NBRSP: sz += cond_width(p, ' ', &skip); cp++; break; case ASCII_HYPH: sz += cond_width(p, '-', &skip); cp++; /* FALLTHROUGH */ case ASCII_BREAK: break; default: break; } } return(sz); }
/* ARGSUSED */ static enum rofferr roff_tr(ROFF_ARGS) { const char *p, *first, *second; size_t fsz, ssz; enum mandoc_esc esc; p = *bufp + pos; if ('\0' == *p) { mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); return(ROFF_IGN); } while ('\0' != *p) { fsz = ssz = 1; first = p++; if ('\\' == *first) { esc = mandoc_escape(&p, NULL, NULL); if (ESCAPE_ERROR == esc) { mandoc_msg (MANDOCERR_BADESCAPE, r->parse, ln, (int)(p - *bufp), NULL); return(ROFF_IGN); } fsz = (size_t)(p - first); } second = p++; if ('\\' == *second) { esc = mandoc_escape(&p, NULL, NULL); if (ESCAPE_ERROR == esc) { mandoc_msg (MANDOCERR_BADESCAPE, r->parse, ln, (int)(p - *bufp), NULL); return(ROFF_IGN); } ssz = (size_t)(p - second); } else if ('\0' == *second) { mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, (int)(p - *bufp), NULL); second = " "; p--; } if (fsz > 1) { roff_setstrn(&r->xmbtab, first, fsz, second, ssz, 0); continue; } if (NULL == r->xtab) r->xtab = mandoc_calloc (128, sizeof(struct roffstr)); free(r->xtab[(int)*first].p); r->xtab[(int)*first].p = mandoc_strndup(second, ssz); r->xtab[(int)*first].sz = ssz; } return(ROFF_IGN); }
/*
 * Strip the escapes out of a string, emitting the results.
 *
 * In list mode (list != 0), the string is split into words at
 * whitespace.  Leading quotes and opening parentheses and trailing
 * punctuation are trimmed, and each word that starts with two
 * letters, contains no escape sequence and is at least two
 * characters long after trimming is printed on a line of its own.
 *
 * Otherwise, blanks are printed until *colp reaches col, then the
 * string is printed without its escape sequences, advancing *colp
 * once per character written.
 *
 * In both modes, processing stops at the first malformed escape.
 */
static void
pstring(const char *p, int col, int *colp, int list)
{
	enum mandoc_esc	 esc;
	const char	*start, *end;
	int		 emit;

again:
	if (list && '\0' != *p) {
		while (isspace((unsigned char)*p))
			p++;

		while ('\'' == *p || '(' == *p || '"' == *p)
			p++;

		emit = isalpha((unsigned char)p[0]) &&
			isalpha((unsigned char)p[1]);

		/*
		 * Find the end of the word.  mandoc_escape() already
		 * leaves p behind the escape sequence, so p must only
		 * be advanced by hand for ordinary characters.
		 * Advancing it after an escape as well would skip an
		 * unexamined character and could step over the
		 * terminating NUL.
		 */

		start = p;
		while ('\0' != *p) {
			if ('\\' == *p) {
				p++;
				esc = mandoc_escape(&p, NULL, NULL);
				if (ESCAPE_ERROR == esc)
					return;
				/* Words containing escapes are not listed. */
				emit = 0;
			} else if (isspace((unsigned char)*p))
				break;
			else
				p++;
		}

		/* Trim trailing punctuation. */

		end = p - 1;

		while (end > start)
			if ('.' == *end || ',' == *end ||
					'\'' == *end || '"' == *end ||
					')' == *end || '!' == *end ||
					'?' == *end || ':' == *end ||
					';' == *end)
				end--;
			else
				break;

		if (emit && end - start >= 1) {
			for ( ; start <= end; start++)
				if (ASCII_HYPH == *start)
					putchar('-');
				else
					putchar((unsigned char)*start);
			putchar('\n');
		}

		/* More words follow on this line. */

		if (isspace((unsigned char)*p))
			goto again;

		return;
	}

	/*
	 * Print as many column spaces til we achieve parity with the
	 * input document.
	 */

	while (*colp < col) {
		putchar(' ');
		(*colp)++;
	}

	/*
	 * Print the input word, skipping any special characters.
	 */

	while ('\0' != *p)
		if ('\\' == *p) {
			p++;
			esc = mandoc_escape(&p, NULL, NULL);
			if (ESCAPE_ERROR == esc)
				break;
		} else {
			putchar((unsigned char )*p++);
			(*colp)++;
		}
}
/*
 * Write the string "p" to standard output, treating the characters
 * listed in rejs specially and rendering escape sequences.
 * Font escapes are passed to print_metaf() unless "norecurse" is
 * non-zero.  Character escapes are written literally or as numeric
 * character references.  Output stops at the first malformed escape.
 * Returns 1 if the string ends with a no-space escape, else 0.
 */
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 c, len, nospace;
	const char	*seq;
	enum mandoc_esc	 esc;
	/* Characters that cannot simply be copied to the output. */
	static const char rejs[9] = { '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	nospace = 0;

	while ('\0' != *p) {
		/* A pending skip-character request eats one plain char. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the run of ordinary characters verbatim. */
		sz = strcspn(p, rejs);

		fwrite(p, 1, sz, stdout);
		p += (int)sz;

		if ('\0' == *p)
			break;

		/*
		 * print_escape() returns non-zero when it has handled
		 * the character itself; what is left over here is a
		 * backslash.  (Presumably it prints the replacement
		 * text for the character - confirm in its definition.)
		 */
		if (print_escape(*p++))
			continue;

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		/* Escapes that change state and print nothing. */
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		/* A pending skip-character request eats this escape. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Escapes that "break" leave a character code in c. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_NOSPACE:
			/* Only meaningful at the very end of the string. */
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			/* Only the last character of the argument is kept. */
			if (len == 0)
				continue;
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/* Map C0/C1 control codes (except tab) to U+FFFD. */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		/* Codes above 0x7E are written as numeric references. */
		if (c > 0x7E)
			printf("&#%d;", c);
		else if ( ! print_escape(c))
			putchar(c);
	}

	return nospace;
}
/*
 * Parse one escape sequence and classify it.
 *
 * On entry, *end must point to the character right after the
 * backslash.  On return, *end has been advanced: on success it points
 * behind the complete escape sequence; on ESCAPE_ERROR it has moved
 * by at least one character, but not necessarily to the end of the
 * malformed sequence.
 * If "start" and "sz" are not NULL, they receive the beginning and
 * the length of the argument or name of the escape; either may be
 * NULL if the caller is not interested.
 * Returns the class of the escape, or ESCAPE_ERROR if the sequence
 * is malformed or incomplete.
 */
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
	const char	*local_start;
	int		 local_sz;
	char		 term;
	enum mandoc_esc	 gly;

	/*
	 * When the caller doesn't provide return storage,
	 * use local storage.
	 */

	if (NULL == start)
		start = &local_start;
	if (NULL == sz)
		sz = &local_sz;

	/*
	 * Beyond the backslash, at least one input character
	 * is part of the escape sequence.  With one exception
	 * (see below), that character won't be returned.
	 *
	 * After the first switch, exactly one of two things is set:
	 * either "term", the character ending the argument, or *sz,
	 * the fixed length of the argument.
	 */

	gly = ESCAPE_ERROR;
	*start = ++*end;
	*sz = 0;
	term = '\0';

	/* (*start)[-1] is the character right after the backslash. */
	switch ((*start)[-1]) {
	/*
	 * First the glyphs.  There are several different forms of
	 * these, but each eventually returns a substring of the glyph
	 * name.
	 */
	case '(':
		gly = ESCAPE_SPECIAL;
		*sz = 2;
		break;
	case '[':
		gly = ESCAPE_SPECIAL;
		term = ']';
		break;
	case 'C':
		if ('\'' != **start)
			return(ESCAPE_ERROR);
		*start = ++*end;
		gly = ESCAPE_SPECIAL;
		term = '\'';
		break;

	/*
	 * Escapes taking no arguments at all.
	 */
	case 'd':
		/* FALLTHROUGH */
	case 'u':
		return(ESCAPE_IGNORE);

	/*
	 * The \z escape is supposed to output the following
	 * character without advancing the cursor position.
	 * Since we are mostly dealing with terminal mode,
	 * let us just skip the next character.
	 */
	case 'z':
		return(ESCAPE_SKIPCHAR);

	/*
	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
	 * 'X' is the trigger.  These have opaque sub-strings.
	 */
	case 'F':
		/* FALLTHROUGH */
	case 'g':
		/* FALLTHROUGH */
	case 'k':
		/* FALLTHROUGH */
	case 'M':
		/* FALLTHROUGH */
	case 'm':
		/* FALLTHROUGH */
	case 'n':
		/* FALLTHROUGH */
	case 'V':
		/* FALLTHROUGH */
	case 'Y':
		gly = ESCAPE_IGNORE;
		/* FALLTHROUGH */
	case 'f':
		/* Only a direct \f reaches this point with gly unset. */
		if (ESCAPE_ERROR == gly)
			gly = ESCAPE_FONT;
		switch (**start) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		default:
			*sz = 1;
			break;
		}
		break;

	/*
	 * These escapes are of the form \X'Y', where 'X' is the trigger
	 * and 'Y' is any string.  These have opaque sub-strings.
	 * Any character may serve as the delimiter.
	 * The \B and \w escapes are handled in roff.c, roff_res().
	 */
	case 'A':
		/* FALLTHROUGH */
	case 'b':
		/* FALLTHROUGH */
	case 'D':
		/* FALLTHROUGH */
	case 'o':
		/* FALLTHROUGH */
	case 'R':
		/* FALLTHROUGH */
	case 'X':
		/* FALLTHROUGH */
	case 'Z':
		if ('\0' == **start)
			return(ESCAPE_ERROR);
		gly = ESCAPE_IGNORE;
		term = **start;
		*start = ++*end;
		break;

	/*
	 * These escapes are of the form \X'N', where 'X' is the trigger
	 * and 'N' resolves to a numerical expression.
	 * Characters that can be part of a numerical expression are
	 * not accepted as delimiters; since strchr() also matches the
	 * terminating NUL, end of input is rejected here, too.
	 */
	case 'h':
		/* FALLTHROUGH */
	case 'H':
		/* FALLTHROUGH */
	case 'L':
		/* FALLTHROUGH */
	case 'l':
		/* FALLTHROUGH */
	case 'S':
		/* FALLTHROUGH */
	case 'v':
		/* FALLTHROUGH */
	case 'x':
		if (strchr(" %&()*+-./0123456789:<=>", **start)) {
			if ('\0' != **start)
				++*end;
			return(ESCAPE_ERROR);
		}
		gly = ESCAPE_IGNORE;
		term = **start;
		*start = ++*end;
		break;

	/*
	 * Special handling for the numbered character escape.
	 * A digit in the place of the opening delimiter makes the
	 * escape ignored after consuming that one character.
	 * Otherwise, the digits after the delimiter are returned and
	 * one more character (the closing delimiter) is consumed
	 * unless the input ends there.
	 * XXX Do any other escapes need similar handling?
	 */
	case 'N':
		if ('\0' == **start)
			return(ESCAPE_ERROR);
		(*end)++;
		if (isdigit((unsigned char)**start)) {
			*sz = 1;
			return(ESCAPE_IGNORE);
		}
		(*start)++;
		while (isdigit((unsigned char)**end))
			(*end)++;
		*sz = *end - *start;
		if ('\0' != **end)
			(*end)++;
		return(ESCAPE_NUMBERED);

	/*
	 * Sizes get a special category of their own.
	 */
	case 's':
		gly = ESCAPE_IGNORE;

		/* See +/- counts as a sign. */
		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
			(*end)++;

		switch (**end) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		case '\'':
			*start = ++*end;
			term = '\'';
			break;
		default:
			*sz = 1;
			break;
		}

		break;

	/*
	 * Anything else is assumed to be a glyph.
	 * In this case, pass back the character after the backslash.
	 */
	default:
		gly = ESCAPE_SPECIAL;
		*start = --*end;
		*sz = 1;
		break;
	}

	assert(ESCAPE_ERROR != gly);

	/*
	 * Read up to the terminating character,
	 * paying attention to nested escapes.
	 * Without a terminator, the argument has the fixed length
	 * *sz and must fit into what is left of the input.
	 */

	if ('\0' != term) {
		while (**end != term) {
			switch (**end) {
			case '\0':
				return(ESCAPE_ERROR);
			case '\\':
				(*end)++;
				if (ESCAPE_ERROR ==
				    mandoc_escape(end, NULL, NULL))
					return(ESCAPE_ERROR);
				break;
			default:
				(*end)++;
				break;
			}
		}
		/* The length excludes the terminator; *end steps past it. */
		*sz = (*end)++ - *start;
	} else {
		assert(*sz > 0);
		if ((size_t)*sz > strlen(*start))
			return(ESCAPE_ERROR);
		*end += *sz;
	}

	/* Run post-processors. */

	switch (gly) {
	case ESCAPE_FONT:
		if (2 == *sz) {
			if ('C' == **start) {
				/*
				 * Treat constant-width font modes
				 * just like regular font modes.
				 */
				(*start)++;
				(*sz)--;
			} else {
				if ('B' == (*start)[0] &&
				    'I' == (*start)[1])
					gly = ESCAPE_FONTBI;
				break;
			}
		} else if (1 != *sz)
			break;

		/* One-character font names and font numbers. */
		switch (**start) {
		case '3':
			/* FALLTHROUGH */
		case 'B':
			gly = ESCAPE_FONTBOLD;
			break;
		case '2':
			/* FALLTHROUGH */
		case 'I':
			gly = ESCAPE_FONTITALIC;
			break;
		case 'P':
			gly = ESCAPE_FONTPREV;
			break;
		case '1':
			/* FALLTHROUGH */
		case 'R':
			gly = ESCAPE_FONTROMAN;
			break;
		}
		break;
	case ESCAPE_SPECIAL:
		/* The one-character name "c" is the no-space escape. */
		if (1 == *sz && 'c' == **start)
			gly = ESCAPE_NOSPACE;
		/*
		 * Unicode escapes are defined in groff as \[u0000]
		 * to \[u10FFFF], where the contained value must be
		 * a valid Unicode codepoint.  Here, however, only
		 * check the length and range: 'u' followed by four
		 * to six hexadecimal digits, six digits only with a
		 * leading "10", five digits not with a leading '0'.
		 */
		if (**start != 'u' || *sz < 5 || *sz > 7)
			break;
		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
			break;
		if (*sz == 6 && (*start)[1] == '0')
			break;
		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
		    + 1 == *sz)
			gly = ESCAPE_UNICODE;
		break;
	default:
		break;
	}

	return(gly);
}
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 *
 * First emits the spacing that precedes the word according to the
 * TERMP_* flags in p->flags, then copies the word, rendering escape
 * sequences on the way.  Processing of the word stops at the first
 * malformed escape.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	*seq, *cp;
	char		 c;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	/*
	 * Leading space: one blank (two after a sentence end), or a
	 * non-breaking space while in keep mode; nothing if NOSPACE.
	 * A pending PREKEEP turns into KEEP before the blank is written.
	 */
	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			if (TERMP_PREKEEP & p->flags)
				p->flags |= TERMP_KEEP;
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}

	/* NOSPACE only survives this word if NONOSPACE is set. */
	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);

	while ('\0' != *word) {
		/* Copy ordinary text up to the next backslash. */
		if ((ssz = strcspn(word, "\\")) > 0)
			encode(p, word, ssz);

		word += (int)ssz;
		if ('\\' != *word)
			continue;

		/* Step past the backslash and parse the escape. */
		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			break;

		/*
		 * For encodings other than ASCII, try to print the code
		 * point itself.  If the lookup fails, "break" leaves
		 * this switch and falls back to the handling below.
		 */
		if (TERMENC_ASCII != p->enc)
			switch (esc) {
			case (ESCAPE_UNICODE):
				/* seq + 1 skips the leading 'u'. */
				uc = mchars_num2uc(seq + 1, sz - 1);
				if ('\0' == uc)
					break;
				encode1(p, uc);
				continue;
			case (ESCAPE_SPECIAL):
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc <= 0)
					break;
				encode1(p, uc);
				continue;
			default:
				break;
			}

		switch (esc) {
		case (ESCAPE_UNICODE):
			/* Not representable here: print a placeholder. */
			encode1(p, '?');
			break;
		case (ESCAPE_NUMBERED):
			c = mchars_num2char(seq, sz);
			if ('\0' != c)
				encode(p, &c, 1);
			break;
		case (ESCAPE_SPECIAL):
			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
			if (NULL != cp)
				encode(p, cp, ssz);
			/*
			 * No replacement string: depending on the length
			 * reported back in ssz, print the name itself.
			 */
			else if (1 == ssz)
				encode(p, seq, sz);
			break;
		case (ESCAPE_FONTBOLD):
			term_fontrepl(p, TERMFONT_BOLD);
			break;
		case (ESCAPE_FONTITALIC):
			term_fontrepl(p, TERMFONT_UNDER);
			break;
		case (ESCAPE_FONT):
			/* FALLTHROUGH */
		case (ESCAPE_FONTROMAN):
			term_fontrepl(p, TERMFONT_NONE);
			break;
		case (ESCAPE_FONTPREV):
			term_fontlast(p);
			break;
		case (ESCAPE_NOSPACE):
			/* Only has an effect at the very end of the word. */
			if ('\0' == *word)
				p->flags |= TERMP_NOSPACE;
			break;
		default:
			break;
		}
	}
}
/*
 * Compute the display width of a string, accounting for escape
 * sequences and the ASCII_HYPH and ASCII_NBRSP marker characters.
 * This follows the logic in term_word() as we must calculate the
 * width of produced strings.  Character widths come from the
 * p->width callback.  Returns the width accumulated so far when a
 * malformed escape is encountered.
 */
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, c;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	/* Characters that need more than a plain width lookup. */
	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };

	sz = 0;
	while ('\0' != *cp) {
		/* Ordinary characters up to the next special one. */
		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += (*p->width)(p, *cp++);

		c = 0;
		switch (*cp) {
		case ('\\'):
			cp++;
			esc = mandoc_escape(&cp, &seq, &ssz);
			if (ESCAPE_ERROR == esc)
				return(sz);

			/*
			 * For encodings other than ASCII, try the code
			 * point itself.  "continue" resumes the outer
			 * loop; "break" falls back to the handling below.
			 */
			if (TERMENC_ASCII != p->enc)
				switch (esc) {
				case (ESCAPE_UNICODE):
					c = mchars_num2uc
						(seq + 1, ssz - 1);
					if ('\0' == c)
						break;
					sz += (*p->width)(p, c);
					continue;
				case (ESCAPE_SPECIAL):
					c = mchars_spec2cp
						(p->symtab, seq, ssz);
					if (c <= 0)
						break;
					sz += (*p->width)(p, c);
					continue;
				default:
					break;
				}

			/* Set when a replacement string has to be measured. */
			rhs = NULL;

			switch (esc) {
			case (ESCAPE_UNICODE):
				/* Printed as a placeholder character. */
				sz += (*p->width)(p, '?');
				break;
			case (ESCAPE_NUMBERED):
				c = mchars_num2char(seq, ssz);
				if ('\0' != c)
					sz += (*p->width)(p, c);
				break;
			case (ESCAPE_SPECIAL):
				rhs = mchars_spec2str
					(p->symtab, seq, ssz, &rsz);

				if (ssz != 1 || rhs)
					break;

				/* Unknown one-character name: measure it. */
				rhs = seq;
				rsz = ssz;
				break;
			default:
				break;
			}

			if (NULL == rhs)
				break;

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case (ASCII_NBRSP):
			sz += (*p->width)(p, ' ');
			cp++;
			break;
		case (ASCII_HYPH):
			sz += (*p->width)(p, '-');
			cp++;
			break;
		default:
			break;
		}
	}

	return(sz);
}
/*
 * In the current line, expand user-defined strings ("\*")
 * and references to number registers ("\n").
 * Also check the syntax of other escape sequences.
 *
 * Scanning starts at offset "pos" of *bufp.  Each expansion replaces
 * *bufp by a newly allocated buffer, updates *szp, and restarts the
 * scan at the position of the replaced escape, such that the inserted
 * text gets expanded as well.  At most EXPAND_LIMIT expansions are
 * done per call.
 * Returns ROFF_IGN if that limit is exceeded, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		/*
		 * Until the lookup below, res only records the kind:
		 * NULL for a string, ubuf for a number register.
		 */

		switch (*cp) {
		case ('*'):
			res = NULL;
			break;
		case ('n'):
			res = ubuf;
			break;
		default:
			/* Any other escape is only checked for syntax. */
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of 0 means "up to the closing bracket".
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, report that and substitute nothing.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		/*
		 * Copy the text in front of the escape, the replacement,
		 * and the text behind the name; in the bracketed form,
		 * cp still points at the ']', which is skipped as well.
		 */
		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		/* Rescan from the start of the inserted text. */
		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}

	return(ROFF_CONT);
}
/*
 * Parse a macro line, that is, a line beginning with the control
 * character.
 * "buf" holds the line and "offs" is the offset of the macro name
 * within it.  The name is looked up and the macro is dispatched to
 * mdoc_macro(), where needed wrapped into an implicit `It' inside
 * column lists.
 * Returns 2 to abort parsing in quick mode once a section other than
 * NAME begins, and 1 in all other cases, including unknown macros.
 */
static int
mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
{
	struct mdoc_node *n;
	const char	 *cp;
	enum mdoct	  tok;
	int		  i, sv;
	char		  mac[5];

	/* Remember where the macro name starts. */
	sv = offs;

	/*
	 * Copy the first word into a nil-terminated buffer.
	 * Stop when a space, tab, escape, or eoln is encountered.
	 * (strchr() also matches the terminating NUL.)
	 */

	i = 0;
	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
		mac[i++] = buf[offs++];

	mac[i] = '\0';

	/* Only names of two or three characters are looked up. */
	tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;

	if (tok == MDOC_MAX) {
		mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
		    ln, sv, buf + sv - 1);
		return(1);
	}

	/* Skip a leading escape sequence or tab. */

	switch (buf[offs]) {
	case '\\':
		cp = buf + offs + 1;
		mandoc_escape(&cp, NULL, NULL);
		offs = cp - buf;
		break;
	case '\t':
		offs++;
		break;
	default:
		break;
	}

	/* Jump to the next non-whitespace word. */

	while (buf[offs] && ' ' == buf[offs])
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
		mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
		    ln, offs - 1, NULL);

	/*
	 * If an initial macro or a list invocation, divert directly
	 * into macro processing.
	 */

	if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
		mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
		return(1);
	}

	n = mdoc->last;
	assert(mdoc->last);

	/*
	 * If the first macro of a `Bl -column', open an `It' block
	 * context around the parsed macro.
	 * The `It' macro is invoked at the offset of the macro name,
	 * such that the name becomes part of its arguments.
	 */

	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
	    LIST_column == n->norm->Bl.type) {
		mdoc->flags |= MDOC_FREECOL;
		mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
		return(1);
	}

	/*
	 * If we're following a block-level `It' within a `Bl -column'
	 * context (perhaps opened in the above block or in ptext()),
	 * then open an `It' block context around the parsed macro.
	 */

	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
	    NULL != n->parent &&
	    MDOC_Bl == n->parent->tok &&
	    LIST_column == n->parent->norm->Bl.type) {
		mdoc->flags |= MDOC_FREECOL;
		mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
		return(1);
	}

	/* Normal processing of a macro. */

	mdoc_macro(mdoc, tok, ln, sv, &offs, buf);

	/* In quick mode (for mandocdb), abort after the NAME section. */

	if (mdoc->quick && MDOC_Sh == tok &&
	    SEC_NAME != mdoc->last->sec)
		return(2);

	return(1);
}
/* * Duplicate an input string, making the appropriate character * conversations (as stipulated by `tr') along the way. * Returns a heap-allocated string with all the replacements made. */ char * roff_strdup(const struct roff *r, const char *p) { const struct roffkv *cp; char *res; const char *pp; size_t ssz, sz; enum mandoc_esc esc; if (NULL == r->xmbtab && NULL == r->xtab) return(mandoc_strdup(p)); else if ('\0' == *p) return(mandoc_strdup("")); /* * Step through each character looking for term matches * (remember that a `tr' can be invoked with an escape, which is * a glyph but the escape is multi-character). * We only do this if the character hash has been initialised * and the string is >0 length. */ res = NULL; ssz = 0; while ('\0' != *p) { if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { sz = r->xtab[(int)*p].sz; res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, r->xtab[(int)*p].p, sz); ssz += sz; p++; continue; } else if ('\\' != *p) { res = mandoc_realloc(res, ssz + 2); res[ssz++] = *p++; continue; } /* Search for term matches. */ for (cp = r->xmbtab; cp; cp = cp->next) if (0 == strncmp(p, cp->key.p, cp->key.sz)) break; if (NULL != cp) { /* * A match has been found. * Append the match to the array and move * forward by its keysize. */ res = mandoc_realloc (res, ssz + cp->val.sz + 1); memcpy(res + ssz, cp->val.p, cp->val.sz); ssz += cp->val.sz; p += (int)cp->key.sz; continue; } /* * Handle escapes carefully: we need to copy * over just the escape itself, or else we might * do replacements within the escape itself. * Make sure to pass along the bogus string. */ pp = p++; esc = mandoc_escape(&p, NULL, NULL); if (ESCAPE_ERROR == esc) { sz = strlen(pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); break; } /* * We bail out on bad escapes. * No need to warn: we already did so when * roff_res() was called. 
*/ sz = (int)(p - pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); ssz += sz; } res[(int)ssz] = '\0'; return(res); }
/*
 * Parse a macro line, that is, a line beginning with the control
 * character.
 * "buf" holds the line and "offs" is the offset of the macro name
 * within it.  The name is looked up, the handler registered in
 * man_macros[] is called, and a next-line block head scope that was
 * open before the call is closed afterwards unless it may continue.
 * Returns 2 to abort parsing in quick mode once the body of a section
 * other than NAME begins, and 1 in all other cases, including
 * unknown macros.
 */
static int
man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
{
	struct roff_node *n;
	const char	*cp;
	int		 tok;
	int		 i, ppos;
	int		 bline;
	char		 mac[5];

	/* Remember where the macro name starts. */
	ppos = offs;

	/*
	 * Copy the first word into a nil-terminated buffer.
	 * Stop when a space, tab, escape, or eoln is encountered.
	 * (strchr() also matches the terminating NUL.)
	 */

	i = 0;
	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
		mac[i++] = buf[offs++];

	mac[i] = '\0';

	/* Only names of one to three characters are looked up. */
	tok = (i > 0 && i < 4) ? man_hash_find(mac) : TOKEN_NONE;

	if (tok == TOKEN_NONE) {
		mandoc_msg(MANDOCERR_MACRO, man->parse,
		    ln, ppos, buf + ppos - 1);
		return 1;
	}

	/* Skip a leading escape sequence or tab. */

	switch (buf[offs]) {
	case '\\':
		cp = buf + offs + 1;
		mandoc_escape(&cp, NULL, NULL);
		offs = cp - buf;
		break;
	case '\t':
		offs++;
		break;
	default:
		break;
	}

	/* Jump to the next non-whitespace word. */

	while (buf[offs] && buf[offs] == ' ')
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
		    ln, offs - 1, NULL);

	/*
	 * Some macros break next-line scopes; otherwise, remember
	 * whether we are in next-line scope for a block head.
	 * The flag has to be sampled before the handler runs.
	 */

	man_breakscope(man, tok);
	bline = man->flags & MAN_BLINE;

	/* Call to handler... */

	assert(man_macros[tok].fp);
	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);

	/* In quick mode (for mandocdb), abort after the NAME section. */

	if (man->quick && tok == MAN_SH) {
		n = man->last;
		/* n->prev is presumably the section head - confirm. */
		if (n->type == ROFFT_BODY &&
		    strcmp(n->prev->child->string, "NAME"))
			return 2;
	}

	/*
	 * If we are in a next-line scope for a block head,
	 * close it out now and switch to the body,
	 * unless the next-line scope is allowed to continue.
	 */

	if ( ! bline || man->flags & MAN_ELINE ||
	    man_macros[tok].flags & MAN_NSCOPED)
		return 1;

	assert(man->flags & MAN_BLINE);
	man->flags &= ~MAN_BLINE;

	man_unscope(man, man->last->parent);
	roff_body_alloc(man, ln, ppos, man->last->tok);
	return 1;
}
/*
 * Pre-filter each and every line for reserved words (one beginning with
 * `\*', e.g., `\*(ab').  These must be handled before the actual line
 * is processed.
 * This also checks the syntax of regular escapes.
 *
 * Scanning starts at offset "pos" of *bufp.  Each expansion replaces
 * *bufp by a newly allocated buffer, updates *szp, and restarts the
 * scan at the position of the replaced escape, such that the inserted
 * text gets expanded as well.  At most EXPAND_LIMIT expansions are
 * done per call.
 * Returns ROFF_IGN if that limit is exceeded, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	enum mandoc_esc	 esc;
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl, expand_count;
	size_t		 nsz;
	char		*n;

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		if ('*' != *cp) {
			/* Any other escape is only checked for syntax. */
			res = cp;
			esc = mandoc_escape(&cp, NULL, NULL);
			if (ESCAPE_ERROR != esc)
				continue;
			cp = res;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string.
		 * Save a pointer to the name.
		 * A maxl of 0 means "up to the closing bracket".
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, report that and substitute nothing.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		/*
		 * Copy the text in front of the escape, the replacement,
		 * and the text behind the name; in the bracketed form,
		 * cp still points at the ']', which is skipped as well.
		 */
		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;

		/* Rescan from the start of the inserted text. */
		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}

	return(ROFF_CONT);
}
enum mandoc_esc mandoc_escape(const char **end, const char **start, int *sz) { const char *local_start; int local_sz; char term; enum mandoc_esc gly; /* * When the caller doesn't provide return storage, * use local storage. */ if (NULL == start) start = &local_start; if (NULL == sz) sz = &local_sz; /* * Beyond the backslash, at least one input character * is part of the escape sequence. With one exception * (see below), that character won't be returned. */ gly = ESCAPE_ERROR; *start = ++*end; *sz = 0; term = '\0'; switch ((*start)[-1]) { /* * First the glyphs. There are several different forms of * these, but each eventually returns a substring of the glyph * name. */ case ('('): gly = ESCAPE_SPECIAL; *sz = 2; break; case ('['): gly = ESCAPE_SPECIAL; /* * Unicode escapes are defined in groff as \[uXXXX] to * \[u10FFFF], where the contained value must be a valid * Unicode codepoint. Here, however, only check whether * it's not a zero-width escape. */ if ('u' == (*start)[0] && ']' != (*start)[1]) gly = ESCAPE_UNICODE; term = ']'; break; case ('C'): if ('\'' != **start) return(ESCAPE_ERROR); gly = ESCAPE_SPECIAL; *start = ++*end; term = '\''; break; /* * The \z escape is supposed to output the following * character without advancing the cursor position. * Since we are mostly dealing with terminal mode, * let us just skip the next character. */ case ('z'): return(ESCAPE_SKIPCHAR); /* * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where * 'X' is the trigger. These have opaque sub-strings. 
*/ case ('F'): /* FALLTHROUGH */ case ('g'): /* FALLTHROUGH */ case ('k'): /* FALLTHROUGH */ case ('M'): /* FALLTHROUGH */ case ('m'): /* FALLTHROUGH */ case ('n'): /* FALLTHROUGH */ case ('V'): /* FALLTHROUGH */ case ('Y'): gly = ESCAPE_IGNORE; /* FALLTHROUGH */ case ('f'): if (ESCAPE_ERROR == gly) gly = ESCAPE_FONT; switch (**start) { case ('('): *start = ++*end; *sz = 2; break; case ('['): *start = ++*end; term = ']'; break; default: *sz = 1; break; } break; /* * These escapes are of the form \X'Y', where 'X' is the trigger * and 'Y' is any string. These have opaque sub-strings. */ case ('A'): /* FALLTHROUGH */ case ('b'): /* FALLTHROUGH */ case ('D'): /* FALLTHROUGH */ case ('o'): /* FALLTHROUGH */ case ('R'): /* FALLTHROUGH */ case ('X'): /* FALLTHROUGH */ case ('Z'): if ('\'' != **start) return(ESCAPE_ERROR); gly = ESCAPE_IGNORE; *start = ++*end; term = '\''; break; /* * These escapes are of the form \X'N', where 'X' is the trigger * and 'N' resolves to a numerical expression. */ case ('B'): /* FALLTHROUGH */ case ('h'): /* FALLTHROUGH */ case ('H'): /* FALLTHROUGH */ case ('L'): /* FALLTHROUGH */ case ('l'): gly = ESCAPE_NUMBERED; /* FALLTHROUGH */ case ('S'): /* FALLTHROUGH */ case ('v'): /* FALLTHROUGH */ case ('w'): /* FALLTHROUGH */ case ('x'): if ('\'' != **start) return(ESCAPE_ERROR); if (ESCAPE_ERROR == gly) gly = ESCAPE_IGNORE; *start = ++*end; term = '\''; break; /* * Special handling for the numbered character escape. * XXX Do any other escapes need similar handling? */ case ('N'): if ('\0' == **start) return(ESCAPE_ERROR); (*end)++; if (isdigit((unsigned char)**start)) { *sz = 1; return(ESCAPE_IGNORE); } (*start)++; while (isdigit((unsigned char)**end)) (*end)++; *sz = *end - *start; if ('\0' != **end) (*end)++; return(ESCAPE_NUMBERED); /* * Sizes get a special category of their own. */ case ('s'): gly = ESCAPE_IGNORE; /* See +/- counts as a sign. 
*/ if ('+' == **end || '-' == **end || ASCII_HYPH == **end) (*end)++; switch (**end) { case ('('): *start = ++*end; *sz = 2; break; case ('['): *start = ++*end; term = ']'; break; case ('\''): *start = ++*end; term = '\''; break; default: *sz = 1; break; } break; /* * Anything else is assumed to be a glyph. * In this case, pass back the character after the backslash. */ default: gly = ESCAPE_SPECIAL; *start = --*end; *sz = 1; break; } assert(ESCAPE_ERROR != gly); /* * Read up to the terminating character, * paying attention to nested escapes. */ if ('\0' != term) { while (**end != term) { switch (**end) { case ('\0'): return(ESCAPE_ERROR); case ('\\'): (*end)++; if (ESCAPE_ERROR == mandoc_escape(end, NULL, NULL)) return(ESCAPE_ERROR); break; default: (*end)++; break; } } *sz = (*end)++ - *start; } else { assert(*sz > 0); if ((size_t)*sz > strlen(*start)) return(ESCAPE_ERROR); *end += *sz; } /* Run post-processors. */ switch (gly) { case (ESCAPE_FONT): /* * Pretend that the constant-width font modes are the * same as the regular font modes. */ if (2 == *sz && 'C' == **start) { (*start)++; (*sz)--; } else if (1 != *sz) break; switch (**start) { case ('3'): /* FALLTHROUGH */ case ('B'): gly = ESCAPE_FONTBOLD; break; case ('2'): /* FALLTHROUGH */ case ('I'): gly = ESCAPE_FONTITALIC; break; case ('P'): gly = ESCAPE_FONTPREV; break; case ('1'): /* FALLTHROUGH */ case ('R'): gly = ESCAPE_FONTROMAN; break; } break; case (ESCAPE_SPECIAL): if (1 == *sz && 'c' == **start) gly = ESCAPE_NOSPACE; break; default: break; } return(gly); }
/* * Print text and mdoc(7) syntax elements. */ static void md_word(const char *s) { const char *seq, *prevfont, *currfont, *nextfont; char c; int bs, sz, uc, breakline; /* No spacing before closing delimiters. */ if (s[0] != '\0' && s[1] == '\0' && strchr("!),.:;?]", s[0]) != NULL && (outflags & MD_spc_force) == 0) outflags &= ~MD_spc; md_preword(); if (*s == '\0') return; /* No spacing after opening delimiters. */ if ((s[0] == '(' || s[0] == '[') && s[1] == '\0') outflags &= ~MD_spc; breakline = 0; prevfont = currfont = ""; while ((c = *s++) != '\0') { bs = 0; switch(c) { case ASCII_NBRSP: if (code_blocks) c = ' '; else { md_named("nbsp"); c = '\0'; } break; case ASCII_HYPH: bs = escflags & ESC_BOL && !code_blocks; c = '-'; break; case ASCII_BREAK: continue; case '#': case '+': case '-': bs = escflags & ESC_BOL && !code_blocks; break; case '(': bs = escflags & ESC_HYP && !code_blocks; break; case ')': bs = escflags & ESC_NUM && !code_blocks; break; case '*': case '[': case '_': case '`': bs = !code_blocks; break; case '.': bs = escflags & ESC_NUM && !code_blocks; break; case '<': if (code_blocks == 0) { md_named("lt"); c = '\0'; } break; case '=': if (escflags & ESC_BOL && !code_blocks) { md_named("equals"); c = '\0'; } break; case '>': if (code_blocks == 0) { md_named("gt"); c = '\0'; } break; case '\\': uc = 0; nextfont = NULL; switch (mandoc_escape(&s, &seq, &sz)) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); break; case ESCAPE_SPECIAL: uc = mchars_spec2cp(seq, sz); break; case ESCAPE_FONTBOLD: nextfont = "**"; break; case ESCAPE_FONTITALIC: nextfont = "*"; break; case ESCAPE_FONTBI: nextfont = "***"; break; case ESCAPE_FONT: case ESCAPE_FONTROMAN: nextfont = ""; break; case ESCAPE_FONTPREV: nextfont = prevfont; break; case ESCAPE_BREAK: breakline = 1; break; case ESCAPE_NOSPACE: case ESCAPE_SKIPCHAR: case ESCAPE_OVERSTRIKE: /* XXX not implemented */ /* FALLTHROUGH */ case ESCAPE_ERROR: 
default: break; } if (nextfont != NULL && !code_blocks) { if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } prevfont = currfont; currfont = nextfont; if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } } if (uc) { if ((uc < 0x20 && uc != 0x09) || (uc > 0x7E && uc < 0xA0)) uc = 0xFFFD; if (code_blocks) { seq = mchars_uc2str(uc); fputs(seq, stdout); outcount += strlen(seq); } else { printf("&#%d;", uc); outcount++; } escflags &= ~ESC_FON; } c = '\0'; break; case ']': bs = escflags & ESC_SQU && !code_blocks; escflags |= ESC_HYP; break; default: break; } if (bs) putchar('\\'); md_char(c); if (breakline && (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) { printf(" \n"); breakline = 0; while (*s == ' ' || *s == ASCII_NBRSP) s++; } } if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); } else if (s[-2] == ' ') escflags |= ESC_EOL; else escflags &= ~ESC_EOL; }
static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; int c, len, nospace; const char *seq; enum mandoc_esc esc; static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; nospace = 0; while ('\0' != *p) { if (HTML_SKIPCHAR & h->flags && '\\' != *p) { h->flags &= ~HTML_SKIPCHAR; p++; continue; } sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); p += (int)sz; if ('\0' == *p) break; switch (*p++) { case ('<'): printf("<"); continue; case ('>'): printf(">"); continue; case ('&'): printf("&"); continue; case (ASCII_HYPH): putchar('-'); continue; default: break; } esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == esc) break; switch (esc) { case (ESCAPE_FONT): /* FALLTHROUGH */ case (ESCAPE_FONTPREV): /* FALLTHROUGH */ case (ESCAPE_FONTBOLD): /* FALLTHROUGH */ case (ESCAPE_FONTITALIC): /* FALLTHROUGH */ case (ESCAPE_FONTBI): /* FALLTHROUGH */ case (ESCAPE_FONTROMAN): if (0 == norecurse) print_metaf(h, esc); continue; case (ESCAPE_SKIPCHAR): h->flags |= HTML_SKIPCHAR; continue; default: break; } if (h->flags & HTML_SKIPCHAR) { h->flags &= ~HTML_SKIPCHAR; continue; } switch (esc) { case (ESCAPE_UNICODE): /* Skip passed "u" header. */ c = mchars_num2uc(seq + 1, len - 1); if ('\0' != c) printf("&#x%x;", c); break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, len); if ('\0' != c) putchar(c); break; case (ESCAPE_SPECIAL): c = mchars_spec2cp(h->symtab, seq, len); if (c > 0) printf("&#%d;", c); else if (-1 == c && 1 == len) putchar((int)*seq); break; case (ESCAPE_NOSPACE): if ('\0' == *p) nospace = 1; break; default: break; } } return(nospace); }
static int man_pmacro(struct roff_man *man, int ln, char *buf, int offs) { struct roff_node *n; const char *cp; size_t sz; enum roff_tok tok; int ppos; int bline; /* Determine the line macro. */ ppos = offs; tok = TOKEN_NONE; for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) offs++; if (sz > 0 && sz < 4) tok = roffhash_find(man->manmac, buf + ppos, sz); if (tok == TOKEN_NONE) { mandoc_msg(MANDOCERR_MACRO, man->parse, ln, ppos, buf + ppos - 1); return 1; } /* Skip a leading escape sequence or tab. */ switch (buf[offs]) { case '\\': cp = buf + offs + 1; mandoc_escape(&cp, NULL, NULL); offs = cp - buf; break; case '\t': offs++; break; default: break; } /* Jump to the next non-whitespace word. */ while (buf[offs] == ' ') offs++; /* * Trailing whitespace. Note that tabs are allowed to be passed * into the parser as "text", so we only warn about spaces here. */ if (buf[offs] == '\0' && buf[offs - 1] == ' ') mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, ln, offs - 1, NULL); /* * Some macros break next-line scopes; otherwise, remember * whether we are in next-line scope for a block head. */ man_breakscope(man, tok); bline = man->flags & MAN_BLINE; /* * If the line in next-line scope ends with \c, keep the * next-line scope open for the subsequent input line. * That is not at all portable, only groff >= 1.22.4 * does it, but *if* this weird idiom occurs in a manual * page, that's very likely what the author intended. */ if (bline) { cp = strchr(buf + offs, '\0') - 2; if (cp >= buf && cp[0] == '\\' && cp[1] == 'c') bline = 0; } /* Call to handler... */ assert(man_macros[tok].fp); (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf); /* In quick mode (for mandocdb), abort after the NAME section. 
*/ if (man->quick && tok == MAN_SH) { n = man->last; if (n->type == ROFFT_BODY && strcmp(n->prev->child->string, "NAME")) return 2; } /* * If we are in a next-line scope for a block head, * close it out now and switch to the body, * unless the next-line scope is allowed to continue. */ if ( ! bline || man->flags & MAN_ELINE || man_macros[tok].flags & MAN_NSCOPED) return 1; assert(man->flags & MAN_BLINE); man->flags &= ~MAN_BLINE; man_unscope(man, man->last->parent); roff_body_alloc(man, ln, ppos, man->last->tok); return 1; }