/* * Pre-treat a text line. * Text lines can consist of equations, which must be handled apart from * the regular text. * Thus, use this function to step through a line checking if it has any * equations embedded in it. * This must handle multiple equations AND equations that do not end at * the end-of-line, i.e., will re-enter in the next roff parse. */ static int mdoc_preptext(struct mdoc *m, int line, char *buf, int offs) { char *start, *end; char delim; while ('\0' != buf[offs]) { /* Mark starting position if eqn is set. */ start = NULL; if ('\0' != (delim = roff_eqndelim(m->roff))) if (NULL != (start = strchr(buf + offs, delim))) *start++ = '\0'; /* Parse text as normal. */ if ( ! mdoc_ptext(m, line, buf, offs)) return(0); /* Continue only if an equation exists. */ if (NULL == start) break; /* Read past the end of the equation. */ offs += start - (buf + offs); assert(start == &buf[offs]); if (NULL != (end = strchr(buf + offs, delim))) { *end++ = '\0'; while (' ' == *end) end++; } /* Parse the equation itself. */ roff_openeqn(m->roff, NULL, line, offs, buf); /* Process a finished equation? */ if (roff_closeeqn(m->roff)) if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) return(0); offs += (end - (buf + offs)); } return(1); }
/* * Main parse routine for an opened file. This is called for each * opened file and simply loops around the full input file, possibly * nesting (i.e., with `so'). */ static void mparse_buf_r(struct mparse *curp, struct buf blk, int start) { const struct tbl_span *span; struct buf ln; enum rofferr rr; int i, of, rc; int pos; /* byte number in the ln buffer */ int lnn; /* line number in the real file */ unsigned char c; memset(&ln, 0, sizeof(struct buf)); lnn = curp->line; pos = 0; for (i = 0; i < (int)blk.sz; ) { if (0 == pos && '\0' == blk.buf[i]) break; if (start) { curp->line = lnn; curp->reparse_count = 0; } while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { /* * When finding an unescaped newline character, * leave the character loop to process the line. * Skip a preceding carriage return, if any. */ if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && '\n' == blk.buf[i + 1]) ++i; if ('\n' == blk.buf[i]) { ++i; ++lnn; break; } /* * Warn about bogus characters. If you're using * non-ASCII encoding, you're screwing your * readers. Since I'd rather this not happen, * I'll be helpful and drop these characters so * we don't display gibberish. Note to manual * writers: use special characters. */ c = (unsigned char) blk.buf[i]; if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) { mandoc_msg(MANDOCERR_BADCHAR, curp, curp->line, pos, "ignoring byte"); i++; continue; } /* Trailing backslash = a plain char. */ if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { if (pos >= (int)ln.sz) resize_buf(&ln, 256); ln.buf[pos++] = blk.buf[i++]; continue; } /* * Found escape and at least one other character. * When it's a newline character, skip it. * When there is a carriage return in between, * skip that one as well. */ if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && '\n' == blk.buf[i + 2]) ++i; if ('\n' == blk.buf[i + 1]) { i += 2; ++lnn; continue; } if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { i += 2; /* Comment, skip to end of line */ for (; i < (int)blk.sz; ++i) { if ('\n' == blk.buf[i]) { ++i; ++lnn; break; } } /* Backout trailing whitespaces */ for (; pos > 0; --pos) { if (ln.buf[pos - 1] != ' ') break; if (pos > 2 && ln.buf[pos - 2] == '\\') break; } break; } /* Some other escape sequence, copy & cont. */ if (pos + 1 >= (int)ln.sz) resize_buf(&ln, 256); ln.buf[pos++] = blk.buf[i++]; ln.buf[pos++] = blk.buf[i++]; } if (pos >= (int)ln.sz) resize_buf(&ln, 256); ln.buf[pos] = '\0'; /* * A significant amount of complexity is contained by * the roff preprocessor. It's line-oriented but can be * expressed on one line, so we need at times to * readjust our starting point and re-run it. The roff * preprocessor can also readjust the buffers with new * data, so we pass them in wholesale. */ of = 0; /* * Maintain a lookaside buffer of all parsed lines. We * only do this if mparse_keep() has been invoked (the * buffer may be accessed with mparse_getkeep()). */ if (curp->secondary) { curp->secondary->buf = mandoc_realloc (curp->secondary->buf, curp->secondary->sz + pos + 2); memcpy(curp->secondary->buf + curp->secondary->sz, ln.buf, pos); curp->secondary->sz += pos; curp->secondary->buf [curp->secondary->sz] = '\n'; curp->secondary->sz++; curp->secondary->buf [curp->secondary->sz] = '\0'; } rerun: rr = roff_parseln (curp->roff, curp->line, &ln.buf, &ln.sz, of, &of); switch (rr) { case (ROFF_REPARSE): if (REPARSE_LIMIT >= ++curp->reparse_count) mparse_buf_r(curp, ln, 0); else mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, pos, NULL); pos = 0; continue; case (ROFF_APPEND): pos = (int)strlen(ln.buf); continue; case (ROFF_RERUN): goto rerun; case (ROFF_IGN): pos = 0; continue; case (ROFF_ERR): assert(MANDOCLEVEL_FATAL <= curp->file_status); break; case (ROFF_SO): /* * We remove `so' clauses from our lookaside * buffer because we're going to descend into * the file recursively. */ if (curp->secondary) curp->secondary->sz -= pos + 1; mparse_readfd_r(curp, -1, ln.buf + of, 1); if (MANDOCLEVEL_FATAL <= curp->file_status) break; pos = 0; continue; default: break; } /* * If we encounter errors in the recursive parse, make * sure we don't continue parsing. */ if (MANDOCLEVEL_FATAL <= curp->file_status) break; /* * If input parsers have not been allocated, do so now. * We keep these instanced between parsers, but set them * locally per parse routine since we can use different * parsers with each one. */ if ( ! (curp->man || curp->mdoc)) pset(ln.buf + of, pos - of, curp); /* * Lastly, push down into the parsers themselves. One * of these will have already been set in the pset() * routine. * If libroff returns ROFF_TBL, then add it to the * currently open parse. Since we only get here if * there does exist data (see tbl_data.c), we're * guaranteed that something's been allocated. * Do the same for ROFF_EQN. */ rc = -1; if (ROFF_TBL == rr) while (NULL != (span = roff_span(curp->roff))) { rc = curp->man ? man_addspan(curp->man, span) : mdoc_addspan(curp->mdoc, span); if (0 == rc) break; } else if (ROFF_EQN == rr) rc = curp->mdoc ? mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff)) : man_addeqn(curp->man, roff_eqn(curp->roff)); else if (curp->man || curp->mdoc) rc = curp->man ? man_parseln(curp->man, curp->line, ln.buf, of) : mdoc_parseln(curp->mdoc, curp->line, ln.buf, of); if (0 == rc) { assert(MANDOCLEVEL_FATAL <= curp->file_status); break; } /* Temporary buffers typically are not full. */ if (0 == start && '\0' == blk.buf[i]) break; /* Start the next input line. */ pos = 0; } free(ln.buf); }