Exemple #1
0
/*
 * Pre-treat a text line.
 * Text lines can consist of equations, which must be handled apart from
 * the regular text.
 * Thus, use this function to step through a line checking if it has any
 * equations embedded in it.
 * This must handle multiple equations AND equations that do not end at
 * the end-of-line, i.e., will re-enter in the next roff parse.
 */
static int
mdoc_preptext(struct mdoc *m, int line, char *buf, int offs)
{
	char		*start, *end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		start = NULL;
		if ('\0' != (delim = roff_eqndelim(m->roff)))
			if (NULL != (start = strchr(buf + offs, delim)))
				*start++ = '\0';

		/* Parse text as normal. */
		if ( ! mdoc_ptext(m, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == start)
			break;

		/* Read past the end of the equation. */
		offs += start - (buf + offs);
		assert(start == &buf[offs]);
		if (NULL != (end = strchr(buf + offs, delim))) {
			*end++ = '\0';
			while (' ' == *end)
				end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(m->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(m->roff))
			if ( ! mdoc_addeqn(m, roff_eqn(m->roff)))
				return(0);
		offs += (end - (buf + offs));
	} 

	return(1);
}
Exemple #2
0
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line; 
	pos = 0; 

	for (i = 0; i < (int)blk.sz; ) {
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/* 
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and drop these characters so
			 * we don't display gibberish.  Note to manual
			 * writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) && 
					(isgraph(c) || isblank(c)))) {
				mandoc_msg(MANDOCERR_BADCHAR, curp,
						curp->line, pos, "ignoring byte");
				i++;
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				if (pos >= (int)ln.sz)
					resize_buf(&ln, 256);
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/* Backout trailing whitespaces */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Some other escape sequence, copy & cont. */

			if (pos + 1 >= (int)ln.sz)
				resize_buf(&ln, 256);

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

 		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf = 
				mandoc_realloc
				(curp->secondary->buf, 
				 curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf + 
					curp->secondary->sz, 
					ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln
			(curp->roff, curp->line, 
			 &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case (ROFF_REPARSE):
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
					curp->line, pos, NULL);
			pos = 0;
			continue;
		case (ROFF_APPEND):
			pos = (int)strlen(ln.buf);
			continue;
		case (ROFF_RERUN):
			goto rerun;
		case (ROFF_IGN):
			pos = 0;
			continue;
		case (ROFF_ERR):
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case (ROFF_SO):
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary) 
				curp->secondary->sz -= pos + 1;
			mparse_readfd_r(curp, -1, ln.buf + of, 1);
			if (MANDOCLEVEL_FATAL <= curp->file_status)
				break;
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			pset(ln.buf + of, pos - of, curp);

		/* 
		 * Lastly, push down into the parsers themselves.  One
		 * of these will have already been set in the pset()
		 * routine.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
					man_addspan(curp->man, span) :
					mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ? 
				mdoc_addeqn(curp->mdoc, 
					roff_eqn(curp->roff)) :
				man_addeqn(curp->man,
					roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
				man_parseln(curp->man, 
					curp->line, ln.buf, of) :
				mdoc_parseln(curp->mdoc, 
					curp->line, ln.buf, of);

		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		}

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}