Exemple #1
0
/*
 * Pass over recursive numerical expressions.  This context of this
 * function is important: it's only called within character-terminating
 * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
 * recursion: we don't care about what's in these blocks. 
 * This returns the number of characters skipped or -1 if an error
 * occurs (the caller should bail).
 */
static int
numescape(const char *start)
{
	int		 i;
	size_t		 sz;
	const char	*cp;

	i = 0;

	/* The expression consists of a subexpression. */

	if ('\\' == start[i]) {
		cp = &start[++i];
		/*
		 * Read past the end of the subexpression.
		 * Bail immediately on errors.
		 */
		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
			return(-1);
		return(i + cp - &start[i]);
	} 

	if ('(' != start[i++])
		return(0);

	/*
	 * A parenthesised subexpression.  Read until the closing
	 * parenthesis, making sure to handle any nested subexpressions
	 * that might ruin our parse.
	 */

	while (')' != start[i]) {
		sz = strcspn(&start[i], ")\\");
		i += (int)sz;

		if ('\0' == start[i])
			return(-1);
		else if ('\\' != start[i])
			continue;

		cp = &start[++i];
		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
			return(-1);
		i += cp - &start[i];
	}

	/* Read past the terminating ')'. */
	return(++i);
}
Exemple #2
0
/*
 * Process text streams:
 * Convert all breakable hyphens into ASCII_HYPH.
 * Decrement and spring input line trap.
 */
static enum rofferr
roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	start = p = *bufp + pos;

	while ('\0' != *p) {
		sz = strcspn(p, "-\\");
		p += sz;

		if ('\0' == *p)
			break;

		if ('\\' == *p) {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (ESCAPE_ERROR == esc)
				break;
			continue;
		} else if (p == start) {
			p++;
			continue;
		}

		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}

	/* Spring the input line trap. */
	if (1 == roffit_lines) {
		isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
		if (-1 == isz) {
			perror(NULL);
			exit((int)MANDOCLEVEL_SYSERR);
		}
		free(*bufp);
		*bufp = p;
		*szp = isz + 1;
		*offs = 0;
		free(roffit_macro);
		roffit_lines = 0;
		return(ROFF_REPARSE);
	} else if (1 < roffit_lines)
		--roffit_lines;
	return(ROFF_CONT);
}
int
html_strlen(const char *cp)
{
	size_t		 rsz;
	int		 skip, sz;

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_strlen() as we
	 * must calculate the width of produced strings.
	 * Assume that characters are always width of "1".  This is
	 * hacky, but it gets the job done for approximation of widths.
	 */

	sz = 0;
	skip = 0;
	while (1) {
		rsz = strcspn(cp, "\\");
		if (rsz) {
			cp += rsz;
			if (skip) {
				skip = 0;
				rsz--;
			}
			sz += rsz;
		}
		if ('\0' == *cp)
			break;
		cp++;
		switch (mandoc_escape(&cp, NULL, NULL)) {
		case (ESCAPE_ERROR):
			return(sz);
		case (ESCAPE_UNICODE):
			/* FALLTHROUGH */
		case (ESCAPE_NUMBERED):
			/* FALLTHROUGH */
		case (ESCAPE_SPECIAL):
			if (skip)
				skip = 0;
			else
				sz++;
			break;
		case (ESCAPE_SKIPCHAR):
			skip = 1;
			break;
		default:
			break;
		}
	}
	return(sz);
}
Exemple #4
0
void
man_deroff(char **dest, const struct man_node *n)
{
	char	*cp;
	size_t	 sz;

	if (MAN_TEXT != n->type) {
		for (n = n->child; n; n = n->next)
			man_deroff(dest, n);
		return;
	}

	/* Skip leading whitespace and escape sequences. */

	cp = n->string;
	while ('\0' != *cp) {
		if ('\\' == *cp) {
			cp++;
			mandoc_escape((const char **)&cp, NULL, NULL);
		} else if (isspace((unsigned char)*cp))
			cp++;
		else
			break;
	}

	/* Skip trailing whitespace. */

	for (sz = strlen(cp); sz; sz--)
		if (0 == isspace((unsigned char)cp[sz-1]))
			break;

	/* Skip empty strings. */

	if (0 == sz)
		return;

	if (NULL == *dest) {
		*dest = mandoc_strndup(cp, sz);
		return;
	}

	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
	free(*dest);
	*dest = cp;
}
Exemple #5
0
static int
man_strlen(const char *cp)
{
	size_t	 rsz;
	int	 skip, sz;

	sz = 0;
	skip = 0;
	for (;;) {
		rsz = strcspn(cp, "\\");
		if (rsz) {
			cp += rsz;
			if (skip) {
				skip = 0;
				rsz--;
			}
			sz += rsz;
		}
		if ('\0' == *cp)
			break;
		cp++;
		switch (mandoc_escape(&cp, NULL, NULL)) {
		case ESCAPE_ERROR:
			return sz;
		case ESCAPE_UNICODE:
		case ESCAPE_NUMBERED:
		case ESCAPE_SPECIAL:
		case ESCAPE_UNDEF:
		case ESCAPE_OVERSTRIKE:
			if (skip)
				skip = 0;
			else
				sz++;
			break;
		case ESCAPE_SKIPCHAR:
			skip = 1;
			break;
		default:
			break;
		}
	}
	return sz;
}
Exemple #6
0
/*
 * Process text streams: convert all breakable hyphens into ASCII_HYPH.
 */
static enum rofferr
roff_parsetext(char *p)
{
	size_t		 sz;
	const char	*start;
	enum mandoc_esc	 esc;

	start = p;

	while ('\0' != *p) {
		sz = strcspn(p, "-\\");
		p += sz;

		if ('\0' == *p)
			break;

		if ('\\' == *p) {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape
				((const char **)&p, NULL, NULL);
			if (ESCAPE_ERROR == esc)
				break;
			continue;
		} else if (p == start) {
			p++;
			continue;
		}

		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}

	return(ROFF_CONT);
}
Exemple #7
0
int
html_strlen(const char *cp)
{
	int		 ssz, sz;
	const char	*seq, *p;

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_strlen() as we
	 * must calculate the width of produced strings.
	 * Assume that characters are always width of "1".  This is
	 * hacky, but it gets the job done for approximation of widths.
	 */

	sz = 0;
	while (NULL != (p = strchr(cp, '\\'))) {
		sz += (int)(p - cp);
		++cp;
		switch (mandoc_escape(&cp, &seq, &ssz)) {
		case (ESCAPE_ERROR):
			return(sz);
		case (ESCAPE_UNICODE):
			/* FALLTHROUGH */
		case (ESCAPE_NUMBERED):
			/* FALLTHROUGH */
		case (ESCAPE_SPECIAL):
			sz++;
			break;
		default:
			break;
		}
	}

	assert(sz >= 0);
	return(sz + strlen(cp));
}
Exemple #8
0
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
	const char	*seq, *cp;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}
	if (TERMP_PREKEEP & p->flags)
		p->flags |= TERMP_KEEP;

	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
	p->skipvsp = 0;

	while ('\0' != *word) {
		if ('\\' != *word) {
			if (TERMP_SKIPCHAR & p->flags) {
				p->flags &= ~TERMP_SKIPCHAR;
				word++;
				continue;
			}
			if (TERMP_NBRWORD & p->flags) {
				if (' ' == *word) {
					encode(p, nbrsp, 1);
					word++;
					continue;
				}
				ssz = strcspn(word, "\\ ");
			} else
				ssz = strcspn(word, "\\");
			encode(p, word, ssz);
			word += (int)ssz;
			continue;
		}

		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			continue;

		switch (esc) {
		case ESCAPE_UNICODE:
			uc = mchars_num2uc(seq + 1, sz - 1);
			break;
		case ESCAPE_NUMBERED:
			uc = mchars_num2char(seq, sz);
			if (uc < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			if (p->enc == TERMENC_ASCII) {
				cp = mchars_spec2str(p->symtab,
				    seq, sz, &ssz);
				if (cp != NULL)
					encode(p, cp, ssz);
			} else {
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc > 0)
					encode1(p, uc);
			}
			continue;
		case ESCAPE_FONTBOLD:
			term_fontrepl(p, TERMFONT_BOLD);
			continue;
		case ESCAPE_FONTITALIC:
			term_fontrepl(p, TERMFONT_UNDER);
			continue;
		case ESCAPE_FONTBI:
			term_fontrepl(p, TERMFONT_BI);
			continue;
		case ESCAPE_FONT:
			/* FALLTHROUGH */
		case ESCAPE_FONTROMAN:
			term_fontrepl(p, TERMFONT_NONE);
			continue;
		case ESCAPE_FONTPREV:
			term_fontlast(p);
			continue;
		case ESCAPE_NOSPACE:
			if (TERMP_SKIPCHAR & p->flags)
				p->flags &= ~TERMP_SKIPCHAR;
			else if ('\0' == *word)
				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
			continue;
		case ESCAPE_SKIPCHAR:
			p->flags |= TERMP_SKIPCHAR;
			continue;
		case ESCAPE_OVERSTRIKE:
			cp = seq + sz;
			while (seq < cp) {
				if (*seq == '\\') {
					mandoc_escape(&seq, NULL, NULL);
					continue;
				}
				encode1(p, *seq++);
				if (seq < cp)
					encode(p, "\b", 1);
			}
		default:
			continue;
		}

		/*
		 * Common handling for Unicode and numbered
		 * character escape sequences.
		 */

		if (p->enc == TERMENC_ASCII) {
			cp = ascii_uc2str(uc);
			encode(p, cp, strlen(cp));
		} else {
			if ((uc < 0x20 && uc != 0x09) ||
			    (uc > 0x7E && uc < 0xA0))
				uc = 0xFFFD;
			encode1(p, uc);
		}
	}
	p->flags &= ~TERMP_NBRWORD;
}
Exemple #9
0
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, skip, uc;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
			ASCII_BREAK, '\0' };

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_word() as we
	 * must calculate the width of produced strings.
	 */

	sz = 0;
	skip = 0;
	while ('\0' != *cp) {
		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += cond_width(p, *cp++, &skip);

		switch (*cp) {
		case '\\':
			cp++;
			esc = mandoc_escape(&cp, &seq, &ssz);
			if (ESCAPE_ERROR == esc)
				continue;

			rhs = NULL;

			switch (esc) {
			case ESCAPE_UNICODE:
				uc = mchars_num2uc(seq + 1, ssz - 1);
				break;
			case ESCAPE_NUMBERED:
				uc = mchars_num2char(seq, ssz);
				if (uc < 0)
					continue;
				break;
			case ESCAPE_SPECIAL:
				if (p->enc == TERMENC_ASCII) {
					rhs = mchars_spec2str(p->symtab,
					    seq, ssz, &rsz);
					if (rhs != NULL)
						break;
				} else {
					uc = mchars_spec2cp(p->symtab,
					    seq, ssz);
					if (uc > 0)
						sz += cond_width(p, uc, &skip);
				}
				continue;
			case ESCAPE_SKIPCHAR:
				skip = 1;
				continue;
			case ESCAPE_OVERSTRIKE:
				rsz = 0;
				rhs = seq + ssz;
				while (seq < rhs) {
					if (*seq == '\\') {
						mandoc_escape(&seq, NULL, NULL);
						continue;
					}
					i = (*p->width)(p, *seq++);
					if (rsz < i)
						rsz = i;
				}
				sz += rsz;
				continue;
			default:
				continue;
			}

			/*
			 * Common handling for Unicode and numbered
			 * character escape sequences.
			 */

			if (rhs == NULL) {
				if (p->enc == TERMENC_ASCII) {
					rhs = ascii_uc2str(uc);
					rsz = strlen(rhs);
				} else {
					if ((uc < 0x20 && uc != 0x09) ||
					    (uc > 0x7E && uc < 0xA0))
						uc = 0xFFFD;
					sz += cond_width(p, uc, &skip);
					continue;
				}
			}

			if (skip) {
				skip = 0;
				break;
			}

			/*
			 * Common handling for all escape sequences
			 * printing more than one character.
			 */

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case ASCII_NBRSP:
			sz += cond_width(p, ' ', &skip);
			cp++;
			break;
		case ASCII_HYPH:
			sz += cond_width(p, '-', &skip);
			cp++;
			/* FALLTHROUGH */
		case ASCII_BREAK:
			break;
		default:
			break;
		}
	}

	return(sz);
}
Exemple #10
0
/* ARGSUSED */
static enum rofferr
roff_tr(ROFF_ARGS)
{
	const char	*p, *first, *second;
	size_t		 fsz, ssz;
	enum mandoc_esc	 esc;

	p = *bufp + pos;

	if ('\0' == *p) {
		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
		return(ROFF_IGN);
	}

	while ('\0' != *p) {
		fsz = ssz = 1;

		first = p++;
		if ('\\' == *first) {
			esc = mandoc_escape(&p, NULL, NULL);
			if (ESCAPE_ERROR == esc) {
				mandoc_msg
					(MANDOCERR_BADESCAPE, r->parse, 
					 ln, (int)(p - *bufp), NULL);
				return(ROFF_IGN);
			}
			fsz = (size_t)(p - first);
		}

		second = p++;
		if ('\\' == *second) {
			esc = mandoc_escape(&p, NULL, NULL);
			if (ESCAPE_ERROR == esc) {
				mandoc_msg
					(MANDOCERR_BADESCAPE, r->parse, 
					 ln, (int)(p - *bufp), NULL);
				return(ROFF_IGN);
			}
			ssz = (size_t)(p - second);
		} else if ('\0' == *second) {
			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, 
					ln, (int)(p - *bufp), NULL);
			second = " ";
			p--;
		}

		if (fsz > 1) {
			roff_setstrn(&r->xmbtab, first, 
					fsz, second, ssz, 0);
			continue;
		}

		if (NULL == r->xtab)
			r->xtab = mandoc_calloc
				(128, sizeof(struct roffstr));

		free(r->xtab[(int)*first].p);
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
		r->xtab[(int)*first].sz = ssz;
	}

	return(ROFF_IGN);
}
Exemple #11
0
/*
 * Strip the escapes out of a string, emitting the results.
 */
static void
pstring(const char *p, int col, int *colp, int list)
{
	enum mandoc_esc	 esc;
	const char	*start, *end;
	int		 emit;

	/*
	 * Print as many column spaces til we achieve parity with the
	 * input document.
	 */

again:
	if (list && '\0' != *p) {
		while (isspace((unsigned char)*p))
			p++;

		while ('\'' == *p || '(' == *p || '"' == *p)
			p++;

		emit = isalpha((unsigned char)p[0]) &&
			isalpha((unsigned char)p[1]);

		for (start = p; '\0' != *p; p++)
			if ('\\' == *p) {
				p++;
				esc = mandoc_escape(&p, NULL, NULL);
				if (ESCAPE_ERROR == esc)
					return;
				emit = 0;
			} else if (isspace((unsigned char)*p))
				break;

		end = p - 1;

		while (end > start)
			if ('.' == *end || ',' == *end ||
					'\'' == *end || '"' == *end ||
					')' == *end || '!' == *end ||
					'?' == *end || ':' == *end ||
					';' == *end)
				end--;
			else
				break;

		if (emit && end - start >= 1) {
			for ( ; start <= end; start++)
				if (ASCII_HYPH == *start)
					putchar('-');
				else
					putchar((unsigned char)*start);
			putchar('\n');
		}

		if (isspace((unsigned char)*p))
			goto again;

		return;
	}

	while (*colp < col) {
		putchar(' ');
		(*colp)++;
	}

	/*
	 * Print the input word, skipping any special characters.
	 */
	while ('\0' != *p)
		if ('\\' == *p) {
			p++;
			esc = mandoc_escape(&p, NULL, NULL);
			if (ESCAPE_ERROR == esc)
				break;
		} else {
			putchar((unsigned char )*p++);
			(*colp)++;
		}
}
Exemple #12
0
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 c, len, nospace;
	const char	*seq;
	enum mandoc_esc	 esc;
	static const char rejs[9] = { '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	nospace = 0;

	while ('\0' != *p) {
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		sz = strcspn(p, rejs);

		fwrite(p, 1, sz, stdout);
		p += (int)sz;

		if ('\0' == *p)
			break;

		if (print_escape(*p++))
			continue;

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		if (c > 0x7E)
			printf("&#%d;", c);
		else if ( ! print_escape(c))
			putchar(c);
	}

	return nospace;
}
Exemple #13
0
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
	const char	*local_start;
	int		 local_sz;
	char		 term;
	enum mandoc_esc	 gly;

	/*
	 * When the caller doesn't provide return storage,
	 * use local storage.
	 */

	if (NULL == start)
		start = &local_start;
	if (NULL == sz)
		sz = &local_sz;

	/*
	 * Beyond the backslash, at least one input character
	 * is part of the escape sequence.  With one exception
	 * (see below), that character won't be returned.
	 */

	gly = ESCAPE_ERROR;
	*start = ++*end;
	*sz = 0;
	term = '\0';

	switch ((*start)[-1]) {
	/*
	 * First the glyphs.  There are several different forms of
	 * these, but each eventually returns a substring of the glyph
	 * name.
	 */
	case '(':
		gly = ESCAPE_SPECIAL;
		*sz = 2;
		break;
	case '[':
		gly = ESCAPE_SPECIAL;
		term = ']';
		break;
	case 'C':
		if ('\'' != **start)
			return(ESCAPE_ERROR);
		*start = ++*end;
		gly = ESCAPE_SPECIAL;
		term = '\'';
		break;

	/*
	 * Escapes taking no arguments at all.
	 */
	case 'd':
		/* FALLTHROUGH */
	case 'u':
		return(ESCAPE_IGNORE);

	/*
	 * The \z escape is supposed to output the following
	 * character without advancing the cursor position.
	 * Since we are mostly dealing with terminal mode,
	 * let us just skip the next character.
	 */
	case 'z':
		return(ESCAPE_SKIPCHAR);

	/*
	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
	 * 'X' is the trigger.  These have opaque sub-strings.
	 */
	case 'F':
		/* FALLTHROUGH */
	case 'g':
		/* FALLTHROUGH */
	case 'k':
		/* FALLTHROUGH */
	case 'M':
		/* FALLTHROUGH */
	case 'm':
		/* FALLTHROUGH */
	case 'n':
		/* FALLTHROUGH */
	case 'V':
		/* FALLTHROUGH */
	case 'Y':
		gly = ESCAPE_IGNORE;
		/* FALLTHROUGH */
	case 'f':
		if (ESCAPE_ERROR == gly)
			gly = ESCAPE_FONT;
		switch (**start) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		default:
			*sz = 1;
			break;
		}
		break;

	/*
	 * These escapes are of the form \X'Y', where 'X' is the trigger
	 * and 'Y' is any string.  These have opaque sub-strings.
	 * The \B and \w escapes are handled in roff.c, roff_res().
	 */
	case 'A':
		/* FALLTHROUGH */
	case 'b':
		/* FALLTHROUGH */
	case 'D':
		/* FALLTHROUGH */
	case 'o':
		/* FALLTHROUGH */
	case 'R':
		/* FALLTHROUGH */
	case 'X':
		/* FALLTHROUGH */
	case 'Z':
		if ('\0' == **start)
			return(ESCAPE_ERROR);
		gly = ESCAPE_IGNORE;
		term = **start;
		*start = ++*end;
		break;

	/*
	 * These escapes are of the form \X'N', where 'X' is the trigger
	 * and 'N' resolves to a numerical expression.
	 */
	case 'h':
		/* FALLTHROUGH */
	case 'H':
		/* FALLTHROUGH */
	case 'L':
		/* FALLTHROUGH */
	case 'l':
		/* FALLTHROUGH */
	case 'S':
		/* FALLTHROUGH */
	case 'v':
		/* FALLTHROUGH */
	case 'x':
		if (strchr(" %&()*+-./0123456789:<=>", **start)) {
			if ('\0' != **start)
				++*end;
			return(ESCAPE_ERROR);
		}
		gly = ESCAPE_IGNORE;
		term = **start;
		*start = ++*end;
		break;

	/*
	 * Special handling for the numbered character escape.
	 * XXX Do any other escapes need similar handling?
	 */
	case 'N':
		if ('\0' == **start)
			return(ESCAPE_ERROR);
		(*end)++;
		if (isdigit((unsigned char)**start)) {
			*sz = 1;
			return(ESCAPE_IGNORE);
		}
		(*start)++;
		while (isdigit((unsigned char)**end))
			(*end)++;
		*sz = *end - *start;
		if ('\0' != **end)
			(*end)++;
		return(ESCAPE_NUMBERED);

	/*
	 * Sizes get a special category of their own.
	 */
	case 's':
		gly = ESCAPE_IGNORE;

		/* See +/- counts as a sign. */
		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
			(*end)++;

		switch (**end) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		case '\'':
			*start = ++*end;
			term = '\'';
			break;
		default:
			*sz = 1;
			break;
		}

		break;

	/*
	 * Anything else is assumed to be a glyph.
	 * In this case, pass back the character after the backslash.
	 */
	default:
		gly = ESCAPE_SPECIAL;
		*start = --*end;
		*sz = 1;
		break;
	}

	assert(ESCAPE_ERROR != gly);

	/*
	 * Read up to the terminating character,
	 * paying attention to nested escapes.
	 */

	if ('\0' != term) {
		while (**end != term) {
			switch (**end) {
			case '\0':
				return(ESCAPE_ERROR);
			case '\\':
				(*end)++;
				if (ESCAPE_ERROR ==
				    mandoc_escape(end, NULL, NULL))
					return(ESCAPE_ERROR);
				break;
			default:
				(*end)++;
				break;
			}
		}
		*sz = (*end)++ - *start;
	} else {
		assert(*sz > 0);
		if ((size_t)*sz > strlen(*start))
			return(ESCAPE_ERROR);
		*end += *sz;
	}

	/* Run post-processors. */

	switch (gly) {
	case ESCAPE_FONT:
		if (2 == *sz) {
			if ('C' == **start) {
				/*
				 * Treat constant-width font modes
				 * just like regular font modes.
				 */
				(*start)++;
				(*sz)--;
			} else {
				if ('B' == (*start)[0] && 'I' == (*start)[1])
					gly = ESCAPE_FONTBI;
				break;
			}
		} else if (1 != *sz)
			break;

		switch (**start) {
		case '3':
			/* FALLTHROUGH */
		case 'B':
			gly = ESCAPE_FONTBOLD;
			break;
		case '2':
			/* FALLTHROUGH */
		case 'I':
			gly = ESCAPE_FONTITALIC;
			break;
		case 'P':
			gly = ESCAPE_FONTPREV;
			break;
		case '1':
			/* FALLTHROUGH */
		case 'R':
			gly = ESCAPE_FONTROMAN;
			break;
		}
		break;
	case ESCAPE_SPECIAL:
		if (1 == *sz && 'c' == **start)
			gly = ESCAPE_NOSPACE;
		/*
		 * Unicode escapes are defined in groff as \[u0000]
		 * to \[u10FFFF], where the contained value must be
		 * a valid Unicode codepoint.  Here, however, only
		 * check the length and range.
		 */
		if (**start != 'u' || *sz < 5 || *sz > 7)
			break;
		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
			break;
		if (*sz == 6 && (*start)[1] == '0')
			break;
		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
		    + 1 == *sz)
			gly = ESCAPE_UNICODE;
		break;
	default:
		break;
	}

	return(gly);
}
Exemple #14
0
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	*seq, *cp;
	char		 c;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			if (TERMP_PREKEEP & p->flags)
				p->flags |= TERMP_KEEP;
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}

	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);

	while ('\0' != *word) {
		if ((ssz = strcspn(word, "\\")) > 0)
			encode(p, word, ssz);

		word += (int)ssz;
		if ('\\' != *word)
			continue;

		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			break;

		if (TERMENC_ASCII != p->enc)
			switch (esc) {
			case (ESCAPE_UNICODE):
				uc = mchars_num2uc(seq + 1, sz - 1);
				if ('\0' == uc)
					break;
				encode1(p, uc);
				continue;
			case (ESCAPE_SPECIAL):
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc <= 0)
					break;
				encode1(p, uc);
				continue;
			default:
				break;
			}

		switch (esc) {
		case (ESCAPE_UNICODE):
			encode1(p, '?');
			break;
		case (ESCAPE_NUMBERED):
			c = mchars_num2char(seq, sz);
			if ('\0' != c)
				encode(p, &c, 1);
			break;
		case (ESCAPE_SPECIAL):
			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
			if (NULL != cp) 
				encode(p, cp, ssz);
			else if (1 == ssz)
				encode(p, seq, sz);
			break;
		case (ESCAPE_FONTBOLD):
			term_fontrepl(p, TERMFONT_BOLD);
			break;
		case (ESCAPE_FONTITALIC):
			term_fontrepl(p, TERMFONT_UNDER);
			break;
		case (ESCAPE_FONT):
			/* FALLTHROUGH */
		case (ESCAPE_FONTROMAN):
			term_fontrepl(p, TERMFONT_NONE);
			break;
		case (ESCAPE_FONTPREV):
			term_fontlast(p);
			break;
		case (ESCAPE_NOSPACE):
			if ('\0' == *word)
				p->flags |= TERMP_NOSPACE;
			break;
		default:
			break;
		}
	}
}
Exemple #15
0
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, c;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_word() as we
	 * must calculate the width of produced strings.
	 */

	sz = 0;
	while ('\0' != *cp) {
		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += (*p->width)(p, *cp++);

		c = 0;
		switch (*cp) {
		case ('\\'):
			cp++;
			esc = mandoc_escape(&cp, &seq, &ssz);
			if (ESCAPE_ERROR == esc)
				return(sz);

			if (TERMENC_ASCII != p->enc)
				switch (esc) {
				case (ESCAPE_UNICODE):
					c = mchars_num2uc
						(seq + 1, ssz - 1);
					if ('\0' == c)
						break;
					sz += (*p->width)(p, c);
					continue;
				case (ESCAPE_SPECIAL):
					c = mchars_spec2cp
						(p->symtab, seq, ssz);
					if (c <= 0)
						break;
					sz += (*p->width)(p, c);
					continue;
				default:
					break;
				}

			rhs = NULL;

			switch (esc) {
			case (ESCAPE_UNICODE):
				sz += (*p->width)(p, '?');
				break;
			case (ESCAPE_NUMBERED):
				c = mchars_num2char(seq, ssz);
				if ('\0' != c)
					sz += (*p->width)(p, c);
				break;
			case (ESCAPE_SPECIAL):
				rhs = mchars_spec2str
					(p->symtab, seq, ssz, &rsz);

				if (ssz != 1 || rhs)
					break;

				rhs = seq;
				rsz = ssz;
				break;
			default:
				break;
			}

			if (NULL == rhs)
				break;

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case (ASCII_NBRSP):
			sz += (*p->width)(p, ' ');
			cp++;
			break;
		case (ASCII_HYPH):
			sz += (*p->width)(p, '-');
			cp++;
			break;
		default:
			break;
		}
	}

	return(sz);
}
Exemple #16
0
/*
 * In the current line, expand user-defined strings ("\*")
 * and references to number registers ("\n").
 * Also check the syntax of other escape sequences.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		switch (*cp) {
		case ('*'):
			res = NULL;
			break;
		case ('n'):
			res = ubuf;
			break;
		default:
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse, 
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE, 
					 r->parse, ln, 
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse, 
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
Exemple #17
0
/*
 * Parse a macro line, that is, a line beginning with the control
 * character.
 */
static int
mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
{
	struct mdoc_node *n;
	const char	 *cp;
	enum mdoct	  tok;
	int		  i, sv;
	char		  mac[5];

	sv = offs;

	/*
	 * Copy the first word into a nil-terminated buffer.
	 * Stop when a space, tab, escape, or eoln is encountered.
	 */

	i = 0;
	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
		mac[i++] = buf[offs++];

	mac[i] = '\0';

	tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;

	if (tok == MDOC_MAX) {
		mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
		    ln, sv, buf + sv - 1);
		return(1);
	}

	/* Skip a leading escape sequence or tab. */

	switch (buf[offs]) {
	case '\\':
		cp = buf + offs + 1;
		mandoc_escape(&cp, NULL, NULL);
		offs = cp - buf;
		break;
	case '\t':
		offs++;
		break;
	default:
		break;
	}

	/* Jump to the next non-whitespace word. */

	while (buf[offs] && ' ' == buf[offs])
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
		mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
		    ln, offs - 1, NULL);

	/*
	 * If an initial macro or a list invocation, divert directly
	 * into macro processing.
	 */

	if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
		mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
		return(1);
	}

	n = mdoc->last;
	assert(mdoc->last);

	/*
	 * If the first macro of a `Bl -column', open an `It' block
	 * context around the parsed macro.
	 */

	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
	    LIST_column == n->norm->Bl.type) {
		mdoc->flags |= MDOC_FREECOL;
		mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
		return(1);
	}

	/*
	 * If we're following a block-level `It' within a `Bl -column'
	 * context (perhaps opened in the above block or in ptext()),
	 * then open an `It' block context around the parsed macro.
	 */

	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
	    NULL != n->parent &&
	    MDOC_Bl == n->parent->tok &&
	    LIST_column == n->parent->norm->Bl.type) {
		mdoc->flags |= MDOC_FREECOL;
		mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
		return(1);
	}

	/* Normal processing of a macro. */

	mdoc_macro(mdoc, tok, ln, sv, &offs, buf);

	/* In quick mode (for mandocdb), abort after the NAME section. */

	if (mdoc->quick && MDOC_Sh == tok &&
	    SEC_NAME != mdoc->last->sec)
		return(2);

	return(1);
}
Exemple #18
0
/*
 * Duplicate an input string, making the appropriate character
 * conversations (as stipulated by `tr') along the way.
 * Returns a heap-allocated string with all the replacements made.
 */
char *
roff_strdup(const struct roff *r, const char *p)
{
	const struct roffkv *cp;
	char		*res;
	const char	*pp;
	size_t		 ssz, sz;
	enum mandoc_esc	 esc;

	if (NULL == r->xmbtab && NULL == r->xtab)
		return(mandoc_strdup(p));
	else if ('\0' == *p)
		return(mandoc_strdup(""));

	/*
	 * Step through each character looking for term matches
	 * (remember that a `tr' can be invoked with an escape, which is
	 * a glyph but the escape is multi-character).
	 * We only do this if the character hash has been initialised
	 * and the string is >0 length.
	 */

	res = NULL;
	ssz = 0;

	while ('\0' != *p) {
		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
			sz = r->xtab[(int)*p].sz;
			res = mandoc_realloc(res, ssz + sz + 1);
			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
			ssz += sz;
			p++;
			continue;
		} else if ('\\' != *p) {
			res = mandoc_realloc(res, ssz + 2);
			res[ssz++] = *p++;
			continue;
		}

		/* Search for term matches. */
		for (cp = r->xmbtab; cp; cp = cp->next)
			if (0 == strncmp(p, cp->key.p, cp->key.sz))
				break;

		if (NULL != cp) {
			/*
			 * A match has been found.
			 * Append the match to the array and move
			 * forward by its keysize.
			 */
			res = mandoc_realloc
				(res, ssz + cp->val.sz + 1);
			memcpy(res + ssz, cp->val.p, cp->val.sz);
			ssz += cp->val.sz;
			p += (int)cp->key.sz;
			continue;
		}

		/*
		 * Handle escapes carefully: we need to copy
		 * over just the escape itself, or else we might
		 * do replacements within the escape itself.
		 * Make sure to pass along the bogus string.
		 */
		pp = p++;
		esc = mandoc_escape(&p, NULL, NULL);
		if (ESCAPE_ERROR == esc) {
			sz = strlen(pp);
			res = mandoc_realloc(res, ssz + sz + 1);
			memcpy(res + ssz, pp, sz);
			break;
		}
		/* 
		 * We bail out on bad escapes. 
		 * No need to warn: we already did so when
		 * roff_res() was called.
		 */
		sz = (int)(p - pp);
		res = mandoc_realloc(res, ssz + sz + 1);
		memcpy(res + ssz, pp, sz);
		ssz += sz;
	}

	res[(int)ssz] = '\0';
	return(res);
}
Exemple #19
0
static int
man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
{
	struct roff_node *n;
	const char	*cp;
	int		 tok;
	int		 i, ppos;
	int		 bline;
	char		 mac[5];

	ppos = offs;

	/*
	 * Copy the first word into a nil-terminated buffer.
	 * Stop when a space, tab, escape, or eoln is encountered.
	 */

	i = 0;
	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
		mac[i++] = buf[offs++];

	mac[i] = '\0';

	tok = (i > 0 && i < 4) ? man_hash_find(mac) : TOKEN_NONE;

	if (tok == TOKEN_NONE) {
		mandoc_msg(MANDOCERR_MACRO, man->parse,
		    ln, ppos, buf + ppos - 1);
		return 1;
	}

	/* Skip a leading escape sequence or tab. */

	switch (buf[offs]) {
	case '\\':
		cp = buf + offs + 1;
		mandoc_escape(&cp, NULL, NULL);
		offs = cp - buf;
		break;
	case '\t':
		offs++;
		break;
	default:
		break;
	}

	/* Jump to the next non-whitespace word. */

	while (buf[offs] && buf[offs] == ' ')
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
		    ln, offs - 1, NULL);

	/*
	 * Some macros break next-line scopes; otherwise, remember
	 * whether we are in next-line scope for a block head.
	 */

	man_breakscope(man, tok);
	bline = man->flags & MAN_BLINE;

	/* Call to handler... */

	assert(man_macros[tok].fp);
	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);

	/* In quick mode (for mandocdb), abort after the NAME section. */

	if (man->quick && tok == MAN_SH) {
		n = man->last;
		if (n->type == ROFFT_BODY &&
		    strcmp(n->prev->child->string, "NAME"))
			return 2;
	}

	/*
	 * If we are in a next-line scope for a block head,
	 * close it out now and switch to the body,
	 * unless the next-line scope is allowed to continue.
	 */

	if ( ! bline || man->flags & MAN_ELINE ||
	    man_macros[tok].flags & MAN_NSCOPED)
		return 1;

	assert(man->flags & MAN_BLINE);
	man->flags &= ~MAN_BLINE;

	man_unscope(man, man->last->parent);
	roff_body_alloc(man, ln, ppos, man->last->tok);
	return 1;
}
Exemple #20
0
/*
 * Pre-filter each and every line for reserved words (one beginning with
 * `\*', e.g., `\*(ab').  These must be handled before the actual line
 * is processed. 
 * This also checks the syntax of regular escapes.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	enum mandoc_esc	 esc;
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl, expand_count;
	size_t		 nsz;
	char		*n;

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		if ('*' != *cp) {
			res = cp;
			esc = mandoc_escape(&cp, NULL, NULL);
			if (ESCAPE_ERROR != esc)
				continue;
			cp = res;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse, 
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE, 
					 r->parse, ln, 
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse, 
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
Exemple #21
0
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
	const char	*local_start;
	int		 local_sz;
	char		 term;
	enum mandoc_esc	 gly; 

	/*
	 * When the caller doesn't provide return storage,
	 * use local storage.
	 */

	if (NULL == start)
		start = &local_start;
	if (NULL == sz)
		sz = &local_sz;

	/*
	 * Beyond the backslash, at least one input character
	 * is part of the escape sequence.  With one exception
	 * (see below), that character won't be returned.
	 */

	gly = ESCAPE_ERROR;
	*start = ++*end;
	*sz = 0;
	term = '\0';

	switch ((*start)[-1]) {
	/*
	 * First the glyphs.  There are several different forms of
	 * these, but each eventually returns a substring of the glyph
	 * name.
	 */
	case ('('):
		gly = ESCAPE_SPECIAL;
		*sz = 2;
		break;
	case ('['):
		gly = ESCAPE_SPECIAL;
		/*
		 * Unicode escapes are defined in groff as \[uXXXX] to
		 * \[u10FFFF], where the contained value must be a valid
		 * Unicode codepoint.  Here, however, only check whether
		 * it's not a zero-width escape.
		 */
		if ('u' == (*start)[0] && ']' != (*start)[1])
			gly = ESCAPE_UNICODE;
		term = ']';
		break;
	case ('C'):
		if ('\'' != **start)
			return(ESCAPE_ERROR);
		gly = ESCAPE_SPECIAL;
		*start = ++*end;
		term = '\'';
		break;

	/*
	 * The \z escape is supposed to output the following
	 * character without advancing the cursor position.  
	 * Since we are mostly dealing with terminal mode,
	 * let us just skip the next character.
	 */
	case ('z'):
		return(ESCAPE_SKIPCHAR);

	/*
	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
	 * 'X' is the trigger.  These have opaque sub-strings.
	 */
	case ('F'):
		/* FALLTHROUGH */
	case ('g'):
		/* FALLTHROUGH */
	case ('k'):
		/* FALLTHROUGH */
	case ('M'):
		/* FALLTHROUGH */
	case ('m'):
		/* FALLTHROUGH */
	case ('n'):
		/* FALLTHROUGH */
	case ('V'):
		/* FALLTHROUGH */
	case ('Y'):
		gly = ESCAPE_IGNORE;
		/* FALLTHROUGH */
	case ('f'):
		if (ESCAPE_ERROR == gly)
			gly = ESCAPE_FONT;
		switch (**start) {
		case ('('):
			*start = ++*end;
			*sz = 2;
			break;
		case ('['):
			*start = ++*end;
			term = ']';
			break;
		default:
			*sz = 1;
			break;
		}
		break;

	/*
	 * These escapes are of the form \X'Y', where 'X' is the trigger
	 * and 'Y' is any string.  These have opaque sub-strings.
	 */
	case ('A'):
		/* FALLTHROUGH */
	case ('b'):
		/* FALLTHROUGH */
	case ('D'):
		/* FALLTHROUGH */
	case ('o'):
		/* FALLTHROUGH */
	case ('R'):
		/* FALLTHROUGH */
	case ('X'):
		/* FALLTHROUGH */
	case ('Z'):
		if ('\'' != **start)
			return(ESCAPE_ERROR);
		gly = ESCAPE_IGNORE;
		*start = ++*end;
		term = '\'';
		break;

	/*
	 * These escapes are of the form \X'N', where 'X' is the trigger
	 * and 'N' resolves to a numerical expression.
	 */
	case ('B'):
		/* FALLTHROUGH */
	case ('h'):
		/* FALLTHROUGH */
	case ('H'):
		/* FALLTHROUGH */
	case ('L'):
		/* FALLTHROUGH */
	case ('l'):
		gly = ESCAPE_NUMBERED;
		/* FALLTHROUGH */
	case ('S'):
		/* FALLTHROUGH */
	case ('v'):
		/* FALLTHROUGH */
	case ('w'):
		/* FALLTHROUGH */
	case ('x'):
		if ('\'' != **start)
			return(ESCAPE_ERROR);
		if (ESCAPE_ERROR == gly)
			gly = ESCAPE_IGNORE;
		*start = ++*end;
		term = '\'';
		break;

	/*
	 * Special handling for the numbered character escape.
	 * XXX Do any other escapes need similar handling?
	 */
	case ('N'):
		if ('\0' == **start)
			return(ESCAPE_ERROR);
		(*end)++;
		if (isdigit((unsigned char)**start)) {
			*sz = 1;
			return(ESCAPE_IGNORE);
		}
		(*start)++;
		while (isdigit((unsigned char)**end))
			(*end)++;
		*sz = *end - *start;
		if ('\0' != **end)
			(*end)++;
		return(ESCAPE_NUMBERED);

	/* 
	 * Sizes get a special category of their own.
	 */
	case ('s'):
		gly = ESCAPE_IGNORE;

		/* See +/- counts as a sign. */
		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
			(*end)++;

		switch (**end) {
		case ('('):
			*start = ++*end;
			*sz = 2;
			break;
		case ('['):
			*start = ++*end;
			term = ']';
			break;
		case ('\''):
			*start = ++*end;
			term = '\'';
			break;
		default:
			*sz = 1;
			break;
		}

		break;

	/*
	 * Anything else is assumed to be a glyph.
	 * In this case, pass back the character after the backslash.
	 */
	default:
		gly = ESCAPE_SPECIAL;
		*start = --*end;
		*sz = 1;
		break;
	}

	assert(ESCAPE_ERROR != gly);

	/*
	 * Read up to the terminating character,
	 * paying attention to nested escapes.
	 */

	if ('\0' != term) {
		while (**end != term) {
			switch (**end) {
			case ('\0'):
				return(ESCAPE_ERROR);
			case ('\\'):
				(*end)++;
				if (ESCAPE_ERROR ==
				    mandoc_escape(end, NULL, NULL))
					return(ESCAPE_ERROR);
				break;
			default:
				(*end)++;
				break;
			}
		}
		*sz = (*end)++ - *start;
	} else {
		assert(*sz > 0);
		if ((size_t)*sz > strlen(*start))
			return(ESCAPE_ERROR);
		*end += *sz;
	}

	/* Run post-processors. */

	switch (gly) {
	case (ESCAPE_FONT):
		/*
		 * Pretend that the constant-width font modes are the
		 * same as the regular font modes.
		 */
		if (2 == *sz && 'C' == **start) {
			(*start)++;
			(*sz)--;
		} else if (1 != *sz)
			break;

		switch (**start) {
		case ('3'):
			/* FALLTHROUGH */
		case ('B'):
			gly = ESCAPE_FONTBOLD;
			break;
		case ('2'):
			/* FALLTHROUGH */
		case ('I'):
			gly = ESCAPE_FONTITALIC;
			break;
		case ('P'):
			gly = ESCAPE_FONTPREV;
			break;
		case ('1'):
			/* FALLTHROUGH */
		case ('R'):
			gly = ESCAPE_FONTROMAN;
			break;
		}
		break;
	case (ESCAPE_SPECIAL):
		if (1 == *sz && 'c' == **start)
			gly = ESCAPE_NOSPACE;
		break;
	default:
		break;
	}

	return(gly);
}
Exemple #22
0
/*
 * Print text and mdoc(7) syntax elements.
 */
static void
md_word(const char *s)
{
	const char	*seq, *prevfont, *currfont, *nextfont;
	char		 c;
	int		 bs, sz, uc, breakline;

	/* No spacing before closing delimiters. */
	if (s[0] != '\0' && s[1] == '\0' &&
	    strchr("!),.:;?]", s[0]) != NULL &&
	    (outflags & MD_spc_force) == 0)
		outflags &= ~MD_spc;

	md_preword();

	if (*s == '\0')
		return;

	/* No spacing after opening delimiters. */
	if ((s[0] == '(' || s[0] == '[') && s[1] == '\0')
		outflags &= ~MD_spc;

	breakline = 0;
	prevfont = currfont = "";
	while ((c = *s++) != '\0') {
		bs = 0;
		switch(c) {
		case ASCII_NBRSP:
			if (code_blocks)
				c = ' ';
			else {
				md_named("nbsp");
				c = '\0';
			}
			break;
		case ASCII_HYPH:
			bs = escflags & ESC_BOL && !code_blocks;
			c = '-';
			break;
		case ASCII_BREAK:
			continue;
		case '#':
		case '+':
		case '-':
			bs = escflags & ESC_BOL && !code_blocks;
			break;
		case '(':
			bs = escflags & ESC_HYP && !code_blocks;
			break;
		case ')':
			bs = escflags & ESC_NUM && !code_blocks;
			break;
		case '*':
		case '[':
		case '_':
		case '`':
			bs = !code_blocks;
			break;
		case '.':
			bs = escflags & ESC_NUM && !code_blocks;
			break;
		case '<':
			if (code_blocks == 0) {
				md_named("lt");
				c = '\0';
			}
			break;
		case '=':
			if (escflags & ESC_BOL && !code_blocks) {
				md_named("equals");
				c = '\0';
			}
			break;
		case '>':
			if (code_blocks == 0) {
				md_named("gt");
				c = '\0';
			}
			break;
		case '\\':
			uc = 0;
			nextfont = NULL;
			switch (mandoc_escape(&s, &seq, &sz)) {
			case ESCAPE_UNICODE:
				uc = mchars_num2uc(seq + 1, sz - 1);
				break;
			case ESCAPE_NUMBERED:
				uc = mchars_num2char(seq, sz);
				break;
			case ESCAPE_SPECIAL:
				uc = mchars_spec2cp(seq, sz);
				break;
			case ESCAPE_FONTBOLD:
				nextfont = "**";
				break;
			case ESCAPE_FONTITALIC:
				nextfont = "*";
				break;
			case ESCAPE_FONTBI:
				nextfont = "***";
				break;
			case ESCAPE_FONT:
			case ESCAPE_FONTROMAN:
				nextfont = "";
				break;
			case ESCAPE_FONTPREV:
				nextfont = prevfont;
				break;
			case ESCAPE_BREAK:
				breakline = 1;
				break;
			case ESCAPE_NOSPACE:
			case ESCAPE_SKIPCHAR:
			case ESCAPE_OVERSTRIKE:
				/* XXX not implemented */
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
			default:
				break;
			}
			if (nextfont != NULL && !code_blocks) {
				if (*currfont != '\0') {
					outflags &= ~MD_spc;
					md_rawword(currfont);
				}
				prevfont = currfont;
				currfont = nextfont;
				if (*currfont != '\0') {
					outflags &= ~MD_spc;
					md_rawword(currfont);
				}
			}
			if (uc) {
				if ((uc < 0x20 && uc != 0x09) ||
				    (uc > 0x7E && uc < 0xA0))
					uc = 0xFFFD;
				if (code_blocks) {
					seq = mchars_uc2str(uc);
					fputs(seq, stdout);
					outcount += strlen(seq);
				} else {
					printf("&#%d;", uc);
					outcount++;
				}
				escflags &= ~ESC_FON;
			}
			c = '\0';
			break;
		case ']':
			bs = escflags & ESC_SQU && !code_blocks;
			escflags |= ESC_HYP;
			break;
		default:
			break;
		}
		if (bs)
			putchar('\\');
		md_char(c);
		if (breakline &&
		    (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) {
			printf("  \n");
			breakline = 0;
			while (*s == ' ' || *s == ASCII_NBRSP)
				s++;
		}
	}
	if (*currfont != '\0') {
		outflags &= ~MD_spc;
		md_rawword(currfont);
	} else if (s[-2] == ' ')
		escflags |= ESC_EOL;
	else
		escflags &= ~ESC_EOL;
}
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 c, len, nospace;
	const char	*seq;
	enum mandoc_esc	 esc;
	static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };

	nospace = 0;

	while ('\0' != *p) {
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		sz = strcspn(p, rejs);

		fwrite(p, 1, sz, stdout);
		p += (int)sz;

		if ('\0' == *p)
			break;

		switch (*p++) {
		case ('<'):
			printf("&lt;");
			continue;
		case ('>'):
			printf("&gt;");
			continue;
		case ('&'):
			printf("&amp;");
			continue;
		case (ASCII_HYPH):
			putchar('-');
			continue;
		default:
			break;
		}

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		switch (esc) {
		case (ESCAPE_FONT):
			/* FALLTHROUGH */
		case (ESCAPE_FONTPREV):
			/* FALLTHROUGH */
		case (ESCAPE_FONTBOLD):
			/* FALLTHROUGH */
		case (ESCAPE_FONTITALIC):
			/* FALLTHROUGH */
		case (ESCAPE_FONTBI):
			/* FALLTHROUGH */
		case (ESCAPE_FONTROMAN):
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case (ESCAPE_SKIPCHAR):
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		switch (esc) {
		case (ESCAPE_UNICODE):
			/* Skip passed "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			if ('\0' != c)
				printf("&#x%x;", c);
			break;
		case (ESCAPE_NUMBERED):
			c = mchars_num2char(seq, len);
			if ('\0' != c)
				putchar(c);
			break;
		case (ESCAPE_SPECIAL):
			c = mchars_spec2cp(h->symtab, seq, len);
			if (c > 0)
				printf("&#%d;", c);
			else if (-1 == c && 1 == len)
				putchar((int)*seq);
			break;
		case (ESCAPE_NOSPACE):
			if ('\0' == *p)
				nospace = 1;
			break;
		default:
			break;
		}
	}

	return(nospace);
}
Exemple #24
0
static int
man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
{
	struct roff_node *n;
	const char	*cp;
	size_t		 sz;
	enum roff_tok	 tok;
	int		 ppos;
	int		 bline;

	/* Determine the line macro. */

	ppos = offs;
	tok = TOKEN_NONE;
	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
		offs++;
	if (sz > 0 && sz < 4)
		tok = roffhash_find(man->manmac, buf + ppos, sz);
	if (tok == TOKEN_NONE) {
		mandoc_msg(MANDOCERR_MACRO, man->parse,
		    ln, ppos, buf + ppos - 1);
		return 1;
	}

	/* Skip a leading escape sequence or tab. */

	switch (buf[offs]) {
	case '\\':
		cp = buf + offs + 1;
		mandoc_escape(&cp, NULL, NULL);
		offs = cp - buf;
		break;
	case '\t':
		offs++;
		break;
	default:
		break;
	}

	/* Jump to the next non-whitespace word. */

	while (buf[offs] == ' ')
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
		    ln, offs - 1, NULL);

	/*
	 * Some macros break next-line scopes; otherwise, remember
	 * whether we are in next-line scope for a block head.
	 */

	man_breakscope(man, tok);
	bline = man->flags & MAN_BLINE;

	/*
	 * If the line in next-line scope ends with \c, keep the
	 * next-line scope open for the subsequent input line.
	 * That is not at all portable, only groff >= 1.22.4
	 * does it, but *if* this weird idiom occurs in a manual
	 * page, that's very likely what the author intended.
	 */

	if (bline) {
		cp = strchr(buf + offs, '\0') - 2;
		if (cp >= buf && cp[0] == '\\' && cp[1] == 'c')
			bline = 0;
	}

	/* Call to handler... */

	assert(man_macros[tok].fp);
	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);

	/* In quick mode (for mandocdb), abort after the NAME section. */

	if (man->quick && tok == MAN_SH) {
		n = man->last;
		if (n->type == ROFFT_BODY &&
		    strcmp(n->prev->child->string, "NAME"))
			return 2;
	}

	/*
	 * If we are in a next-line scope for a block head,
	 * close it out now and switch to the body,
	 * unless the next-line scope is allowed to continue.
	 */

	if ( ! bline || man->flags & MAN_ELINE ||
	    man_macros[tok].flags & MAN_NSCOPED)
		return 1;

	assert(man->flags & MAN_BLINE);
	man->flags &= ~MAN_BLINE;

	man_unscope(man, man->last->parent);
	roff_body_alloc(man, ln, ppos, man->last->tok);
	return 1;
}