Пример #1
0
/*
 * Compute the rendered width of the string cp for the terminal p.
 *
 * Ordinary characters are measured one by one through cond_width().
 * Backslash escapes are decoded with mandoc_escape() and the width of
 * what they would print is added instead of the width of their source
 * text.  The special bytes ASCII_NBRSP and ASCII_HYPH are measured as
 * ' ' and '-' respectively; ASCII_BREAK contributes no width.
 *
 * Returns the accumulated width, in whatever unit cond_width() and
 * the p->width callback report.
 */
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, skip, uc;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
			ASCII_BREAK, '\0' };

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_word() as we
	 * must calculate the width of produced strings.
	 */

	sz = 0;
	skip = 0;
	while ('\0' != *cp) {
		/* Measure the run of characters needing no special care. */
		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += cond_width(p, *cp++, &skip);

		/*
		 * cp now points at a byte from rej or at the terminating
		 * NUL.  Inside this switch, "continue" restarts the outer
		 * while loop and "break" merely leaves the switch.
		 */
		switch (*cp) {
		case '\\':
			cp++;
			esc = mandoc_escape(&cp, &seq, &ssz);
			if (ESCAPE_ERROR == esc)
				continue;

			/* rhs stays NULL unless a replacement string is found. */
			rhs = NULL;

			switch (esc) {
			case ESCAPE_UNICODE:
				/* seq + 1 skips the leading character of the sequence. */
				uc = mchars_num2uc(seq + 1, ssz - 1);
				break;
			case ESCAPE_NUMBERED:
				uc = mchars_num2char(seq, ssz);
				if (uc < 0)
					continue;
				break;
			case ESCAPE_SPECIAL:
				if (p->enc == TERMENC_ASCII) {
					rhs = mchars_spec2str(p->symtab,
					    seq, ssz, &rsz);
					/* Found: measure the string below. */
					if (rhs != NULL)
						break;
				} else {
					uc = mchars_spec2cp(p->symtab,
					    seq, ssz);
					if (uc > 0)
						sz += cond_width(p, uc, &skip);
				}
				continue;
			case ESCAPE_SKIPCHAR:
				/* Remember that a skip is pending. */
				skip = 1;
				continue;
			case ESCAPE_OVERSTRIKE:
				/*
				 * The characters are printed on top of
				 * each other, so only the widest counts.
				 */
				rsz = 0;
				rhs = seq + ssz;
				while (seq < rhs) {
					if (*seq == '\\') {
						mandoc_escape(&seq, NULL, NULL);
						continue;
					}
					i = (*p->width)(p, *seq++);
					if (rsz < i)
						rsz = i;
				}
				sz += rsz;
				continue;
			default:
				continue;
			}

			/*
			 * Common handling for Unicode and numbered
			 * character escape sequences.
			 */

			if (rhs == NULL) {
				if (p->enc == TERMENC_ASCII) {
					rhs = ascii_uc2str(uc);
					rsz = strlen(rhs);
				} else {
					/*
					 * Code points below 0x20 (except
					 * 0x09) and in 0x7F..0x9F are
					 * measured as U+FFFD.
					 */
					if ((uc < 0x20 && uc != 0x09) ||
					    (uc > 0x7E && uc < 0xA0))
						uc = 0xFFFD;
					sz += cond_width(p, uc, &skip);
					continue;
				}
			}

			/* A pending skip swallows the whole replacement. */
			if (skip) {
				skip = 0;
				break;
			}

			/*
			 * Common handling for all escape sequences
			 * printing more than one character.
			 */

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case ASCII_NBRSP:
			sz += cond_width(p, ' ', &skip);
			cp++;
			break;
		case ASCII_HYPH:
			sz += cond_width(p, '-', &skip);
			/* FALLTHROUGH */
		case ASCII_BREAK:
			/*
			 * ASCII_BREAK is in rej, so strcspn() stops on it.
			 * It has no width but must still be consumed;
			 * otherwise the loop would never advance past it.
			 */
			cp++;
			break;
		default:
			break;
		}
	}

	return(sz);
}
/*
 * Write one character produced by an escape sequence to standard
 * output, replacing the HTML metacharacters '<', '>' and '&' by their
 * entity references so that they cannot be read as markup.
 */
static void
print_encode_char(int c)
{
	switch (c) {
	case ('<'):
		printf("&lt;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	default:
		putchar(c);
		break;
	}
}

/*
 * Write the string p to standard output as HTML text.
 *
 * Plain text is copied through; '<', '>' and '&' are replaced by
 * entities and ASCII_HYPH by '-'.  Backslash escapes are decoded with
 * mandoc_escape(): font escapes are handed to print_metaf() unless
 * norecurse is non-zero, character escapes are written as numeric
 * character references or as (entity-escaped) single characters.
 * Processing stops at the first escape reported as ESCAPE_ERROR.
 *
 * h->flags carries HTML_SKIPCHAR between iterations: when set, the
 * next plain character or character escape is dropped.
 *
 * Returns 1 if an ESCAPE_NOSPACE escape was the last thing in the
 * string, 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 c, len, nospace;
	const char	*seq;
	enum mandoc_esc	 esc;
	static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };

	nospace = 0;

	while ('\0' != *p) {
		/* A pending skip drops exactly one plain character. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the run of characters that need no translation. */
		sz = strcspn(p, rejs);

		fwrite(p, 1, sz, stdout);
		p += (int)sz;

		if ('\0' == *p)
			break;

		switch (*p++) {
		case ('<'):
			printf("&lt;");
			continue;
		case ('>'):
			printf("&gt;");
			continue;
		case ('&'):
			printf("&amp;");
			continue;
		case (ASCII_HYPH):
			putchar('-');
			continue;
		default:
			/* Only the backslash remains: decode it below. */
			break;
		}

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		/* Escapes that produce no character of their own. */
		switch (esc) {
		case (ESCAPE_FONT):
			/* FALLTHROUGH */
		case (ESCAPE_FONTPREV):
			/* FALLTHROUGH */
		case (ESCAPE_FONTBOLD):
			/* FALLTHROUGH */
		case (ESCAPE_FONTITALIC):
			/* FALLTHROUGH */
		case (ESCAPE_FONTBI):
			/* FALLTHROUGH */
		case (ESCAPE_FONTROMAN):
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case (ESCAPE_SKIPCHAR):
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		/* A pending skip drops the whole escape sequence. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		switch (esc) {
		case (ESCAPE_UNICODE):
			/* Skip past the "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			if ('\0' != c)
				printf("&#x%x;", c);
			break;
		case (ESCAPE_NUMBERED):
			c = mchars_num2char(seq, len);
			/*
			 * The decoded character may be '<', '>' or '&';
			 * it must be entity-escaped like plain text.
			 */
			if ('\0' != c)
				print_encode_char(c);
			break;
		case (ESCAPE_SPECIAL):
			c = mchars_spec2cp(h->symtab, seq, len);
			if (c > 0)
				printf("&#%d;", c);
			else if (-1 == c && 1 == len)
				/* Unknown one-character name: print it. */
				print_encode_char((int)*seq);
			break;
		case (ESCAPE_NOSPACE):
			/* Only meaningful at the very end of the string. */
			if ('\0' == *p)
				nospace = 1;
			break;
		default:
			break;
		}
	}

	return(nospace);
}
Пример #3
0
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 *
 * The function first buffers the spacing that precedes the word, as
 * selected by p->flags, then walks the word: plain text goes to
 * encode(), and every backslash escape is decoded with
 * mandoc_escape() and either printed, applied as a font change, or
 * recorded in p->flags.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
	const char	*seq, *cp;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	/*
	 * Leading whitespace: nothing if TERMP_NOSPACE is set, a
	 * non-breaking space under TERMP_KEEP, otherwise one blank,
	 * or two if TERMP_SENTENCE is set.
	 */
	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}
	/* TERMP_PREKEEP turns TERMP_KEEP on after this word's spacing. */
	if (TERMP_PREKEEP & p->flags)
		p->flags |= TERMP_KEEP;

	/* TERMP_NONOSPACE decides the value TERMP_NOSPACE is reset to. */
	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
	p->skipvsp = 0;

	while ('\0' != *word) {
		if ('\\' != *word) {
			/* A pending skip drops exactly one plain character. */
			if (TERMP_SKIPCHAR & p->flags) {
				p->flags &= ~TERMP_SKIPCHAR;
				word++;
				continue;
			}
			/*
			 * Under TERMP_NBRWORD, blanks are replaced by
			 * ASCII_NBRSP one at a time; otherwise the
			 * whole run up to the next backslash is
			 * encoded at once.
			 */
			if (TERMP_NBRWORD & p->flags) {
				if (' ' == *word) {
					encode(p, nbrsp, 1);
					word++;
					continue;
				}
				ssz = strcspn(word, "\\ ");
			} else
				ssz = strcspn(word, "\\");
			encode(p, word, ssz);
			word += (int)ssz;
			continue;
		}

		/* Step over the backslash and decode the escape. */
		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			continue;

		/*
		 * In this switch, "continue" restarts the outer loop;
		 * only the Unicode and numbered cases "break" out to
		 * the common code below with uc set.
		 */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* seq + 1 skips the leading character of the sequence. */
			uc = mchars_num2uc(seq + 1, sz - 1);
			break;
		case ESCAPE_NUMBERED:
			uc = mchars_num2char(seq, sz);
			if (uc < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			/* ASCII output uses a string, other encodings a code point. */
			if (p->enc == TERMENC_ASCII) {
				cp = mchars_spec2str(p->symtab,
				    seq, sz, &ssz);
				if (cp != NULL)
					encode(p, cp, ssz);
			} else {
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc > 0)
					encode1(p, uc);
			}
			continue;
		case ESCAPE_FONTBOLD:
			term_fontrepl(p, TERMFONT_BOLD);
			continue;
		case ESCAPE_FONTITALIC:
			term_fontrepl(p, TERMFONT_UNDER);
			continue;
		case ESCAPE_FONTBI:
			term_fontrepl(p, TERMFONT_BI);
			continue;
		case ESCAPE_FONT:
			/* FALLTHROUGH */
		case ESCAPE_FONTROMAN:
			term_fontrepl(p, TERMFONT_NONE);
			continue;
		case ESCAPE_FONTPREV:
			term_fontlast(p);
			continue;
		case ESCAPE_NOSPACE:
			/*
			 * A pending skip consumes this escape; otherwise
			 * it only takes effect at the end of the word.
			 */
			if (TERMP_SKIPCHAR & p->flags)
				p->flags &= ~TERMP_SKIPCHAR;
			else if ('\0' == *word)
				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
			continue;
		case ESCAPE_SKIPCHAR:
			p->flags |= TERMP_SKIPCHAR;
			continue;
		case ESCAPE_OVERSTRIKE:
			/*
			 * Emit each character of the argument, with a
			 * backspace between consecutive characters;
			 * nested escapes are stepped over by
			 * mandoc_escape() without output.
			 */
			cp = seq + sz;
			while (seq < cp) {
				if (*seq == '\\') {
					mandoc_escape(&seq, NULL, NULL);
					continue;
				}
				encode1(p, *seq++);
				if (seq < cp)
					encode(p, "\b", 1);
			}
			/* FALLTHROUGH */
		default:
			continue;
		}

		/*
		 * Common handling for Unicode and numbered
		 * character escape sequences.
		 */

		if (p->enc == TERMENC_ASCII) {
			cp = ascii_uc2str(uc);
			encode(p, cp, strlen(cp));
		} else {
			/*
			 * Code points below 0x20 (except 0x09) and in
			 * 0x7F..0x9F are replaced by U+FFFD.
			 */
			if ((uc < 0x20 && uc != 0x09) ||
			    (uc > 0x7E && uc < 0xA0))
				uc = 0xFFFD;
			encode1(p, uc);
		}
	}
	/* TERMP_NBRWORD applies to a single word only. */
	p->flags &= ~TERMP_NBRWORD;
}
Пример #4
0
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 c, len, nospace;
	const char	*seq;
	enum mandoc_esc	 esc;
	static const char rejs[9] = { '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	nospace = 0;

	while ('\0' != *p) {
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		sz = strcspn(p, rejs);

		fwrite(p, 1, sz, stdout);
		p += (int)sz;

		if ('\0' == *p)
			break;

		if (print_escape(*p++))
			continue;

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		if (c > 0x7E)
			printf("&#%d;", c);
		else if ( ! print_escape(c))
			putchar(c);
	}

	return nospace;
}
Пример #5
0
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, c;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_word() as we
	 * must calculate the width of produced strings.
	 */

	sz = 0;
	while ('\0' != *cp) {
		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += (*p->width)(p, *cp++);

		c = 0;
		switch (*cp) {
		case ('\\'):
			cp++;
			esc = mandoc_escape(&cp, &seq, &ssz);
			if (ESCAPE_ERROR == esc)
				return(sz);

			if (TERMENC_ASCII != p->enc)
				switch (esc) {
				case (ESCAPE_UNICODE):
					c = mchars_num2uc
						(seq + 1, ssz - 1);
					if ('\0' == c)
						break;
					sz += (*p->width)(p, c);
					continue;
				case (ESCAPE_SPECIAL):
					c = mchars_spec2cp
						(p->symtab, seq, ssz);
					if (c <= 0)
						break;
					sz += (*p->width)(p, c);
					continue;
				default:
					break;
				}

			rhs = NULL;

			switch (esc) {
			case (ESCAPE_UNICODE):
				sz += (*p->width)(p, '?');
				break;
			case (ESCAPE_NUMBERED):
				c = mchars_num2char(seq, ssz);
				if ('\0' != c)
					sz += (*p->width)(p, c);
				break;
			case (ESCAPE_SPECIAL):
				rhs = mchars_spec2str
					(p->symtab, seq, ssz, &rsz);

				if (ssz != 1 || rhs)
					break;

				rhs = seq;
				rsz = ssz;
				break;
			default:
				break;
			}

			if (NULL == rhs)
				break;

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case (ASCII_NBRSP):
			sz += (*p->width)(p, ' ');
			cp++;
			break;
		case (ASCII_HYPH):
			sz += (*p->width)(p, '-');
			cp++;
			break;
		default:
			break;
		}
	}

	return(sz);
}
Пример #6
0
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 */
void
term_word(struct termp *p, const char *word)
{
	const char	*seq, *cp;
	char		 c;
	int		 sz, uc;
	size_t		 ssz;
	enum mandoc_esc	 esc;

	if ( ! (TERMP_NOSPACE & p->flags)) {
		if ( ! (TERMP_KEEP & p->flags)) {
			if (TERMP_PREKEEP & p->flags)
				p->flags |= TERMP_KEEP;
			bufferc(p, ' ');
			if (TERMP_SENTENCE & p->flags)
				bufferc(p, ' ');
		} else
			bufferc(p, ASCII_NBRSP);
	}

	if ( ! (p->flags & TERMP_NONOSPACE))
		p->flags &= ~TERMP_NOSPACE;
	else
		p->flags |= TERMP_NOSPACE;

	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);

	while ('\0' != *word) {
		if ((ssz = strcspn(word, "\\")) > 0)
			encode(p, word, ssz);

		word += (int)ssz;
		if ('\\' != *word)
			continue;

		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		if (ESCAPE_ERROR == esc)
			break;

		if (TERMENC_ASCII != p->enc)
			switch (esc) {
			case (ESCAPE_UNICODE):
				uc = mchars_num2uc(seq + 1, sz - 1);
				if ('\0' == uc)
					break;
				encode1(p, uc);
				continue;
			case (ESCAPE_SPECIAL):
				uc = mchars_spec2cp(p->symtab, seq, sz);
				if (uc <= 0)
					break;
				encode1(p, uc);
				continue;
			default:
				break;
			}

		switch (esc) {
		case (ESCAPE_UNICODE):
			encode1(p, '?');
			break;
		case (ESCAPE_NUMBERED):
			c = mchars_num2char(seq, sz);
			if ('\0' != c)
				encode(p, &c, 1);
			break;
		case (ESCAPE_SPECIAL):
			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
			if (NULL != cp) 
				encode(p, cp, ssz);
			else if (1 == ssz)
				encode(p, seq, sz);
			break;
		case (ESCAPE_FONTBOLD):
			term_fontrepl(p, TERMFONT_BOLD);
			break;
		case (ESCAPE_FONTITALIC):
			term_fontrepl(p, TERMFONT_UNDER);
			break;
		case (ESCAPE_FONT):
			/* FALLTHROUGH */
		case (ESCAPE_FONTROMAN):
			term_fontrepl(p, TERMFONT_NONE);
			break;
		case (ESCAPE_FONTPREV):
			term_fontlast(p);
			break;
		case (ESCAPE_NOSPACE):
			if ('\0' == *word)
				p->flags |= TERMP_NOSPACE;
			break;
		default:
			break;
		}
	}
}
Пример #7
0
/*
 * Print text and mdoc(7) syntax elements.
 *
 * Writes the word s to standard output one character at a time
 * through md_char(), preceded by whatever md_preword() emits.
 * Characters that could be read as markdown syntax get a backslash
 * in front of them or are written as named entities via md_named();
 * which ones depends on the file-level escflags and on whether
 * code_blocks is set.  Backslash escapes are decoded with
 * mandoc_escape(): font escapes are turned into runs of asterisks,
 * character escapes into numeric character references (or, inside
 * code blocks, the string from mchars_uc2str()).
 */
static void
md_word(const char *s)
{
	const char	*seq, *prevfont, *currfont, *nextfont;
	char		 c;
	int		 bs, sz, uc, breakline;

	/* No spacing before closing delimiters. */
	if (s[0] != '\0' && s[1] == '\0' &&
	    strchr("!),.:;?]", s[0]) != NULL &&
	    (outflags & MD_spc_force) == 0)
		outflags &= ~MD_spc;

	md_preword();

	if (*s == '\0')
		return;

	/* No spacing after opening delimiters. */
	if ((s[0] == '(' || s[0] == '[') && s[1] == '\0')
		outflags &= ~MD_spc;

	/*
	 * breakline: a line break was requested and is still pending.
	 * prevfont/currfont: the markers of the previous and of the
	 * currently open font; "" stands for no marker.
	 */
	breakline = 0;
	prevfont = currfont = "";
	while ((c = *s++) != '\0') {
		/* bs: print a backslash in front of this character. */
		bs = 0;
		switch(c) {
		case ASCII_NBRSP:
			/*
			 * Wherever c is set to '\0' in this switch, the
			 * output has already been written; md_char() is
			 * still called with it below (presumably a
			 * no-op for NUL - confirm in md_char()).
			 */
			if (code_blocks)
				c = ' ';
			else {
				md_named("nbsp");
				c = '\0';
			}
			break;
		case ASCII_HYPH:
			bs = escflags & ESC_BOL && !code_blocks;
			c = '-';
			break;
		case ASCII_BREAK:
			/* Produces no output at all. */
			continue;
		case '#':
		case '+':
		case '-':
			bs = escflags & ESC_BOL && !code_blocks;
			break;
		case '(':
			bs = escflags & ESC_HYP && !code_blocks;
			break;
		case ')':
			bs = escflags & ESC_NUM && !code_blocks;
			break;
		case '*':
		case '[':
		case '_':
		case '`':
			/* Always escaped outside code blocks. */
			bs = !code_blocks;
			break;
		case '.':
			bs = escflags & ESC_NUM && !code_blocks;
			break;
		case '<':
			if (code_blocks == 0) {
				md_named("lt");
				c = '\0';
			}
			break;
		case '=':
			if (escflags & ESC_BOL && !code_blocks) {
				md_named("equals");
				c = '\0';
			}
			break;
		case '>':
			if (code_blocks == 0) {
				md_named("gt");
				c = '\0';
			}
			break;
		case '\\':
			/*
			 * uc: code point to print, 0 for none.
			 * nextfont: font marker to switch to, NULL for
			 * no font change.
			 */
			uc = 0;
			nextfont = NULL;
			switch (mandoc_escape(&s, &seq, &sz)) {
			case ESCAPE_UNICODE:
				/* seq + 1 skips the leading character of the sequence. */
				uc = mchars_num2uc(seq + 1, sz - 1);
				break;
			case ESCAPE_NUMBERED:
				uc = mchars_num2char(seq, sz);
				break;
			case ESCAPE_SPECIAL:
				uc = mchars_spec2cp(seq, sz);
				break;
			case ESCAPE_FONTBOLD:
				nextfont = "**";
				break;
			case ESCAPE_FONTITALIC:
				nextfont = "*";
				break;
			case ESCAPE_FONTBI:
				nextfont = "***";
				break;
			case ESCAPE_FONT:
			case ESCAPE_FONTROMAN:
				nextfont = "";
				break;
			case ESCAPE_FONTPREV:
				nextfont = prevfont;
				break;
			case ESCAPE_BREAK:
				breakline = 1;
				break;
			case ESCAPE_NOSPACE:
			case ESCAPE_SKIPCHAR:
			case ESCAPE_OVERSTRIKE:
				/* XXX not implemented */
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
			default:
				break;
			}
			/*
			 * Font change: write the marker of the current
			 * font again to close it, then the marker of
			 * the new font to open it.
			 */
			if (nextfont != NULL && !code_blocks) {
				if (*currfont != '\0') {
					outflags &= ~MD_spc;
					md_rawword(currfont);
				}
				prevfont = currfont;
				currfont = nextfont;
				if (*currfont != '\0') {
					outflags &= ~MD_spc;
					md_rawword(currfont);
				}
			}
			if (uc) {
				/*
				 * Code points below 0x20 (except 0x09)
				 * and in 0x7F..0x9F become U+FFFD; this
				 * includes negative values of uc.
				 */
				if ((uc < 0x20 && uc != 0x09) ||
				    (uc > 0x7E && uc < 0xA0))
					uc = 0xFFFD;
				if (code_blocks) {
					seq = mchars_uc2str(uc);
					fputs(seq, stdout);
					outcount += strlen(seq);
				} else {
					printf("&#%d;", uc);
					outcount++;
				}
				escflags &= ~ESC_FON;
			}
			/* The escape itself was fully handled above. */
			c = '\0';
			break;
		case ']':
			bs = escflags & ESC_SQU && !code_blocks;
			escflags |= ESC_HYP;
			break;
		default:
			break;
		}
		if (bs)
			putchar('\\');
		md_char(c);
		/*
		 * A pending line break is written as two blanks and a
		 * newline at the next blank or at the end of the word;
		 * the blanks that follow it are skipped.
		 */
		if (breakline &&
		    (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) {
			printf("  \n");
			breakline = 0;
			while (*s == ' ' || *s == ASCII_NBRSP)
				s++;
		}
	}
	/*
	 * Close a font that is still open.  Otherwise record in
	 * ESC_EOL whether the word ended in a blank: the loop left s
	 * one past the terminating NUL, so s[-2] is the last
	 * character of the word.
	 */
	if (*currfont != '\0') {
		outflags &= ~MD_spc;
		md_rawword(currfont);
	} else if (s[-2] == ' ')
		escflags |= ESC_EOL;
	else
		escflags &= ~ESC_EOL;
}