/* Strip markup tags from the given string and escape what is left
 *
 *  - a '<' that is followed by '/' or a letter starts a tag; everything up
 *    to and including the next '>' is removed and replaced by one blank,
 *    unless the previously emitted char already was whitespace
 *  - runs of whitespace are collapsed to their first char, leading
 *    whitespace and whitespace directly after a tag is dropped
 *  - the chars " ' < > & are written as numeric entities (&#NN;)
 *
 * Note: if a tag start is found but no closing '>' follows before the end
 * of the input, only the '<' itself is dropped and the remaining bytes are
 * processed as ordinary text.
 *
 * s: input bytes, l: number of bytes to process.
 * Returns NULL if the output buffer cannot be created, otherwise the
 * result of buf_destroy() for the filled buffer. */
char * striptags(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned char *end = ptr + l;
	unsigned char *tag;
	unsigned char prev;
	char esq[8];
	int esl;

	/* same guard as in utf8() and pcdata(): never write to a missing buffer */
	if (!buf)
		return NULL;

	/* prev starts out as a blank so that leading whitespace is suppressed */
	for (prev = ' '; ptr < end; ptr++)
	{
		/* tag start: needs at least three remaining bytes ("<x>") */
		if ((*ptr == '<') && ((ptr + 2) < end) &&
			((*(ptr + 1) == '/') || isalpha(*(ptr + 1))))
		{
			/* look for the closing bracket */
			for (tag = ptr; tag < end; tag++)
			{
				if (*tag == '>')
				{
					if (!isspace(prev))
						buf_putchar(buf, ' ');

					/* continue behind the tag, treat it like whitespace */
					ptr = tag;
					prev = ' ';
					break;
				}
			}
		}
		else if (isspace(*ptr))
		{
			/* only the first char of a whitespace run is kept */
			if (!isspace(prev))
				buf_putchar(buf, *ptr);

			prev = *ptr;
		}
		else
		{
			switch(*ptr)
			{
				case '"':
				case '\'':
				case '<':
				case '>':
				case '&':
					/* longest result is "&#62;" which fits into esq */
					esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
					buf_append(buf, esq, esl);
					break;

				default:
					buf_putchar(buf, *ptr);
					break;
			}

			prev = *ptr;
		}
	}

	return buf_destroy(buf);
}
/* Sanitize the given string: plain ASCII bytes (0x01..0x7F) are copied as
 * they are, everything else is handed to _validate_utf8() which copies valid
 * multi byte sequences and writes "?" for invalid ones.
 *
 * s: input bytes, l: number of bytes to process.
 * Returns NULL if the output buffer cannot be created, otherwise the result
 * of buf_destroy(). Processing stops early when a buffer write fails. */
char * utf8(const char *s, unsigned int l)
{
	struct template_buffer *out = buf_init(l);
	unsigned char *cur = (unsigned char *)s;
	unsigned int pos = 0;
	unsigned int used;

	if (!out)
		return NULL;

	while (pos < l)
	{
		/* NUL byte or anything with the high bit set */
		if ((*cur < 0x01) || (*cur > 0x7F))
		{
			used = _validate_utf8(&cur, l - pos, out);

			if (!used)
				break;

			/* the validator reports how many input bytes it consumed */
			pos += used;
			continue;
		}

		/* ascii char */
		if (!buf_putchar(out, (char)*cur++))
			break;

		pos++;
	}

	return buf_destroy(out);
}
/* Sanitize given string for use as XML character data:
 *  - strip all bytes that are invalid in XML (control chars other than
 *    TAB, LF and CR, plus DEL)
 *  - escape the XML control chars & ' " < > as numeric entities (&#NN;)
 *  - validate multi byte UTF-8 sequences through _validate_utf8()
 *
 * s: input bytes, l: number of bytes to process.
 * Returns NULL if the output buffer cannot be created, otherwise the result
 * of buf_destroy() for the filled buffer. Processing stops at the first
 * failing write to the buffer. */
char * pcdata(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned int o, v;
	char esq[8];
	int esl;

	if (!buf)
		return NULL;

	for (o = 0; o < l; o++)
	{
		/* Invalid XML bytes are skipped without output; *ptr is unsigned,
		 * so the first range needs no lower bound check */
		if ((*ptr <= 0x08) ||
		    ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
		    ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
		    (*ptr == 0x7F))
		{
			ptr++;
		}

		/* Escapes */
		else if ((*ptr == 0x26) ||
		         (*ptr == 0x27) ||
		         (*ptr == 0x22) ||
		         (*ptr == 0x3C) ||
		         (*ptr == 0x3E))
		{
			/* longest result is "&#62;" which fits into esq */
			esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);

			if (!buf_append(buf, esq, esl))
				break;

			ptr++;
		}

		/* ascii char */
		else if (*ptr <= 0x7F)
		{
			/* stop on a failed write like every other branch does */
			if (!buf_putchar(buf, (char)*ptr++))
				break;
		}

		/* multi byte sequence */
		else
		{
			if (!(v = _validate_utf8(&ptr, l - o, buf)))
				break;

			/* the loop increment accounts for one of the v consumed bytes */
			o += (v - 1);
		}
	}

	return buf_destroy(buf);
}
Exemple #4
0
/* Put CH into the buffer via buf_putchar() and advance one column. If CH is
   a backspace character (code 127), step back one column first and call
   buf_revertchar() instead of writing anything. */
static void replace_char(char ch)
{
    if (ch == 127) {
        /* backspace: move back before reverting */
        move_col(-1);
        buf_revertchar();
        return;
    }

    buf_putchar(ch);
    move_col(+1);
}
/* Append the l bytes starting at s to out in escaped form (presumably for
 * use inside a quoted Lua string, going by the function name):
 *  - backslash is written as \\ and newline as \n
 *  - a double quote is written as &#34; if escape_xml is set, else as \"
 *  - ' & < > are written as numeric entities (&#NN;) if escape_xml is set,
 *    otherwise they are copied unchanged
 *  - every other byte is copied unchanged */
void luastr_escape(struct template_buffer *out, const char *s, unsigned int l,
				   int escape_xml)
{
	const char *cur;
	const char *stop = s + l;
	char entity[8];
	int entity_len;
	char c;

	for (cur = s; cur < stop; cur++)
	{
		c = *cur;

		if (c == '\\')
		{
			buf_append(out, "\\\\", 2);
		}
		else if (c == '"')
		{
			if (escape_xml)
				buf_append(out, "&#34;", 5);
			else
				buf_append(out, "\\\"", 2);
		}
		else if (c == '\n')
		{
			buf_append(out, "\\n", 2);
		}
		else if (escape_xml &&
		         ((c == '\'') || (c == '&') || (c == '<') || (c == '>')))
		{
			/* longest result is "&#62;" which fits into entity */
			entity_len = snprintf(entity, sizeof(entity), "&#%i;", c);
			buf_append(out, entity, entity_len);
		}
		else
		{
			buf_putchar(out, c);
		}
	}
}
/* Examine the byte sequence starting at *s, of which at most l bytes may be
 * read, and store the outcome in the given buffer object:
 *  - an ascii byte (0x01..0x7F) is copied
 *  - a multi byte sequence is copied if it is complete and legal, else a
 *    single "?" is written for it
 *  - any other byte (e.g. 0x00) is written as "?"
 *
 * Returns the number of input bytes consumed and advances *s by that
 * amount. Returns 0 and leaves *s untouched if writing to buf failed. */
static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
{
	unsigned char *cur = *s;
	unsigned int expect, seen;
	int wellformed;

	/* ascii byte without null */
	if ((*cur >= 0x01) && (*cur <= 0x7F))
	{
		if (!buf_putchar(buf, *cur))
			return 0;

		*s = cur + 1;
		return 1;
	}

	expect = mb_num_chars(*cur);

	/* not the start of a multi byte sequence: invalid byte (0x00) */
	if (expect <= 1)
	{
		if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
			return 0;

		*s = cur + 1;
		return 1;
	}

	/* count the lead byte plus all continuation bytes following it, never
	 * looking beyond the l available bytes */
	seen = 1;

	while ((seen <= expect) && (seen < (unsigned int)l) &&
	       mb_is_cont(*(cur + seen)))
	{
		seen++;
	}

	/* five and six byte sequences are always invalid; all others must have
	 * exactly the expected length and be a legal encoding */
	wellformed = (expect != 5) && (expect != 6) &&
	             (seen == expect) && mb_is_shortest(cur, expect) &&
	             !mb_is_surrogate(cur, expect) && !mb_is_illegal(cur, expect);

	if (wellformed)
	{
		/* copy sequence */
		if (!buf_append(buf, (char *)cur, expect))
			return 0;
	}
	else
	{
		/* invalid sequence */
		if (!buf_putchar(buf, '?'))
			return 0;
	}

	/* advance beyond the last continuation byte that was counted */
	*s = cur + seen;
	return (int)seen;
}