/*
 * Strip HTML-like tags from the first l bytes of s and escape markup
 * characters in what remains.
 *
 * - A '<' followed by '/' or a letter (with at least two more bytes of
 *   input behind it) starts a tag; everything up to and including the next
 *   '>' is dropped and replaced by one space, unless the previous output
 *   character already was whitespace. If no closing '>' is found, only the
 *   '<' itself is dropped and scanning resumes with the following byte.
 * - Runs of whitespace are collapsed to their first character; leading
 *   whitespace is dropped entirely.
 * - '"', '\'', '<', '>' and '&' are written as decimal character references.
 *
 * Returns whatever buf_destroy() yields for the output buffer, or NULL if
 * the buffer could not be created.
 */
char * striptags(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned char *end = ptr + l;
	unsigned char *tag;
	unsigned char prev;
	char esq[8];
	int esl;

	/* same guard as utf8() and pcdata(): never touch a missing buffer */
	if (!buf)
		return NULL;

	/* prev starts as a blank so that leading whitespace is suppressed */
	for (prev = ' '; ptr < end; ptr++)
	{
		if ((*ptr == '<') && ((ptr + 2) < end) &&
		    ((*(ptr + 1) == '/') || isalpha(*(ptr + 1))))
		{
			/* look for the end of the tag */
			for (tag = ptr; tag < end; tag++)
			{
				if (*tag == '>')
				{
					if (!isspace(prev))
						buf_putchar(buf, ' ');

					/* the outer loop's ptr++ steps past the '>' */
					ptr = tag;
					prev = ' ';
					break;
				}
			}
		}
		else if (isspace(*ptr))
		{
			/* only the first character of a whitespace run is kept */
			if (!isspace(prev))
				buf_putchar(buf, *ptr);

			prev = *ptr;
		}
		else
		{
			switch (*ptr)
			{
				case '"':
				case '\'':
				case '<':
				case '>':
				case '&':
					/* longest result is "&#62;" which fits into esq */
					esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
					buf_append(buf, esq, esl);
					break;

				default:
					buf_putchar(buf, *ptr);
					break;
			}

			prev = *ptr;
		}
	}

	return buf_destroy(buf);
}
/*
 * Copy the first l bytes of s into a fresh buffer, passing plain ASCII
 * through unchanged and delegating every other byte to _validate_utf8(),
 * which substitutes "?" for invalid sequences.
 *
 * Returns NULL when the buffer cannot be created, otherwise whatever
 * buf_destroy() yields. Processing stops early if a buffer write fails.
 */
char * utf8(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *cur = (unsigned char *)s;
	unsigned int pos = 0;
	unsigned int used;

	if (!buf)
		return NULL;

	while (pos < l)
	{
		if ((*cur >= 0x01) && (*cur <= 0x7F))
		{
			/* single ASCII byte (NUL excluded) */
			if (!buf_putchar(buf, (char)*cur++))
				break;

			pos++;
		}
		else
		{
			/* NUL, stray byte or multi byte sequence */
			used = _validate_utf8(&cur, l - pos, buf);

			if (!used)
				break;

			pos += used;
		}
	}

	return buf_destroy(buf);
}
/*
 * Sanitize the first l bytes of s for use as XML character data:
 *  - bytes 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F are dropped
 *  - '&', '\'', '"', '<' and '>' become decimal character references
 *  - remaining ASCII bytes are copied unchanged
 *  - bytes above 0x7F are handed to _validate_utf8()
 *
 * Returns NULL when the buffer cannot be created, otherwise whatever
 * buf_destroy() yields. Processing stops at the first failed buffer write.
 */
char * pcdata(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned int o, v;
	char esq[8];
	int esl;

	if (!buf)
		return NULL;

	for (o = 0; o < l; o++)
	{
		/* Invalid XML bytes (*ptr is unsigned, so no lower bound test) */
		if ((*ptr <= 0x08) ||
		    ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
		    ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
		    (*ptr == 0x7F))
		{
			ptr++;
		}

		/* Escapes */
		else if ((*ptr == 0x26) ||
		         (*ptr == 0x27) ||
		         (*ptr == 0x22) ||
		         (*ptr == 0x3C) ||
		         (*ptr == 0x3E))
		{
			esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);

			if (!buf_append(buf, esq, esl))
				break;

			ptr++;
		}

		/* ascii char */
		else if (*ptr <= 0x7F)
		{
			/* stop on a failed write, like every other branch does */
			if (!buf_putchar(buf, (char)*ptr++))
				break;
		}

		/* multi byte sequence */
		else
		{
			if (!(v = _validate_utf8(&ptr, l - o, buf)))
				break;

			/* the loop's o++ accounts for the remaining byte */
			o += (v - 1);
		}
	}

	return buf_destroy(buf);
}
/* Store CH at the current position and step one column forward. The value
   127 is treated as a backspace instead: step one column back and revert
   the character found there. */
static void replace_char(char ch)
{
	if (ch != 127) {
		buf_putchar(ch);
		move_col(+1);
		return;
	}

	/* backspace: move first, then undo the character at the new position */
	move_col(-1);
	buf_revertchar();
}
/*
 * Append the first l bytes of s to out, escaped for use inside a
 * double-quoted Lua string literal.
 *
 *  - a backslash becomes two backslashes
 *  - a newline becomes the two characters backslash and 'n'
 *  - a double quote becomes the reference &#34; when escape_xml is set,
 *    otherwise backslash followed by the quote
 *  - '\'', '&', '<' and '>' become decimal character references when
 *    escape_xml is set, otherwise they are copied unchanged
 *  - every other byte is copied unchanged
 */
void luastr_escape(struct template_buffer *out, const char *s, unsigned int l,
                   int escape_xml)
{
	int esl;
	char esq[8];
	char *ptr;

	for (ptr = (char *)s; ptr < (s + l); ptr++)
	{
		switch (*ptr)
		{
			case '\\':
				buf_append(out, "\\\\", 2);
				break;

			case '"':
				if (escape_xml)
					buf_append(out, "&#34;", 5);
				else
					buf_append(out, "\\\"", 2);
				break;

			case '\n':
				buf_append(out, "\\n", 2);
				break;

			case '\'':
			case '&':
			case '<':
			case '>':
				if (escape_xml)
				{
					esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
					buf_append(out, esq, esl);
					break;
				}
				/* fall through: without XML escaping these are literal */

			default:
				buf_putchar(out, *ptr);
		}
	}
}
/*
 * Inspect the bytes at *s (l bytes are available), write either the
 * accepted character or a '?' placeholder to buf and advance *s past the
 * bytes that were consumed.
 *
 * Returns the number of consumed input bytes, or 0 if a write to buf
 * failed; in that case *s is left untouched.
 */
static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
{
	unsigned char *cur = *s;
	unsigned int used, seen, want;
	int ok;

	if ((cur[0] >= 0x01) && (cur[0] <= 0x7F))
	{
		/* ascii byte without null: copy as is */
		if (!buf_putchar(buf, *cur++))
			return 0;

		used = 1;
	}
	else if ((want = mb_num_chars(*cur)) > 1)
	{
		/* lead byte plus continuation bytes found within the input;
		 * one byte more than expected is examined as well, so an
		 * excess continuation byte makes the sequence invalid */
		seen = 1;

		while ((seen <= want) && (seen < (unsigned int)l) &&
		       mb_is_cont(cur[seen]))
			seen++;

		/* five and six byte sequences are always invalid; anything else
		 * must have the expected length and be a legal encoding */
		ok = (want != 5) && (want != 6) &&
		     (seen == want) &&
		     mb_is_shortest(cur, want) &&
		     !mb_is_surrogate(cur, want) &&
		     !mb_is_illegal(cur, want);

		if (ok)
		{
			/* copy sequence */
			if (!buf_append(buf, (char *)cur, want))
				return 0;
		}
		else
		{
			/* invalid sequence */
			if (!buf_putchar(buf, '?'))
				return 0;
		}

		/* advance beyond the last continuation byte that was found */
		used = seen;
		cur += seen;
	}
	else
	{
		/* invalid byte (0x00 or a byte that cannot start a sequence) */
		if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
			return 0;

		used = 1;
		cur++;
	}

	*s = cur;
	return used;
}