/*
 * Search for str, ignoring case, starting at the parser's current buffer
 * position (parser->bufp).
 *
 * Whenever the search fails, html_read_line() is called and the search is
 * retried from parser->bufp (presumably over a buffer that now holds more
 * input -- confirm against html_read_line()).
 *
 * Returns a pointer to the match, or NULL once html_read_line() reports
 * HTML_EOF.  This function itself never assigns parser->bufp.
 */
static gchar *html_find_str_case(HTMLParser *parser, const gchar *str)
{
	gchar *match;

	for (;;) {
		match = strcasestr(parser->bufp, str);
		if (match != NULL)
			return match;

		/* Nothing found yet: ask for another line or give up. */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
/*
 * Search for the character ch starting at the parser's current buffer
 * position (parser->bufp).
 *
 * Whenever the search fails, html_read_line() is called and the search is
 * retried from parser->bufp (presumably over a buffer that now holds more
 * input -- confirm against html_read_line()).
 *
 * Returns a pointer to the first occurrence found, or NULL once
 * html_read_line() reports HTML_EOF.  This function itself never assigns
 * parser->bufp.
 */
static gchar *html_find_char(HTMLParser *parser, gchar ch)
{
	gchar *hit;

	for (;;) {
		hit = strchr(parser->bufp, ch);
		if (hit != NULL)
			return hit;

		/* Nothing found yet: ask for another line or give up. */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
/*
 * Consume the markup construct that starts at parser->bufp, which must point
 * at '<', and store the text of an ordinary tag in buf.
 *
 * buf is always reset to the empty string first.  Three constructs are
 * skipped without storing anything in buf:
 *   - "<!--" up to and including "-->"             (case-sensitive search)
 *   - "<style"  up to and including "</style>"     (case-insensitive)
 *   - "<script" up to and including "</script>"    (case-insensitive)
 *
 * For any other tag, the text between '<' and the next '>' is copied into buf
 * with strncpy2() using a limit of MIN(text length + 1, len), then trimmed
 * with g_strstrip(), and parser->bufp is moved just past the '>'.
 *
 * Whenever a terminator is not found, html_read_line() is called and the
 * search is retried.  If it reports HTML_EOF the function returns at once:
 * buf stays empty and parser->bufp is left just after the opening token
 * ("<!--", "<style", "<script" or "<").
 */
static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
{
	gchar *end;

	buf[0] = '\0';
	g_return_if_fail(*parser->bufp == '<');

	/* ignore comment / CSS / script stuff */
	if (strncmp(parser->bufp, "<!--", 4) == 0) {
		parser->bufp += 4;
		for (;;) {
			end = strstr(parser->bufp, "-->");
			if (end != NULL)
				break;
			if (html_read_line(parser) == HTML_EOF)
				return;
		}
		parser->bufp = end + 3;
		return;
	}

	if (g_ascii_strncasecmp(parser->bufp, "<style", 6) == 0) {
		parser->bufp += 6;
		for (;;) {
			end = strcasestr(parser->bufp, "</style>");
			if (end != NULL)
				break;
			if (html_read_line(parser) == HTML_EOF)
				return;
		}
		parser->bufp = end + 8;
		return;
	}

	if (g_ascii_strncasecmp(parser->bufp, "<script", 7) == 0) {
		parser->bufp += 7;
		for (;;) {
			end = strcasestr(parser->bufp, "</script>");
			if (end != NULL)
				break;
			if (html_read_line(parser) == HTML_EOF)
				return;
		}
		parser->bufp = end + 9;
		return;
	}

	/* Ordinary tag: step over '<' and look for the closing '>'. */
	parser->bufp++;
	for (;;) {
		end = strchr(parser->bufp, '>');
		if (end != NULL)
			break;
		if (html_read_line(parser) == HTML_EOF)
			return;
	}

	/* The +1 presumably leaves room for the terminator -- see strncpy2(). */
	strncpy2(buf, parser->bufp, MIN(end - parser->bufp + 1, len));
	g_strstrip(buf);
	parser->bufp = end + 1;
}
/*
 * Parse the next piece of the document and return the text accumulated in
 * parser->str.
 *
 * On entry parser->state is set to HTML_NORMAL and parser->str is emptied.
 * If the working buffer is exhausted (parser->bufp points at '\0') it is
 * truncated, parser->bufp is reset to its start, and html_read_line() is
 * called; NULL is returned if that reports HTML_EOF.
 *
 * The buffer is then scanned character by character:
 *   - '<'  : if text has already been accumulated, it is returned and the
 *            tag is left unconsumed; otherwise html_parse_tag() is called.
 *   - '&'  : html_parse_special() is called.
 *   - whitespace (' ', '\t', '\r', '\n'): a "\r\n" pair first has its '\r'
 *            skipped.  When parser->pre is unset the character is dropped,
 *            and parser->space is set unless parser->newline is set.  When
 *            parser->pre is set the character is appended like any other.
 *   - anything else is passed to html_append_char().
 *
 * Returns parser->str->str (owned by the parser), or NULL as described above.
 */
const gchar *html_parse(HTMLParser *parser)
{
	parser->state = HTML_NORMAL;
	g_string_truncate(parser->str, 0);

	/* Buffer used up: start over with a freshly read line. */
	if (*parser->bufp == '\0') {
		g_string_truncate(parser->buf, 0);
		parser->bufp = parser->buf->str;
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}

	while (*parser->bufp != '\0') {
		const gchar ch = *parser->bufp;

		if (ch == '<') {
			/* Hand back pending text before touching the tag. */
			if (parser->str->len != 0)
				return parser->str->str;
			html_parse_tag(parser);
			continue;
		}

		if (ch == '&') {
			html_parse_special(parser);
			continue;
		}

		if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
			/* Treat "\r\n" as a single '\n'. */
			if (parser->bufp[0] == '\r' && parser->bufp[1] == '\n')
				parser->bufp++;

			if (!parser->pre) {
				if (!parser->newline)
					parser->space = TRUE;
				parser->bufp++;
				continue;
			}
			/* In preformatted mode whitespace is kept verbatim. */
		}

		/* Re-read through bufp: it may have moved past a '\r' above. */
		html_append_char(parser, *parser->bufp++);
	}

	return parser->str->str;
}