/*
 * Skip over an XML comment.
 * When we get here, "<!-" has already been consumed, so the next
 * character has to be the second '-' of the comment opener.
 *
 * Returns Comment once the terminating "-->" has been read, or None
 * (after reporting a parse error) on a malformed opener or on EOF.
 */
xml_token_type_t
xml_skip_comment(xml_reader_t *xr)
{
	int dashes = 0;		/* length of the current run of '-' characters */
	int ch;

	if (xml_getc(xr) != '-') {
		xml_parse_error(xr, "Unexpected <!-...> element");
		return None;
	}

	for (;;) {
		ch = xml_getc(xr);
		if (ch == EOF)
			break;

		if (ch == '-') {
			dashes++;
			continue;
		}

		/* A '>' directly preceded by two or more dashes ends the comment */
		if (ch == '>' && dashes >= 2) {
#ifdef XMLDEBUG_PARSER
			xml_debug("Processed comment\n");
#endif
			return Comment;
		}

		/* Any other character interrupts the run of dashes */
		dashes = 0;
	}

	xml_parse_error(xr, "Unexpected end of file while parsing comment");
	return None;
}
/*
 * Read the body of a CDATA section into @res.
 * The caller has already consumed the section introducer up to the
 * "CDATA" keyword; the next character must be the '[' that opens the
 * data. Everything up to (but not including) the closing "]]>" is
 * appended to @res verbatim.
 *
 * On success the reader state is set back to Initial and CData is
 * returned. On EOF or a missing '[', a parse error is reported and
 * None is returned.
 */
xml_token_type_t
xml_process_cdata(xml_reader_t *xr, string_t *res)
{
	int pending = 0;	/* ']' characters read but not yet emitted (0..2) */
	int ch;

	ch = xml_getc(xr);
	if (ch == EOF)
		goto unexpected_eof;
	if (ch != '[') {
		xml_parse_error(xr, "Unexpected '%c' after <[CDATA in XML stream", ch);
		return None;
	}

	for (;;) {
		ch = xml_getc(xr);
		if (ch == EOF)
			goto unexpected_eof;

		if (ch == ']') {
			/*
			 * Hold back at most two brackets as a potential
			 * terminator; a third one means the oldest bracket
			 * was ordinary data.
			 */
			if (pending < 2)
				pending++;
			else
				string_putc(res, ']');
			continue;
		}

		/* "]]" followed by '>' terminates the section */
		if (ch == '>' && pending == 2)
			break;

		/* False alarm: the held-back brackets were data after all */
		for (; pending > 0; pending--)
			string_putc(res, ']');
		string_putc(res, ch);
	}

	xr->state = Initial;
	return CData;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF after <[CDATA in XML stream");
	return None;
}
/*
 * Expand an XML entity reference.
 * When we get here, the '&' has been consumed; this reads up to and
 * including the terminating ';' and appends the expansion to @res.
 *
 * Supported references:
 *   - the predefined entities lt, gt, amp, quot and apos
 *     (matched case-insensitively)
 *   - decimal character references   &#NNN;
 *   - hexadecimal character references &#xHH; (the non-standard
 *     &#0xHH; spelling is accepted as well)
 *
 * The expansion is stored as a single byte, so character references
 * must be in the range 1..0xFF; anything else is reported as an error
 * rather than being silently truncated.
 *
 * White space inside the reference is ignored.
 *
 * Returns true on success, false after reporting a parse error.
 */
bool
xml_expand_entity(xml_reader_t *xr, string_t *res)
{
	char entity[128];
	unsigned int elen = 0;
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return false;
		}
		if (isspace(cc))
			continue;
		/* Always leave room for the terminating NUL */
		if (elen + 1 >= sizeof(entity)) {
			xml_parse_error(xr, "Entity string too long");
			return false;
		}
		entity[elen++] = cc;
	}
	entity[elen] = '\0';

	if (elen == 0) {
		xml_parse_error(xr, "Empty entity &;");
		return false;
	}

	if (!strcasecmp(entity, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(entity, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(entity, "amp")) {
		expanded = '&';
	} else if (!strcasecmp(entity, "quot")) {
		expanded = '"';
	} else if (!strcasecmp(entity, "apos")) {
		expanded = '\'';
	} else if (entity[0] == '#') {
		const char *digits = entity + 1;
		char *end = NULL;
		unsigned long code;
		int base = 10;
		int valid_start;

		if (digits[0] == 'x' || digits[0] == 'X') {
			base = 16;
			digits += 1;
		} else if (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X')) {
			/* Legacy spelling that used to be accepted via strtoul base 0 */
			base = 16;
			digits += 2;
		}

		/*
		 * Insist on a leading digit: strtoul() on its own would
		 * accept an empty string (yielding 0) as well as a sign.
		 */
		if (base == 16)
			valid_start = isxdigit((unsigned char) digits[0]);
		else
			valid_start = isdigit((unsigned char) digits[0]);
		if (!valid_start) {
			xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
			return false;
		}

		code = strtoul(digits, &end, base);
		if (*end != '\0') {
			xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
			return false;
		}
		if (code == 0 || code > 0xFF) {
			xml_parse_error(xr, "Character reference &%s; is out of range", entity);
			return false;
		}
		expanded = (int) code;
	} else {
		xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
		return false;
	}

	string_putc(res, expanded);
	return true;
}
/*
 * Expand an XML entity reference.
 * When we get here, the '&' has been consumed; this reads up to and
 * including the terminating ';' and appends the expansion to @res.
 *
 * Supported references:
 *   - the predefined entities lt, gt, amp, quot and apos
 *     (matched case-insensitively)
 *   - decimal character references   &#NNN;
 *   - hexadecimal character references &#xHH; (the non-standard
 *     &#0xHH; spelling is accepted as well)
 *
 * The expansion is stored as a single byte, so character references
 * must be in the range 1..0xFF; anything else is reported as an error
 * rather than being silently truncated.
 *
 * White space inside the reference is ignored.
 *
 * Returns TRUE on success, FALSE after reporting a parse error.
 */
ni_bool_t
xml_expand_entity(xml_reader_t *xr, ni_stringbuf_t *res)
{
	/*
	 * Zero-filled so that entity.string is a valid C string from the
	 * start, independent of what NI_STRINGBUF_INIT_BUFFER does with
	 * the buffer contents.
	 */
	char temp[128] = { 0 };
	ni_stringbuf_t entity = NI_STRINGBUF_INIT_BUFFER(temp);
	const char *name;
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return FALSE;
		}
		if (isspace(cc))
			continue;
		/* Always leave room for the terminating NUL */
		if (entity.len + sizeof(char) >= entity.size) {
			xml_parse_error(xr, "Entity is too long");
			return FALSE;
		}
		ni_stringbuf_putc(&entity, cc);
	}

	if (entity.len == 0 || ni_string_empty(entity.string)) {
		xml_parse_error(xr, "Empty entity &;");
		return FALSE;
	}

	name = entity.string;
	if (!strcasecmp(name, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(name, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(name, "amp")) {
		expanded = '&';
	} else if (!strcasecmp(name, "quot")) {
		expanded = '"';
	} else if (!strcasecmp(name, "apos")) {
		expanded = '\'';
	} else if (name[0] == '#') {
		const char *digits = name + 1;
		char *end = NULL;
		unsigned long code;
		int base = 10;
		int valid_start;

		if (digits[0] == 'x' || digits[0] == 'X') {
			base = 16;
			digits += 1;
		} else if (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X')) {
			/* Legacy spelling that used to be accepted via strtoul base 0 */
			base = 16;
			digits += 2;
		}

		/*
		 * Insist on a leading digit: strtoul() on its own would
		 * accept an empty string (yielding 0) as well as a sign.
		 */
		if (base == 16)
			valid_start = isxdigit((unsigned char) digits[0]);
		else
			valid_start = isdigit((unsigned char) digits[0]);
		if (!valid_start) {
			xml_parse_error(xr, "Cannot expand unknown entity &%s;", name);
			return FALSE;
		}

		code = strtoul(digits, &end, base);
		if (*end != '\0') {
			xml_parse_error(xr, "Cannot expand unknown entity &%s;", name);
			return FALSE;
		}
		if (code == 0 || code > 0xFF) {
			xml_parse_error(xr, "Character reference &%s; is out of range", name);
			return FALSE;
		}
		expanded = (int) code;
	} else {
		xml_parse_error(xr, "Cannot expand unknown entity &%s;", name);
		return FALSE;
	}

	ni_stringbuf_putc(res, expanded);
	return TRUE;
}
/*
 * Consume white space from the input stream.
 * If @result is non-NULL, every white space character consumed is
 * appended to it. The first non-space character is pushed back onto
 * the stream; reading stops silently at EOF.
 */
void
xml_skip_space(xml_reader_t *xr, ni_stringbuf_t *result)
{
	for (;;) {
		int ch = xml_getc(xr);

		if (ch == EOF)
			return;

		if (!isspace(ch)) {
			/* Not ours - leave it for the next reader */
			xml_ungetc(xr, ch);
			return;
		}

		if (result != NULL)
			ni_stringbuf_putc(result, ch);
	}
}
/*
 * Obtain the next token from the inside of a tag.
 * Leading white space is skipped (and discarded). The text of the
 * token is stored in @res:
 *
 *   ">"   RightAngle        (reader state goes back to Initial)
 *   "?>"  RightAngleQ       (reader state goes back to Initial)
 *   "/>"  RightAngleSlash   (reader state goes back to Initial)
 *   "="   Equals
 *   Identifier    starts with a letter, '_' or '!', and continues with
 *                 alphanumerics, '_', '!', ':' or '-'
 *   QuotedString  single or double quoted; @res is cleared first and
 *                 receives the content without the quotes. No escape
 *                 or entity processing is done here.
 *
 * Returns None after reporting a parse error for EOF or for any
 * character that cannot start a token.
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}

	ni_stringbuf_putc(res, cc);
	switch (cc) {
	case '<':
		goto error;

	case '?':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleQ;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '/':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleSlash;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		while ((cc = xml_getc(xr)) != EOF) {
			if (!isalnum(cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				/* First character past the identifier */
				xml_ungetc(xr, cc);
				break;
			}
			ni_stringbuf_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		/* Drop the opening quote we stored above */
		ni_stringbuf_clear(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF) {
				xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
				return None;
			}
			/* Only the same kind of quote closes the string */
			if (cc == oc)
				break;
			ni_stringbuf_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	/*
	 * We can get here with cc == EOF when the input ends right after
	 * a '?' or '/'. EOF is not a character, so do not print it via %c.
	 */
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;
}
/*
 * Obtain the next token while outside of any tag.
 *
 * Returns
 *   EndOfDocument    at EOF (with @res cleared)
 *   LeftAngle        for "<"
 *   LeftAngleSlash   for "</"
 *   LeftAngleQ       for "<?"
 *   LeftAngleExclam  for "<!" not followed by '-'
 *   CData            for character data up to the next '<' or EOF,
 *                    with entities expanded
 *   None             after a parse error
 *
 * Comments are skipped transparently. Whenever a '<' token is
 * returned, the reader state has been switched to Tag.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, ni_stringbuf_t *res)
{
	xml_token_type_t result;
	int ch;

	/* Each iteration skips at most one comment, then looks again */
	for (;;) {
		/* Eat initial white space and keep it in @res */
		xml_skip_space(xr, res);

		ch = xml_getc(xr);
		if (ch == EOF) {
			ni_stringbuf_clear(res);
			return EndOfDocument;
		}

		/* Anything but '<' starts character data */
		if (ch != '<')
			break;

		/* The white space collected so far is of no interest */
		ni_stringbuf_clear(res);
		ni_stringbuf_putc(res, ch);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* A tag is legal here */
		xr->state = Tag;

		ch = xml_getc(xr);
		if (ch == '/') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleSlash;
		}
		if (ch == '?') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleQ;
		}
		if (ch != '!') {
			/* Plain "<": the character belongs to the tag name */
			xml_ungetc(xr, ch);
			return LeftAngle;
		}

		ni_stringbuf_putc(res, ch);

		/* "<!" followed by anything but '-' is returned as LeftAngleExclam */
		ch = xml_getc(xr);
		if (ch != '-') {
			xml_ungetc(xr, ch);
			return LeftAngleExclam;
		}

		/* "<!-" has been consumed - the rest should be a comment */
		result = xml_skip_comment(xr);
		if (result != Comment)
			return result;

		/* Comment swallowed; start over as if nothing had happened */
		xr->state = Initial;
		ni_stringbuf_clear(res);
	}

	/*
	 * Character data: collect everything up to the next '<' or EOF.
	 * @res still holds the leading white space at this point.
	 * FIXME: handle comments within CDATA?
	 */
	while (ch != EOF) {
		if (ch == '<') {
			/* Looks like we're done */
			xml_ungetc(xr, ch);
			break;
		}

		if (ch == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			ni_stringbuf_putc(res, ch);
		}

		ch = xml_getc(xr);
	}

	ni_stringbuf_trim_empty_lines(res);
	return CData;
}