/*
 * Read the body of a CDATA section and append it to @res.
 *
 * On entry the "<![CDATA" introducer has been consumed already; the next
 * character must be the second '['. Everything up to, but not including,
 * the closing "]]>" is copied to @res. On success the reader goes back to
 * state Initial and CData is returned. A missing '[' or a premature EOF is
 * reported through xml_parse_error() and yields None.
 */
xml_token_type_t
xml_process_cdata(xml_reader_t *xr, string_t *res)
{
	int pending = 0;	/* ']' characters seen but not yet emitted (0..2) */
	int ch;

	ch = xml_getc(xr);
	if (ch == EOF)
		goto eof;
	if (ch != '[') {
		xml_parse_error(xr, "Unexpected '%c' after <[CDATA in XML stream", ch);
		return None;
	}

	for (;;) {
		ch = xml_getc(xr);
		if (ch == EOF)
			goto eof;

		if (ch == ']') {
			/* Hold back at most two brackets; any further one
			 * pushes the oldest held bracket into the output. */
			if (pending < 2)
				pending++;
			else
				string_putc(res, ']');
			continue;
		}

		/* "]]" followed by '>' terminates the section */
		if (ch == '>' && pending == 2)
			break;

		/* False alarm: the held brackets were ordinary data */
		for (; pending > 0; pending--)
			string_putc(res, ']');
		string_putc(res, ch);
	}

	xr->state = Initial;
	return CData;

eof:
	xml_parse_error(xr, "Unexpected EOF after <[CDATA in XML stream");
	return None;
}
/*
 * Expand an XML entity reference.
 *
 * The leading '&' has been consumed by the caller; this function reads up
 * to and including the terminating ';' and appends the expansion to @res.
 * White space inside the reference is skipped.
 *
 * Supported references:
 *   - the predefined entities lt, gt, amp, apos and quot
 *     (compared case-insensitively)
 *   - decimal character references      &#NNN;
 *   - hexadecimal character references  &#xHHH;
 *     (the C-style spelling &#0xHHH; is accepted as well)
 *
 * Returns true on success. On EOF, an over-long name, an empty reference or
 * an unknown/malformed entity, a parse error is reported and false is
 * returned.
 *
 * NOTE(review): the value is handed to string_putc() as-is; presumably that
 * stores a single char, so references above 0xFF would be truncated -
 * confirm against string_putc().
 */
bool
xml_expand_entity(xml_reader_t *xr, string_t *res)
{
	char entity[128];
	unsigned int elen = 0;
	int cc, expanded = 0;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return false;
		}
		if (isspace(cc))
			continue;
		/* always leave room for the terminating NUL */
		if (elen + 1 >= sizeof(entity)) {
			xml_parse_error(xr, "Entity string too long");
			return false;
		}
		entity[elen++] = (char) cc;
	}
	entity[elen] = '\0';

	if (elen == 0) {
		xml_parse_error(xr, "Empty entity &;");
		return false;
	}

	if (!strcasecmp(entity, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(entity, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(entity, "amp")) {
		expanded = '&';
	} else if (!strcasecmp(entity, "apos")) {
		expanded = '\'';
	} else if (!strcasecmp(entity, "quot")) {
		expanded = '"';
	} else {
		bool ok = false;

		if (entity[0] == '#') {
			const char *digits = entity + 1;
			char *end;
			int base = 10;
			int first_ok;

			if (digits[0] == 'x' || digits[0] == 'X') {
				/* XML hex form: &#xHH; */
				base = 16;
				digits++;
			} else if (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X')) {
				/* C-style hex; strtoul eats the "0x" itself */
				base = 16;
			}

			/* Insist on a leading digit: strtoul on its own would
			 * accept an empty number (yielding 0) or a sign. */
			if (base == 16)
				first_ok = isxdigit((unsigned char) digits[0]);
			else
				first_ok = isdigit((unsigned char) digits[0]);

			if (first_ok) {
				expanded = (int) strtoul(digits, &end, base);
				ok = (*end == '\0');
			}
		}

		if (!ok) {
			xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
			return false;
		}
	}

	string_putc(res, expanded);
	return true;
}
/*
 * Consume white space from the input stream.
 *
 * Each white space character is appended to @result when @result is
 * non-NULL. The first non-space character is pushed back onto the reader,
 * so the caller sees it on its next xml_getc(). Stops silently at EOF.
 */
void
xml_skip_space(xml_reader_t *xr, string_t *result)
{
	for (;;) {
		int ch = xml_getc(xr);

		if (ch == EOF)
			return;

		if (!isspace(ch)) {
			xml_ungetc(xr, ch);
			return;
		}

		if (result != NULL)
			string_putc(result, ch);
	}
}
/*
 * Parse the HTTP status line at the start of @s ("HTTP<...> <code> <...>")
 * and store the numeric status code in *@httpres.
 *
 * A NUL byte is appended to @s so the buffer can be handed to the C string
 * functions; the contents are otherwise left untouched.
 *
 * Returns:
 *   HTTP_EOK      status code parsed and stored
 *   HTTP_ENETERR  buffer shorter than 4 bytes, contains an embedded NUL,
 *                 does not start with "HTTP", lacks the two separating
 *                 spaces, or the code is not a number in 0..999
 *   HTTP_ENOMEM   the terminating NUL could not be appended
 */
static int handle_header(struct string* s, int* httpres)
{
	char *b, *p, *p2, *end;
	size_t l;
	long code;

	b = string_get(s);
	l = string_length(s);
	if (l < 4)
		return HTTP_ENETERR;

	/* An embedded NUL would silently truncate the strchr() scans below */
	if (memchr(b, 0, l) != NULL)
		return HTTP_ENETERR;

	if (!string_putc(s, 0))
		return HTTP_ENOMEM;

	/* Appending can fail for lack of memory, so it may have moved the
	 * buffer: fetch the pointer again instead of using the stale one. */
	b = string_get(s);

	if (memcmp(b, "HTTP", 4) != 0)
		return HTTP_ENETERR;

	if ((p = strchr(b, ' ')) == NULL)
		return HTTP_ENETERR;
	if ((p2 = strchr(p + 1, ' ')) == NULL)
		return HTTP_ENETERR;

	/* The code must be exactly the text between the two spaces:
	 * at least one digit, and nothing but the number. */
	code = strtol(p + 1, &end, 10);
	if (end == p + 1 || end != p2)
		return HTTP_ENETERR;
	if (code < 0 || code > 999)
		return HTTP_ENETERR;

	*httpres = (int) code;
	return HTTP_EOK;
}
/*
 * While inside a tag, obtain the next token.
 *
 * Leading white space is skipped. The token text is appended to @res.
 * For a quoted string, @res is reset first (dropping the opening quote
 * that was just stored) and then receives the characters between the
 * quotes. Inside a double-quoted string a backslash causes the following
 * character to be taken literally.
 *
 * Tokens returned:
 *   RightAngle       ">"   (reader state goes back to Initial)
 *   RightAngleQ      "?>"  (reader state goes back to Initial)
 *   RightAngleSlash  "/>"  (reader state goes back to Initial)
 *   Equals           "="
 *   Identifier       starts with a letter, '_' or '!'; continues with
 *                    alphanumerics, '_', '!', ':' or '-'
 *   QuotedString     '...' or "..."
 *   None             a parse error was reported
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, string_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF)
		goto eof_in_tag;

	string_putc(res, cc);

	switch (cc) {
	case '<':
		goto error;

	case '?':
	case '/':
		/* Both must be followed immediately by '>' */
		oc = cc;
		cc = xml_getc(xr);
		/* Report EOF as such rather than formatting it with %c */
		if (cc == EOF)
			goto eof_in_tag;
		if (cc != '>')
			goto error;
		string_putc(res, cc);
		xr->state = Initial;
		return (oc == '?') ? RightAngleQ : RightAngleSlash;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		while ((cc = xml_getc(xr)) != EOF) {
			if (!isalnum(cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				/* First character past the identifier */
				xml_ungetc(xr, cc);
				break;
			}
			string_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		/* Drop the opening quote stored above */
		string_destroy(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF)
				goto unexpected_eof;
			if (cc == '\\' && oc == '"') {
				/* Escaped character: take the next one as-is */
				cc = xml_getc(xr);
				if (cc == EOF)
					goto unexpected_eof;
			} else if (cc == oc) {
				break;
			}
			string_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;

eof_in_tag:
	xml_parse_error(xr, "Unexpected EOF while parsing tag");
	return None;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
	return None;
}
/*
 * Obtain the next token while the reader is outside of any tag.
 *
 * Possible results:
 *   EndOfDocument    input exhausted (only white space was left)
 *   LeftAngle        "<"
 *   LeftAngleSlash   "</"
 *   LeftAngleQ       "<?"
 *   LeftAngleExclam  "<!" not followed by '[' or '-'
 *   CData            a "<![CDATA" section (via xml_process_cdata), or a
 *                    run of character data up to the next '<' or EOF,
 *                    with entity references expanded
 *   None             a parse error was reported
 * Comments are skipped and scanning resumes after them; any token other
 * than Comment coming back from xml_skip_comment() is passed through.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, string_t *res)
{
	int ch;

	for (;;) {
		xml_token_type_t tok;

		/* Leading white space is collected in @res; it is kept only
		 * if character data follows. */
		xml_skip_space(xr, res);

		ch = xml_getc(xr);
		if (ch == EOF) {
			string_destroy(res);
			return EndOfDocument;
		}

		if (ch != '<')
			break;		/* character data, handled below */

		/* Markup: the collected white space is of no interest */
		string_destroy(res);
		string_putc(res, ch);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* A tag may start here */
		xr->state = Tag;

		ch = xml_getc(xr);
		if (ch == '/') {
			string_putc(res, ch);
			return LeftAngleSlash;
		}
		if (ch == '?') {
			string_putc(res, ch);
			return LeftAngleQ;
		}
		if (ch != '!') {
			xml_ungetc(xr, ch);
			return LeftAngle;
		}

		/* "<!" - CDATA section, comment, or <!IDENTIFIER */
		string_putc(res, ch);

		ch = xml_getc(xr);
		if (ch == '[') {
			/* Should be the start of a CDATA section */
			if (!xml_get_identifier(xr, res)
			 || strcmp("CDATA", res->string)) {
				xml_parse_error(xr, "Unexpected <[%s in XML stream", res->string);
				return None;
			}
			string_destroy(res);
			return xml_process_cdata(xr, res);
		}
		if (ch != '-') {
			xml_ungetc(xr, ch);
			return LeftAngleExclam;
		}

		tok = xml_skip_comment(xr);
		if (tok != Comment)
			return tok;

		/* Comment skipped: start over as if it had not been there */
		xr->state = Initial;
		string_destroy(res);
	}

	/*
	 * Plain character data: copy up to the next '<', expanding entity
	 * references on the way. ch holds the first data character here.
	 * FIXME: handle comments within CDATA?
	 */
	while (ch != EOF) {
		if (ch == '<') {
			/* Leave the '<' for the next call */
			xml_ungetc(xr, ch);
			break;
		}

		if (ch == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			string_putc(res, ch);
		}

		ch = xml_getc(xr);
	}

	string_trim_empty_lines(res);
	return CData;
}