/*
 * Fetch the next token from the XML stream.
 *
 * @res is cleared first, then filled by the state specific tokenizer.
 * Dispatches on the reader state: Initial and Tag each have their own
 * tokenizer; the Error state and any unknown state yield None (an
 * unknown state is additionally reported through xml_parse_error).
 */
xml_token_type_t
xml_get_token(xml_reader_t *xr, ni_stringbuf_t *res)
{
#ifdef XMLDEBUG_PARSER
	/* State on entry, only needed for the debug trace below */
	xml_parser_state_t old_state = xr->state;
#endif
	xml_token_type_t token;

	ni_stringbuf_clear(res);

	/* Once the reader is in the Error state, no more tokens are produced */
	if (xr->state == Error)
		return None;

	if (xr->state == Initial) {
		token = xml_get_token_initial(xr, res);
	} else if (xr->state == Tag) {
		token = xml_get_token_tag(xr, res);
	} else {
		xml_parse_error(xr, "Unexpected state %u in XML reader", xr->state);
		return None;
	}

	xml_debug("++ %3u %-7s %-10s (%s)\n",
			xr->lineCount,
			xml_parser_state_name(old_state),
			xml_token_name(token),
			res->string?: "");
	return token;
}
/*
 * Move the contents of @src into @dest.
 *
 * Both buffers must have the same "dynamic" setting (asserted).
 * @dest is cleared, then receives a structure copy of @src; afterwards
 * the string pointer and length of @src are reset so that @src no
 * longer refers to the moved string.
 */
void
ni_stringbuf_move(ni_stringbuf_t *dest, ni_stringbuf_t *src)
{
	ni_assert(dest->dynamic == src->dynamic);

	/* Drop whatever dest currently holds before taking over src */
	ni_stringbuf_clear(dest);
	*dest = *src;

	/* src hands over its string and is left empty */
	src->len = 0;
	src->string = NULL;
}
/*
 * While in state Tag, obtain the next token.
 *
 * Leading white space is skipped (no buffer is passed to collect it).
 * The text of the token is appended to @res, except for quoted
 * strings, where @res receives only the characters between the quotes.
 *
 * Returns one of:
 *   RightAngleQ      "?>"  (reader state switches back to Initial)
 *   RightAngle       ">"   (reader state switches back to Initial)
 *   RightAngleSlash  "/>"  (reader state switches back to Initial)
 *   Equals           "="
 *   Identifier       starts with a letter, '_' or '!'; continues with
 *                    alphanumerics, '_', '!', ':' or '-'
 *   QuotedString     text enclosed in matching ' or " quotes
 *   None             on EOF or an unexpected character, after
 *                    reporting the problem via xml_parse_error
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}

	ni_stringbuf_putc(res, cc);

	switch (cc) {
	case '<':
		/* A new tag cannot open inside a tag */
		goto error;

	case '?':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleQ;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '/':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleSlash;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		while ((cc = xml_getc(xr)) != EOF) {
			/*
			 * cc is known not to be EOF here; convert to unsigned char
			 * so the <ctype.h> call stays within its defined domain
			 * regardless of how xml_getc represents high-bit bytes.
			 */
			if (!isalnum((unsigned char) cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				/* First character past the identifier; put it back */
				xml_ungetc(xr, cc);
				break;
			}
			ni_stringbuf_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		/* Do not include the opening quote in the result */
		ni_stringbuf_clear(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF) {
				xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
				return None;
			}
			/* Only the same kind of quote terminates the string */
			if (cc == oc)
				break;
			ni_stringbuf_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	/*
	 * We can get here with cc == EOF when the stream ends right after
	 * '?' or '/'. Report that as EOF rather than formatting EOF with %c.
	 */
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;
}
/*
 * While in state Initial, obtain the next token.
 *
 * Returns EndOfDocument at EOF, one of the LeftAngle* tokens when a tag
 * opens (the reader state then becomes Tag), CData for character data,
 * or None after an error. "<!-" is handed to xml_skip_comment; when
 * that reports a Comment, scanning simply starts over. Any other token
 * it returns is passed on to the caller.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int ch;

	for (;;) {
		xml_token_type_t skipped;

		/* Eat initial white space and store it in @res */
		xml_skip_space(xr, res);

		ch = xml_getc(xr);
		if (ch == EOF) {
			ni_stringbuf_clear(res);
			return EndOfDocument;
		}

		/* Anything but '<' starts character data; handled below */
		if (ch != '<')
			break;

		/* Discard the white space in @res - we're not interested in that. */
		ni_stringbuf_clear(res);
		ni_stringbuf_putc(res, ch);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* tag is legal here */
		xr->state = Tag;

		ch = xml_getc(xr);
		if (ch == '/') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleSlash;
		}
		if (ch == '?') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleQ;
		}
		if (ch != '!') {
			/* Plain "<": the character belongs to what follows */
			xml_ungetc(xr, ch);
			return LeftAngle;
		}

		ni_stringbuf_putc(res, ch);

		/* If it's <!IDENTIFIER, return LeftAngleExclam */
		ch = xml_getc(xr);
		if (ch != '-') {
			xml_ungetc(xr, ch);
			return LeftAngleExclam;
		}

		skipped = xml_skip_comment(xr);
		if (skipped != Comment)
			return skipped;

		/* Comment consumed: back to Initial and look for the next token */
		xr->state = Initial;
		ni_stringbuf_clear(res);
	}

	/*
	 * Looks like CDATA.
	 * Scan up to the next '<' (which is pushed back) or EOF. ch is
	 * neither EOF nor '<' when we first get here.
	 */
	for (; ch != EOF; ch = xml_getc(xr)) {
		if (ch == '<') {
			/* Looks like we're done.
			 * FIXME: handle comments within CDATA?
			 */
			xml_ungetc(xr, ch);
			break;
		}

		if (ch == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			ni_stringbuf_putc(res, ch);
		}
	}

	ni_stringbuf_trim_empty_lines(res);
	return CData;
}
/*
 * Destroy a string buffer.
 * This simply delegates to ni_stringbuf_clear(); whatever that releases
 * or resets is defined there and is not visible from this function.
 */
void ni_stringbuf_destroy(ni_stringbuf_t *sb) { ni_stringbuf_clear(sb); }