/*
 * Parse the NUL-terminated XML text in xml, handing every top level construct
 * (processing instruction, comment, DOCTYPE or element) to its reader.
 *
 * xml    - text to parse; a null pointer is reported through raise_error().
 * pcb    - callback table, stored in the parse state for the readers to use.
 * endp   - when not null, parsing stops as soon as an element has been read
 *          and more non-white text follows; *endp then receives the position
 *          of that text. It is left untouched if the input ends first.
 * trace  - when at least DEBUG the input is printed to stdout before parsing.
 * effort - stored in the parse state for the readers to use.
 *
 * Returns pi.obj. It is set to Qnil here and never assigned again by this
 * function (presumably the readers/callbacks fill it in -- confirm there).
 */
VALUE
ox_parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
    struct _PInfo   pi;
    int             seen_body = 0;

    if (0 == xml) {
        raise_error("Invalid arg, xml string can not be null", xml, 0);
    }
    if (DEBUG <= trace) {
        printf("Parsing xml:\n%s\n", xml);
    }

    /* set up the parse state shared with the read_xxx() helpers */
    pi.str = xml;
    pi.s = xml;
    pi.h = 0;
    pi.pcb = pcb;
    pi.obj = Qnil;
    pi.circ_array = 0;
    pi.encoding = 0;
    pi.trace = trace;
    pi.effort = effort;

    for (;;) {
        next_non_white(&pi);
        if ('\0' == *pi.s) {
            break; /* end of input */
        }
        if (seen_body && 0 != endp) {
            /* caller asked for the position of whatever follows the body */
            *endp = pi.s;
            break;
        }
        if ('<' != *pi.s) {
            /* every top level entity has to open with '<' */
            raise_error("invalid format, expected <", pi.str, pi.s);
        }
        pi.s++; /* step over '<' */

        switch (*pi.s) {
        case '?': /* prolog or other instruction */
            pi.s++;
            read_instruction(&pi);
            break;
        case '!': /* comment or DOCTYPE */
            pi.s++;
            switch (*pi.s) {
            case '\0':
                raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
                break;
            case '-':
                pi.s++; /* step over the first '-' */
                if ('-' == *pi.s) {
                    pi.s++; /* step over the second '-' */
                    read_comment(&pi);
                } else {
                    raise_error("invalid format, bad comment format", pi.str, pi.s);
                }
                break;
            default:
                if (0 == strncmp("DOCTYPE", pi.s, 7)) {
                    pi.s += 7;
                    read_doctype(&pi);
                } else {
                    raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s);
                }
                break;
            }
            break;
        case '\0':
            raise_error("invalid format, document not terminated", pi.str, pi.s);
            /* no break here, as before: control only reaches the default
             * label if raise_error() ever returns */
        default:
            read_element(&pi);
            seen_body = 1;
            break;
        }
    }
    return pi.obj;
}
/*
 * Main loop of the SAX style parser. Starting with the character returned by
 * skipBOM(), the input is consumed one construct at a time: text outside of
 * tags goes to read_text() and everything that opens with '<' is dispatched
 * on the character that follows it.
 *
 * Problems are reported with ox_sax_drive_error()/ox_sax_drive_error_at()
 * and, except for an unterminated "<!", the loop keeps going afterwards.
 * A small state value tracks whether an element has been seen yet
 * (START_STATE), an element is open (BODY_STATE) or the element stack has
 * emptied again (AFTER_STATE); it is used to flag a DOCTYPE after an element
 * and multiple top level elements.
 *
 * Once the input is exhausted every element still on the stack is reported
 * as not closed, innermost first, and the handler's end_element callback is
 * invoked for it when dr->has.end_element is set.
 */
static void
parse(SaxDrive dr) {
    char    ch = skipBOM(dr);
    int     phase = START_STATE;

    while ('\0' != ch) {
        buf_protect(&dr->buf);
        if (is_white(ch)) {
            ch = buf_next_non_white(&dr->buf);
            if ('\0' == ch) {
                break;
            }
        }
        if ('<' != ch) {
            /* not a tag, so it has to be text */
            buf_reset(&dr->buf);
            ch = read_text(dr);
            continue;
        }
        ch = buf_get(&dr->buf);
        switch (ch) {
        case '\0':
            goto finish;
        case '?': /* instructions (xml or otherwise) */
            ch = read_instruction(dr);
            break;
        case '/': /* element end */
            ch = read_element_end(dr);
            if (0 == stack_peek(&dr->stack)) {
                phase = AFTER_STATE;
            }
            break;
        case '!': /* comment, DOCTYPE or CDATA */
            buf_protect(&dr->buf);
            ch = buf_get(&dr->buf);
            if ('\0' == ch) {
                ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
                goto finish;
            }
            if ('-' == ch) {
                ch = buf_get(&dr->buf); /* past the first '-' */
                if ('-' == ch) {
                    ch = buf_get(&dr->buf); /* past the second '-' */
                } else {
                    ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
                }
                /* the comment is read even after a malformed opener */
                ch = read_comment(dr);
            } else {
                int n;
                int had_space = 0;
                int err_line = dr->buf.line;
                int err_col = dr->buf.col;

                if (is_white(ch)) {
                    had_space = 1;
                    ch = buf_next_non_white(&dr->buf);
                }
                /* mark the start of the keyword, then pull 7 characters so
                 * that it can be compared in place */
                dr->buf.str = dr->buf.tail - 1;
                for (n = 0; n < 7; n++) {
                    ch = buf_get(&dr->buf);
                }
                if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
                    if (0 != strncmp("DOCTYPE", dr->buf.str, 7)) {
                        ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
                    } else if (had_space) {
                        ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", err_line, err_col);
                    }
                    if (START_STATE != phase) {
                        ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
                    }
                    ch = read_doctype(dr);
                } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
                    if (0 != strncmp("[CDATA[", dr->buf.str, 7)) {
                        ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
                    } else if (had_space) {
                        ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", err_line, err_col);
                    }
                    ch = read_cdata(dr);
                } else {
                    ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", err_line, err_col);
                    ch = read_name_token(dr);
                    if ('>' == ch) {
                        ch = buf_get(&dr->buf);
                    }
                }
            }
            break;
        default: /* element start */
            buf_backup(&dr->buf);
            if (AFTER_STATE == phase) {
                ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
            }
            phase = BODY_STATE;
            ch = read_element_start(dr);
            if (0 == stack_peek(&dr->stack)) {
                phase = AFTER_STATE;
            }
            break;
        }
    }
 finish:
    if (dr->stack.head < dr->stack.tail) {
        char    text[256];
        Nv      el = dr->stack.tail;

        if (dr->has.line) {
            rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
        }
        if (dr->has.column) {
            rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
        }
        /* walk the stack from the most recently opened element down */
        while (dr->stack.head < el) {
            el--;
            snprintf(text, sizeof(text) - 1, "%selement '%s' not closed", EL_MISMATCH, el->name);
            ox_sax_drive_error_at(dr, text, dr->buf.line, dr->buf.col);
            if (dr->has.end_element) {
                VALUE   end_args[1];

                end_args[0] = el->val;
                rb_funcall2(dr->handler, ox_end_element_id, 1, end_args);
            }
        }
    }
}