/* Parses a JSON array. Called by read_next() with pi->s on the opening '['.
 *
 * Each element is read with read_next() using a null key. Elements must be
 * separated by ',' and the array closed by ']'; anything else is reported
 * through call_error() (when the handler has an error callback) and then
 * raise_error(). The array start/end callbacks are invoked with key when the
 * handler provides them.
 */
static void read_array(ParseInfo pi, const char *key) {
    int in_array;

    if (pi->has_array_start) {
        call_no_value(pi->handler, oj_array_start_id, key);
    }
    pi->s++; /* step over '[' */
    next_non_white(pi);
    if (']' != *pi->s) {
        for (in_array = 1; in_array;) {
            read_next(pi, 0);
            next_non_white(pi);
            switch (*pi->s) {
            case ',':
                pi->s++;
                break;
            case ']':
                pi->s++;
                in_array = 0;
                break;
            default:
                if (pi->has_error) {
                    call_error("invalid format, expected , or ] while in an array", pi, __FILE__, __LINE__);
                }
                raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
                break;
            }
        }
    } else {
        pi->s++; /* empty array */
    }
    if (pi->has_array_end) {
        call_no_value(pi->handler, oj_array_end_id, key);
    }
}
/* Reads a processing instruction. Entered with pi->s just past "<?".
 *
 * The target name is read first, followed by zero or more name="value"
 * attributes up to the closing "?>". Names are NUL-terminated in place in the
 * input buffer. The collected attributes (the array is zero-filled up front,
 * so the slot after the last attribute has a null name) are handed to the
 * instruct callback when one is set.
 */
static void read_instruction(PInfo pi) {
    struct _Attr attrs[MAX_ATTRS + 1];
    char         *target;
    char         *name_end;
    char         first;
    int          n = 0;

    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    name_end = pi->s;
    next_non_white(pi);
    /* Save the character before terminating the name: when no white space
     * follows the name, name_end and pi->s are the same location. */
    first = *pi->s;
    *name_end = '\0';
    if ('?' == first) {
        pi->s++;
    } else {
        while ('?' != *pi->s) {
            if ('\0' == *pi->s) {
                raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
            }
            next_non_white(pi);
            attrs[n].name = read_name_token(pi);
            name_end = pi->s;
            next_non_white(pi);
            if ('=' != *pi->s++) {
                raise_error("invalid format, no attribute value", pi->str, pi->s);
            }
            *name_end = '\0'; /* terminate attribute name */
            next_non_white(pi);
            attrs[n].value = read_quoted_value(pi);
            n++;
            if (MAX_ATTRS <= n) {
                raise_error("too many attributes", pi->str, pi->s);
            }
            next_non_white(pi);
        }
        if ('?' == *pi->s) {
            pi->s++;
        }
    }
    if ('>' != *pi->s++) {
        raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
    }
    if (0 != pi->pcb->instruct) {
        pi->pcb->instruct(pi, target, attrs);
    }
}
/* Reads a comment body. Entered with pi->s just past "<!--".
 *
 * Leading white space is skipped and trailing white space is trimmed by
 * writing a NUL into the input buffer. On return pi->s is just past the
 * closing "-->". The comment text is passed to the add_comment callback when
 * one is set. An unterminated comment is reported with raise_error().
 */
static void read_comment(PInfo pi) {
    char *end;
    char *s;
    char *comment;

    next_non_white(pi);
    comment = pi->s;
    end = strstr(pi->s, "-->");
    if (0 == end) {
        raise_error("invalid format, comment not terminated", pi->str, pi->s);
        /* Only reached if raise_error() returns; avoids pointer arithmetic
         * on a null pointer below. */
        return;
    }
    /* Trim trailing white space. The scan includes the first character of the
     * comment (comment <= s) so that a one-character comment followed by
     * white space is trimmed like any other. */
    for (s = end - 1; comment <= s; s--) {
        if (' ' != *s && '\t' != *s && '\f' != *s && '\n' != *s && '\r' != *s) {
            *(s + 1) = '\0';
            break;
        }
    }
    *end = '\0'; /* in case the comment was blank */
    pi->s = end + 3; /* past "-->" */
    if (0 != pi->pcb->add_comment) {
        pi->pcb->add_comment(pi, comment);
    }
}
/* Reads a DOCTYPE declaration. Entered with pi->s just past "<!DOCTYPE".
 *
 * Scans to the '>' that balances the opening '<' of the declaration, counting
 * nested '<' / '>' pairs along the way. That '>' is overwritten with a NUL so
 * the declaration text is a C string inside the input buffer, and pi->s is
 * left just past it. The text is passed to the add_doctype callback when one
 * is set. Hitting the end of input first is reported with raise_error().
 */
static void read_doctype(PInfo pi) {
    char *doctype;
    int  nesting = 1;
    int  closed = 0;

    next_non_white(pi);
    doctype = pi->s;
    while (!closed) {
        switch (*pi->s++) {
        case '\0':
            raise_error("invalid format, prolog not terminated", pi->str, pi->s);
            break;
        case '<':
            nesting++;
            break;
        case '>':
            nesting--;
            if (0 == nesting) {
                closed = 1;
            }
            break;
        default:
            break;
        }
    }
    /* pi->s is already one past the closing '>'; terminate the text there. */
    *(pi->s - 1) = '\0';
    if (0 != pi->pcb->add_doctype) {
        pi->pcb->add_doctype(pi, doctype);
    }
}
/* Parses a JSON object. Called by read_next() with pi->s on the opening '{'.
 *
 * Each member is a key read with read_quoted_value(), a ':' and a value read
 * with read_next() (which receives the member key). Members must be separated
 * by ',' and the object closed by '}'. Format errors are reported through
 * call_error() (when the handler has an error callback) and then
 * raise_error(). The hash start/end callbacks are invoked with key when the
 * handler provides them.
 */
static void read_hash(ParseInfo pi, const char *key) {
    const char *member_key;
    int        in_hash;

    if (pi->has_hash_start) {
        call_no_value(pi->handler, oj_hash_start_id, key);
    }
    pi->s++; /* step over '{' */
    next_non_white(pi);
    if ('}' != *pi->s) {
        for (in_hash = 1; in_hash;) {
            next_non_white(pi);
            member_key = read_quoted_value(pi);
            next_non_white(pi);
            if (':' != *pi->s) {
                if (pi->has_error) {
                    call_error("invalid format, expected :", pi, __FILE__, __LINE__);
                }
                raise_error("invalid format, expected :", pi->str, pi->s);
            } else {
                pi->s++;
            }
            read_next(pi, member_key);
            next_non_white(pi);
            switch (*pi->s) {
            case '}':
                pi->s++;
                in_hash = 0;
                break;
            case ',':
                pi->s++;
                break;
            default:
                if (pi->has_error) {
                    call_error("invalid format, expected , or } while in an object", pi, __FILE__, __LINE__);
                }
                raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
                break;
            }
        }
    } else {
        pi->s++; /* empty object */
    }
    if (pi->has_hash_end) {
        call_no_value(pi->handler, oj_hash_end_id, key);
    }
}
/* Reads the next JSON value at pi->s, dispatching on its first non-white
 * character to the matching reader. key is forwarded to that reader.
 *
 * Raises SystemStackError when the address of a local variable falls below
 * pi->stack_min, which bounds recursion through read_hash()/read_array().
 * Any character that does not start a value (including the terminating NUL)
 * is left untouched for the caller to deal with.
 */
static void read_next(ParseInfo pi, const char *key) {
    VALUE stack_probe; /* only its address is used, as a stack depth marker */
    char  c;

    if ((void*)&stack_probe < pi->stack_min) {
        rb_raise(rb_eSysStackError, "JSON is too deeply nested");
    }
    next_non_white(pi);
    c = *pi->s;
    if ('{' == c) {
        read_hash(pi, key);
    } else if ('[' == c) {
        read_array(pi, key);
    } else if ('"' == c) {
        read_str(pi, key);
    } else if ('+' == c || '-' == c || 'I' == c || ('0' <= c && c <= '9')) {
        read_num(pi, key);
    } else if ('t' == c) {
        read_true(pi, key);
    } else if ('f' == c) {
        read_false(pi, key);
    } else if ('n' == c) {
        read_nil(pi, key);
    }
    /* anything else: nothing is consumed */
}
/* Parses the XML text in xml, driving the callbacks in pcb, and returns the
 * object left in the parse state (Qnil unless something sets it).
 *
 * xml    - NUL-terminated, writable XML text; the readers terminate tokens
 *          in place. A null pointer is reported with raise_error().
 * pcb    - callbacks used by the individual readers.
 * endp   - when not null, parsing stops after the first element has been
 *          read and *endp is set to the next non-white character.
 * trace  - when at least DEBUG, the input is printed before parsing.
 * effort - stored in the parse state for the callbacks.
 */
VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
    struct _PInfo pi;
    int           seen_body = 0;

    if (0 == xml) {
        raise_error("Invalid arg, xml string can not be null", xml, 0);
    }
    if (DEBUG <= trace) {
        printf("Parsing xml:\n%s\n", xml);
    }
    /* initialize parse info */
    pi.str = xml;
    pi.s = xml;
    pi.h = 0;
    pi.pcb = pcb;
    pi.obj = Qnil;
    pi.circ_array = 0;
    pi.encoding = 0;
    pi.trace = trace;
    pi.effort = effort;

    for (;;) {
        next_non_white(&pi);
        if ('\0' == *pi.s) {
            break;
        }
        if (0 != endp && seen_body) {
            *endp = pi.s;
            break;
        }
        if ('<' != *pi.s) {
            /* every top level entity starts with '<' */
            raise_error("invalid format, expected <", pi.str, pi.s);
        }
        pi.s++; /* past '<' */
        switch (*pi.s) {
        case '?': /* processing instruction */
            pi.s++;
            read_instruction(&pi);
            break;
        case '!': /* comment or DOCTYPE */
            pi.s++;
            switch (*pi.s) {
            case '\0':
                raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
                break;
            case '-':
                pi.s++; /* first '-' */
                if ('-' == *pi.s) {
                    pi.s++; /* second '-' */
                    read_comment(&pi);
                } else {
                    raise_error("invalid format, bad comment format", pi.str, pi.s);
                }
                break;
            default:
                if (0 == strncmp("DOCTYPE", pi.s, 7)) {
                    pi.s += 7;
                    read_doctype(&pi);
                } else {
                    raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s);
                }
                break;
            }
            break;
        case '\0':
            raise_error("invalid format, document not terminated", pi.str, pi.s);
            /* no break here in the original either; only matters if
             * raise_error() returns */
        default:
            read_element(&pi);
            seen_body = 1;
            break;
        }
    }
    return pi.obj;
}
/* Reads one element, including its attributes and children. Entered with
 * pi->s just past the opening '<', at the element name.
 *
 * The opening tag is read first: the name, then name="value" attributes until
 * "/>" (empty element) or '>' is reached. Names are NUL-terminated in place
 * and attribute values containing '&' are run through collapse_special(). The
 * attribute list is terminated by an entry with a null name and passed to the
 * add_element callback.
 *
 * For a non-empty element the children are then read until the matching
 * close tag: comments, CDATA sections, nested elements (recursively) and
 * text. end_element is called once the element is closed. Format errors are
 * reported with raise_error().
 */
static void read_element(PInfo pi) {
    struct _Attr attrs[MAX_ATTRS];
    Attr         attr = attrs;
    char         *close_name;
    char         *ename;
    char         *token_end;
    char         *child_start;
    char         c;
    long         elen;
    int          tag_open = 1;

    ename = read_name_token(pi);
    token_end = pi->s;
    elen = token_end - ename;
    next_non_white(pi);
    /* Save the character before terminating the name: when no white space
     * follows the name, token_end and pi->s are the same location. */
    c = *pi->s;
    *token_end = '\0';

    /* Read attributes until the end of the opening tag (/ or >). An element
     * closed right after its name ("<name/>") takes the '/' case on the first
     * pass with an empty attribute list. */
    while (tag_open) {
        if ('\0' == c) {
            next_non_white(pi);
            c = *pi->s;
        }
        switch (c) {
        case '\0':
            raise_error("invalid format, document not terminated", pi->str, pi->s);
            /* no break here in the original either; only matters if
             * raise_error() returns */
        case '/':
            /* empty element: no children */
            pi->s++;
            if ('>' != *pi->s) {
                raise_error("invalid format, element not closed", pi->str, pi->s);
            }
            pi->s++; /* past > */
            attr->name = 0;
            pi->pcb->add_element(pi, ename, attrs, 0);
            pi->pcb->end_element(pi, ename);
            return;
        case '>':
            /* has either children or a value */
            pi->s++;
            tag_open = 0;
            attr->name = 0;
            pi->pcb->add_element(pi, ename, attrs, 1);
            break;
        default:
            /* an attribute: name, '=', quoted value */
            attr->name = read_name_token(pi);
            token_end = pi->s;
            next_non_white(pi);
            if ('=' != *pi->s++) {
                raise_error("invalid format, no attribute value", pi->str, pi->s);
            }
            *token_end = '\0'; /* terminate attribute name */
            next_non_white(pi);
            attr->value = read_quoted_value(pi);
            if (0 != strchr(attr->value, '&') && 0 != collapse_special((char*)attr->value)) {
                raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
            }
            attr++;
            if (MAX_ATTRS <= (attr - attrs)) {
                raise_error("too many attributes", pi->str, pi->s);
            }
            break;
        }
        c = '\0'; /* forces a fresh read of the next character */
    }

    /* Read children until the matching close tag. */
    for (;;) {
        child_start = pi->s;
        next_non_white(pi);
        c = *pi->s++;
        if ('\0' == c) {
            raise_error("invalid format, document not terminated", pi->str, pi->s);
        }
        if ('<' != c) {
            /* Text: rewind so leading white space is part of it. */
            pi->s = child_start;
            read_text(pi);
            /* The original notes that read_text only exits cleanly at a '<'.
             * If that '<' begins this element's close tag, finish here. */
            if ('/' == *(pi->s + 1) &&
                0 == strncmp(ename, pi->s + 2, elen) &&
                '>' == *(pi->s + elen + 2)) {
                pi->s += elen + 3;
                pi->pcb->end_element(pi, ename);
                return;
            }
            continue;
        }
        switch (*pi->s) {
        case '!': /* better be a comment or CDATA */
            pi->s++;
            if ('-' == *pi->s && '-' == *(pi->s + 1)) {
                pi->s += 2;
                read_comment(pi);
            } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
                pi->s += 7;
                read_cdata(pi);
            } else {
                raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
            }
            break;
        case '/': /* close tag; must match this element's name */
            pi->s++;
            close_name = read_name_token(pi);
            token_end = pi->s;
            next_non_white(pi);
            c = *pi->s;
            *token_end = '\0';
            if (0 != strcmp(close_name, ename)) {
                raise_error("invalid format, elements overlap", pi->str, pi->s);
            }
            if ('>' != c) {
                raise_error("invalid format, element not closed", pi->str, pi->s);
            }
            pi->s++;
            pi->pcb->end_element(pi, ename);
            return;
        case '\0':
            raise_error("invalid format, document not terminated", pi->str, pi->s);
            /* no break here in the original either; only matters if
             * raise_error() returns */
        default: /* a child element */
            read_element(pi);
            break;
        }
    }
}
/* Reads a processing instruction. Entered with pi->s just past "<?".
 *
 * The raw text following the target name is first captured with
 * gather_content() into a local buffer. The instruction is then parsed as a
 * target followed by name="value" attributes. If that parse fails (a name
 * without '=', or too many attributes) the attributes are abandoned, pi->s is
 * moved to one past the position gather_content() returned, and the instruct
 * callback receives the raw content instead. When the attributes parse
 * cleanly the callback receives a null content pointer.
 */
static void read_instruction(PInfo pi) {
    char         content[1024];
    struct _Attr attrs[MAX_ATTRS + 1];
    char         *target;
    char         *name_end;
    char         *content_end;
    char         first;
    int          n = 0;
    int          parsed = 1;

    *content = '\0';
    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    name_end = pi->s;
    content_end = gather_content(pi->s, content, sizeof(content) - 1);
    if (0 == content_end) {
        raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
    }
    next_non_white(pi);
    /* Save the character before terminating the name: when no white space
     * follows the name, name_end and pi->s are the same location. */
    first = *pi->s;
    *name_end = '\0';
    if ('?' == first) {
        pi->s++;
    } else {
        while ('?' != *pi->s) {
            if ('\0' == *pi->s) {
                raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
            }
            next_non_white(pi);
            attrs[n].name = read_name_token(pi);
            name_end = pi->s;
            next_non_white(pi);
            if ('=' != *pi->s++) {
                parsed = 0; /* not name=value; fall back to raw content */
                break;
            }
            *name_end = '\0'; /* terminate attribute name */
            next_non_white(pi);
            attrs[n].value = read_quoted_value(pi);
            n++;
            if (MAX_ATTRS <= n) {
                parsed = 0; /* too many attributes; fall back to raw content */
                break;
            }
            next_non_white(pi);
        }
        if ('?' == *pi->s) {
            pi->s++;
        }
    }
    if (!parsed) {
        pi->s = content_end + 1;
    } else if ('>' != *pi->s++) {
        raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
    }
    if (0 != pi->pcb->instruct) {
        pi->pcb->instruct(pi, target, attrs, parsed ? 0 : content);
    }
}