/*
  Parse the buffer str[0 .. len) and report its structure through
  my_xml_enter(), my_xml_value() and my_xml_leave().

  @param p    Parser state; its cursor fields (beg, cur, end, attrend) are
              reset here before scanning starts.
  @param str  Start of the input text.
  @param len  Number of bytes available at str.

  @return MY_XML_OK on success. MY_XML_ERROR when a callback wrapper fails,
          when an unexpected token is met, when a CDATA section is not
          terminated, or when input ends while p->attr is still non-empty;
          for the syntax errors a message is formatted into p->errstr.
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{
  /* Lengths of the "<![CDATA[" and "]]>" markers around a CDATA payload. */
  enum { CDATA_OPEN_LEN= 9, CDATA_CLOSE_LEN= 3 };

  p->attrend= p->attr;
  p->beg= str;
  p->cur= str;
  p->end= str + len;

  while (p->cur < p->end)
  {
    MY_XML_ATTR a;

    if (p->cur[0] == '<')
    {
      int lex;
      int question= 0;   /* set for "<?name ... ?>" */
      int exclam= 0;     /* set for "<!name ... >"  */

      /* First token: a whole comment, a whole CDATA section, or just '<'. */
      lex= my_xml_scan(p, &a);

      if (MY_XML_COMMENT == lex)
        continue;

      if (lex == MY_XML_CDATA)
      {
        /*
          The payload is what remains after stripping both markers. A span
          shorter than the two markers means no complete section was
          scanned; stripping would then make (a.end - a.beg) negative and
          the size_t cast would turn it into an enormous length.
        */
        if ((a.end - a.beg) < (CDATA_OPEN_LEN + CDATA_CLOSE_LEN))
        {
          sprintf(p->errstr, "unterminated CDATA section");
          return MY_XML_ERROR;
        }
        a.beg+= CDATA_OPEN_LEN;
        a.end-= CDATA_CLOSE_LEN;
        /* NOTE(review): the result of my_xml_value() is not checked here. */
        my_xml_value(p, a.beg, (size_t) (a.end - a.beg));
        continue;
      }

      /* Second token decides the kind of tag. */
      lex= my_xml_scan(p, &a);

      if (MY_XML_SLASH == lex)
      {
        /* Closing tag: "</" ident ">" */
        if (MY_XML_IDENT != (lex= my_xml_scan(p, &a)))
        {
          sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, a.beg, (size_t) (a.end - a.beg)))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
        goto gt;
      }

      if (MY_XML_EXCLAM == lex)
      {
        lex= my_xml_scan(p, &a);
        exclam= 1;
      }
      else if (MY_XML_QUESTION == lex)
      {
        lex= my_xml_scan(p, &a);
        question= 1;
      }

      if (MY_XML_IDENT == lex)
      {
        p->current_node_type= MY_XML_NODE_TAG;
        if (MY_XML_OK != my_xml_enter(p, a.beg, (size_t) (a.end - a.beg)))
          return MY_XML_ERROR;
      }
      else
      {
        sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
                lex2str(lex));
        return MY_XML_ERROR;
      }

      /*
        Attribute list. 'a' holds the attribute name, 'b' receives whatever
        follows it. Bare strings are accepted only inside "<!...>".
      */
      while ((MY_XML_IDENT == (lex= my_xml_scan(p, &a))) ||
             ((MY_XML_STRING == lex && exclam)))
      {
        MY_XML_ATTR b;

        if (MY_XML_EQ == (lex= my_xml_scan(p, &b)))
        {
          /* name = value */
          lex= my_xml_scan(p, &b);
          if ((lex == MY_XML_IDENT) || (lex == MY_XML_STRING))
          {
            p->current_node_type= MY_XML_NODE_ATTR;
            if ((MY_XML_OK != my_xml_enter(p, a.beg,
                                           (size_t) (a.end - a.beg))) ||
                (MY_XML_OK != my_xml_value(p, b.beg,
                                           (size_t) (b.end - b.beg))) ||
                (MY_XML_OK != my_xml_leave(p, a.beg,
                                           (size_t) (a.end - a.beg))))
              return MY_XML_ERROR;
          }
          else
          {
            sprintf(p->errstr,"%s unexpected (ident or string wanted)",
                    lex2str(lex));
            return MY_XML_ERROR;
          }
        }
        else if (MY_XML_IDENT == lex)
        {
          /* Name without a value: reported as an empty attribute node. */
          p->current_node_type= MY_XML_NODE_ATTR;
          if ((MY_XML_OK != my_xml_enter(p, a.beg,
                                         (size_t) (a.end - a.beg))) ||
              (MY_XML_OK != my_xml_leave(p, a.beg,
                                         (size_t) (a.end - a.beg))))
            return MY_XML_ERROR;
        }
        else if ((MY_XML_STRING == lex) && exclam)
        {
          /*
            We are in <!DOCTYPE>, e.g.
              <!DOCTYPE name SYSTEM "SystemLiteral">
              <!DOCTYPE name PUBLIC "PubidLiteral" "SystemLiteral">
            Just skip "SystemLiteral" and "PubidLiteral".
          */
        }
        else
          break;
      }

      if (lex == MY_XML_SLASH)
      {
        /* Self-closing tag "<name ... />": leave the node just entered. */
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
      }

gt:
      if (question)
      {
        if (lex != MY_XML_QUESTION)
        {
          sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
      }

      if (exclam)
      {
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
      }

      if (lex != MY_XML_GT)
      {
        sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
    }
    else
    {
      /* Character data: everything up to the next '<' or end of input. */
      a.beg= p->cur;
      for ( ; (p->cur < p->end) && (p->cur[0] != '<'); p->cur++) {}
      a.end= p->cur;

      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
        my_xml_norm_text(&a);
      if (a.beg != a.end)
      {
        /* NOTE(review): the result of my_xml_value() is not checked here. */
        my_xml_value(p, a.beg, (size_t) (a.end - a.beg));
      }
    }
  }

  /* A non-empty p->attr once the input is exhausted is reported as an error. */
  if (p->attr[0])
  {
    sprintf(p->errstr,"unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}
/*
  Fetch the next token from the parser input.

  Leading characters accepted by my_xml_is_space() are skipped, then one
  token is consumed starting at p->cur. On return a->beg / a->end delimit
  the token text and p->cur points just past what was consumed.

  @param p  Parser state; p->cur is advanced.
  @param a  Receives the token span.

  @return MY_XML_EOF, MY_XML_COMMENT, MY_XML_CDATA, MY_XML_STRING,
          MY_XML_IDENT, MY_XML_UNKNOWN, or - for any of ? = / < > ! -
          the character itself.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  while ((p->cur < p->end) && my_xml_is_space(p->cur[0]))
    p->cur++;

  if (p->cur >= p->end)
  {
    a->beg= p->end;
    a->end= p->end;
    return MY_XML_EOF;
  }

  a->beg= p->cur;
  a->end= p->cur;

  if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<!--")))
  {
    /* Comment: consume through "-->" or, if it never comes, to the end. */
    while (p->cur < p->end)
    {
      if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("-->")))
      {
        p->cur+= 3;
        break;
      }
      p->cur++;
    }
    a->end= p->cur;
    return MY_XML_COMMENT;
  }

  if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<![CDATA[")))
  {
    /*
      CDATA: a->end is updated only when "]]>" is found; otherwise it keeps
      the value assigned above (equal to a->beg).
    */
    p->cur+= 9;
    while (p->cur < p->end - 2)
    {
      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
      {
        p->cur+= 3;
        a->end= p->cur;
        break;
      }
      p->cur++;
    }
    return MY_XML_CDATA;
  }

  if (strchr("?=/<>!",p->cur[0]))
  {
    /* One-character token: its code is the character itself. */
    p->cur++;
    a->end= p->cur;
    return a->beg[0];
  }

  if ((p->cur[0] == '"') || (p->cur[0] == '\''))
  {
    /*
      Quoted string: run to the matching quote character or to the end of
      input, whichever comes first. The span excludes both quotes.
    */
    const char quote= p->cur[0];

    p->cur++;
    while ((p->cur < p->end) && (p->cur[0] != quote))
      p->cur++;
    a->end= p->cur;
    if (p->cur < p->end)
      p->cur++;                 /* step over the closing quote */
    a->beg++;                   /* drop the opening quote */
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
      my_xml_norm_text(a);
    return MY_XML_STRING;
  }

  if (my_xml_is_id0(p->cur[0]))
  {
    /* Identifier: one id0 character followed by any number of id1 ones. */
    p->cur++;
    while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
      p->cur++;
    a->end= p->cur;
    my_xml_norm_text(a);
    return MY_XML_IDENT;
  }

  /* Nothing matched; the cursor is left where it is. */
  return MY_XML_UNKNOWN;
}
/*
  Fetch the next token from the parser input.

  Leading blanks (any character matched by strchr(" \t\r\n", c)) are
  skipped, then one token is consumed starting at p->cur. On return
  a->beg / a->end delimit the token text and p->cur points just past what
  was consumed. No byte at or beyond p->end is ever read.

  @param p  Parser state; p->cur is advanced.
  @param a  Receives the token span.

  @return MY_XML_EOF, MY_XML_COMMENT, MY_XML_STRING, MY_XML_IDENT, or - for
          any of ? = / < > ! - the character itself.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  int lex;

  for ( ; (p->cur < p->end) && strchr(" \t\r\n", p->cur[0]); p->cur++) {}

  if (p->cur >= p->end)
  {
    a->beg= p->end;
    a->end= p->end;
    lex= MY_XML_EOF;
    goto ret;
  }

  a->beg= p->cur;
  a->end= p->cur;

  /* Compare against "<!--" only when four bytes are actually available. */
  if ((p->end - p->cur >= 4) && !memcmp(p->cur, "<!--", 4))
  {
    /*
      Comment: consume through "-->" or, if it never comes, to the end of
      input. The remaining length is checked before each comparison so the
      search cannot read past p->end.
    */
    for ( ; p->cur < p->end; p->cur++)
    {
      if ((p->end - p->cur >= 3) && !memcmp(p->cur, "-->", 3))
      {
        p->cur+= 3;
        break;
      }
    }
    a->end= p->cur;
    lex= MY_XML_COMMENT;
  }
  else if (strchr("?=/<>!", p->cur[0]))
  {
    /* One-character token: its code is the character itself. */
    p->cur++;
    a->end= p->cur;
    lex= a->beg[0];
  }
  else if ((p->cur[0] == '"') || (p->cur[0] == '\''))
  {
    /*
      Quoted string: run to the matching quote character or to the end of
      input, whichever comes first. The span excludes both quotes.
    */
    p->cur++;
    for ( ; (p->cur < p->end) && (p->cur[0] != a->beg[0]); p->cur++) {}
    a->end= p->cur;
    /* The loop stops either on the closing quote or at p->end; only the
       former may be stepped over (and p->end must not be dereferenced). */
    if (p->cur < p->end)
      p->cur++;
    a->beg++;
    my_xml_norm_text(a);
    lex= MY_XML_STRING;
  }
  else
  {
    /* Anything else is an identifier running up to the next delimiter. */
    for ( ; (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
         p->cur++) {}
    a->end= p->cur;
    my_xml_norm_text(a);
    lex= MY_XML_IDENT;
  }

ret:
  return lex;
}
/*
  Fetch the next token from the parser input.

  Leading characters accepted by my_xml_is_space() are skipped, then one
  token is consumed starting at p->cur. On return a->beg / a->end delimit
  the token text and p->cur points just past what was consumed. No byte at
  or beyond p->end is ever read.

  @param p  Parser state; p->cur is advanced.
  @param a  Receives the token span.

  @return MY_XML_EOF, MY_XML_COMMENT, MY_XML_CDATA, MY_XML_STRING,
          MY_XML_IDENT, MY_XML_UNKNOWN, or - for any of ? = / < > ! -
          the character itself.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  int lex;

  for ( ; (p->cur < p->end) && my_xml_is_space(p->cur[0]); p->cur++) {}

  if (p->cur >= p->end)
  {
    a->beg= p->end;
    a->end= p->end;
    lex= MY_XML_EOF;
    goto ret;
  }

  a->beg= p->cur;
  a->end= p->cur;

  /* Every marker comparison is preceded by a remaining-length check. */
  if ((p->end - p->cur >= 4) && !memcmp(p->cur, "<!--", 4))
  {
    /*
      Comment: consume through "-->" or, if it never comes, to the end of
      input.
    */
    for ( ; p->cur < p->end; p->cur++)
    {
      if ((p->end - p->cur >= 3) && !memcmp(p->cur, "-->", 3))
      {
        p->cur+= 3;
        break;
      }
    }
    a->end= p->cur;
    lex= MY_XML_COMMENT;
  }
  else if ((p->end - p->cur >= 9) && !memcmp(p->cur, "<![CDATA[", 9))
  {
    /*
      CDATA: a->end is updated only when "]]>" is found; otherwise it keeps
      the value assigned above (equal to a->beg).
    */
    p->cur+= 9;
    for ( ; p->cur < p->end - 2; p->cur++)
    {
      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
      {
        p->cur+= 3;
        a->end= p->cur;
        break;
      }
    }
    lex= MY_XML_CDATA;
  }
  else if (strchr("?=/<>!", p->cur[0]))
  {
    /* One-character token: its code is the character itself. */
    p->cur++;
    a->end= p->cur;
    lex= a->beg[0];
  }
  else if ((p->cur[0] == '"') || (p->cur[0] == '\''))
  {
    /*
      Quoted string: run to the matching quote character or to the end of
      input, whichever comes first. The span excludes both quotes.
    */
    p->cur++;
    for ( ; (p->cur < p->end) && (p->cur[0] != a->beg[0]); p->cur++) {}
    a->end= p->cur;
    /* The loop stops either on the closing quote or at p->end; only the
       former may be stepped over (and p->end must not be dereferenced). */
    if (p->cur < p->end)
      p->cur++;
    a->beg++;
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
      my_xml_norm_text(a);
    lex= MY_XML_STRING;
  }
  else if (my_xml_is_id0(p->cur[0]))
  {
    /* Identifier: one id0 character followed by any number of id1 ones. */
    p->cur++;
    while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
      p->cur++;
    a->end= p->cur;
    my_xml_norm_text(a);
    lex= MY_XML_IDENT;
  }
  else
    lex= MY_XML_UNKNOWN;   /* nothing matched; the cursor is not advanced */

ret:
  return lex;
}
/*
  Parse the buffer str[0 .. len) and report its structure through
  my_xml_enter(), my_xml_value() and my_xml_leave().

  @param p    Parser state; its cursor fields (beg, cur, end, attrend) are
              reset here before scanning starts.
  @param str  Start of the input text.
  @param len  Number of bytes available at str.

  @return MY_XML_OK when the whole buffer was consumed, MY_XML_ERROR when a
          callback wrapper fails or an unexpected token is met (in the
          latter case a numbered message is formatted into p->errstr).
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
{
  p->attrend= p->attr;
  p->beg= str;
  p->cur= str;
  p->end= str + len;

  while (p->cur < p->end)
  {
    MY_XML_ATTR tok;
    int lex;
    int is_pi= 0;      /* inside "<?name ... ?>" */
    int is_bang= 0;    /* inside "<!name ... >"  */

    if (p->cur[0] != '<')
    {
      /* Character data: everything up to the next '<' or end of input. */
      tok.beg= p->cur;
      while ((p->cur < p->end) && (p->cur[0] != '<'))
        p->cur++;
      tok.end= p->cur;

      my_xml_norm_text(&tok);
      if (tok.beg != tok.end)
        my_xml_value(p, tok.beg, tok.end - tok.beg);
      continue;
    }

    /* First token: either a whole comment or just the '<'. */
    lex= my_xml_scan(p, &tok);
    if (lex == MY_XML_COMMENT)
      continue;

    /* Second token decides the kind of tag. */
    lex= my_xml_scan(p, &tok);

    if (lex == MY_XML_SLASH)
    {
      /* Closing tag: "</" ident ">" */
      lex= my_xml_scan(p, &tok);
      if (lex != MY_XML_IDENT)
      {
        sprintf(p->errstr,"1: %s unexpected (ident wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
      if (my_xml_leave(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
        return MY_XML_ERROR;
      lex= my_xml_scan(p, &tok);
    }
    else
    {
      if (lex == MY_XML_EXCLAM)
      {
        lex= my_xml_scan(p, &tok);
        is_bang= 1;
      }
      else if (lex == MY_XML_QUESTION)
      {
        lex= my_xml_scan(p, &tok);
        is_pi= 1;
      }

      if (lex != MY_XML_IDENT)
      {
        sprintf(p->errstr,"3: %s unexpected (ident or '/' wanted)",
                lex2str(lex));
        return MY_XML_ERROR;
      }
      if (my_xml_enter(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
        return MY_XML_ERROR;

      /*
        Attribute list. 'tok' holds the attribute name, 'val' receives
        whatever follows it.
      */
      for (;;)
      {
        MY_XML_ATTR val;

        lex= my_xml_scan(p, &tok);
        if ((lex != MY_XML_IDENT) && (lex != MY_XML_STRING))
          break;

        lex= my_xml_scan(p, &val);
        if (lex == MY_XML_EQ)
        {
          /* name = value */
          lex= my_xml_scan(p, &val);
          if ((lex != MY_XML_IDENT) && (lex != MY_XML_STRING))
          {
            sprintf(p->errstr,"4: %s unexpected (ident or string wanted)",
                    lex2str(lex));
            return MY_XML_ERROR;
          }
          if (my_xml_enter(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
            return MY_XML_ERROR;
          if (my_xml_value(p, val.beg, val.end - val.beg) != MY_XML_OK)
            return MY_XML_ERROR;
          if (my_xml_leave(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
            return MY_XML_ERROR;
        }
        else if ((lex == MY_XML_STRING) || (lex == MY_XML_IDENT))
        {
          /* Name without a value: reported as an empty node. */
          if (my_xml_enter(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
            return MY_XML_ERROR;
          if (my_xml_leave(p, tok.beg, tok.end - tok.beg) != MY_XML_OK)
            return MY_XML_ERROR;
        }
        else
          break;
      }

      if (lex == MY_XML_SLASH)
      {
        /* Self-closing tag "<name ... />": leave the node just entered. */
        if (my_xml_leave(p, NULL, 0) != MY_XML_OK)
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &tok);
      }
    }

    /* Tag terminator; 'lex' is the token that should close the tag. */
    if (is_pi)
    {
      if (lex != MY_XML_QUESTION)
      {
        sprintf(p->errstr,"6: %s unexpected ('?' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
      if (my_xml_leave(p, NULL, 0) != MY_XML_OK)
        return MY_XML_ERROR;
      lex= my_xml_scan(p, &tok);
    }

    if (is_bang && (my_xml_leave(p, NULL, 0) != MY_XML_OK))
      return MY_XML_ERROR;

    if (lex != MY_XML_GT)
    {
      sprintf(p->errstr,"5: %s unexpected ('>' wanted)",lex2str(lex));
      return MY_XML_ERROR;
    }
  }

  return MY_XML_OK;
}