static int AddEpsilonClosure(struct FSAState *base, struct FSAState *state) { int i; struct FSATran *t1, *t2, *te2; if (state->mark & SMARK_INUSE) return 1; SET_SMARK(state, SMARK_INUSE, 1); if (state->mark & SMARK_FINAL) { SET_SMARK(base, SMARK_FINAL, 1); } if (!state->trans) return 1; for (i=0; i<state->trans->length; i++) { t1 = XMLVector_Get(state->trans, i); if (t1->label == epsilon) { if (!AddEpsilonClosure(base, t1->dst)) return 0; } else { te2 = (struct FSATran *)_XMLVector_GetIterP(base->trans, t2); for (; t2!=te2; t2++) { if(t2->label == t1->label && t2->dst == t1->dst) break; } if(t2 == te2) { if (!AddTran(base, t1->dst, t1->label)) return 0; } } } return 1; }
/*
 * Finishes the set-up of every element declaration collected in
 * v->ElementDecls:
 *   1. publishes the declaration through the parser's cpNames hash table,
 *   2. builds the content-model automaton (CreateNFA followed by NFAtoDFA)
 *      for every content type that needs one,
 *   3. attaches the declared attribute vector and sorts it with attcmp.
 * Finally the parser's cpNames table and cpNodesPool are stored in
 * v->ElementTable and v->cpNodesPool.
 *
 * Returns 1 on success, 0 when creating the state vector, the final state,
 * the NFA or the converted automaton fails (the two assignments at the end
 * are then not performed).
 */
static int InitValidator(LPXMLDTDVALIDATOR v)
{
	struct ElementDecl *decl, *declEnd;
	struct FSAState *finalState;
	LPXMLVECTOR *attVectors;
	void **slot;
	int needsAutomaton;

	declEnd = (struct ElementDecl *)_XMLVector_GetIterP(v->ElementDecls, decl);
	for (; decl != declEnd; decl++) {
		/* Important: the name of each declaration's top-level content
		   particle actually holds a pointer to a hash table bucket. The
		   bucket data MUST be assigned here, because the cpNames table
		   cannot be altered while DTD parsing is still in progress. */
		slot = (void**)XMLHTable_GetData(v->parser->prt->cpNames,
			(XMLHTABLEBUCKET*)decl->cp->name);
		if (*slot != EMPTYSTR)
			continue; /* duplicate declaration */
		*slot = decl;

		/* ANY, EMPTY and plain #PCDATA (MIXED without children) need no
		   automaton. Everything else does; note that (e) type content
		   models (which are actually XMLCTYPE_CHOICE) can't easily be
		   simplified, since (e)* or ((e))? etc. must be allowed. */
		if (decl->type == XMLCTYPE_ANY || decl->type == XMLCTYPE_EMPTY)
			needsAutomaton = 0;
		else if (decl->type == XMLCTYPE_MIXED && !decl->cp->children)
			needsAutomaton = 0;
		else
			needsAutomaton = 1;

		if (needsAutomaton) {
			v->fsa = XMLVector_Create(&decl->fsa, 4, sizeof(struct FSAState*));
			if (!decl->fsa)
				return 0;

			/* the accepting state is created first and handed to CreateNFA */
			finalState = AddState(v);
			if (!finalState)
				return 0;
			SET_SMARK(finalState, SMARK_FINAL, 1);

			decl->startState = CreateNFA(v, decl->cp, finalState);
			if (!decl->startState)
				return 0;

#ifdef PRINT_FSA
			PrintFSA(decl, "NFA");
#endif
			if (!NFAtoDFA(v, decl))
				return 0;
#ifdef PRINT_FSA
			PrintFSA(decl, "DFA");
#endif
		}

		if (!v->parser->prt->declAttTable)
			continue;
		attVectors = XMLHTable_Lookup(v->parser->prt->declAttTable, decl->name);
		if (!attVectors)
			continue;

		/* the second entry of the looked-up array is the vector used here */
		decl->declAtts = attVectors[1];
		if (decl->declAtts->length > 1)
			qsort((void*)decl->declAtts->array, decl->declAtts->length,
				sizeof(XMLATTDECL), attcmp);
	}

	v->ElementTable = v->parser->prt->cpNames;
	v->cpNodesPool = v->parser->prt->cpNodesPool;
	return 1;
}
/*
 * Releases the resources held by a DTD validator.
 *
 * p        - validator to clean up.
 * ForReuse - non-zero: keep the validator object itself (plus its
 *            ElementDecls vector, emptied, and its ContextStack) so it can
 *            be used for another DTD; zero: free everything including `p`.
 *
 * For every element declaration the transition vector of each state listed
 * in its fsa vector is freed, followed by the fsa vector itself.
 *
 * cpNodesPool, ElementTable and idTable are reset to NULL after being
 * released. With ForReuse set the validator stays alive, and leaving the
 * old pointers in place would make the next call free them a second time
 * if nothing reassigned them in between.
 *
 * With ForReuse set, a StatePool that has allocated blocks is thrown away
 * and recreated; if the recreation fails, ERR_XMLDTDV_MEMORY_ALLOC is
 * reported through Er_ and p->StatePool holds the failed XMLPool_Create
 * result.
 */
static void FreeDTDValidator(LPXMLDTDVALIDATOR p, int ForReuse)
{
	if (p->ElementDecls) {
		struct ElementDecl *pEl, *pEnd;
		struct FSAState **pS, **pSEnd;

		pEnd = (struct ElementDecl *)_XMLVector_GetIterP(p->ElementDecls, pEl);
		for (; pEl != pEnd; pEl++) {
			if (!pEl->fsa)
				continue;
			pSEnd = (struct FSAState **)_XMLVector_GetIterP(pEl->fsa, pS);
			for (; pS != pSEnd; pS++) {
				if ((*pS)->trans)
					XMLVector_Free((*pS)->trans);
			}
			XMLVector_Free(pEl->fsa);
		}

		if (ForReuse) {
			_XMLVector_RemoveAll(p->ElementDecls);
		}
		else {
			XMLVector_Free(p->ElementDecls);
		}
	}

	/* these are allocated by the parser when XMLFLAG_REPORT_DTD_EXT
	   is set, we're responsible for freeing them: */
	if (p->cpNodesPool) {
		XMLPool_FreePool(p->cpNodesPool);
		p->cpNodesPool = NULL; /* no dangling pointer in a reused validator */
	}
	if (p->ElementTable) {
		XMLHTable_Destroy(p->ElementTable, NULL, 0);
		p->ElementTable = NULL;
	}
	if (p->idTable) {
		XMLHTable_Destroy(p->idTable, NULL, 0);
		p->idTable = NULL;
	}

	if (ForReuse) {
		/* only rebuild the state pool when it has actually been used */
		if (p->StatePool && p->StatePool->blocksAllocated) {
			XMLPool_FreePool(p->StatePool);
			p->StatePool = XMLPool_Create(sizeof(struct FSAState), 16);
			if (!p->StatePool)
				Er_(p, NULL, ERR_XMLDTDV_MEMORY_ALLOC);
		}
		return;
	}

	if (p->StatePool)
		XMLPool_FreePool(p->StatePool);
	if (p->ContextStack)
		XMLVector_Free(p->ContextStack);
	free(p);
}
/*
 * Looks for a transition leaving `context` whose label (treated as an
 * XMLCP) has a name equal to `name`.
 *
 * Returns the destination state of the first matching transition, or NULL
 * when `context` has no transition vector or no label matches.
 */
static struct FSAState *Validate(struct FSAState *context, XMLCH *name)
{
	struct FSATran *tran, *stop;

	if (!context->trans)
		return NULL;

	stop = (struct FSATran *)_XMLVector_GetIterP(context->trans, tran);
	while (tran != stop) {
		if (strcmp(((XMLCP*)tran->label)->name, name) == 0)
			return tran->dst;
		tran++;
	}
	return NULL;
}
/*
 * Start-element handler that dumps the element and its attributes to
 * PFOUT.
 *
 * The namespace uri and local name are printed only when the uri string is
 * non-empty; the same rule is applied to each attribute. UserData is not
 * used. Always returns 0.
 */
static int StartElement(void *UserData, const XMLCH *uri, const XMLCH *localName, const XMLCH *qName, LPXMLVECTOR atts)
{
	LPXMLRUNTIMEATT cur, last;

	if (*uri)
		fprintf(PFOUT, "startElement(qName {%s} uri {%s} localName {%s})\n", qName, uri, localName);
	else
		fprintf(PFOUT, "startElement(qName {%s})\n", qName);

	/* nothing more to print for an element without attributes */
	if (!atts->length)
		return 0;

	fprintf(PFOUT, " %d attribute(s):\n", atts->length);

	last = (LPXMLRUNTIMEATT)_XMLVector_GetIterP(atts, cur);
	while (cur != last) {
		if (*cur->uri)
			fprintf(PFOUT, " qname {%s} value {%s} uri {%s} localName {%s}\n", cur->qname, cur->value, cur->uri, cur->localName);
		else
			fprintf(PFOUT, " qname {%s} value {%s}\n", cur->qname, cur->value);
		cur++;
	}
	return 0;
}
/*
 * Validates the attributes of the current element against the attribute
 * declarations of element declaration `e`.
 *
 * Phase 1 walks the leading run of #REQUIRED declarations in e->declAtts
 * (the walk stops at the first declaration that is not #REQUIRED, so it
 * relies on the required ones being ordered first; the order itself comes
 * from attcmp, which is not shown here). Each required attribute is looked
 * up by name, reported when missing, value-checked, and then flagged as
 * processed by pointing its value at EMPTYSTR.
 *
 * Phase 2 walks the attributes in `atts` up to the first defaulted one.
 * Flagged attributes get their value restored from valBuf.str; all others
 * are searched (bsearch with sattcmp) among the remaining declarations and
 * checked for: being declared, matching a #FIXED value, enumeration
 * membership, and token validity.
 *
 * Errors are reported through Er_; MAYRET(0) decides whether the function
 * returns right after an error (its expansion is defined elsewhere). When
 * ValidateAttsTok fails without an error code having been set, the failure
 * is reported as ERR_XMLDTDV_MEMORY_ALLOC and 0 is returned.
 *
 * Returns 1 when the end is reached, 0 on an early return.
 */
static int ValidateAtts(LPXMLDTDVALIDATOR v, struct ElementDecl *e, LPXMLVECTOR atts)
{
	LPXMLATTDECL decl = NULL, declEnd, match;
	LPXMLRUNTIMEATT att, attEnd;
	int optionalCount = 0; /* stays 0 when no atts are declared */

	if (e->declAtts) {
		declEnd = (LPXMLATTDECL)_XMLVector_GetIterP(e->declAtts, decl);
		for (; decl != declEnd; decl++) {
			if (decl->defaultDecl != XMLATTDECL_DEF_REQUIRED)
				break;

			att = XMLParser_GetNamedItem(v->parser, decl->name);
			if (!att) {
				Er_(v, NULL, ERR_XMLDTDV_REQUIRED_ATT_MISSING, decl->name, e->name);
				MAYRET(0);
				continue;
			}

			if (decl->type > XMLATTDECL_TYPE_NMTOKENS &&
			    !ValidateAttsEnum(decl->pExt, att->value)) {
				Er_(v, decl->pExt, ERR_XMLDTDV_ILLEGAL_ATT_VALUE, att->qname, e->name);
				MAYRET(0);
			}
			else if (decl->type != XMLATTDECL_TYPE_CDATA &&
			         !ValidateAttsTok(v, decl->type, e->name, att->qname, att->value)) {
				if (!v->ErrorCode) {
					Er_(v, NULL, ERR_XMLDTDV_MEMORY_ALLOC);
					return 0;
				}
				MAYRET(0);
			}

			att->value = EMPTYSTR; /* hack for marking this attribute processed */
		}
		/* declarations left after the required ones; may be 0
		   (only #REQUIRED atts declared) */
		optionalCount = declEnd - decl;
	}

	attEnd = (LPXMLRUNTIMEATT)_XMLVector_GetIterP(atts, att);
	for (; att != attEnd; att++) {
		/* defaulted atts are always at the end so don't test further */
		if (_XMLParser_AttIsDefaulted(att))
			break;

		if (att->value == EMPTYSTR) {
			/* already processed in phase 1: undo the marker */
			att->value = att->valBuf.str;
			continue;
		}

		match = (optionalCount)
			? bsearch(att->qname, decl, optionalCount, sizeof(XMLATTDECL), sattcmp)
			: NULL;

		if (!match) {
			Er_(v, NULL, ERR_XMLDTDV_UNDECLARED_ATT, att->qname, e->name);
			MAYRET(0);
		}
		else if (match->defaultDecl == XMLATTDECL_DEF_FIXED &&
		         strcmp(att->value, match->value)) {
			Er_(v, NULL, ERR_XMLDTDV_ILLEGAL_ATT_VALUE, att->qname, e->name);
			MAYRET(0);
		}
		else if (match->type > XMLATTDECL_TYPE_NMTOKENS &&
		         !ValidateAttsEnum(match->pExt, att->value)) {
			Er_(v, match->pExt, ERR_XMLDTDV_ILLEGAL_ATT_VALUE, att->qname, e->name);
			MAYRET(0);
		}
		else if (match->type != XMLATTDECL_TYPE_CDATA &&
		         !ValidateAttsTok(v, match->type, e->name, att->qname, att->value)) {
			if (!v->ErrorCode) {
				Er_(v, NULL, ERR_XMLDTDV_MEMORY_ALLOC);
				return 0;
			}
			MAYRET(0);
		}
	}
	return 1;
}
/*
 * Removes the epsilon transitions from the automaton of element
 * declaration `e` and replaces e->fsa with a vector holding only the
 * states that are still needed.
 *
 *   Pass 1: mark the start state and every state that is the destination
 *           of a non-epsilon transition as SMARK_USEFUL.
 *   Pass 2: for each useful state, merge its epsilon closure into it
 *           (AddEpsilonClosure), clearing SMARK_INUSE afterwards.
 *   Pass 3: strip the epsilon transitions from the useful states and
 *           collect those states in a new vector; states that are not
 *           useful only get their transition vector freed.
 *
 * vp is not used by this function.
 *
 * Returns 1 on success. On failure 0 is returned, the temporary vector is
 * released and e->fsa is left in place; any transition vector that was
 * already freed has its pointer cleared, so a later clean-up that walks
 * e->fsa (see FreeDTDValidator) does not free it a second time.
 */
static int NFAtoDFA(LPXMLDTDVALIDATOR vp, struct ElementDecl *e)
{
	struct FSAState **sw, **s, **se;
	struct FSATran *t, *te;
	LPXMLVECTOR o;
	int i;

	XMLVector_Create(&o, 0, sizeof(struct FSAState*));
	if (!o) return 0;

	/* pass 1: find the states that survive epsilon removal */
	SET_SMARK(e->startState, SMARK_USEFUL, 1);
	se = (struct FSAState **)_XMLVector_GetIterP(e->fsa, s);
	for (; s != se; s++) {
		if (!(*s)->trans) continue;
		te = (struct FSATran *)_XMLVector_GetIterP((*s)->trans, t);
		for (; t != te; t++) {
			if (t->label != epsilon) {
				SET_SMARK(t->dst, SMARK_USEFUL, 1);
			}
		}
	}

	/* pass 2: pull the epsilon closure into every useful state */
	se = (struct FSAState **)_XMLVector_GetIterP(e->fsa, s);
	for (; s != se; s++) {
		if (!((*s)->mark & SMARK_USEFUL)) continue;

		/* mark the state itself so the closure walk never re-enters it */
		SET_SMARK((*s), SMARK_INUSE, 1);
		if ((*s)->trans) {
			/* indexed access with the length re-read on every round:
			   AddEpsilonClosure may append transitions to this vector */
			for (i = 0; i < (*s)->trans->length; i++) {
				t = XMLVector_Get((*s)->trans, i);
				if (t->label == epsilon) {
					if (!AddEpsilonClosure(*s, t->dst)) goto fail;
				}
			}
		}
		UnMarkFSA(e->fsa, SMARK_INUSE);
	}

	/* pass 3: drop epsilon transitions, keep only the useful states */
	se = (struct FSAState **)_XMLVector_GetIterP(e->fsa, s);
	for (; s != se; s++) {
		if ((*s)->mark & SMARK_USEFUL) {
			if ((*s)->trans) {
				for (i = 0; i < (*s)->trans->length; i++) {
					t = XMLVector_Get((*s)->trans, i);
					if (t->label == epsilon) {
						if (!XMLVector_Remove((*s)->trans, i)) goto fail;
						i--; /* stay on the slot that was just vacated */
					}
				}
			}
			sw = XMLVector_Append(o, NULL);
			if (!sw) goto fail;
			*sw = *s;
		}
		else if ((*s)->trans) {
			XMLVector_Free((*s)->trans);
			/* the state is still listed in e->fsa if a later step fails;
			   clear the pointer so it cannot be freed twice */
			(*s)->trans = NULL;
		}
	}

	XMLVector_Free(e->fsa);
	e->fsa = o;
	return 1;

fail:
	/* o only holds borrowed state pointers; release the vector itself */
	XMLVector_Free(o);
	return 0;
}
/*
 * Clears the bits given in `mark` from the mark field of every state
 * referenced by the `fsa` vector.
 */
static void UnMarkFSA(LPXMLVECTOR fsa, int mark)
{
	struct FSAState **cur, **end;
	const int keep = ~mark; /* every bit except the ones being cleared */

	end = (struct FSAState **)_XMLVector_GetIterP(fsa, cur);
	while (cur != end) {
		(*cur)->mark &= keep;
		cur++;
	}
}