/**
 * Prints a description of @p particle to the debug output.
 *
 * The first line shows the occurrence constraints, the second one the kind of
 * the term. For model groups all child particles are dumped recursively, each
 * nesting level being indented by five additional spaces.
 *
 * @param particle The particle to describe.
 * @param level    The number of spaces every output line is prefixed with.
 */
void XsdSchemaDebugger::dumpParticle(const XsdParticle::Ptr &particle, int level)
{
    QString indent;
    indent.fill(QLatin1Char(' '), level);

    // occurrence constraints
    const QString minText = QString::number(particle->minimumOccurs());
    QString maxText;
    if (particle->maximumOccursUnbounded())
        maxText = QLatin1String("unbounded");
    else
        maxText = QString::number(particle->maximumOccurs());

    qDebug("%s min=%s max=%s", qPrintable(indent), qPrintable(minText), qPrintable(maxText));

    const XsdTerm::Ptr term(particle->term());

    if (term->isElement()) {
        const XsdElement::Ptr element(term);
        qDebug("%selement (%s)", qPrintable(indent), qPrintable(element->displayName(m_namePool)));
        return;
    }

    if (term->isModelGroup()) {
        const XsdModelGroup::Ptr group(term);

        switch (group->compositor()) {
        case XsdModelGroup::SequenceCompositor:
            qDebug("%ssequence", qPrintable(indent));
            break;
        case XsdModelGroup::AllCompositor:
            qDebug("%sall", qPrintable(indent));
            break;
        case XsdModelGroup::ChoiceCompositor:
            qDebug("%schoice", qPrintable(indent));
            break;
        default:
            break;
        }

        // descend into the children with a deeper indentation
        const XsdParticle::List children = group->particles();
        for (int idx = 0; idx < children.count(); ++idx)
            dumpParticle(children.at(idx), level + 5);

        return;
    }

    if (term->isWildcard()) {
        const XsdWildcard::Ptr wildcard(term);
        qDebug("%swildcard (process=%d)", qPrintable(indent), wildcard->processContents());
    }
}
/**
 * Internal recursive helper that looks for two elements below @p particle
 * that share a name but differ in the name of their type.
 *
 * For an element term its substitution group members are inspected (or the
 * element itself if it has none). Every element seen for the first time is
 * registered in @p hash under its name; model groups are traversed recursively.
 *
 * @param particle           The particle to inspect.
 * @param namePool           The name pool used to resolve element and type names.
 * @param hash               The elements collected so far, keyed by name.
 * @param conflictingElement Receives the offending element if @c true is returned.
 * @return @c true as soon as a conflict is found, @c false otherwise.
 */
static bool hasDuplicatedElementsInternal(const XsdParticle::Ptr &particle, const NamePool::Ptr &namePool,
                                          ElementHash &hash, XsdElement::Ptr &conflictingElement)
{
    const XsdTerm::Ptr term = particle->term();

    if (term->isElement()) {
        const XsdElement::Ptr declared(term);

        // without substitution group members only the element itself is checked
        XsdElement::WeakList candidates = declared->substitutionGroups();
        if (candidates.isEmpty())
            candidates << declared.data();

        for (int idx = 0; idx < candidates.count(); ++idx) {
            const XsdElement::Ptr candidate(candidates.at(idx));

            const ElementHash::const_iterator known = hash.constFind(candidate->name(namePool));
            if (known == hash.constEnd()) {
                // first occurrence of that name, remember it
                hash.insert(candidate->name(namePool), candidate);
                continue;
            }

            // same name seen before: the type names have to agree
            if (candidate->type()->name(namePool) != known.value()->type()->name(namePool)) {
                conflictingElement = candidate;
                return true;
            }
        }
    } else if (term->isModelGroup()) {
        const XsdModelGroup::Ptr group(term);
        const XsdParticle::List children = group->particles();

        for (int idx = 0; idx < children.count(); ++idx) {
            if (hasDuplicatedElementsInternal(children.at(idx), namePool, hash, conflictingElement))
                return true;
        }
    }

    return false;
}
/**
 * Returns whether @p particle and @p otherParticle are structurally equal.
 *
 * Compared are the occurrence constraints and the kind of the terms. Element
 * terms additionally have to agree in name and type name, model group terms
 * in the number of child particles and, pairwise in order, in the children
 * themselves. Wildcard terms are not compared any further.
 */
bool XsdSchemaChecker::particleEqualsRecursively(const XsdParticle::Ptr &particle, const XsdParticle::Ptr &otherParticle) const
{
    // @see http://www.w3.org/TR/xmlschema11-1/#cos-particle-extend
    //TODO: find out what 'properties' of a particle should be checked here...

    // occurrence constraints
    if (particle->minimumOccurs() != otherParticle->minimumOccurs()
        || particle->maximumOccursUnbounded() != otherParticle->maximumOccursUnbounded()
        || particle->maximumOccurs() != otherParticle->maximumOccurs()) {
        return false;
    }

    const XsdTerm::Ptr lhs = particle->term();
    const XsdTerm::Ptr rhs = otherParticle->term();

    if (lhs->isElement()) {
        if (!rhs->isElement())
            return false;

        const XsdElement::Ptr lhsElement(lhs);
        const XsdElement::Ptr rhsElement(rhs);

        if (lhsElement->name(m_namePool) != rhsElement->name(m_namePool))
            return false;

        if (lhsElement->type()->name(m_namePool) != rhsElement->type()->name(m_namePool))
            return false;
    }

    if (lhs->isModelGroup()) {
        if (!rhs->isModelGroup())
            return false;

        const XsdModelGroup::Ptr lhsGroup(lhs);
        const XsdModelGroup::Ptr rhsGroup(rhs);

        const XsdParticle::List lhsChildren = lhsGroup->particles();
        const XsdParticle::List rhsChildren = rhsGroup->particles();

        if (lhsChildren.count() != rhsChildren.count())
            return false;

        // children are compared position by position
        for (int idx = 0; idx < lhsChildren.count(); ++idx) {
            if (!particleEqualsRecursively(lhsChildren.at(idx), rhsChildren.at(idx)))
                return false;
        }
    }

    // wildcards only have to be of the same kind, their properties are not inspected (yet)
    if (lhs->isWildcard() && !rhs->isWildcard())
        return false;

    return true;
}
/**
 * Checks the model group held by @p particle by comparing the terms of all of
 * its child particles pairwise with termMatches().
 *
 * The term of @p particle is converted to a model group without further checks.
 *
 * @see http://www.w3.org/TR/xmlschema-1/#non-ambig
 *
 * @return @c false if any two child terms match each other, @c true otherwise.
 */
bool XsdParticleChecker::isUPAConformXsdAll(const XsdParticle::Ptr &particle, const NamePool::Ptr &namePool)
{
    const XsdModelGroup::Ptr allGroup(particle->term());
    const XsdParticle::List members = allGroup->particles();
    const int memberCount = members.count();

    // every unordered pair (first, second) with first < second is tested once
    for (int first = 0; first < memberCount; ++first) {
        for (int second = first + 1; second < memberCount; ++second) {
            const bool ambiguous = termMatches(members.at(first)->term(), members.at(second)->term(), namePool);
            if (ambiguous)
                return false;
        }
    }

    return true;
}
/**
 * Returns the set of all elements that are reachable from @p particle.
 *
 * An element term contributes itself, a model group term contributes the
 * elements of all of its child particles (recursively). Any other term yields
 * an empty set.
 */
QSet<XsdElement::Ptr> collectAllElements(const XsdParticle::Ptr &particle)
{
    QSet<XsdElement::Ptr> found;

    const XsdTerm::Ptr term(particle->term());

    if (term->isElement()) {
        found.insert(XsdElement::Ptr(term));
        return found;
    }

    if (!term->isModelGroup())
        return found;

    const XsdModelGroup::Ptr group(term);
    const XsdParticle::List children = group->particles();
    for (int idx = 0; idx < children.count(); ++idx)
        found.unite(collectAllElements(children.at(idx)));

    return found;
}
/*
 * Create the FSA according to Algorithm Tp(S) from http://www.ltg.ed.ac.uk/~ht/XML_Europe_2003.html
 *
 * Builds the states and transitions for @p particle in front of @p endState
 * and returns the start state of the created part of the automaton.
 */
XsdStateMachine<XsdTerm::Ptr>::StateId XsdStateMachineBuilder::buildParticle(const XsdParticle::Ptr &particle, XsdStateMachine<XsdTerm::Ptr>::StateId endState)
{
    typedef XsdStateMachine<XsdTerm::Ptr>::StateId StateId;

    // A term is unrolled at most that many times, both for the optional and for the
    // required occurrences (presumably to keep the automaton at a manageable size).
    const int unrollLimit = 100;

    const XsdTerm::Ptr term(particle->term());

    // start state of everything that has been built so far
    StateId head = endState;

    if (particle->maximumOccursUnbounded()) {
        // 2: a loop that can be repeated arbitrarily often or left towards the end state
        const StateId loopState = m_stateMachine->addState(XsdStateMachine<XsdTerm::Ptr>::InternalState);
        const StateId bodyStart = buildTerm(term, loopState);

        m_stateMachine->addEpsilonTransition(loopState, bodyStart);
        m_stateMachine->addEpsilonTransition(bodyStart, endState);

        head = loopState;
    } else {
        // 3: one copy of the term per optional occurrence, each of which can be skipped
        const int declaredOptional = (particle->maximumOccurs() - particle->minimumOccurs());
        const int optionalCount = (declaredOptional > unrollLimit) ? unrollLimit : declaredOptional;

        for (int remaining = optionalCount; remaining > 0; --remaining) {
            head = buildTerm(term, head);
            m_stateMachine->addEpsilonTransition(head, endState);
        }
    }

    // one copy of the term per required occurrence, chained in front of the optional part
    const int declaredRequired = particle->minimumOccurs();
    const int requiredCount = (declaredRequired > unrollLimit) ? unrollLimit : declaredRequired;

    for (int remaining = requiredCount; remaining > 0; --remaining)
        head = buildTerm(term, head);

    return head;
}
/*
 * Create the FSA according to Algorithm Tt(S) from http://www.ltg.ed.ac.uk/~ht/XML_Europe_2003.html
 *
 * Builds the states and transitions for @p term in front of @p endState and
 * returns the start state of the created part of the automaton.
 */
XsdStateMachine<XsdTerm::Ptr>::StateId XsdStateMachineBuilder::buildTerm(const XsdTerm::Ptr &term, XsdStateMachine<XsdTerm::Ptr>::StateId endState)
{
    typedef XsdStateMachine<XsdTerm::Ptr>::StateId StateId;

    if (term->isWildcard()) { // 1
        const StateId entry = m_stateMachine->addState(XsdStateMachine<XsdTerm::Ptr>::InternalState);
        m_stateMachine->addTransition(entry, term, endState);
        return entry;
    }

    if (term->isElement()) { // 2
        const StateId entry = m_stateMachine->addState(XsdStateMachine<XsdTerm::Ptr>::InternalState);
        m_stateMachine->addTransition(entry, term, endState);

        const XsdElement::Ptr element(term);
        const bool checking = (m_mode == CheckingMode);

        if (checking || m_mode == ValidatingMode) {
            // In checking mode every substitution group member gets a transition, in
            // validating mode only those accepted by substitutionGroupOkTransitive().
            const XsdElement::WeakList members = element->substitutionGroups();
            for (int idx = 0; idx < members.count(); ++idx) {
                const XsdElement::Ptr member(members.at(idx));
                if (checking || XsdSchemaHelper::substitutionGroupOkTransitive(element, member, m_namePool))
                    m_stateMachine->addTransition(entry, member, endState);
            }
        }

        return entry;
    }

    if (term->isModelGroup()) {
        const XsdModelGroup::Ptr group(term);

        switch (group->compositor()) {
        case XsdModelGroup::ChoiceCompositor: { // 3
            // one common entry state that branches into every alternative
            const StateId branchPoint = m_stateMachine->addState(XsdStateMachine<XsdTerm::Ptr>::InternalState);

            const XsdParticle::List alternatives = group->particles();
            for (int idx = 0; idx < alternatives.count(); ++idx) {
                const XsdParticle::Ptr alternative(alternatives.at(idx));
                if (alternative->maximumOccurs() != 0) {
                    const StateId alternativeStart = buildParticle(alternative, endState);
                    m_stateMachine->addEpsilonTransition(branchPoint, alternativeStart);
                }
            }

            return branchPoint;
        }

        case XsdModelGroup::SequenceCompositor: { // 4
            // built back to front: the start of a particle is the end of its predecessor
            StateId head = endState;

            const XsdParticle::List members = group->particles();
            for (int idx = members.count() - 1; idx >= 0; --idx) {
                const XsdParticle::Ptr member(members.at(idx));
                if (member->maximumOccurs() != 0)
                    head = buildParticle(member, head);
            }

            return head;
        }

        case XsdModelGroup::AllCompositor: {
            const StateId fanOut = m_stateMachine->addState(XsdStateMachine<XsdTerm::Ptr>::InternalState);

            // every list returned by allCombinations() is built like a sequence of its own
            const QList<XsdParticle::List> orderings = allCombinations(group->particles());
            for (int orderingIdx = 0; orderingIdx < orderings.count(); ++orderingIdx) {
                StateId head = endState;

                const XsdParticle::List ordering = orderings.at(orderingIdx);
                for (int idx = ordering.count() - 1; idx >= 0; --idx) { // iterate reverse
                    const XsdParticle::Ptr member(ordering.at(idx));
                    if (member->maximumOccurs() != 0)
                        head = buildParticle(member, head);
                }

                m_stateMachine->addEpsilonTransition(fanOut, head);
            }

            if (orderings.isEmpty())
                return endState;

            return fanOut;
        }

        default:
            break;
        }
    }

    // neither a known kind of term nor a known compositor
    Q_ASSERT(false);
    return 0;
}
bool XsdParticleChecker::isUPAConform(const XsdParticle::Ptr &particle, const NamePool::Ptr &namePool) { /** * In case we encounter an <xsd:all> element, don't construct a state machine, but use the approach * described at http://www.w3.org/TR/xmlschema-1/#non-ambig * Reason: For n elements inside the <xsd:all>, represented in the NDA, the state machine * constructs n! states in the DFA, which does not scale. */ if (particle->term()->isModelGroup()) { const XsdModelGroup::Ptr group(particle->term()); if (group->compositor() == XsdModelGroup::AllCompositor) return isUPAConformXsdAll(particle, namePool); } /** * The algorithm is implemented like described in http://www.ltg.ed.ac.uk/~ht/XML_Europe_2003.html#S2.2 */ // create a state machine for the given particle XsdStateMachine<XsdTerm::Ptr> stateMachine(namePool); XsdStateMachineBuilder builder(&stateMachine, namePool); const XsdStateMachine<XsdTerm::Ptr>::StateId endState = builder.reset(); const XsdStateMachine<XsdTerm::Ptr>::StateId startState = builder.buildParticle(particle, endState); builder.addStartState(startState); /* static int counter = 0; { QFile file(QString("/tmp/file_upa%1.dot").arg(counter)); file.open(QIODevice::WriteOnly); stateMachine.outputGraph(&file, "Base"); file.close(); } ::system(QString("dot -Tpng /tmp/file_upa%1.dot -o/tmp/file_upa%1.png").arg(counter).toLatin1().data()); */ const XsdStateMachine<XsdTerm::Ptr> dfa = stateMachine.toDFA(); /* { QFile file(QString("/tmp/file_upa%1dfa.dot").arg(counter)); file.open(QIODevice::WriteOnly); dfa.outputGraph(&file, "Base"); file.close(); } ::system(QString("dot -Tpng /tmp/file_upa%1dfa.dot -o/tmp/file_upa%1dfa.png").arg(counter).toLatin1().data()); */ const QHash<XsdStateMachine<XsdTerm::Ptr>::StateId, XsdStateMachine<XsdTerm::Ptr>::StateType> states = dfa.states(); const QHash<XsdStateMachine<XsdTerm::Ptr>::StateId, QHash<XsdTerm::Ptr, QVector<XsdStateMachine<XsdTerm::Ptr>::StateId> > > transitions = dfa.transitions(); // the basic idea of that 
algorithm is to iterate over all states of that machine and check that no two edges // that match on the same term leave a state, so for a given term it should always be obvious which edge to take QHashIterator<XsdStateMachine<XsdTerm::Ptr>::StateId, XsdStateMachine<XsdTerm::Ptr>::StateType> stateIt(states); while (stateIt.hasNext()) { // iterate over all states stateIt.next(); // fetch all transitions the current state allows const QHash<XsdTerm::Ptr, QVector<XsdStateMachine<XsdTerm::Ptr>::StateId> > currentTransitions = transitions.value(stateIt.key()); QHashIterator<XsdTerm::Ptr, QVector<XsdStateMachine<XsdTerm::Ptr>::StateId> > transitionIt(currentTransitions); while (transitionIt.hasNext()) { // iterate over all transitions transitionIt.next(); if (transitionIt.value().size() > 1) { // we have one state with two edges leaving it, that means // the XsdTerm::Ptr exists twice, that is an error return false; } QHashIterator<XsdTerm::Ptr, QVector<XsdStateMachine<XsdTerm::Ptr>::StateId> > innerTransitionIt(currentTransitions); while (innerTransitionIt.hasNext()) { // iterate over all transitions again, as we have to compare all transitions with all innerTransitionIt.next(); if (transitionIt.key() == innerTransitionIt.key()) // do no compare with ourself continue; // use the helper method termMatches to check if both term matches if (termMatches(transitionIt.key(), innerTransitionIt.key(), namePool)) return false; } } } return true; }
/**
 * Checks whether the particles of @p sequence are accepted by @p particle.
 *
 * Only wildcard and element particles are inspected, any other kind of term
 * is accepted unconditionally. For both kinds the number of particles in
 * @p sequence has to lie within the occurrence constraints of @p particle.
 * For wildcards every element of the sequence additionally has to carry a
 * name the wildcard allows. The corresponding per element check for element
 * particles (2.3) is not finished yet and never rejects anything.
 */
bool XsdSchemaChecker::elementSequenceAccepted(const XsdModelGroup::Ptr &sequence, const XsdParticle::Ptr &particle) const
{
    // @see http://www.w3.org/TR/xmlschema11-1/#cvc-accept

    const XsdTerm::Ptr term(particle->term());
    const bool wildcardTerm = term->isWildcard();
    if (!wildcardTerm && !term->isElement())
        return true;

    // 1.1 resp. 2.1: not fewer particles than the minimum
    const unsigned int length = static_cast<unsigned int>(sequence->particles().count());
    if (length < particle->minimumOccurs())
        return false;

    // 1.2 resp. 2.2: not more particles than the maximum, unless it is unbounded
    if (!particle->maximumOccursUnbounded() && length > particle->maximumOccurs())
        return false;

    const XsdParticle::List members(sequence->particles());

    if (wildcardTerm) { // 1
        const XsdWildcard::Ptr wildcard(term);

        // 1.3
        for (int idx = 0; idx < members.count(); ++idx) {
            const XsdTerm::Ptr memberTerm(members.at(idx)->term());
            if (!memberTerm->isElement())
                continue;

            if (!XsdSchemaHelper::wildcardAllowsExpandedName(XsdElement::Ptr(memberTerm)->name(m_namePool), wildcard, m_namePool))
                return false;
        }

        return true;
    }

    // 2
    const XsdElement::Ptr element(term);

    // 2.3
    for (int idx = 0; idx < members.count(); ++idx) {
        const XsdTerm::Ptr memberTerm(members.at(idx)->term());
        if (!memberTerm->isElement())
            continue;

        const XsdElement::Ptr seqElement(memberTerm);
        bool isValid = false;

        // 2.3.1
        if (element->name(m_namePool) == seqElement->name(m_namePool))
            isValid = true;

        // 2.3.2
        if (element->scope() && element->scope()->variety() == XsdElement::Scope::Global) {
            if (!(element->disallowedSubstitutions() & NamedSchemaComponent::SubstitutionConstraint)) {
                //TODO: continue
            }
        }
    }

    return true;
}
/**
 * Checks whether the particle @p extension is a valid extension of the
 * particle @p base, which is the case if one of the following holds:
 *
 * -# Both are the same particle.
 * -# @p extension occurs exactly once and its term is a sequence whose first
 *    particle equals @p base recursively.
 * -# Both particles have the same minimum occurrence, both terms are all
 *    groups and the particles of the base group are a prefix of the particles
 *    of the extension group.
 *
 * @see http://www.w3.org/TR/xmlschema11-1/#cos-particle-extend
 *
 * @return @c true if @p extension is a valid extension of @p base, @c false otherwise.
 */
bool XsdSchemaChecker::isValidParticleExtension(const XsdParticle::Ptr &extension, const XsdParticle::Ptr &base) const
{
    // 1
    if (extension == base)
        return true;

    // 2
    if (extension->minimumOccurs() == 1 && extension->maximumOccurs() == 1 && !extension->maximumOccursUnbounded()) {
        if (extension->term()->isModelGroup()) {
            const XsdModelGroup::Ptr modelGroup = extension->term();
            if (modelGroup->compositor() == XsdModelGroup::SequenceCompositor) {
                // an empty sequence has no first member, so it can not satisfy that clause
                // (and first() must not be called on an empty list)
                const XsdParticle::List sequenceParticles = modelGroup->particles();
                if (!sequenceParticles.isEmpty() && particleEqualsRecursively(sequenceParticles.first(), base))
                    return true;
            }
        }
    }

    // 3.1
    if (extension->minimumOccurs() != base->minimumOccurs())
        return false;

    // 3.2
    if (!extension->term()->isModelGroup() || !base->term()->isModelGroup())
        return false;

    const XsdModelGroup::Ptr extensionGroup(extension->term());
    const XsdModelGroup::Ptr baseGroup(base->term());

    if (extensionGroup->compositor() != XsdModelGroup::AllCompositor || baseGroup->compositor() != XsdModelGroup::AllCompositor)
        return false;

    // 3.3: the particles of the base group have to be a prefix of the extension's particles
    const XsdParticle::List extensionParticles = extensionGroup->particles();
    const XsdParticle::List baseParticles = baseGroup->particles();

    if (baseParticles.count() > extensionParticles.count())
        return false;

    for (int i = 0; i < baseParticles.count(); ++i) {
        if (baseParticles.at(i) != extensionParticles.at(i))
            return false;
    }

    return true;
}