Grammar* XercesDOMParser::loadGrammar(const InputSource& source, const Grammar::GrammarType grammarType, const bool toCache) { // Avoid multiple entrance if (getParseInProgress()) ThrowXMLwithMemMgr(IOException, XMLExcepts::Gen_ParseInProgress, fMemoryManager); ResetParseType resetParse(this, &XercesDOMParser::resetParse); Grammar* grammar = 0; try { setParseInProgress(true); if (grammarType == Grammar::DTDGrammarType) getScanner()->setDocTypeHandler(0); grammar = getScanner()->loadGrammar(source, grammarType, toCache); } catch(const OutOfMemoryException&) { resetParse.release(); throw; } return grammar; }
void XercesDOMParser::cacheGrammarFromParse(const bool newState) { getScanner()->cacheGrammarFromParse(newState); if (newState) getScanner()->useCachedGrammarInParse(newState); }
// // We need to verify that all of its possible values // (in the enum list) // is distinct and // refer to valid notations if toValidateNotation is set on // void DTDValidator::checkTokenList(const XMLAttDef& curAttDef , bool toValidateNotation) { XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager()); ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager()); // // Search forward for a space or a null. If a null, // we are done. If a space, cap it and look it up. // bool breakFlag = false; XMLCh* listPtr = list; XMLCh* lastPtr = listPtr; while (true) { while (*listPtr && (*listPtr != chSpace)) listPtr++; // // If at the end, indicate we need to break after // this one. Else, cap it off here. // if (!*listPtr) breakFlag = true; else *listPtr++ = chNull; //distinction check //there should be no same token found in the remaining list if (XMLString::isInList(lastPtr, listPtr)) { emitError ( XMLValid::AttrDupToken , curAttDef.getFullName() , lastPtr ); } if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr)) { emitError ( XMLValid::UnknownNotRefAttr , curAttDef.getFullName() , lastPtr ); } // Break out if we hit the end last time if (breakFlag) break; // Else move upwards and try again lastPtr = listPtr; } }
void XercesDOMParser::resetParse() { if (getScanner()->getDocTypeHandler() == 0) { getScanner()->setDocTypeHandler(this); } setParseInProgress(false); }
void XercesDOMParser::setXMLEntityResolver(XMLEntityResolver* const handler) { fXMLEntityResolver = handler; if (fXMLEntityResolver) { getScanner()->setEntityHandler(this); fEntityResolver = 0; } else { getScanner()->setEntityHandler(0); } }
// --------------------------------------------------------------------------- // DTDValidator: Implementation of the XMLValidator interface // --------------------------------------------------------------------------- bool DTDValidator::checkContent(XMLElementDecl* const elemDecl , QName** const children , XMLSize_t childCount , XMLSize_t* indexFailingChild) { // // Look up the element id in our element decl pool. This will get us // the element decl in our own way of looking at them. // if (!elemDecl) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager()); // // Get the content spec type of this element. This will tell us what // to do to validate it. // const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType(); if (modelType == DTDElementDecl::Empty) { // // We can do this one here. It cannot have any children. If it does // we return 0 as the index of the first bad child. // if (childCount) { *indexFailingChild=0; return false; } } else if (modelType == DTDElementDecl::Any) { // We pass no judgement on this one, anything goes } else if ((modelType == DTDElementDecl::Mixed_Simple) || (modelType == DTDElementDecl::Children)) { // Get the element's content model or fault it in const XMLContentModel* elemCM = elemDecl->getContentModel(); // Ask it to validate and return its return return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId(), indexFailingChild, getScanner()->getMemoryManager()); } else { ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager()); } // Went ok, so return success return true; }
void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const { // // At this level, we cannot set the URI id. So we just set it to zero // and leave it at that. The scanner, who called us, will look at the // prefix we stored (if any), resolve it, and store the URL id if any. // const XMLCh* fullName = attDef.getFullName(); const int colonInd = XMLString::indexOf(fullName, chColon); if (colonInd == -1) { // There is no prefix, so we just do a simple and quick setting toFill.set ( 0 , fullName , XMLUni::fgZeroLenString , attDef.getValue() , attDef.getType() ); } else { // // There is a colon, so we have to split apart the name and prefix // part. // XMLCh* tmpNameBuf = XMLString::replicate(fullName, getScanner()->getMemoryManager()); ArrayJanitor<XMLCh> janNameBuf(tmpNameBuf, getScanner()->getMemoryManager()); // Put a null where the colon is, to split it into two strings tmpNameBuf[colonInd] = chNull; // // And now we can set the attribute object with the prefix and name // parts. // toFill.set ( 0 , &tmpNameBuf[colonInd+1] , tmpNameBuf , attDef.getValue() , attDef.getType() ); } }
void LogPageStats::run() { LogPageStatsHandler h; BaseScanner* s = getScanner(); s->add_handler(&h); s->fork(); s->join(); delete s; }
void LogrecInfo::run() { LogrecInfoHandler h; BaseScanner* s = getScanner(); s->add_handler(&h); s->fork(); s->join(); delete s; }
// --------------------------------------------------------------------------- // XercesDOMParser: Setter methods // --------------------------------------------------------------------------- void XercesDOMParser::setErrorHandler(ErrorHandler* const handler) { fErrorHandler = handler; XMLScanner* scanner = getScanner(); if (fErrorHandler) { scanner->setErrorReporter(this); scanner->setErrorHandler(fErrorHandler); } else { scanner->setErrorReporter(0); scanner->setErrorHandler(0); } }
void VerifyLog::run() { if (verify_alloc) { init_alloc(); } BaseScanner* s = getScanner(); VerifyHandler h(merge); if (!merge) { s->openFileCallback = std::bind(&VerifyHandler::newFile, &h, std::placeholders::_1); } s->add_handler(&h); s->fork(); s->join(); delete s; }
XMLFilePos XercesDOMParser::getSrcOffset() const { return getScanner()->getSrcOffset(); }
const XMLCh* XercesDOMParser::getURIText(unsigned int uriId) const { return getScanner()->getURIText(uriId); }
Grammar* XercesDOMParser::getRootGrammar() { return getScanner()->getRootGrammar(); }
bool XercesDOMParser::isUsingCachedGrammarInParse() const { return getScanner()->isUsingCachedGrammarInParse(); }
// --------------------------------------------------------------------------- // XercesDOMParser: Getter methods // --------------------------------------------------------------------------- bool XercesDOMParser::isCachingGrammarFromParse() const { return getScanner()->isCachingGrammarFromParse(); }
void XercesDOMParser::resetCachedGrammarPool() { getGrammarResolver()->resetCachedGrammar(); getScanner()->resetCachedGrammar(); }
void DTDValidator::validateAttrValue(const XMLAttDef* attDef , const XMLCh* const attrValue , bool preValidation , const XMLElementDecl*) { // // Get quick refs to lost of of the stuff in the passed objects in // order to simplify the code below, which will reference them very // often. // const XMLAttDef::AttTypes type = attDef->getType(); const XMLAttDef::DefAttTypes defType = attDef->getDefaultType(); const XMLCh* const valueText = attDef->getValue(); const XMLCh* const fullName = attDef->getFullName(); const XMLCh* const enumList = attDef->getEnumeration(); // // If the default type is fixed, then make sure the passed value maps // to the fixed value. // If during preContentValidation, the value we are validating is the fixed value itself // so no need to compare. // Only need to do this for regular attribute value validation // if (defType == XMLAttDef::Fixed && !preValidation) { if (!XMLString::equals(attrValue, valueText)) emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText); } // // If its a CDATA attribute, then we are done with any DTD level // validation else do the rest. // if (type == XMLAttDef::CData) return; // An empty string cannot be valid for any of the other types if (!attrValue[0]) { emitError(XMLValid::InvalidEmptyAttValue, fullName); return; } // See whether we are doing multiple values or not const bool multipleValues = ( (type == XMLAttDef::IDRefs) || (type == XMLAttDef::Entities) || (type == XMLAttDef::NmTokens) || (type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration) ); // And whether we must check for a first name char const bool firstNameChar = ( (type == XMLAttDef::ID) || (type == XMLAttDef::IDRef) || (type == XMLAttDef::IDRefs) || (type == XMLAttDef::Entity) || (type == XMLAttDef::Entities) || (type == XMLAttDef::Notation) ); // Whether it requires ref checking stuff const bool isARefType ( (type == XMLAttDef::ID) || (type == XMLAttDef::IDRef) || (type == XMLAttDef::IDRefs) ); // Some trigger flags to avoid issuing redundant errors and whatnot bool alreadyCapped = false; // // Make a copy of the text that we can mangle and get a pointer we can // move through the value // // Use a stack-based buffer, when possible... XMLCh tempBuffer[100]; XMLCh* pszTmpVal = 0; ArrayJanitor<XMLCh> janTmpVal(0); if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) { XMLString::copyString(tempBuffer, attrValue); pszTmpVal = tempBuffer; } else { janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager()); pszTmpVal = janTmpVal.get(); } XMLCh* valPtr = pszTmpVal; bool doNamespace = getScanner()->getDoNamespaces(); while (true) { // // Make sure the first character is a valid first name char, i.e. // if its a Name value. For NmToken values we don't treat the first // char any differently. // if (firstNameChar) { // If its not, emit and error but try to keep going if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr)) emitError(XMLValid::AttrValNotName, valPtr, fullName); valPtr++; } // Make sure all the remaining chars are valid name chars while (*valPtr) { // // If we hit a whitespace, its either a break between two // or more values, or an error if we have a single value. // // // XML1.0-3rd // // [6] Names ::= Name (#x20 Name)* // [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* // // only and only ONE #x20 is allowed to be the delimiter // if (*valPtr==chSpace) { if (!multipleValues) { emitError(XMLValid::NoMultipleValues, fullName); return; } break; } // Now this attribute can be of type // ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION // All these must be valid XMLName // If namespace is enabled, colon is not allowed in the first 6 if (doNamespace && *valPtr == chColon && firstNameChar) emitError(XMLValid::ColonNotValidWithNS); if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr)) { emitError(XMLValid::AttrValNotName, valPtr, fullName); return; } valPtr++; } // // Cap it off at the current non-name char. If already capped, // then remember this. // if (!(*valPtr)) alreadyCapped = true; *valPtr = 0; // // If this type of attribute requires that we track reference // stuff, then handle that. // if (isARefType) { if ((type == XMLAttDef::ID) || (type == XMLAttDef::IDRef) || (type == XMLAttDef::IDRefs)) { XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal); if (find) { if (find->getDeclared() && (type == XMLAttDef::ID)) emitError(XMLValid::ReusedIDValue, pszTmpVal); } else { find = new (getScanner()->getMemoryManager()) XMLRefInfo ( pszTmpVal , false , false , getScanner()->getMemoryManager() ); getScanner()->getIDRefList()->put((void*)find->getRefName(), find); } // // Mark it declared or used, which might be redundant in some cases // but not worth checking // if (type == XMLAttDef::ID) find->setDeclared(true); else { if (!preValidation) { find->setUsed(true); } } } } else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities))) { // // If its refering to a entity, then look up the name in the // general entity pool. If not there, then its an error. If its // not an external unparsed entity, then its an error. // // In case of pre-validation, the above errors should be ignored. // const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal); if (decl) { if (!decl->isUnparsed()) emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName); } else { emitError ( XMLValid::UnknownEntityRefAttr , fullName , pszTmpVal ); } } else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration)) { // // Make sure that this value maps to one of the enumeration or // notation values in the enumList parameter. We don't have to // look it up in the notation pool (if a notation) because we // will look up the enumerated values themselves. If they are in // the notation pool (after the DTD is parsed), then obviously // this value will be legal since it matches one of them. // if (!XMLString::isInList(pszTmpVal, enumList)) emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName); } // If not doing multiple values, then we are done if (!multipleValues) break; // // If we are at the end, then break out now, else move up to the // next char and update the base pointer. // if (alreadyCapped) break; valPtr++; pszTmpVal = valPtr; } }
void DTDValidator::preContentValidation(bool #if defined(MATCH_DEBUG) reuseGrammar #endif ,bool validateDefAttr) { // // Lets enumerate all of the elements in the element decl pool // and put out an error for any that did not get declared. // We also check all of the attributes as well. // NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator(); fDTDGrammar->setValidated(true); while (elemEnum.hasMoreElements()) { const DTDElementDecl& curElem = elemEnum.nextElement(); const DTDElementDecl::CreateReasons reason = curElem.getCreateReason(); // // See if this element decl was ever marked as declared. If // not, then put out an error. In some cases its just // a warning, such as being referenced in a content model. // if (reason != XMLElementDecl::Declared) { if (reason == XMLElementDecl::AttList) { getScanner()->emitError ( XMLErrs::UndeclaredElemInAttList , curElem.getFullName() ); } else if (reason == XMLElementDecl::AsRootElem) { // It's ok that the root element is not declared in the DTD /* emitError ( XMLValid::UndeclaredElemInDocType , curElem.getFullName() );*/ } else if (reason == XMLElementDecl::InContentModel) { getScanner()->emitError ( XMLErrs::UndeclaredElemInCM , curElem.getFullName() ); } else { #if defined(MATCH_DEBUG) if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){ } else ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager()); #endif } } // // Check all of the attributes of the current element. // We check for: // // 1) Multiple ID attributes // 2) That all of the default values of attributes are // valid for their type. // 3) That for any notation types, that their lists // of possible values refer to declared notations. // // 4) XML1.0(3rd edition) // // Validity constraint: One Notation Per Element Type // An element type MUST NOT have more than one NOTATION attribute specified. // // Validity constraint: No Notation on Empty Element // For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY. // // Validity constraint: No Duplicate Tokens // The notation names in a single NotationType attribute declaration, as well as // the NmTokens in a single Enumeration attribute declaration, MUST all be distinct. // XMLAttDefList& attDefList = curElem.getAttDefList(); bool seenId = false; bool seenNOTATION = false; bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty); for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) { const XMLAttDef& curAttDef = attDefList.getAttDef(i); if (curAttDef.getType() == XMLAttDef::ID) { if (seenId) { emitError ( XMLValid::MultipleIdAttrs , curElem.getFullName() ); break; } seenId = true; } else if (curAttDef.getType() == XMLAttDef::Notation) { if (seenNOTATION) { emitError ( XMLValid::ElemOneNotationAttr , curElem.getFullName() ); break; } seenNOTATION = true; // no notation attribute on empty element if (elemEmpty) { emitError ( XMLValid::EmptyElemNotationAttr , curElem.getFullName() , curAttDef.getFullName() ); break; } //go through enumeration list to check // distinct // notation declaration if (curAttDef.getEnumeration()) { checkTokenList(curAttDef, true); } } else if (curAttDef.getType() == XMLAttDef::Enumeration ) { //go through enumeration list to check // distinct only if (curAttDef.getEnumeration()) { checkTokenList(curAttDef, false); } } // If it has a default/fixed value, then validate it if (validateDefAttr && curAttDef.getValue()) { validateAttrValue ( &curAttDef , curAttDef.getValue() , true , &curElem ); } } } // // And enumerate all of the general entities. If any of them // reference a notation, then make sure the notation exists. // NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator(); while (entEnum.hasMoreElements()) { const DTDEntityDecl& curEntity = entEnum.nextElement(); if (!curEntity.getNotationName()) continue; // It has a notation name, so look it up if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName())) { emitError ( XMLValid::NotationNotDeclared , curEntity.getNotationName() ); } } }
void XercesDOMParser::setIgnoreCachedDTD(const bool newValue) { getScanner()->setIgnoredCachedDTD(newValue); }
void XercesDOMParser::useCachedGrammarInParse(const bool newState) { if (newState || !getScanner()->isCachingGrammarFromParse()) getScanner()->useCachedGrammarInParse(newState); }
bool XercesDOMParser::getIgnoreCachedDTD() const { return getScanner()->getIgnoreCachedDTD(); }
unsigned int XercesDOMParser::getSrcOffset() const { return getScanner()->getSrcOffset(); }