// --------------------------------------------------------------------------- // DatatypeValidatorFactory: Registry initialization methods // --------------------------------------------------------------------------- void DatatypeValidatorFactory::expandRegistryToFullSchemaSet() { if (!sBuiltInRegistryMutexRegistered) { if (!sBuiltInRegistryMutex) { XMLMutexLock lock(XMLPlatformUtils::fgAtomicMutex); if (!sBuiltInRegistryMutex) sBuiltInRegistryMutex = new XMLMutex(XMLPlatformUtils::fgMemoryManager); } // Use a faux scope to synchronize while we do this { XMLMutexLock lock(sBuiltInRegistryMutex); // If we got here first, then register it and set the registered flag if (!sBuiltInRegistryMutexRegistered) { //Initialize common Schema/DTD Datatype validator set fBuiltInRegistry = new RefHashTableOf<DatatypeValidator>(29); DatatypeValidator *dv = new StringDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_STRING, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_STRING, dv); dv = new NOTATIONDatatypeValidator(); dv->setTypeName(XMLUni::fgNotationString, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) XMLUni::fgNotationString, dv); dv = new AnySimpleTypeDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_ANYSIMPLETYPE, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_ANYSIMPLETYPE, dv); dv = new BooleanDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_BOOLEAN, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_BOOLEAN, dv); dv = new DecimalDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DECIMAL, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DECIMAL, dv); dv = new HexBinaryDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_HEXBINARY, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_HEXBINARY, dv); dv = new 
Base64BinaryDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_BASE64BINARY, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_BASE64BINARY, dv); dv = new DoubleDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DOUBLE, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DOUBLE, dv); dv = new FloatDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_FLOAT, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_FLOAT, dv); dv = new AnyURIDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_ANYURI, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_ANYURI, dv); dv = new QNameDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_QNAME, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_QNAME, dv); dv = new DateTimeDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DATETIME, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DATETIME, dv); dv = new DateDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DATE, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DATE, dv); dv = new TimeDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_TIME, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_TIME, dv); dv = new DayDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DAY, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DAY, dv); dv = new MonthDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_MONTH, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_MONTH, dv); dv = new MonthDayDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_MONTHDAY, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_MONTHDAY, dv); dv = 
new YearDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_YEAR, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_YEAR, dv); dv = new YearMonthDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_YEARMONTH, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_YEARMONTH, dv); dv = new DurationDatatypeValidator(); dv->setTypeName(SchemaSymbols::fgDT_DURATION, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_DURATION, dv); // REVISIT // We are creating a lot of Hashtables for the facets of the different // validators. It's better to have some kind of a memory pool and ask // the pool to give us a new instance of the hashtable. RefHashTableOf<KVStringPair>* facets = new RefHashTableOf<KVStringPair>(3); // Create 'normalizedString' datatype validator facets->put((void*) SchemaSymbols::fgELT_WHITESPACE, new KVStringPair(SchemaSymbols::fgELT_WHITESPACE, SchemaSymbols::fgWS_REPLACE)); createDatatypeValidator(SchemaSymbols::fgDT_NORMALIZEDSTRING, getDatatypeValidator(SchemaSymbols::fgDT_STRING), facets, 0, false, 0, false); // Create 'token' datatype validator facets = new RefHashTableOf<KVStringPair>(3); facets->put((void*) SchemaSymbols::fgELT_WHITESPACE, new KVStringPair(SchemaSymbols::fgELT_WHITESPACE, SchemaSymbols::fgWS_COLLAPSE)); createDatatypeValidator(SchemaSymbols::fgDT_TOKEN, getDatatypeValidator(SchemaSymbols::fgDT_NORMALIZEDSTRING), facets, 0, false, 0, false); dv = new NameDatatypeValidator(getDatatypeValidator(SchemaSymbols::fgDT_TOKEN), 0, 0, 0); dv->setTypeName(SchemaSymbols::fgDT_NAME, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_NAME, dv); dv = new NCNameDatatypeValidator(getDatatypeValidator(SchemaSymbols::fgDT_NAME), 0, 0, 0); dv->setTypeName(SchemaSymbols::fgDT_NCNAME, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) SchemaSymbols::fgDT_NCNAME, dv); // Create 
'NMTOKEN' datatype validator facets = new RefHashTableOf<KVStringPair>(3); facets->put((void*) SchemaSymbols::fgELT_PATTERN , new KVStringPair(SchemaSymbols::fgELT_PATTERN,fgTokPattern)); facets->put((void*) SchemaSymbols::fgELT_WHITESPACE, new KVStringPair(SchemaSymbols::fgELT_WHITESPACE, SchemaSymbols::fgWS_COLLAPSE)); createDatatypeValidator(XMLUni::fgNmTokenString, getDatatypeValidator(SchemaSymbols::fgDT_TOKEN),facets, 0, false, 0, false); // Create 'NMTOKENS' datatype validator createDatatypeValidator(XMLUni::fgNmTokensString, getDatatypeValidator(XMLUni::fgNmTokenString), 0, 0, true, 0, false); // Create 'language' datatype validator facets = new RefHashTableOf<KVStringPair>(3); facets->put((void*) SchemaSymbols::fgELT_PATTERN, new KVStringPair(SchemaSymbols::fgELT_PATTERN, XMLUni::fgLangPattern)); createDatatypeValidator(SchemaSymbols::fgDT_LANGUAGE, getDatatypeValidator(SchemaSymbols::fgDT_TOKEN), facets, 0, false, 0, false); // Create 'integer' datatype validator facets = new RefHashTableOf<KVStringPair>(3); facets->put((void*) SchemaSymbols::fgELT_FRACTIONDIGITS, new KVStringPair(SchemaSymbols::fgELT_FRACTIONDIGITS, XMLUni::fgValueZero)); facets->put((void*) SchemaSymbols::fgELT_PATTERN, new KVStringPair(SchemaSymbols::fgELT_PATTERN, fgIntegerPattern)); createDatatypeValidator(SchemaSymbols::fgDT_INTEGER, getDatatypeValidator(SchemaSymbols::fgDT_DECIMAL), facets, 0, false, 0, false); // Create 'nonPositiveInteger' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgValueZero)); createDatatypeValidator(SchemaSymbols::fgDT_NONPOSITIVEINTEGER, getDatatypeValidator(SchemaSymbols::fgDT_INTEGER), facets, 0, false, 0, false); // Create 'negativeInteger' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, 
XMLUni::fgNegOne)); createDatatypeValidator(SchemaSymbols::fgDT_NEGATIVEINTEGER, getDatatypeValidator(SchemaSymbols::fgDT_NONPOSITIVEINTEGER), facets, 0, false, 0, false); // Create 'long' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgLongMaxInc)); facets->put((void*) SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgLongMinInc)); createDatatypeValidator(SchemaSymbols::fgDT_LONG, getDatatypeValidator(SchemaSymbols::fgDT_INTEGER), facets, 0, false, 0, false); // Create 'int' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgIntMaxInc)); facets->put((void*) SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgIntMinInc)); createDatatypeValidator(SchemaSymbols::fgDT_INT, getDatatypeValidator(SchemaSymbols::fgDT_LONG), facets, 0, false, 0, false); // Create 'short' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgShortMaxInc)); facets->put((void*) SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgShortMinInc)); createDatatypeValidator(SchemaSymbols::fgDT_SHORT, getDatatypeValidator(SchemaSymbols::fgDT_INT), facets, 0, false, 0 ,false); // Create 'byte' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgByteMaxInc)); facets->put((void*) SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgByteMinInc)); createDatatypeValidator(SchemaSymbols::fgDT_BYTE, 
getDatatypeValidator(SchemaSymbols::fgDT_SHORT), facets, 0, false, 0, false); // Create 'nonNegativeInteger' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgValueZero)); createDatatypeValidator(SchemaSymbols::fgDT_NONNEGATIVEINTEGER, getDatatypeValidator(SchemaSymbols::fgDT_INTEGER), facets, 0, false, 0, false); // Create 'unsignedLong' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgULongMaxInc)); createDatatypeValidator(SchemaSymbols::fgDT_ULONG, getDatatypeValidator(SchemaSymbols::fgDT_NONNEGATIVEINTEGER), facets, 0, false, 0, false); // Create 'unsignedInt' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgUIntMaxInc)); createDatatypeValidator(SchemaSymbols::fgDT_UINT, getDatatypeValidator(SchemaSymbols::fgDT_ULONG), facets, 0, false, 0, false); // Create 'unsignedShort' datatypeValidator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgUShortMaxInc)); createDatatypeValidator(SchemaSymbols::fgDT_USHORT, getDatatypeValidator(SchemaSymbols::fgDT_UINT), facets, 0, false, 0, false); // Create 'unsignedByte' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) SchemaSymbols::fgELT_MAXINCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MAXINCLUSIVE, XMLUni::fgUByteMaxInc)); createDatatypeValidator(SchemaSymbols::fgDT_UBYTE, getDatatypeValidator(SchemaSymbols::fgDT_USHORT), facets, 0, false, 0, false); // Create 'positiveInteger' datatype validator facets = new RefHashTableOf<KVStringPair>(2); facets->put((void*) 
SchemaSymbols::fgELT_MININCLUSIVE, new KVStringPair(SchemaSymbols::fgELT_MININCLUSIVE, XMLUni::fgValueOne)); createDatatypeValidator(SchemaSymbols::fgDT_POSITIVEINTEGER, getDatatypeValidator(SchemaSymbols::fgDT_NONNEGATIVEINTEGER), facets, 0, false, 0, false); // Create 'ID', 'IDREF' and 'ENTITY' datatype validator dv = new IDDatatypeValidator(getDatatypeValidator(SchemaSymbols::fgDT_NCNAME), 0, 0, 0); dv->setTypeName(XMLUni::fgIDString, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) XMLUni::fgIDString, dv); dv = new IDREFDatatypeValidator(getDatatypeValidator(SchemaSymbols::fgDT_NCNAME), 0, 0, 0); dv->setTypeName(XMLUni::fgIDRefString, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) XMLUni::fgIDRefString, dv); dv = new ENTITYDatatypeValidator(getDatatypeValidator(SchemaSymbols::fgDT_NCNAME), 0, 0, 0); dv->setTypeName(XMLUni::fgEntityString, SchemaSymbols::fgURI_SCHEMAFORSCHEMA); fBuiltInRegistry->put((void*) XMLUni::fgEntityString, dv); // Create 'IDREFS' datatype validator createDatatypeValidator ( XMLUni::fgIDRefsString , getDatatypeValidator(XMLUni::fgIDRefString) , 0 , 0 , true , 0 , false ); // Create 'ENTITIES' datatype validator createDatatypeValidator ( XMLUni::fgEntitiesString , getDatatypeValidator(XMLUni::fgEntityString) , 0 , 0 , true , 0 , false ); initCanRepRegistory(); // register cleanup method builtInRegistryCleanup.registerCleanup(DatatypeValidatorFactory::reinitRegistry); sBuiltInRegistryMutexRegistered = true; } } } }
//
//  Looks up the top-level schema component of category 'compCategory' whose
//  element local name is 'compName' and whose "name" attribute equals 'name'.
//
//  Every component of the requested kind met during the scan is stored in
//  the per-category table fTopLevelComponents[compCategory], and the element
//  most recently visited is recorded in fLastTopLevelComponent[compCategory].
//  A later call for the same category first consults the table and, on a
//  miss, resumes scanning from the recorded element.
//
//  Children of a "redefine" element are scanned as well, unless the redefine
//  (or the child) is listed in fFailedRedefineList.
//
//  Returns the matching element, or 0 when compCategory is out of range,
//  the schema root has no child elements, or no match is found.
//
DOMElement*
SchemaInfo::getTopLevelComponent(const unsigned short compCategory,
                                 const XMLCh* const compName,
                                 const XMLCh* const name)
{
    if (compCategory >= C_Count)
        return 0;

    DOMElement* current = XUtil::getFirstChildElement(fSchemaRootElement);
    if (!current)
        return 0;

    RefHashTableOf<DOMElement>* compList = fTopLevelComponents[compCategory];

    if (compList == 0) {
        // First request for this category: create its lookup table and
        // scan from the first child.
        compList = new (fMemoryManager) RefHashTableOf<DOMElement>(17, false, fMemoryManager);
        fTopLevelComponents[compCategory] = compList;
    }
    else {
        DOMElement* cachedChild = compList->get(name);
        if (cachedChild)
            return cachedChild;

        // Resume from where the previous scan stopped. If no resume point
        // is recorded, keep the first child as the starting point rather
        // than dereferencing a null pointer below.
        DOMElement* const resumeFrom = fLastTopLevelComponent[compCategory];
        if (resumeFrom != 0)
            current = resumeFrom;
    }

    // Remember the parent only when it is a "redefine" element, so that the
    // scan can continue with the redefine's siblings once its children are
    // exhausted. A missing parent is treated like a non-redefine parent.
    DOMElement* redefParent = (DOMElement*) current->getParentNode();
    if (redefParent
        && !XMLString::equals(redefParent->getLocalName(), SchemaSymbols::fgELT_REDEFINE))
        redefParent = 0;

    while (current != 0) {

        fLastTopLevelComponent[compCategory] = current;

        if (XMLString::equals(current->getLocalName(), compName)) {

            const XMLCh* currentName = current->getAttribute(SchemaSymbols::fgATT_NAME);
            compList->put((void*) currentName, current);

            if (XMLString::equals(currentName, name))
                return current;
        }
        else if (XMLString::equals(current->getLocalName(), SchemaSymbols::fgELT_REDEFINE)
                 && (!fFailedRedefineList || !fFailedRedefineList->containsElement(current))) {

            // Scan the children of a (non-failed) redefine.
            for (DOMElement* nested = XUtil::getFirstChildElement(current);
                 nested != 0;
                 nested = XUtil::getNextSiblingElement(nested)) {

                fLastTopLevelComponent[compCategory] = nested;

                if ((!fFailedRedefineList || !fFailedRedefineList->containsElement(nested))
                    && XMLString::equals(nested->getLocalName(), compName)) {

                    const XMLCh* nestedName = nested->getAttribute(SchemaSymbols::fgATT_NAME);
                    compList->put((void*) nestedName, nested);

                    if (XMLString::equals(nestedName, name))
                        return nested;
                }
            }
        }

        current = XUtil::getNextSiblingElement(current);

        // Ran off the end of a redefine's children: continue with the
        // element following the redefine itself.
        if (current == 0 && redefParent) {
            current = XUtil::getNextSiblingElement(redefParent);
            redefParent = 0;
        }
    }

    // The loop only ends when there is nothing left to scan.
    return 0;
}
// --------------------------------------------------------------------------- // DFAContentModel: Private helper methods // --------------------------------------------------------------------------- void DFAContentModel::buildDFA(ContentSpecNode* const curNode) { unsigned int index; // // The first step we need to take is to rewrite the content model using // our CMNode objects, and in the process get rid of any repetition short // cuts, converting them into '*' style repetitions or getting rid of // repetitions altogether. // // The conversions done are: // // x+ -> (x|x*) // x? -> (x|epsilon) // // This is a relatively complex scenario. What is happening is that we // create a top level binary node of which the special EOC value is set // as the right side node. The the left side is set to the rewritten // syntax tree. The source is the original content model info from the // decl pool. The rewrite is done by buildSyntaxTree() which recurses the // decl pool's content of the element and builds a new tree in the // process. // // Note that, during this operation, we set each non-epsilon leaf node's // DFA state position and count the number of such leafs, which is left // in the fLeafCount member. // CMLeaf* nodeEOC = new (fMemoryManager) CMLeaf ( new (fMemoryManager) QName ( XMLUni::fgZeroLenString , XMLUni::fgZeroLenString , XMLContentModel::gEOCFakeId , fMemoryManager ) , ~0 , true , fMemoryManager ); CMNode* nodeOrgContent = buildSyntaxTree(curNode); fHeadNode = new (fMemoryManager) CMBinaryOp ( ContentSpecNode::Sequence , nodeOrgContent , nodeEOC , fMemoryManager ); // // And handle specially the EOC node, which also must be numbered and // counted as a non-epsilon leaf node. It could not be handled in the // above tree build because it was created before all that started. We // save the EOC position since its used during the DFA building loop. 
// fEOCPos = fLeafCount; nodeEOC->setPosition(fLeafCount++); // // Ok, so now we have to iterate the new tree and do a little more work // now that we know the leaf count. One thing we need to do is to // calculate the first and last position sets of each node. This is // cached away in each of the nodes. // // Along the way we also set the leaf count in each node as the maximum // state count. They must know this in order to create their first/last // position sets. // // We also need to build an array of references to the non-epsilon // leaf nodes. Since we iterate here the same way as we did during the // initial tree build (which built their position numbers, we will put // them in the array according to their position values. // fLeafList = (CMLeaf**) fMemoryManager->allocate(fLeafCount*sizeof(CMLeaf*)); //new CMLeaf*[fLeafCount]; fLeafListType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate ( fLeafCount * sizeof(ContentSpecNode::NodeTypes) ); //new ContentSpecNode::NodeTypes[fLeafCount]; postTreeBuildInit(fHeadNode, 0); // // And, moving onward... We now need to build the follow position sets // for all the nodes. So we allocate an array of pointers to state sets, // one for each leaf node (i.e. each significant DFA position.) // fFollowList = (CMStateSet**) fMemoryManager->allocate ( fLeafCount * sizeof(CMStateSet*) ); //new CMStateSet*[fLeafCount]; for (index = 0; index < fLeafCount; index++) fFollowList[index] = new (fMemoryManager) CMStateSet(fLeafCount, fMemoryManager); calcFollowList(fHeadNode); // // Check to see whether this content model can handle an empty content, // which is something we need to optimize by looking now before we // throw away the info that would tell us that. // // If the left node of the head (the top level of the original content) // is nullable, then its true. // fEmptyOk = nodeOrgContent->isNullable(); // // And finally the big push... Now we build the DFA using all the states // and the tree we've built up. 
First we set up the various data // structures we are going to use while we do this. // // First of all we need an array of unique element ids in our content // model. For each transition table entry, we need a set of contiguous // indices to represent the transitions for a particular input element. // So we need to a zero based range of indexes that map to element types. // This element map provides that mapping. // fElemMap = (QName**) fMemoryManager->allocate ( fLeafCount * sizeof(QName*) ); //new QName*[fLeafCount]; fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate ( fLeafCount * sizeof(ContentSpecNode::NodeTypes) ); //new ContentSpecNode::NodeTypes[fLeafCount]; fElemMapSize = 0; for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++) { fElemMap[outIndex] = new (fMemoryManager) QName(fMemoryManager); if ( (fLeafListType[outIndex] & 0x0f) != ContentSpecNode::Leaf ) if (!fLeafNameTypeVector) fLeafNameTypeVector = new (fMemoryManager) ContentLeafNameTypeVector(fMemoryManager); // Get the current leaf's element index const QName* element = fLeafList[outIndex]->getElement(); const XMLCh* elementRawName = 0; if (fDTD && element) elementRawName = element->getRawName(); // See if the current leaf node's element index is in the list unsigned int inIndex = 0; for (; inIndex < fElemMapSize; inIndex++) { const QName* inElem = fElemMap[inIndex]; if (fDTD) { if (XMLString::equals(inElem->getRawName(), elementRawName)) { break; } } else { if ((fElemMapType[inIndex] == fLeafListType[outIndex]) && (inElem->getURI() == element->getURI()) && (XMLString::equals(inElem->getLocalPart(), element->getLocalPart()))) { break; } } } // If it was not in the list, then add it and bump the map size if (inIndex == fElemMapSize) { fElemMap[fElemMapSize]->setValues(*element); fElemMapType[fElemMapSize] = fLeafListType[outIndex]; ++fElemMapSize; } } // set up the fLeafNameTypeVector object if there is one. 
if (fLeafNameTypeVector) { fLeafNameTypeVector->setValues(fElemMap, fElemMapType, fElemMapSize); } /*** * Optimization(Jan, 2001); We sort fLeafList according to * elemIndex which is *uniquely* associated to each leaf. * We are *assuming* that each element appears in at least one leaf. **/ // don't forget to delete it int *fLeafSorter = (int*) fMemoryManager->allocate ( (fLeafCount + fElemMapSize) * sizeof(int) ); //new int[fLeafCount + fElemMapSize]; unsigned int fSortCount = 0; for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { const QName* element = fElemMap[elemIndex]; const XMLCh* elementRawName = 0; if (fDTD && element) elementRawName = element->getRawName(); for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { const QName* leaf = fLeafList[leafIndex]->getElement(); if (fDTD) { if (XMLString::equals(leaf->getRawName(), elementRawName)) { fLeafSorter[fSortCount++] = leafIndex; } } else { if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) && (leaf->getURI() == element->getURI()) && (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { fLeafSorter[fSortCount++] = leafIndex; } } } fLeafSorter[fSortCount++] = -1; } // // Next lets create some arrays, some that that hold transient info // during the DFA build and some that are permament. These are kind of // sticky since we cannot know how big they will get, but we don't want // to use any collection type classes because of performance. // // Basically they will probably be about fLeafCount*2 on average, but can // be as large as 2^(fLeafCount*2), worst case. So we start with // fLeafCount*4 as a middle ground. This will be very unlikely to ever // have to expand though, it if does, the overhead will be somewhat ugly. 
// unsigned int curArraySize = fLeafCount * 4; const CMStateSet** statesToDo = (const CMStateSet**) fMemoryManager->allocate ( curArraySize * sizeof(const CMStateSet*) ); //new const CMStateSet*[curArraySize]; fFinalStateFlags = (bool*) fMemoryManager->allocate ( curArraySize * sizeof(bool) ); //new bool[curArraySize]; fTransTable = (unsigned int**) fMemoryManager->allocate ( curArraySize * sizeof(unsigned int*) ); //new unsigned int*[curArraySize]; // // Ok we start with the initial set as the first pos set of the head node // (which is the seq node that holds the content model and the EOC node.) // const CMStateSet* setT = new (fMemoryManager) CMStateSet(fHeadNode->getFirstPos()); // // Init our two state flags. Basically the unmarked state counter is // always chasing the current state counter. When it catches up, that // means we made a pass through that did not add any new states to the // lists, at which time we are done. We could have used a expanding array // of flags which we used to mark off states as we complete them, but // this is easier though less readable maybe. // unsigned int unmarkedState = 0; unsigned int curState = 0; // // Init the first transition table entry, and put the initial state // into the states to do list, then bump the current state. // fTransTable[curState] = makeDefStateList(); statesToDo[curState] = setT; curState++; // // the stateTable is an auxiliary means to fast // identification of new state created (instead // of squential loop statesToDo to find out), // while the role that statesToDo plays remain unchanged. // // TODO: in the future, we may change the 29 to something // derived from curArraySize. RefHashTableOf<XMLInteger> *stateTable = new (fMemoryManager) RefHashTableOf<XMLInteger> ( curArraySize , true , new (fMemoryManager) HashCMStateSet() , fMemoryManager ); //stateTable->put((CMStateSet*)setT, new (fMemoryManager) XMLInteger(0)); // // Ok, almost done with the algorithm from hell... 
We now enter the // loop where we go until the states done counter catches up with // the states to do counter. // CMStateSet* newSet = 0; while (unmarkedState < curState) { // // Get the next unmarked state out of the list of states to do. // And get the associated transition table entry. // setT = statesToDo[unmarkedState]; unsigned int* transEntry = fTransTable[unmarkedState]; // Mark this one final if it contains the EOC state fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos); // Bump up the unmarked state count, marking this state done unmarkedState++; // Optimization(Jan, 2001) unsigned int sorterIndex = 0; // Optimization(Jan, 2001) // Loop through each possible input symbol in the element map for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { // // Build up a set of states which is the union of all of the // follow sets of DFA positions that are in the current state. If // we gave away the new set last time through then create a new // one. Otherwise, zero out the existing one. // if (!newSet) newSet = new (fMemoryManager) CMStateSet ( fLeafCount , fMemoryManager ); else newSet->zeroBits(); #ifdef OBSOLETED // unoptimized code for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { // If this leaf index (DFA position) is in the current set... if (setT->getBit(leafIndex)) { // // If this leaf is the current input symbol, then we want // to add its follow list to the set of states to transition // to from the current state. 
// const QName* leaf = fLeafList[leafIndex]->getElement(); const QName* element = fElemMap[elemIndex]; if (fDTD) { if (XMLString::equals(leaf->getRawName(), element->getRawName())) { *newSet |= *fFollowList[leafIndex]; } } else { if ((leaf->getURI() == element->getURI()) && (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { *newSet |= *fFollowList[leafIndex]; } } } } // for leafIndex #endif // Optimization(Jan, 2001) int leafIndex = fLeafSorter[sorterIndex++]; while (leafIndex != -1) { // If this leaf index (DFA position) is in the current set... if (setT->getBit(leafIndex)) { // // If this leaf is the current input symbol, then we // want to add its follow list to the set of states to // transition to from the current state. // *newSet |= *fFollowList[leafIndex]; } leafIndex = fLeafSorter[sorterIndex++]; } // while (leafIndex != -1) // // If this new set is not empty, then see if its in the list // of states to do. If not, then add it. // if (!newSet->isEmpty()) { // // Search the 'states to do' list to see if this new // state set is already in there. // /*** unsigned int stateIndex = 0; for (; stateIndex < curState; stateIndex++) { if (*statesToDo[stateIndex] == *newSet) break; } ***/ XMLInteger *stateObj = (XMLInteger*) (stateTable->get(newSet)); unsigned int stateIndex = (stateObj == 0 ? curState : stateObj->intValue()); // If we did not find it, then add it if (stateIndex == curState) { // // Put this new state into the states to do and init // a new entry at the same index in the transition // table. // statesToDo[curState] = newSet; fTransTable[curState] = makeDefStateList(); stateTable->put ( newSet , new (fMemoryManager) XMLInteger(curState) ); // We now have a new state to do so bump the count curState++; // // Null out the new set to indicate we adopted it. This // will cause the creation of a new set on the next time // around the loop. 
// newSet = 0; } // // Now set this state in the transition table's entry for this // element (using its index), with the DFA state we will move // to from the current state when we see this input element. // transEntry[elemIndex] = stateIndex; // Expand the arrays if we're full if (curState == curArraySize) { // // Yikes, we overflowed the initial array size, so we've // got to expand all of these arrays. So adjust up the // size by 50% and allocate new arrays. // const unsigned int newSize = (unsigned int)(curArraySize * 1.5); const CMStateSet** newToDo = (const CMStateSet**) fMemoryManager->allocate ( newSize * sizeof(const CMStateSet*) ); //new const CMStateSet*[newSize]; bool* newFinalFlags = (bool*) fMemoryManager->allocate ( newSize * sizeof(bool) ); //new bool[newSize]; unsigned int** newTransTable = (unsigned int**) fMemoryManager->allocate ( newSize * sizeof(unsigned int*) ); //new unsigned int*[newSize]; // Copy over all of the existing content for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++) { newToDo[expIndex] = statesToDo[expIndex]; newFinalFlags[expIndex] = fFinalStateFlags[expIndex]; newTransTable[expIndex] = fTransTable[expIndex]; } // Clean up the old stuff fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags; fMemoryManager->deallocate(fTransTable); //delete [] fTransTable; // Store the new array size and pointers curArraySize = newSize; statesToDo = newToDo; fFinalStateFlags = newFinalFlags; fTransTable = newTransTable; } //if (curState == curArraySize) } //if (!newSet->isEmpty()) } // for elemIndex } //while // Store the current state count in the trans table size fTransTableSize = curState; // If the last temp set was not stored, then clean it up if (newSet) delete newSet; // // Now we can clean up all of the temporary data that was needed during // DFA build. 
// // // Note on memory leak: Bugzilla#2707: // =================================== // The CMBinary, pointed to by fHeadNode, shall be released by // deleted by itself. // // Change has been made to postTreeBuildInit() such that fLeafList[] // would maintain its **OWN** copy of CMLeaf to avoid double deletion // of CMLeaf. // delete fHeadNode; for (index = 0; index < fLeafCount; index++) delete fFollowList[index]; fMemoryManager->deallocate(fFollowList); //delete [] fFollowList; // // removeAll() will delete all data, XMLInteger, // while the keys are to be deleted by the // deletion of statesToDo. // delete stateTable; for (index = 0; index < curState; index++) delete (CMStateSet*)statesToDo[index]; fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; for (index = 0; index < fLeafCount; index++) delete fLeafList[index]; fMemoryManager->deallocate(fLeafList); //delete [] fLeafList; fMemoryManager->deallocate(fLeafSorter); //delete [] fLeafSorter; }