// --------------------------------------------------------------------------- // XMLRangeFactory: Range creation methods // --------------------------------------------------------------------------- void XMLRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) { if (fRangesCreated) return; if (!fKeywordsInitialized) { initializeKeywordMap(rangeTokMap); } TokenFactory* tokFactory = rangeTokMap->getTokenFactory(); // Create space ranges unsigned int wsTblLen = getTableLen(gWhitespaceChars); RangeToken* tok = tokFactory->createRange(); XMLInt32* wsRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate ( wsTblLen * sizeof(XMLInt32) );//new XMLInt32[wsTblLen]; tok->setRangeValues(wsRange, wsTblLen); setupRange(wsRange, gWhitespaceChars, 0); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLSpace, tok); tok = (RangeToken*) RangeToken::complementRanges(tok, tokFactory); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLSpace, tok , true); // Create digits ranges tok = tokFactory->createRange(); unsigned int digitTblLen = getTableLen(gDigitChars); XMLInt32* digitRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate ( digitTblLen * sizeof(XMLInt32) );//new XMLInt32[digitTblLen]; tok->setRangeValues(digitRange, digitTblLen); setupRange(digitRange, gDigitChars, 0); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLDigit, tok); tok = (RangeToken*) RangeToken::complementRanges(tok, tokFactory); // Build the internal map. 
tok->createMap(); rangeTokMap->setRangeToken(fgXMLDigit, tok , true); // Build word ranges unsigned int baseTblLen = getTableLen(gBaseChars); unsigned int ideoTblLen = getTableLen(gIdeographicChars); unsigned int wordRangeLen = baseTblLen + ideoTblLen + digitTblLen; XMLInt32* wordRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate ( wordRangeLen * sizeof(XMLInt32) );//new XMLInt32[wordRangeLen]; ArrayJanitor<XMLInt32> janWordRange(wordRange, XMLPlatformUtils::fgMemoryManager); setupRange(wordRange, gBaseChars, 0); setupRange(wordRange, gIdeographicChars, baseTblLen); memcpy(wordRange + baseTblLen + ideoTblLen, digitRange, digitTblLen * sizeof(XMLInt32)); // Create NameChar ranges tok = tokFactory->createRange(); unsigned int combTblLen = getTableLen(gCombiningChars); unsigned int extTblLen = getTableLen(gExtenderChars); unsigned int nameTblLen = wordRangeLen + combTblLen + extTblLen; XMLInt32* nameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate ( (nameTblLen + 8) * sizeof(XMLInt32) );//new XMLInt32[nameTblLen + 8]; tok->setRangeValues(nameRange, nameTblLen + 8); memcpy(nameRange, wordRange, wordRangeLen * sizeof(XMLInt32)); setupRange(nameRange, gCombiningChars, wordRangeLen); setupRange(nameRange, gExtenderChars, wordRangeLen + combTblLen); nameRange[nameTblLen++] = chDash; nameRange[nameTblLen++] = chDash; nameRange[nameTblLen++] = chColon; nameRange[nameTblLen++] = chColon; nameRange[nameTblLen++] = chPeriod; nameRange[nameTblLen++] = chPeriod; nameRange[nameTblLen++] = chUnderscore; nameRange[nameTblLen++] = chUnderscore; tok->sortRanges(); tok->compactRanges(); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLNameChar, tok); tok = (RangeToken*) RangeToken::complementRanges(tok, tokFactory); // Build the internal map. 
tok->createMap(); rangeTokMap->setRangeToken(fgXMLNameChar, tok , true); // Create initialNameChar ranges tok = tokFactory->createRange(); unsigned int initialNameTblLen = baseTblLen + ideoTblLen; XMLInt32* initialNameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate ( (initialNameTblLen + 4) * sizeof(XMLInt32) );//new XMLInt32[initialNameTblLen + 4]; tok->setRangeValues(initialNameRange, initialNameTblLen + 4); memcpy(initialNameRange, wordRange, initialNameTblLen * sizeof(XMLInt32)); initialNameRange[initialNameTblLen++] = chColon; initialNameRange[initialNameTblLen++] = chColon; initialNameRange[initialNameTblLen++] = chUnderscore; initialNameRange[initialNameTblLen++] = chUnderscore; tok->sortRanges(); tok->compactRanges(); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok); tok = (RangeToken*) RangeToken::complementRanges(tok, tokFactory); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok , true); // Create word range tok = tokFactory->createRange(); tok->setRangeValues(wordRange, wordRangeLen); janWordRange.orphan(); tok->sortRanges(); tok->compactRanges(); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLWord, tok); tok = (RangeToken*) RangeToken::complementRanges(tok, tokFactory); // Build the internal map. tok->createMap(); rangeTokMap->setRangeToken(fgXMLWord, tok , true); fRangesCreated = true; }
void RangeToken::mergeRanges(const Token *const tok) { if (tok->getTokenType() != this->getTokenType()) ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_MergeRangesTypeMismatch, fMemoryManager); RangeToken* rangeTok = (RangeToken *) tok; if (rangeTok->fRanges == 0) return; fCaseIToken = 0; sortRanges(); rangeTok->sortRanges(); if (fRanges == 0) { fMaxCount = rangeTok->fMaxCount; fRanges = (XMLInt32*) fMemoryManager->allocate ( fMaxCount * sizeof(XMLInt32) );//new XMLInt32[fMaxCount]; for (unsigned int index = 0; index < rangeTok->fElemCount; index++) { fRanges[index] = rangeTok->fRanges[index]; } fElemCount = rangeTok->fElemCount; return; } unsigned int newMaxCount = (fElemCount + rangeTok->fElemCount >= fMaxCount) ? fMaxCount + rangeTok->fMaxCount : fMaxCount; XMLInt32* result = (XMLInt32*) fMemoryManager->allocate ( newMaxCount * sizeof(XMLInt32) );//new XMLInt32[newMaxCount]; for (unsigned int i=0, j=0, k=0; i < fElemCount || j < rangeTok->fElemCount;) { if (i >= fElemCount) { for (int count = 0; count < 2; count++) { result[k++] = rangeTok->fRanges[j++]; } } else if (j >= rangeTok->fElemCount) { for (int count = 0; count < 2; count++) { result[k++] = fRanges[i++]; } } else if (rangeTok->fRanges[j] < fRanges[i] || (rangeTok->fRanges[j] == fRanges[i] && rangeTok->fRanges[j+1] < fRanges[i+1])) { for (int count = 0; count < 2; count++) { result[k++] = rangeTok->fRanges[j++]; } } else { for (int count = 0; count < 2; count++) { result[k++] = fRanges[i++]; } } } fMemoryManager->deallocate(fRanges);//delete [] fRanges; fElemCount += rangeTok->fElemCount; fRanges = result; fMaxCount = newMaxCount; }
/**
 * Parses a bracketed character class and returns the range token that
 * describes it.
 *
 * The parse context is switched to "in brackets" on entry and back to
 * "normal" before returning, and the token following the class is fetched.
 * Inside the class the following items are handled:
 *   - a leading '^', which makes the class a negated one;
 *   - the shorthand escapes \d \D \w \W \s \S \i \I \c \C and the category
 *     escapes \p / \P, whose ranges are merged in;
 *   - other escapes, which are decoded into a single character;
 *   - single characters and from-to ranges;
 *   - a class subtraction (REGX_T_XMLSCHEMA_CC_SUBTRACTION), parsed
 *     recursively and subtracted from the ranges collected so far.
 *
 * @param useNRange  For a negated class: when true the token is simply
 *                   marked as Token::T_NRANGE, when false the collected
 *                   ranges are complemented instead.
 *
 * @return The sorted and compacted range token.
 *
 * @throws ParseException
 *         Parser_Atom5 when \p / \P yields no token,
 *         Parser_CC2 when the expression ends inside the class,
 *         Parser_CC5 when a subtraction is not followed by ']',
 *         Parser_CC6 for an unescaped '[', ']' or '-' where not allowed,
 *         Parser_Ope3 when a range start is greater than its end.
 */
RangeToken* RegxParser::parseCharacterClass(const bool useNRange) {

    setParseContext(regexParserStateInBrackets);
    processNext();

    RangeToken* tok = 0;
    bool isNRange = false;

    if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
        isNRange = true;
        processNext();
    }
    tok = fTokenFactory->createRange();

    parserState type;
    bool firstLoop = true;
    bool wasDecoded;

    while ( (type = getState()) != REGX_T_EOF) {

        wasDecoded = false;

        // single range | from-to-range | subtraction
        // A ']' closes the class, except as the very first item.
        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop)
            break;

        XMLInt32 ch = getCharData();
        bool end = false;   // true once the current item is fully handled

        if (type == REGX_T_BACKSOLIDUS) {

            switch(ch) {
            case chLatin_d:
            case chLatin_D:
            case chLatin_w:
            case chLatin_W:
            case chLatin_s:
            case chLatin_S:
            case chLatin_i:
            case chLatin_I:
            case chLatin_c:
            case chLatin_C:
                {
                    tok->mergeRanges(getTokenForShorthand(ch));
                    end = true;
                }
                break;
            case chLatin_p:
            case chLatin_P:
                {
                    RangeToken* tok2 = processBacksolidus_pP(ch);

                    if (tok2 == 0) {
                        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, getMemoryManager());
                    }

                    tok->mergeRanges(tok2);
                    end = true;
                }
                break;
            case chDash:
                // Remember that this '-' came from an escape so that it is
                // not rejected by the "--" check below.
                wasDecoded = true;
                // fall thru to default.
            default:
                ch = decodeEscaped();
            }
        } // end if REGX_T_BACKSOLIDUS
        else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {

            // The subtraction applies to the negated set, so complement
            // first and drop the negation flag.
            if (isNRange)
            {
                tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
                isNRange=false;
            }
            RangeToken* rangeTok = parseCharacterClass(false);
            tok->subtractRanges(rangeTok);

            if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
                ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC5, getMemoryManager());
            }
            break;
        } // end if REGX_T_XMLSCHEMA...

        processNext();

        if (!end) {

            if (type == REGX_T_CHAR
                && (ch == chOpenSquare
                    || ch == chCloseSquare
                    || (ch == chDash && getCharData() == chCloseSquare && firstLoop))) {
                // if regex = [-] then invalid...
                // '[', ']', '-' not allowed and should be escaped
                // Explicit cast: brace-initializing an XMLCh from an XMLInt32
                // variable is a narrowing conversion.
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }
            if (ch == chDash && getCharData() == chDash && getState() != REGX_T_BACKSOLIDUS && !wasDecoded) {
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            if (getState() != REGX_T_CHAR || getCharData() != chDash) {
                // Not followed by '-': a single character.
                tok->addRange(ch, ch);
            }
            else {

                processNext();
                if ((type = getState()) == REGX_T_EOF)
                    ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

                if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
                    // "x-]": the dash is a literal last character.
                    tok->addRange(ch, ch);
                    tok->addRange(chDash, chDash);
                }
                else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
                    static const XMLCh dashStr[] = { chDash, chNull};
                    ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr, getMemoryManager());
                }
                else {

                    XMLInt32 rangeEnd = getCharData();
                    // Captured before any escape decoding, so error messages
                    // show the character as read from the expression.
                    XMLCh rangeEndStr[] = { static_cast<XMLCh>(rangeEnd), chNull };

                    if (type == REGX_T_CHAR) {

                        if (rangeEnd == chOpenSquare
                            || rangeEnd == chCloseSquare
                            || rangeEnd == chDash)
                            // '[', ']', '-' not allowed and should be escaped
                            ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
                    }
                    else if (type == REGX_T_BACKSOLIDUS) {
                        rangeEnd = decodeEscaped();
                    }

                    processNext();

                    if (ch > rangeEnd) {
                        XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                        ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr, getMemoryManager());
                    }

                    tok->addRange(ch, rangeEnd);
                }
            }
        }
        firstLoop = false;
    }

    // The loop ended without seeing the closing ']'.
    if (getState() == REGX_T_EOF)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

    if (isNRange)
    {
        if(useNRange)
            tok->setTokenType(Token::T_NRANGE);
        else
            tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
    }

    tok->sortRanges();
    tok->compactRanges();

    // If the case-insensitive option is enabled, we need to
    // have the new RangeToken instance build its internal
    // case-insensitive RangeToken.
    if (RegularExpression::isSet(fOptions, RegularExpression::IGNORE_CASE))
    {
        tok->getCaseInsensitiveToken(fTokenFactory);
    }

    setParseContext(regexParserStateNormal);
    processNext();

    return tok;
}