/*
 * Matches one character of the input against the range token carried by
 * 'op'.
 *
 *   context    - match context; supplies the limit (fLimit) and the
 *                character reader (nextCh).
 *   op         - operation whose token is treated as a RangeToken.
 *   offset     - in/out position in the input. It is only updated when the
 *                character matches.
 *   direction  - > 0 examines the character at 'offset'; otherwise the
 *                character just before 'offset'.
 *   ignoreCase - when true, the token's case-insensitive counterpart is
 *                used for the test.
 *
 * Returns true when the character is accepted by the range, false when the
 * position is out of bounds, no character could be read, or the character
 * is not in the range.
 */
bool RegularExpression::matchRange(Context* const context,
                                   const Op* const op,
                                   int& offset,
                                   const short direction,
                                   const bool ignoreCase)
{
    const bool forward = (direction > 0);

    // Position of the character to examine.
    int pos = offset;
    if (!forward)
        --pos;

    if (pos < 0 || pos >= context->fLimit)
        return false;

    // 'pos' is handed to nextCh as a modifiable variable.
    // NOTE(review): nextCh presumably may adjust it - confirm against
    // Context::nextCh.
    XMLInt32 current = 0;
    if (!context->nextCh(current, pos, direction))
        return false;

    RangeToken* range = (RangeToken *) op->getToken();
    if (ignoreCase)
        range = range->getCaseInsensitiveToken(fTokenFactory);

    if (!range->match(current))
        return false;

    // Moving forward, the new offset is one past the examined position;
    // moving backward, it is the examined position itself.
    offset = forward ? pos + 1 : pos;
    return true;
}
/* * Prepares for matching. This method is called during construction. */ void RegularExpression::prepare() { compile(fTokenTree); fMinLength = fTokenTree->getMinLength(); fFirstChar = 0; if (!isSet(fOptions, PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) && !isSet(fOptions, XMLSCHEMA_MODE)) { RangeToken* rangeTok = fTokenFactory->createRange(); int result = fTokenTree->analyzeFirstCharacter(rangeTok, fOptions, fTokenFactory); if (result == Token::FC_TERMINAL) { rangeTok->compactRanges(); fFirstChar = rangeTok; } rangeTok->createMap(); if (isSet(fOptions, IGNORE_CASE)) { rangeTok->getCaseInsensitiveToken(fTokenFactory); } } if (fOperations != 0 && fOperations->getNextOp() == 0 && (fOperations->getOpType() == Op::O_STRING || fOperations->getOpType() == Op::O_CHAR) && !isSet(fOptions, IGNORE_CASE) ) { fFixedStringOnly = true; if (fOperations->getOpType() == Op::O_STRING) { fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = XMLString::replicate(fOperations->getLiteral(), fMemoryManager); } else{ XMLInt32 ch = fOperations->getData(); if ( ch >= 0x10000) { // add as constant fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = RegxUtil::decomposeToSurrogates(ch, fMemoryManager); } else { XMLCh* dummyStr = (XMLCh*) fMemoryManager->allocate(2 * sizeof(XMLCh));//new XMLCh[2]; dummyStr[0] = (XMLCh) fOperations->getData(); dummyStr[1] = chNull; fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = dummyStr; } } fBMPattern = new (fMemoryManager) BMPattern(fFixedString, 256, isSet(fOptions, IGNORE_CASE), fMemoryManager); } else if (!isSet(fOptions, XMLSCHEMA_MODE) && !isSet(fOptions, PROHIBIT_FIXED_STRING_OPTIMIZATION) && !isSet(fOptions, IGNORE_CASE)) { int fixedOpts = 0; Token* tok = fTokenTree->findFixedString(fOptions, fixedOpts); fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = (tok == 0) ? 
0 : XMLString::replicate(tok->getString(), fMemoryManager); if (fFixedString != 0 && XMLString::stringLen(fFixedString) < 2) { fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = 0; } if (fFixedString != 0) { fBMPattern = new (fMemoryManager) BMPattern(fFixedString, 256, isSet(fixedOpts, IGNORE_CASE), fMemoryManager); } } }
/*
 * Parses a bracketed character class and returns it as a RangeToken.
 *
 * The parse context is switched to regexParserStateInBrackets on entry and
 * back to regexParserStateNormal before returning. A leading '^' marks the
 * class as negated.
 *
 *   useNRange - when the class is negated: if true the resulting token is
 *               tagged Token::T_NRANGE; if false the token is replaced by
 *               its complement (RangeToken::complementRanges).
 *
 * Inside the brackets the loop accepts:
 *   - shorthand escapes (\d \D \w \W \s \S \i \I \c \C) and \p / \P
 *     categories, merged into the result;
 *   - other escapes, decoded via decodeEscaped();
 *   - a subtraction (REGX_T_XMLSCHEMA_CC_SUBTRACTION, not as the first
 *     item), parsed recursively and subtracted from the result;
 *   - single characters and "from-to" ranges.
 *
 * Throws ParseException with:
 *   Parser_Atom5 - processBacksolidus_pP() returned no token;
 *   Parser_CC5   - a subtraction is not followed by ']';
 *   Parser_CC6   - an unescaped '[', ']' or '-' in a disallowed position;
 *   Parser_CC2   - end of input reached inside the class;
 *   Parser_Ope3  - range start is greater than range end.
 *
 * Note: the one-character strings passed to the exception macros are built
 * with an explicit cast from XMLInt32 to XMLCh. Brace-initializing an XMLCh
 * from a non-constant XMLInt32 is a narrowing conversion, which is
 * ill-formed in list-initialization under C++11 and later. The cast keeps
 * the same truncated value that the implicit conversion produced.
 */
RangeToken* RegxParser::parseCharacterClass(const bool useNRange) {

    setParseContext(regexParserStateInBrackets);
    processNext();

    RangeToken* tok = 0;
    bool isNRange = false;

    // A leading '^' negates the class.
    if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
        isNRange = true;
        processNext();
    }

    tok = fTokenFactory->createRange();

    parserState type;
    bool firstLoop = true;
    bool wasDecoded;

    while ( (type = getState()) != REGX_T_EOF) {

        wasDecoded = false;

        // single range | from-to-range | subtraction
        // A ']' closes the class, except as the very first item.
        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop)
            break;

        XMLInt32 ch = getCharData();
        bool end = false;   // true when the item was fully handled as a shorthand/category

        if (type == REGX_T_BACKSOLIDUS) {

            switch(ch) {
            case chLatin_d:
            case chLatin_D:
            case chLatin_w:
            case chLatin_W:
            case chLatin_s:
            case chLatin_S:
            case chLatin_i:
            case chLatin_I:
            case chLatin_c:
            case chLatin_C:
                {
                    tok->mergeRanges(getTokenForShorthand(ch));
                    end = true;
                }
                break;
            case chLatin_p:
            case chLatin_P:
                {
                    RangeToken* tok2 = processBacksolidus_pP(ch);

                    if (tok2 == 0) {
                        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, getMemoryManager());
                    }

                    tok->mergeRanges(tok2);
                    end = true;
                }
                break;
            case chDash:
                // Remember that this '-' came from an escape, so the
                // "--" check further down does not reject it.
                wasDecoded = true;
                // fall thru to default.
            default:
                ch = decodeEscaped();
            }
        } // end if REGX_T_BACKSOLIDUS
        else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {

            // Negation applies to what was collected so far, before the
            // subtraction is carried out.
            if (isNRange) {
                tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
                isNRange=false;
            }

            RangeToken* rangeTok = parseCharacterClass(false);
            tok->subtractRanges(rangeTok);

            if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
                ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC5, getMemoryManager());
            }
            break;
        } // end if REGX_T_XMLSCHEMA...

        processNext();

        if (!end) {

            if (type == REGX_T_CHAR
                && (ch == chOpenSquare
                    || ch == chCloseSquare
                    || (ch == chDash && getCharData() == chCloseSquare && firstLoop))) {
                // if regex = [-] then invalid...
                // '[', ']', '-' not allowed and should be escaped
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            // Two consecutive unescaped dashes are rejected.
            if (ch == chDash && getCharData() == chDash && getState() != REGX_T_BACKSOLIDUS && !wasDecoded) {
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            if (getState() != REGX_T_CHAR || getCharData() != chDash) {
                // Not followed by '-': a single character.
                tok->addRange(ch, ch);
            }
            else {
                // Followed by '-': look at what comes after the dash.
                processNext();

                if ((type = getState()) == REGX_T_EOF)
                    ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

                if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
                    // "x-]" : the dash is a literal member of the class.
                    tok->addRange(ch, ch);
                    tok->addRange(chDash, chDash);
                }
                else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
                    static const XMLCh dashStr[] = { chDash, chNull};
                    ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr, getMemoryManager());
                }
                else {

                    XMLInt32 rangeEnd = getCharData();
                    // Built from the character as read, i.e. before any
                    // escape decoding below.
                    XMLCh rangeEndStr[] = { static_cast<XMLCh>(rangeEnd), chNull };

                    if (type == REGX_T_CHAR) {
                        if (rangeEnd == chOpenSquare
                            || rangeEnd == chCloseSquare
                            || rangeEnd == chDash)
                            // '[', ']', '-' not allowed and should be escaped
                            ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
                    }
                    else if (type == REGX_T_BACKSOLIDUS) {
                        rangeEnd = decodeEscaped();
                    }

                    processNext();

                    if (ch > rangeEnd) {
                        XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                        ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr, getMemoryManager());
                    }

                    tok->addRange(ch, rangeEnd);
                }
            }
        }
        firstLoop = false;
    }

    // The loop ended without seeing the closing ']'.
    if (getState() == REGX_T_EOF)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

    if (isNRange) {
        if(useNRange)
            tok->setTokenType(Token::T_NRANGE);
        else
            tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
    }

    tok->sortRanges();
    tok->compactRanges();

    // If the case-insensitive option is enabled, we need to
    // have the new RangeToken instance build its internal
    // case-insensitive RangeToken.
    if (RegularExpression::isSet(fOptions, RegularExpression::IGNORE_CASE)) {
        tok->getCaseInsensitiveToken(fTokenFactory);
    }

    setParseContext(regexParserStateNormal);
    processNext();

    return tok;
}