/**
 * Splits `expression` into tokens, treating every match of this regular
 * expression found at positions start..end as a separator.
 *
 * @param expression  Null-terminated text to split.
 * @param start       First index at which a match is attempted; also the
 *                    start of the first token.
 * @param end         Last index (inclusive) at which a match is attempted.
 * @param subEx       Optional collector. When non-null, a Match object is
 *                    appended for every separator that is found.
 * @return A newly allocated vector of token strings (allocated from
 *         fMemoryManager). Presumably owned by the caller - confirm against
 *         the class documentation.
 *
 * When the pattern is the empty string no empty tokens are added to the
 * result (see the comments in the body).
 */
RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const expression,
                                                     const int start, const int end,
                                                     RefVectorOf<Match> *subEx)
{
    // Compile the pattern on first use.
    if (fOperations == 0)
        prepare();

    RefArrayVectorOf<XMLCh>* tokenStack =
        new (fMemoryManager) RefArrayVectorOf<XMLCh>(16, true, fMemoryManager);

    Context context(fMemoryManager);
    int strLength = XMLString::stringLen(expression);
    context.reset(expression, strLength, start, end, fNoClosures);

    // A Match object is only needed when the caller wants the sub-expressions
    // or when the pattern itself contains back references.
    const bool needsMatch = (subEx || fHasBackReferences);
    Match* currentMatch = 0;
    if (needsMatch) {
        currentMatch = new (fMemoryManager) Match(fMemoryManager);
        currentMatch->setNoGroups(fNoGroups);
    }

    // Hand the Match object over to the context, releasing any previous one
    // the context was responsible for.
    if (context.fAdoptMatch)
        delete context.fMatch;
    context.fMatch = currentMatch;
    context.fAdoptMatch = needsMatch;

    // When you tokenize using zero string, will return each token in the
    // string. Since the zero string will also match the start/end characters,
    // resulting in empty tokens, we ignore them and do not add them to the
    // stack.
    const bool emptyPattern = XMLString::equals(fPattern, &chNull);

    int tokStart = start;
    int matchStart = start;

    for (; matchStart <= end; ++matchStart) {

        const int matchEnd = match(&context, fOperations, matchStart, 1);
        if (matchEnd == -1)
            continue;       // no separator begins at this position

        if (context.fMatch != 0) {
            context.fMatch->setStartPos(0, context.fStart);
            context.fMatch->setEndPos(0, matchEnd);
        }

        if (subEx) {
            // Give the filled-in Match to the caller's collection and carry
            // on with a copy of it for the next round.
            subEx->addElement(currentMatch);
            currentMatch = new (fMemoryManager) Match(*(context.fMatch));
            context.fAdoptMatch = true;
            context.fMatch = currentMatch;
        }

        if (tokStart == matchStart) {
            // The separator starts exactly where the pending token starts,
            // so that token is empty.
            if (tokStart == strLength) {
                // Separator matched at the very end of the text: step back so
                // that the trailing-token logic below emits an empty token.
                --tokStart;
                break;
            }

            XMLCh* emptyToken =
                static_cast<XMLCh*>(fMemoryManager->allocate(sizeof(XMLCh))); // new XMLCh[1]
            emptyToken[0] = chNull;

            if (emptyPattern)
                fMemoryManager->deallocate(emptyToken); // delete[] token
            else
                tokenStack->addElement(emptyToken);
        }
        else {
            // Copy the text between the previous separator and this one.
            XMLCh* piece = static_cast<XMLCh*>(fMemoryManager->allocate
            (
                (matchStart + 1 - tokStart) * sizeof(XMLCh)
            )); // new XMLCh[matchStart + 1 - tokStart]
            XMLString::subString(piece, expression, tokStart, matchStart, fMemoryManager);
            tokenStack->addElement(piece);
        }

        tokStart = matchEnd;

        // Resume scanning right after the separator; use matchEnd - 1 because
        // the loop header increments matchStart again.
        if (matchStart < matchEnd - 1)
            matchStart = matchEnd - 1;
    }

    // Emit whatever follows the last separator.
    XMLCh* lastToken;
    if (matchStart == tokStart + 1) {
        lastToken = static_cast<XMLCh*>(fMemoryManager->allocate(sizeof(XMLCh))); // new XMLCh[1]
        lastToken[0] = chNull;
    }
    else {
        lastToken = static_cast<XMLCh*>(fMemoryManager->allocate
        (
            (strLength + 1 - tokStart) * sizeof(XMLCh)
        )); // new XMLCh[strLength + 1 - tokStart]
        XMLString::subString(lastToken, expression, tokStart, strLength, fMemoryManager);
    }

    if (emptyPattern)
        fMemoryManager->deallocate(lastToken); // delete[] token
    else
        tokenStack->addElement(lastToken);

    return tokenStack;
}
bool RegularExpression::matches(const XMLCh* const expression, const int start, const int end, Match* const pMatch , MemoryManager* const manager) { if (fOperations == 0) prepare(); Context context(manager); int strLength = XMLString::stringLen(expression); context.reset(expression, strLength, start, end, fNoClosures); bool adoptMatch = false; Match* lMatch = pMatch; if (lMatch != 0) { lMatch->setNoGroups(fNoGroups); } else if (fHasBackReferences) { lMatch = new (fMemoryManager) Match(fMemoryManager); lMatch->setNoGroups(fNoGroups); adoptMatch = true; } if (context.fAdoptMatch) delete context.fMatch; context.fMatch = lMatch; context.fAdoptMatch = adoptMatch; if (isSet(fOptions, XMLSCHEMA_MODE)) { int matchEnd = match(&context, fOperations, context.fStart, 1); if (matchEnd == context.fLimit) { if (context.fMatch != 0) { context.fMatch->setStartPos(0, context.fStart); context.fMatch->setEndPos(0, matchEnd); } return true; } return false; } /* * If the pattern has only fixed string, use Boyer-Moore */ if (fFixedStringOnly) { int ret = fBMPattern->matches(expression, context.fStart, context.fLimit); if (ret >= 0) { if (context.fMatch != 0) { context.fMatch->setStartPos(0, ret); context.fMatch->setEndPos(0, ret + strLength); } return true; } return false; } /* * If the pattern contains a fixed string, we check with Boyer-Moore * whether the text contains the fixed string or not. 
If not found * return false */ if (fFixedString != 0) { int ret = fBMPattern->matches(expression, context.fStart, context.fLimit); if (ret < 0) { // No match return false; } } int limit = context.fLimit - fMinLength; int matchStart; int matchEnd = -1; /* * Check whether the expression start with ".*" */ if (fOperations != 0 && fOperations->getOpType() == Op::O_CLOSURE && fOperations->getChild()->getOpType() == Op::O_DOT) { if (isSet(fOptions, SINGLE_LINE)) { matchStart = context.fStart; matchEnd = match(&context, fOperations, matchStart, 1); } else { bool previousIsEOL = true; for (matchStart=context.fStart; matchStart<=limit; matchStart++) { XMLCh ch = expression[matchStart]; if (RegxUtil::isEOLChar(ch)) { previousIsEOL = true; } else { if (previousIsEOL) { if (0 <= (matchEnd = match(&context, fOperations, matchStart, 1))) break; } previousIsEOL = false; } } } } else { /* * Optimization against the first char */ if (fFirstChar != 0) { bool ignoreCase = isSet(fOptions, IGNORE_CASE); RangeToken* range = fFirstChar; if (ignoreCase) range = fFirstChar->getCaseInsensitiveToken(fTokenFactory); for (matchStart=context.fStart; matchStart<=limit; matchStart++) { XMLInt32 ch; if (!context.nextCh(ch, matchStart, 1)) break; if (!range->match(ch)) { if (!ignoreCase) continue; // Perform case insensitive match // REVISIT continue; } if (0 <= (matchEnd = match(&context,fOperations,matchStart,1))) break; } } else { /* * Straightforward matching */ for (matchStart=context.fStart; matchStart<=limit; matchStart++) { if (0 <= (matchEnd = match(&context,fOperations,matchStart,1))) break; } } } if (matchEnd >= 0) { if (context.fMatch != 0) { context.fMatch->setStartPos(0, matchStart); context.fMatch->setEndPos(0, matchEnd); } return true; } return false; }