Beispiel #1
0
/**
 * Splits a string into the pieces that lie between successive matches of
 * this regular expression.
 *
 * @param expression  Null-terminated text to split.
 * @param start       First index at which a match is attempted; also the
 *                    beginning of the first token.
 * @param end         Last index (inclusive) at which a match is attempted.
 * @param subEx       Optional. When non-null, one Match object describing
 *                    group 0 is appended to it for every match found.
 * @return A newly allocated vector holding the tokens; it is returned as a
 *         raw pointer, so the caller is expected to dispose of it.
 */
RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const expression,
                                                     const int start, const int end,
                                                     RefVectorOf<Match> *subEx)
{
    // Build the operation tree on first use.
    if (fOperations == 0)
        prepare();

    RefArrayVectorOf<XMLCh>* const tokens =
        new (fMemoryManager) RefArrayVectorOf<XMLCh>(16, true, fMemoryManager);

    Context context(fMemoryManager);
    const int textLen = XMLString::stringLen(expression);
    context.reset(expression, textLen, start, end, fNoClosures);

    // A Match object is only required when the caller asked for the
    // sub-expressions or the pattern uses back references.
    const bool needGroups = (subEx != 0) || fHasBackReferences;
    Match* groupInfo = 0;
    if (needGroups) {
        groupInfo = new (fMemoryManager) Match(fMemoryManager);
        groupInfo->setNoGroups(fNoGroups);
    }

    // Drop any Match the context still owns before handing over ours.
    if (context.fAdoptMatch)
        delete context.fMatch;
    context.fMatch = groupInfo;
    context.fAdoptMatch = needGroups;

    int tokenBegin = start;   // where the token currently being collected starts
    int pos = start;          // position at which the next match is attempted

    for (; pos <= end; ++pos) {

        const int matchEnd = match(&context, fOperations, pos, 1);
        if (matchEnd == -1)
            continue;         // no delimiter here, try the next position

        if (context.fMatch != 0) {
            context.fMatch->setStartPos(0, context.fStart);
            context.fMatch->setEndPos(0, matchEnd);
        }

        if (subEx) {
            // Hand the filled-in Match to the caller's vector and continue
            // with a fresh copy installed in the context.
            subEx->addElement(groupInfo);
            groupInfo = new (fMemoryManager) Match(*(context.fMatch));
            context.fAdoptMatch = true;
            context.fMatch = groupInfo;
        }

        if (tokenBegin == pos) {
            // The delimiter starts exactly where the token would start.
            if (tokenBegin == textLen) {
                // Match at the very end of the text: step back by one so the
                // check after the loop yields an empty trailing token.
                --tokenBegin;
                break;
            }

            XMLCh* const emptyTok =
                static_cast<XMLCh*>(fMemoryManager->allocate(sizeof(XMLCh)));
            emptyTok[0] = chNull;

            // An empty pattern matches at every position, which would yield
            // an empty token in front of every character; those are thrown
            // away instead of being stored.
            if (XMLString::equals(fPattern, &chNull))
                fMemoryManager->deallocate(emptyTok);
            else
                tokens->addElement(emptyTok);
        }
        else {
            XMLCh* const piece = static_cast<XMLCh*>(fMemoryManager->allocate
            (
                (pos + 1 - tokenBegin) * sizeof(XMLCh)
            ));
            XMLString::subString(piece, expression, tokenBegin, pos, fMemoryManager);
            tokens->addElement(piece);
        }

        tokenBegin = matchEnd;

        // Resume scanning right after the delimiter; the loop header adds
        // one, hence the "- 1".
        if (pos < matchEnd - 1)
            pos = matchEnd - 1;
    }

    // Whatever follows the last delimiter forms the trailing token.
    XMLCh* lastTok;
    if (pos == tokenBegin + 1) {
        lastTok = static_cast<XMLCh*>(fMemoryManager->allocate(sizeof(XMLCh)));
        lastTok[0] = chNull;
    }
    else {
        lastTok = static_cast<XMLCh*>(fMemoryManager->allocate
        (
            (textLen + 1 - tokenBegin) * sizeof(XMLCh)
        ));
        XMLString::subString(lastTok, expression, tokenBegin, textLen, fMemoryManager);
    }

    if (XMLString::equals(fPattern, &chNull))
        fMemoryManager->deallocate(lastTok);
    else
        tokens->addElement(lastTok);

    return tokens;
}
Beispiel #2
0
/**
 * Tests whether this regular expression matches inside a range of the
 * given text.
 *
 * In XMLSCHEMA_MODE a single attempt is made at the start of the range and
 * it only counts when it ends exactly at the range limit. Otherwise the
 * range is scanned for the first position at which the pattern matches,
 * using a Boyer-Moore search and a first-character filter as shortcuts
 * when the compiled pattern provides them.
 *
 * @param expression  Null-terminated text to examine.
 * @param start       Start of the range, as handed to Context::reset.
 * @param end         End of the range, as handed to Context::reset.
 * @param pMatch      Optional. When non-null, the start and end positions
 *                    of group 0 are stored in it on success.
 * @param manager     Memory manager used by the matching context.
 * @return true when a match was found, false otherwise.
 */
bool RegularExpression::matches(const XMLCh* const expression, const int start,
                                const int end, Match* const pMatch
                                , MemoryManager* const manager)
{
    // Build the operation tree on first use.
    if (fOperations == 0)
        prepare();

    Context context(manager);
    int strLength = XMLString::stringLen(expression);

    context.reset(expression, strLength, start, end, fNoClosures);

    bool adoptMatch = false;
    Match* lMatch = pMatch;

    if (lMatch != 0) {
        lMatch->setNoGroups(fNoGroups);
    }
    else if (fHasBackReferences) {
        // Back references need somewhere to record group positions even if
        // the caller is not interested in them; this temporary Match is
        // flagged as adopted by the context.
        lMatch = new (fMemoryManager) Match(fMemoryManager);
        lMatch->setNoGroups(fNoGroups);
        adoptMatch = true;
    }

    // Drop any Match the context still owns before installing ours.
    if (context.fAdoptMatch)
        delete context.fMatch;
    context.fMatch = lMatch;
    context.fAdoptMatch = adoptMatch;

    if (isSet(fOptions, XMLSCHEMA_MODE)) {

        // One attempt only, and it has to consume the range up to fLimit.
        int matchEnd = match(&context, fOperations, context.fStart, 1);

        if (matchEnd == context.fLimit) {

            if (context.fMatch != 0) {
                context.fMatch->setStartPos(0, context.fStart);
                context.fMatch->setEndPos(0, matchEnd);
            }
            return true;
        }

        return false;
    }

    /*
     *  If the pattern has only fixed string, use Boyer-Moore
     */
    if (fFixedStringOnly) {

        int ret = fBMPattern->matches(expression, context.fStart,
                                      context.fLimit);
        if (ret >= 0) {

            if (context.fMatch != 0) {
                context.fMatch->setStartPos(0, ret);
                // The match covers just the literal, so it ends at the hit
                // position plus the literal's length -- not plus the length
                // of the whole input text.
                // NOTE(review): assumes fFixedString is the literal that
                // fBMPattern was built from -- confirm in prepare().
                context.fMatch->setEndPos(
                    0, ret + static_cast<int>(XMLString::stringLen(fFixedString)));
            }
            return true;
        }
        return false;
    }

    /*
     *  If the pattern contains a fixed string, we check with Boyer-Moore
     *  whether the text contains the fixed string or not. If not found
     *  return false
     */
    if (fFixedString != 0) {

        int ret = fBMPattern->matches(expression, context.fStart,
                                      context.fLimit);

        if (ret < 0) { // No match
            return false;
        }
    }

    // Last start position that is tried by the scanning loops below.
    int limit = context.fLimit - fMinLength;
    int matchStart;
    int matchEnd = -1;

    /*
     *  Check whether the expression start with ".*"
     */
    if (fOperations != 0 && fOperations->getOpType() == Op::O_CLOSURE
        && fOperations->getChild()->getOpType() == Op::O_DOT) {

        if (isSet(fOptions, SINGLE_LINE)) {
            // A single attempt from the start of the range.
            matchStart = context.fStart;
            matchEnd = match(&context, fOperations, matchStart, 1);
        }
        else {
            // Only the first non-EOL character after an end-of-line
            // character (or at the start of the range) is tried.
            bool previousIsEOL = true;

            for (matchStart=context.fStart; matchStart<=limit; matchStart++) {

                XMLCh ch = expression[matchStart];
                if (RegxUtil::isEOLChar(ch)) {
                    previousIsEOL = true;
                }
                else {

                    if (previousIsEOL) {
                        if (0 <= (matchEnd = match(&context, fOperations,
                                                   matchStart, 1)))
                            break;
                    }

                    previousIsEOL = false;
                }
            }
        }
    }
    else {
        /*
         *  Optimization against the first char
         */
        if (fFirstChar != 0) {
            bool ignoreCase = isSet(fOptions, IGNORE_CASE);
            RangeToken* range = fFirstChar;

            if (ignoreCase)
                range = fFirstChar->getCaseInsensitiveToken(fTokenFactory);

            for (matchStart=context.fStart; matchStart<=limit; matchStart++) {

                XMLInt32 ch;

                if (!context.nextCh(ch, matchStart, 1))
                    break;

                // Skip positions whose character cannot begin a match.
                // REVISIT: no additional case-insensitive comparison is
                // performed here; a miss is skipped whether or not
                // IGNORE_CASE is set.
                if (!range->match(ch))
                    continue;

                if (0 <= (matchEnd = match(&context,fOperations,matchStart,1)))
                    break;
            }
        }
        else {

            /*
             *  Straightforward matching
             */
            for (matchStart=context.fStart; matchStart<=limit; matchStart++) {

                if (0 <= (matchEnd = match(&context,fOperations,matchStart,1)))
                    break;
            }
        }
    }

    if (matchEnd >= 0) {

        // matchStart is always assigned on any path that produced a
        // non-negative matchEnd.
        if (context.fMatch != 0) {

            context.fMatch->setStartPos(0, matchStart);
            context.fMatch->setEndPos(0, matchEnd);
        }
        return true;
    }
    return false;
}