Exemplo n.º 1
0
/*
 - regbranch - compile one alternative of an | operator
 *
 * Implements concatenation: emits a BRANCH node, then compiles pieces
 * with regpiece() until the pattern ends or a ')' or '|' is seen, tying
 * each piece to the one that follows it with regtail().
 *
 * *flagp is set to WORST, then picks up HASWIDTH from every piece and
 * SPSTART from the first piece only.
 *
 * Returns the value regnode() gave for the BRANCH node, or 0 as soon as
 * regpiece() returns 0.
 */
static int regbranch(regex_t *preg, int *flagp )
{
	int branch;
	int prev = 0;		/* most recent piece; 0 while there is none */
	int piece;
	int piece_flags;

	*flagp = WORST;		/* Pessimistic default. */
	branch = regnode(preg, BRANCH);

	while (!(*preg->regparse == '\0' || *preg->regparse == ')' ||
		 *preg->regparse == '|')) {
		piece = regpiece(preg, &piece_flags);
		if (!piece) {
			return 0;
		}

		*flagp |= piece_flags & HASWIDTH;
		if (prev) {
			/* Hook the previous piece onto this one. */
			regtail(preg, prev, piece);
		}
		else {
			/* Only the first piece contributes SPSTART. */
			*flagp |= piece_flags & SPSTART;
		}
		prev = piece;
	}

	if (!prev) {
		/* No piece was compiled: emit a NOTHING node. */
		(void) regnode(preg, NOTHING);
	}

	return branch;
}
Exemplo n.º 2
0
/*
 - regbranch - compile one alternative of an | operator
 *
 * Implements concatenation: emits a BRANCH node, then compiles pieces
 * with regpiece() until the pattern ends or a ')', newline or '|' is
 * seen, tying each piece to its successor with regtail().
 *
 * *flagp is set to WORST, then picks up HASWIDTH from every piece and
 * SPSTART from the first piece only.
 *
 * Returns the BRANCH node from regnode(), or NULL as soon as regpiece()
 * returns NULL.
 */
static char *
regbranch( int *flagp )
{
	char *branch;
	char *prev = NULL;	/* most recent piece; NULL while there is none */
	char *piece;
	int piece_flags;

	*flagp = WORST;		/* Pessimistic default. */
	branch = regnode(BRANCH);

	for (;;) {
		if (*regparse == '\0' || *regparse == ')' ||
		    *regparse == '\n' || *regparse == '|') {
			break;
		}

		piece = regpiece(&piece_flags);
		if (piece == NULL) {
			return(NULL);
		}

		*flagp |= piece_flags & HASWIDTH;
		if (prev != NULL) {
			/* Hook the previous piece onto this one. */
			regtail(prev, piece);
		} else {
			/* Only the first piece contributes SPSTART. */
			*flagp |= piece_flags & SPSTART;
		}
		prev = piece;
	}

	if (prev == NULL) {
		/* No piece was compiled: emit a NOTHING node. */
		(void) regnode(NOTHING);
	}

	return(branch);
}
Exemplo n.º 3
0
// regbranch - compile one alternative of an | operator.
//
// Emits a BRANCH node, then compiles pieces with regpiece() until the
// pattern ends or a '|' or ')' is seen, linking each piece to the next
// with regtail().  *flagp is set to WORST, then gains HASWIDTH from any
// piece and SPSTART from the first piece only.
//
// Returns the BRANCH node, or NULL as soon as regpiece() returns NULL.
TCHAR *CRegExp::regbranch(int *flagp)
{
	TCHAR *branch;
	TCHAR *prev = NULL;			// Most recent piece, if any.
	TCHAR *piece;
	int pieceFlags;
	int c;

	*flagp = WORST;				// Pessimistic default.
	branch = regnode(BRANCH);

	for (;;) {
		c = *regparse;
		if (c == _T('\0') || c == _T('|') || c == _T(')'))
			break;

		piece = regpiece(&pieceFlags);
		if (piece == NULL)
			return(NULL);

		*flagp |= pieceFlags&HASWIDTH;
		if (prev != NULL) {
			// Hook the previous piece onto this one.
			regtail(prev, piece);
		} else {
			// Only the first piece contributes SPSTART.
			*flagp |= pieceFlags&SPSTART;
		}
		prev = piece;
	}

	if (prev == NULL) {
		// No piece was compiled: emit a NOTHING node.
		(void) regnode(NOTHING);
	}

	return(branch);
}
Exemplo n.º 4
0
/*
 - regbranch - compile one alternative of an | operator
 *
 * Implements concatenation: emits a BRANCH node, then compiles pieces
 * with regpiece() for as long as CHARLEN(regparse) is positive and the
 * current character is not a one-unit ')', newline or '|'.  Each piece
 * is tied to its successor with regtail().
 *
 * *flagp is set to WORST, then picks up HASWIDTH from every piece and
 * SPSTART from the first piece only.
 *
 * Returns the BRANCH node, or NULL as soon as regpiece() returns NULL.
 */
static char	*
regbranch(int *flagp)
{
	char	*branch;
	char	*prev = NULL;	/* most recent piece; NULL while there is none */
	char	*piece;
	int	piece_flags;
	int	len = 0;

	*flagp = WORST;		/* Pessimistic default. */
	branch = regnode(BRANCH);

	for (;;) {
		/* Solaris 2.6 motif diff bug 1236359 1 line */
		len = CHARLEN(regparse);
		if (len <= 0) {
			break;
		}
		/* Terminators only count when CHARLEN reports a length of 1. */
		if (len == 1 && (*regparse == ')' ||
				 *regparse == '\n' || *regparse == '|')) {
			break;
		}

		piece = regpiece(&piece_flags);
		if (piece == NULL) {
			return(NULL);
		}

		*flagp |= piece_flags & HASWIDTH;
		if (prev != NULL) {
			/* Hook the previous piece onto this one. */
			regtail(prev, piece);
		} else {
			/* Only the first piece contributes SPSTART. */
			*flagp |= piece_flags & SPSTART;
		}
		prev = piece;
	}

	if (prev == NULL) {
		/* No piece was compiled: emit a NOTHING node. */
		(void) regnode(NOTHING);
	}

	return(branch);
}
Exemplo n.º 5
0
/*
 * regspcl - build a node for the character following a backslash
 *
 * *type selects the pattern stored in the node: d, D, s, S, i and I
 * map to fixed bracket expressions; any other character is stored as a
 * one-character pattern that points at type itself.
 *
 * Returns whatever regnode(expr) returned; the node is only filled in
 * when that result is non-null.
 */
struct Node *regspcl (struct Expr *expr, uchar *type, struct Node *parent)
{
struct Node *node;
uchar *pat;
int escaped = 0;	/* set when *type is not one of the class letters */

	switch( *type ) {
	case 'd':	pat = "[0-9]";	break;
	case 'D':	pat = "[^0-9]";	break;
	case 's':	pat = "[ 	]";	break;
	case 'S':	pat = "[^ 	]";	break;
	case 'i':	pat = "[a-zA-Z_:]";		break;
	case 'I':	pat = "[^a-zA-Z_:]";	break;
	default:
		/* pattern is escaped regular character */
		pat = type;
		escaped = 1;
		break;
	}

	node = regnode (expr);
	if( !node )
		return node;

	node->maximum = 1;
	node->minimum = 1;
	if( escaped )
		node->typelen = 1;
	else
		node->typelen = strlen(pat);
	node->type->pattern = pat;
	node->parent = parent;
	return node;
}
Exemplo n.º 6
0
/*
 - regbranch - one alternative of an | operator
 *
 * Implements the concatenation operator.
 */
char* ossimRegExp::regbranch (int *flagp) {
    char* ret;
    char* chain;
    char* latest;
    int                  flags;

    *flagp = WORST;		// Tentatively.

    ret = regnode(BRANCH);
    chain = NULL;
    while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
        latest = regpiece(&flags);
        if (latest == NULL)
            return (NULL);
        *flagp |= flags & HASWIDTH;
        if (chain == NULL)	// First piece.
            *flagp |= flags & SPSTART;
        else
            regtail(chain, latest);
        chain = latest;
    }
    if (chain == NULL)		// Loop ran zero times.
        regnode(NOTHING);

    return (ret);
}
Exemplo n.º 7
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
char* ossimRegExp::regpiece (int *flagp) {
    char* ret;
    char  op;
    char* next;
    int            flags;

    ret = regatom(&flags);
    if (ret == NULL)
        return (NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return (ret);
    }

    if (!(flags & HASWIDTH) && op != '?') {
        //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand),
        printf ("ossimRegExp::compile() : *+ operand could be empty.\n");
        return 0;
    }
    *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);

    if (op == '*' && (flags & SIMPLE))
        reginsert(STAR, ret);
    else if (op == '*') {
        // Emit x* as (x&|), where & means "self".
        reginsert(BRANCH, ret);	// Either x
        regoptail(ret, regnode(BACK));	// and loop
        regoptail(ret, ret);	// back
        regtail(ret, regnode(BRANCH));	// or
        regtail(ret, regnode(NOTHING));	// null.
    }
    else if (op == '+' && (flags & SIMPLE))
        reginsert(PLUS, ret);
    else if (op == '+') {
        // Emit x+ as x(&|), where & means "self".
        next = regnode(BRANCH);	// Either
        regtail(ret, next);
        regtail(regnode(BACK), ret);	// loop back
        regtail(next, regnode(BRANCH));	// or
        regtail(ret, regnode(NOTHING));	// null.
    }
    else if (op == '?') {
        // Emit x? as (x|)
        reginsert(BRANCH, ret);	// Either x
        regtail(ret, regnode(BRANCH));	// or
        next = regnode(NOTHING);// null.
        regtail(ret, next);
        regoptail(ret, next);
    }
    regparse++;
    if (ISMULT(*regparse)) {
        //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand),
        printf ("ossimRegExp::compile(): Nested *?+.\n");
        return 0;
    }
    return (ret);
}
Exemplo n.º 8
0
/*
 - regpiece - an atom followed by an optional *, + or ?
 *
 * Compiles one atom with regatom().  If ISMULT() rejects the position
 * that follows, the atom is returned unchanged and *flagp receives the
 * atom's flags.  Otherwise the atom is wrapped in the node sequence for
 * the operator; the operator characters are only recognised when
 * CHARLEN() reports a length of 1 for them.
 *
 * The branching sequences used for ? and for the general cases of * and
 * + share one NOTHING node as both the end marker of the branch list
 * and the body of the last branch.
 *
 * Returns the start of the compiled piece, or NULL if regatom() fails.
 * The two FAIL() sites cover an operand that could be empty and a
 * repetition operator directly following another.
 */
static char	*
regpiece(int *flagp)
{
	char	*atom;
	char	*op;
	char	*alt;
	char	kind;		/* *op when CHARLEN(op) == 1, otherwise '\0' */
	int	atom_flags;
	int	len = 0;

	atom = regatom(&atom_flags);
	if (atom == NULL) {
		return(NULL);
	}

	op = regparse;
	if (!ISMULT(op)) {
		*flagp = atom_flags;
		return(atom);
	}

	len = CHARLEN(op);
	kind = (len == 1) ? *op : '\0';

	if (!(atom_flags & HASWIDTH) && kind != '?') {
		FAIL("*+ operand could be empty");
	}

	if (kind == '+') {
		*flagp = WORST | HASWIDTH;
	} else {
		*flagp = WORST | SPSTART;
	}

	switch (kind) {
	case '*':
		if (atom_flags & SIMPLE) {
			reginsert(STAR, atom);
		} else {
			/* Emit x* as (x&|), where & means "self". */
			reginsert(BRANCH, atom);		/* Either x */
			regoptail(atom, regnode(BACK));		/* and loop */
			regoptail(atom, atom);			/* back */
			regtail(atom, regnode(BRANCH));		/* or */
			regtail(atom, regnode(NOTHING));	/* null. */
		}
		break;

	case '+':
		if (atom_flags & SIMPLE) {
			reginsert(PLUS, atom);
		} else {
			/* Emit x+ as x(&|), where & means "self". */
			alt = regnode(BRANCH);			/* Either */
			regtail(atom, alt);
			regtail(regnode(BACK), atom);		/* loop back */
			regtail(alt, regnode(BRANCH));		/* or */
			regtail(atom, regnode(NOTHING));	/* null. */
		}
		break;

	case '?':
		/* Emit x? as (x|) */
		reginsert(BRANCH, atom);			/* Either x */
		regtail(atom, regnode(BRANCH));			/* or */
		alt = regnode(NOTHING);				/* null. */
		regtail(atom, alt);
		regoptail(atom, alt);
		break;

	default:
		break;
	}

	regparse += INCRLEN(len);	/* Step over the operator. */
	if (ISMULT(regparse)) {
		FAIL("nested *?+");
	}

	return(atom);
}
Exemplo n.º 9
0
/*
 - regpiece - an atom followed by an optional *, + or ?
 *
 * Compiles one atom with regatom().  If the next pattern character is
 * not a repetition operator the atom is returned unchanged and *flagp
 * receives the atom's flags.  Otherwise the atom is wrapped in the
 * node sequence for the operator and the operator is consumed.
 *
 * The branching sequences used for ? and for the general cases of * and
 * + share one NOTHING node as both the end marker of the branch list
 * and the body of the last branch.
 *
 * Returns the start of the compiled piece, or NULL if regatom() fails.
 * The two FAIL() sites cover an operand that could be empty and a
 * repetition operator directly following another.
 */
static char *regpiece( int *flagp )
{
    char        *atom;
    char        *alt;
    char        op;
    int         atom_flags;

    atom = regatom( &atom_flags );
    if( atom == NULL ) {
        return( NULL );
    }

    op = *regparse;
    if( !ISMULT( op ) ) {
        *flagp = atom_flags;
        return( atom );
    }

    if( op != '?' && !( atom_flags & HASWIDTH ) ) {
        FAIL( ERR_RE_EMPTY_OPERAND );
    }

    if( op == '+' ) {
        *flagp = WORST | HASWIDTH;
    } else {
        *flagp = WORST | SPSTART;
    }

    switch( op ) {
    case '*':
        if( atom_flags & SIMPLE ) {
            reginsert( STAR, atom );
        } else {
            /* Emit x* as (x&|), where & means "self". */
            reginsert( BRANCH, atom );                  /* Either x */
            regoptail( atom, regnode( BACK ) );         /* and loop */
            regoptail( atom, atom );                    /* back */
            regtail( atom, regnode( BRANCH ) );         /* or */
            regtail( atom, regnode( NOTHING ) );        /* null. */
        }
        break;

    case '+':
        if( atom_flags & SIMPLE ) {
            reginsert( PLUS, atom );
        } else {
            /* Emit x+ as x(&|), where & means "self". */
            alt = regnode( BRANCH );                    /* Either */
            regtail( atom, alt );
            regtail( regnode( BACK ), atom );           /* loop back */
            regtail( alt, regnode( BRANCH ) );          /* or */
            regtail( atom, regnode( NOTHING ) );        /* null. */
        }
        break;

    case '?':
        /* Emit x? as (x|) */
        reginsert( BRANCH, atom );                      /* Either x */
        regtail( atom, regnode( BRANCH ) );             /* or */
        alt = regnode( NOTHING );                       /* null. */
        regtail( atom, alt );
        regoptail( atom, alt );
        break;

    default:
        break;
    }

    regparse++;                                         /* Consume the operator. */
    if( ISMULT( *regparse ) ) {
        FAIL( ERR_RE_NESTED_OPERAND );
    }

    return( atom );
}
Exemplo n.º 10
0
/*
 * regpiece - an atom followed by an optional *, + or ?
 *
 * Compiles one atom with regatom().  If the next pattern character is
 * not a repetition operator the atom is returned unchanged and *flagp
 * receives the atom's flags.  Otherwise the atom is wrapped in the
 * node sequence for the operator and the operator is stepped over with
 * CMnext().
 *
 * The branching sequences used for ? and for the general cases of * and
 * + share one NOTHING node as both the end marker of the branch list
 * and the body of the last branch.
 *
 * Returns the start of the compiled piece, or NULL if regatom() fails.
 * The two _FAIL() sites cover an operand that could be empty and a
 * repetition operator directly following another.
 */
static char *
regpiece(i4 *flagp)
{
    char *atom;
    char *alt;
    char op;
    i4 atom_flags;

    atom = regatom(&atom_flags);
    if (atom == NULL) {
        return(NULL);
    }

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = atom_flags;
        return(atom);
    }

    if (op != '?' && !(atom_flags&HASWIDTH)) {
        _FAIL("*+ operand could be empty");
    }

    if (op == '+') {
        *flagp = WORST|HASWIDTH;
    } else {
        *flagp = WORST|SPSTART;
    }

    switch (op) {
    case '*':
        if (atom_flags&SIMPLE) {
            reginsert(STAR, atom);
        } else {
            /* Emit x* as (x&|), where & means "self". */
            reginsert(BRANCH, atom);            /* Either x */
            regoptail(atom, regnode(BACK));     /* and loop */
            regoptail(atom, atom);              /* back */
            regtail(atom, regnode(BRANCH));     /* or */
            regtail(atom, regnode(NOTHING));    /* null. */
        }
        break;

    case '+':
        if (atom_flags&SIMPLE) {
            reginsert(PLUS, atom);
        } else {
            /* Emit x+ as x(&|), where & means "self". */
            alt = regnode(BRANCH);              /* Either */
            regtail(atom, alt);
            regtail(regnode(BACK), atom);       /* loop back */
            regtail(alt, regnode(BRANCH));      /* or */
            regtail(atom, regnode(NOTHING));    /* null. */
        }
        break;

    case '?':
        /* Emit x? as (x|) */
        reginsert(BRANCH, atom);                /* Either x */
        regtail(atom, regnode(BRANCH));         /* or */
        alt = regnode(NOTHING);                 /* null. */
        regtail(atom, alt);
        regoptail(atom, alt);
        break;

    default:
        break;
    }

    CMnext( regparse );                         /* Step over the operator. */
    if (ISMULT(*regparse)) {
        _FAIL("nested *?+");
    }

    return(atom);
}
Exemplo n.º 11
0
/*
   - regpiece - an atom followed by an optional ASTERIX, PLUSS or QMARK
 *
 * Compiles one atom with regatom().  If the next pattern element is not
 * a repetition operator the atom is returned unchanged and *flagp
 * receives the atom's flags.  Otherwise the atom is wrapped in the node
 * sequence for the operator and the operator is consumed.
 *
 * The branching sequences used for ? and for the general cases of * and
 * + share one NOTHING node as both the end marker of the branch list
 * and the body of the last branch.
 *
 * Returns the start of the compiled piece, or a null pointer if
 * regatom() fails.  The two FAIL() sites cover an operand that could be
 * empty and a repetition operator directly following another.
 */
static char *regpiece (int * flagp)
{
    char *atom;
    char *alt;
    short op;
    int atom_flags;

    atom = regatom(&atom_flags);
    if (atom == (char *) NULL) {
        return ((char *) NULL);
    }

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = atom_flags;
        return (atom);
    }

    if (op != QMARK && !(atom_flags & HASWIDTH)) {
        FAIL("*+ operand could be empty\n");
    }

    if (op == PLUSS) {
        *flagp = WORST | HASWIDTH;
    } else {
        *flagp = WORST | SPSTART;
    }

    if (op == ASTERIX) {
        if (atom_flags & SIMPLE) {
            reginsert(STAR, atom);
        } else {
            /* Emit x* as (x&|), where & means "self". */
            reginsert(BRANCH, atom);            /* Either x */
            regoptail(atom, regnode(BACK));     /* and loop */
            regoptail(atom, atom);              /* back */
            regtail(atom, regnode(BRANCH));     /* or */
            regtail(atom, regnode(NOTHING));    /* null. */
        }
    } else if (op == PLUSS) {
        if (atom_flags & SIMPLE) {
            reginsert(PLUS, atom);
        } else {
            /* Emit x+ as x(&|), where & means "self". */
            alt = regnode(BRANCH);              /* Either */
            regtail(atom, alt);
            regtail(regnode(BACK), atom);       /* loop back */
            regtail(alt, regnode(BRANCH));      /* or */
            regtail(atom, regnode(NOTHING));    /* null. */
        }
    } else if (op == QMARK) {
        /* Emit x? as (x|) */
        reginsert(BRANCH, atom);                /* Either x */
        regtail(atom, regnode(BRANCH));         /* or */
        alt = regnode(NOTHING);                 /* null. */
        regtail(atom, alt);
        regoptail(atom, alt);
    }

    regparse++;                                 /* Consume the operator. */
    if (ISMULT(*regparse)) {
        FAIL("nested *?+\n");
    }

    return (atom);
}
Exemplo n.º 12
0
/*
 * regpat - build a node for a caller-supplied pattern of a given length
 *
 * Obtains a node from regnode(expr) and, when that succeeds, sets its
 * minimum and maximum to 1 and records pat, len and parent in it.
 *
 * Returns whatever regnode() returned, including a null pointer.
 */
struct Node *regpat (struct Expr *expr, uchar *pat, int len, struct Node *parent)
{
struct Node *node = regnode (expr);

	if( !node )
		return node;

	node->maximum = 1;
	node->minimum = 1;
	node->typelen = len;
	node->parent = parent;
	node->type->pattern = pat;
	return node;
}
Exemplo n.º 13
0
/*
 - regatom - the lowest level
 *
 * Compiles a single atom starting at regparse: an anchor, '.', a
 * bracket expression, a parenthesized sub-expression, a backslash
 * escape, or a run of ordinary characters.  regparse is advanced past
 * whatever was consumed.
 *
 * *flagp starts as WORST and gains HASWIDTH and/or SIMPLE (or the
 * HASWIDTH/SPSTART bits reported by reg() for a parenthesized group)
 * according to the atom that was compiled.
 *
 * Returns the node created for the atom, or NULL when reg() fails.
 * NOTE(review): the FAIL() macro is not visible here; it presumably
 * reports the message and returns NULL -- confirm against its definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(int *flagp)
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* The dispatch character is consumed here; cases that need it back
	 * (the default case) step regparse backwards again. */
	switch (*regparse++) {
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Bracket expression: the member characters are emitted
			 * one by one after an ANYOF/ANYBUT node and closed with
			 * a '\0'. */
			register int chclass;
			register int chclassend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else {
				ret = regnode(ANYOF);
			}
			/* A ']' or '-' in first position is taken literally. */
			if (*regparse == ']' || *regparse == '-') {
				regc(*regparse++);
			}
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					if (*regparse == ']' || *regparse == '\0') {
						/* Trailing '-' is a literal. */
						regc('-');
					} else {
						/* Range: the character before the '-' has
						 * already been emitted, so start one past
						 * it and emit up to and including the
						 * character after the '-'. */
						chclass = UCHARAT(regparse-2)+1;
						chclassend = UCHARAT(regparse);
						if (chclass > chclassend+1) {
							FAIL("invalid [] range");
						}
						for (; chclass <= chclassend; chclass++) {
							regc(chclass);
						}
						regparse++;
					}
				} else if (*regparse == '\\') {
					/* Escapes recognised inside brackets. */
					switch(*++regparse) {
					case 'n' :
						regc('\n');
						regparse++;
						break;
					case 't' :
						regc('\t');
						regparse++;
						break;
					case ']' :
						regc(']');
						regparse++;
						break;
					case '-' :
						regc('-');
						regparse++;
						break;
					case '\\' :
						regc('\\');
						regparse++;
						break;
					default :
						/* Unknown escape: step back and emit the
						 * backslash itself; the character after it
						 * is handled by the next loop iteration. */
						regparse--;
						regc(*regparse++);
					}
				} else {
					regc(*regparse++);
				}
			}
			regc('\0');
			if (*regparse != ']') {
				FAIL("unmatched []");
			}
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		/* Parenthesized group: compiled by reg() with paren set. */
		ret = reg(1, &flags);
		if (ret == NULL) {
			return(NULL);
		}
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
	case '{':
		FAIL("?+*{ follows nothing");
		break;
	case '\\':
		if (*regparse == '\0') {
			FAIL("trailing \\");
		}
		/* Escape outside brackets: the escaped character is examined
		 * here and consumed once, after the switch. */
		switch(*regparse) {
		case '<':
			ret = regnode(BEGWORD);
			break;
		case '>':
			ret = regnode(ENDWORD);
			break;
		case 'd':
			ret = regnode(DIGIT);
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'D':
			ret = regnode(NDIGIT);
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'n' :
			ret = regnode(EXACTLY);
			regc('\n');
			regc('\0');
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'p':
			ret = regnode(PRINT);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'P':
			ret = regnode(NPRINT);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			ret = regnode(WHITESP);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'S':
			ret = regnode(NWHITESP);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 't' :
			ret = regnode(EXACTLY);
			regc('\t');
			regc('\0');
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'w':
			ret = regnode(ALNUM);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'W':
			ret = regnode(NALNUM);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		default :
			/* Any other escaped character matches itself. */
			ret = regnode(EXACTLY);
			regc(*regparse);
			regc('\0');
			*flagp |= HASWIDTH|SIMPLE;
		}
		regparse++;
		break;
	default: {
			/* Run of ordinary characters, up to the first character
			 * that appears in META. */
			register int len;
			register char ender;

			regparse--;	/* Undo the increment done by the switch. */
			len = strcspn(regparse, META);
			if (len <= 0) {
				FAIL("internal disaster");
			}
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender)) {
				len--;		/* Back off clear of ?+* operand. */
			}
			*flagp |= HASWIDTH;
			if (len == 1) {
				*flagp |= SIMPLE;
			}
			ret = regnode(EXACTLY);
			while (len > 0) {
				regc(*regparse++);
				len--;
			}
			regc('\0');
		}
		break;
	}

	return(ret);
}
Exemplo n.º 14
0
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * When paren is non-zero an OPEN node numbered from regnpar is emitted
 * first (failing once NSUBEXP groups exist).  The branches separated by
 * '|' are then compiled with regbranch() and linked together, a CLOSE
 * (or END, for the top level) node is appended, and the tail of every
 * branch is hooked to that closing node.
 *
 * *flagp starts as HASWIDTH; the bit is cleared if any branch lacks it,
 * and SPSTART is set if any branch reports it.
 *
 * Returns the first node of the compiled expression, or NULL when
 * regbranch() fails.  The FAIL() sites cover too many groups, an
 * unmatched parenthesis and unexpected trailing input.
 */
static char *reg( int paren, int *flagp )
{
    char        *first;
    char        *branch;
    char        *closer;
    int         branch_flags;
    char        parno = 0;

    *flagp = HASWIDTH;      /* Tentatively. */

    /* Make an OPEN node, if parenthesized. */
    first = NULL;
    if( paren ) {
        if( regnpar >= NSUBEXP ) {
            FAIL( ERR_RE_TOO_MANY_ROUND_BRACKETS );
        }
        parno = regnpar;
        regnpar++;
        first = regnode( OPEN + parno );
    }

    /*
     * Pick up the branches, linking them together.  The first branch
     * either follows the OPEN node or becomes the head itself; every
     * later branch is chained onto the head.
     */
    for( ;; ) {
        branch = regbranch( &branch_flags );
        if( branch == NULL ) {
            return( NULL );
        }
        if( first == NULL ) {
            first = branch;
        } else {
            regtail( first, branch );
        }
        if( !( branch_flags & HASWIDTH ) ) {
            *flagp &= ~HASWIDTH;
        }
        *flagp |= branch_flags & SPSTART;

        if( *regparse != '|' ) {
            break;
        }
        regparse++;
    }

    /* Make a closing node, and hook it on the end. */
    closer = regnode( ( paren ) ? CLOSE + parno : END );
    regtail( first, closer );

    /* Hook the tails of the branches to the closing node. */
    branch = first;
    while( branch != NULL ) {
        regoptail( branch, closer );
        branch = regnext( branch );
    }

    /* Check for proper termination. */
    if( paren ) {
        if( *regparse++ != ')' ) {
            FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
        }
    } else if( *regparse != '\0' ) {
        if( *regparse == ')' ) {
            FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
        } else {
            FAIL( ERR_RE_INTERNAL_FOULUP );    /* "Can't happen". */
        }
    }

    return( first );
}
Exemplo n.º 15
0
// regatom - compile the lowest-level element of a pattern.
//
// Handles one atom starting at regparse: an anchor, '.', a bracket
// expression, a parenthesized sub-expression, a backslash-escaped
// character, or a run of ordinary characters (gathered into a single
// EXACTLY node).  regparse is advanced past whatever was consumed.
//
// *flagp starts as WORST and gains HASWIDTH and/or SIMPLE (or the
// HASWIDTH/SPSTART bits reported by reg() for a group) according to the
// atom that was compiled.
//
// Returns the node created for the atom, or NULL on any error; errors
// detected here are reported through TRACE0.
TCHAR *CRegExp::regatom(int *flagp)
{
	TCHAR *ret;
	int flags;

	*flagp = WORST;		// Tentatively.

	// The dispatch character is consumed here; the default case steps
	// regparse back again.
	switch (*regparse++) {
	case _T('^'):
		ret = regnode(BOL);
		break;
	case _T('$'):
		ret = regnode(EOL);
		break;
	case _T('.'):
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case _T('['): {
		// Bracket expression: member characters are emitted one by one
		// after an ANYOF/ANYBUT node and closed with a terminator.
		int range;
		int rangeend;
		int c;

		if (*regparse == _T('^')) {	// Complement of range.
			ret = regnode(ANYBUT);
			regparse++;
		} else
			ret = regnode(ANYOF);
		// A ']' or '-' in first position is taken literally.
		if ((c = *regparse) == _T(']') || c == _T('-')) {
			regc(c);
			regparse++;
		}
		while ((c = *regparse++) != _T('\0') && c != _T(']')) {
			if (c != _T('-'))
				regc(c);
			else if ((c = *regparse) == _T(']') || c == _T('\0'))
				regc(_T('-'));	// Trailing '-' is a literal.
			else
			{
				// Range endpoints are converted through the unsigned
				// character type: casting a signed TCHAR straight to
				// unsigned sign-extends characters above 0x7F, which
				// made ranges ending in such characters look inverted.
				range = (_TUCHAR)*(regparse-2);
				rangeend = (_TUCHAR)c;
				if (range > rangeend)
				{
					TRACE0("invalid [] range\n");
					return NULL;
				}
				// The range start was already emitted as an ordinary
				// character, so continue from the one after it.
				for (range++; range <= rangeend; range++)
					regc(range);
				regparse++;
			}
		}
		regc(_T('\0'));
		if (c != _T(']'))
		{
			TRACE0("unmatched []\n");
			return NULL;
		}
		*flagp |= HASWIDTH|SIMPLE;
		break;
		}
	case _T('('):
		// Parenthesized group: compiled by reg() with paren set.
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case _T('\0'):
	case _T('|'):
	case _T(')'):
		// supposed to be caught earlier
		TRACE0("internal error: \\0|) unexpected\n");
		return NULL;
	case _T('?'):
	case _T('+'):
	case _T('*'):
		TRACE0("?+* follows nothing\n");
		return NULL;
	case _T('\\'):
		if (*regparse == _T('\0'))
		{
			TRACE0("trailing \\\n");
			return NULL;
		}
		// The escaped character always matches itself.
		ret = regnode(EXACTLY);
		regc(*regparse++);
		regc(_T('\0'));
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
		// Run of ordinary characters, up to the first one in META.
		size_t len;
		TCHAR ender;

		regparse--;	// Undo the increment done by the switch.
		len = _tcscspn(regparse, META);
		if (len == 0)
		{
			TRACE0("internal error: strcspn 0\n");
			return NULL;
		}
		ender = *(regparse+len);
		if (len > 1 && ISREPN(ender))
			len--;		// Back off clear of ?+* operand.
		*flagp |= HASWIDTH;
		if (len == 1)
			*flagp |= SIMPLE;
		ret = regnode(EXACTLY);
		for (; len > 0; len--)
			regc(*regparse++);
		regc(_T('\0'));
		break;
		}
	}

	return(ret);
}
Exemplo n.º 16
0
// regpiece - an atom followed by an optional *, + or ?.
//
// Compiles one atom with regatom().  If the next pattern character is
// not a repetition operator the atom is returned unchanged and *flagp
// receives the atom's flags.  Otherwise *flagp is set for the operator,
// the atom is wrapped in the matching node sequence and the operator is
// consumed.
//
// Returns the start of the compiled piece, or NULL when regatom()
// fails, when * or + is applied to an operand that could be empty, or
// when a second repetition operator follows directly; the last two
// cases are reported through TRACE0.
TCHAR *CRegExp::regpiece(int *flagp)
{
	int atomFlags;
	TCHAR *atom = regatom(&atomFlags);
	if (atom == NULL)
		return(NULL);

	const TCHAR op = *regparse;
	if (!ISREPN(op)) {
		*flagp = atomFlags;
		return(atom);
	}

	if (op != _T('?') && !(atomFlags&HASWIDTH))
	{
		TRACE0("*+ operand could be empty\n");
		return NULL;
	}

	const bool simple = (atomFlags&SIMPLE) != 0;

	switch (op) {
	case _T('*'):
		*flagp = WORST|SPSTART;
		if (simple) {
			reginsert(STAR, atom);
		} else {
			// Emit x* as (x&|), where & means "self".
			reginsert(BRANCH, atom);		// Either x
			regoptail(atom, regnode(BACK));		// and loop
			regoptail(atom, atom);			// back
			regtail(atom, regnode(BRANCH));		// or
			regtail(atom, regnode(NOTHING));	// null.
		}
		break;

	case _T('+'):
		*flagp = WORST|SPSTART|HASWIDTH;
		if (simple) {
			reginsert(PLUS, atom);
		} else {
			// Emit x+ as x(&|), where & means "self".
			TCHAR *alt = regnode(BRANCH);		// Either
			regtail(atom, alt);
			regtail(regnode(BACK), atom);		// loop back
			regtail(alt, regnode(BRANCH));		// or
			regtail(atom, regnode(NOTHING));	// null.
		}
		break;

	case _T('?'): {
		*flagp = WORST;
		// Emit x? as (x|)
		reginsert(BRANCH, atom);			// Either x
		regtail(atom, regnode(BRANCH));			// or
		TCHAR *nothing = regnode(NOTHING);		// null.
		regtail(atom, nothing);
		regoptail(atom, nothing);
		break;
	}

	default:
		break;
	}

	regparse++;						// Consume the operator.
	if (ISREPN(*regparse))
	{
		TRACE0("nested *?+\n");
		return NULL;
	}

	return(atom);
}
Exemplo n.º 17
0
/*
   - regatom - the lowest level
 *
 * Compiles a single atom starting at regparse: an anchor, DOT, a word
 * boundary marker, a bracket expression, a parenthesized sub-expression
 * or a run of ordinary characters.  regparse is advanced past whatever
 * was consumed.
 *
 * *flagp starts as WORST and gains HASWIDTH and/or SIMPLE (or the
 * HASWIDTH/SPSTART bits reported by reg() for a group) according to the
 * atom that was compiled.
 *
 * Returns the node created for the atom, or a null pointer when reg()
 * fails.
 * NOTE(review): the FAIL() macro is not visible here; it presumably
 * reports the message and returns a null pointer -- confirm against its
 * definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.
 */
static char *regatom (int * flagp)
{
    register char *ret;
    int flags;

    *flagp = WORST;             /* Tentatively. */

    /* The dispatch element is consumed here; the default case steps
     * regparse back again. */
    switch (*regparse++) {
        case CARET:
            ret = regnode(BOL);
            break;
        case DOLLAR:
            ret = regnode(EOL);
            break;
        case DOT:
            ret = regnode(ANY);
            *flagp |= HASWIDTH | SIMPLE;
            break;
        case LSHBRAC:
            ret = regnode(WORDSTART);
            break;
        case RSHBRAC:
            ret = regnode(WORDEND);
            break;
        case LSQBRAC:{
                         /* Bracket expression: member characters are
                          * emitted one by one after an ANYOF/ANYBUT node
                          * and closed with a '\0'. */
                         register int classs;
                         register int classend;

                         if (*regparse == CARET) {   /* Complement of range. */
                             ret = regnode(ANYBUT);
                             regparse++;
                         } else
                             ret = regnode(ANYOF);
                         /* A closing bracket or '-' in first position is
                          * taken literally. */
                         if (*regparse == RSQBRAC || *regparse == '-')
                             regc(*regparse++);
                         while (*regparse != '\0' && *regparse != RSQBRAC) {
                             if (*regparse == '-') {
                                 regparse++;
                                 if (*regparse == RSQBRAC || *regparse == '\0')
                                     regc('-');     /* Trailing '-' is a literal. */
                                 else {
                                     /* Range: the element before the '-'
                                      * was already emitted, so start one
                                      * past it and emit up to and
                                      * including the element after it. */
                                     classs = (CHARBITS & *(regparse - 2)) + 1;
                                     classend = (CHARBITS & *(regparse));
                                     if (classs > classend + 1)
                                         FAIL("invalid [] range\n");
                                     for (; classs <= classend; classs++)
                                         regc(classs);
                                     regparse++;
                                 }
                             } else
                                 regc(*regparse++);
                         }
                         regc('\0');
                         if (*regparse != RSQBRAC)
                             FAIL("unmatched []\n");
                         regparse++;
                         *flagp |= HASWIDTH | SIMPLE;
                     }
                     break;
        case LBRAC:
                     /* Parenthesized group: compiled by reg() with paren set. */
                     ret = reg(1, &flags);
                     if (ret == (char *) NULL)
                         return ((char *) NULL);
                     *flagp |= flags & (HASWIDTH | SPSTART);
                     break;
        case '\0':
        case OR_OP:
        case RBRAC:
                     FAIL("internal urp\n"); /* Supposed to be caught earlier. */
                     break;
        case ASTERIX:
                     FAIL("* follows nothing\n");
                     break;
        case PLUSS:
                     FAIL("+ follows nothing\n");
                     break;
        case QMARK:
                     FAIL("? follows nothing\n");
                     break;
        default:{
                    /* Run of ordinary elements: stops at a zero element,
                     * at any element with SPECIAL bits set, or at RSQBRAC. */
                    register int len;
                    register short ender;

                    regparse--;         /* Undo the increment done by the switch. */
                    for (len = 0; regparse[len] &&
                            !(regparse[len] & SPECIAL) && regparse[len] != RSQBRAC; len++);
                    if (len <= 0) {
                        FAIL("unexpected ]\n");
                    }
                    ender = *(regparse + len);
                    if (len > 1 && ISMULT(ender))
                        len--;          /* Back off clear of ?+* operand. */
                    *flagp |= HASWIDTH;
                    if (len == 1)
                        *flagp |= SIMPLE;
                    ret = regnode(EXACTLY);
                    while (len > 0) {
                        regc(*regparse++);
                        len--;
                    }
                    regc('\0');
                }
                break;
    }

    return (ret);
}
Exemplo n.º 18
0
// reg - regular expression, i.e. main body or parenthesized thing.
//
// The caller must already have consumed the opening parenthesis.
//
// When paren is non-zero an OPEN node numbered from regnpar is emitted
// first (failing once NSUBEXP groups exist).  The branches separated by
// '|' are then compiled with regbranch() and linked together, a CLOSE
// (or END, for the top level) node is appended, and the tail of every
// branch is hooked to that closing node.
//
// *flagp starts as HASWIDTH; the bit is cleared if any branch lacks it,
// and SPSTART is set if any branch reports it.
//
// Returns the first node of the compiled expression, or NULL on error
// (too many groups, a failed branch, an unterminated or unmatched
// parenthesis, or unexpected trailing input); errors detected here are
// reported through TRACE0/TRACE1.
TCHAR *CRegExp::reg(int paren, int *flagp)
{
	// The node pointers use TCHAR like the rest of the class so that the
	// function also compiles when TCHAR is a wide character type.
	TCHAR *ret = NULL;
	TCHAR *br;
	TCHAR *ender;
	int parno = 0;		// Only meaningful when paren is set.
	int flags;

	*flagp = HASWIDTH;	// Tentatively.

	if (paren)
	{
		// Make an OPEN node.
		if (regnpar >= NSUBEXP)
		{
			TRACE1("Too many (). NSUBEXP is set to %d\n", NSUBEXP );
			return NULL;
		}
		parno = regnpar;
		regnpar++;
		ret = regnode(OPEN+parno);
	}

	// Pick up the branches, linking them together.
	br = regbranch(&flags);
	if (br == NULL)
		return(NULL);
	if (paren)
		regtail(ret, br);	// OPEN -> first.
	else
		ret = br;
	*flagp &= ~(~flags&HASWIDTH);	// Clear bit if bit 0.
	*flagp |= flags&SPSTART;
	while (*regparse == _T('|')) {
		regparse++;
		br = regbranch(&flags);
		if (br == NULL)
			return(NULL);
		regtail(ret, br);	// BRANCH -> BRANCH.
		*flagp &= ~(~flags&HASWIDTH);
		*flagp |= flags&SPSTART;
	}

	// Make a closing node, and hook it on the end.
	ender = regnode((paren) ? CLOSE+parno : END);
	regtail(ret, ender);

	// Hook the tails of the branches to the closing node.
	for (br = ret; br != NULL; br = regnext(br))
		regoptail(br, ender);

	// Check for proper termination.  Note that the closing parenthesis
	// is consumed as part of the test when paren is set.
	if (paren && *regparse++ != _T(')'))
	{
		TRACE0("unterminated ()\n");
		return NULL;
	}
	else if (!paren && *regparse != _T('\0'))
	{
		if (*regparse == _T(')'))
		{
			TRACE0("unmatched ()\n");
			return NULL;
		}
		else
		{
			TRACE0("internal error: junk on end\n");
			return NULL;
		}
		// NOTREACHED
	}

	return(ret);
}
Exemplo n.º 19
0
/*
 * regatom - the lowest level
 *
 * Compiles the single atom found at regparse and advances regparse past
 * it.  *flagp is set to WORST and then has HASWIDTH / SIMPLE / SPSTART
 * or'ed in depending on the atom.  Returns the node that was emitted.
 *
 * Error paths go through _FAIL, which is defined elsewhere; this code
 * presumably relies on it reporting the message and returning from the
 * function -- TODO confirm.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(i4 *flagp)
{
    register char *ret = NULL;
    i4 flags;
    char null_byte = '\0';

    *flagp = WORST;		/* Tentatively. */

    switch (*regparse) {
    case '^':
        CMnext( regparse );
        ret = regnode(BOL);
        break;
    case '$':
        CMnext( regparse );
        ret = regnode(EOL);
        break;
    case '.':
        CMnext( regparse );
        ret = regnode(ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;
    case '[': {
        char *range_start = NULL;
        /*
         * Initialized because a leading ']' or '-' is consumed below
         * without recording a range start, so a following "-x" would
         * otherwise read these before they were ever assigned.
         */
        bool double_start = FALSE;
        u_char first_u1 = 0, last_u1;

        CMnext( regparse );
        if (*regparse == '^') {	/* Complement of range. */
            ret = regnode(ANYBUT);
            CMnext( regparse );
        } else
            ret = regnode(ANYOF);
        if (*regparse == ']' || *regparse == '-') {
            regc( regparse );
            CMnext( regparse );
        }
        while (*regparse != '\0' && *regparse != ']') {
            if (*regparse == '-') {
                char range_op = '-';

                CMnext( regparse );
                if( *regparse == ']' ||
                        *regparse == '\0'
                  )
                    regc( &range_op );	/* Trailing '-' is a literal. */
                else {
                    bool invalid = FALSE;
                    bool double_end;
                    unsigned int code;

                    if( range_start == NULL )
                        invalid = TRUE;

                    double_end =
                        CMdbl1st( regparse );

                    /* Endpoints must agree on single- vs double-byte. */
                    if( !invalid &&
                            double_end
                            && !double_start
                      )
                        invalid = TRUE;

                    if( !invalid &&
                            double_start
                            && !double_end
                      )
                        invalid = TRUE;

                    if( !invalid &&
                            CMcmpcase( range_start,
                                       regparse ) > 0
                      )
                        invalid = TRUE;

                    if( double_start )
                        _FAIL("don't know how to support character classes containing double-byte ranges");

                    if( invalid )
                        _FAIL("invalid [] range");
                    /* no double-byte ranges! */
                    /*
                    ** Emit every member from the start of the range to
                    ** its end.  The counter is wider than u_char so that
                    ** a range ending at 0xFF terminates instead of
                    ** wrapping around to 0 and looping forever.
                    */
                    last_u1 = UCHARAT(regparse);
                    for (code = first_u1; code <= last_u1; code++) {
                        u_char member = (u_char) code;

                        regc( (char *) &member );
                    }

                    CMnext( regparse );
                }
            } else {
                /* Ordinary member; remember it as a possible range start. */
                range_start = regparse;
                if( CMdbl1st( range_start ) )
                {
                    double_start = TRUE;
                }
                else
                {
                    double_start = FALSE;
                    first_u1 = UCHARAT(range_start);
                }
                regc( regparse );
                CMnext( regparse );
            }
        }
        regc( &null_byte );
        if (*regparse != ']')
            _FAIL("unmatched []");
        CMnext( regparse );
        *flagp |= HASWIDTH|SIMPLE;
    }
    break;
    case '(':
        CMnext( regparse );
        ret = reg(1, &flags);
        if (ret == NULL)
            return(NULL);
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        CMnext( regparse );
        _FAIL("internal urp");	/* Supposed to be caught earlier. */
        break;
    case '?':
    case '+':
    case '*':
        CMnext( regparse );
        _FAIL("?+* follows nothing");
        break;
    case '\\':
        CMnext( regparse );
        if (*regparse == '\0')
            _FAIL("trailing \\");
        /* The escaped character becomes its own EXACTLY node. */
        ret = regnode(EXACTLY);
        regc( regparse );
        CMnext( regparse );
        regc( &null_byte );
        *flagp |= HASWIDTH|SIMPLE;
        break;
    default: {
        register i4  len;
        register char ender;

        len = my_strcspn(regparse, META);
        if (len <= 0)
            _FAIL("internal disaster");
        ender = *(regparse+len);
        if (len > 1 && ISMULT(ender))
            len--;	/* Back off clear of ?+* operand. */
        *flagp |= HASWIDTH;
        if (len == 1)
            *flagp |= SIMPLE;
        ret = regnode(EXACTLY);
        while (len > 0) {
            regc( regparse );
            CMbytedec( len, regparse );
            CMnext( regparse );
        }
        regc( &null_byte );
    }
    break;
    }

    return(ret);
}
Exemplo n.º 20
0
/*
 * regatom - the lowest level
 *
 * Compiles the atom that starts at regparse.  *flagp is first set to WORST
 * and then has HASWIDTH/SIMPLE or'ed in for atoms that match exactly one
 * character.  Errors are reported through FAIL with an error code.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * NOTE(review): this listing is cut off inside the '[' case; the remaining
 * cases and the function's return are not visible here.
 */
static char *regatom( int *flagp )
{
    char *ret;
    int flags;

    *flagp = WORST;         /* Tentatively. */

    /* Dispatch on the leading character; regparse already points past it. */
    switch( *regparse++ ) {
    case '~':
        /* Emits a CASEI node; rejected when it is the last pattern char. */
        if( *regparse == 0 ) {
            FAIL( ERR_RE_INVALID_CASETOGGLE );
        }
        ret = regnode( CASEI );
        break;
    case '@':
        /* Emits a NOCASEI node; rejected when it is the last pattern char. */
        if( *regparse == 0 ) {
            FAIL( ERR_RE_INVALID_CASETOGGLE );
        }
        ret = regnode( NOCASEI );
        break;
    case '^':
        ret = regnode( BOL );
        break;
    case '$':
        ret = regnode( EOL );
        break;
    case '.':
        ret = regnode( ANY );
        *flagp |= HASWIDTH | SIMPLE;
        break;
    case '[':
        {
            if( *regparse == '^' ) { /* Complement of range. */
                ret = regnode( ANYBUT );
                regparse++;
            } else {
                ret = regnode( ANYOF );
            }
            /* A ']' or '-' in first position is taken literally. */
            if( *regparse == ']' || *regparse == '-' ) {
                regc( *regparse++ );
            }
            while( *regparse != '\0' && *regparse != ']' ) {
                if( *regparse == '-' ) {
                    regparse++;
                    if( *regparse == ']' || *regparse == '\0' ) {
                        /* Trailing '-' is a literal member. */
                        regc( '-' );
                    } else {
                        int class;
                        int classend;

                        /*
                         * regparse - 2 is the character before the '-'; it
                         * was already emitted, so the range starts one
                         * past it.
                         */
                        class = UCHARAT( regparse - 2 ) + 1;
                        classend = UCHARAT( regparse );
                        if( class > classend + 1 ) {
                            FAIL( ERR_RE_INVALID_SB_RANGE );
                        }
                        for( ; class <= classend; class++ ) {
                            regc( (char)class );
                        }
                        regparse++;
                    }
                } else {
                    /* "\t" inside a class becomes a tab when REALTABS is nonzero. */
                    if( *regparse == '\\' && *( regparse + 1 ) == 't' && REALTABS ) {
                        regparse += 2;
                        regc( '\t' );
                    } else {
                        regc( *regparse++ );
                    }
                }
            }
            /* Terminate the member list. */
            regc( '\0' );
            if( *regparse != ']' ) {
                FAIL( ERR_RE_UNMATCHED_SQUARE_BRACKET );
            }
            regparse++;
            *flagp |= HASWIDTH | SIMPLE;
        }
Exemplo n.º 21
0
/*
 - reg - compile a whole expression or the inside of a parenthesized group
 *
 * The opening parenthesis is not consumed here; the closing one is.  A
 * group that begins with "?:" is emitted with group number -1 and does not
 * bump preg->re_nsub; any other group takes the next re_nsub value.
 *
 * On return *flagp has HASWIDTH only if every alternative had it, and
 * SPSTART if any alternative had it.  Returns the index of the first node,
 * or 0 on failure (preg->err is set for the termination errors detected
 * here).
 */
static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
{
	int head = 0;
	int alt;
	int closer;
	int group = 0;
	int altflags;

	*flagp = HASWIDTH;	/* Assume width until an alternative says otherwise. */

	/* A parenthesized expression starts with an OPEN node. */
	if (paren) {
		const int noncapturing =
			(preg->regparse[0] == '?' && preg->regparse[1] == ':');

		if (noncapturing) {
			preg->regparse += 2;
			group = -1;
		}
		else {
			group = ++preg->re_nsub;
		}
		head = regnode(preg, OPEN+group);
	}

	/* Collect the alternatives, chaining each onto the previous ones. */
	for (;;) {
		alt = regbranch(preg, &altflags);
		if (alt == 0)
			return 0;

		if (head == 0)
			head = alt;			/* Very first node of the expression. */
		else
			regtail(preg, head, alt);	/* OPEN -> first, BRANCH -> BRANCH. */

		if (!(altflags&HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= altflags&SPSTART;

		if (*preg->regparse != '|')
			break;
		preg->regparse++;
	}

	/* Terminate with CLOSE (group) or END (top level). */
	closer = regnode(preg, (paren) ? CLOSE+group : END);
	regtail(preg, head, closer);

	/* Point the tail of every alternative at the terminator. */
	alt = head;
	while (alt != 0) {
		regoptail(preg, alt, closer);
		alt = regnext(preg, alt);
	}

	/* Verify that the expression ended where it should. */
	if (paren) {
		if (*preg->regparse++ != ')') {
			preg->err = REG_ERR_UNMATCHED_PAREN;
			return 0;
		}
	}
	else if (*preg->regparse != '\0') {
		preg->err = (*preg->regparse == ')')
			? REG_ERR_UNMATCHED_PAREN
			: REG_ERR_JUNK_ON_END;
		return 0;
	}

	return head;
}
Exemplo n.º 22
0
/*
 - reg - compile a whole expression or the inside of a parenthesized group
 *
 * The opening parenthesis is not consumed here; the closing one is.  At
 * most NSUBEXP groups are accepted.
 *
 * On return *flagp has HASWIDTH only if every alternative had it, and
 * SPSTART if any alternative had it.  Returns the first node, or 0/NULL
 * after printing a diagnostic.
 */
char* ossimRegExp::reg (int paren, int *flagp) {
    char* head = NULL;
    char* alt;
    char* closer;
    int   group = 0;
    int   altflags;

    *flagp = HASWIDTH;		// Assume width until an alternative says otherwise.

    // A parenthesized expression starts with an OPEN node.
    if (paren) {
        if (regnpar >= NSUBEXP) {
            //RAISE Error, SYM(ossimRegExp), SYM(Too_Many_Parens),
            printf ("ossimRegExp::compile(): Too many parentheses.\n");
            return 0;
        }
        group = regnpar;
        regnpar++;
        head = regnode(OPEN + group);
    }

    // Collect the alternatives, chaining each onto the previous ones.
    for (;;) {
        alt = regbranch(&altflags);
        if (alt == NULL)
            return (NULL);

        if (head == NULL)
            head = alt;			// Very first node of the expression.
        else
            regtail(head, alt);		// OPEN -> first, BRANCH -> BRANCH.

        if (!(altflags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= altflags & SPSTART;

        if (*regparse != '|')
            break;
        regparse++;
    }

    // Terminate with CLOSE (group) or END (top level).
    closer = regnode((paren) ? CLOSE + group : END);
    regtail(head, closer);

    // Point the tail of every alternative at the terminator.
    alt = head;
    while (alt != NULL) {
        regoptail(alt, closer);
        alt = regnext(alt);
    }

    // Verify that the expression ended where it should.
    if (paren) {
        if (*regparse++ != ')') {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
            printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
            return 0;
        }
    }
    else if (*regparse != '\0') {
        if (*regparse != ')') {
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::compile(): Internal error.\n");
        }
        else {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
            printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
        }
        return 0;
    }
    return (head);
}
Exemplo n.º 23
0
/*
 * regcomp - compile the pattern pat[0..len) into the node tree held in expr.
 *
 *   expr - destination; zeroed here and then filled through regpat(),
 *          regnode(), regspcl(), regappend() and regminimax().
 *   size - stored in expr->size; must be at least sizeof(*expr).
 *   pat  - pattern bytes; only indices below len are read.
 *   len  - number of pattern bytes.
 *
 * Returns 1 when the whole pattern was consumed, 0 on a malformed pattern
 * or when a helper fails to produce a node.
 *
 * NOTE(review): when the initial regpat() call yields NULL the loop is
 * skipped and 1 is still returned -- confirm whether that is intended.
 */
int regcomp (struct Expr *expr, int size, uchar *pat, int len)
{
int bnest = 0, off = 0, ch;
struct Node *node = NULL;	/* most recent atom; target of ?, +, *, {} */
struct Node *prev = NULL;	/* node that the next atom is appended after */
struct Node *parent;		/* expression node currently being filled */

  /* a negative size must not slip through the unsigned comparison */
  if( size < 0 || (unsigned)size < sizeof(*expr) )
	return 0;

  memset (expr, 0, sizeof(*expr));
  expr->size = size;

  if( (parent = regpat(expr, NULL, 0, NULL)) )
	while( off < len ) {
	  switch( ch = pat[off] ) {
	  case '{':
		if( node ) {
			off += regminimax (expr, pat + off, len - off, node);
			continue;
		}
		return 0;

	  case ']':
		return 0;

	  case '[':
		if( (node = regpat(expr, pat + off++, 1, parent)) )
			bnest = 1;
		else
			return 0;

		/* extend the node over the bracket text, tracking nesting */
		while( off < len && bnest )
			if( pat[off] == '[' )
				bnest++, off++, node->typelen++;
			else if( pat[off] == ']' )
				--bnest, off++, node->typelen++;
			else
				off++, node->typelen++;

		regappend (node, prev);
		prev = node;
		continue;

	  // "or" node

	  case '|':
		if( (node = regnode(expr)) ) {
			node->typelen = -1;
			node->parent = parent;
			node->minimum = 0;
			node->maximum = 1;
		} else
			return 0;


		//	if already underway,
		//	move node chain under
		//	new "or" node

		if( parent->ornode ) {
			node->type->child = parent->ornode->next;
			parent->ornode->next = node;
		} else {
			node->type->child = parent->type->child;
			parent->type->child = node;
		}

		parent->ornode = prev = node;

		//	reparent child nodes
		//	under new "or" node

		if( (node = node->type->child) )
		  do node->parent = prev;
		  while( (node = node->next) );

		off++;
		continue;

	  case '(':
		if( (parent = regpat (expr, NULL, 0, parent)) ) // expression node
			regappend (parent, prev);
		else
			return 0;
		prev = node = NULL;
		off++;
		continue;

	  case ')':
		if( (node = parent) ) {
			//	an unmatched ')' at the outermost level has
			//	no enclosing expression to return to

			if( !node->parent )
				return 0;

			off++;
			parent = node->parent;
			if( (prev = parent->type->child) )
				while( prev->next )
					prev = prev->next;
			
			continue;
		}
		return 0;

	  case '\\':
		off++;

		//	a trailing backslash has nothing to escape;
		//	do not read past the end of the pattern

		if( off >= len )
			return 0;

		if( (pat[off] >= 'A' && pat[off] <= 'Z') || (pat[off] >= 'a' && pat[off] <= 'z') )
		  if( (node = regspcl(expr, pat + off, parent)) ) {
			regappend (node, prev);
			prev = node;
			off++;
			continue;
		  }

		//	otherwise the escaped character is a literal
		/* fallthrough */

	  default:
		if( (node = regpat(expr, pat + off++, 1, parent)) )
			regappend (node, prev);
		else
			return 0;

		prev = node;
		continue;

	  case '?':
		if( node ) {
			node->minimum = 0;
			node->maximum = 1;
			off++;
			continue;
		}
		return 0;

	  case '+':
		if( node ) {
			node->minimum = 1;
			node->maximum = 0x7fffffff;
			off++;
			continue;
		}
		return 0;

	  case '*':
		if( node ) {
			node->minimum = 0;
			node->maximum = 0x7fffffff;
			off++;
			continue;
		}
		return 0;
	  }
	}

	return 1;
}
Exemplo n.º 24
0
/*
 - regpiece - an atom followed by an optional repetition operator
 *
 * Compiles one atom with regatom().  If the next character is not a
 * repetition operator (per ISMULT) the atom is returned unchanged.
 * Otherwise a repeat node is inserted in front of the atom with
 * reginsert(), and the maximum, minimum and a zero are stored in the three
 * program slots that follow it.  A '?' directly after the operator selects
 * the REPMIN/REPXMIN opcodes instead of REP/REPX.
 *
 * "{min}", "{min,}" and "{min,max}" style counts are parsed with strtoul;
 * counts of 100 or more are rejected with REG_ERR_BAD_COUNT.
 *
 * Returns the index of the atom, or 0 with preg->err set on failure.
 */
static int regpiece(regex_t *preg, int *flagp)
{
	int atom;
	char suffix;
	int inserted;
	int atomflags;
	int lo;
	int hi;
	int lazy;
	int opcode;

	atom = regatom(preg, &atomflags);
	if (atom == 0)
		return 0;

	/* No repetition operator: the piece is just the atom. */
	suffix = *preg->regparse;
	if (!ISMULT(suffix)) {
		*flagp = atomflags;
		return atom;
	}

	/* Only '?' may be applied to something that can match nothing. */
	if (suffix != '?' && !(atomflags&HASWIDTH)) {
		preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
		return 0;
	}

	if (suffix != '{') {
		lo = (suffix == '+');
		hi = (suffix == '?') ? 1 : MAX_REP_COUNT;
	}
	else {
		/* Counted repetition. */
		char *stop;

		lo = strtoul(preg->regparse + 1, &stop, 10);
		if (stop == preg->regparse + 1) {
			/* No digits after '{'. */
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (*stop != '}') {
			/* Step onto the separator and read the upper bound. */
			preg->regparse = stop;
			hi = strtoul(preg->regparse + 1, &stop, 10);
			if (*stop != '}') {
				preg->err = REG_ERR_UNMATCHED_BRACES;
				return 0;
			}
		}
		else {
			hi = lo;
		}
		if (stop == preg->regparse + 1) {
			/* Nothing between the separator and '}': open-ended. */
			hi = MAX_REP_COUNT;
		}
		else if (hi < lo || hi >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (lo >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}

		/* Leave regparse on the closing brace. */
		preg->regparse = strchr(preg->regparse, '}');
	}

	/* A '?' right after the operator picks the *MIN opcode variants. */
	lazy = (preg->regparse[1] == '?');
	if (lazy)
		preg->regparse++;
	if (atomflags & SIMPLE)
		opcode = lazy ? REPMIN : REP;
	else
		opcode = lazy ? REPXMIN : REPX;
	inserted = reginsert(preg, opcode, 5, atom);

	preg->program[atom + 2] = hi;
	preg->program[atom + 3] = lo;
	preg->program[atom + 4] = 0;

	*flagp = (lo) ? (WORST|HASWIDTH) : (WORST|SPSTART);

	/* Non-simple operands get a BACK node linking back to the repeat. */
	if (!(atomflags & SIMPLE)) {
		int back = regnode(preg, BACK);
		regtail(preg, back, atom);
		regtail(preg, inserted, back);
	}

	/* Step past the operator; a second operator in a row is an error. */
	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
		preg->err = REG_ERR_NESTED_COUNT;
		return 0;
	}

	return atom;
}
Exemplo n.º 25
0
/*
 - regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
char* ossimRegExp::regatom (int *flagp) {
    char* ret;
    int   flags;

    *flagp = WORST;		// Tentatively.

    switch (*regparse++) {
    case '^':
        ret = regnode(BOL);
        break;
    case '$':
        ret = regnode(EOL);
        break;
    case '.':
        ret = regnode(ANY);
        *flagp |= HASWIDTH | SIMPLE;
        break;
    case '[': {
        int    rxpclass;
        int    rxpclassend;

        if (*regparse == '^') {	// Complement of range.
            ret = regnode(ANYBUT);
            regparse++;
        }
        else
            ret = regnode(ANYOF);
        if (*regparse == ']' || *regparse == '-')
            regc(*regparse++);
        while (*regparse != '\0' && *regparse != ']') {
            if (*regparse == '-') {
                regparse++;
                if (*regparse == ']' || *regparse == '\0')
                    regc('-');
                else {
                    rxpclass = UCHARAT(regparse - 2) + 1;
                    rxpclassend = UCHARAT(regparse);
                    if (rxpclass > rxpclassend + 1) {
                        //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range),
                        printf ("ossimRegExp::compile(): Invalid range in [].\n");
                        return 0;
                    }
                    for (; rxpclass <= rxpclassend; rxpclass++)
                        regc(rxpclass);
                    regparse++;
                }
            }
            else
                regc(*regparse++);
        }
        regc('\0');
        if (*regparse != ']') {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket),
            printf ("ossimRegExp::compile(): Unmatched [].\n");
            return 0;
        }
        regparse++;
        *flagp |= HASWIDTH | SIMPLE;
    }
    break;
    case '(':
        ret = reg(1, &flags);
        if (ret == NULL)
            return (NULL);
        *flagp |= flags & (HASWIDTH | SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
        printf ("ossimRegExp::compile(): Internal error.\n"); // Never here
        return 0;
    case '?':
    case '+':
    case '*':
        //RAISE Error, SYM(ossimRegExp), SYM(No_Operand),
        printf ("ossimRegExp::compile(): ?+* follows nothing.\n");
        return 0;
    case '\\':
        if (*regparse == '\0') {
            //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash),
            printf ("ossimRegExp::compile(): Trailing backslash.\n");
            return 0;
        }
        ret = regnode(EXACTLY);
        regc(*regparse++);
        regc('\0');
        *flagp |= HASWIDTH | SIMPLE;
        break;
    default: {
        int    len;
        char   ender;

        regparse--;
        len = (int)strcspn(regparse, META);
        if (len <= 0) {
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::compile(): Internal error.\n");
            return 0;
        }
        ender = *(regparse + len);
        if (len > 1 && ISMULT(ender))
            len--;	// Back off clear of ?+* operand.
        *flagp |= HASWIDTH;
        if (len == 1)
            *flagp |= SIMPLE;
        ret = regnode(EXACTLY);
        while (len > 0) {
            regc(*regparse++);
            len--;
        }
        regc('\0');
    }
    break;
    }
    return (ret);
}
Exemplo n.º 26
0
/*
 - regatom - compile a single atom
 *
 * Consumes the atom at regparse and emits its node.  *flagp starts as
 * WORST and gains HASWIDTH/SIMPLE/SPSTART as appropriate.  Errors are
 * reported through FAIL.
 *
 * A run of ordinary characters is folded into one EXACTLY node; a
 * backslashed character always gets an EXACTLY node of its own.
 */
static char *
regatom(int *flagp)
{
	register char *node;
	int inner;

	*flagp = WORST;		/* Until proven otherwise. */

	switch (*regparse++) {
	case '^':
		node = regnode(BOL);
		break;
	case '$':
		node = regnode(EOL);
		break;
	case '.':
		node = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			register int lo;
			register int hi;

			if (*regparse != '^') {
				node = regnode(ANYOF);
			} else {	/* Negated set. */
				node = regnode(ANYBUT);
				regparse++;
			}
			/* A ']' or '-' in first position is an ordinary member. */
			if (*regparse == ']' || *regparse == '-') {
				regc(*regparse++);
			}
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse != '-') {
					regc(*regparse++);
					continue;
				}
				regparse++;
				if (*regparse == ']' || *regparse == '\0') {
					regc('-');	/* Trailing '-' is a literal. */
					continue;
				}
				/*
				 * The character before the '-' is already in the
				 * set, so the range proper starts one past it.
				 */
				lo = UCHARAT(regparse-2)+1;
				hi = UCHARAT(regparse);
				if (lo > hi+1) {
					FAIL("invalid [] range");
				}
				while (lo <= hi) {
					regc(lo);
					lo++;
				}
				regparse++;
			}
			regc('\0');
			if (*regparse != ']') {
				FAIL("unmatched []");
			}
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		node = reg(1, &inner);
		if (node == NULL)
			return(NULL);
		*flagp |= inner&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		/* NOTREACHED */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		/* NOTREACHED */
		break;
	case '\\':
		if (*regparse == '\0') {
			FAIL("trailing \\");
		}
		node = regnode(EXACTLY);
		regc(*regparse++);
		regc('\0');
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
			register int run;
			register char after;

			/* Step back onto the character the switch consumed. */
			regparse--;
			run = (int) strcspn(regparse, META);
			if (run <= 0) {
				FAIL("internal disaster");
			}
			after = *(regparse+run);
			/* Leave the last character for a following ?+* to use. */
			if (run > 1 && ISMULT(after))
				run--;
			*flagp |= HASWIDTH;
			if (run == 1)
				*flagp |= SIMPLE;
			node = regnode(EXACTLY);
			for (; run > 0; run--)
				regc(*regparse++);
			regc('\0');
		}
		break;
	}

	return(node);
}
Exemplo n.º 27
0
/*
 - reg - compile a whole expression or the inside of a parenthesized group
 *
 * The opening parenthesis is not consumed here; the closing one is.  In
 * this variant a newline separates alternatives exactly like '|' does.
 * At most NSUBEXP groups are accepted.
 *
 * On return *flagp has HASWIDTH only if every alternative had it, and
 * SPSTART if any alternative had it.  Errors are reported through FAIL.
 */
static char *
reg(
	int paren,			/* Parenthesized? */
	int *flagp )
{
	register char *head = NULL;
	register char *alt;
	register char *closer;
	register int group = 0;
	int altflags;

	*flagp = HASWIDTH;	/* Assume width until an alternative says otherwise. */

	/* A parenthesized expression starts with an OPEN node. */
	if (paren) {
		if (regnpar >= NSUBEXP) {
			FAIL("too many ()");
		}
		group = regnpar;
		regnpar++;
		head = regnode(OPEN+group);
	}

	/* Collect the alternatives, chaining each onto the previous ones. */
	for (;;) {
		alt = regbranch(&altflags);
		if (alt == NULL)
			return(NULL);

		if (head == NULL)
			head = alt;		/* Very first node of the expression. */
		else
			regtail(head, alt);	/* OPEN -> first, BRANCH -> BRANCH. */

		if (!(altflags&HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= altflags&SPSTART;

		if (*regparse != '|' && *regparse != '\n')
			break;
		regparse++;
	}

	/* Terminate with CLOSE (group) or END (top level). */
	closer = regnode((paren) ? CLOSE+group : END);
	regtail(head, closer);

	/* Point the tail of every alternative at the terminator. */
	alt = head;
	while (alt != NULL) {
		regoptail(alt, closer);
		alt = regnext(alt);
	}

	/* Verify that the expression ended where it should. */
	if (paren) {
		if (*regparse++ != ')') {
			FAIL("unmatched ()");
		}
	} else if (*regparse != '\0') {
		if (*regparse == ')') {
			FAIL("unmatched ()");
		} else {
			FAIL("junk on end");	/* "Can't happen". */
		}
		/* NOTREACHED */
	}

	return(head);
}
Exemplo n.º 28
0
/*
 - regatom - the lowest level
 *
 * Decodes one character at preg->regparse with reg_utf8_tounicode_case()
 * and compiles the atom it introduces.  *flagp is set to WORST and then
 * has HASWIDTH/SIMPLE/SPSTART or'ed in as appropriate.  Returns the index
 * of the emitted node, or 0 on failure (preg->err is set on the failure
 * paths inside this function).
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static int regatom(regex_t *preg, int *flagp)
{
	int ret;
	int flags;
	int nocase = (preg->cflags & REG_ICASE);

	/* ch is the decoded first character; n is what the decoder returned
	 * (presumably its encoded length in bytes -- it is added to regparse). */
	int ch;
	int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

	*flagp = WORST;		/* Tentatively. */

	preg->regparse += n;
	switch (ch) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(preg, BOL);
		break;
	case '$':
		ret = regnode(preg, EOL);
		break;
	case '.':
		ret = regnode(preg, ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Work on a local cursor; regparse is updated once at the end. */
			const char *pattern = preg->regparse;

			if (*pattern == '^') {	/* Complement of range. */
				ret = regnode(preg, ANYBUT);
				pattern++;
			} else
				ret = regnode(preg, ANYOF);

			/* Special case. If the first char is ']' or '-', it is part of the set */
			if (*pattern == ']' || *pattern == '-') {
				reg_addrange(preg, *pattern, *pattern);
				pattern++;
			}

			while (*pattern && *pattern != ']') {
				/* Is this a range? a-z */
				int start;
				int end;

				pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
				if (start == '\\') {
					pattern += reg_decode_escape(pattern, &start);
					/* An escape that decodes to 0 is rejected. */
					if (start == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}
				/* A '-' followed by something other than ']' or the end
				 * of the pattern makes this a range. */
				if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
					/* skip '-' */
					pattern += utf8_tounicode(pattern, &end);
					pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
					if (end == '\\') {
						pattern += reg_decode_escape(pattern, &end);
						if (end == 0) {
							preg->err = REG_ERR_NULL_CHAR;
							return 0;
						}
					}

					reg_addrange(preg, start, end);
					continue;
				}
				/* "[:alpha:]", "[:alnum:]" and "[:space:]" are the only
				 * named classes recognized here.  The lowercase range is
				 * skipped when REG_ICASE is set. */
				if (start == '[') {
					if (strncmp(pattern, ":alpha:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":alnum:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						reg_addrange(preg, '0', '9');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":space:]", 8) == 0) {
						reg_addrange_str(preg, " \t\r\n\f\v");
						pattern += 8;
						continue;
					}
				}
				/* Not a range, so just add the char */
				reg_addrange(preg, start, start);
			}
			regc(preg, '\0');

			/* Step over the closing ']' if there is one.  No error is
			 * raised here when the pattern ends before a ']'. */
			if (*pattern) {
				pattern++;
			}
			preg->regparse = pattern;

			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(preg, 1, &flags);
		if (ret == 0)
			return 0;
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		preg->err = REG_ERR_INTERNAL;
		return 0;	/* Supposed to be caught earlier. */
	case '?':
	case '+':
	case '*':
	case '{':
		preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
		return 0;
	case '\\':
		/* Dispatch on the character after the backslash (consumed here). */
		switch (*preg->regparse++) {
		case '\0':
			preg->err = REG_ERR_TRAILING_BACKSLASH;
			return 0;
		case '<':
		case 'm':
			ret = regnode(preg, WORDA);
			break;
		case '>':
		case 'M':
			ret = regnode(preg, WORDZ);
			break;
		case 'd':
			/* \d: the set '0'..'9'. */
			ret = regnode(preg, ANYOF);
			reg_addrange(preg, '0', '9');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'w':
			/* \w: letters, digits and '_' (lowercase range skipped
			 * when REG_ICASE is set). */
			ret = regnode(preg, ANYOF);
			if ((preg->cflags & REG_ICASE) == 0) {
				reg_addrange(preg, 'a', 'z');
			}
			reg_addrange(preg, 'A', 'Z');
			reg_addrange(preg, '0', '9');
			reg_addrange(preg, '_', '_');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			/* \s: space, tab, CR, LF, form feed, vertical tab. */
			ret = regnode(preg, ANYOF);
			reg_addrange_str(preg," \t\r\n\f\v");
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			/* Back up to include the backslash */
			preg->regparse--;
			goto de_fault;
		}
		break;
	de_fault:
	default: {
			/*
			 * Encode a string of characters to be matched exactly.
			 */
			/* Number of characters emitted into this EXACTLY node. */
			int added = 0;

			/* Back up to pick up the first char of interest */
			preg->regparse -= n;

			ret = regnode(preg, EXACTLY);

			/* Note that a META operator such as ? or * consumes the
			 * preceding char.
			 * Thus we must be careful to look ahead by 2 and add the
			 * last char as it's own EXACTLY if necessary
			 */

			/* Until end of string or a META char is reached */
			while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
				n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
				if (ch == '\\' && preg->regparse[n]) {
					/* Non-trailing backslash.
					 * Is this a special escape, or a regular escape?
					 */
					if (strchr("<>mMwds", preg->regparse[n])) {
						/* A special escape. All done with EXACTLY */
						break;
					}
					/* Decode it. Note that we add the length for the escape
					 * sequence to the length for the backlash so we can skip
					 * the entire sequence, or not as required.
					 */
					n += reg_decode_escape(preg->regparse + n, &ch);
					if (ch == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}

				/* Now we have one char 'ch' of length 'n'.
				 * Check to see if the following char is a MULT
				 */

				if (ISMULT(preg->regparse[n])) {
					/* Yes. But do we already have some EXACTLY chars? */
					if (added) {
						/* Yes, so return what we have and pick up the current char next time around */
						break;
					}
					/* No, so add this single char and finish */
					regc(preg, ch);
					added++;
					preg->regparse += n;
					break;
				}

				/* No, so just add this char normally */
				regc(preg, ch);
				added++;
				preg->regparse += n;
			}
			regc(preg, '\0');

			*flagp |= HASWIDTH;
			/* Exactly one character emitted: the node is SIMPLE. */
			if (added == 1)
				*flagp |= SIMPLE;
			break;
		}
		/* Not reached: the block above always ends with its own break. */
		break;
	}

	return(ret);
}
Exemplo n.º 29
0
/*
 - regatom - the lowest level
 *
 * Parses a single atom starting at regparse and emits the node(s) for it.
 * Returns the first node emitted; returns NULL when a parenthesized
 * sub-expression fails to parse.  *flagp is reset to WORST and then has
 * HASWIDTH, SIMPLE and/or SPSTART OR-ed in as the atom warrants.
 * (FAIL is defined elsewhere in the file; presumably it reports the
 * message and returns NULL from this function -- confirm.)
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  In this version a backslash-quoted character (anything
 * other than \< and \>) is folded into that same EXACTLY string instead
 * of becoming a node of its own.
 */
static char *
regatom( int *flagp )
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* Consume the first character of the atom and dispatch on it. */
	switch (*regparse++) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Character class: the operand is the NUL-terminated
			 * list of member characters, with ranges expanded. */
			register int classr;
			register int classend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else
				ret = regnode(ANYOF);
			/* A ']' or '-' in first position is taken literally. */
			if (*regparse == ']' || *regparse == '-')
				regc(*regparse++);
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' just before the closing ']' (or
					 * the end of the pattern) is a literal. */
					if (*regparse == ']' || *regparse == '\0')
						regc('-');
					else {
						/* regparse-2 is the char before the
						 * '-'; it was already emitted, so the
						 * expansion starts one above it. */
						classr = UCHARAT(regparse-2)+1;
						classend = UCHARAT(regparse);
						/* Reject reversed ranges; x-x is
						 * allowed and adds nothing. */
						if (classr > classend+1)
							FAIL("invalid [] range");
						for (; classr <= classend; classr++)
							regc(classr);
						regparse++;
					}
				} else
					regc(*regparse++);
			}
			regc('\0');
			if (*regparse != ']')
				FAIL("unmatched []");
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		/* Parenthesized sub-expression; only its HASWIDTH and
		 * SPSTART flags are passed up. */
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case '\n':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		break;
	case '\\':
		/* Consume the character after the backslash as well. */
		switch (*regparse++) {
		case '\0':
			FAIL("trailing \\");
			break;
		case '<':
			ret = regnode(WORDA);
			break;
		case '>':
			ret = regnode(WORDZ);
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			goto de_fault;
		}
		break;
	de_fault:
	default:
		/*
		 * Encode a string of characters to be matched exactly.
		 *
		 * This is a bit tricky due to quoted chars and due to
		 * '*', '+', and '?' taking the SINGLE char previous
		 * as their operand.
		 *
		 * On entry, the char at regparse[-1] is going to go
		 * into the string, no matter what it is.  (It could be
		 * following a \ if we are entered from the '\' case.)
		 *
		 * Basic idea is to pick up a good char in  ch  and
		 * examine the next char.  If it's *+? then we either end
		 * the string before  ch  or, when  ch  is alone, keep it
		 * as a one-char string.  If it's \ then we add  ch  and
		 * either stop (for \< \> or a trailing \) or continue with
		 * the quoted char.  If it's another magic char we push
		 * ch  and terminate the string.  If none of the above,
		 * we push  ch  on the string and go around again.
		 *
		 *  regprev  is used to remember where "the current char"
		 * starts in the string, if due to a *+? we need to back
		 * up and put the current char in a separate, 1-char, string.
		 * When  regprev  is NULL,  ch  is the only char in the
		 * string; this is used in *+? handling, and in setting
		 * flags |= SIMPLE at the end.
		 */
		{
			char *regprev;
			register char ch;

			regparse--;			/* Look at cur char */
			ret = regnode(EXACTLY);
			for ( regprev = 0 ; ; ) {
				ch = *regparse++;	/* Get current char */
				switch (*regparse) {	/* look at next one */

				default:
					regc(ch);	/* Add cur to string */
					break;

				case '.': case '[': case '(':
				case ')': case '|': case '\n':
				case '$': case '^':
				case '\0':
				/* FIXME, $ and ^ should not always be magic */
				magic:
					regc(ch);	/* dump cur char */
					goto done;	/* and we are done */

				case '?': case '+': case '*':
					if (!regprev) 	/* If just ch in str, */
						goto magic;	/* use it */
					/* End mult-char string one early:  ch  is
					 * not added, and the next call re-reads it
					 * as the operand of the operator. */
					regparse = regprev; /* Back up parse */
					goto done;

				case '\\':
					regc(ch);	/* Cur char OK */
					switch (regparse[1]){ /* Look after \ */
					case '\0':
					case '<':
					case '>':
					/* FIXME: Someday handle \1, \2, ... */
						/* regparse is left on the backslash
						 * for the next call to handle. */
						goto done; /* Not quoted */
					default:
						/* Backup point is \, scan
						 * point is after it. */
						regprev = regparse;
						regparse++; 
						continue;	/* NOT break; */
					}
				}
				regprev = regparse;	/* Set backup point */
			}
		done:
			regc('\0');		/* Terminate the operand string */
			*flagp |= HASWIDTH;
			if (!regprev)		/* One char? */
				*flagp |= SIMPLE;
		}
		break;
	}

	return(ret);
}
Exemplo n.º 30
0
/*
 - regpiece - something followed by possible [*+?{]
 *
 * Parses one atom with regatom() and, when a repetition operator follows,
 * rewrites the emitted node(s) to implement it.  Returns the first node of
 * the piece, or NULL on error.  *flagp receives the flags of the piece.
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 *
 * A bounded repetition is written {min<sep>max}, where <sep> is any single
 * character (conventionally ','); either count may be omitted and then
 * reads as 0.  {n} is accepted as shorthand for {n<sep>n}.  Counts are
 * stored in one byte each, so values above 255 are rejected, and the
 * bound must be closed by '}'.
 * NOTE(review): how the matcher interprets max == 0 is not visible here.
 */
static char *
regpiece(int *flagp)
{
	register char	*next;
	register char	*ret;
	register char	op;
	unsigned char	max;
	unsigned char	min;
	int		flags;

	ret = regatom(&flags);
	if (ret == NULL) {
		return(NULL);
	}

	op = *regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		FAIL("*+{ operand could be empty");
	}
	*flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH);

	if (op == '*' && (flags&SIMPLE)) {
		reginsert(STAR, ret);
	} else if (op == '*') {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(BACK));		/* and loop */
		regoptail(ret, ret);			/* back */
		regtail(ret, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '+' && (flags&SIMPLE)) {
		reginsert(PLUS, ret);
	} else if (op == '+') {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);		/* loop back */
		regtail(next, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '{') {
		/* Largest count that fits in the one-byte min/max nodes. */
		const unsigned int	biggest = (unsigned char)-1;
		unsigned int		lo = 0;
		unsigned int		hi = 0;

		/* Lower bound: the digits right after '{' (none means 0).
		 * The cast keeps isdigit() defined for negative chars. */
		for (regparse++ ; isdigit((unsigned char)*regparse) ; regparse++) {
			lo = lo * 10 + (unsigned int)(*regparse - '0');
			if (lo > biggest) {
				FAIL("{} count too large");
			}
		}

		if (*regparse == '}') {
			/* {n}: a single count bounds both ends. */
			hi = lo;
		} else if (*regparse != '\0') {
			/* Skip the separator, then read the upper bound.
			 * Never step over the terminating NUL. */
			for (regparse++ ; isdigit((unsigned char)*regparse) ; regparse++) {
				hi = hi * 10 + (unsigned int)(*regparse - '0');
				if (hi > biggest) {
					FAIL("{} count too large");
				}
			}
		}
		if (*regparse != '}') {
			FAIL("unmatched {}");
		}
		/* regparse now sits on '}', which is consumed below. */

		min = (unsigned char)lo;
		max = (unsigned char)hi;

		/* With a zero lower bound the piece may match nothing, so it
		 * must not claim HASWIDTH; use the same flags as '?' and '*'. */
		if (min == 0) {
			*flagp = (WORST|SPSTART);
		}

		/* Resulting layout: MINMAX node, min node, max node, x.
		 * The counts are stored as the opcode byte of their node. */
		reginsert(max, ret);
		next = OPERAND(ret);
		reginsert(min, ret);
		next = OPERAND(next);
		reginsert(MINMAX, ret);
		regtail(ret, OPERAND(next));		/* MINMAX->next = x */
	} else if (op == '?') {
		/* Emit x? as (x|) */
		reginsert(BRANCH, ret);			/* Either x */
		regtail(ret, regnode(BRANCH));		/* or */
		next = regnode(NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse++;		/* Step over the operator (or the closing '}') */
	if (ISMULT(*regparse)) {
		FAIL("nested *?+{");
	}

	return(ret);
}