Example #1
0
/*
 - regbranch - one alternative of an | operator
 *
 * Implements the concatenation operator.
 */
static char *
regbranch(int *flagp)
{
	register char *ret;
	register char *chain;
	register char *latest;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	ret = regnode(BRANCH);
	chain = NULL;
	while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
		latest = regpiece(&flags);
		if (latest == NULL)
			return(NULL);
		*flagp |= flags&HASWIDTH;
		if (chain == NULL)	/* First piece. */
			*flagp |= flags&SPSTART;
		else
			regtail(chain, latest);
		chain = latest;
	}
	if (chain == NULL)	/* Loop ran zero times. */
		(void) regnode(NOTHING);

	return(ret);
}
Example #2
0
/*
   - regoptail - regtail on operand of first argument; nop if operandless
 */
static void regoptail (char * p, char * val)
{
    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
    if (p == (char *) NULL || p == &regdummy || OP(p) != BRANCH)
        return;
    regtail(OPERAND(p), val);
}
Example #3
0
/*
 - regbranch - one alternative of an | operator
 *
 * Implements the concatenation operator.
 */
static int regbranch(regex_t *preg, int *flagp )
{
	int ret;
	int chain;
	int latest;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	ret = regnode(preg, BRANCH);
	chain = 0;
	while (*preg->regparse != '\0' && *preg->regparse != ')' &&
	       *preg->regparse != '|') {
		latest = regpiece(preg, &flags);
		if (latest == 0)
			return 0;
		*flagp |= flags&HASWIDTH;
		if (chain == 0) {/* First piece. */
			*flagp |= flags&SPSTART;
		}
		else {
			regtail(preg, chain, latest);
		}
		chain = latest;
	}
	if (chain == 0)	/* Loop ran zero times. */
		(void) regnode(preg, NOTHING);

	return(ret);
}
Example #4
0
TCHAR *CRegExp::regbranch(int *flagp)
{
	TCHAR *ret;
	TCHAR *chain;
	TCHAR *latest;
	int flags;
	int c;

	*flagp = WORST;				// Tentatively.

	ret = regnode(BRANCH);
	chain = NULL;
	while ((c = *regparse) != _T('\0') && c != _T('|') && c != _T(')')) {
		latest = regpiece(&flags);
		if (latest == NULL)
			return(NULL);
		*flagp |= flags&HASWIDTH;
		if (chain == NULL)		// First piece.
			*flagp |= flags&SPSTART;
		else
			regtail(chain, latest);
		chain = latest;
	}
	if (chain == NULL)			// Loop ran zero times.
		(void) regnode(NOTHING);

	return(ret);
}
Example #5
0
void CRegExp::regoptail(TCHAR *p, TCHAR *val)
{
	// "Operandless" and "op != BRANCH" are synonymous in practice.
	if (!bEmitCode || OP(p) != BRANCH)
		return;
	regtail(OPERAND(p), val);
}
Example #6
0
static void regoptail(regex_t *preg, int p, int val )
{
	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
	if (p != 0 && OP(preg, p) == BRANCH) {
		regtail(preg, OPERAND(p), val);
	}
}
Example #7
0
/*
 - regbranch - one alternative of an | operator
 *
 * Implements the concatenation operator.
 */
static char	*
regbranch(int *flagp)
{
	register char	*ret;
	register char	*chain;
	register char	*latest;
	int	flags;
	int	len = 0;

	*flagp = WORST;		/* Tentatively. */

	ret = regnode(BRANCH);
	chain = NULL;
        /* Solaris 2.6 motif diff bug 1236359 1 line */
	while ((len = CHARLEN(regparse)) > 0  &&  
		(len != 1 || (*regparse != ')' &&  
			*regparse != '\n' && *regparse != '|'))) {
		latest = regpiece(&flags);
		if (latest == NULL)
			return(NULL);
		*flagp |= flags & HASWIDTH;
		if (chain == NULL)	/* First piece. */
			*flagp |= flags & SPSTART;
			else
			regtail(chain, latest);
		chain = latest;
	}
	if (chain == NULL)	/* Loop ran zero times. */
		(void) regnode(NOTHING);

	return(ret);
}
Example #8
0
/*
 - regbranch - one alternative of an | operator
 *
 * Implements the concatenation operator.
 */
char* ossimRegExp::regbranch (int *flagp) {
    char* ret;
    char* chain;
    char* latest;
    int                  flags;

    *flagp = WORST;		// Tentatively.

    ret = regnode(BRANCH);
    chain = NULL;
    while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
        latest = regpiece(&flags);
        if (latest == NULL)
            return (NULL);
        *flagp |= flags & HASWIDTH;
        if (chain == NULL)	// First piece.
            *flagp |= flags & SPSTART;
        else
            regtail(chain, latest);
        chain = latest;
    }
    if (chain == NULL)		// Loop ran zero times.
        regnode(NOTHING);

    return (ret);
}
Example #9
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char	*
regpiece(int *flagp)
{
	register char	*ret;
	register char	*op;
	register char	*next;
	int	flags;
	int	len = 0;

	ret = regatom(&flags);
	if (ret == NULL)
		return(NULL);

	op = regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	len = CHARLEN(op);
	if (!(flags & HASWIDTH) && ((len != 1) || (*op != '?')) )
		FAIL("*+ operand could be empty");
	*flagp = ((len != 1) || (*op != '+')) ?  (WORST | SPSTART) : (WORST | HASWIDTH);

	if ((len == 1) && (*op == '*') && (flags & SIMPLE))
		reginsert(STAR, ret);
	else if ((len == 1) && 
			(*op == '*')) {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(BACK));		/* and loop */
		regoptail(ret, ret);			/* back */
		regtail(ret, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if ((len == 1) && (*op == '+') && (flags & SIMPLE))
		reginsert(PLUS, ret);
	else if ((len == 1) && (*op == '+')) {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);		/* loop back */
		regtail(next, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if ((len == 1) && (*op == '?')) {
		/* Emit x? as (x|) */
		reginsert(BRANCH, ret);			/* Either x */
		regtail(ret, regnode(BRANCH));		/* or */
		next = regnode(NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse += INCRLEN(len);
	if (ISMULT(regparse))
		FAIL("nested *?+");

	return(ret);
}
Example #10
0
/*
 * regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *
regpiece(i4 *flagp)
{
    register char *ret;
    register char op;
    register char *next;
    i4 flags;

    ret = regatom(&flags);
    if (ret == NULL)
        return(NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return(ret);
    }

    if (!(flags&HASWIDTH) && op != '?')
        _FAIL("*+ operand could be empty");
    *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);

    if (op == '*' && (flags&SIMPLE))
        reginsert(STAR, ret);
    else if (op == '*') {
        /* Emit x* as (x&|), where & means "self". */
        reginsert(BRANCH, ret);			/* Either x */
        regoptail(ret, regnode(BACK));		/* and loop */
        regoptail(ret, ret);			/* back */
        regtail(ret, regnode(BRANCH));		/* or */
        regtail(ret, regnode(NOTHING));		/* null. */
    } else if (op == '+' && (flags&SIMPLE))
        reginsert(PLUS, ret);
    else if (op == '+') {
        /* Emit x+ as x(&|), where & means "self". */
        next = regnode(BRANCH);			/* Either */
        regtail(ret, next);
        regtail(regnode(BACK), ret);		/* loop back */
        regtail(next, regnode(BRANCH));		/* or */
        regtail(ret, regnode(NOTHING));		/* null. */
    } else if (op == '?') {
        /* Emit x? as (x|) */
        reginsert(BRANCH, ret);			/* Either x */
        regtail(ret, regnode(BRANCH));		/* or */
        next = regnode(NOTHING);		/* null. */
        regtail(ret, next);
        regoptail(ret, next);
    }
    CMnext( regparse );
    if (ISMULT(*regparse))
        _FAIL("nested *?+");

    return(ret);
}
Example #11
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *regpiece( int *flagp )
{
    char        *ret, op, *next;
    int         flags;

    ret = regatom( &flags );
    if( ret == NULL ) {
        return( NULL );
    }

    op = *regparse;
    if( !ISMULT( op ) ) {
        *flagp = flags;
        return( ret );
    }

    if( !( flags & HASWIDTH ) && op != '?' ) {
        FAIL( ERR_RE_EMPTY_OPERAND );
    }
    *flagp = ( op != '+' ) ? ( WORST | SPSTART ) : ( WORST | HASWIDTH );

    if( op == '*' && ( flags & SIMPLE ) ) {
        reginsert( STAR, ret );
    } else if( op == '*' ) {
        /* Emit x* as (x&|), where & means "self". */
        reginsert( BRANCH, ret );                       /* Either x */
        regoptail( ret, regnode( BACK ) );              /* and loop */
        regoptail( ret, ret );                          /* back */
        regtail( ret, regnode( BRANCH ) );              /* or */
        regtail( ret, regnode( NOTHING ) );             /* null. */
    } else if( op == '+' && ( flags & SIMPLE ) ) {
        reginsert( PLUS, ret );
    } else if( op == '+' ) {
        /* Emit x+ as x(&|), where & means "self". */
        next = regnode( BRANCH );                       /* Either */
        regtail( ret, next );
        regtail( regnode( BACK ), ret );                /* loop back */
        regtail( next, regnode( BRANCH ) );             /* or */
        regtail( ret, regnode( NOTHING ) );             /* null. */
    } else if( op == '?' ) {
        /* Emit x? as (x|) */
        reginsert( BRANCH, ret );                       /* Either x */
        regtail( ret, regnode( BRANCH ) );              /* or */
        next = regnode( NOTHING );                      /* null. */
        regtail( ret, next );
        regoptail( ret, next );
    }
    regparse++;
    if( ISMULT( *regparse ) ) {
        FAIL( ERR_RE_NESTED_OPERAND );
    }

    return( ret );
}
Example #12
0
/*
   - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequence used for ? and the general cases of
 * * and + are somewhat optimized: they use the same NOTHING node as both the
 * endmarker for their branch list and the body of the last branch.  It might
 * seem that this node could be dispensed with entirely, but the endmarker
 * role is not redundant.
 */
static char *regpiece (int * flagp)
{
    register char *ret;
    register short op;
    register char *nxt;
    int flags;

    ret = regatom(&flags);
    if (ret == (char *) NULL)
        return ((char *) NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return (ret);
    }
    if (!(flags & HASWIDTH) && op != QMARK)
        FAIL("*+ operand could be empty\n");
    *flagp = (op != PLUSS) ? (WORST | SPSTART) : (WORST | HASWIDTH);

    if (op == ASTERIX && (flags & SIMPLE))
        reginsert(STAR, ret);
    else if (op == ASTERIX) {
        /* Emit x* as (x&|), where & means "self". */
        reginsert(BRANCH, ret); /* Either x */
        regoptail(ret, regnode(BACK));  /* and loop */
        regoptail(ret, ret);    /* back */
        regtail(ret, regnode(BRANCH));  /* or */
        regtail(ret, regnode(NOTHING)); /* null. */
    } else if (op == PLUSS && (flags & SIMPLE))
        reginsert(PLUS, ret);
    else if (op == PLUSS) {
        /* Emit x+ as x(&|), where & means "self". */
        nxt = regnode(BRANCH);  /* Either */
        regtail(ret, nxt);
        regtail(regnode(BACK), ret);    /* loop back */
        regtail(nxt, regnode(BRANCH));  /* or */
        regtail(ret, regnode(NOTHING)); /* null. */
    } else if (op == QMARK) {
        /* Emit x? as (x|) */
        reginsert(BRANCH, ret); /* Either x */
        regtail(ret, regnode(BRANCH));  /* or */
        nxt = regnode(NOTHING); /* null. */
        regtail(ret, nxt);
        regoptail(ret, nxt);
    }
    regparse++;
    if (ISMULT(*regparse))
        FAIL("nested *?+\n");

    return (ret);
}
Example #13
0
/*
 - reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 */
char* ossimRegExp::reg (int paren, int *flagp) {
    char* ret;
    char* br;
    char* ender;
    int   parno =0;
    int   flags;

    *flagp = HASWIDTH;		// Tentatively.

    // Make an OPEN node, if parenthesized.
    if (paren) {
        if (regnpar >= NSUBEXP) {
            //RAISE Error, SYM(ossimRegExp), SYM(Too_Many_Parens),
            printf ("ossimRegExp::compile(): Too many parentheses.\n");
            return 0;
        }
        parno = regnpar;
        regnpar++;
        ret = regnode(OPEN + parno);
    }
    else
        ret = NULL;

    // Pick up the branches, linking them together.
    br = regbranch(&flags);
    if (br == NULL)
        return (NULL);
    if (ret != NULL)
        regtail(ret, br);	// OPEN -> first.
    else
        ret = br;
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & SPSTART;
    while (*regparse == '|') {
        regparse++;
        br = regbranch(&flags);
        if (br == NULL)
            return (NULL);
        regtail(ret, br);	// BRANCH -> BRANCH.
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & SPSTART;
    }

    // Make a closing node, and hook it on the end.
    ender = regnode((paren) ? CLOSE + parno : END);
    regtail(ret, ender);

    // Hook the tails of the branches to the closing node.
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    // Check for proper termination.
    if (paren && *regparse++ != ')') {
        //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
        printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
        return 0;
    }
    else if (!paren && *regparse != '\0') {
        if (*regparse == ')') {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
            printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
            return 0;
        }
        else {
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::compile(): Internal error.\n");
            return 0;
        }
        // NOTREACHED
    }
    return (ret);
}
Example #14
0
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 */
static char *reg( int paren, int *flagp )
{
    char        *ret, *br, *ender;
    int         flags;
    char        parno = 0;

    *flagp = HASWIDTH;      /* Tentatively. */

    /* Make an OPEN node, if parenthesized. */
    if( paren ) {
        if( regnpar >= NSUBEXP ) {
            FAIL( ERR_RE_TOO_MANY_ROUND_BRACKETS );
        }
        parno = regnpar;
        regnpar++;
        ret = regnode( OPEN + parno );
    } else {
        ret = NULL;
    }

    /* Pick up the branches, linking them together. */
    br = regbranch( &flags );
    if( br == NULL ) {
        return( NULL );
    }
    if( ret != NULL ) {
        regtail( ret, br );       /* OPEN -> first. */
    } else {
        ret = br;
    }
    if( !( flags & HASWIDTH ) ) {
        *flagp &= ~HASWIDTH;
    }
    *flagp |= flags & SPSTART;
    while( *regparse == '|' ) {
        regparse++;
        br = regbranch( &flags );
        if( br == NULL ) {
            return( NULL );
        }
        regtail( ret, br );       /* BRANCH -> BRANCH. */
        if( !( flags & HASWIDTH ) ) {
            *flagp &= ~HASWIDTH;
        }
        *flagp |= flags & SPSTART;
    }

    /* Make a closing node, and hook it on the end. */
    ender = regnode( ( paren ) ? CLOSE + parno : END );
    regtail( ret, ender );

    /* Hook the tails of the branches to the closing node. */
    for( br = ret; br != NULL; br = regnext( br ) ) {
        regoptail( br, ender );
    }

    /* Check for proper termination. */
    if( paren && *regparse++ != ')' ) {
        FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
    } else if( !paren && *regparse != '\0' ) {
        if( *regparse == ')' ) {
            FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
        } else {
            FAIL( ERR_RE_INTERNAL_FOULUP );    /* "Can't happen". */
        }
    }

    return( ret );
}
Example #15
0
/*
 - reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 */
static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
{
	int ret;
	int br;
	int ender;
	int parno = 0;
	int flags;

	*flagp = HASWIDTH;	/* Tentatively. */

	/* Make an OPEN node, if parenthesized. */
	if (paren) {
		if (preg->regparse[0] == '?' && preg->regparse[1] == ':') {
			/* non-capturing paren */
			preg->regparse += 2;
			parno = -1;
		}
		else {
			parno = ++preg->re_nsub;
		}
		ret = regnode(preg, OPEN+parno);
	} else
		ret = 0;

	/* Pick up the branches, linking them together. */
	br = regbranch(preg, &flags);
	if (br == 0)
		return 0;
	if (ret != 0)
		regtail(preg, ret, br);	/* OPEN -> first. */
	else
		ret = br;
	if (!(flags&HASWIDTH))
		*flagp &= ~HASWIDTH;
	*flagp |= flags&SPSTART;
	while (*preg->regparse == '|') {
		preg->regparse++;
		br = regbranch(preg, &flags);
		if (br == 0)
			return 0;
		regtail(preg, ret, br);	/* BRANCH -> BRANCH. */
		if (!(flags&HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= flags&SPSTART;
	}

	/* Make a closing node, and hook it on the end. */
	ender = regnode(preg, (paren) ? CLOSE+parno : END);
	regtail(preg, ret, ender);

	/* Hook the tails of the branches to the closing node. */
	for (br = ret; br != 0; br = regnext(preg, br))
		regoptail(preg, br, ender);

	/* Check for proper termination. */
	if (paren && *preg->regparse++ != ')') {
		preg->err = REG_ERR_UNMATCHED_PAREN;
		return 0;
	} else if (!paren && *preg->regparse != '\0') {
		if (*preg->regparse == ')') {
			preg->err = REG_ERR_UNMATCHED_PAREN;
			return 0;
		} else {
			preg->err = REG_ERR_JUNK_ON_END;
			return 0;
		}
	}

	return(ret);
}
Example #16
0
TCHAR *CRegExp::regpiece(int *flagp)
{
	TCHAR *ret;
	TCHAR op;
	TCHAR *next;
	int flags;

	ret = regatom(&flags);
	if (ret == NULL)
		return(NULL);

	op = *regparse;
	if (!ISREPN(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != _T('?'))
	{
		TRACE0("*+ operand could be empty\n");
		return NULL;
	}

	switch (op) {
	case _T('*'):	*flagp = WORST|SPSTART;			break;
	case _T('+'):	*flagp = WORST|SPSTART|HASWIDTH;	break;
	case _T('?'):	*flagp = WORST;				break;
	}

	if (op == _T('*') && (flags&SIMPLE))
		reginsert(STAR, ret);
	else if (op == _T('*')) {
		// Emit x* as (x&|), where & means "self".
		reginsert(BRANCH, ret);		// Either x
		regoptail(ret, regnode(BACK));	// and loop
		regoptail(ret, ret);		// back
		regtail(ret, regnode(BRANCH));	// or
		regtail(ret, regnode(NOTHING));	// null.
	} else if (op == _T('+') && (flags&SIMPLE))
		reginsert(PLUS, ret);
	else if (op == _T('+')) {
		// Emit x+ as x(&|), where & means "self".
		next = regnode(BRANCH);		// Either
		regtail(ret, next);
		regtail(regnode(BACK), ret);	// loop back
		regtail(next, regnode(BRANCH));	// or
		regtail(ret, regnode(NOTHING));	// null.
	} else if (op == _T('?')) {
		// Emit x? as (x|)
		reginsert(BRANCH, ret);		// Either x
		regtail(ret, regnode(BRANCH));	// or
		next = regnode(NOTHING);		// null.
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse++;
	if (ISREPN(*regparse))
	{
		TRACE0("nested *?+\n");
		return NULL;
	}

	return(ret);
}
Example #17
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static int regpiece(regex_t *preg, int *flagp)
{
	int ret;
	char op;
	int next;
	int flags;
	int min;
	int max;

	ret = regatom(preg, &flags);
	if (ret == 0)
		return 0;

	op = *preg->regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
		return 0;
	}

	/* Handle braces (counted repetition) by expansion */
	if (op == '{') {
		char *end;

		min = strtoul(preg->regparse + 1, &end, 10);
		if (end == preg->regparse + 1) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (*end == '}') {
			max = min;
		}
		else {
			preg->regparse = end;
			max = strtoul(preg->regparse + 1, &end, 10);
			if (*end != '}') {
				preg->err = REG_ERR_UNMATCHED_BRACES;
				return 0;
			}
		}
		if (end == preg->regparse + 1) {
			max = MAX_REP_COUNT;
		}
		else if (max < min || max >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (min >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}

		preg->regparse = strchr(preg->regparse, '}');
	}
	else {
		min = (op == '+');
		max = (op == '?' ? 1 : MAX_REP_COUNT);
	}

	if (preg->regparse[1] == '?') {
		preg->regparse++;
		next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
	}
	else {
		next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
	}
	preg->program[ret + 2] = max;
	preg->program[ret + 3] = min;
	preg->program[ret + 4] = 0;

	*flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);

	if (!(flags & SIMPLE)) {
		int back = regnode(preg, BACK);
		regtail(preg, back, ret);
		regtail(preg, next, back);
	}

	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
		preg->err = REG_ERR_NESTED_COUNT;
		return 0;
	}

	return ret;
}
Example #18
0
TCHAR *CRegExp::reg(int paren, int *flagp)
{
	char *ret;
	char *br;
	char *ender;
	int parno;
	int flags;

	*flagp = HASWIDTH;	// Tentatively.

	if (paren)
	{
		// Make an OPEN node.
		if (regnpar >= NSUBEXP)
		{
			TRACE1("Too many (). NSUBEXP is set to %d\n", NSUBEXP );
			return NULL;
		}
		parno = regnpar;
		regnpar++;
		ret = regnode(OPEN+parno);
	}

	// Pick up the branches, linking them together.
	br = regbranch(&flags);
	if (br == NULL)
		return(NULL);
	if (paren)
		regtail(ret, br);	// OPEN -> first.
	else
		ret = br;
	*flagp &= ~(~flags&HASWIDTH);	// Clear bit if bit 0.
	*flagp |= flags&SPSTART;
	while (*regparse == _T('|')) {
		regparse++;
		br = regbranch(&flags);
		if (br == NULL)
			return(NULL);
		regtail(ret, br);	// BRANCH -> BRANCH.
		*flagp &= ~(~flags&HASWIDTH);
		*flagp |= flags&SPSTART;
	}

	// Make a closing node, and hook it on the end.
	ender = regnode((paren) ? CLOSE+parno : END);
	regtail(ret, ender);

	// Hook the tails of the branches to the closing node.
	for (br = ret; br != NULL; br = regnext(br))
		regoptail(br, ender);

	// Check for proper termination.
	if (paren && *regparse++ != _T(')'))
	{
		TRACE0("unterminated ()\n");
		return NULL;
	}
	else if (!paren && *regparse != _T('\0'))
	{
		if (*regparse == _T(')'))
		{
			TRACE0("unmatched ()\n");
			return NULL;
		}
		else
		{
			TRACE0("internal error: junk on end\n");
			return NULL;
		}
		// NOTREACHED
	}

	return(ret);
}
Example #19
0
/*
 - regoptail - regtail on operand of first argument; nop if operandless
 */
void ossimRegExp::regoptail (char* p, const char* val) {
    // "Operandless" and "op != BRANCH" are synonymous in practice.
    if (p == NULL || p == &regdummy || OP(p) != BRANCH)
        return;
    regtail(OPERAND(p), val);
}
Example #20
0
/*
 - reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 */
static char *
reg(
	int paren,			/* Parenthesized? */
	int *flagp )
{
	register char *ret;
	register char *br;
	register char *ender;
	register int parno;
	int flags;

	*flagp = HASWIDTH;	/* Tentatively. */

	/* Make an OPEN node, if parenthesized. */
	if (paren) {
		if (regnpar >= NSUBEXP)
			FAIL("too many ()");
		parno = regnpar;
		regnpar++;
		ret = regnode(OPEN+parno);
	} else
		ret = NULL;

	/* Pick up the branches, linking them together. */
	br = regbranch(&flags);
	if (br == NULL)
		return(NULL);
	if (ret != NULL)
		regtail(ret, br);	/* OPEN -> first. */
	else
		ret = br;
	if (!(flags&HASWIDTH))
		*flagp &= ~HASWIDTH;
	*flagp |= flags&SPSTART;
	while (*regparse == '|' || *regparse == '\n') {
		regparse++;
		br = regbranch(&flags);
		if (br == NULL)
			return(NULL);
		regtail(ret, br);	/* BRANCH -> BRANCH. */
		if (!(flags&HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= flags&SPSTART;
	}

	/* Make a closing node, and hook it on the end. */
	ender = regnode((paren) ? CLOSE+parno : END);	
	regtail(ret, ender);

	/* Hook the tails of the branches to the closing node. */
	for (br = ret; br != NULL; br = regnext(br))
		regoptail(br, ender);

	/* Check for proper termination. */
	if (paren && *regparse++ != ')') {
		FAIL("unmatched ()");
	} else if (!paren && *regparse != '\0') {
		if (*regparse == ')') {
			FAIL("unmatched ()");
		} else
			FAIL("junk on end");	/* "Can't happen". */
		/* NOTREACHED */
	}

	return(ret);
}
Example #21
0
/*
 - regpiece - something followed by possible [*+?{]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *
regpiece(int *flagp)
{
	register char	*next;
	register char	*ret;
	register char	op;
	unsigned char	max;
	unsigned char	min;
	int		flags;

	ret = regatom(&flags);
	if (ret == NULL) {
		return(NULL);
	}

	op = *regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		FAIL("*+{ operand could be empty");
	}
	*flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH);

	if (op == '*' && (flags&SIMPLE)) {
		reginsert(STAR, ret);
	} else if (op == '*') {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(BACK));		/* and loop */
		regoptail(ret, ret);			/* back */
		regtail(ret, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '+' && (flags&SIMPLE)) {
		reginsert(PLUS, ret);
	} else if (op == '+') {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);		/* loop back */
		regtail(next, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '{') {
		for (min = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) {
			min = min * 10 + (*regparse - '0');
		}
		for (max = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) {
			max = max * 10 + (*regparse - '0');
		}
		reginsert(max, ret);
		next = OPERAND(ret);
		reginsert(min, ret);
		next = OPERAND(next);
		reginsert(MINMAX, ret);
		regtail(ret, OPERAND(next));		/* MINMAX->next = x */
	} else if (op == '?') {
		/* Emit x? as (x|) */
		reginsert(BRANCH, ret);			/* Either x */
		regtail(ret, regnode(BRANCH));		/* or */
		next = regnode(NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse++;
	if (ISMULT(*regparse)) {
		FAIL("nested *?+{");
	}

	return(ret);
}