Exemple #1
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
char* ossimRegExp::regpiece (int *flagp) {
    char* ret;
    char  op;
    char* next;
    int            flags;

    ret = regatom(&flags);
    if (ret == NULL)
        return (NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return (ret);
    }

    if (!(flags & HASWIDTH) && op != '?') {
        //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand),
        printf ("ossimRegExp::compile() : *+ operand could be empty.\n");
        return 0;
    }
    *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);

    if (op == '*' && (flags & SIMPLE))
        reginsert(STAR, ret);
    else if (op == '*') {
        // Emit x* as (x&|), where & means "self".
        reginsert(BRANCH, ret);	// Either x
        regoptail(ret, regnode(BACK));	// and loop
        regoptail(ret, ret);	// back
        regtail(ret, regnode(BRANCH));	// or
        regtail(ret, regnode(NOTHING));	// null.
    }
    else if (op == '+' && (flags & SIMPLE))
        reginsert(PLUS, ret);
    else if (op == '+') {
        // Emit x+ as x(&|), where & means "self".
        next = regnode(BRANCH);	// Either
        regtail(ret, next);
        regtail(regnode(BACK), ret);	// loop back
        regtail(next, regnode(BRANCH));	// or
        regtail(ret, regnode(NOTHING));	// null.
    }
    else if (op == '?') {
        // Emit x? as (x|)
        reginsert(BRANCH, ret);	// Either x
        regtail(ret, regnode(BRANCH));	// or
        next = regnode(NOTHING);// null.
        regtail(ret, next);
        regoptail(ret, next);
    }
    regparse++;
    if (ISMULT(*regparse)) {
        //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand),
        printf ("ossimRegExp::compile(): Nested *?+.\n");
        return 0;
    }
    return (ret);
}
Exemple #2
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char	*
regpiece(int *flagp)
{
	register char	*ret;
	register char	*op;
	register char	*next;
	int	flags;
	int	len = 0;

	ret = regatom(&flags);
	if (ret == NULL)
		return(NULL);

	op = regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	len = CHARLEN(op);
	if (!(flags & HASWIDTH) && ((len != 1) || (*op != '?')) )
		FAIL("*+ operand could be empty");
	*flagp = ((len != 1) || (*op != '+')) ?  (WORST | SPSTART) : (WORST | HASWIDTH);

	if ((len == 1) && (*op == '*') && (flags & SIMPLE))
		reginsert(STAR, ret);
	else if ((len == 1) && 
			(*op == '*')) {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(BACK));		/* and loop */
		regoptail(ret, ret);			/* back */
		regtail(ret, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if ((len == 1) && (*op == '+') && (flags & SIMPLE))
		reginsert(PLUS, ret);
	else if ((len == 1) && (*op == '+')) {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);		/* loop back */
		regtail(next, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if ((len == 1) && (*op == '?')) {
		/* Emit x? as (x|) */
		reginsert(BRANCH, ret);			/* Either x */
		regtail(ret, regnode(BRANCH));		/* or */
		next = regnode(NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse += INCRLEN(len);
	if (ISMULT(regparse))
		FAIL("nested *?+");

	return(ret);
}
Exemple #3
0
/*
 * regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *
regpiece(i4 *flagp)
{
    register char *ret;
    register char op;
    register char *next;
    i4 flags;

    ret = regatom(&flags);
    if (ret == NULL)
        return(NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return(ret);
    }

    if (!(flags&HASWIDTH) && op != '?')
        _FAIL("*+ operand could be empty");
    *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);

    if (op == '*' && (flags&SIMPLE))
        reginsert(STAR, ret);
    else if (op == '*') {
        /* Emit x* as (x&|), where & means "self". */
        reginsert(BRANCH, ret);			/* Either x */
        regoptail(ret, regnode(BACK));		/* and loop */
        regoptail(ret, ret);			/* back */
        regtail(ret, regnode(BRANCH));		/* or */
        regtail(ret, regnode(NOTHING));		/* null. */
    } else if (op == '+' && (flags&SIMPLE))
        reginsert(PLUS, ret);
    else if (op == '+') {
        /* Emit x+ as x(&|), where & means "self". */
        next = regnode(BRANCH);			/* Either */
        regtail(ret, next);
        regtail(regnode(BACK), ret);		/* loop back */
        regtail(next, regnode(BRANCH));		/* or */
        regtail(ret, regnode(NOTHING));		/* null. */
    } else if (op == '?') {
        /* Emit x? as (x|) */
        reginsert(BRANCH, ret);			/* Either x */
        regtail(ret, regnode(BRANCH));		/* or */
        next = regnode(NOTHING);		/* null. */
        regtail(ret, next);
        regoptail(ret, next);
    }
    CMnext( regparse );
    if (ISMULT(*regparse))
        _FAIL("nested *?+");

    return(ret);
}
Exemple #4
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *regpiece( int *flagp )
{
    char        *ret, op, *next;
    int         flags;

    ret = regatom( &flags );
    if( ret == NULL ) {
        return( NULL );
    }

    op = *regparse;
    if( !ISMULT( op ) ) {
        *flagp = flags;
        return( ret );
    }

    if( !( flags & HASWIDTH ) && op != '?' ) {
        FAIL( ERR_RE_EMPTY_OPERAND );
    }
    *flagp = ( op != '+' ) ? ( WORST | SPSTART ) : ( WORST | HASWIDTH );

    if( op == '*' && ( flags & SIMPLE ) ) {
        reginsert( STAR, ret );
    } else if( op == '*' ) {
        /* Emit x* as (x&|), where & means "self". */
        reginsert( BRANCH, ret );                       /* Either x */
        regoptail( ret, regnode( BACK ) );              /* and loop */
        regoptail( ret, ret );                          /* back */
        regtail( ret, regnode( BRANCH ) );              /* or */
        regtail( ret, regnode( NOTHING ) );             /* null. */
    } else if( op == '+' && ( flags & SIMPLE ) ) {
        reginsert( PLUS, ret );
    } else if( op == '+' ) {
        /* Emit x+ as x(&|), where & means "self". */
        next = regnode( BRANCH );                       /* Either */
        regtail( ret, next );
        regtail( regnode( BACK ), ret );                /* loop back */
        regtail( next, regnode( BRANCH ) );             /* or */
        regtail( ret, regnode( NOTHING ) );             /* null. */
    } else if( op == '?' ) {
        /* Emit x? as (x|) */
        reginsert( BRANCH, ret );                       /* Either x */
        regtail( ret, regnode( BRANCH ) );              /* or */
        next = regnode( NOTHING );                      /* null. */
        regtail( ret, next );
        regoptail( ret, next );
    }
    regparse++;
    if( ISMULT( *regparse ) ) {
        FAIL( ERR_RE_NESTED_OPERAND );
    }

    return( ret );
}
Exemple #5
0
/*
   - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequence used for ? and the general cases of
 * * and + are somewhat optimized: they use the same NOTHING node as both the
 * endmarker for their branch list and the body of the last branch.  It might
 * seem that this node could be dispensed with entirely, but the endmarker
 * role is not redundant.
 */
static char *regpiece (int * flagp)
{
    register char *ret;
    register short op;
    register char *nxt;
    int flags;

    ret = regatom(&flags);
    if (ret == (char *) NULL)
        return ((char *) NULL);

    op = *regparse;
    if (!ISMULT(op)) {
        *flagp = flags;
        return (ret);
    }
    if (!(flags & HASWIDTH) && op != QMARK)
        FAIL("*+ operand could be empty\n");
    *flagp = (op != PLUSS) ? (WORST | SPSTART) : (WORST | HASWIDTH);

    if (op == ASTERIX && (flags & SIMPLE))
        reginsert(STAR, ret);
    else if (op == ASTERIX) {
        /* Emit x* as (x&|), where & means "self". */
        reginsert(BRANCH, ret); /* Either x */
        regoptail(ret, regnode(BACK));  /* and loop */
        regoptail(ret, ret);    /* back */
        regtail(ret, regnode(BRANCH));  /* or */
        regtail(ret, regnode(NOTHING)); /* null. */
    } else if (op == PLUSS && (flags & SIMPLE))
        reginsert(PLUS, ret);
    else if (op == PLUSS) {
        /* Emit x+ as x(&|), where & means "self". */
        nxt = regnode(BRANCH);  /* Either */
        regtail(ret, nxt);
        regtail(regnode(BACK), ret);    /* loop back */
        regtail(nxt, regnode(BRANCH));  /* or */
        regtail(ret, regnode(NOTHING)); /* null. */
    } else if (op == QMARK) {
        /* Emit x? as (x|) */
        reginsert(BRANCH, ret); /* Either x */
        regtail(ret, regnode(BRANCH));  /* or */
        nxt = regnode(NOTHING); /* null. */
        regtail(ret, nxt);
        regoptail(ret, nxt);
    }
    regparse++;
    if (ISMULT(*regparse))
        FAIL("nested *?+\n");

    return (ret);
}
Exemple #6
0
TCHAR *CRegExp::regpiece(int *flagp)
{
	TCHAR *ret;
	TCHAR op;
	TCHAR *next;
	int flags;

	ret = regatom(&flags);
	if (ret == NULL)
		return(NULL);

	op = *regparse;
	if (!ISREPN(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != _T('?'))
	{
		TRACE0("*+ operand could be empty\n");
		return NULL;
	}

	switch (op) {
	case _T('*'):	*flagp = WORST|SPSTART;			break;
	case _T('+'):	*flagp = WORST|SPSTART|HASWIDTH;	break;
	case _T('?'):	*flagp = WORST;				break;
	}

	if (op == _T('*') && (flags&SIMPLE))
		reginsert(STAR, ret);
	else if (op == _T('*')) {
		// Emit x* as (x&|), where & means "self".
		reginsert(BRANCH, ret);		// Either x
		regoptail(ret, regnode(BACK));	// and loop
		regoptail(ret, ret);		// back
		regtail(ret, regnode(BRANCH));	// or
		regtail(ret, regnode(NOTHING));	// null.
	} else if (op == _T('+') && (flags&SIMPLE))
		reginsert(PLUS, ret);
	else if (op == _T('+')) {
		// Emit x+ as x(&|), where & means "self".
		next = regnode(BRANCH);		// Either
		regtail(ret, next);
		regtail(regnode(BACK), ret);	// loop back
		regtail(next, regnode(BRANCH));	// or
		regtail(ret, regnode(NOTHING));	// null.
	} else if (op == _T('?')) {
		// Emit x? as (x|)
		reginsert(BRANCH, ret);		// Either x
		regtail(ret, regnode(BRANCH));	// or
		next = regnode(NOTHING);		// null.
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse++;
	if (ISREPN(*regparse))
	{
		TRACE0("nested *?+\n");
		return NULL;
	}

	return(ret);
}
Exemple #7
0
/*
 - regpiece - something followed by possible [*+?]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static int regpiece(regex_t *preg, int *flagp)
{
	int ret;
	char op;
	int next;
	int flags;
	int min;
	int max;

	ret = regatom(preg, &flags);
	if (ret == 0)
		return 0;

	op = *preg->regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
		return 0;
	}

	/* Handle braces (counted repetition) by expansion */
	if (op == '{') {
		char *end;

		min = strtoul(preg->regparse + 1, &end, 10);
		if (end == preg->regparse + 1) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (*end == '}') {
			max = min;
		}
		else {
			preg->regparse = end;
			max = strtoul(preg->regparse + 1, &end, 10);
			if (*end != '}') {
				preg->err = REG_ERR_UNMATCHED_BRACES;
				return 0;
			}
		}
		if (end == preg->regparse + 1) {
			max = MAX_REP_COUNT;
		}
		else if (max < min || max >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (min >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}

		preg->regparse = strchr(preg->regparse, '}');
	}
	else {
		min = (op == '+');
		max = (op == '?' ? 1 : MAX_REP_COUNT);
	}

	if (preg->regparse[1] == '?') {
		preg->regparse++;
		next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
	}
	else {
		next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
	}
	preg->program[ret + 2] = max;
	preg->program[ret + 3] = min;
	preg->program[ret + 4] = 0;

	*flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);

	if (!(flags & SIMPLE)) {
		int back = regnode(preg, BACK);
		regtail(preg, back, ret);
		regtail(preg, next, back);
	}

	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
		preg->err = REG_ERR_NESTED_COUNT;
		return 0;
	}

	return ret;
}
Exemple #8
0
/*
 - regpiece - something followed by possible [*+?{]
 *
 * Note that the branching code sequences used for ? and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
static char *
regpiece(int *flagp)
{
	register char	*next;
	register char	*ret;
	register char	op;
	unsigned char	max;
	unsigned char	min;
	int		flags;

	ret = regatom(&flags);
	if (ret == NULL) {
		return(NULL);
	}

	op = *regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		FAIL("*+{ operand could be empty");
	}
	*flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH);

	if (op == '*' && (flags&SIMPLE)) {
		reginsert(STAR, ret);
	} else if (op == '*') {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(BACK));		/* and loop */
		regoptail(ret, ret);			/* back */
		regtail(ret, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '+' && (flags&SIMPLE)) {
		reginsert(PLUS, ret);
	} else if (op == '+') {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);		/* loop back */
		regtail(next, regnode(BRANCH));		/* or */
		regtail(ret, regnode(NOTHING));		/* null. */
	} else if (op == '{') {
		for (min = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) {
			min = min * 10 + (*regparse - '0');
		}
		for (max = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) {
			max = max * 10 + (*regparse - '0');
		}
		reginsert(max, ret);
		next = OPERAND(ret);
		reginsert(min, ret);
		next = OPERAND(next);
		reginsert(MINMAX, ret);
		regtail(ret, OPERAND(next));		/* MINMAX->next = x */
	} else if (op == '?') {
		/* Emit x? as (x|) */
		reginsert(BRANCH, ret);			/* Either x */
		regtail(ret, regnode(BRANCH));		/* or */
		next = regnode(NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	regparse++;
	if (ISMULT(*regparse)) {
		FAIL("nested *?+{");
	}

	return(ret);
}