예제 #1
0
파일: regexp.c 프로젝트: BlackYoup/medusa
/*
 - regtail - set the next-pointer at the end of a node chain
 *
 * Follows the chain that starts at p (via regnext) to its final node and
 * stores the distance between that node and val in the node's two operand
 * bytes, high byte first.  For a BACK node the distance is measured
 * backwards (node minus val); for every other node it is val minus node.
 * Nothing is written when p is the address of regdummy.
 */
static void
regtail(
	char *p,
	char *val )
{
	char *last;
	char *following;
	int distance;

	if (p == &regdummy)
		return;

	/* Advance until the current node has no successor. */
	last = p;
	while ((following = regnext(last)) != NULL)
		last = following;

	distance = (OP(last) == BACK) ? (last - val) : (val - last);

	/* Split the distance into two octets. */
	last[1] = (distance >> 8) & 0377;
	last[2] = distance & 0377;
}
예제 #2
0
/*
 * regmatchsimplerepeat - match a simple repeat node and whatever follows it
 *
 * The repeat limits are read from program[scan + 2] (max) and
 * program[scan + 3] (min).  regrepeat() counts how often the operand at
 * scan + 5 matches at the current input position; the node after the
 * repeat is then tried for each candidate count -- upwards from min when
 * matchmin is non-zero, otherwise downwards from the counted value.
 *
 * Returns 1 as soon as one count lets the following node match, else 0.
 */
static int regmatchsimplerepeat(regex_t *preg, int scan, int matchmin)
{
	int upper = preg->program[scan + 2];
	const int lower = preg->program[scan + 3];
	const int following = regnext(preg, scan);
	const char *origin;
	int lookahead = '\0';
	int count;
	int step;
	int ch;

	/*
	 * If the following node is a literal, remember its first character
	 * so that hopeless positions can be skipped cheaply.
	 */
	if (OP(preg, following) == EXACTLY) {
		lookahead = preg->program[OPERAND(following)];
	}

	origin = preg->reginput;
	count = regrepeat(preg, scan + 5, upper);
	if (count < lower) {
		return 0;
	}

	if (matchmin) {
		/* Minimal: walk from lower up to what regrepeat() found. */
		upper = count;
		count = lower;
		step = 1;
	}
	else {
		/* Maximal: walk from what regrepeat() found down to lower. */
		step = -1;
	}

	for (; matchmin ? (count <= upper) : (count >= lower); count += step) {
		preg->reginput = origin + utf8_index(origin, count);
		reg_utf8_tounicode_case(preg->reginput, &ch, (preg->cflags & REG_ICASE));

		/* Skip positions where the known next character cannot match. */
		if (!reg_iseol(preg, lookahead) && ch != lookahead) {
			continue;
		}
		if (regmatch(preg, following)) {
			return 1;
		}
	}
	return 0;
}
예제 #3
0
/*
 * regmatchrepeat - match a counted repeat node whose body is a sub-program
 *
 * The node at scan keeps its limits in program[scan + 2] (max) and
 * program[scan + 3] (min) and a running iteration counter in
 * program[scan + 4].  The repeated body starts at scan + 5.  The counter
 * is bumped before each attempt at another iteration and restored when
 * that attempt fails.
 *
 * matchmin selects minimal matching (try the continuation before another
 * iteration); otherwise another iteration is tried first.
 *
 * Returns the result of the successful/last regmatch() call: non-zero on
 * a match, 0 otherwise.
 */
static int regmatchrepeat(regex_t *preg, int scan, int matchmin)
{
	int *const node = preg->program + scan;
	const int upper = node[2];
	const int lower = node[3];
	int *const count = &node[4];
	const int body = scan + 5;

	if (*count < lower) {
		/* Below the minimum: another iteration is mandatory. */
		++*count;
		if (regmatch(preg, body)) {
			return 1;
		}
		--*count;
		return 0;
	}

	if (*count > upper) {
		return 0;
	}

	if (!matchmin) {
		/* Maximal: prefer one more iteration while allowed. */
		if (*count < upper) {
			++*count;
			if (regmatch(preg, body)) {
				return 1;
			}
			--*count;
		}
		/* No further iteration worked; fall back to the continuation. */
		return regmatch(preg, regnext(preg, scan));
	}

	/* Minimal: prefer the continuation, then fall back to one more iteration. */
	if (regmatch(preg, regnext(preg, scan))) {
		return 1;
	}
	++*count;
	if (regmatch(preg, body)) {
		return 1;
	}
	--*count;
	return 0;
}
예제 #4
0
// regtail - set the next-pointer at the end of a node chain.
//
// Follows the chain starting at p until regnext() returns NULL and stores
// the distance between that last node and val as a short in the slot right
// after the node's opcode.  BACK nodes store (node - val), all other nodes
// store (val - node).  Does nothing unless bEmitCode is set.
void CRegExp::regtail(TCHAR *p, TCHAR *val)
{
	if (!bEmitCode)
		return;

	// Walk to the last node of the chain.
	TCHAR *last = p;
	for (;;) {
		TCHAR *const following = regnext(last);
		if (following == NULL)
			break;
		last = following;
	}

	short *const slot = (short *)(last + 1);
	if (OP(last) == BACK)
		*slot = last - val;
	else
		*slot = val - last;
}
예제 #5
0
/*
 - regdump - dump a SRE onto stdout in vaguely comprehensible form
 *
 * Prints one line per node, starting at r->program + 1 and stopping once
 * the END node has been printed.  Each line shows the node's offset within
 * the program, its description from regprop(), the offset of its successor
 * in parentheses ("(0)" when regnext() returns NULL) and, for ANYOF, ANYBUT
 * and EXACTLY nodes, the literal operand string.  A final line reports the
 * regstart, reganch and regmust header fields when they are set.
 */
void
regdump(SRE *r)
{
	register char *s;
	register char op = EXACTLY;	/* Arbitrary non-END op. */
	register char *next;

	s = r->program + 1;
	while (op != END) {	/* While that wasn't END last time... */
		op = OP(s);
		/*
		 * A pointer difference has type ptrdiff_t, which need not be
		 * int; convert explicitly so the argument matches "%d".
		 */
		printf("%2d%s", (int)(s - r->program), regprop(s));	/* Where, what. */
		next = regnext(s);
		if (next == NULL) {		/* Next ptr. */
			printf("(0)");
		} else {
			printf("(%d)", (int)((s - r->program) + (next - s)));
		}
		s += 3;		/* Step over the opcode and the next-pointer bytes. */
		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
			/* Literal string, where present. */
			while (*s != '\0') {
				putchar(*s);
				s++;
			}
			s++;	/* Skip the terminating NUL. */
		}
		putchar('\n');
	}

	/* Header fields of interest. */
	if (r->regstart != '\0') {
		printf("start `%c' ", r->regstart);
	}
	if (r->reganch) {
		printf("anchored ");
	}
	if (r->regmust != NULL) {
		printf("must have \"%s\"", r->regmust);
	}
	printf("\n");
}
예제 #6
0
파일: regexp.c 프로젝트: BPotato/fluffos
/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * Prints one line per node, starting at r->program + 1 and stopping once
 * the END node has been printed: the node's offset within the program, its
 * description from regprop(), the offset of its successor in parentheses
 * ("(0)" when regnext() returns NULL) and, for ANYOF, ANYBUT and EXACTLY
 * nodes, the literal operand string.  A final line reports the regstart,
 * reganch and regmust header fields when they are set.
 */
void regdump (regexp * r)
{
    register char *s;
    register char op = EXACTLY; /* Arbitrary non-END op. */
    register char *nxt;

    s = r->program + 1;
    while (op != END) {         /* While that wasn't END last time... */
        op = OP(s);
        /*
         * Pointer differences are ptrdiff_t, which is not long on every
         * platform; convert explicitly so the argument matches "%ld".
         */
        printf("%2ld%s", (long) (s - r->program), regprop(s));    /* Where, what. */
        nxt = regnext(s);
        if (nxt == (char *) NULL)       /* nxt ptr. */
            printf("(0)");
        else
            printf("(%ld)", (long) ((s - r->program) + (nxt - s)));
        s += 3;                 /* Step over the opcode and next-pointer bytes. */
        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
            /* Literal string, where present. */
            while (*s != '\0') {
                putchar(*s);
                s++;
            }
            s++;                /* Skip the terminating NUL. */
        }
        putchar('\n');
    }

    /* Header fields of interest. */
    if (r->regstart != '\0')
        printf("start `%c' ", r->regstart);
    if (r->reganch)
        printf("anchored ");
    if (r->regmust != (char *) NULL)
        printf("must have \"%s\"", r->regmust);
    printf("\n");
}
예제 #7
0
/*
 - regtail - set the next-pointer at the end of a node chain
 *
 * Nodes are identified by their index into preg->program.  The chain that
 * starts at p is followed until regnext() returns 0; the distance between
 * that last node and val is then written to the slot after the node's
 * opcode.  BACK nodes store (node - val), every other node (val - node).
 */
static void regtail(regex_t *preg, int p, int val)
{
	int last = p;
	int following;

	/* Advance until the current node has no successor. */
	while ((following = regnext(preg, last)) != 0) {
		last = following;
	}

	preg->program[last + 1] = (OP(preg, last) == BACK) ? (last - val) : (val - last);
}
예제 #8
0
/*
 - regtail - set the next-pointer at the end of a node chain
 *
 * Follows the chain starting at p to its last node (the one for which
 * regnext() returns NULL) and writes the distance between that node and
 * val into the node's two operand bytes, high byte first.  BACK nodes
 * store (node - val); all other nodes store (val - node).  Nothing is
 * written when p is the address of regdummy.
 */
void ossimRegExp::regtail (char* p, const char* val) {
    if (p == &regdummy)
        return;

    // Walk to the last node of the chain.
    char* last = p;
    for (char* following = regnext(last); following != NULL;
         following = regnext(last)) {
        last = following;
    }

    const char* const node = last;
    const int distance = (OP(last) == BACK) ? (int)(node - val)
                                            : (int)(val - node);

    // Split the distance into two octets.
    last[1] = (distance >> 8) & 0377;
    last[2] = distance & 0377;
}
예제 #9
0
파일: regexp.c 프로젝트: BlackYoup/medusa
/*
 - regcomp - compile a regular expression into internal code
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because free() must be able to free it all.)
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 *
 * Returns the newly malloc'ed regexp, or NULL when exp is NULL, when
 * either compile pass fails, when the program is too large, or when
 * memory cannot be obtained.  (FAIL is presumably a macro that reports
 * its message and returns NULL -- defined outside this block.)
 */
regexp *
regcomp( const char *exp )
{
	register regexp *r;
	register char *scan;
	register char *longest;
	register unsigned len;
	int flags;

	if (exp == NULL)
		FAIL("NULL argument");

	/* First pass: determine size, legality. */
#ifdef notdef
	if (exp[0] == '.' && exp[1] == '*') exp += 2;  /* aid grep */
#endif
	regparse = (char *)exp;
	regnpar = 1;
	regsize = 0L;
	regcode = &regdummy;	/* Sizing pass: no real code buffer yet. */
	regc(MAGIC);
	if (reg(0, &flags) == NULL)
		return(NULL);

	/* Small enough for pointer-storage convention? */
	if (regsize >= 32767L)		/* Probably could be 65535L. */
		FAIL("regexp too big");

	/* Allocate space. */
	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
	if (r == NULL)
		FAIL("out of space");

	/* Second pass: emit code. */
	regparse = (char *)exp;
	regnpar = 1;
	regcode = r->program;
	regc(MAGIC);
	if (reg(0, &flags) == NULL) {
		/*
		 * The caller only ever sees NULL here, so nobody else could
		 * release the buffer; free it to avoid leaking it.
		 */
		free(r);
		return(NULL);
	}

	/* Dig out information for optimizations. */
	r->regstart = '\0';	/* Worst-case defaults. */
	r->reganch = 0;
	r->regmust = NULL;
	r->regmlen = 0;
	scan = r->program+1;			/* First BRANCH. */
	if (OP(regnext(scan)) == END) {		/* Only one top-level choice. */
		scan = OPERAND(scan);

		/* Starting-point info. */
		if (OP(scan) == EXACTLY)
			r->regstart = *OPERAND(scan);
		else if (OP(scan) == BOL)
			r->reganch++;

		/*
		 * If there's something expensive in the r.e., find the
		 * longest literal string that must appear and make it the
		 * regmust.  Resolve ties in favor of later strings, since
		 * the regstart check works with the beginning of the r.e.
		 * and avoiding duplication strengthens checking.  Not a
		 * strong reason, but sufficient in the absence of others.
		 */
		if (flags&SPSTART) {
			longest = NULL;
			len = 0;
			for (; scan != NULL; scan = regnext(scan))
				if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
					longest = OPERAND(scan);
					len = strlen(OPERAND(scan));
				}
			r->regmust = longest;
			r->regmlen = len;
		}
	}

	return(r);
}
예제 #10
0
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Parses one or more '|'-separated branches, chains them together, and
 * terminates the group with a CLOSE node (when paren is non-zero) or an
 * END node (top level).  *flagp starts as HASWIDTH and loses that bit if
 * any branch lacks it; SPSTART is set if any branch has it.
 *
 * Returns the first node of the group, or NULL when a branch fails to
 * parse.  The error paths go through FAIL, which is defined outside this
 * block.
 */
static char *reg( int paren, int *flagp )
{
    char        *head;
    char        *branch;
    char        *closer;
    int         brflags;
    char        groupno = 0;

    *flagp = HASWIDTH;      /* Assume width until a branch says otherwise. */

    /* A parenthesized group starts with its own OPEN node. */
    if( !paren ) {
        head = NULL;
    } else {
        if( regnpar >= NSUBEXP ) {
            FAIL( ERR_RE_TOO_MANY_ROUND_BRACKETS );
        }
        groupno = regnpar++;
        head = regnode( OPEN + groupno );
    }

    /* Collect the branches, chaining each one onto what came before. */
    for( ;; ) {
        branch = regbranch( &brflags );
        if( branch == NULL ) {
            return( NULL );
        }
        if( head == NULL ) {
            head = branch;              /* Top level: first branch leads. */
        } else {
            regtail( head, branch );    /* OPEN -> first, BRANCH -> BRANCH. */
        }
        if( ( brflags & HASWIDTH ) == 0 ) {
            *flagp &= ~HASWIDTH;
        }
        *flagp |= brflags & SPSTART;

        if( *regparse != '|' ) {
            break;
        }
        regparse++;
    }

    /* Append the closing node to the chain. */
    if( paren ) {
        closer = regnode( CLOSE + groupno );
    } else {
        closer = regnode( END );
    }
    regtail( head, closer );

    /* Point the tail of every branch at the closing node. */
    branch = head;
    while( branch != NULL ) {
        regoptail( branch, closer );
        branch = regnext( branch );
    }

    /* Verify that the group ended where it should. */
    if( paren ) {
        if( *regparse++ != ')' ) {
            FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
        }
    } else if( *regparse != '\0' ) {
        if( *regparse == ')' ) {
            FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS );
        } else {
            FAIL( ERR_RE_INTERNAL_FOULUP );    /* "Can't happen". */
        }
    }

    return( head );
}
예제 #11
0
// regmatch - main matching routine.
//
// Tries to match the compiled program starting at node prog against the
// input at the member pointer reginput.  Simple nodes are handled in the
// loop; nodes that need to know whether the rest of the pattern matched
// (OPEN/CLOSE, multi-way BRANCH, STAR/PLUS) recurse.
//
// Returns 1 on a match and 0 on failure or when an unknown opcode or a
// broken node chain is encountered.
int	CRegExp::regmatch(TCHAR *prog)
{
	TCHAR *scan;	// Current node.
	TCHAR *next;		// Next node.

	for (scan = prog; scan != NULL; scan = next) {
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			// Only matches at the position recorded in regbol.
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			// Only matches at the string terminator.
			if (*reginput != _T('\0'))
				return(0);
			break;
		case ANY:
			// Any single character except the terminator.
			if (*reginput == _T('\0'))
				return(0);
			reginput++;
			break;
		case EXACTLY: {
			size_t len;
			TCHAR *const opnd = OPERAND(scan);

			// Inline the first character, for speed.
			if (*opnd != *reginput)
				return(0);
			len = _tcslen(opnd);
			if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
				return(0);
			reginput += len;
			break;
			}
		case ANYOF:
			// Current character must be in the operand set; the
			// terminator never matches.
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			// Current character must NOT be in the operand set; the
			// terminator never matches.
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1: case OPEN+2: case OPEN+3:
		case OPEN+4: case OPEN+5: case OPEN+6:
		case OPEN+7: case OPEN+8: case OPEN+9: {
			const int no = OP(scan) - OPEN;
			// Remember where this group would start; it is only
			// recorded if the rest of the pattern matches.
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set startp if some later
				// invocation of the same parentheses
				// already has.

				if (startp[no] == NULL)
					startp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case CLOSE+1: case CLOSE+2: case CLOSE+3:
		case CLOSE+4: case CLOSE+5: case CLOSE+6:
		case CLOSE+7: case CLOSE+8: case CLOSE+9: {
			const int no = OP(scan) - CLOSE;
			// Remember where this group would end; it is only
			// recorded if the rest of the pattern matches.
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set endp if some later
				// invocation of the same parentheses
				// already has.

				if (endp[no] == NULL)
					endp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case BRANCH: {
			TCHAR *const save = reginput;

			if (OP(next) != BRANCH)		// No choice.
				next = OPERAND(scan);	// Avoid recursion.
			else {
				// Try each alternative in turn, rewinding the
				// input after every failed attempt.
				// NOTE(review): OP(scan) is evaluated on the
				// result of regnext(scan) without a NULL check;
				// this relies on the BRANCH chain always ending
				// in a non-BRANCH node -- confirm.
				while (OP(scan) == BRANCH) {
					if (regmatch(OPERAND(scan)))
						return(1);
					reginput = save;
					scan = regnext(scan);
				}
				return(0);
				// NOTREACHED
			}
			break;
			}
		case STAR:
		case PLUS: {
			// Lookahead: when the following node is a literal, its
			// first character lets us skip hopeless positions.
			const TCHAR nextch =
				(OP(next) == EXACTLY) ? *OPERAND(next) : _T('\0');
			size_t no;
			TCHAR *const save = reginput;
			const size_t min = (OP(scan) == STAR) ? 0 : 1;

			// no is kept one above the number of repetitions being
			// tried (the position used is save + no - 1), so the
			// unsigned counter never has to drop below zero.
			for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--) {
				reginput = save + no - 1;
				// If it could work, try it.
				if (nextch == _T('\0') || *reginput == nextch)
					if (regmatch(next))
						return(1);
			}
			return(0);
			break;
			}
		case END:
			return(1);	// Success!
			break;
		default:
			TRACE0("regexp corruption\n");
			return(0);
			break;
		}
	}

	// We get here only if there's trouble -- normally "case END" is
	// the terminating point.

	TRACE0("corrupted pointers\n");
	return(0);
}
예제 #12
0
/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * First prints program[1 .. p-1] as two-digit hex values (each value cast
 * to unsigned char), with a line break after every index divisible by 16.
 * Then prints one line per node -- index, regprop() text, successor index
 * in parentheses and any operands -- until the END node has been printed
 * or the index reaches preg->p.  If END was reached, the regstart, reganch
 * and regmust header fields are reported as well.
 */
static void regdump(regex_t *preg)
{
	char text[MAX_UTF8_LEN + 1];
	int opcode = EXACTLY;	/* Anything but END, so the node loop starts. */
	int pos;
	int k;

	/* Raw hex dump. */
	k = 1;
	while (k < preg->p) {
		printf("%02x ", (unsigned char)preg->program[k]);
		if (k % 16 == 0) {
			printf("\n");
		}
		k++;
	}
	printf("\n");

	/* Symbolic dump, one node per line. */
	pos = 1;
	while (opcode != END && pos < preg->p) {
		opcode = OP(preg, pos);
		printf("%3d: %s", pos, regprop(opcode));	/* Where, what. */
		/* A missing successor (0) comes out as "(0)" via the same format. */
		printf("(%d)", regnext(preg, pos));
		pos += 2;

		switch (opcode) {
		case REP:
		case REPMIN:
		case REPX:
		case REPXMIN: {
			/* Operands: max, min, then one more value. */
			int max = preg->program[pos];
			int min = preg->program[pos + 1];

			if (max != 65535) {
				printf("{%d,%d}", min, max);
			}
			else {
				printf("{%d,*}", min);
			}
			printf(" %d", preg->program[pos + 2]);
			pos += 3;
			break;
		}

		case ANYOF:
		case ANYBUT:
			/* Zero-terminated list of (length, first) range pairs. */
			while (preg->program[pos]) {
				int span = preg->program[pos++];
				int low = preg->program[pos++];

				text[utf8_getchars(text, low)] = 0;
				printf("%s", text);
				if (span > 1) {
					text[utf8_getchars(text, low + span - 1)] = 0;
					printf("-%s", text);
				}
			}
			pos++;
			break;

		case EXACTLY:
			/* Zero-terminated literal, one code point per slot. */
			for (; preg->program[pos]; pos++) {
				text[utf8_getchars(text, preg->program[pos])] = 0;
				printf("%s", text);
			}
			pos++;
			break;

		default:
			break;
		}
		putchar('\n');
	}

	if (opcode == END) {
		/* Header fields of interest. */
		if (preg->regstart) {
			text[utf8_getchars(text, preg->regstart)] = 0;
			printf("start '%s' ", text);
		}
		if (preg->reganch) {
			printf("anchored ");
		}
		if (preg->regmust != 0) {
			printf("must have:");
			for (k = 0; k < preg->regmlen; k++) {
				putchar(preg->program[preg->regmust + k]);
			}
			putchar('\n');
		}
	}
	printf("\n");
}
예제 #13
0
/*
 * regmatch - main matching routine
 *
 * Tries to match the compiled program starting at node index prog against
 * the input at preg->reginput.  Simple nodes are processed in the loop;
 * nodes that need to know whether the rest of the pattern matched
 * (multi-way BRANCH, repeats, capture OPEN/CLOSE) recurse.
 *
 * Returns 0 on failure and 1 on success.  An unknown opcode or a chain
 * that runs out without reaching END yields REG_ERR_INTERNAL.
 * NOTE(review): the recursive callers in this function only test the
 * result for truth, so a non-zero REG_ERR_INTERNAL would be taken as a
 * match -- confirm its value.
 */
static int regmatch(regex_t *preg, int prog)
{
	int scan;	/* Current node. */
	int next;		/* Next node. */
	const char *save;

	scan = prog;

#ifdef DEBUG
	/* regprop() is given an opcode everywhere else, so pass one here too. */
	if (scan != 0 && regnarrate)
		fprintf(stderr, "%s(\n", regprop(OP(preg, scan)));
#endif
	while (scan != 0) {
		int n;
		int c;
#ifdef DEBUG
		if (regnarrate) {
			fprintf(stderr, "%3d: %s...\n", scan, regprop(OP(preg, scan)));	/* Where, what. */
		}
#endif
		next = regnext(preg, scan);
		/* Decode the current input character into c; n is used to advance past it. */
		n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));

		switch (OP(preg, scan)) {
		case BOL:
			if (preg->reginput != preg->regbol)
				return(0);
			break;
		case EOL:
			if (!reg_iseol(preg, c)) {
				return(0);
			}
			break;
		case WORDA:
			/* Must be looking at a letter, digit, or _ */
			if ((!isalnum(UCHAR(c))) && c != '_')
				return(0);
			/* Prev must be BOL or nonword */
			if (preg->reginput > preg->regbol &&
				(isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_'))
				return(0);
			break;
		case WORDZ:
			/* Can't match at BOL */
			if (preg->reginput > preg->regbol) {
				/*
				 * Current must be EOL or nonword.  A nonword
				 * character is one that is neither alphanumeric
				 * nor '_', hence the && (an || here would be
				 * true for every character).
				 */
				if (reg_iseol(preg, c) || (!isalnum(UCHAR(c)) && c != '_')) {
					c = preg->reginput[-1];
					/* Previous must be word */
					if (isalnum(UCHAR(c)) || c == '_') {
						break;
					}
				}
			}
			/* No */
			return(0);

		case ANY:
			if (reg_iseol(preg, c))
				return 0;
			preg->reginput += n;
			break;
		case EXACTLY: {
				int opnd;
				int len;
				int slen;

				opnd = OPERAND(scan);
				len = str_int_len(preg->program + opnd);

				/* A negative result means the literal does not match here. */
				slen = prefix_cmp(preg->program + opnd, len, preg->reginput, preg->cflags & REG_ICASE);
				if (slen < 0) {
					return(0);
				}
				preg->reginput += slen;
			}
			break;
		case ANYOF:
			if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) == 0) {
				return(0);
			}
			preg->reginput += n;
			break;
		case ANYBUT:
			if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) != 0) {
				return(0);
			}
			preg->reginput += n;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case BRANCH:
			if (OP(preg, next) != BRANCH)		/* No choice. */
				next = OPERAND(scan);	/* Avoid recursion. */
			else {
				/* Try each alternative, rewinding the input after a failure. */
				do {
					save = preg->reginput;
					if (regmatch(preg, OPERAND(scan))) {
						return(1);
					}
					preg->reginput = save;
					scan = regnext(preg, scan);
				} while (scan != 0 && OP(preg, scan) == BRANCH);
				return(0);
				/* NOTREACHED */
			}
			break;
		case REP:
		case REPMIN:
			return regmatchsimplerepeat(preg, scan, OP(preg, scan) == REPMIN);

		case REPX:
		case REPXMIN:
			return regmatchrepeat(preg, scan, OP(preg, scan) == REPXMIN);

		case END:
			return 1;	/* Success! */

		case OPENNC:
		case CLOSENC:
			return regmatch(preg, next);

		default:
			if (OP(preg, scan) >= OPEN+1 && OP(preg, scan) < CLOSE_END) {
				/*
				 * Capture group boundary: remember the position and
				 * record it only once the rest has matched, unless a
				 * later invocation already filled the slot.
				 */
				save = preg->reginput;
				if (regmatch(preg, next)) {
					if (OP(preg, scan) < CLOSE) {
						int no = OP(preg, scan) - OPEN;
						if (no < preg->nmatch && preg->pmatch[no].rm_so == -1) {
							preg->pmatch[no].rm_so = save - preg->start;
						}
					}
					else {
						int no = OP(preg, scan) - CLOSE;
						if (no < preg->nmatch && preg->pmatch[no].rm_eo == -1) {
							preg->pmatch[no].rm_eo = save - preg->start;
						}
					}
					return(1);
				}
				return(0);
			}
			return REG_ERR_INTERNAL;
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	return REG_ERR_INTERNAL;
}
예제 #14
0
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * This variant steps through the input with CHARLEN()/mbtowc() rather
 * than one byte at a time, and records the length of the most recently
 * consumed character in reglmlen.
 *
 * Returns 1 on a match, 0 on failure (including an unknown opcode or a
 * node chain that ends without reaching END).
 */
static int	/* 0 failure, 1 success */
regmatch(char *prog)
{
	register char	*scan;	/* Current node. */
	char	*next;		/* Next node. */
	wchar_t	wc = L'\0';
	int	len;

	scan = prog;
	while (scan != NULL) {
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			/* Matches only where CHARLEN() reports a zero-length character. */
			if (CHARLEN(reginput) != 0)
				return(0);
			break;
		case WORDA:
			/* Must be looking at a letter, digit, or _ */
			len = mbtowc(&wc, reginput, MB_CUR_MAX);
			/* Undecodable sequence: fall back to the raw byte. */
			if (len == -1) wc = *reginput;
			if ((!iswalnum(wc)) && wc != L'_')
				return(0);
			/* Prev must be BOL or nonword */
			/*
			 * NOTE(review): the previous character is decoded from
			 * reginput - reglmlen before the reginput > regbol test
			 * below, so at the start of the buffer this appears to
			 * read in front of regbol -- confirm.
			 */
			len = mbtowc(&wc, reginput - reglmlen, MB_CUR_MAX);
			if (len == -1) {
			   wc = *(reginput- reglmlen);
			   len = 1;
			}
			if (reginput > regbol &&  (iswalnum(wc) || wc == L'_'))
				return(0);
			break;
		case WORDZ:
			len = mbtowc(&wc, reginput, MB_CUR_MAX);
			/* Undecodable sequence: fall back to the raw byte. */
			if (len == -1) {
			   wc = *reginput;
			   len = 1;
			}
			/* Must be looking at non letter, digit, or _ */
			if (iswalnum(wc) || wc == L'_')
				return(0);
			/* We don't care what the previous char was */
			break;
		case ANY:
		        /* Solaris 2.6 Motif diff bug 1236359 - 1 line */
			if ( (len = CHARLEN(reginput)) <= 0)
				return(0);
			reglmlen = len;
			reginput += INCRLEN(len);
			break;
		case EXACTLY:
			 {
				register int	len;
				register int	clen;
				register char	*opnd;
				register char	*op, *ip;

				opnd = OPERAND(scan);
				len = strlen(opnd);
				/*
				 * Compare operand and input one character at a
				 * time; clen counts the operand bytes still to be
				 * matched and reaches 0 only on a full match.
				 */
				for (clen = len, op = opnd, ip = reginput; clen; ) {
					int	opl = CHARLEN(op), ipl = CHARLEN(ip);
					if (opl == ipl && !strncmp(op, ip, ipl))  {
						op += ipl;
						ip += ipl;
						clen -= ipl;
						reglmlen = ipl;
					} else
						break;
				}

				if (clen)
					return(0);
				reginput += len;
			}
			break;
		case ANYOF:
                        /* Solaris 2.6 motif diff bug 1236359 - 1 line */
			if ( ((len = CHARLEN(reginput)) <= 0) ||  
				!inclass(OPERAND(scan), reginput))
				return 0;
			reginput += len;
			reglmlen = len;
			break;
		case ANYBUT:
                        /* Solaris 2.6 motif diff bug 1236359 - 1 line */
			if ( ((len = CHARLEN(reginput)) <= 0) ||  
				inclass(OPERAND(scan), reginput))
				return 0;
			reginput += len;
			reglmlen = len;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN + 1:
		case OPEN + 2:
		case OPEN + 3:
		case OPEN + 4:
		case OPEN + 5:
		case OPEN + 6:
		case OPEN + 7:
		case OPEN + 8:
		case OPEN + 9:
			 {
				register int	no;
				register char	*save;

				no = OP(scan) - OPEN;
				/* Candidate group start; recorded only if the rest matches. */
				save = reginput;

				if (regmatch(next)) {
				/*
				 * Don't set startp if some later 
				 * invocation of the same parentheses
				 * already has.
				 */
					if (regstartp[no] == NULL)
						regstartp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case CLOSE + 1:
		case CLOSE + 2:
		case CLOSE + 3:
		case CLOSE + 4:
		case CLOSE + 5:
		case CLOSE + 6:
		case CLOSE + 7:
		case CLOSE + 8:
		case CLOSE + 9:
			 {
				register int	no;
				register char	*save;

				no = OP(scan) - CLOSE;
				/* Candidate group end; recorded only if the rest matches. */
				save = reginput;

				if (regmatch(next)) {
					/*
					 * Don't set endp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regendp[no] == NULL)
						regendp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case BRANCH:
			 {
				register char	*save;

				if (OP(next) != BRANCH)		/* No choice. */
					next = OPERAND(scan);	/* Avoid recursion. */
				else {
					/* Try each alternative, rewinding the input after a failure. */
					do {
						save = reginput;
						if (regmatch(OPERAND(scan)))
							return(1);
						reginput = save;
						scan = regnext(scan);
					} while (scan != NULL && OP(scan) == BRANCH);
					return(0);
					/* NOTREACHED */
				}
			}
			break;
		case STAR:
		case PLUS:
			 {
				register char	*nextch;
				register int	no;
				register char	*save;
				register int	min;
				int	nchars = 0;

				/*
				 * Lookahead to avoid useless match attempts
				 * when we know what character comes next.
				 */
				nextch = 0;
				if (OP(next) == EXACTLY)
					nextch = OPERAND(next);
				min = (OP(scan) == STAR) ? 0 : 1;
				save = reginput;
				no = regrepeat(OPERAND(scan));
				while (no >= min) {
                                       /* Solaris 2.6 motif diff bug 1236359 - 1 line */
                                       int mb_len = 0;
					/* If it could work, try it. */
					if (!nextch || !(len = CHARLEN(nextch)) ||  
						!strncmp(reginput, nextch, len) )
						if (regmatch(next))
							return(1);
					/* Couldn't or didn't -- back up. */
					no--;
					/*
					 * Backing up is done by rewinding to the
					 * saved start and stepping forward again
					 * over no characters.
					 */
					reginput = save;
                                        /* Solaris 2.6 motif diff bug 1236359 - 4 lines */
					for (nchars = 0; nchars < no && mb_len >= 0; nchars++) {
                                             mb_len = CHARLEN(reginput);
						if (mb_len > 0) reginput += mb_len;
                                        }
				}
				return(0);
			}
			break;
		case END:
			return(1);	/* Success! */
			break;
		default:
			return(0);
			break;
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	return(0);
}
예제 #15
0
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * The input is examined through regpeek() and advanced through regseek()
 * or by incrementing reginput directly; a regpeek(0) of 0 is treated as
 * end of input throughout.
 *
 * Returns 1 on a match, 0 on failure.  An unknown opcode or a node chain
 * that ends without reaching END is reported through SREerror() and also
 * yields 0.
 */
static int			/* 0 failure, 1 success */
regmatch(char *prog)
{
	register char *scan;	/* Current node. */
	char *next;		/* Next node. */
	extern char *strchr();

	scan = prog;
#ifdef DEBUG
	if (scan != NULL && regnarrate) {
		fprintf(stderr, "%s(\n", regprop(scan));
	}
#endif
	while (scan != NULL) {
#ifdef DEBUG
		if (regnarrate) {
			fprintf(stderr, "%s...\n", regprop(scan));
		}
#endif
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol) {
				return(0);
			}
			break;
		case EOL:
			if (regpeek(0) != '\0' && regpeek(0) != '\n') {
				return(0);
			}
			break;
		case BEGWORD:
			/* Match if current char isident
			 * and previous char BOL or !ident */
			if ((regpeek(0) == 0 || !isident(regpeek(0))) ||
			    (reginput != regbol && isident(regpeek(-1)))) {
				return(0);
			}
			break;
		case ENDWORD:
			/* Match if previous char isident
			 * and current char EOL or !ident */
			if ((regpeek(0) != 0 && isident(regpeek(0))) ||
			    reginput == regbol ||
			    !isident(regpeek(-1))) {
 				return(0);
			}
 			break;
		case WHITESP:
			/*
			 * Match a single whitespace character.  End of input
			 * must fail like in every other single-character
			 * class; otherwise reginput would be advanced past
			 * the terminator.
			 */
			if (regpeek(0) == 0 || !isspace(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case NWHITESP:
			/* don't match eol, or space or tab */
			if (regpeek(0) == 0 || isspace(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case ALNUM: /* includes _ */
			if (regpeek(0) == 0 || !isident(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case NALNUM:
			if (regpeek(0) == 0 || isident(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case DIGIT:
			if (regpeek(0) == 0 || !isdigit(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case NDIGIT:
			if (regpeek(0) == 0 || isdigit(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case PRINT:
			if (regpeek(0) == 0 ||
			    !(isprint(regpeek(0)) || isspace(regpeek(0)))) {
				return(0);
			}
			reginput++;
			break;
		case NPRINT:
			if (regpeek(0) == 0 || isprint(regpeek(0)) || isspace(regpeek(0))) {
				return(0);
			}
			reginput++;
			break;
		case ANY:
			if (regpeek(0) == '\0' || regpeek(0) == '\n') {
				return(0);
			}
			regseek(1);
			break;
		case EXACTLY: {
				register int len;
				register char *opnd;

				opnd = OPERAND(scan);
				/* Inline the first character, for speed. */
				if (*opnd != regpeek(0)) {
					return(0);
				}
				len = strlen(opnd);
				if (len > 1 &&
				    strncmp(opnd, reginput, len) != 0) {
					return(0);
				}
				regseek(len);
			}
			break;
		case ANYOF:
			/*
			 * strchr() also finds the set's own terminating NUL,
			 * so end of input has to be rejected explicitly or it
			 * would count as a member and be skipped over.
			 */
			if (regpeek(0) == '\0' ||
			    strchr(OPERAND(scan), regpeek(0)) == NULL) {
				return(0);
			}
			regseek(1);
			break;
		case ANYBUT:
			/* End of input fails here because strchr() finds the set's NUL. */
			if (strchr(OPERAND(scan), regpeek(0)) != NULL) {
				return(0);
			}
			regseek(1);
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1:
		case OPEN+2:
		case OPEN+3:
		case OPEN+4:
		case OPEN+5:
		case OPEN+6:
		case OPEN+7:
		case OPEN+8:
		case OPEN+9: {
				register int no;
				register char *save;

				no = OP(scan) - OPEN;
				/* Candidate group start; recorded only if the rest matches. */
				save = reginput;

				if (regmatch(next)) {
					/*
					 * Don't set startp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regstartp[no] == NULL) {
						regstartp[no] = save;
					}
					return(1);
				} else {
					return(0);
				}
			}
			break;
		case CLOSE+1:
		case CLOSE+2:
		case CLOSE+3:
		case CLOSE+4:
		case CLOSE+5:
		case CLOSE+6:
		case CLOSE+7:
		case CLOSE+8:
		case CLOSE+9: {
				register int no;
				register char *save;

				no = OP(scan) - CLOSE;
				/* Candidate group end; recorded only if the rest matches. */
				save = reginput;

				if (regmatch(next)) {
					/*
					 * Don't set endp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regendp[no] == NULL) {
						regendp[no] = save;
					}
					return(1);
				} else {
					return(0);
				}
			}
			break;
		case BRANCH: {
				register char *save;

				if (OP(next) != BRANCH) {	/* No choice. */
					next = OPERAND(scan);	/* Avoid recursion. */
				} else {
					/* Try each alternative, rewinding the input after a failure. */
					do {
						save = reginput;
						if (regmatch(OPERAND(scan))) {
							return(1);
						}
						reginput = save;
						scan = regnext(scan);
					} while (scan != NULL && OP(scan) == BRANCH);
					return(0);
					/* NOTREACHED */
				}
			}
			break;
		case STAR:
		case PLUS: {
				register char nextch;
				register int no;
				register char *save;
				register int min;

				/*
				 * Lookahead to avoid useless match attempts
				 * when we know what character comes next.
				 */
				nextch = '\0';
				if (OP(next) == EXACTLY) {
					nextch = *OPERAND(next);
				}
				min = (OP(scan) == STAR) ? 0 : 1;
				save = reginput;
				no = regrepeat(OPERAND(scan));
				while (no >= min) {
					/* If it could work, try it. */
					if (nextch == '\0' || regpeek(0) == nextch) {
						if (regmatch(next)) {
							return(1);
						}
					}
					/* Couldn't or didn't -- back up. */
					no--;
					reginput = save + no;
				}
				return(0);
			}
			break;
		case MINMAX: {
				register char	*save;
				unsigned char	min;
				unsigned char	max;
				register int	no;

				/*
				 * The operand is a short chain of nodes: the
				 * opcode slot of the first holds min, that of
				 * the second holds max, and the third starts
				 * the repeated sub-program.
				 */
				next = OPERAND(scan);
				min = OP(next);
				next = OPERAND(next);
				max = OP(next);
				next = OPERAND(next);
				save = reginput;
				/* The first min repetitions are mandatory. */
				for (no = 0 ; no < min ; no++) {
					if (!regmatch(next)) {
						reginput = save;
						return(0);
					}
				}
				/* Up to max further repetitions are optional. */
				for ( ; no < max ; no++) {
					if (!regmatch(next)) {
						break;
					}
				}
				return(1);
			}
			break;
		case END:
			return(1);	/* Success! */
			break;
		default:
			SREerror("memory corruption");
			return(0);
			break;
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	SREerror("corrupted pointers");
	return(0);
}
예제 #16
0
// compile - compile a regular expression into internal code.
//
// Runs the parser twice: a first pass with regcode pointing at regdummy to
// measure the program size and validate the expression, and a second pass
// that emits code into a freshly allocated this->program.  Afterwards the
// regstart, reganch, regmust and regmlen members are derived from the
// emitted program.
//
// Problems (no expression, parse failure, program too large, allocation
// failure) are reported with printf and the function returns early.
void ossimRegExp::compile (const char* exp) {
    if (exp == NULL) {
        //RAISE Error, SYM(ossimRegExp), SYM(No_Expr),
        printf ("ossimRegExp::compile(): No expression supplied.\n");
        return;
    }

    int flags;

    // Pass 1: measure the program and check the expression.
    regparse = exp;
    regnpar = 1;
    regsize = 0L;
    regcode = &regdummy;
    regc(MAGIC);
    if (!reg(0, &flags)) {
        printf ("ossimRegExp::compile(): Error in compile.\n");
        return;
    }
    this->startp[0] = this->endp[0] = this->searchstring = NULL;

    // The offsets stored in the program must stay small.
    if (regsize >= 32767L) {	// Probably could be 65535L.
        //RAISE Error, SYM(ossimRegExp), SYM(Expr_Too_Big),
        printf ("ossimRegExp::compile(): Expression too big.\n");
        return;
    }

    // Replace any previous program with a buffer of the measured size.
    // (delete [] on a null pointer is a no-op.)
    delete [] this->program;
    this->program = new char[regsize];
    this->progsize = (int) regsize;

    if (this->program == NULL) {
        //RAISE Error, SYM(ossimRegExp), SYM(Out_Of_Memory),
        printf ("ossimRegExp::compile(): Out of memory.\n");
        return;
    }

    // Pass 2: emit the code for real.
    regparse = exp;
    regnpar = 1;
    regcode = this->program;
    regc(MAGIC);
    reg(0, &flags);

    // Worst-case defaults for the optimization hints.
    this->regstart = '\0';
    this->reganch = 0;
    this->regmust = NULL;
    this->regmlen = 0;

    // The hints are only derived when there is a single top-level choice.
    const char* scan = this->program + 1;	// First BRANCH.
    if (OP(regnext(scan)) != END)
        return;
    scan = OPERAND(scan);

    // Starting-point info.
    if (OP(scan) == EXACTLY)
        this->regstart = *OPERAND(scan);
    else if (OP(scan) == BOL)
        this->reganch = 1;

    if (!(flags & SPSTART))
        return;

    //
    // Something expensive is in the r.e.: pick the longest literal that
    // must appear as regmust.  Ties go to the later literal, since the
    // regstart check already covers the beginning of the r.e.
    //
    const char* longest = NULL;
    unsigned long len = 0;
    while (scan != NULL) {
        if (OP(scan) == EXACTLY) {
            const unsigned long oplen = (unsigned long)strlen(OPERAND(scan));
            if (oplen >= len) {
                longest = OPERAND(scan);
                len = oplen;
            }
        }
        scan = regnext(scan);
    }
    this->regmust = longest;
    this->regmlen = len;
}
예제 #17
0
/*
 - reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 */
char* ossimRegExp::reg (int paren, int *flagp) {
    char* ret;
    char* br;
    char* ender;
    int   parno =0;
    int   flags;

    *flagp = HASWIDTH;		// Tentatively.

    // Make an OPEN node, if parenthesized.
    if (paren) {
        if (regnpar >= NSUBEXP) {
            //RAISE Error, SYM(ossimRegExp), SYM(Too_Many_Parens),
            printf ("ossimRegExp::compile(): Too many parentheses.\n");
            return 0;
        }
        parno = regnpar;
        regnpar++;
        ret = regnode(OPEN + parno);
    }
    else
        ret = NULL;

    // Pick up the branches, linking them together.
    br = regbranch(&flags);
    if (br == NULL)
        return (NULL);
    if (ret != NULL)
        regtail(ret, br);	// OPEN -> first.
    else
        ret = br;
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & SPSTART;
    while (*regparse == '|') {
        regparse++;
        br = regbranch(&flags);
        if (br == NULL)
            return (NULL);
        regtail(ret, br);	// BRANCH -> BRANCH.
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & SPSTART;
    }

    // Make a closing node, and hook it on the end.
    ender = regnode((paren) ? CLOSE + parno : END);
    regtail(ret, ender);

    // Hook the tails of the branches to the closing node.
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    // Check for proper termination.
    if (paren && *regparse++ != ')') {
        //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
        printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
        return 0;
    }
    else if (!paren && *regparse != '\0') {
        if (*regparse == ')') {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
            printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
            return 0;
        }
        else {
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::compile(): Internal error.\n");
            return 0;
        }
        // NOTREACHED
    }
    return (ret);
}
예제 #18
0
/*
 - regcomp - compile a regular expression into internal code
 *
 * The program buffer is allocated up front with (strlen(exp) + 1) * 5
 * int-sized cells, and the expression is then compiled in a single call
 * to reg().
 *
 * preg   - receives the compiled program; it is zeroed first.
 * exp    - pattern text; NULL is rejected with REG_ERR_NULL_ARGUMENT.
 * cflags - compile flags, stored in preg->cflags.
 *
 * Returns 0 on success.  If reg() fails, preg->err is returned and
 * preg->program is left allocated.
 * NOTE(review): presumably the caller is expected to release it via the
 * matching free routine -- confirm against callers.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 */
int regcomp(regex_t *preg, const char *exp, int cflags)
{
	int scan;
	int longest;
	unsigned len;
	int flags;

	memset(preg, 0, sizeof(*preg));

	if (exp == NULL)
		FAIL(preg, REG_ERR_NULL_ARGUMENT);

#ifdef DEBUG
	/* Trace only after the NULL check: "%s" with a NULL pointer is undefined. */
	fprintf(stderr, "Compiling: '%s'\n", exp);
#endif

	preg->cflags = cflags;
	preg->regparse = exp;

	/* Allocate space. */
	preg->proglen = (strlen(exp) + 1) * 5;
	preg->program = malloc(preg->proglen * sizeof(int));
	if (preg->program == NULL)
		FAIL(preg, REG_ERR_NOMEM);

	/* Note that since we store a magic value as the first item in the program,
	 * program offsets will never be 0
	 */
	regc(preg, REG_MAGIC);
	if (reg(preg, 0, &flags) == 0) {
		return preg->err;
	}

	/* Reject expressions whose sub-expression count reaches REG_MAX_PAREN. */
	if (preg->re_nsub >= REG_MAX_PAREN)
		FAIL(preg,REG_ERR_TOO_BIG);

	/* Dig out information for optimizations. */
	preg->regstart = 0;	/* Worst-case defaults. */
	preg->reganch = 0;
	preg->regmust = 0;
	preg->regmlen = 0;
	scan = 1;			/* First BRANCH. */
	if (OP(preg, regnext(preg, scan)) == END) {		/* Only one top-level choice. */
		scan = OPERAND(scan);

		/* Starting-point info. */
		if (OP(preg, scan) == EXACTLY) {
			preg->regstart = preg->program[OPERAND(scan)];
		}
		else if (OP(preg, scan) == BOL)
			preg->reganch++;

		/*
		 * If there's something expensive in the r.e., find the
		 * longest literal string that must appear and make it the
		 * regmust.  Resolve ties in favor of later strings, since
		 * the regstart check works with the beginning of the r.e.
		 * and avoiding duplication strengthens checking.  Not a
		 * strong reason, but sufficient in the absence of others.
		 */
		if (flags&SPSTART) {
			longest = 0;
			len = 0;
			for (; scan != 0; scan = regnext(preg, scan)) {
				if (OP(preg, scan) == EXACTLY) {
					int plen = str_int_len(preg->program + OPERAND(scan));
					if (plen >= len) {
						longest = OPERAND(scan);
						len = plen;
					}
				}
			}
			preg->regmust = longest;
			preg->regmlen = len;
		}
	}

#ifdef DEBUG
	regdump(preg);
#endif

	return 0;
}
예제 #19
0
// reg - parse a full expression or the inside of one parenthesized group.
//
// The caller has already consumed the opening parenthesis.  `paren` is
// nonzero for a group; `*flagp` receives the HASWIDTH/SPSTART summary of
// all alternatives.  Returns the first node of the chain, or NULL after
// tracing a diagnostic.
TCHAR *CRegExp::reg(int paren, int *flagp)
{
	char *head = NULL;
	char *branch;
	char *closer;
	int groupNo = 0;
	int branchFlags;

	*flagp = HASWIDTH;	// Assume width until a branch says otherwise.

	if (paren)
	{
		// A group starts with its numbered OPEN node.
		if (regnpar >= NSUBEXP)
		{
			TRACE1("Too many (). NSUBEXP is set to %d\n", NSUBEXP );
			return NULL;
		}
		groupNo = regnpar++;
		head = regnode(OPEN+groupNo);
	}

	// First alternative.
	branch = regbranch(&branchFlags);
	if (branch == NULL)
		return(NULL);
	if (paren)
		regtail(head, branch);	// OPEN -> first.
	else
		head = branch;
	if (!(branchFlags & HASWIDTH))
		*flagp &= ~HASWIDTH;
	*flagp |= branchFlags & SPSTART;

	// Remaining alternatives, each chained onto the previous BRANCH.
	while (*regparse == _T('|'))
	{
		regparse++;
		branch = regbranch(&branchFlags);
		if (branch == NULL)
			return(NULL);
		regtail(head, branch);	// BRANCH -> BRANCH.
		if (!(branchFlags & HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= branchFlags & SPSTART;
	}

	// Terminate the list with CLOSE (group) or END (top level).
	closer = regnode((paren) ? CLOSE+groupNo : END);
	regtail(head, closer);

	// Point the tail of every alternative at the closing node.
	branch = head;
	while (branch != NULL)
	{
		regoptail(branch, closer);
		branch = regnext(branch);
	}

	// Verify that the input ends the way this level expects.
	if (paren)
	{
		// The character is consumed whether or not it is ')'.
		if (*regparse++ != _T(')'))
		{
			TRACE0("unterminated ()\n");
			return NULL;
		}
	}
	else if (*regparse != _T('\0'))
	{
		if (*regparse == _T(')'))
			TRACE0("unmatched ()\n");
		else
			TRACE0("internal error: junk on end\n");
		return NULL;
	}

	return(head);
}
예제 #20
0
/*
 - reg - parse a full expression or the inside of one parenthesized group
 *
 * The caller has already consumed the opening parenthesis.  `paren` is
 * nonzero for a group; a group that begins with "?:" is non-capturing and
 * uses the pseudo group number -1, any other group takes the next value
 * of preg->re_nsub.  `*flagp` receives the HASWIDTH/SPSTART summary of all
 * alternatives.  Returns the offset of the first node, or 0 on failure
 * (preg->err is set for termination errors).
 */
static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
{
	int head = 0;
	int branch;
	int closer;
	int groupNo = 0;
	int branchFlags;

	*flagp = HASWIDTH;	/* Assume width until a branch says otherwise. */

	if (paren) {
		if (preg->regparse[0] == '?' && preg->regparse[1] == ':') {
			/* non-capturing paren: skip the "?:" marker */
			preg->regparse += 2;
			groupNo = -1;
		} else {
			preg->re_nsub++;
			groupNo = preg->re_nsub;
		}
		head = regnode(preg, OPEN+groupNo);
	}

	/* First alternative. */
	branch = regbranch(preg, &branchFlags);
	if (branch == 0)
		return 0;
	if (head == 0)
		head = branch;
	else
		regtail(preg, head, branch);	/* OPEN -> first. */
	*flagp &= ~(~branchFlags & HASWIDTH);
	*flagp |= branchFlags & SPSTART;

	/* Remaining alternatives, each chained onto the previous BRANCH. */
	while (*preg->regparse == '|') {
		preg->regparse++;
		branch = regbranch(preg, &branchFlags);
		if (branch == 0)
			return 0;
		regtail(preg, head, branch);	/* BRANCH -> BRANCH. */
		*flagp &= ~(~branchFlags & HASWIDTH);
		*flagp |= branchFlags & SPSTART;
	}

	/* Terminate the list with CLOSE (group) or END (top level). */
	closer = regnode(preg, (paren) ? CLOSE+groupNo : END);
	regtail(preg, head, closer);

	/* Point the tail of every alternative at the closing node. */
	branch = head;
	while (branch != 0) {
		regoptail(preg, branch, closer);
		branch = regnext(preg, branch);
	}

	/* Verify that the input ends the way this level expects. */
	if (paren) {
		/* The character is consumed whether or not it is ')'. */
		if (*preg->regparse++ != ')') {
			preg->err = REG_ERR_UNMATCHED_PAREN;
			return 0;
		}
	} else if (*preg->regparse != '\0') {
		preg->err = (*preg->regparse == ')') ? REG_ERR_UNMATCHED_PAREN
						     : REG_ERR_JUNK_ON_END;
		return 0;
	}

	return(head);
}
예제 #21
0
// RegComp - compile the pattern `exp` into this object's program.
//
// The pattern is parsed twice: first with bEmitCode FALSE and regcode
// aimed at regdummy to measure and validate it, then with bEmitCode TRUE
// into a freshly allocated, zero-filled buffer.  Afterwards the matcher
// hints regstart, reganch, regmust and regmlen are derived.
//
// Returns `this` on success, NULL if `exp` is NULL, if either parsing
// pass fails, or if the allocation yields NULL.
CRegExp* CRegExp::RegComp(const TCHAR *exp)
{
	TCHAR *scan;
	int flags;

	if (exp == NULL)
		return NULL;

	bCompiled = TRUE;

	// First pass: determine size, legality.
	bEmitCode = FALSE;
	regparse = (TCHAR *)exp;
	regnpar = 1;
	regsize = 0L;
	regdummy[0] = NOTHING;
	regdummy[1] = regdummy[2] = 0;
	regcode = regdummy;
	if (reg(0, &flags) == NULL)
		return(NULL);

	// Allocate space.  The buffer comes from new[], so it must be released
	// with delete[]; the pointer is cleared first so it never dangles if
	// the allocation below throws.
	delete [] program;
	program = NULL;
	program = new TCHAR[regsize];

	// Check before touching the buffer (matters for non-throwing new).
	if (program == NULL)
		return NULL;
	memset( program, 0, regsize * sizeof(TCHAR) );

	// Second pass: emit code.
	bEmitCode = TRUE;
	regparse = (TCHAR *)exp;
	regnpar = 1;
	regcode = program;
	if (reg(0, &flags) == NULL)
		return NULL;

	// Dig out information for optimizations.
	regstart = _T('\0');		// Worst-case defaults.
	reganch = 0;
	regmust = NULL;
	regmlen = 0;
	scan = program;		// First BRANCH.
	if (OP(regnext(scan)) == END)
	{
		// Only one top-level choice.
		scan = OPERAND(scan);

		// Starting-point info.
		if (OP(scan) == EXACTLY)
			regstart = *OPERAND(scan);
		else if (OP(scan) == BOL)
			reganch = 1;

		// If there's something expensive in the r.e., find the
		// longest literal string that must appear and make it the
		// regmust.  Resolve ties in favor of later strings, since
		// the regstart check works with the beginning of the r.e.
		// and avoiding duplication strengthens checking.  Not a
		// strong reason, but sufficient in the absence of others.

		if (flags&SPSTART)
		{
			char *longest = NULL;
			size_t len = 0;

			for (; scan != NULL; scan = regnext(scan))
			{
				if (OP(scan) != EXACTLY)
					continue;

				// Measure each literal once.
				size_t curLen = _tcslen(OPERAND(scan));
				if (curLen >= len)
				{
					longest = OPERAND(scan);
					len = curLen;
				}
			}
			regmust = longest;
			regmlen = (int)len;
		}
	}

	return this;
}
예제 #22
0
/*
   - regcomp - compile a regular expression into internal code
 *
 * The pattern is first widened into an array of shorts in which every
 * operator character carries the SPECIAL bit, so that later stages can
 * tell operators from escaped literals.  `excompat` swaps the roles of
 * "(" / ")" and "\(" / "\)" (unix ex style).
 *
 * The widened pattern is then compiled twice: once with regcode aimed at
 * regdummy to measure and validate it, and once into the allocated regexp.
 * Nothing is allocated for the program until the sizing pass succeeded,
 * and the code never has to move once emitted.
 *
 * Returns the compiled regexp, or NULL on any failure; the temporary
 * widened copy is released on every path that gets past its allocation.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 */
regexp *regcomp (unsigned char * exp,
        int excompat)       /* \( \) operators like in unix ex */
{
    regexp *prog;
    unsigned char *cursor;
    short *wide, *out, ch;
    int flags;

    if (!exp)
        FAIL("NULL argument\n");

    wide = (short *)
        DXALLOC((strlen((char *)exp) + 1) * (sizeof(short[8]) / sizeof(char[8])),
                TAG_TEMPORARY, "regcomp: 1");

    /* Widen the pattern, tagging operator characters with SPECIAL. */
    cursor = exp;
    out = wide;
    while ((ch = *cursor++)) {
        if (ch == '(' || ch == ')') {
            *out++ = excompat ? ch : ch | SPECIAL;
        } else if (strchr(".*+?|$^[]", ch) != NULL) {
            *out++ = ch | SPECIAL;
        } else if (ch != '\\') {
            *out++ = ch;
        } else {
            /* Backslash: the following character decides. */
            switch (ch = *cursor++) {
                case 0:
                    FREE(wide);
                    FAIL("Regular expression cannot end with '\\'.  Use \"\\\\\".\n");
                    break;
                case '(':
                case ')':
                    *out++ = excompat ? ch | SPECIAL : ch;
                    break;
                case '<':
                case '>':
                    *out++ = ch | SPECIAL;
                    break;
                case '{':
                case '}':
                    FREE(wide);
                    FAIL("sorry, unimplemented operator\n");
                case 'b':
                    *out++ = '\b';
                    break;
                case 't':
                    *out++ = '\t';
                    break;
                case 'r':
                    *out++ = '\r';
                    break;
                default:
                    *out++ = ch;
            }
        }
    }
    *out = 0;

    /* Pass 1: size and legality only. */
    regparse = wide;
    regnpar = 1;
    regsize = 0L;
    regcode = &regdummy;
    regc((char) MAGIC);
    if (reg(0, &flags) == (char *) NULL) {
        FREE(wide);
        return ((regexp *) NULL);
    }

    /* The program must stay below the pointer-storage limit. */
    if (regsize >= 32767L) {    /* Probably could be 65535L. */
        FREE(wide);
        FAIL("regexp too big\n");
    }

    /* Room for the header plus the measured program. */
    prog = (regexp *) DXALLOC(sizeof(regexp) + (unsigned) regsize,
            TAG_TEMPORARY, "regcomp: 2");
    if (prog == (regexp *) NULL) {
        FREE(wide);
        FAIL("out of space\n");
    }

    /* Pass 2: emit the code for real. */
    regparse = wide;
    regnpar = 1;
    regcode = (char *)(prog->program);
    regc((char) MAGIC);
    if (reg(0, &flags) == NULL) {
        FREE(wide);
        FREE(prog);
        return ((regexp *) NULL);
    }

    /* Worst-case defaults for the optimization hints. */
    prog->regstart = '\0';
    prog->reganch = 0;
    prog->regmust = NULL;
    prog->regmlen = 0;

    cursor = (unsigned char *)(prog->program + 1);      /* First BRANCH. */
    if (OP(regnext((char *)cursor)) == END) {   /* Only one top-level choice. */
        cursor = OPERAND(cursor);

        /* Starting-point info. */
        if (OP(cursor) == EXACTLY)
            prog->regstart = *OPERAND(cursor);
        else if (OP(cursor) == BOL)
            prog->reganch++;

        /*
         * Something expensive is in the r.e.: remember the longest literal
         * that must appear.  Ties go to the later literal (note the >=),
         * because the regstart check already covers the beginning.
         */
        if (flags & SPSTART) {
            char *best = NULL;
            int best_len = 0;

            while (cursor != NULL) {
                if (OP(cursor) == EXACTLY) {
                    char *literal = (char *)OPERAND(cursor);
                    int literal_len = strlen(literal);

                    if (literal_len >= best_len) {
                        best = literal;
                        best_len = literal_len;
                    }
                }
                cursor = (unsigned char *)regnext((char *)cursor);
            }
            prog->regmust = best;
            prog->regmlen = best_len;
        }
    }
    FREE((char *) wide);
    return (prog);
}
예제 #23
0
/*
 - RegComp - compile a regular expression into internal code
 *
 * instr is the pattern text.  A NULL pattern is rejected up front with
 * ERR_RE_NULL_ARGUMENT, before anything dereferences it.  When
 * WANT_EXCLAMATION is defined, a leading '!' is stripped and disables the
 * magic-character flipping below.
 *
 * If flipping applies (MAGICFLAG is off and MAGICSTR is set), the pattern
 * is rewritten into a local buffer so that the characters listed in
 * MAGICSTR swap their escaped/unescaped roles.
 *
 * The pattern is then compiled twice: once with regcode aimed at regdummy
 * to measure and validate it, and once into the allocated regexp, so the
 * code never has to move once emitted.
 *
 * Returns the compiled regexp, or NULL when the argument is NULL or
 * either parsing pass fails.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 */
regexp *RegComp( const char *instr )
{
    regexp      *r;
    char        *scan;
    char        *longest;
    const char  *exp;
    char        buff[MAX_STR*2];
    int         flags, ignmag = FALSE;
    unsigned    j;
    size_t      i, k, len;

    /* Reset the error state, then reject NULL before touching instr. */
    regError( ERR_NO_ERR );
    if( instr == NULL ) {
        FAIL( ERR_RE_NULL_ARGUMENT );
    }

#ifdef WANT_EXCLAMATION
    if( instr[0] == '!' ) {
        instr++;
        ignmag = TRUE;
    }
#endif

    /*
     * flip roles of magic chars
     *
     * NOTE(review): nothing bounds the writes into buff; the output can be
     * up to twice the input length, so this assumes strlen( instr ) is
     * below MAX_STR -- confirm against callers.
     */
    if( !ignmag && ( !MAGICFLAG && MAGICSTR != NULL ) ) {
        j = 0;
        k = strlen( instr );
        for( i = 0; i < k; i++ ) {
            if( instr[i] == '\\' ) {
                /* escaped: keep the backslash only for non-magic chars */
                if( strchr( MAGICSTR, instr[i + 1] ) == NULL ) {
                    buff[j++] = '\\';
                }
                i++;
            } else {
                /* unescaped magic char: add a backslash */
                if( strchr( MAGICSTR, instr[i] ) != NULL ) {
                    buff[j++] = '\\';
                }
            }
            buff[j++] = instr[i];

        }
        buff[j] = 0;
        exp = buff;
    } else {
        exp = instr;
    }

    /* First pass: determine size, legality. */
    regparse = exp;
    regnpar = 1;
    regsize = 0L;
    regcode = &regdummy;
    regc( MAGIC );
    if( reg( 0, &flags ) == NULL ) {
        return( NULL );
    }

    /* Allocate space. */
    r = ALLOC( sizeof( regexp ) + ( unsigned ) regsize );

    /* Second pass: emit code. */
    regparse = exp;
    regnpar = 1;
    regcode = r->program;
    regc( MAGIC );
    if( reg( 0, &flags ) == NULL ) {
        /* NOTE(review): r is not released on this path -- confirm whether
         * a second-pass failure is possible and which routine frees ALLOC
         * memory. */
        return( NULL );
    }

    /* Dig out information for optimizations. */
    r->regstart = '\0';     /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    scan = r->program + 1;                    /* First BRANCH. */
    if( OP( regnext( scan ) ) == END ) { /* Only one top-level choice. */
        scan = OPERAND( scan );

        /* Starting-point info. */
        if( OP( scan ) == EXACTLY ) {
            r->regstart = *OPERAND( scan );
        } else if( OP( scan ) == BOL ) {
            r->reganch++;
        }

        /*
         * If there's something expensive in the r.e., find the
         * longest literal string that must appear and make it the
         * regmust.  Resolve ties in favor of later strings, since
         * the regstart check works with the beginning of the r.e.
         * and avoiding duplication strengthens checking.  Not a
         * strong reason, but sufficient in the absence of others.
         */
        if( flags & SPSTART ) {
            longest = NULL;
            len = 0;
            for( ; scan != NULL; scan = regnext( scan ) ) {
                if( OP( scan ) == EXACTLY ) {
                    /* measure each literal once */
                    size_t  curlen = strlen( OPERAND( scan ) );

                    if( curlen >= len ) {
                        longest = OPERAND( scan );
                        len = curlen;
                    }
                }
            }
            r->regmust = longest;
            r->regmlen = (short)len;
        }
    }

    return( r );
}
예제 #24
0
/*
   - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * prog is the first node to match.  The input position is the file-level
 * reginput, which is advanced as nodes consume characters; regbol marks
 * the beginning of the input.  Returns 1 when the chain starting at prog
 * matches through an END node, 0 otherwise.  regstartp[] / regendp[]
 * are filled in for parenthesized groups on the successful path.
 */
static int regmatch (char * prog)
{
    register char *scan;        /* Current node. */
    char *nxt;                  /* nxt node. */

    scan = prog;
#ifdef DEBUG
    if (scan != (char *) NULL && regnarrate)
        debug_message("%s(\n", regprop(scan));
#endif
                while (scan != (char *) NULL) {
#ifdef DEBUG
                if (regnarrate)
                debug_message("%s...\n", regprop(scan));
#endif
                nxt = regnext(scan);

                switch (OP(scan)) {
                /* Matches only at the very beginning of the input. */
                case BOL:
                if (reginput != regbol)
                return (0);
                break;
                /* Matches only at the terminating NUL. */
                case EOL:
                if (*reginput != '\0')
                return (0);
                break;
                /* Consumes any one character except the terminator. */
                case ANY:
                if (*reginput == '\0')
                return (0);
                reginput++;
                break;
                /*
                 * Zero-width: always true at regbol; elsewhere the previous
                 * character must not be a word part and the current one must.
                 */
                case WORDSTART:
                if (reginput == regbol)
                    break;
                if (*reginput == '\0' ||
                        ISWORDPART(*(reginput - 1)) || !ISWORDPART(*reginput))
                    return (0);
                break;
                /*
                 * Zero-width: always true at end of input; elsewhere the
                 * previous character must be a word part and the current
                 * one must not (and we must not be at regbol).
                 */
                case WORDEND:
                if (*reginput == '\0')
                    break;
                if (reginput == regbol ||
                        !ISWORDPART(*(reginput - 1)) || ISWORDPART(*reginput))
                    return (0);
                break;
                /* Literal string: must appear verbatim at the input position. */
                case EXACTLY:{
                                 register int len;
                                 register char *opnd;

                                 opnd = OPERAND(scan);
                                 /* Inline the first character, for speed. */
                                 if (*opnd != *reginput)
                                     return (0);
                                 len = strlen(opnd);
                                 if (len > 1 && strncmp(opnd, reginput, len) != 0)
                                     return (0);
                                 reginput += len;
                             }
                             break;
                /* One character that occurs in the operand string. */
                case ANYOF:
                             if (*reginput == '\0' ||
                                     strchr(OPERAND(scan), *reginput) == (char *) NULL)
                                 return (0);
                             reginput++;
                             break;
                /* One character that does not occur in the operand string. */
                case ANYBUT:
                             if (*reginput == '\0' ||
                                     strchr(OPERAND(scan), *reginput) != (char *) NULL)
                                 return (0);
                             reginput++;
                             break;
                /* NOTHING and BACK consume no input; just move to the next node. */
                case NOTHING:
                             break;
                case BACK:
                             break;
                /*
                 * Group start: match the rest recursively and, on success,
                 * record where the group began.
                 */
                case OPEN + 1:
                case OPEN + 2:
                case OPEN + 3:
                case OPEN + 4:
                case OPEN + 5:
                case OPEN + 6:
                case OPEN + 7:
                case OPEN + 8:
                case OPEN + 9:{
                                  register int no;
                                  register const char *save;

                                  no = OP(scan) - OPEN;
                                  save = reginput;

                                  if (regmatch(nxt)) {
                                      /*
                                       * Don't set startp if some later invocation of the same
                                       * parentheses already has.
                                       */
                                      if (regstartp[no] == (char *) NULL)
                                          regstartp[no] = save;
                                      return (1);
                                  } else
                                      return (0);
                              }
                              break;
                /*
                 * Group end: match the rest recursively and, on success,
                 * record where the group ended.
                 */
                case CLOSE + 1:
                case CLOSE + 2:
                case CLOSE + 3:
                case CLOSE + 4:
                case CLOSE + 5:
                case CLOSE + 6:
                case CLOSE + 7:
                case CLOSE + 8:
                case CLOSE + 9:{
                                   register int no;
                                   register const char *save;

                                   no = OP(scan) - CLOSE;
                                   save = reginput;

                                   if (regmatch(nxt)) {
                                       /*
                                        * Don't set endp if some later invocation of the same
                                        * parentheses already has.
                                        */
                                       if (regendp[no] == (char *) NULL)
                                           regendp[no] = save;
                                       return (1);
                                   } else
                                       return (0);
                               }
                               break;
                /*
                 * Alternation: try each BRANCH in turn from the same input
                 * position, restoring reginput after a failed attempt.
                 */
                case BRANCH:{
                                register const char *save;

                                if (OP(nxt) != BRANCH)  /* No choice. */
                                    nxt = OPERAND(scan);        /* Avoid recursion. */
                                else {
                                    do {
                                        save = reginput;
                                        if (regmatch(OPERAND(scan)))
                                            return (1);
                                        reginput = save;
                                        scan = regnext(scan);
                                    } while (scan != (char *) NULL && OP(scan) == BRANCH);
                                    return (0);
                                    /* NOTREACHED */
                                }
                            }
                            break;
                /*
                 * Repetition: take the count reported by regrepeat() and
                 * then try the remainder with fewer and fewer repetitions,
                 * down to the minimum (0 for STAR, 1 for PLUS).
                 * NOTE(review): presumably regrepeat() also leaves reginput
                 * just past the repeated characters -- confirm.
                 */
                case STAR:
                case PLUS:{
                              register char nextch;
                              register int no;
                              register const char *save;
                              register int minimum;

                              /*
                               * Lookahead to avoid useless match attempts when we know
                               * what character comes next.
                               */
                              nextch = '\0';
                              if (OP(nxt) == EXACTLY)
                                  nextch = *OPERAND(nxt);
                              minimum = (OP(scan) == STAR) ? 0 : 1;
                              save = reginput;
                              no = regrepeat(OPERAND(scan));
                              while (no >= minimum) {
                                  /* If it could work, try it. */
                                  if (nextch == '\0' || *reginput == nextch)
                                      if (regmatch(nxt))
                                          return (1);
                                  /* Couldn't or didn't -- back up. */
                                  no--;
                                  reginput = save + no;
                              }
                              return (0);
                          }
                          break;
                case END:
                          return (1);         /* Success! */
                          break;
                /* Any other opcode means the program is damaged. */
                default:
                          regerror("memory corruption\n");
                          return (0);
                          break;
                }

                scan = nxt;
                }

                /*
                 * We get here only if there's trouble -- normally "case END" is the
                 * terminating point.
                 */
                regerror("corrupted pointers\n");
                return (0);
}
예제 #25
0
/*
 * REcompile - compile a regular expression into internal code
 *
 * exp is the pattern text, re_exp receives the compiled expression on
 * success, and mem_tag is handed to MEreqmem() for the allocation.
 * Returns OK on success and FAIL otherwise; *re_exp is written only on
 * success.
 *
 * The pattern is compiled twice: once with regcode aimed at regdummy to
 * measure and validate it, and once into the allocated RE_EXP.  Nothing
 * is allocated until the sizing pass succeeded, and the code never has to
 * move once emitted.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled RE_EXP.
 */
STATUS
REcompile( char *exp, RE_EXP **re_exp, i4  mem_tag )
{
    RE_EXP *compiled;
    char *node;
    i4 flags;
    u_char magic = MAGIC;

    if (exp == NULL)
    {
        _error("NULL argument");
        return (FAIL);
    }

    /* Pass 1: size and legality only. */
    regparse = exp;
    regnpar = 1;
    regsize = 0L;
    regcode = &regdummy;
    regc( (char *) &magic );
    if (reg(0, &flags) == NULL)
        return( FAIL );

    /* The program must stay below the pointer-storage limit. */
    if (regsize >= 32767L)		/* Probably could be 65535L. */
    {
        _error("regular expression too big");
        return (FAIL);
    }

    /* Room for the header plus the measured program. */
    compiled = (RE_EXP *) MEreqmem( mem_tag, sizeof(RE_EXP) + (unsigned) regsize,
                                    FALSE, NULL);
    if (compiled == NULL)
    {
        _error("out of space");
        return (FAIL);
    }

    /* Pass 2: emit the code for real. */
    regparse = exp;
    regnpar = 1;
    regcode = compiled->program;
    regc( (char *) &magic );
    if (reg(0, &flags) == NULL)
        return( FAIL );

    /* Worst-case defaults for the optimization hints. */
    compiled->regstart = '\0';
    compiled->reganch = 0;
    compiled->regmust = NULL;
    compiled->regmlen = 0;

    node = compiled->program+1;			/* First BRANCH. */
    if (OP(regnext(node)) == END) {		/* Only one top-level choice. */
        node = OPERAND(node);

        /* Starting-point info. */
        if (OP(node) == EXACTLY)
            compiled->regstart = *OPERAND(node);
        else if (OP(node) == BOL)
            compiled->reganch++;

        /*
         * Something expensive is in the r.e.: remember the longest literal
         * that must appear.  Ties go to the later literal (note the >=),
         * because the regstart check already covers the beginning.
         */
        if (flags&SPSTART) {
            char *best = NULL;
            i4 best_len = 0;

            while (node != NULL) {
                if (OP(node) == EXACTLY && STlength(OPERAND(node)) >= best_len) {
                    best = OPERAND(node);
                    best_len = STlength(best);
                }
                node = regnext(node);
            }
            compiled->regmust = best;
            compiled->regmlen = best_len;
        }
    }

    *re_exp = compiled;
    return( OK );
}
예제 #26
0
파일: regexpr2.c 프로젝트: telescreen/misc
//	Evaluate the compiled expression in expr against the input val,
//	where amt is used as the input length (a probe whose offset equals
//	amt is considered to be at the end of the input).
//
//	Returns 1 as soon as a probe runs out of nodes with its offset at
//	the end of the input.  Returns 0 when the run queue empties without
//	that happening, or when the memo array, a probe, a clone or a stack
//	entry cannot be obtained.
//
//	Probes are referred to by their distance below `base`, the address
//	expr->size bytes past the start of expr; a queue value of 0 means
//	"no probe".
int regevaluate (struct Expr *expr, uchar *val, int amt)
{
struct Probe *base = (struct Probe *)((uchar *)expr + expr->size);
struct Probe *probe, *stack, *clone;
int idx, queue;

	//	reset evaluator

	expr->val = val;
	expr->amt = amt;
	expr->top = 0;

	//	if using new compiled node tree,
	//	the memo array starts expr->tree bytes
	//	past the Expr header

	if( !expr->memo )
		expr->memo = (uchar *)(expr + 1) + expr->tree;

	//	calculate size of memo array in bits
	//	by calculating number of nodes
	//	and multiplying by source len
	//	(idx holds the node count here)

	idx = (expr->memo - (uchar *)(expr + 1)) / sizeof(struct Node);

	//	convert number of bits to number of bytes
	//	(one bit per node per offset 0..amt, rounded up)
	//	and clear memo array

	idx = (idx * (amt + 1) + 7) / 8;
	expr->tree = idx + (expr->memo - (uchar *)(expr + 1));

	//	expr->tree now spans the bytes from the end of the
	//	header through the end of the memo array; together
	//	with the header it must fit in expr->size

	if( expr->tree + sizeof(struct Expr) > expr->size )
		return 0;	// out of memory
	else
		memset (expr->memo, 0, idx);

	//	launch initial probe on root of parse tree

	if( probe = regprobe (expr) )
		probe->node = (struct Node *)(expr + 1);
	else
		return 0;	// out of memory

	//	the run queue initially holds only the root probe

	queue = base - probe;

	//	evaluate input string against parse tree
	//	until a probe reaches both the end of the
	//	parse tree and the end of the input string

	while( idx = queue ) {
	  //	take the probe at the head of the queue
	  probe = base - idx;
	  queue = probe->next;

	  //	continue our node down to a
	  //	pattern match node.
	  //	(expr->steps is incremented on every pass; the
	  //	loop is normally left via break or return)

	  while( ++expr->steps ) {
		//	if maximum occurrences reached
		//	move to sibling node
		//	if no sibling, either return
		//	success if done, or kill probe

		if( probe->occurrence == probe->node->maximum )
			if( regnext (expr, probe) )
				continue;
			else if( probe->off == expr->amt )
				return 1;
			else
				break;

		//	if another probe began evaluation
		//	of this node at this offset before,
		//	abandon our probe.
		//	(idx becomes the memo bit index:
		//	node number * (amt + 1) + offset)

		idx = probe->node - (struct Node *)(expr + 1);
		idx *= amt + 1;
		idx += probe->off;

		//	the memo bit is only tested and set once the
		//	occurrence count has passed the node's minimum

		if( ++probe->occurrence > probe->node->minimum )
		  if( expr->memo[idx/8] & (1 << (idx % 8)) )
			break;
	 	  else
	 		expr->memo[idx/8] |= 1 << (idx % 8);

		//	if minimum requirement met
		//	clone another probe to continue
		//	with alternate
		//	(the clone is marked as having reached its
		//	maximum and is pushed on the front of the queue)

		if( probe->occurrence > probe->node->minimum )
		  if( clone = regclone (expr, probe) ) {
			clone->occurrence = clone->node->maximum;
			clone->next = queue;
			queue = base - clone;
		  } else
			return 0;		//	out of memory

		// descend probe into subexpressions
		// (nodes with typelen <= 0 carry a child list)

		if( probe->node->typelen <= 0 ) {

			//	make a stack node
			//	to remember parent

			if( stack = regprobe (expr) )
				stack->next = probe->stack;
			else
				return 0;	// out of memory

			//	save the parent's occurrence count and offset

			stack->occurrence = probe->occurrence;
			stack->off = probe->off;

			//	restart the probe on the first child

			probe->node = probe->node->type->child;
			probe->stack = base - stack;
			probe->occurrence = 0;
			continue;
		}

		//	advance to next input character,
		//	or kill probe if no pattern match,

		if( regmatch (expr, probe) )
			probe->off++;
		else
			break;
	  }

	//	delete our probe and continue
	//	with next queued clone

	regkill (expr, probe);
	}

	//	when run queue is exhausted,
	//	return failure

	return 0;
}
예제 #27
0
파일: regexp.c 프로젝트: BlackYoup/medusa
/*
 - reg - parse a whole regular expression or one parenthesized group
 *
 * The caller has already consumed the opening parenthesis, if any.
 *
 * Groups and the top level share this routine because in both cases the
 * tail of every alternative has to be tied to whatever node follows.
 *
 * Returns the first node of the expression, or NULL if a branch could
 * not be parsed or the input is malformed.  On return *flagp holds
 * HASWIDTH unless some alternative lacked it, plus SPSTART if any
 * alternative reported it.
 */
static char *
reg(
	int paren,			/* Parenthesized? */
	int *flagp )
{
	register char *head;		/* First node of this expression. */
	register char *alt;		/* Alternative being linked or walked. */
	register char *closer;		/* The CLOSE+n or END node. */
	register int parno = 0;		/* Group number; used only if paren. */
	int altflags;

	*flagp = HASWIDTH;	/* Assumed until an alternative says otherwise. */

	/* A parenthesized group begins with its own OPEN node. */
	head = NULL;
	if (paren) {
		if (regnpar >= NSUBEXP) {
			FAIL("too many ()");
		}
		parno = regnpar++;
		head = regnode(OPEN+parno);
	}

	/* Gather the alternatives, chaining each onto what came before. */
	for (;;) {
		alt = regbranch(&altflags);
		if (alt == NULL)
			return(NULL);

		if (head == NULL)
			head = alt;		/* No OPEN node: first branch leads. */
		else
			regtail(head, alt);	/* OPEN -> first, BRANCH -> BRANCH. */

		if ((altflags&HASWIDTH) == 0)
			*flagp &= ~HASWIDTH;
		*flagp |= altflags&SPSTART;

		/* Another alternative follows only after '|' or newline. */
		if (*regparse != '|' && *regparse != '\n')
			break;
		regparse++;
	}

	/* Emit the terminating node and attach it to the end of the chain. */
	if (paren)
		closer = regnode(CLOSE+parno);
	else
		closer = regnode(END);
	regtail(head, closer);

	/* Each alternative's own tail must lead to the terminator as well. */
	alt = head;
	while (alt != NULL) {
		regoptail(alt, closer);
		alt = regnext(alt);
	}

	/* Make sure the input ended the way this level expects. */
	if (paren) {
		if (*regparse++ != ')') {
			FAIL("unmatched ()");
		}
	} else if (*regparse != '\0') {
		if (*regparse == ')') {
			FAIL("unmatched ()");
		}
		FAIL("junk on end");	/* "Can't happen". */
		/* NOTREACHED */
	}

	return(head);
}
예제 #28
0
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 * 0 failure, 1 success
 */
int ossimRegExp::regmatch (const char* prog) {
    const char* scan;	// Current node.
    const char* next;	// Next node.

    scan = prog;

    while (scan != NULL) {

        next = regnext(scan);

        switch (OP(scan)) {
        case BOL:
            if (reginput != regbol)
                return (0);
            break;
        case EOL:
            if (*reginput != '\0')
                return (0);
            break;
        case ANY:
            if (*reginput == '\0')
                return (0);
            reginput++;
            break;
        case EXACTLY: {
            int         len;
            const char* opnd;

            opnd = OPERAND(scan);
            // Inline the first character, for speed.
            if (*opnd != *reginput)
                return (0);
            len = (int)strlen(opnd);
            if (len > 1 && strncmp(opnd, reginput, len) != 0)
                return (0);
            reginput += len;
        }
        break;
        case ANYOF:
            if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
                return (0);
            reginput++;
            break;
        case ANYBUT:
            if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
                return (0);
            reginput++;
            break;
        case NOTHING:
            break;
        case BACK:
            break;
        case OPEN + 1:
        case OPEN + 2:
        case OPEN + 3:
        case OPEN + 4:
        case OPEN + 5:
        case OPEN + 6:
        case OPEN + 7:
        case OPEN + 8:
        case OPEN + 9: {
            int    no;
            const char* save;

            no = OP(scan) - OPEN;
            save = reginput;

            if (regmatch(next)) {

                //
                // Don't set startp if some later invocation of the
                // same parentheses already has.
                //
                if (regstartp[no] == NULL)
                    regstartp[no] = save;
                return (1);
            }
            else
                return (0);
        }
//		break;
        case CLOSE + 1:
        case CLOSE + 2:
        case CLOSE + 3:
        case CLOSE + 4:
        case CLOSE + 5:
        case CLOSE + 6:
        case CLOSE + 7:
        case CLOSE + 8:
        case CLOSE + 9: {
            int    no;
            const char* save;

            no = OP(scan) - CLOSE;
            save = reginput;

            if (regmatch(next)) {

                //
                // Don't set endp if some later invocation of the
                // same parentheses already has.
                //
                if (regendp[no] == NULL)
                    regendp[no] = save;
                return (1);
            }
            else
                return (0);
        }
//		break;
        case BRANCH: {

            const char* save;

            if (OP(next) != BRANCH)	// No choice.
                next = OPERAND(scan);	// Avoid recursion.
            else {
                do {
                    save = reginput;
                    if (regmatch(OPERAND(scan)))
                        return (1);
                    reginput = save;
                    scan = regnext(scan);
                } while (scan != NULL && OP(scan) == BRANCH);
                return (0);
                // NOTREACHED
            }
        }
        break;
        case STAR:
        case PLUS: {
            char   nextch;
            int        no;
            const char* save;
            int        min_no;

            //
            // Lookahead to avoid useless match attempts when we know
            // what character comes next.
            //
            nextch = '\0';
            if (OP(next) == EXACTLY)
                nextch = *OPERAND(next);
            min_no = (OP(scan) == STAR) ? 0 : 1;
            save = reginput;
            no = regrepeat(OPERAND(scan));
            while (no >= min_no) {
                // If it could work, try it.
                if (nextch == '\0' || *reginput == nextch)
                    if (regmatch(next))
                        return (1);
                // Couldn't or didn't -- back up.
                no--;
                reginput = save + no;
            }
            return (0);
        }
//		break;
        case END:
            return (1);	// Success!

        default:
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::find(): Internal error -- memory corrupted.\n");
            return 0;
        }
        scan = next;
    }

    //
    //  We get here only if there's trouble -- normally "case END" is the
    //  terminating point.
    //
    //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
    printf ("ossimRegExp::find(): Internal error -- corrupted pointers.\n");
    return (0);
}
예제 #29
0
파일: regexp.c 프로젝트: BlackYoup/medusa
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * prog is the node to start from.  The text position is read from and
 * advanced through reginput.  OPEN+n / CLOSE+n nodes whose remainder
 * matches record their position in regstartp[n] / regendp[n].
 *
 * Characters handed to isalnum() are converted to unsigned char first:
 * plain char may be negative, and the <ctype.h> functions are defined
 * only for EOF and values representable as unsigned char.
 */
static int			/* 0 failure, 1 success */
regmatch( char *prog )
{
	register char *scan;	/* Current node. */
	char *next;		/* Next node. */

	scan = prog;
#ifdef DEBUG
	if (scan != NULL && regnarrate)
		fprintf(stderr, "%s(\n", regprop(scan));
#endif
	while (scan != NULL) {
#ifdef DEBUG
		if (regnarrate)
			fprintf(stderr, "%s...\n", regprop(scan));
#endif
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != '\0')
				return(0);
			break;
		case WORDA:
			/* Must be looking at a letter, digit, or _ */
			if (!isalnum((unsigned char)*reginput) && *reginput != '_')
				return(0);
			/* Prev must be BOL or nonword */
			if (reginput > regbol &&
			    (isalnum((unsigned char)reginput[-1]) ||
			     reginput[-1] == '_'))
				return(0);
			break;
		case WORDZ:
			/* Must be looking at non letter, digit, or _ */
			if (isalnum((unsigned char)*reginput) || *reginput == '_')
				return(0);
			/* We don't care what the previous char was */
			break;
		case ANY:
			if (*reginput == '\0')
				return(0);
			reginput++;
			break;
		case EXACTLY: {
				register int len;
				register char *opnd;

				opnd = OPERAND(scan);
				/* Inline the first character, for speed. */
				if (*opnd != *reginput)
					return(0);
				len = strlen(opnd);
				if (len > 1 && strncmp(opnd, reginput, len) != 0)
					return(0);
				reginput += len;
			}
			break;
		case ANYOF:
			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1:
		case OPEN+2:
		case OPEN+3:
		case OPEN+4:
		case OPEN+5:
		case OPEN+6:
		case OPEN+7:
		case OPEN+8:
		case OPEN+9: {
				register int no;
				register const char *save;

				no = OP(scan) - OPEN;
				save = reginput;

				if (regmatch(next)) {
					/*
					 * Don't set startp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regstartp[no] == NULL)
						regstartp[no] = save;
					return(1);
				}
				return(0);
			}
			/* NOTREACHED */
		case CLOSE+1:
		case CLOSE+2:
		case CLOSE+3:
		case CLOSE+4:
		case CLOSE+5:
		case CLOSE+6:
		case CLOSE+7:
		case CLOSE+8:
		case CLOSE+9: {
				register int no;
				register const char *save;

				no = OP(scan) - CLOSE;
				save = reginput;

				if (regmatch(next)) {
					/*
					 * Don't set endp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regendp[no] == NULL)
						regendp[no] = save;
					return(1);
				}
				return(0);
			}
			/* NOTREACHED */
		case BRANCH: {
				register const char *save;

				if (OP(next) != BRANCH)		/* No choice. */
					next = OPERAND(scan);	/* Avoid recursion. */
				else {
					/*
					 * Try each alternative in turn, putting
					 * the input position back after every
					 * failed attempt.
					 */
					do {
						save = reginput;
						if (regmatch(OPERAND(scan)))
							return(1);
						reginput = save;
						scan = regnext(scan);
					} while (scan != NULL && OP(scan) == BRANCH);
					return(0);
					/* NOTREACHED */
				}
			}
			break;
		case STAR:
		case PLUS: {
				register char nextch;
				register int no;
				register const char *save;
				register int min;

				/*
				 * Lookahead to avoid useless match attempts
				 * when we know what character comes next.
				 */
				nextch = '\0';
				if (OP(next) == EXACTLY)
					nextch = *OPERAND(next);
				min = (OP(scan) == STAR) ? 0 : 1;
				save = reginput;
				no = regrepeat(OPERAND(scan));
				while (no >= min) {
					/* If it could work, try it. */
					if (nextch == '\0' || *reginput == nextch)
						if (regmatch(next))
							return(1);
					/* Couldn't or didn't -- back up. */
					no--;
					reginput = save + no;
				}
				return(0);
			}
			/* NOTREACHED */
		case END:
			return(1);	/* Success! */
		default:
			regerror("memory corruption");
			return(0);
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	regerror("corrupted pointers");
	return(0);
}
예제 #30
0
/*
 - pgpRegComp - compile a regular expression into internal code
 *
 * The size of the compiled form is not known until the expression has
 * been compiled, yet compiling needs somewhere to put the code.  So the
 * expression is compiled twice: a first pass with code generation off
 * that only counts the size, then a second pass that emits the code into
 * a single allocation of exactly that size.  Nothing is allocated until
 * the first pass has accepted the expression, and the code never has to
 * be moved, so pointers into it stay valid.
 *
 * The optimization-preparation code at the end relies on the layout of
 * the compiled program.
 *
 * On success *pregexp receives the compiled expression and
 * kPGPError_NoErr is returned; on any failure *pregexp is left NULL.
 */
	PGPError
pgpRegComp(PGPContextRef context, char const *exp, regexp **pregexp)
{
	regcompState	 state;
	regcompState	*rcs = &state;
	regexp		*compiled;
	char const	*node;
	char const	*best;
	int		 bestlen;
	int		 flags;

	PGPValidateContext( context );
	PGPValidatePtr( exp );
	PGPValidatePtr( pregexp );

	*pregexp = NULL;

	pgpClearMemory( &state, sizeof(state) );

	/* Pass one: count the size and check legality, writing nowhere. */
	rcs->regcode = &regdummy;
	rcs->regsize = 0L;
	rcs->regnpar = 1;
	rcs->regparse = exp;
	regc(rcs, MAGIC);
	if (reg(rcs, 0, &flags) == NULL)
		return(kPGPError_OutOfMemory);

	/* Offsets must fit the pointer-storage convention. */
	if (rcs->regsize >= 32767L)		/* Probably could be 65535L. */
		return(kPGPError_BadParams);

	/* Header and program live in one allocation. */
	compiled = (regexp *)pgpContextMemAlloc(context,
							 sizeof(regexp) + (unsigned)rcs->regsize, 0);
	if (compiled == NULL)
		return kPGPError_OutOfMemory;

	/* Pass two: same parse again, this time emitting code. */
	rcs->regcode = compiled->program;
	rcs->regnpar = 1;
	rcs->regparse = exp;
	regc(rcs, MAGIC);
	/*
	 * NOTE(review): `compiled` is not released on this failure path --
	 * confirm whether the context allocator reclaims it.
	 */
	if (reg(rcs, 0, &flags) == NULL)
		return(kPGPError_OutOfMemory);

	/* Worst-case defaults for the optimization hints. */
	compiled->regmlen = 0;
	compiled->regmust = NULL;
	compiled->reganch = 0;
	compiled->regstart = '\0';

	node = compiled->program+1;		/* First BRANCH. */
	if (OP(regnext(node)) == END) {	/* Only one top-level choice. */
		node = OPERAND(node);

		/* Starting-point info. */
		switch (OP(node)) {
		case EXACTLY:
			compiled->regstart = *OPERAND(node);
			break;
		case BOL:
			compiled->reganch++;
			break;
		default:
			break;
		}

		/*
		 * When the expression contains something expensive, record
		 * the longest literal string that must appear.  Ties go to
		 * the later string: the regstart check already covers the
		 * beginning of the expression, so a later literal adds more
		 * checking.
		 */
		if (flags&SPSTART) {
			best = NULL;
			bestlen = 0;
			while (node != NULL) {
				if (OP(node) == EXACTLY) {
					size_t n = strlen(OPERAND(node));

					if (n >= (unsigned)bestlen) {
						best = OPERAND(node);
						bestlen = n;
					}
				}
				node = regnext(node);
			}
			compiled->regmust = best;
			compiled->regmlen = bestlen;
		}
	}

	*pregexp = compiled;
	return(kPGPError_NoErr);
}