コード例 #1
0
ファイル: rcomp.c プロジェクト: Sunshine-OS/svr4-userland
/*
 * Range search for singlebyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Walks the input from ip->ib_cur byte by byte, feeding each byte to the
 * DFA found at e0->e_exp->re_dfa and restarting from dp->anybol at every
 * line start. lineno is incremented for each line entered.
 *
 * With vflag clear, a line is handed to outline() as soon as the DFA
 * reaches an accepting state; with vflag set, a line is handed to
 * outline() only when its newline is reached without acceptance.
 *
 * Returns 1 right after an outline() call if qflag or lflag is set, and
 * 0 once the scan position has moved past `last'.
 */
static int
rc_range(struct iblok *ip, char *last)
{
	char	*p;
	int	c, cstat, nstat;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	/* Start state for a beginning of line; it may already accept. */
	cstat = dp->anybol;
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		/*
		 * Try the cached transition first; a zero entry means it
		 * has not been computed for this state/byte pair yet.
		 */
		if ((nstat = dp->trans[cstat][*p & 0377]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if ((c = *p & 0377) == '\n')
				c = '\0';
			/*
			 * A zero return from regtrans() abandons the current
			 * line (libuxre_regdfaexec() reports the same
			 * condition as REG_ESPACE).
			 */
			if ((nstat = regtrans(dp, cstat, c, 1)) == 0)
				goto fail;
			/* Mirror the '\0' entry so later '\n' bytes hit the cache. */
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		/* Transition values are stored as state index plus one. */
		if (dp->acc[cstat = nstat - 1]) {
			/*
			 * The labels below are also entered by goto from
			 * outside this block: `found' when a line-start state
			 * accepts, `succeed' when vflag is set and a newline
			 * was reached, `fail' when regtrans() returned 0.
			 */
		found:	for (;;) {
				if (vflag == 0) {
					/*
					 * outline() is defined elsewhere;
					 * ip->ib_cur is re-read below, so it
					 * presumably advances ib_cur past the
					 * line -- TODO confirm.
					 */
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
					/*
					 * Skip the rest of the line without
					 * output. Assumes a '\n' follows in
					 * the buffer -- TODO confirm.
					 */
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				/*
				 * Restart at the next line; stay in this loop
				 * while the start state itself accepts.
				 */
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		/* Consume the byte; a newline ends a line that did not accept. */
		if (*p++ == '\n') {
			if (vflag) {
				/* Step back onto the newline before reporting. */
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
		brk2:;
	}
}
コード例 #2
0
ファイル: regdfa.c プロジェクト: n-t-roff/heirloom-ex-vi
/*
 * Run the DFA over xp->str and report whether any match exists.
 *
 * Requests that need match offsets (xp->nmatch != 0) are passed to
 * leftmost(); singlebyte input without REG_NEWLINE is passed to
 * regdfaexec_opt(). Everything else is handled by the loop below.
 *
 * Returns 0 when an accepting state is reached, REG_NOMATCH when the
 * terminating '\0' has been consumed without acceptance, and REG_ESPACE
 * when regtrans() returns 0.
 */
LIBUXRE_STATIC int
libuxre_regdfaexec(Dfa *dp, Exec *xp)
{
	const unsigned char *cp;
	int len, next, state, mb_cur_max;
	w_type ch;

	/* regtrans() consults dp->flags, so set it before anything else. */
	dp->flags = xp->flags & REG_NOTEOL;
	mb_cur_max = xp->mb_cur_max;
	if (xp->nmatch != 0)
		return leftmost(dp, xp);
	if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0)
		return regdfaexec_opt(dp, xp);

	cp = xp->str;
	state = (xp->flags & REG_NOTBOL) ? 1 : dp->anybol;
	if (dp->acc[state] && (xp->flags & REG_NONEMPTY) == 0)
		return 0;	/* the empty match at the start is acceptable */

	for (;;)
	{
		/* Fetch the next character, decoding multibyte sequences. */
		ch = *cp++;
		if (ch == '\n')
		{
			if (xp->flags & REG_NEWLINE)
				ch = ROP_EOL;
		}
		else if (!ISONEBYTE(ch))
		{
			len = libuxre_mb2wc(&ch, cp);
			if (len > 0)
				cp += len;
		}

		/*
		 * Characters with bits outside the NCHAR - 1 mask are never
		 * looked up in the cache; a zero cache entry also falls
		 * through to regtrans().
		 */
		if ((ch & ~(long)(NCHAR - 1)) != 0
			|| (next = dp->trans[state][ch]) == 0)
		{
			next = regtrans(dp, state, ch, mb_cur_max);
			if (next == 0)
				return REG_ESPACE;
			if (ch == ROP_EOL)	/* only with REG_NEWLINE */
			{
				if (dp->acc[next - 1])
					return 0;
				/* Continue with the next line from its start state. */
				state = dp->anybol;
				if (dp->acc[state])
					return 0;
				continue;
			}
		}

		/* Transition values are stored as state index plus one. */
		state = next - 1;
		if (dp->acc[state])
			return 0;
		if (ch == '\0')	/* end of string; original notes st == 0 here */
			return REG_NOMATCH;
	}
}
コード例 #3
0
ファイル: regdfa.c プロジェクト: n-t-roff/heirloom-ex-vi
/*
 * Fast path of the "any match" search: singlebyte locale, REG_NEWLINE
 * not set. The original author measured a 25% gain for grep, which is
 * why this specialised copy of the general loop exists.
 *
 * Returns 0 when an accepting state is reached, REG_ESPACE when
 * regtrans() returns 0, and REG_NOMATCH once the terminating '\0' has
 * been fed to the DFA without acceptance.
 */
static int
regdfaexec_opt(Dfa *dp, Exec *xp)
{
	const unsigned char *cp = xp->str;
	int state, next;

	state = (xp->flags & REG_NOTBOL) ? 1 : dp->anybol;
	if (dp->acc[state] && (xp->flags & REG_NONEMPTY) == 0)
		return 0;	/* the empty match at the start is acceptable */

	for (;;)
	{
		/* Cached transition, or compute it on a zero entry. */
		next = dp->trans[state][*cp];
		if (next == 0)
		{
			next = regtrans(dp, state, *cp, 1);
			if (next == 0)
				return REG_ESPACE;
		}

		/* Transition values are stored as state index plus one. */
		state = next - 1;
		if (dp->acc[state])
			return 0;

		/* The '\0' itself is fed to the DFA before the scan stops. */
		if (*cp++ == '\0')
			break;
	}
	return REG_NOMATCH;
}
コード例 #4
0
ファイル: regdfa.c プロジェクト: n-t-roff/heirloom-ex-vi
/*
 * Search xp->str with the DFA and record the bounds of the match in
 * xp->match[0] (rm_so/rm_eo are offsets from xp->str).
 *
 * `beg' is the start of the current attempt and `end' the furthest
 * position at which an accepting state has been seen for it (null while
 * none has been seen). When the DFA dies without a recorded `end', the
 * attempt is restarted one character further on.
 *
 * Returns 0 with xp->match[0] filled in, REG_NOMATCH when the string is
 * exhausted without acceptance, and REG_ESPACE when regtrans() returns 0.
 */
static int
leftmost(Dfa *dp, Exec *xp)
{
	const unsigned char *s, *beg, *end;
	int i, nst, st, mb_cur_max;
	w_type wc;

	mb_cur_max = xp->mb_cur_max;
	beg = s = xp->str;
	end = 0;
	/* Start state depends on whether the string starts a line. */
	st = dp->leftbol;
	if (xp->flags & REG_NOTBOL)
		st = dp->leftmost;
	if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
		end = s;	/* initial empty match allowed */
	for (;;)
	{
		/* Fetch the next character, decoding multibyte sequences. */
		if ((wc = *s++) == '\n')
		{
			if (xp->flags & REG_NEWLINE)
				wc = ROP_EOL;
		}
		else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0)
			s += i;
		/*
		 * Characters with bits outside the NCHAR - 1 mask bypass the
		 * transition cache; a zero cache entry also goes to regtrans().
		 */
		if ((wc & ~(long)(NCHAR - 1)) != 0
			|| (nst = dp->trans[st][wc]) == 0)
		{
			if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0)
				return REG_ESPACE;
			if (wc == ROP_EOL) /* REG_NEWLINE only */
			{
				if (dp->acc[nst - 1])
				{
					/*
					 * NOTE(review): unlike the '\0' case
					 * below, s is not stepped back here, so
					 * the recorded end lies past the
					 * newline -- confirm this is intended.
					 */
					if (end == 0 || end < s)
						end = s;
					break;
				}
				/*
				 * Restart the attempt at the following line.
				 * NOTE(review): a previously recorded `end' is
				 * neither consulted nor cleared before `beg'
				 * moves -- confirm this cannot leave beg > end.
				 */
				beg = s;
				st = dp->leftbol;
				goto newst;
			}
		}
		/* Transition values are stored as state index plus one. */
		if ((st = nst - 1) == 0) /* dead state */
		{
			/* A match was already recorded for this attempt. */
			if (end != 0)
				break;
			/* Otherwise retry from the next character after beg. */
			if ((wc = *beg++) == '\0')
				return REG_NOMATCH;
			else if (!ISONEBYTE(wc) &&
					(i = libuxre_mb2wc(&wc, beg)) > 0)
				beg += i;
			s = beg;
			st = dp->leftmost;
			goto newst;
		}
		if (wc == '\0')
		{
			/* End of string: settle on the best match seen, if any. */
			if (dp->acc[st])
			{
				s--;	/* don't include \0 */
				if (end == 0 || end < s)
					end = s;
				break;
			}
			if (end != 0)
				break;
			return REG_NOMATCH;
		}
	newst:;
		/* Remember the furthest accepting position for this attempt. */
		if (dp->acc[st])
		{
			if (end == 0 || end < s)
				end = s;
		}
	}
	xp->match[0].rm_so = beg - xp->str;
	xp->match[0].rm_eo = end - xp->str;
	return 0;
}
コード例 #5
0
ファイル: regdfa.c プロジェクト: n-t-roff/heirloom-ex-vi
/*
 * Build the initial DFA for the parse tree tp and attach it to ep.
 *
 * Returns 0 on success and REG_ESPACE on any failure. Once the Dfa has
 * been allocated, failures release it through libuxre_regdeldfa().
 */
LIBUXRE_STATIC int
libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp)
{
	Tree *prefix;
	Dfa *dp;
	Posn *rootpos;
	int bolst;

	/*
	 * Prepend CAT(STAR(ALL), ...) to the tree. With that in place the
	 * "any match" search in libuxre_regdfaexec() is no special case,
	 * and the signature of the initial state falls out of the follow
	 * set computation for the leaves.
	 */
	prefix = libuxre_reg1tree(ROP_ALL, 0);
	if (prefix == 0)
		return REG_ESPACE;
	prefix = libuxre_reg1tree(ROP_STAR, prefix);
	if (prefix == 0)
		return REG_ESPACE;
	prefix = libuxre_reg2tree(ROP_CAT, prefix, tp->left.ptr);
	tp->left.ptr = prefix;
	if (prefix == 0)
		return REG_ESPACE;
	prefix->parent = tp;

	dp = calloc(1, sizeof(Dfa));
	if (dp == 0)
		return REG_ESPACE;
	ep->re_dfa = dp;

	/* calloc() zeroes bytes; null pointers need not be all bits zero. */
	dp->posfoll = 0;
	dp->sigfoll = 0;
	dp->cursig = 0;
	dp->posn = 0;

	/*
	 * Number the leaves of the tree. findposn() may hand back a
	 * rewritten tree that fits the restrictions of this DFA.
	 */
	tp = findposn(ep, tp, lxp->mb_cur_max);
	if (tp == 0)
		goto err;

	/*
	 * The position count is known now: allocate the position array
	 * with the current-set bytes directly behind it.
	 */
	dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn);
	if (dp->posn == 0)
		goto err;
	dp->posset = (unsigned char *)&dp->posn[dp->nposn];

	/* Compute the follow set of every position. */
	if (posnfoll(dp, tp) != 0)
		goto err;

	/*
	 * Create the fixed states:
	 *  - index 0: dead state (no valid transitions);
	 *  - index 1: "any match" start state [STAR(ALL) follow set];
	 *  - "any match" start state after ROP_BOL;
	 *  - left-most longest start state for REG_NOTBOL;
	 *  - left-most longest start state after ROP_BOL.
	 * The last two exist only if leftmost() can be called. Each pair
	 * of start states coincides when there is no explicit ROP_BOL
	 * transition.
	 */
	dp->avail += dp->used;
	dp->used = 0;
	dp->sigfoll = malloc(sizeof(size_t) * dp->avail);
	if (dp->sigfoll == 0)
		goto err;

	rootpos = &dp->posn[dp->nposn - 1];	/* same as first(root) */
	dp->cursig = &dp->posfoll[rootpos->seti];
	dp->nset = rootpos->nset;
	dp->top = 1;	/* index 0 is the dead state */
	addstate(dp);	/* must create state index 1 (returns 2) */

	/* From here on cursig is a scratch buffer of its own. */
	dp->cursig = malloc(sizeof(size_t) * dp->nposn);
	if (dp->cursig == 0)
		goto err;

	dp->nfix = 2;
	bolst = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max);
	if (bolst == 0)
		goto err;
	dp->anybol = bolst - 1;
	if (dp->anybol == 2)	/* ROP_BOL led to a new state */
		dp->nfix = 3;

	/* With REG_NOSUB, leftmost() is never called: nothing more to do. */
	if ((ep->re_flags & REG_NOSUB) != 0)
		return 0;

	/*
	 * The leftmost() start states equal the "any match" ones minus
	 * the STAR(ALL) position.
	 */
	dp->sigi[dp->nfix] = 0;
	dp->nsig[dp->nfix] = dp->nsig[1] - 1;
	dp->acc[dp->nfix] = dp->acc[1];
	dp->leftmost = dp->nfix;
	dp->leftbol = dp->leftmost;
	dp->nfix++;
	if (dp->anybol != 1)	/* separate state after ROP_BOL */
	{
		dp->sigi[dp->nfix] = dp->sigi[2];
		dp->nsig[dp->nfix] = dp->nsig[2] - 1;
		dp->acc[dp->nfix] = dp->acc[2];
		dp->leftbol = dp->nfix;
		dp->nfix++;
	}
	dp->top = dp->nfix;
	return 0;

err:
	libuxre_regdeldfa(dp);
	return REG_ESPACE;
}
コード例 #6
0
ファイル: rcomp.c プロジェクト: Sunshine-OS/svr4-userland
/*
 * Range search for multibyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Walks the input from ip->ib_cur one character at a time, decoding
 * bytes with the high bit set through mbtowi(), and feeds each character
 * to the DFA found at e0->e_exp->re_dfa. The DFA is restarted from
 * dp->anybol at every line start and lineno is incremented for each
 * line entered.
 *
 * With vflag clear, a line is handed to outline() as soon as the DFA
 * reaches an accepting state; with vflag set, a line is handed to
 * outline() only when its newline is reached without acceptance.
 *
 * Returns 1 right after an outline() call if qflag or lflag is set, and
 * 0 once the scan position has moved past `last'.
 */
static int
rc_rangew(struct iblok *ip, char *last)
{
	char	*p;
	int	n, cstat, nstat;
	wint_t	wc;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	/* Start state for a beginning of line; it may already accept. */
	cstat = dp->anybol;
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		/*
		 * Decode the next character into wc and its byte length
		 * into n. A negative mbtowi() result is treated as a
		 * single byte with the value WEOF.
		 */
		if (*p & 0200) {
			if ((n = mbtowi(&wc, p, last + 1 - p)) < 0) {
				n = 1;
				wc = WEOF;
			}
		} else {
			wc = *p;
			n = 1;
		}
		/*
		 * Characters with bits outside the NCHAR-1 mask bypass the
		 * transition cache; a zero cache entry also goes to regtrans().
		 */
		if ((wc & ~(wchar_t)(NCHAR-1)) != 0 ||
				(nstat = dp->trans[cstat][wc]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if (wc == '\n')
				wc = '\0';
			/*
			 * A zero return from regtrans() abandons the current
			 * line (libuxre_regdfaexec() reports the same
			 * condition as REG_ESPACE).
			 */
			if ((nstat = regtrans(dp, cstat, wc, mb_cur_max)) == 0)
				goto fail;
			/* Mirror the '\0' entry so later '\n' bytes hit the cache. */
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		/* Transition values are stored as state index plus one. */
		if (dp->acc[cstat = nstat - 1]) {
			/*
			 * The labels below are also entered by goto from
			 * outside this block: `found' when a line-start state
			 * accepts, `succeed' when vflag is set and a newline
			 * was reached, `fail' when regtrans() returned 0.
			 */
		found:	for (;;) {
				if (vflag == 0) {
					/*
					 * outline() is defined elsewhere;
					 * ip->ib_cur is re-read below, so it
					 * presumably advances ib_cur past the
					 * line -- TODO confirm.
					 */
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
					/*
					 * Skip the rest of the line without
					 * output. Assumes a '\n' follows in
					 * the buffer -- TODO confirm.
					 */
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				/*
				 * Restart at the next line; stay in this loop
				 * while the start state itself accepts.
				 */
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		/* Consume the character; p[-n] is its first byte. */
		p += n;
		if (p[-n] == '\n') {
			if (vflag) {
				/* Step back onto the newline before reporting. */
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
		brk2:;
	}
}