Esempio n. 1
0
/*
 - matcher - the actual matching engine
 == static int matcher(struct re_guts *g, const char *string, \
 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
 */
static int			/* 0 success, REG_NOMATCH failure */
matcher(struct re_guts *g,
	const char *string,
	size_t nmatch,
	regmatch_t pmatch[],
	int eflags)
{
	const char *endp;
	int i;
	struct match mv;
	struct match *m = &mv;
	const char *dp;
	const sopno gf = g->firststate+1;	/* +1 for OEND */
	const sopno gl = g->laststate;
	const char *start;
	const char *stop;
	/* Boyer-Moore algorithms variables */
	const char *pp;
	int cj, mj;
	const char *mustfirst;
	const char *mustlast;
	int *matchjump;
	int *charjump;

	/* simplify the situation where possible */
	if (g->cflags&REG_NOSUB)
		nmatch = 0;
	if (eflags&REG_STARTEND) {
		start = string + pmatch[0].rm_so;
		stop = string + pmatch[0].rm_eo;
	} else {
		start = string;
		stop = start + strlen(start);
	}
	if (stop < start)
		return(REG_INVARG);

	/* prescreening; this does wonders for this rather slow code */
	if (g->must != NULL) {
		if (g->charjump != NULL && g->matchjump != NULL) {
			mustfirst = g->must;
			mustlast = g->must + g->mlen - 1;
			charjump = g->charjump;
			matchjump = g->matchjump;
			pp = mustlast;
			for (dp = start+g->mlen-1; dp < stop;) {
				/* Fast skip non-matches */
				while (dp < stop && charjump[(int)*dp])
					dp += charjump[(int)*dp];

				if (dp >= stop)
					break;

				/* Greedy matcher */
				/* We depend on not being used for
				 * for strings of length 1
				 */
				while (*--dp == *--pp && pp != mustfirst);

				if (*dp == *pp)
					break;

				/* Jump to next possible match */
				mj = matchjump[pp - mustfirst];
				cj = charjump[(int)*dp];
				dp += (cj < mj ? mj : cj);
				pp = mustlast;
			}
			if (pp != mustfirst)
				return(REG_NOMATCH);
		} else {
			for (dp = start; dp < stop; dp++)
				if (*dp == g->must[0] &&
				    stop - dp >= g->mlen &&
				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
					break;
			if (dp == stop)		/* we didn't find g->must */
				return(REG_NOMATCH);
		}
	}

	/* match struct setup */
	m->g = g;
	m->eflags = eflags;
	m->pmatch = NULL;
	m->lastpos = NULL;
	m->offp = string;
	m->beginp = start;
	m->endp = stop;
	STATESETUP(m, 4);
	SETUP(m->st);
	SETUP(m->fresh);
	SETUP(m->tmp);
	SETUP(m->empty);
	CLEAR(m->empty);
	ZAPSTATE(&m->mbs);

	/* Adjust start according to moffset, to speed things up */
	if (g->moffset > -1)
		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;

	/* this loop does only one repetition except for backrefs */
	for (;;) {
		endp = fast(m, start, stop, gf, gl);
		if (endp == NULL) {		/* a miss */
			if (m->pmatch != NULL)
				free((char *)m->pmatch);
			if (m->lastpos != NULL)
				free((char *)m->lastpos);
			STATETEARDOWN(m);
			return(REG_NOMATCH);
		}
		if (nmatch == 0 && !g->backrefs)
			break;		/* no further info needed */

		/* where? */
		assert(m->coldp != NULL);
		for (;;) {
			NOTE("finding start");
			endp = slow(m, m->coldp, stop, gf, gl);
			if (endp != NULL)
				break;
			assert(m->coldp < m->endp);
			m->coldp += XMBRTOWC(NULL, m->coldp,
			    m->endp - m->coldp, &m->mbs, 0);
		}
		if (nmatch == 1 && !g->backrefs)
			break;		/* no further info needed */

		/* oh my, he wants the subexpressions... */
		if (m->pmatch == NULL)
			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
							sizeof(regmatch_t));
		if (m->pmatch == NULL) {
			STATETEARDOWN(m);
			return(REG_ESPACE);
		}
		for (i = 1; i <= m->g->nsub; i++)
			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
			NOTE("dissecting");
			dp = dissect(m, m->coldp, endp, gf, gl);
		} else {
			if (g->nplus > 0 && m->lastpos == NULL)
				m->lastpos = malloc((g->nplus+1) *
						sizeof(const char *));
			if (g->nplus > 0 && m->lastpos == NULL) {
				free(m->pmatch);
				STATETEARDOWN(m);
				return(REG_ESPACE);
			}
			NOTE("backref dissect");
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
		}
		if (dp != NULL)
			break;

		/* uh-oh... we couldn't find a subexpression-level match */
		assert(g->backrefs);	/* must be back references doing it */
		assert(g->nplus == 0 || m->lastpos != NULL);
		for (;;) {
			if (dp != NULL || endp <= m->coldp)
				break;		/* defeat */
			NOTE("backoff");
			endp = slow(m, m->coldp, endp-1, gf, gl);
			if (endp == NULL)
				break;		/* defeat */
			/* try it on a shorter possibility */
#ifndef NDEBUG
			for (i = 1; i <= m->g->nsub; i++) {
				assert(m->pmatch[i].rm_so == -1);
				assert(m->pmatch[i].rm_eo == -1);
			}
#endif
			NOTE("backoff dissect");
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
		}
		assert(dp == NULL || dp == endp);
		if (dp != NULL)		/* found a shorter one */
			break;

		/* despite initial appearances, there is no match here */
		NOTE("false alarm");
		/* recycle starting later */
		start = m->coldp + XMBRTOWC(NULL, m->coldp,
		    stop - m->coldp, &m->mbs, 0);
		assert(start <= stop);
	}

	/* fill in the details if requested */
	if (nmatch > 0) {
		pmatch[0].rm_so = m->coldp - m->offp;
		pmatch[0].rm_eo = endp - m->offp;
	}
	if (nmatch > 1) {
		assert(m->pmatch != NULL);
		for (i = 1; i < nmatch; i++)
			if (i <= m->g->nsub)
				pmatch[i] = m->pmatch[i];
			else {
				pmatch[i].rm_so = -1;
				pmatch[i].rm_eo = -1;
			}
	}

	if (m->pmatch != NULL)
		free((char *)m->pmatch);
	if (m->lastpos != NULL)
		free((char *)m->lastpos);
	STATETEARDOWN(m);
	return(0);
}
Esempio n. 2
0
/*
 - matcher - the actual matching engine
 */
static int			/* 0 success, R_REGEX_NOMATCH failure */
matcher(struct re_guts *g, char *string, size_t nmatch, RRegexMatch pmatch[],
    int eflags)
{
	char *endp;
	int i;
	struct match mv;
	struct match *m = &mv;
	char *dp;
	const sopno gf = g->firststate+1;	/* +1 for OEND */
	const sopno gl = g->laststate;
	char *start;
	char *stop;

	/* simplify the situation where possible */
	if (g->cflags&R_REGEX_NOSUB)
		nmatch = 0;
	if (eflags&R_REGEX_STARTEND) {
		start = string + pmatch[0].rm_so;
		stop = string + pmatch[0].rm_eo;
	} else {
		start = string;
		stop = start + strlen(start);
	}
	if (stop < start)
		return(R_REGEX_INVARG);

	/* prescreening; this does wonders for this rather slow code */
	if (g->must != NULL) {
		for (dp = start; dp < stop; dp++)
			if (*dp == g->must[0] && stop - dp >= g->mlen &&
				memcmp(dp, g->must, (size_t)g->mlen) == 0)
				break;
		if (dp == stop)		/* we didn't find g->must */
			return(R_REGEX_NOMATCH);
	}

	/* match struct setup */
	m->g = g;
	m->eflags = eflags;
	m->pmatch = NULL;
	m->lastpos = NULL;
	m->offp = string;
	m->beginp = start;
	m->endp = stop;
	STATESETUP(m, 4);
	SETUP(m->st);
	SETUP(m->fresh);
	SETUP(m->tmp);
	SETUP(m->empty);
	CLEAR(m->empty);

	/* this loop does only one repetition except for backrefs */
	for (;;) {
		endp = fast(m, start, stop, gf, gl);
		if (endp == NULL) {		/* a miss */
			free(m->pmatch);
			free(m->lastpos);
			STATETEARDOWN(m);
			return(R_REGEX_NOMATCH);
		}
		if (nmatch == 0 && !g->backrefs)
			break;		/* no further info needed */

		/* where? */
		assert(m->coldp != NULL);
		for (;;) {
			NOTE("finding start");
			endp = slow(m, m->coldp, stop, gf, gl);
			if (endp != NULL)
				break;
			assert(m->coldp < m->endp);
			m->coldp++;
		}
		if (nmatch == 1 && !g->backrefs)
			break;		/* no further info needed */

		/* oh my, he wants the subexpressions... */
		if (m->pmatch == NULL)
			m->pmatch = (RRegexMatch *)malloc((m->g->nsub + 1) *
							sizeof(RRegexMatch));
		if (m->pmatch == NULL) {
			STATETEARDOWN(m);
			return(R_REGEX_ESPACE);
		}
		for (i = 1; i <= m->g->nsub; i++)
			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
		if (!g->backrefs && !(m->eflags&R_REGEX_BACKR)) {
			NOTE("dissecting");
			dp = dissect(m, m->coldp, endp, gf, gl);
		} else {
			if (g->nplus > 0 && m->lastpos == NULL)
				m->lastpos = (char **)malloc((g->nplus+1) *
							sizeof(char *));
			if (g->nplus > 0 && m->lastpos == NULL) {
				free(m->pmatch);
				STATETEARDOWN(m);
				return(R_REGEX_ESPACE);
			}
			NOTE("backref dissect");
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
		}
		if (dp != NULL)
			break;

		/* uh-oh... we couldn't find a subexpression-level match */
		assert(g->backrefs);	/* must be back references doing it */
		assert(g->nplus == 0 || m->lastpos != NULL);
		for (;;) {
			if (dp != NULL || endp <= m->coldp)
				break;		/* defeat */
			NOTE("backoff");
			endp = slow(m, m->coldp, endp-1, gf, gl);
			if (endp == NULL)
				break;		/* defeat */
			/* try it on a shorter possibility */
#ifndef NDEBUG
			for (i = 1; i <= m->g->nsub; i++) {
				assert(m->pmatch[i].rm_so == -1);
				assert(m->pmatch[i].rm_eo == -1);
			}
#endif
			NOTE("backoff dissect");
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
		}
		assert(dp == NULL || dp == endp);
		if (dp != NULL)		/* found a shorter one */
			break;

		/* despite initial appearances, there is no match here */
		NOTE("false alarm");
		if (m->coldp == stop)
			break;
		start = m->coldp + 1;	/* recycle starting later */
	}

	/* fill in the details if requested */
	if (nmatch > 0) {
		pmatch[0].rm_so = m->coldp - m->offp;
		pmatch[0].rm_eo = endp - m->offp;
	}
	if (nmatch > 1) {
		assert(m->pmatch != NULL);
		for (i = 1; i < nmatch; i++)
			if (i <= m->g->nsub)
				pmatch[i] = m->pmatch[i];
			else {
				pmatch[i].rm_so = -1;
				pmatch[i].rm_eo = -1;
			}
	}

	if (m->pmatch != NULL)
		free((char *)m->pmatch);
	if (m->lastpos != NULL)
		free((char *)m->lastpos);
	STATETEARDOWN(m);
	return(0);
}