Ejemplo n.º 1
0
/*
 * Compile a regular expression pattern into a program.
 *
 * pattern: NUL-terminated pattern source (must not be NULL).
 * cflags:  stored unchanged in the flags field of the returned program.
 * errorp:  if non-NULL, set to NULL on success or to an error message
 *          on failure.
 *
 * Returns a newly allocated program on success, or NULL on failure
 * (parse error or out of memory).
 */
Reprog *regcomp(const char *pattern, int cflags, const char **errorp)
{
	struct cstate g;
	Renode *node;
	Reinst *split, *jump;
	size_t nnodes;
	int i;

	/*
	 * The parse-node arena holds two nodes per pattern byte. Reject
	 * patterns whose arena size would overflow size_t, and never ask
	 * for zero bytes: malloc(0) may return NULL, which would be
	 * indistinguishable from an allocation failure.
	 */
	nnodes = strlen(pattern);
	if (nnodes > (size_t)-1 / sizeof (Renode) / 2) {
		if (errorp) *errorp = "regular expression too large";
		return NULL;
	}
	nnodes *= 2;
	if (nnodes == 0)
		nnodes = 1;

	/*
	 * Both allocations happen before setjmp so that g.prog and g.pstart
	 * are not modified between setjmp and a later longjmp; the error
	 * handler below can therefore rely on their values.
	 */
	g.prog = malloc(sizeof (Reprog));
	g.pstart = g.pend = malloc(sizeof (Renode) * nnodes);
	if (!g.prog || !g.pstart) {
		free(g.pstart);
		free(g.prog);
		if (errorp) *errorp = "cannot allocate regular expression";
		return NULL;
	}

	/* Lets the error handler free the instruction buffer unconditionally. */
	g.prog->start = NULL;

	/* die() reports errors by jumping back here. */
	if (setjmp(g.kaboom)) {
		if (errorp) *errorp = g.error;
		free(g.prog->start);
		free(g.pstart);
		free(g.prog);
		return NULL;
	}

	g.source = pattern;
	g.ncclass = 0;
	g.nsub = 1; /* numbering of parenthesised groups starts at 1 */
	for (i = 0; i < MAXSUB; ++i)
		g.sub[i] = 0;

	g.prog->flags = cflags;

	/* Parse the whole pattern; anything left over is an error. */
	next(&g);
	node = parsealt(&g);
	if (g.lookahead == ')')
		die(&g, "unmatched ')'");
	if (g.lookahead != 0)
		die(&g, "syntax error");

	g.prog->nsub = g.nsub;

	/*
	 * Six extra instructions are emitted around the compiled tree:
	 * SPLIT, ANYNL, JUMP, LPAR, RPAR and END.
	 */
	g.prog->start = g.prog->end = malloc((count(node) + 6) * sizeof (Reinst));
	if (!g.prog->start)
		die(&g, "cannot allocate regular expression instruction list");

	/*
	 * Prefix loop: SPLIT branches to the body (split + 3) or to ANYNL
	 * (split + 1), and JUMP returns to SPLIT.
	 * NOTE(review): presumably this lets a match begin at any offset of
	 * the subject string -- confirm against the matcher.
	 */
	split = emit(g.prog, I_SPLIT);
	split->x = split + 3;
	split->y = split + 1;
	emit(g.prog, I_ANYNL);
	jump = emit(g.prog, I_JUMP);
	jump->x = split;

	/* The whole pattern is wrapped in an LPAR/RPAR pair. */
	emit(g.prog, I_LPAR);
	compile(g.prog, node);
	emit(g.prog, I_RPAR);
	emit(g.prog, I_END);

#ifdef TEST
	dumpnode(node);
	putchar('\n');
	dumpprog(g.prog);
#endif

	/* The parse tree is no longer needed once the program is emitted. */
	free(g.pstart);

	if (errorp) *errorp = NULL;
	return g.prog;
}
Ejemplo n.º 2
0
/*
 * Parse a single atom at the current lookahead token and return its node.
 *
 * Handles literal characters, character classes, back-references, '.',
 * capturing groups, L_NC groups (which return the inner alternation
 * without a wrapper node) and the L_PLA / L_NLA group forms.
 * Any other token is reported through die() as a syntax error.
 */
static Renode *parseatom(struct cstate *g)
{
	Renode *atom;

	/* Literal character: the lexer left its value in yychar. */
	if (g->lookahead == L_CHAR) {
		atom = newnode(g, P_CHAR);
		atom->c = g->yychar;
		next(g);
		return atom;
	}

	/* Character class: the lexer left the class in yycc. */
	if (g->lookahead == L_CCLASS) {
		atom = newnode(g, P_CCLASS);
		atom->cc = g->yycc;
		next(g);
		return atom;
	}
	if (g->lookahead == L_NCCLASS) {
		atom = newnode(g, P_NCCLASS);
		atom->cc = g->yycc;
		next(g);
		return atom;
	}

	/* Back-reference: yychar holds the referenced group number. */
	if (g->lookahead == L_REF) {
		atom = newnode(g, P_REF);
		/*
		 * Valid group numbers are 1 .. nsub-1. The range test must use
		 * '>=' so that sub[] is never indexed at nsub: nsub can reach
		 * MAXSUB, and indexing there would read past the slots that
		 * are cleared before parsing. A group that is still being
		 * parsed has no sub[] entry yet and is rejected too.
		 */
		if (g->yychar == 0 || g->yychar >= g->nsub || !g->sub[g->yychar])
			die(g, "invalid back-reference");
		atom->n = g->yychar;
		atom->x = g->sub[g->yychar];
		next(g);
		return atom;
	}

	if (accept(g, '.'))
		return newnode(g, P_ANY);

	/* Capturing group: takes the next group number. */
	if (accept(g, '(')) {
		atom = newnode(g, P_PAR);
		if (g->nsub == MAXSUB)
			die(g, "too many captures");
		atom->n = g->nsub++;
		atom->x = parsealt(g);
		/* Registered only after its body has been parsed. */
		g->sub[atom->n] = atom;
		if (!accept(g, ')'))
			die(g, "unmatched '('");
		return atom;
	}

	/* L_NC group: no node of its own, the inner alternation is returned. */
	if (accept(g, L_NC)) {
		atom = parsealt(g);
		if (!accept(g, ')'))
			die(g, "unmatched '('");
		return atom;
	}

	/* NOTE(review): L_PLA / L_NLA presumably open positive / negative
	 * lookahead groups -- confirm against the lexer. */
	if (accept(g, L_PLA)) {
		atom = newnode(g, P_PLA);
		atom->x = parsealt(g);
		if (!accept(g, ')'))
			die(g, "unmatched '('");
		return atom;
	}
	if (accept(g, L_NLA)) {
		atom = newnode(g, P_NLA);
		atom->x = parsealt(g);
		if (!accept(g, ')'))
			die(g, "unmatched '('");
		return atom;
	}

	die(g, "syntax error");
	return NULL; /* only reached if die() returns */
}
Ejemplo n.º 3
0
/*
 * Parse a single atom at the current lookahead token and return its node.
 * This variant reads and updates the file-level compiler state `g`.
 *
 * Handles literal characters, character classes, back-references, '.',
 * capturing groups, L_NC groups (which return the inner alternation
 * without a wrapper node) and the L_PLA / L_NLA group forms.
 * Any other token is reported through die() as a syntax error.
 */
static Renode *parseatom(void)
{
	Renode *atom;

	/* Literal character: the lexer left its value in yychar. */
	if (g.lookahead == L_CHAR) {
		atom = newnode(P_CHAR);
		atom->c = g.yychar;
		next();
		return atom;
	}

	/* Character class: the lexer left the class in yycc. */
	if (g.lookahead == L_CCLASS) {
		atom = newnode(P_CCLASS);
		atom->cc = g.yycc;
		next();
		return atom;
	}
	if (g.lookahead == L_NCCLASS) {
		atom = newnode(P_NCCLASS);
		atom->cc = g.yycc;
		next();
		return atom;
	}

	/* Back-reference: yychar holds the referenced group number. */
	if (g.lookahead == L_REF) {
		atom = newnode(P_REF);
		/*
		 * Valid group numbers are 1 .. nsub-1. The range test must use
		 * '>=' so that sub[] is never indexed at nsub: nsub can reach
		 * MAXSUB, and that slot is either out of range or was not
		 * assigned by this compile.
		 * NOTE(review): assumes g.sub has MAXSUB entries -- confirm
		 * against the declaration of g.
		 */
		if (g.yychar == 0 || g.yychar >= g.nsub || !g.sub[g.yychar])
			die("invalid back-reference");
		atom->n = g.yychar;
		atom->x = g.sub[g.yychar];
		next();
		return atom;
	}

	if (re_accept('.'))
		return newnode(P_ANY);

	/* Capturing group: takes the next group number. */
	if (re_accept('(')) {
		atom = newnode(P_PAR);
		if (g.nsub == MAXSUB)
			die("too many captures");
		atom->n = g.nsub++;
		atom->x = parsealt();
		/* Registered only after its body has been parsed. */
		g.sub[atom->n] = atom;
		if (!re_accept(')'))
			die("unmatched '('");
		return atom;
	}

	/* L_NC group: no node of its own, the inner alternation is returned. */
	if (re_accept(L_NC)) {
		atom = parsealt();
		if (!re_accept(')'))
			die("unmatched '('");
		return atom;
	}

	/* NOTE(review): L_PLA / L_NLA presumably open positive / negative
	 * lookahead groups -- confirm against the lexer. */
	if (re_accept(L_PLA)) {
		atom = newnode(P_PLA);
		atom->x = parsealt();
		if (!re_accept(')'))
			die("unmatched '('");
		return atom;
	}
	if (re_accept(L_NLA)) {
		atom = newnode(P_NLA);
		atom->x = parsealt();
		if (!re_accept(')'))
			die("unmatched '('");
		return atom;
	}

	die("syntax error");
	return NULL; /* only reached if die() returns */
}