Ejemplo n.º 1
0
/*
 * Dump every instruction of program p to stdout, one per line, prefixed
 * with its index relative to p->start.  Jump and split targets are printed
 * as instruction indices as well.
 */
void
printprog(Prog *p)
{
	Inst *base = p->start;
	Inst *end = base + p->len;
	Inst *cur;

	for(cur = base; cur < end; cur++) {
		int idx = (int)(cur - base);

		switch(cur->opcode) {
		default:
			// Unknown opcode: report it; control then falls into the Split case.
			re1_5_fatal("printprog");
		case Split:
			printf("%2d. split %d, %d\n", idx, (int)(cur->x - base), (int)(cur->y - base));
			break;
		case Jmp:
			printf("%2d. jmp %d\n", idx, (int)(cur->x - base));
			break;
		case Char:
			printf("%2d. char %c\n", idx, cur->c);
			break;
		case Any:
			printf("%2d. any\n", idx);
			break;
		case Match:
			printf("%2d. match\n", idx);
			break;
		case Save:
			printf("%2d. save %d\n", idx, cur->n);
			break;
		}
	}
}
Ejemplo n.º 2
0
Archivo: util.c Proyecto: ampli/re1.5
/*
 * Allocate n bytes of zero-filled memory.
 *
 * A negative n is rejected explicitly instead of being converted to a huge
 * size_t.  A request for 0 bytes is rounded up to 1 byte, because an
 * allocator may legitimately return a null pointer for a zero-size request
 * and that must not be reported as memory exhaustion.
 *
 * Calls re1_5_fatal() when the request cannot be satisfied; otherwise
 * returns a pointer the caller releases with free().
 */
void*
mal(int n)
{
	void *v;

	if(n < 0)
		re1_5_fatal("mal: negative size");

	// calloc zero-fills, replacing the separate malloc + memset pair.
	v = calloc(1, n > 0 ? (size_t)n : 1);
	if(v == nil)
		re1_5_fatal("out of memory");
	return v;
}
Ejemplo n.º 3
0
// Number of instructions emitted for the regexp tree rooted at r.
// Each node contributes a fixed overhead plus the cost of its operands.
static int
count(Regexp *r)
{
	int overhead;

	switch(r->type) {
	case Lit:
	case Dot:
		// Leaf nodes are a single instruction.
		return 1;
	case Cat:
		// Concatenation adds nothing of its own.
		return count(r->left) + count(r->right);
	case Paren:
	case Star:
		overhead = 2;
		break;
	case Quest:
	case Plus:
		overhead = 1;
		break;
	default:
		re1_5_fatal("bad count");
		// control falls into the Alt case
	case Alt:
		return 2 + count(r->left) + count(r->right);
	}

	// Single-operand nodes: fixed overhead plus the operand.
	return overhead + count(r->left);
}
Ejemplo n.º 4
0
Archivo: main.c Proyecto: ampli/re1.5
/*
 * Command-line driver.
 *
 * After the leading flag arguments, the first argument is the regexp and
 * every following argument is a subject string.  The regexp is compiled
 * once; each subject is then run through every entry of tab[] (or only the
 * entry whose name equals the -e argument) and the capture offsets are
 * printed relative to the start of the subject.
 *
 * Flags (may be combined in one argument, e.g. "-md"):
 *   -h       call usage()
 *   -m       set is_anchored, which is passed through to the engine
 *   -d       enable debug output (only when built with DEBUG)
 *   -e NAME  run only the engine called NAME
 *
 * Returns 0 after all subjects are processed; every error goes through
 * re1_5_fatal().
 */
int
main(int argc, char **argv)
{
	int i, j, k, l;
	int is_anchored = 0;

	// Skip the program name.
	argv++;
	argc--;
	while (argc > 0 && argv[0][0] == '-') {
		for (char *arg = &argv[0][1]; *arg; arg++) {
			switch (*arg) {
				case 'h':
					usage();
					break;
				case 'm':
					is_anchored = 1;
					break;
#ifdef DEBUG
				case 'd':
					debug = 1;
					break;
#endif
				case 'e':
					if (argv[1] == NULL)
						re1_5_fatal("-e: Missing Regex engine argument");
					if (re_engine)
						re1_5_fatal("-e: Regex engine already specified");
					re_engine = argv[1];
					// Consume the engine name; arg still points into the
					// flag string, so the inner loop keeps working.
					argv++;
					argc--;
					break;
				default:
					re1_5_fatal("Unknown flag");
			}
		}
		argv++;
		argc--;
	}

	// Need at least the regexp and one subject.
	if(argc < 2)
		usage();

#ifdef ODEBUG
	// Old and unmaintained code
	Regexp *re = parse(argv[0]);
	printre(re);
	printf("\n");

	Prog *prog = compile(re);
	printprog(prog);
	printf("=============\n");
#endif
	int sz = re1_5_sizecode(argv[0]);
#ifdef DEBUG
	if (debug) printf("Precalculated size: %d\n", sz);
#endif
	if (sz == -1) {
		re1_5_fatal("Error in regexp");
	}

	ByteProg *code = malloc(sizeof(ByteProg) + sz);
	// The compiler writes through this pointer, so a failed allocation
	// must be caught here rather than dereferenced.
	if (code == NULL) {
		re1_5_fatal("out of memory");
	}
	int ret = re1_5_compilecode(code, argv[0]);
	if (ret != 0) {
		re1_5_fatal("Error in regexp");
	}

	// Two pointers (start, end) per group, plus one extra pair.
	int sub_els = (code->sub + 1) * 2;
#ifdef DEBUG
	if (debug) re1_5_dumpcode(code);
#endif
	const char *sub[sub_els];
	int engine_found = 0;
	for(i=1; i<argc; i++) {
		printf("#%d %s\n", i, argv[i]);
		for(j=0; j<nelem(tab); j++) {
			Subject subj = {argv[i], argv[i] + strlen(argv[i])};
			if (re_engine) {
				if (0 != strcmp(re_engine, tab[j].name))
					continue;
				engine_found = 1;
			}
			printf("%s ", tab[j].name);
			memset(sub, 0, sub_els * sizeof sub[0]);
			if(!tab[j].fn(code, &subj, sub, sub_els, is_anchored)) {
				printf("-no match-\n");
				continue;
			}
			printf("match");
			// Trim trailing capture slots that were never set.
			for(k=sub_els; k>0; k--)
				if(sub[k-1])
					break;
			// Print each (start,end) pair as offsets into the subject;
			// "?" marks a slot that was not set.
			for(l=0; l<k; l+=2) {
				printf(" (");
				if(sub[l] == nil)
					printf("?");
				else
					printf("%d", (int)(sub[l] - argv[i]));
				printf(",");
				if(sub[l+1] == nil)
					printf("?");
				else
					printf("%d", (int)(sub[l+1] - argv[i]));
				printf(")");
			}
			printf("\n");
		}
		if (re_engine && !engine_found)
			re1_5_fatal("-e: Unknown engine name");
	}

	free(code);
	return 0;
}
Ejemplo n.º 5
0
/*
 * Backtracking matcher over the byte-coded program.
 *
 * pc     - current position in the bytecode (opcode byte followed by its
 *          operand bytes, if any)
 * sp     - current position in the subject string
 * input  - subject bounds; input->begin and input->end are compared
 *          against sp
 * subp   - capture slots; a Save instruction stores sp into subp[off]
 * nsubp  - number of entries in subp
 *
 * Returns 1 as soon as a Match instruction is reached, 0 when the current
 * path fails.  Straight-line instructions are handled by looping; the
 * function recurses only where an alternative path (Split/RSplit) or a
 * restorable capture (Save) has to be remembered.
 */
static int
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
{
	const char *old;
	int off;
	
	for(;;) {
		if(inst_is_consumer(*pc)) {
			// A consuming instruction cannot succeed once the subject
			// is exhausted, so fail before reading *sp.
			if(sp >= input->end)
				return 0;
		}
		switch(*pc++) {
		case Char:
			// Operand is the literal byte to compare against.
			if(*sp != *pc++)
				return 0;
			// Matched: falls through to Any to advance sp.
		case Any:
			sp++;
			continue;
		case Class:
		case ClassNot:
			if (!_re1_5_classmatch(pc, sp))
				return 0;
			// Skip the operand: one count byte followed by two bytes
			// per counted entry.
			pc += *(unsigned char*)pc * 2 + 1;
			sp++;
			continue;
                case NamedClass:
			if (!_re1_5_namedclassmatch(pc, sp))
				return 0;
			// Single operand byte.
			pc++;
			sp++;
			continue;
		case Match:
			return 1;
		case Jmp:
			// Signed 8-bit offset, relative to the byte after the operand.
			off = (signed char)*pc++;
			pc = pc + off;
			continue;
		case Split:
			// Try the next instruction first; if that path fails,
			// continue at the offset target.
			off = (signed char)*pc++;
			if(recursiveloop(pc, sp, input, subp, nsubp))
				return 1;
			pc = pc + off;
			continue;
		case RSplit:
			// Reverse priority: try the offset target first, then
			// continue with the next instruction.
			off = (signed char)*pc++;
			if(recursiveloop(pc + off, sp, input, subp, nsubp))
				return 1;
			continue;
		case Save:
			// Operand is the unsigned capture slot index.
			off = (unsigned char)*pc++;
			if(off >= nsubp) {
				// The caller provided no room for this slot: skip recording.
				continue;
			}
			// Record the position, and restore the previous value if
			// the rest of the program fails from here.
			old = subp[off];
			subp[off] = sp;
			if(recursiveloop(pc, sp, input, subp, nsubp))
				return 1;
			subp[off] = old;
			return 0;
		case Bol:
			if(sp != input->begin)
				return 0;
			continue;
		case Eol:
			if(sp != input->end)
				return 0;
			continue;
		}
		// Reached only for an opcode with no case above.
		re1_5_fatal("recursiveloop");
	}
}
Ejemplo n.º 6
0
static void
emit(Regexp *r)
{
	Inst *p1, *p2, *t;

	switch(r->type) {
	default:
		re1_5_fatal("bad emit");

	case Alt:
		pc->opcode = Split;
		p1 = pc++;
		p1->x = pc;
		emit(r->left);
		pc->opcode = Jmp;
		p2 = pc++;
		p1->y = pc;
		emit(r->right);
		p2->x = pc;
		break;

	case Cat:
		emit(r->left);
		emit(r->right);
		break;
	
	case Lit:
		pc->opcode = Char;
		pc->c = r->ch;
		pc++;
		break;
	
	case Dot:
		pc++->opcode = Any;
		break;

	case Paren:
		pc->opcode = Save;
		pc->n = 2*r->n;
		pc++;
		emit(r->left);
		pc->opcode = Save;
		pc->n = 2*r->n + 1;
		pc++;
		break;
	
	case Quest:
		pc->opcode = Split;
		p1 = pc++;
		p1->x = pc;
		emit(r->left);
		p1->y = pc;
		if(r->n) {	// non-greedy
			t = p1->x;
			p1->x = p1->y;
			p1->y = t;
		}
		break;

	case Star:
		pc->opcode = Split;
		p1 = pc++;
		p1->x = pc;
		emit(r->left);
		pc->opcode = Jmp;
		pc->x = p1;
		pc++;
		p1->y = pc;
		if(r->n) {	// non-greedy
			t = p1->x;
			p1->x = p1->y;
			p1->y = t;
		}
		break;

	case Plus:
		p1 = pc;
		emit(r->left);
		pc->opcode = Split;
		pc->x = p1;
		p2 = pc;
		pc++;
		p2->y = pc;
		if(r->n) {	// non-greedy
			t = p2->x;
			p2->x = p2->y;
			p2->y = t;
		}
		break;
	}
}