Beispiel #1
0
void
main(int argc, char **argv)
{
	int i;
	Reprog *p;
	Dreprog *dp;

	i = 1;
		p = regcomp(argv[i]);
		if(p == 0){
			print("=== %s: bad regexp\n", argv[i]);
		}
	//	print("=== %s\n", argv[i]);
	//	rdump(p);
		dp = dregcvt(p);
		print("=== dfa\n");
		dump(dp);
	
	for(i=2; i<argc; i++)
		print("match %d\n", dregexec(dp, argv[i], 1));
	exits(0);
}
Beispiel #2
0
void
main(int argc, char **argv)
{
	int i, hdr, n, eof, off;
	Dreprog *re[3];
	int m[3];
	char *p, *ep, *tag;
	Biobuf bout, bin;
	char msg[1024+1];
	char buf[1024];

	refile = unsharp(refile);
	buildre(re);
	ARGBEGIN{
	case 'D':
		debug = 1;
		break;
	case 'n':
		maxtoklen = atoi(EARGF(usage()));
		break;
	case 'r':
		refile = EARGF(usage());
		break;
	default:
		usage();
	}ARGEND;

	if(argc > 1)
		usage();
	if(argc == 1){
		close(0);
		if(open(argv[0], OREAD) < 0)
			sysfatal("open %s: %r", argv[0]);
	}

	tag = nil;
	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);
	ep = msg;
	p = msg;
	eof = 0;
	off = 0;
	hdr = 1;
	for(;;){
		/* replenish buffer */
		if(ep - p < 512 && !eof){
			if(p > msg + 1){
				n = ep - p;
				memmove(msg, p-1, ep-(p-1));
				off += (p-1) - msg;
				p = msg+1;
				ep = p + n;
			}
			n = Bread(&bin, ep, msg+(sizeof msg - 1)- ep);
			if(n < 0)
				sysfatal("read error: %r");
			if(n == 0)
				eof = 1;
			ep += n;
			*ep = 0;
		}
		if(p >= ep)
			break;

		if(*p == 0){
			p++;
			continue;
		}

		if(hdr && p[-1]=='\n'){
			if(p[0]=='\n')
				hdr = 0;
			else if(cistrncmp(p-1, "\nfrom:", 6) == 0)
				tag = "From*";
			else if(cistrncmp(p-1, "\nto:", 4) == 0)
				tag = "To*";
			else if(cistrncmp(p-1, "\nsubject:", 9) == 0)
				tag = "Subject*";
			else if(cistrncmp(p-1, "\nreturn-path:", 13) == 0)
				tag = "Return-Path*";
			else
				tag = nil;
		}
		m[0] = dregexec(re[0], p, p==msg || p[-1]=='\n');
		m[1] = dregexec(re[1], p, p==msg || p[-1]=='\n');
		m[2] = dregexec(re[2], p, p==msg || p[-1]=='\n');

		n = m[0];
		if(n < m[1])
			n = m[1];
		if(n < m[2])
			n = m[2];
		if(n <= 0){
fprint(2, "«%s» %.2ux", p, p[0]);
			sysfatal("no regexps matched at %ld", off + (p-msg));
		}

		if(m[0] >= m[1] && m[0] >= m[2]){
			/* "From " marks start of new message */
			Bprint(&bout, "*From*\n");
			n = m[0];
			hdr = 1;
		}else if(m[2] > 1){
			/* ignore */
			n = m[2];
		}else if(m[1] >= m[0] && m[1] >= m[2] && m[1] > 2 && m[1] <= maxtoklen){
			/* keyword */
			/* should do UTF-aware lowercasing, too much bother */
/*
			for(i=0; i<n; i++)
				if('A' <= p[i] && p[i] <= 'Z')
					p[i] += 'a' - 'A';
*/
			if(tag){
				i = strlen(tag);	
				memmove(buf, tag, i);
				memmove(buf+i, p, m[1]);
				buf[i+m[1]] = 0;
			}else{
				memmove(buf, p, m[1]);
				buf[m[1]] = 0;
			}
			Bprint(&bout, "%s\n", buf);
			while(trim(buf) >= 0)
				Bprint(&bout, "stem*%s\n", buf);
			n = m[1];
		}else
			n = m[2];
		if(debug)
			fprint(2, "%.*s¦", utfnlen(p, n), p);
		p += n;
	}
	Bterm(&bout);
	exits(0);
}