/*
 * Test driver: compile the regular expression given in argv[1],
 * convert it with dregcvt, dump the result, and then print the
 * dregexec result for every remaining argument.
 *
 * Exits with a non-empty status string when no pattern is given or
 * when the pattern does not compile.
 */
void
main(int argc, char **argv)
{
	int i;
	Reprog *p;
	Dreprog *dp;

	/* argv[1] is the pattern; without it there is nothing to compile */
	if(argc < 2){
		fprint(2, "usage: %s regexp [string ...]\n", argv[0]);
		exits("usage");
	}

	p = regcomp(argv[1]);
	if(p == 0){
		print("=== %s: bad regexp\n", argv[1]);
		/* stop here rather than passing a nil program on to dregcvt */
		exits("bad regexp");
	}
//	print("=== %s\n", argv[1]);
//	rdump(p);
	dp = dregcvt(p);
	print("=== dfa\n");
	dump(dp);

	/* every argument after the pattern is a subject string */
	for(i=2; i<argc; i++)
		print("match %d\n", dregexec(dp, argv[i], 1));
	exits(0);
}
void main(int argc, char **argv) { int i, hdr, n, eof, off; Dreprog *re[3]; int m[3]; char *p, *ep, *tag; Biobuf bout, bin; char msg[1024+1]; char buf[1024]; refile = unsharp(refile); buildre(re); ARGBEGIN{ case 'D': debug = 1; break; case 'n': maxtoklen = atoi(EARGF(usage())); break; case 'r': refile = EARGF(usage()); break; default: usage(); }ARGEND; if(argc > 1) usage(); if(argc == 1){ close(0); if(open(argv[0], OREAD) < 0) sysfatal("open %s: %r", argv[0]); } tag = nil; Binit(&bin, 0, OREAD); Binit(&bout, 1, OWRITE); ep = msg; p = msg; eof = 0; off = 0; hdr = 1; for(;;){ /* replenish buffer */ if(ep - p < 512 && !eof){ if(p > msg + 1){ n = ep - p; memmove(msg, p-1, ep-(p-1)); off += (p-1) - msg; p = msg+1; ep = p + n; } n = Bread(&bin, ep, msg+(sizeof msg - 1)- ep); if(n < 0) sysfatal("read error: %r"); if(n == 0) eof = 1; ep += n; *ep = 0; } if(p >= ep) break; if(*p == 0){ p++; continue; } if(hdr && p[-1]=='\n'){ if(p[0]=='\n') hdr = 0; else if(cistrncmp(p-1, "\nfrom:", 6) == 0) tag = "From*"; else if(cistrncmp(p-1, "\nto:", 4) == 0) tag = "To*"; else if(cistrncmp(p-1, "\nsubject:", 9) == 0) tag = "Subject*"; else if(cistrncmp(p-1, "\nreturn-path:", 13) == 0) tag = "Return-Path*"; else tag = nil; } m[0] = dregexec(re[0], p, p==msg || p[-1]=='\n'); m[1] = dregexec(re[1], p, p==msg || p[-1]=='\n'); m[2] = dregexec(re[2], p, p==msg || p[-1]=='\n'); n = m[0]; if(n < m[1]) n = m[1]; if(n < m[2]) n = m[2]; if(n <= 0){ fprint(2, "«%s» %.2ux", p, p[0]); sysfatal("no regexps matched at %ld", off + (p-msg)); } if(m[0] >= m[1] && m[0] >= m[2]){ /* "From " marks start of new message */ Bprint(&bout, "*From*\n"); n = m[0]; hdr = 1; }else if(m[2] > 1){ /* ignore */ n = m[2]; }else if(m[1] >= m[0] && m[1] >= m[2] && m[1] > 2 && m[1] <= maxtoklen){ /* keyword */ /* should do UTF-aware lowercasing, too much bother */ /* for(i=0; i<n; i++) if('A' <= p[i] && p[i] <= 'Z') p[i] += 'a' - 'A'; */ if(tag){ i = strlen(tag); memmove(buf, tag, i); memmove(buf+i, p, m[1]); buf[i+m[1]] = 
0; }else{ memmove(buf, p, m[1]); buf[m[1]] = 0; } Bprint(&bout, "%s\n", buf); while(trim(buf) >= 0) Bprint(&bout, "stem*%s\n", buf); n = m[1]; }else n = m[2]; if(debug) fprint(2, "%.*s¦", utfnlen(p, n), p); p += n; } Bterm(&bout); exits(0); }