// Print a listing of program p on stdout, one instruction per line.
// Each line starts with the instruction's index relative to p->start;
// branch targets (x, y) are printed as indices in the same way.
void
printprog(Prog *p)
{
	Inst *cur, *end;
	int idx;

	end = p->start + p->len;
	for(cur = p->start; cur < end; cur++) {
		idx = (int)(cur - p->start);
		switch(cur->opcode) {
		default:
			// NOTE(review): presumably re1_5_fatal does not return; if it
			// did, control would continue into the Split case below.
			re1_5_fatal("printprog");
		case Split:
			printf("%2d. split %d, %d\n", idx, (int)(cur->x - p->start), (int)(cur->y - p->start));
			break;
		case Jmp:
			printf("%2d. jmp %d\n", idx, (int)(cur->x - p->start));
			break;
		case Char:
			printf("%2d. char %c\n", idx, cur->c);
			break;
		case Any:
			printf("%2d. any\n", idx);
			break;
		case Match:
			printf("%2d. match\n", idx);
			break;
		case Save:
			printf("%2d. save %d\n", idx, cur->n);
			break;
		}
	}
}
// Allocate n bytes of zero-filled memory.
//
// Reports "out of memory" through re1_5_fatal when the allocation fails or
// when n is negative (a negative n would otherwise be converted to a huge
// unsigned size).  A request for 0 bytes still yields a valid, non-null
// pointer: the C library is allowed to return a null pointer for a
// zero-sized allocation, which must not be mistaken for memory exhaustion.
//
// The caller owns the returned block and releases it with free().
void*
mal(int n)
{
	void *v;

	if(n < 0)
		re1_5_fatal("out of memory");

	// calloc zero-fills the block; request at least one byte so that a
	// null result always means a genuine allocation failure.
	v = calloc(1, n > 0 ? (size_t)n : 1);
	if(v == nil)
		re1_5_fatal("out of memory");
	return v;
}
// how many instructions does r need? static int count(Regexp *r) { switch(r->type) { default: re1_5_fatal("bad count"); case Alt: return 2 + count(r->left) + count(r->right); case Cat: return count(r->left) + count(r->right); case Lit: case Dot: return 1; case Paren: return 2 + count(r->left); case Quest: return 1 + count(r->left); case Star: return 2 + count(r->left); case Plus: return 1 + count(r->left); } }
int main(int argc, char **argv) { int i, j, k, l; int is_anchored = 0; argv++; argc--; while (argc > 0 && argv[0][0] == '-') { for (char *arg = &argv[0][1]; *arg; arg++) { switch (*arg) { case 'h': usage(); break; case 'm': is_anchored = 1; break; #ifdef DEBUG case 'd': debug = 1; break; #endif case 'e': if (argv[1] == NULL) re1_5_fatal("-e: Missing Regex engine argument"); if (re_engine) re1_5_fatal("-e: Regex engine already specified"); re_engine = argv[1]; argv++; argc--; break; default: re1_5_fatal("Unknown flag"); } } argv++; argc--; } if(argc < 2) usage(); #ifdef ODEBUG // Old and unmaintained code Regexp *re = parse(argv[0]); printre(re); printf("\n"); Prog *prog = compile(re); printprog(prog); printf("=============\n"); #endif int sz = re1_5_sizecode(argv[0]); #ifdef DEBUG if (debug) printf("Precalculated size: %d\n", sz); #endif if (sz == -1) { re1_5_fatal("Error in regexp"); } ByteProg *code = malloc(sizeof(ByteProg) + sz); int ret = re1_5_compilecode(code, argv[0]); if (ret != 0) { re1_5_fatal("Error in regexp"); } int sub_els = (code->sub + 1) * 2; #ifdef DEBUG if (debug) re1_5_dumpcode(code); #endif const char *sub[sub_els]; int engine_found = 0; for(i=1; i<argc; i++) { printf("#%d %s\n", i, argv[i]); for(j=0; j<nelem(tab); j++) { Subject subj = {argv[i], argv[i] + strlen(argv[i])}; if (re_engine) { if (0 != strcmp(re_engine, tab[j].name)) continue; engine_found = 1; } printf("%s ", tab[j].name); memset(sub, 0, sub_els * sizeof sub[0]); if(!tab[j].fn(code, &subj, sub, sub_els, is_anchored)) { printf("-no match-\n"); continue; } printf("match"); for(k=sub_els; k>0; k--) if(sub[k-1]) break; for(l=0; l<k; l+=2) { printf(" ("); if(sub[l] == nil) printf("?"); else printf("%d", (int)(sub[l] - argv[i])); printf(","); if(sub[l+1] == nil) printf("?"); else printf("%d", (int)(sub[l+1] - argv[i])); printf(")"); } printf("\n"); } if (re_engine && !engine_found) re1_5_fatal("-e: Unknown engine name"); } free(code); return 0; }
// Backtracking interpreter for the bytecode starting at pc, applied to the
// subject position sp.
//
//   pc     next bytecode instruction to execute
//   sp     current position in the subject
//   input  subject bounds (input->begin, input->end)
//   subp   capture slots; a Save instruction records sp in its slot
//   nsubp  number of entries in subp
//
// Returns 1 as soon as a Match instruction is reached, 0 when the current
// path fails.  Alternatives (Split/RSplit) and Save recurse so that the
// caller's state can be resumed or restored when the recursive attempt
// fails.
static int
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
{
	const char *saved;
	int delta;
	int slot;

	while(1) {
		// An instruction that consumes a character cannot succeed once
		// the subject is exhausted.
		if(inst_is_consumer(*pc) && sp >= input->end)
			return 0;

		switch(*pc++) {
		case Char:
			if(*pc++ != *sp)
				return 0;
			sp++;
			continue;
		case Any:
			sp++;
			continue;
		case Class:
		case ClassNot:
			if(!_re1_5_classmatch(pc, sp))
				return 0;
			// Skip the operand: one leading byte holding a count,
			// followed by two bytes per counted entry.
			pc += *(unsigned char*)pc * 2 + 1;
			sp++;
			continue;
		case NamedClass:
			if(!_re1_5_namedclassmatch(pc, sp))
				return 0;
			pc++;
			sp++;
			continue;
		case Match:
			return 1;
		case Jmp:
			delta = (signed char)*pc++;
			pc = pc + delta;
			continue;
		case Split:
			// Try the fall-through path first, then the jump target.
			delta = (signed char)*pc++;
			if(recursiveloop(pc, sp, input, subp, nsubp))
				return 1;
			pc = pc + delta;
			continue;
		case RSplit:
			// Try the jump target first, then fall through.
			delta = (signed char)*pc++;
			if(recursiveloop(pc + delta, sp, input, subp, nsubp))
				return 1;
			continue;
		case Save:
			slot = (unsigned char)*pc++;
			if(slot >= nsubp)
				continue;	// no room for this slot: ignore it
			saved = subp[slot];
			subp[slot] = sp;
			if(recursiveloop(pc, sp, input, subp, nsubp))
				return 1;
			// The rest of the program failed: undo the capture.
			subp[slot] = saved;
			return 0;
		case Bol:
			if(sp != input->begin)
				return 0;
			continue;
		case Eol:
			if(sp != input->end)
				return 0;
			continue;
		default:
			// Opcode not handled above.
			re1_5_fatal("recursiveloop");
			break;
		}
	}
}
// Generate the instructions for regexp tree r.
// Output goes to pc, which is not declared in this function (it is the
// output position defined elsewhere in the file); pc is advanced past every
// instruction written.  For Quest, Star and Plus a non-zero r->n selects
// the non-greedy form, produced by exchanging the two targets of the Split.
static void
emit(Regexp *r)
{
	Inst *split, *jump, *first, *tmp;
	Inst *lazy = 0;	// Split whose targets are swapped when r->n is set

	switch(r->type) {
	default:
		// NOTE(review): presumably re1_5_fatal does not return; if it
		// did, control would continue into the Alt case below.
		re1_5_fatal("bad emit");
	case Alt:
		split = pc;
		split->opcode = Split;
		pc++;
		split->x = pc;
		emit(r->left);
		jump = pc;
		jump->opcode = Jmp;
		pc++;
		split->y = pc;
		emit(r->right);
		jump->x = pc;
		break;

	case Cat:
		emit(r->left);
		emit(r->right);
		break;

	case Lit:
		pc->opcode = Char;
		pc->c = r->ch;
		pc++;
		break;

	case Dot:
		pc->opcode = Any;
		pc++;
		break;

	case Paren:
		// Surround the group body with Save 2n and Save 2n+1.
		pc->opcode = Save;
		pc->n = 2*r->n;
		pc++;
		emit(r->left);
		pc->opcode = Save;
		pc->n = 2*r->n + 1;
		pc++;
		break;

	case Quest:
		split = pc;
		split->opcode = Split;
		pc++;
		split->x = pc;
		emit(r->left);
		split->y = pc;
		lazy = split;
		break;

	case Star:
		split = pc;
		split->opcode = Split;
		pc++;
		split->x = pc;
		emit(r->left);
		// Loop back to the Split after each iteration of the body.
		pc->opcode = Jmp;
		pc->x = split;
		pc++;
		split->y = pc;
		lazy = split;
		break;

	case Plus:
		first = pc;
		emit(r->left);
		split = pc;
		split->opcode = Split;
		split->x = first;
		pc++;
		split->y = pc;
		lazy = split;
		break;
	}

	if(lazy && r->n) {	// non-greedy
		tmp = lazy->x;
		lazy->x = lazy->y;
		lazy->y = tmp;
	}
}