/*
 * Implementation of "re.compile".
 *
 * The call stack is parsed with the format "s#|Ob": a pattern string with
 * its length, followed by an optional options object and an optional
 * boolean asking for the compiled pattern to be studied.
 *
 * Returns the object produced by make_regexp() on success.  On failure it
 * returns the error object coming from EcParseStackFunction(),
 * obj2options() or EcReError().
 */
static EC_OBJ EcLibRe_Compile( EC_OBJ stack, EcAny userdata )
{
	const char *pattern;
	EcInt       patlen;
	EC_OBJ      opts = EC_NIL;
	int         options;
	EcBool      study = FALSE;

	pcre       *code;
	pcre_extra *extra   = NULL;
	const char *errptr  = NULL;
	int         erroffs = 0;

	EC_OBJ res;

	res = EcParseStackFunction( "re.compile", TRUE, stack, "s#|Ob",
	                            &pattern, &patlen, &opts, &study );
	if (EC_ERRORP(res))
		return res;

	/* Translate the options object into PCRE option bits. */
	opts = obj2options( "re.compile", 2, opts, &options );
	if (EC_ERRORP(opts))
		return opts;

	code = pcre_compile( pattern, options, &errptr, &erroffs, /* tableptr */ NULL );
	if (! code)
		return EcReError( errptr, erroffs );

	if (study)
	{
		errptr = NULL;
		extra  = pcre_study( code, 0, /* currently pcre has no options for study */ &errptr );

		/*
		 * pcre_study() legitimately returns NULL when studying yields no
		 * additional information; that is not a failure.  Only a non-NULL
		 * error message signals an error.
		 */
		if (errptr != NULL)
		{
			/* NOTE(review): `code' is not released on this path - confirm
			 * whether it should be freed before returning. */
			return EcReError( errptr, -1 );
		}
	}

	return make_regexp( code, extra, pattern, patlen, options );
}
/*
 * Build the regexp "(r[0])(r[1])...(r[n-1])" from the N entries of R,
 * skipping entries that are NULL.
 *
 * Returns NULL when every entry is NULL or when allocation fails;
 * otherwise returns whatever make_regexp() yields for the new pattern.
 */
struct regexp *
regexp_concat_n(struct info *info, int n, struct regexp **r) {
    size_t total = 0;
    char *buf, *out;
    int i;

    /* Each present entry contributes its pattern plus the two parens. */
    for (i = 0; i < n; i++) {
        if (r[i] == NULL)
            continue;
        total += strlen(r[i]->pattern->str) + strlen("()");
    }

    if (total == 0)
        return NULL;

    if (ALLOC_N(buf, total+1) < 0)
        return NULL;

    out = buf;
    for (i = 0; i < n; i++) {
        if (r[i] != NULL) {
            *out++ = '(';
            out = stpcpy(out, r[i]->pattern->str);
            *out++ = ')';
        }
    }

    return make_regexp(info, buf);
}
/*
 * Run PAT through unescape() and construct a regexp from the result,
 * passing NOCASE on to make_regexp().
 *
 * Returns NULL if unescape() returns NULL.
 */
struct regexp *
make_regexp_unescape(struct info *info, const char *pat, int nocase) {
    char *unescaped = unescape(pat, strlen(pat), NULL);

    return (unescaped != NULL) ? make_regexp(info, unescaped, nocase) : NULL;
}
/*
 * Print a scanner rule for WORD on stdout: the pattern produced by
 * make_regexp() from WORD, followed by an action that stores the matched
 * text in yylval.str and returns TOKEN.
 *
 * WORD is copied into a local buffer of STRING_LENGTH bytes before being
 * handed to make_regexp().  A word that does not fit is reported on
 * stderr and skipped instead of overflowing the buffer.
 */
void rule_for_word(char *word, char *token)
{
    char regexp[STRING_LENGTH];
    size_t wordlen = strlen(word);

    if (wordlen >= sizeof regexp) {
        fprintf(stderr, "rule_for_word: word too long (%zu bytes, limit %zu)\n",
                wordlen, sizeof regexp - 1);
        return;
    }

    strcpy(regexp, word);   /* safe: length checked above */

    /* NOTE(review): make_regexp() appears to rewrite the buffer in place;
     * if it can lengthen the text, it also needs to know the buffer size -
     * confirm against its definition. */
    make_regexp(regexp);

    printf("%s {\n yylval.str = newstr(yytext); return %s;\n}\n", regexp, token);
}
/*
 * Build the regexp "(r1)(r2)" from the patterns of R1 and R2.
 * Both arguments are dereferenced unconditionally.
 *
 * Returns NULL if the pattern string cannot be formatted; otherwise
 * returns the result of make_regexp().
 */
struct regexp *
regexp_concat(struct info *info, struct regexp *r1, struct regexp *r2) {
    char *joined;
    int rc;

    rc = asprintf(&joined, "(%s)(%s)", r1->pattern->str, r2->pattern->str);
    if (rc == -1)
        return NULL;

    return make_regexp(info, joined);
}
/*
 * Build the regexp "(r)?" from the pattern of R, keeping R's nocase flag.
 *
 * Returns NULL when R is NULL or when the pattern string cannot be
 * formatted; otherwise returns the result of make_regexp().
 */
struct regexp *
regexp_maybe(struct info *info, struct regexp *r) {
    char *optional;

    if (r == NULL)
        return NULL;

    if (asprintf(&optional, "(%s)?", r->pattern->str) == -1)
        return NULL;

    return make_regexp(info, optional, r->nocase);
}
/*
 * Build the regexp "(r[0])|(r[1])|...|(r[n-1])" from the N entries of R,
 * skipping entries that are NULL.
 *
 * When some, but not all, of the present entries are flagged nocase, the
 * nocase entries are replaced by the output of regexp_expand_nocase() and
 * the combined regexp is created with nocase off.  When all present
 * entries are nocase, the combined regexp is created with nocase on.
 *
 * Returns NULL if every entry is NULL, on allocation failure, or when an
 * error is flagged while expanding an entry.
 */
struct regexp *
regexp_union_n(struct info *info, int n, struct regexp **r) {
    size_t len = 0;
    char *pat = NULL, *p, *expanded = NULL;
    int nnocase = 0, npresent = 0;
    int ret;

    /* Per present entry: pattern + "()" + one separator; the separator
     * slot of the last entry is used for the terminating NUL. */
    for (int i=0; i < n; i++)
        if (r[i] != NULL) {
            len += strlen(r[i]->pattern->str) + strlen("()|");
            npresent += 1;
            if (r[i]->nocase)
                nnocase += 1;
        }

    bool mixedcase = nnocase > 0 && nnocase < npresent;

    if (len == 0)
        return NULL;

    if (ALLOC_N(pat, len) < 0)
        return NULL;

    p = pat;
    int added = 0;
    for (int i=0; i < n; i++) {
        if (r[i] == NULL)
            continue;
        if (added > 0)
            *p++ = '|';
        *p++ = '(';
        if (mixedcase && r[i]->nocase) {
            /* Remember where we are: REALLOC_N may move the buffer, and
             * the write position cannot be rediscovered with strlen(pat)
             * because the buffer is not NUL terminated at this point
             * (bytes added by an earlier reallocation need not be zero). */
            size_t ofs = p - pat;

            expanded = regexp_expand_nocase(r[i]);
            ERR_BAIL(r[i]->info);
            len += strlen(expanded) - strlen(r[i]->pattern->str);
            ret = REALLOC_N(pat, len);
            ERR_NOMEM(ret < 0, info);
            p = stpcpy(pat + ofs, expanded);
            FREE(expanded);
        } else {
            p = stpcpy(p, r[i]->pattern->str);
        }
        *p++ = ')';
        added += 1;
    }
    *p = '\0';
    return make_regexp(info, pat, nnocase == npresent);
 error:
    FREE(expanded);
    FREE(pat);
    return NULL;
}
/*
 * Copy a regexp object by recompiling it from its recorded source.
 *
 * EC_REGEXPSRC(obj) is expected to be a two-element array holding the
 * pattern string and the integer compile options.  The pattern is
 * compiled again with those options, and studied again if the original
 * object carries study data (EC_PCREXTRA(obj) is non-NULL).
 *
 * Returns the object produced by make_regexp(), or the error object from
 * EcReError() if compiling or studying fails.  `type' is not used.
 */
static EC_OBJ regexp_copy( EC_OBJ obj, EcCopyType type )
{
	pcre       *code;
	pcre_extra *extra = NULL;
	EC_OBJ      src, sobj, oobj;

	const char *old_str;
	EcInt       old_slen;
	int         old_options;

	const char *errptr  = NULL;
	int         erroffs = 0;

	/* compile a new regexp with the same string and options of the old one */
	src = EC_REGEXPSRC(obj);
	ASSERT( EC_ARRAYP(src) );
	ASSERT( EC_ARRAYLEN(src) == 2 );
	sobj = EcArrayGet( src, 0 );
	oobj = EcArrayGet( src, 1 );
	ASSERT( EC_STRINGP(sobj) );
	ASSERT( EC_INUMP(oobj) );

	old_str     = EC_STRDATA(sobj);
	old_slen    = EC_STRLEN(sobj);
	old_options = EC_INUM(oobj);

	code = pcre_compile( old_str, old_options, &errptr, &erroffs, /* tableptr */ NULL );
	if (! code)
		return EcReError( errptr, erroffs );

	if (EC_PCREXTRA(obj))
	{
		errptr = NULL;
		extra  = pcre_study( code, 0, /* currently pcre has no options for study */ &errptr );

		/*
		 * A NULL result from pcre_study() only means that no extra
		 * information was produced; failure is signalled through a
		 * non-NULL error message.
		 */
		if (errptr != NULL)
		{
			/* NOTE(review): `code' is not released on this path - confirm
			 * whether it should be freed before returning. */
			return EcReError( errptr, -1 );
		}
	}

	return make_regexp( code, extra, old_str, old_slen, old_options );
}
struct regexp * regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) { const char *p1 = r1->pattern->str; const char *p2 = r2->pattern->str; struct regexp *result = NULL; struct fa *fa = NULL, *fa1 = NULL, *fa2 = NULL; int r; char *s = NULL; size_t s_len; r = fa_compile(p1, strlen(p1), &fa1); if (r != REG_NOERROR) goto error; r = fa_compile(p2, strlen(p2), &fa2); if (r != REG_NOERROR) goto error; fa = fa_minus(fa1, fa2); if (fa == NULL) goto error; r = fa_as_regexp(fa, &s, &s_len); if (r < 0) goto error; if (s == NULL) { /* FA is the empty set, which we can't represent as a regexp */ goto error; } result = make_regexp(info, s); s = NULL; done: fa_free(fa); fa_free(fa1); fa_free(fa2); free(s); return result; error: unref(result, regexp); goto done; }
/*
 * Build a regexp for the difference of R1 and R2.  Both regexps are
 * converted with regexp_to_fa(), subtracted with fa_minus(), and the
 * result is turned back into a pattern string with fa_as_regexp().
 *
 * Returns NULL when any step fails or when fa_as_regexp() yields no
 * string; otherwise returns the result of make_regexp(), whose nocase
 * argument is taken from fa_is_nocase() on the difference automaton.
 */
struct regexp *
regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) {
    struct regexp *result = NULL;
    struct fa *fa = NULL, *fa1 = NULL, *fa2 = NULL;
    int r;
    char *s = NULL;
    size_t s_len;

    /* ERR_BAIL is a macro defined outside this block; presumably it jumps
     * to the error label when the given info has an error recorded. */
    fa1 = regexp_to_fa(r1);
    ERR_BAIL(r1->info);

    fa2 = regexp_to_fa(r2);
    ERR_BAIL(r2->info);

    fa = fa_minus(fa1, fa2);
    if (fa == NULL)
        goto error;

    r = fa_as_regexp(fa, &s, &s_len);
    if (r < 0)
        goto error;

    if (s == NULL) {
        /* FA is the empty set, which we can't represent as a regexp */
        goto error;
    }

    if (regexp_c_locale(&s, NULL) < 0)
        goto error;

    result = make_regexp(info, s, fa_is_nocase(fa));
    /* Clear s so that the free(s) in the cleanup below becomes a no-op. */
    s = NULL;

 /* Common exit: release the automata and any string still held in s. */
 done:
    fa_free(fa);
    fa_free(fa1);
    fa_free(fa2);
    free(s);
    return result;
 /* Every jump to this label happens before result is assigned, so result
  * is still NULL when unref() is applied to it. */
 error:
    unref(result, regexp);
    goto done;
}
/*
 * Build the regexp "(r[0])(r[1])...(r[n-1])" from the N entries of R,
 * skipping entries that are NULL.
 *
 * When some, but not all, of the present entries are flagged nocase, the
 * nocase entries are written through append_expanded() and the combined
 * regexp is created with nocase off.  When all present entries are
 * nocase, the combined regexp is created with nocase on.
 *
 * Returns NULL if every entry is NULL, on allocation failure, or when an
 * error is flagged after appending an expanded entry.
 */
struct regexp *
regexp_concat_n(struct info *info, int n, struct regexp **r) {
    size_t len = 0;
    char *pat = NULL, *p;
    int nnocase = 0, npresent = 0;

    /* Per present entry: its pattern plus the surrounding parentheses. */
    for (int i=0; i < n; i++)
        if (r[i] != NULL) {
            len += strlen(r[i]->pattern->str) + strlen("()");
            npresent += 1;
            if (r[i]->nocase)
                nnocase += 1;
        }

    bool mixedcase = nnocase > 0 && nnocase < npresent;

    if (len == 0)
        return NULL;

    len += 1;   /* room for the terminating NUL */
    if (ALLOC_N(pat, len) < 0)
        return NULL;

    p = pat;
    for (int i=0; i < n; i++) {
        if (r[i] == NULL)
            continue;
        *p++ = '(';
        if (mixedcase && r[i]->nocase) {
            /* append_expanded() receives &pat and &len, so it can update
             * both; the returned pointer is the new write position. */
            p = append_expanded(r[i], &pat, p, &len);
            ERR_BAIL(r[i]->info);
        } else {
            p = stpcpy(p, r[i]->pattern->str);
        }
        *p++ = ')';
    }
    *p = '\0';
    return make_regexp(info, pat, nnocase == npresent);
 error:
    FREE(pat);
    return NULL;
}
/*
 * Build a regexp that repeats R between MIN and MAX times, keeping R's
 * nocase flag.  A MAX of -1 means "no upper bound".
 *
 *   min 0, max -1  ->  (r)*
 *   min 1, max -1  ->  (r)+
 *   min == max     ->  (r){min}
 *   max -1         ->  (r){min,}
 *   otherwise      ->  (r){min,max}
 *
 * Returns NULL when R is NULL or when the pattern string cannot be
 * formatted; otherwise returns the result of make_regexp().
 */
struct regexp *
regexp_iter(struct info *info, struct regexp *r, int min, int max) {
    const char *p;
    char *s;
    int ret = 0;

    if (r == NULL)
        return NULL;

    p = r->pattern->str;
    if ((min == 0 || min == 1) && max == -1) {
        char q = (min == 0) ? '*' : '+';
        ret = asprintf(&s, "(%s)%c", p, q);
    } else if (min == max) {
        ret = asprintf(&s, "(%s){%d}", p, min);
    } else if (max == -1) {
        /* Open-ended repetition: writing -1 as the upper bound would
         * produce the invalid interval "{min,-1}". */
        ret = asprintf(&s, "(%s){%d,}", p, min);
    } else {
        ret = asprintf(&s, "(%s){%d,%d}", p, min, max);
    }
    return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
}
/*
 * Build a regexp that matches TEXT literally.
 *
 * Each of the characters ".|{}[]()+*?" is preceded by a backslash.  A
 * backslash that is followed by another character is copied together
 * with that character unchanged.  The regexp is created with nocase off.
 *
 * Returns NULL if the pattern buffer cannot be allocated; otherwise
 * returns the result of make_regexp().
 */
struct regexp *make_regexp_literal(struct info *info, const char *text) {
    char *pattern, *p;

    /* Escape special characters in text since it should be taken
       literally. In the worst case every character needs a backslash. */
    CALLOC(pattern, 2*strlen(text)+1);
    if (pattern == NULL)
        return NULL;

    p = pattern;
    for (const char *t = text; *t != '\0'; t++) {
        if ((*t == '\\') && t[1]) {
            /* Existing escape sequence: keep both characters as they are */
            *p++ = *t++;
            *p++ = *t;
        } else if (strchr(".|{}[]()+*?", *t) != NULL) {
            *p++ = '\\';
            *p++ = *t;
        } else {
            *p++ = *t;
        }
    }
    /* Terminate explicitly rather than relying on a zero-filled buffer */
    *p = '\0';

    return make_regexp(info, pattern, 0);
}
/* Translate a POSIX glob into a regexp, character by character:
 *
 *   '*'                 becomes  [^/]*
 *   '?'                 becomes  [^/]
 *   backslash + char    is copied through unchanged
 *   any of ".|{}()+^$"  gets a backslash put in front of it
 *   everything else     is copied as is
 *
 * The finer points of globs, such as complementation, are not handled.
 *
 * Returns NULL if the pattern buffer cannot be allocated; otherwise
 * returns the result of make_regexp() with nocase off.
 */
struct regexp *make_regexp_from_glob(struct info *info, const char *glob) {
    static const char *const any_run  = "[^/]*";
    static const char *const any_one  = "[^/]";
    static const char *const to_quote = ".|{}()+^$";
    int newlen = strlen(glob);
    char *pat = NULL;
    const char *in;
    char *out;

    /* First pass: work out how much longer the regexp is than the glob. */
    in = glob;
    while (*in) {
        char c = *in;

        if (c == '\\' && in[1]) {
            in++;               /* escaped pair, already counted */
        } else if (c == '*') {
            newlen += strlen(any_run)-1;
        } else if (c == '?') {
            newlen += strlen(any_one)-1;
        } else if (strchr(to_quote, c) != NULL) {
            newlen += 1;
        }
        in++;
    }

    if (ALLOC_N(pat, newlen + 1) < 0)
        return NULL;

    /* Second pass: emit the translated pattern. */
    out = pat;
    in = glob;
    while (*in) {
        char c = *in;

        if (c == '\\' && in[1]) {
            *out++ = *in++;
            *out++ = *in;
        } else if (c == '*') {
            out = stpcpy(out, any_run);
        } else if (c == '?') {
            out = stpcpy(out, any_one);
        } else if (strchr(to_quote, c) != NULL) {
            *out++ = '\\';
            *out++ = c;
        } else {
            *out++ = c;
        }
        in++;
    }

    return make_regexp(info, pat, 0);
}
/*
 * Return the compiled regexp for node T, recompiling it when the cached
 * one can no longer be used.
 *
 * The cached regexp t->re_reg is returned as is when the CASE bits of
 * t->re_flags equal IGNORECASE and either the node is flagged CONST or
 * the freshly evaluated text of t->re_exp compares equal to the cached
 * t->re_text.  In every other case the old regexp is released and a new
 * one is built with make_regexp().
 *
 * IGNORECASE and CASE are defined outside this block.
 */
Regexp *
re_update(NODE *t) {
    NODE *t1;

    /* The cached regexp was built with the current IGNORECASE value. */
    if ((t->re_flags & CASE) == IGNORECASE) {
        if ((t->re_flags & CONST) != 0) {
            /* Constant regexp: nothing can have changed. */
            assert(t->type == Node_regex);
            return t->re_reg;
        }
        /* Dynamic regexp: evaluate the expression and compare with the
         * text the cached regexp was compiled from. */
        t1 = force_string(tree_eval(t->re_exp));
        if (t->re_text != NULL) {
            if (cmp_nodes(t->re_text, t1) == 0) {
                free_temp(t1);
                return t->re_reg;
            }
            unref(t->re_text);
        }
        /* Text differs (or none was cached): remember the new text. */
        t->re_text = dupnode(t1);
        free_temp(t1);
    }
    /* From here on the regexp is rebuilt; drop the old one first. */
    if (t->re_reg != NULL)
        refree(t->re_reg);
    /* re_cnt is only advanced once it is positive, and wraps back to 0
     * after exceeding 10; it is passed to make_regexp() below.
     * NOTE(review): the meaning of this counter is not visible here. */
    if (t->re_cnt > 0)
        t->re_cnt++;
    if (t->re_cnt > 10)
        t->re_cnt = 0;
    /* No text cached yet, or the first branch above was skipped because
     * the case setting differs: (re)evaluate the expression text now.
     * unref() is reached even when t->re_text is NULL. */
    if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) {
        t1 = force_string(tree_eval(t->re_exp));
        unref(t->re_text);
        t->re_text = dupnode(t1);
        free_temp(t1);
    }
    t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
                            IGNORECASE, t->re_cnt);
    /* Record the case setting the new regexp was compiled with. */
    t->re_flags &= ~CASE;
    t->re_flags |= IGNORECASE;
    return t->re_reg;
}