/* Test driver: compile argv[1] with dfacomp using the syntax bits
   RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, then run dfaexec over the
   string argv[2].

   Usage: PROGRAM PATTERN STRING [ALLOW_NL]

   ALLOW_NL is read with atoi; when it is present and non-zero the
   fourth argument passed to dfaexec is 1, otherwise 0.  When dfaexec
   returns a non-null pointer, its offset from the start of STRING is
   printed on a line of its own.

   Exits with EXIT_FAILURE when fewer than two operands are supplied,
   and with EXIT_SUCCESS otherwise, whether or not dfaexec returned a
   pointer.  */
int
main (int argc, char **argv)
{
  struct dfa *dfa;
  char *beg, *end, *p;
  int allow_nl;

  set_program_name (argv[0]);

  if (argc < 3)
    exit (EXIT_FAILURE);

  setlocale (LC_ALL, "");

  dfasyntax (RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
  dfa = dfaalloc ();
  dfacomp (argv[1], strlen (argv[1]), dfa, 0);

  beg = argv[2];
  end = argv[2] + strlen (argv[2]);
  allow_nl = argc > 3 && atoi (argv[3]);

  p = dfaexec (dfa, beg, end, allow_nl, NULL, NULL);

  /* P - BEG has type ptrdiff_t, whose printf length modifier is 't'.
     "%zd" expects the signed type corresponding to size_t, so using it
     here is a format/argument type mismatch.  */
  if (p != NULL)
    printf ("%td\n", p - beg);

  exit (EXIT_SUCCESS);
}
/* Compile PATTERN (SIZE bytes, possibly several patterns separated by
   newlines) for both the regex matcher and the DFA matcher.

   SYNTAX_BITS selects the regex syntax; RE_ICASE is OR-ed in when
   match_icase is set.  Each newline-separated piece is compiled with
   re_compile_pattern into a fresh slot appended to the global
   `patterns' array (pcount is advanced for each one).  A compilation
   error is reported through error (EXIT_TROUBLE, ...).

   The DFA is then built from the whole of PATTERN, wrapped in
   line/word anchoring when match_lines or match_words is set, and
   stored in the global `dfa'; finally kwsmusts is called.  */
void
GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
{
  char const *cursor = pattern;
  size_t remaining = size;
  char *wrapped = NULL;

  if (match_icase)
    syntax_bits |= RE_ICASE;
  re_set_syntax (syntax_bits);
  dfasyntax (syntax_bits, match_icase, eolbyte);

  /* The GNU regex compiler must be handed the patterns one at a time so
     that errors such as "[\nallo\n]\n" are detected: the pieces there
     are "[", "allo" and "]", and GNU regex should raise a syntax error.
     The same goes for back-references, which should be local to each
     pattern.  */
  for (;;)
    {
      char const *newline = memchr (cursor, '\n', remaining);
      size_t piece_len = newline ? (size_t) (newline - cursor) : remaining;
      char const *msg;

      /* Consume the piece, plus its terminating newline if there is one.  */
      remaining -= newline ? piece_len + 1 : piece_len;

      patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns);
      patterns[pcount] = patterns0;
      msg = re_compile_pattern (cursor, piece_len,
                                &patterns[pcount].regexbuf);
      if (msg != NULL)
        error (EXIT_TROUBLE, 0, "%s", msg);
      pcount++;

      if (newline == NULL || remaining == 0)
        break;
      cursor = newline + 1;
    }

  /* In the match_words and match_lines cases, the DFA matcher is given a
     different pattern that quickly throws out cases that won't work.
     If the DFA succeeds, the regex matcher is then used to decide
     whether the match should really count.  */
  if (match_words || match_lines)
    {
      static char const line_beg_no_bk[] = "^(";
      static char const line_end_no_bk[] = ")$";
      static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])(";
      static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)";
      static char const line_beg_bk[] = "^\\(";
      static char const line_end_bk[] = "\\)$";
      static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
      static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
      int bk = !(syntax_bits & RE_NO_BK_PARENS);
      char const *prefix;
      char const *suffix;
      size_t prefix_len;
      size_t suffix_len;

      if (match_lines)
        {
          prefix = bk ? line_beg_bk : line_beg_no_bk;
          suffix = bk ? line_end_bk : line_end_no_bk;
        }
      else
        {
          prefix = bk ? word_beg_bk : word_beg_no_bk;
          suffix = bk ? word_end_bk : word_end_no_bk;
        }
      prefix_len = strlen (prefix);
      suffix_len = strlen (suffix);

      /* Sized for the longest prefix/suffix pair (the backslash word
         forms) plus the terminating NUL.  */
      wrapped = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
      memcpy (wrapped, prefix, prefix_len);
      memcpy (wrapped + prefix_len, pattern, size);
      /* The + 1 copies the suffix's terminating NUL as well.  */
      memcpy (wrapped + prefix_len + size, suffix, suffix_len + 1);

      pattern = wrapped;
      size = prefix_len + size + suffix_len;
    }

  dfa = dfaalloc ();
  dfacomp (pattern, size, dfa, 1);
  kwsmusts ();

  free (wrapped);
}
static void compile_regex_1 (struct regex *new_regex, int needed_sub) { const char *error; int syntax = ((extended_regexp_flags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC); syntax &= ~RE_DOT_NOT_NULL; syntax |= RE_NO_POSIX_BACKTRACKING; switch (posixicity) { case POSIXLY_EXTENDED: syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD; break; case POSIXLY_CORRECT: syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD; break; case POSIXLY_BASIC: syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS; if (!(extended_regexp_flags & REG_EXTENDED)) syntax |= RE_LIMITED_OPS; break; } if (new_regex->flags & REG_ICASE) syntax |= RE_ICASE; else new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); syntax |= needed_sub ? 0 : RE_NO_SUB; /* If REG_NEWLINE is set, newlines are treated differently. */ if (new_regex->flags & REG_NEWLINE) { /* REG_NEWLINE implies neither . nor [^...] match newline. */ syntax &= ~RE_DOT_NEWLINE; syntax |= RE_HAT_LISTS_NOT_NEWLINE; } re_set_syntax (syntax); error = re_compile_pattern (new_regex->re, new_regex->sz, &new_regex->pattern); new_regex->pattern.newline_anchor = buffer_delimiter == '\n' && (new_regex->flags & REG_NEWLINE) != 0; new_regex->pattern.translate = NULL; #ifndef RE_ICASE if (new_regex->flags & REG_ICASE) { static char translate[1 << (sizeof (char) * 8)]; int i; for (i = 0; i < sizeof (translate) / sizeof (char); i++) translate[i] = tolower (i); new_regex->pattern.translate = translate; } #endif if (error) bad_prog (error); /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ if (needed_sub && new_regex->pattern.re_nsub < needed_sub - 1 && posixicity == POSIXLY_EXTENDED) { char buf[200]; sprintf (buf, _("invalid reference \\%d on `s' command's RHS"), needed_sub - 1); bad_prog (buf); } int dfaopts = buffer_delimiter == '\n' ? 
0 : DFA_EOL_NUL; new_regex->dfa = dfaalloc (); dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts); dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1); /* The patterns which consist of only ^ or $ often appear in substitution, but regex and dfa are not good at them, as regex does not build fastmap, and as all in buffer must be scanned for $. So we mark them to handle manually. */ if (new_regex->sz == 1) { if (new_regex->re[0] == '^') new_regex->begline = true; if (new_regex->re[0] == '$') new_regex->endline = true; } }