/* If the DFA turns out to have some set of fixed strings one of which must occur in the match, then we build a kwset matcher to find those strings, and thus quickly filter out impossible matches. */ static void kwsmusts (struct compiled_regex *cregex, bool match_icase, bool match_words, bool match_lines, char eolbyte) { struct dfamust const *dm; const char *err; if (cregex->dfa.musts) { kwsinit (&cregex->ckwset, match_icase, match_words, match_lines, eolbyte); /* First, we compile in the substrings known to be exact matches. The kwset matcher will return the index of the matching string that it chooses. */ for (dm = cregex->dfa.musts; dm; dm = dm->next) { if (!dm->exact) continue; cregex->kwset_exact_matches++; if ((err = kwsincr (cregex->ckwset.kwset, dm->must, strlen (dm->must))) != NULL) error (exit_failure, 0, err); } /* Now, we compile the substrings that will require the use of the regexp matcher. */ for (dm = cregex->dfa.musts; dm; dm = dm->next) { if (dm->exact) continue; if ((err = kwsincr (cregex->ckwset.kwset, dm->must, strlen (dm->must))) != NULL) error (exit_failure, 0, err); } if ((err = kwsprep (cregex->ckwset.kwset)) != NULL) error (exit_failure, 0, err); } }
int build_kws_from_dfa( struct _fregex * fre ) { struct dfamust * dm ; char * errstr ; int errval ; // struct _fregex * fre ; // fre = (struct _fregex *)( fre_t->data ) ; if (!fre ) { return 0 ; } if(!fre->dfa.musts) { fre->kwset = 0 ; return 0 ; } /* Foreach exact string */ for (dm = fre->dfa.musts; dm; dm = dm->next) { if (!dm->exact) continue; errstr = kwsincr( fre->kwset, dm->must, strlen(dm->must) ); if( errstr ) { errval = (*fast_regex_subsys_error)( "kwset", errstr ) ; if( errval ) return errval ; } else fre->num_exact_kws ++ ; } /* Foreach inexact string -- will require further parsing */ for (dm = fre->dfa.musts; dm; dm = dm->next) { if ( dm->exact ) continue ; errstr = kwsincr( fre->kwset, dm->must, strlen(dm->must) ); if( errstr ) { errval = (*fast_regex_subsys_error)( "kwset", errstr ) ; if( errval ) return errval ; } } errstr = kwsprep( fre->kwset ); if( errstr ) { errval = (*fast_regex_subsys_error)( "kwset", errstr ) ; if( errval ) return errval ; } return 0 ; }
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; if (opt->fixed || is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else p->fixed = 0; if (p->fixed) { if (opt->regflags & REG_ICASE || p->ignore_case) p->kws = kwsalloc(tolower_trans_tbl); else p->kws = kwsalloc(NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; } if (opt->pcre) { compile_pcre_regexp(p, opt); return; } err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; regerror(err, &p->regexp, errbuf, 1024); regfree(&p->regexp); compile_regexp_failed(p, errbuf); } }
static void diffcore_pickaxe_count(struct diff_options *o) { const char *needle = o->pickaxe; int opts = o->pickaxe_opts; unsigned long len = strlen(needle); regex_t regex, *regexp = NULL; kwset_t kws = NULL; if (opts & DIFF_PICKAXE_REGEX) { int err; err = regcomp(®ex, needle, REG_EXTENDED | REG_NEWLINE); if (err) { /* The POSIX.2 people are surely sick */ char errbuf[1024]; regerror(err, ®ex, errbuf, 1024); regfree(®ex); die("invalid pickaxe regex: %s", errbuf); } regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) ? tolower_trans_tbl : NULL); kwsincr(kws, needle, len); kwsprep(kws); } pickaxe(&diff_queued_diff, o, regexp, kws, has_changes); if (opts & DIFF_PICKAXE_REGEX) regfree(®ex); else kwsfree(kws); return; }
static void * Fcompile (const char *pattern, size_t pattern_size, bool match_icase, bool match_words, bool match_lines, char eolbyte) { struct compiled_kwset *ckwset; const char *beg, *lim, *err; ckwset = XMALLOC (struct compiled_kwset); kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte); beg = pattern; do { for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim) ; if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL) error (exit_failure, 0, err); if (lim < pattern + pattern_size) ++lim; beg = lim; } while (beg < pattern + pattern_size); if ((err = kwsprep (ckwset->kwset)) != NULL) error (exit_failure, 0, err); return ckwset; }
/* Add MUST to the global kwset and return whatever kwsincr returns.
   When matching case-insensitively in a multibyte locale, the string
   is first passed through mbtolower, which may also update the
   length.  */
static char const *
kwsincr_case (const char *must)
{
  /* MAP is only here to satisfy mbtolower's interface; it is not
     consulted afterwards.  */
  mb_len_map_t *map = NULL;
  size_t len = strlen (must);
  const char *text = must;

  if (match_icase && MB_CUR_MAX > 1)
    text = mbtolower (must, &len, &map);

  return kwsincr (kwset, text, len);
}
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int icase, ascii_only; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; icase = opt->regflags & REG_ICASE || p->ignore_case; ascii_only = !has_non_ascii(p->pattern); /* * Even when -F (fixed) asks us to do a non-regexp search, we * may not be able to correctly case-fold when -i * (ignore-case) is asked (in which case, we'll synthesize a * regexp to match the pattern that matches regexp special * characters literally, while ignoring case differences). On * the other hand, even without -F, if the pattern does not * have any regexp special characters and there is no need for * case-folding search, we can internally turn it into a * simple string match using kws. p->fixed tells us if we * want to use kws. */ if (opt->fixed || is_fixed(p->pattern, p->patternlen)) p->fixed = !icase || ascii_only; else p->fixed = 0; if (p->fixed) { p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; } else if (opt->fixed) { /* * We come here when the pattern has the non-ascii * characters we cannot case-fold, and asked to * ignore-case. */ compile_fixed_regexp(p, opt); return; } if (opt->pcre) { compile_pcre_regexp(p, opt); return; } err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; regerror(err, &p->regexp, errbuf, 1024); regfree(&p->regexp); compile_regexp_failed(p, errbuf); } }
/* Add MUST to the global kwset and return whatever kwsincr returns.
   When built with MBS_SUPPORT and matching case-insensitively in a
   multibyte locale, the string is first passed through mbtolower,
   which may also update the length.  */
static char const *
kwsincr_case (const char *must)
{
  size_t len = strlen (must);
  const char *text = must;

#if MBS_SUPPORT
  if (match_icase && MB_CUR_MAX > 1)
    text = mbtolower (must, &len);
#endif

  return kwsincr (kwset, text, len);
}
void diffcore_pickaxe(struct diff_options *o) { const char *needle = o->pickaxe; int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { int cflags = REG_EXTENDED | REG_NEWLINE; if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE) cflags |= REG_ICASE; regcomp_or_die(®ex, needle, cflags); regexp = ®ex; } else if (opts & DIFF_PICKAXE_KIND_S) { if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE && has_non_ascii(needle)) { struct strbuf sb = STRBUF_INIT; int cflags = REG_NEWLINE | REG_ICASE; basic_regex_quote_buf(&sb, needle); regcomp_or_die(®ex, sb.buf, cflags); strbuf_release(&sb); regexp = ®ex; } else { kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE ? tolower_trans_tbl : NULL); kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } } pickaxe(&diff_queued_diff, o, regexp, kws, (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes); if (regexp) regfree(regexp); if (kws) kwsfree(kws); return; }
/*
 * Compile the pattern 'sre' ('len' bytes) into the matchers selected by
 * fre_t->options.
 *
 * With FRE_STRING_SEARCH, 'sre' is a series of newline-separated literal
 * strings; each one is added to the keyword set (bumping num_exact_kws on
 * success) and the set is prepared.  This mode returns -1 when
 * FRE_NO_KWSET is also set.  Only the first 'len' bytes of 'sre' are
 * examined, so the pattern does not need to be NUL-terminated.
 *
 * Otherwise, as selected by the HAS_DFA / HAS_KWSET / HAS_REGEX option
 * tests, the pattern is compiled into the DFA, the keyword set is built
 * from the DFA's must strings (the DFA is freed again when FRE_NO_DFA is
 * set), and the pattern is compiled for the regex matcher.
 *
 * Error strings from the sub-systems are passed to
 * fast_regex_subsys_error, which is set to fprintf_error when it is NULL.
 * A non-zero result from that handler, or from build_kws_from_dfa(), is
 * returned; otherwise the function returns 0.
 */
int compile_fast_regex( fast_regex_t * fre_t, const char * sre, size_t len )
{
    struct _fregex * fre = (struct _fregex *)( fre_t->data ) ;
    int errval = 0 ;
    char * errstr ;

    if( fast_regex_subsys_error == NULL )
        fast_regex_subsys_error = fprintf_error ;

    if( fre_t->options & FRE_STRING_SEARCH ) {
        if( fre_t->options & FRE_NO_KWSET ) {
            return -1 ;
        }

        /*
         * straight string match
         * 'sre' represents a series of newline separated strings to search for
         */
        while( sre ) {
            /* bounded search: never look past the 'len' bytes still left */
            const char * nl = memchr( sre, '\n', len ) ;
            size_t sublen = nl ? (size_t)( nl - sre ) : len ;

            errstr = kwsincr( fre->kwset, sre, sublen ) ;
            if( errstr ) {
                errval = (*fast_regex_subsys_error)( "kwset", errstr ) ;
                if( errval )
                    return errval ;
            } else {
                fre->num_exact_kws ++ ;
            }

            if( nl ) {
                /* skip the string just added and its newline */
                len -= sublen + 1 ;
                sre = nl + 1 ;
            } else {
                sre = NULL ;
            }
        }

        errstr = kwsprep( fre->kwset ) ;
        if( errstr ) {
            errval = (*fast_regex_subsys_error)( "kwset", errstr ) ;
            if( errval )
                return errval ;
        }
        return 0 ;
    }

    if( HAS_DFA(fre_t->options) || HAS_KWSET(fre_t->options) ) {
        dfasyntax( RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE,
                   ( fre_t->options & FRE_CASE_INSENSITIVE ) ) ;
        dfacomp( sre, len, &(fre->dfa), 1 ) ;

        if( HAS_KWSET(fre_t->options) ) {
            errval = build_kws_from_dfa( fre ) ;
            if( fre_t->options & FRE_NO_DFA ) {
                /* We used the DFA only to get the keywords */
                dfafree( &(fre->dfa) ) ;
            }
        } else {
            fre->kwset = 0 ;
        }
    }

    if( errval )
        return errval ;

    if( HAS_REGEX(fre_t->options) ) {
        re_set_syntax( RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE ) ;
        errstr = re_compile_pattern( sre, len, &(fre->regex) ) ;
        if( errstr ) {
            errval = (*fast_regex_subsys_error)( "re", errstr ) ;
            if( errval )
                return errval ;
        }
    }

    return errval ;
}