Example #1
0
/* If the DFA turns out to have some set of fixed strings one of
   which must occur in the match, then we build a kwset matcher
   to find those strings, and thus quickly filter out impossible
   matches. */
static void
kwsmusts (struct compiled_regex *cregex,
	  bool match_icase, bool match_words, bool match_lines, char eolbyte)
{
  struct dfamust const *dm;
  const char *err;

  if (cregex->dfa.musts)
    {
      kwsinit (&cregex->ckwset, match_icase, match_words, match_lines, eolbyte);
      /* First, we compile in the substrings known to be exact
	 matches.  The kwset matcher will return the index
	 of the matching string that it chooses. */
      for (dm = cregex->dfa.musts; dm; dm = dm->next)
	{
	  if (!dm->exact)
	    continue;
	  cregex->kwset_exact_matches++;
	  if ((err = kwsincr (cregex->ckwset.kwset, dm->must, strlen (dm->must))) != NULL)
	    error (exit_failure, 0, err);
	}
      /* Now, we compile the substrings that will require
	 the use of the regexp matcher.  */
      for (dm = cregex->dfa.musts; dm; dm = dm->next)
	{
	  if (dm->exact)
	    continue;
	  if ((err = kwsincr (cregex->ckwset.kwset, dm->must, strlen (dm->must))) != NULL)
	    error (exit_failure, 0, err);
	}
      if ((err = kwsprep (cregex->ckwset.kwset)) != NULL)
	error (exit_failure, 0, err);
    }
}
Example #2
0
/*
 * Feed every "must" string recorded on fre->dfa.musts into fre->kwset and
 * then prepare the keyword set with kwsprep().
 *
 * The list is walked twice so that all exact strings are added before any
 * inexact one; fre->num_exact_kws is bumped once per exact string that was
 * added without error.
 *
 * Returns 0 when 'fre' is NULL, when there are no musts (fre->kwset is then
 * set to 0), or on success.  When kwsincr()/kwsprep() report an error the
 * message is handed to fast_regex_subsys_error; a non-zero result from that
 * callback is returned immediately, a zero result lets processing continue.
 */
int build_kws_from_dfa( struct _fregex * fre )
{
	struct dfamust * must ;
	char * msg ;
	int rc ;
	int pass ;

	if ( fre == NULL )
		return 0 ;

	if ( !fre->dfa.musts )
	{
		fre->kwset = 0 ;
		return 0 ;
	}

	/* pass 0: exact strings; pass 1: inexact strings (need further parsing) */
	for ( pass = 0 ; pass < 2 ; pass ++ )
	{
		for ( must = fre->dfa.musts ; must ; must = must->next )
		{
			int is_exact = ( must->exact ) ? 1 : 0 ;
			int want_exact = ( pass == 0 ) ;

			if ( is_exact != want_exact )
				continue ;

			msg = kwsincr( fre->kwset, must->must, strlen( must->must ) ) ;
			if ( msg )
			{
				rc = (*fast_regex_subsys_error)( "kwset", msg ) ;
				if ( rc )
					return rc ;
			}
			else if ( want_exact )
			{
				fre->num_exact_kws ++ ;
			}
		}
	}

	msg = kwsprep( fre->kwset ) ;
	if ( msg )
	{
		rc = (*fast_regex_subsys_error)( "kwset", msg ) ;
		if ( rc )
			return rc ;
	}

	return 0 ;
}
Example #3
0
File: grep.c Project: 2quala/git
/*
 * Prepare pattern 'p' for matching according to 'opt'.
 *
 * The word_regexp and ignore_case settings are copied from 'opt'.  The
 * pattern is treated as a fixed string when opt->fixed is set or
 * is_fixed() says so; in that case a keyword set is built in p->kws
 * (using tolower_trans_tbl when case is to be ignored).  Otherwise the
 * pattern goes to compile_pcre_regexp() when opt->pcre is set, or to
 * regcomp(); a regcomp() failure is reported via compile_regexp_failed().
 */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
	int rc;

	p->word_regexp = opt->word_regexp;
	p->ignore_case = opt->ignore_case;
	p->fixed = (opt->fixed || is_fixed(p->pattern, p->patternlen)) ? 1 : 0;

	if (p->fixed) {
		int fold_case = opt->regflags & REG_ICASE || p->ignore_case;

		p->kws = kwsalloc(fold_case ? tolower_trans_tbl : NULL);
		kwsincr(p->kws, p->pattern, p->patternlen);
		kwsprep(p->kws);
		return;
	}

	if (opt->pcre) {
		compile_pcre_regexp(p, opt);
		return;
	}

	rc = regcomp(&p->regexp, p->pattern, opt->regflags);
	if (rc) {
		char msg[1024];

		regerror(rc, &p->regexp, msg, sizeof(msg));
		regfree(&p->regexp);
		compile_regexp_failed(p, msg);
	}
}
Example #4
0
/*
 * Run the pickaxe over diff_queued_diff using has_changes as the
 * per-pair predicate.
 *
 * With DIFF_PICKAXE_REGEX set in o->pickaxe_opts, o->pickaxe is compiled
 * as an extended regex (REG_NEWLINE); a compile failure is fatal via
 * die().  Otherwise o->pickaxe is loaded into a keyword set, using
 * tolower_trans_tbl when PICKAXE_IGNORE_CASE is set.  Whichever matcher
 * was built is released before returning.
 */
static void diffcore_pickaxe_count(struct diff_options *o)
{
	const char *needle = o->pickaxe;
	int opts = o->pickaxe_opts;
	unsigned long needle_len = strlen(needle);
	const int want_regex = !!(opts & DIFF_PICKAXE_REGEX);
	regex_t regex;
	regex_t *regexp = NULL;
	kwset_t kws = NULL;

	if (!want_regex) {
		kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
			       ? tolower_trans_tbl : NULL);
		kwsincr(kws, needle, needle_len);
		kwsprep(kws);
	} else {
		int rc = regcomp(&regex, needle, REG_EXTENDED | REG_NEWLINE);

		if (rc) {
			char msg[1024];

			regerror(rc, &regex, msg, sizeof(msg));
			regfree(&regex);
			die("invalid pickaxe regex: %s", msg);
		}
		regexp = &regex;
	}

	pickaxe(&diff_queued_diff, o, regexp, kws, has_changes);

	if (want_regex)
		regfree(&regex);
	else
		kwsfree(kws);
}
Example #5
0
static void *
Fcompile (const char *pattern, size_t pattern_size,
	  bool match_icase, bool match_words, bool match_lines,
	  char eolbyte)
{
  struct compiled_kwset *ckwset;
  const char *beg, *lim, *err;

  ckwset = XMALLOC (struct compiled_kwset);
  kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);

  beg = pattern;
  do
    {
      for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
	;
      if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
	error (exit_failure, 0, err);
      if (lim < pattern + pattern_size)
	++lim;
      beg = lim;
    }
  while (beg < pattern + pattern_size);

  if ((err = kwsprep (ckwset->kwset)) != NULL)
    error (exit_failure, 0, err);
  return ckwset;
}
Example #6
0
/* Add MUST to the file-level kwset and return whatever kwsincr returns.

   When match_icase is set and the locale is multibyte (MB_CUR_MAX > 1),
   the string is first passed through mbtolower, which receives a pointer
   to the length and so may adjust it; otherwise MUST is added as is.  */
static char const *
kwsincr_case (const char *must)
{
  size_t len = strlen (must);
  mb_len_map_t *map = NULL;
  const char *key = must;

  if (match_icase && MB_CUR_MAX > 1)
    key = mbtolower (must, &len, &map);

  return kwsincr (kwset, key, len);
}
Example #7
0
/*
 * Prepare pattern 'p' for matching according to 'opt', choosing between
 * a kws string search, a synthesized fixed regexp, PCRE and regcomp().
 */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
	int fold_case, pure_ascii;
	int rc;

	p->word_regexp = opt->word_regexp;
	p->ignore_case = opt->ignore_case;
	fold_case = opt->regflags & REG_ICASE || p->ignore_case;
	pure_ascii = !has_non_ascii(p->pattern);

	/*
	 * p->fixed records whether kws will be used.
	 *
	 * -F (fixed) requests a non-regexp search, but with -i
	 * (ignore-case) we may be unable to case-fold correctly; then a
	 * regexp is synthesized that matches the special characters
	 * literally while ignoring case.  Conversely, a pattern without
	 * regexp special characters that needs no case-folding can be
	 * turned into a plain kws string match even without -F.
	 */
	p->fixed = (opt->fixed || is_fixed(p->pattern, p->patternlen))
		? (!fold_case || pure_ascii)
		: 0;

	if (p->fixed) {
		p->kws = kwsalloc(fold_case ? tolower_trans_tbl : NULL);
		kwsincr(p->kws, p->pattern, p->patternlen);
		kwsprep(p->kws);
		return;
	}

	if (opt->fixed) {
		/*
		 * Ignore-case was requested for a pattern containing
		 * non-ascii characters that we cannot case-fold.
		 */
		compile_fixed_regexp(p, opt);
		return;
	}

	if (opt->pcre) {
		compile_pcre_regexp(p, opt);
		return;
	}

	rc = regcomp(&p->regexp, p->pattern, opt->regflags);
	if (rc) {
		char msg[1024];

		regerror(rc, &p->regexp, msg, sizeof(msg));
		regfree(&p->regexp);
		compile_regexp_failed(p, msg);
	}
}
Example #8
0
/* Add MUST to the file-level kwset and return whatever kwsincr returns.

   When built with MBS_SUPPORT, match_icase is set and the locale is
   multibyte (MB_CUR_MAX > 1), the string is first passed through
   mbtolower, which receives a pointer to the length and so may adjust
   it; otherwise MUST is added as is.  */
static char const *
kwsincr_case (const char *must)
{
  size_t len = strlen (must);
  const char *key = must;

#if MBS_SUPPORT
  if (match_icase && MB_CUR_MAX > 1)
    key = mbtolower (must, &len);
#endif

  return kwsincr (kwset, key, len);
}
Example #9
0
void diffcore_pickaxe(struct diff_options *o)
{
	const char *needle = o->pickaxe;
	int opts = o->pickaxe_opts;
	regex_t regex, *regexp = NULL;
	kwset_t kws = NULL;

	if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
		int cflags = REG_EXTENDED | REG_NEWLINE;
		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
			cflags |= REG_ICASE;
		regcomp_or_die(&regex, needle, cflags);
		regexp = &regex;
	} else if (opts & DIFF_PICKAXE_KIND_S) {
		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
		    has_non_ascii(needle)) {
			struct strbuf sb = STRBUF_INIT;
			int cflags = REG_NEWLINE | REG_ICASE;

			basic_regex_quote_buf(&sb, needle);
			regcomp_or_die(&regex, sb.buf, cflags);
			strbuf_release(&sb);
			regexp = &regex;
		} else {
			kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
				       ? tolower_trans_tbl : NULL);
			kwsincr(kws, needle, strlen(needle));
			kwsprep(kws);
		}
	}

	pickaxe(&diff_queued_diff, o, regexp, kws,
		(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);

	if (regexp)
		regfree(regexp);
	if (kws)
		kwsfree(kws);
	return;
}
Example #10
0
/*
 * Compile the pattern 'sre' ('len' bytes) into the matchers selected by
 * fre_t->options.
 *
 * FRE_STRING_SEARCH: 'sre' is a series of newline separated strings; each
 * one is added to the keyword set and counted in num_exact_kws.  The
 * separators are located within the first 'len' bytes only.
 * NOTE: a pattern ending in a newline adds a final zero-length keyword.
 *
 * Otherwise: the DFA and/or keyword set are built when HAS_DFA / HAS_KWSET
 * say so (the DFA is freed again under FRE_NO_DFA once the keywords have
 * been extracted), and the regex is compiled when HAS_REGEX says so.
 *
 * If no error callback is installed, fprintf_error is installed first.
 *
 * Returns 0 on success, -1 when FRE_STRING_SEARCH is combined with
 * FRE_NO_KWSET, or the non-zero value produced by
 * fast_regex_subsys_error / build_kws_from_dfa on failure.  A zero result
 * from the error callback lets compilation continue.
 */
int compile_fast_regex( fast_regex_t * fre_t, const char * sre, size_t len )
{
	struct _fregex * fre = (struct _fregex *)( fre_t->data ) ;

	int errval ;
	char * errstr ;
	size_t sublen ;
	const char * substr ;
	
	errval = 0 ;

	if( fast_regex_subsys_error == NULL )
		fast_regex_subsys_error = fprintf_error ;
	
	if( fre_t->options & FRE_STRING_SEARCH )
	{
		if( fre_t->options & FRE_NO_KWSET )
		{
			return -1 ;
		}
			
		/*
		 * straight string match
		 * 'sre' represents a series of newline separated strings to search for
		 */
		while( sre )
		{
			/*
			 * Bounded search: honours 'len' instead of relying on a
			 * terminating NUL, so it neither runs past the buffer nor
			 * stops early at an embedded NUL.
			 */
			substr = memchr( sre, '\n', len ) ;
			if( substr == NULL )
				sublen = len ;
			else
				sublen = (size_t)( substr - sre ) ;

			errstr = kwsincr( fre->kwset, sre, sublen );
			if( errstr )
			{
				errval = (*fast_regex_subsys_error)( "kwset" , errstr ) ;
				if( errval )
					return errval ;
			}
			else
				fre->num_exact_kws ++ ;
		
			if( substr == NULL )
			{
				/* last string consumed */
				sre = NULL ;
			}
			else
			{
				/* skip the string and its newline separator */
				len -= ( sublen + 1 ) ;
				sre = substr + 1 ;
			}
		}
		errstr = kwsprep( fre->kwset );
		if( errstr )
		{
			errval = (*fast_regex_subsys_error)( "kwset", errstr ) ;
			if( errval )
				return errval ;
		}
		return 0 ;
	}
	
	if( HAS_DFA(fre_t->options) || HAS_KWSET(fre_t->options) )
	{
		dfasyntax( RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, ( fre_t->options & FRE_CASE_INSENSITIVE ) ) ;
		dfacomp( sre, len, &(fre->dfa), 1 ) ;

		if( HAS_KWSET(fre_t->options) )
		{
			errval = build_kws_from_dfa( fre ) ;
			if( fre_t->options & FRE_NO_DFA )
			{  /* We used the DFA only to get the keywords */
				dfafree( &(fre->dfa) );
			}
		}
		else
		{
			fre->kwset = 0 ;
		}
	}

	if( errval )
		return errval ;

	if( HAS_REGEX(fre_t->options) )
	{
		re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
		errstr = re_compile_pattern( sre, len, &(fre->regex) ) ;
		if( errstr )
		{
			errval = (*fast_regex_subsys_error)( "re", errstr ) ;
			if( errval )
				return errval ;
		}
	}
	
	return errval ;
}