예제 #1
0
int evaluate_fast_regex( struct fast_regex * fre_t, char * str, size_t len )
{
	char * sub ;
	struct _fregex * fre = (struct _fregex *)( fre_t->data ) ;

	if( fre->kwset )
	{
		struct kwsmatch kwsm ;
		sub = kwsexec( fre->kwset, (char *)str, len, &kwsm) ;
		if( sub == NULL )
			return 0 ;
		if( kwsm.index < fre->num_exact_kws )
		{
			return 1 ;
		}
	}

	if( HAS_DFA(fre_t->options) )
	{
		int backref = 0 ;
		sub = dfaexec( &(fre->dfa), str, (str+len), 0, NULL, &backref) ;
		if( sub == NULL )
			return 0 ;
		if ( !backref || (fre_t->options & FRE_NO_REGEX) )
			return 1 ;
	}

	return re_match( &fre->regex , str, len, 0, NULL ) > 0 ;
}
예제 #2
0
파일: grep.c 프로젝트: 2quala/git
static int fixmatch(struct grep_pat *p, char *line, char *eol,
		    regmatch_t *match)
{
	struct kwsmatch kwsm;
	size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
	if (offset == -1) {
		match->rm_so = match->rm_eo = -1;
		return REG_NOMATCH;
	} else {
		match->rm_so = offset;
		match->rm_eo = match->rm_so + kwsm.size[0];
		return 0;
	}
}
예제 #3
0
/*
 * Count the non-overlapping occurrences of a needle in the file mf.
 *
 * When regexp is non-NULL the needle is that regular expression and the
 * data is scanned as a NUL-terminated string (asserted below).  Otherwise
 * the keyword set kws is used for an exact byte search over mf->size
 * bytes.  The o parameter is not used here.
 */
static unsigned int contains(mmfile_t *mf, struct diff_options *o,
			     regex_t *regexp, kwset_t kws)
{
	const char *cursor = mf->ptr;
	unsigned long remaining = mf->size;
	unsigned int hits = 0;

	if (regexp) {
		regmatch_t m;
		int eflags = 0;

		assert(cursor[remaining] == '\0');
		while (*cursor) {
			if (regexec(regexp, cursor, 1, &m, eflags))
				break;
			/* Later searches no longer start at a line beginning. */
			eflags |= REG_NOTBOL;
			cursor += m.rm_eo;
			/* Step over an empty match so the scan advances. */
			if (m.rm_so == m.rm_eo && *cursor)
				cursor++;
			hits++;
		}
		return hits;
	}

	/* Classic exact string match */
	while (remaining) {
		struct kwsmatch kwsm;
		size_t offset = kwsexec(kws, cursor, remaining, &kwsm);
		size_t consumed;

		if (offset == -1)
			break;
		/* Skip everything up to and including this occurrence. */
		consumed = offset + kwsm.size[0];
		remaining -= consumed;
		cursor += consumed;
		hits++;
	}
	return hits;
}
예제 #4
0
/*
 * Count the non-overlapping occurrences of a needle in the file mf.
 *
 * With a non-NULL regexp the buffer is searched through regexec_buf(),
 * stopping at the end of the buffer or at the first NUL byte.  Without
 * one, the keyword set kws is used for an exact byte search.
 */
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
{
	const char *cursor = mf->ptr;
	unsigned long remaining = mf->size;
	unsigned int hits = 0;

	if (regexp) {
		regmatch_t m;
		int eflags = 0;

		for (;;) {
			if (!remaining || !*cursor)
				break;
			if (regexec_buf(regexp, cursor, remaining, 1, &m, eflags))
				break;
			/* Later searches no longer start at a line beginning. */
			eflags |= REG_NOTBOL;
			cursor += m.rm_eo;
			remaining -= m.rm_eo;
			/* Step over an empty match so the scan advances. */
			if (remaining && *cursor && m.rm_so == m.rm_eo) {
				cursor++;
				remaining--;
			}
			hits++;
		}
		return hits;
	}

	/* Classic exact string match */
	while (remaining) {
		struct kwsmatch kwsm;
		size_t offset = kwsexec(kws, cursor, remaining, &kwsm);
		size_t consumed;

		if (offset == -1)
			break;
		/* Skip everything up to and including this occurrence. */
		consumed = offset + kwsm.size[0];
		remaining -= consumed;
		cursor += consumed;
		hits++;
	}
	return hits;
}
예제 #5
0
/* Search BUF (SIZE bytes) for a match of the compiled patterns.

   With START_PTR == NULL the kwset and/or DFA matchers locate a candidate
   line; re_search is consulted only when they cannot settle the question
   (back-references seen, or the -x/-w style checks driven by match_lines
   and match_words).  On success the whole line is reported.

   With START_PTR != NULL the search begins at START_PTR, the outer loop
   runs once, and the leftmost (then longest) match over all patterns is
   reported.

   Returns the offset of the reported region from BUF and stores its length
   in *MATCH_SIZE, both after being passed through mb_case_map_apply;
   returns (size_t) -1 when nothing matches.

   Relies on state defined outside this function (eolbyte, match_icase,
   kwset, kwset_exact_matches, dfa, patterns, pcount, match_lines,
   match_words).  */
size_t
EGexecute (char const *buf, size_t size, size_t *match_size,
           char const *start_ptr)
{
  char const *buflim, *beg, *end, *match, *best_match, *mb_start;
  char eol = eolbyte;
  int backref;
  regoff_t start;
  size_t len, best_len;
  struct kwsmatch kwsm;
  size_t i, ret_val;
  mb_len_map_t *map = NULL;

  if (MB_CUR_MAX > 1)
    {
      if (match_icase)
        {
          /* mbtolower adds a NUL byte at the end.  That will provide
             space for the sentinel byte dfaexec may add.  */
          char *case_buf = mbtolower (buf, &size, &map);
          /* Keep START_PTR at the same byte offset within the new buffer.  */
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
          buf = case_buf;
        }
    }

  mb_start = buf;
  buflim = buf + size;

  /* Each iteration examines one candidate line [BEG, END).  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!start_ptr)
        {
          /* We don't care about an exact match.  */
          if (kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto failure;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              /* Remember where the keyword itself was found before BEG is
                 moved back to the start of its line.  */
              match = beg;
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < kwset_exact_matches)
                {
                  if (!MBS_SUPPORT)
                    goto success;

                  if (mb_start < beg)
                    mb_start = beg;
                  if (MB_CUR_MAX == 1
                      || !is_mb_middle (&mb_start, match, buflim,
                                        kwsm.size[0]))
                    goto success;
                }
              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                              0, NULL, &backref);
              /* If there's no match, or if we've matched the sentinel,
                 we're done.  */
              if (next_beg == NULL || next_beg == buflim)
                break;
              /* Narrow down to the line we've found. */
              beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        {
          /* We are looking for the leftmost (then longest) exact match.
             We will go through the outer loop only once.  */
          beg = start_ptr;
          end = buflim;
        }

      /* If the "line" is longer than the maximum regexp offset,
         die as if we've run out of memory.  */
      if (TYPE_MAXIMUM (regoff_t) < end - buf - 1)
        xalloc_die ();

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      /* BEST_MATCH == END means "no match recorded yet".  */
      best_match = end;
      best_len = 0;
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          start = re_search (&(patterns[i].regexbuf),
                             buf, end - buf - 1,
                             beg - buf, end - beg - 1,
                             &(patterns[i].regs));
          /* Values below -1 are treated as fatal; -1 falls through to the
             next pattern.  */
          if (start < -1)
            xalloc_die ();
          else if (0 <= start)
            {
              len = patterns[i].regs.end[0] - start;
              match = buf + start;
              /* A match starting after the best one so far cannot win.  */
              if (match > best_match)
                continue;
              if (start_ptr && !match_words)
                goto assess_pattern_match;
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                {
                  match = beg;
                  len = end - beg;
                  goto assess_pattern_match;
                }
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (match_words)
                while (match <= best_match)
                  {
                    regoff_t shorter_len = 0;
                    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
                        && (start + len == end - buf - 1
                            || !WCHAR ((unsigned char) match[len])))
                      goto assess_pattern_match;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        shorter_len = re_match (&(patterns[i].regexbuf),
                                                buf, match + len - beg,
                                                match - buf,
                                                &(patterns[i].regs));
                        if (shorter_len < -1)
                          xalloc_die ();
                      }
                    if (0 < shorter_len)
                      len = shorter_len;
                    else
                      {
                        /* Try looking further on. */
                        if (match == end - 1)
                          break;
                        match++;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf),
                                           buf, end - buf - 1,
                                           match - buf, end - match - 1,
                                           &(patterns[i].regs));
                        if (start < 0)
                          {
                            if (start < -1)
                              xalloc_die ();
                            break;
                          }
                        len = patterns[i].regs.end[0] - start;
                        match = buf + start;
                      }
                  } /* while (match <= best_match) */
              continue;
            assess_pattern_match:
              if (!start_ptr)
                {
                  /* Good enough for a non-exact match.
                     No need to look at further patterns, if any.  */
                  goto success;
                }
              if (match < best_match || (match == best_match && len > best_len))
                {
                  /* Best exact match:  leftmost, then longest.  */
                  best_match = match;
                  best_len = len;
                }
            } /* if re_search >= 0 */
        } /* for Regex patterns.  */
        if (best_match < end)
          {
            /* We have found an exact match.  We were just
               waiting for the best one (leftmost then longest).  */
            beg = best_match;
            len = best_len;
            goto success_in_len;
          }
    } /* for (beg = end ..) */

  /* Nothing matched: report (size_t) -1.  */
 failure:
  ret_val = -1;
  goto out;

  /* Line-level success: the reported region is the whole line [BEG, END).  */
 success:
  len = end - beg;
  /* BEG and LEN are already final here; translate them and report.  */
 success_in_len:;
  size_t off = beg - buf;
  mb_case_map_apply (map, &off, &len);
  *match_size = len;
  ret_val = off;
 out:
  return ret_val;
}
예제 #6
0
/* Search BUF (BUF_SIZE bytes) with the compiled regex COMPILED_PATTERN
   (a struct compiled_regex).

   If EXACT is false, the kwset and/or DFA matchers locate a candidate
   line, which is confirmed with re_search when needed.  On success the
   offset of that line within BUF is returned and its length is stored in
   *MATCH_SIZE.

   If EXACT is true, re_search is run over the buffer directly; for the
   first pattern that matches, the match's start offset is returned and
   its length is stored in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches.  Any multibyte property map
   allocated here is released on every return path.  */
static size_t
EGexecute (const void *compiled_pattern,
	   const char *buf, size_t buf_size,
	   size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  register const char *buflim, *beg, *end;
  char eol = cregex->eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t i;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;
#endif /* MBS_SUPPORT */

#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  buflim = buf + buf_size;

  /* Each iteration examines one candidate line [BEG, END).  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
	{
	  if (cregex->ckwset.kwset)
	    {
	      /* Find a possible match using the KWset matcher. */
	      size_t offset = kwsexec (cregex->ckwset.kwset, beg, buflim - beg, &kwsm);
	      if (offset == (size_t) -1)
		{
#ifdef MBS_SUPPORT
		  if (MB_CUR_MAX > 1)
		    free (mb_properties);
#endif
		  return (size_t)-1;
		}
	      beg += offset;
	      /* Narrow down to the line containing the candidate, and
		 run it through DFA. */
	      end = memchr (beg, eol, buflim - beg);
	      if (end != NULL)
		end++;
	      else
		end = buflim;
#ifdef MBS_SUPPORT
	      /* Skip a candidate whose property entry is 0.  */
	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
		continue;
#endif
	      while (beg > buf && beg[-1] != eol)
		--beg;
	      if (kwsm.index < cregex->kwset_exact_matches)
		goto success;
	      if (dfaexec (&cregex->dfa, beg, end - beg, &backref) == (size_t) -1)
		continue;
	    }
	  else
	    {
	      /* No good fixed strings; start with DFA. */
	      size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg, &backref);
	      if (offset == (size_t) -1)
		break;
	      /* Narrow down to the line we've found. */
	      beg += offset;
	      end = memchr (beg, eol, buflim - beg);
	      if (end != NULL)
		end++;
	      else
		end = buflim;
	      while (beg > buf && beg[-1] != eol)
		--beg;
	    }
	  /* Successful, no backreferences encountered! */
	  if (!backref)
	    goto success;
	}
      else
	end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
	 a probable match, and we need to run it through Regex. */
      for (i = 0; i < cregex->pcount; i++)
	{
	  cregex->patterns[i].regexbuf.not_eol = 0;
	  if (0 <= (start = re_search (&(cregex->patterns[i].regexbuf), beg,
				       end - beg - 1, 0,
				       end - beg - 1, &(cregex->patterns[i].regs))))
	    {
	      len = cregex->patterns[i].regs.end[0] - start;
	      if (exact)
		{
		  *match_size = len;
#ifdef MBS_SUPPORT
		  /* Release the property map here too; this early return
		     used to leak it.  free (NULL) is harmless.  */
		  free (mb_properties);
#endif /* MBS_SUPPORT */
		  return start;
		}
	      if ((!cregex->match_lines && !cregex->match_words)
		  || (cregex->match_lines && len == end - beg - 1))
		goto success;
	      /* If -w, check if the match aligns with word boundaries.
		 We do this iteratively because:
		 (a) the line may contain more than one occurrence of the
		 pattern, and
		 (b) Several alternatives in the pattern might be valid at a
		 given point, and we may need to consider a shorter one to
		 find a word boundary.  */
	      if (cregex->match_words)
		while (start >= 0)
		  {
		    if ((start == 0 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
			&& (len == end - beg - 1
			    || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len])))
		      goto success;
		    if (len > 0)
		      {
			/* Try a shorter length anchored at the same place. */
			--len;
			cregex->patterns[i].regexbuf.not_eol = 1;
			len = re_match (&(cregex->patterns[i].regexbuf), beg,
					start + len, start,
					&(cregex->patterns[i].regs));
		      }
		    if (len <= 0)
		      {
			/* Try looking further on. */
			if (start == end - beg - 1)
			  break;
			++start;
			cregex->patterns[i].regexbuf.not_eol = 0;
			start = re_search (&(cregex->patterns[i].regexbuf), beg,
					   end - beg - 1,
					   start, end - beg - 1 - start,
					   &(cregex->patterns[i].regs));
			len = cregex->patterns[i].regs.end[0] - start;
		      }
		  }
	    }
	} /* for Regex patterns.  */
    } /* for (beg = end ..) */
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  return (size_t) -1;

  /* Line-level success: report the whole line [BEG, END).  */
 success:
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  *match_size = end - beg;
  return beg - buf;
}
예제 #7
0
/* Search BUF (BUF_SIZE bytes) for any of the fixed strings in the keyword
   set of COMPILED_PATTERN (a struct compiled_kwset).

   If EXACT is true, the offset of the first keyword occurrence is
   returned and the keyword's length is stored in *MATCH_SIZE.

   Otherwise the first occurrence that satisfies the match_lines /
   match_words constraints (if set) is located; the offset of the line
   containing it is returned and that line's length, including the
   trailing EOL byte when one is present, is stored in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches.  All exits go through the
   single cleanup label so the multibyte property map is always freed.  */
static size_t
Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
	  size_t *match_size, bool exact)
{
  struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
  register const char *beg, *curr, *end;
  register size_t len;
  char eol = ckwset->eolbyte;
  struct kwsmatch kwsmatch;
  size_t result = (size_t) -1;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;
  if (MB_CUR_MAX > 1)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  for (beg = buf; beg <= buf + buf_size; ++beg)
    {
      size_t offset =
	kwsexec (ckwset->kwset, beg, buf + buf_size - beg, &kwsmatch);
      if (offset == (size_t) -1)
	goto done;
#ifdef MBS_SUPPORT
      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
	continue; /* It is a part of multibyte character.  */
#endif /* MBS_SUPPORT */
      beg += offset;
      len = kwsmatch.size[0];
      if (exact)
	{
	  *match_size = len;
	  result = beg - buf;
	  goto done;
	}
      if (ckwset->match_lines)
	{
	  /* The keyword must span a whole line.  */
	  if (beg > buf && beg[-1] != eol)
	    continue;
	  if (beg + len < buf + buf_size && beg[len] != eol)
	    continue;
	  goto success;
	}
      else if (ckwset->match_words)
	for (curr = beg; len; )
	  {
	    if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
	      break;
	    if (curr + len < buf + buf_size
		&& IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
	      {
		/* Look for a shorter keyword inside the current match.  */
		offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
		/* No shorter keyword here: resume the outer scan rather
		   than giving up, since a later occurrence may still sit
		   on word boundaries.  */
		if (offset == (size_t) -1)
		  break;
		curr = beg + offset;
		len = kwsmatch.size[0];
	      }
	    else
	      goto success;
	  }
      else
	goto success;
    }

  goto done;

 success:
  /* Extend the match to the enclosing line.  A final line without a
     terminating EOL byte ends at the end of the buffer.  */
  end = (const char *) memchr (beg + len, eol, (buf + buf_size) - (beg + len));
  if (end != NULL)
    end++;
  else
    end = buf + buf_size;
  while (buf < beg && beg[-1] != eol)
    --beg;
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  /* mb_properties is NULL unless it was allocated; free (NULL) is a no-op.  */
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}