Beispiel #1
0
/* Search the BUF_SIZE bytes at BUF using the compiled regex set
   COMPILED_PATTERN (a struct compiled_regex).

   When EXACT is false the search is line oriented: candidate lines are
   located with the keyword set (if there is one) and/or the DFA, and are
   confirmed with the regex matcher only when needed.  On success the
   return value is the offset from BUF of the start of the matching line
   and *MATCH_SIZE is the length of that line, including its terminating
   eol byte when one is present.

   When EXACT is true the regex patterns are run directly over all but
   the last byte of BUF; on success the return value is the offset of
   the match and *MATCH_SIZE is its length.

   Returns (size_t) -1 when nothing matches.  */
static size_t
EGexecute (const void *compiled_pattern,
	   const char *buf, size_t buf_size,
	   size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  register const char *buflim, *beg, *end;
  char eol = cregex->eolbyte;
  int backref = 0, start, len;
  struct kwsmatch kwsm;
  size_t i;
  size_t result = (size_t) -1;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;

  /* The per-byte table is only consulted to vet keyword-set hits, which
     never happens in EXACT mode, so do not build it there.  */
  if (!exact && MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  buflim = buf + buf_size;

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
	{
	  if (cregex->ckwset.kwset)
	    {
	      /* Find a possible match using the KWset matcher. */
	      size_t offset = kwsexec (cregex->ckwset.kwset, beg, buflim - beg, &kwsm);
	      if (offset == (size_t) -1)
		goto done;
	      beg += offset;
#ifdef MBS_SUPPORT
	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
		{
		  /* The hit starts inside a multibyte character, so it is
		     bogus.  Resume one byte further on rather than at the
		     next line, so that a genuine match later on this same
		     line is not skipped.  */
		  end = beg + 1;
		  continue;
		}
#endif
	      /* Narrow down to the line containing the candidate, and
		 run it through DFA. */
	      end = memchr (beg, eol, buflim - beg);
	      if (end != NULL)
		end++;
	      else
		end = buflim;
	      while (beg > buf && beg[-1] != eol)
		--beg;
	      /* Keywords below this index need no further confirmation.  */
	      if (kwsm.index < cregex->kwset_exact_matches)
		goto success;
	      if (dfaexec (&cregex->dfa, beg, end - beg, &backref) == (size_t) -1)
		continue;
	    }
	  else
	    {
	      /* No good fixed strings; start with DFA. */
	      size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg, &backref);
	      if (offset == (size_t) -1)
		break;
	      /* Narrow down to the line we've found. */
	      beg += offset;
	      end = memchr (beg, eol, buflim - beg);
	      if (end != NULL)
		end++;
	      else
		end = buflim;
	      while (beg > buf && beg[-1] != eol)
		--beg;
	    }
	  /* Successful, no backreferences encountered! */
	  if (!backref)
	    goto success;
	}
      else
	end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
	 a probable match, and we need to run it through Regex. */
      for (i = 0; i < cregex->pcount; i++)
	{
	  cregex->patterns[i].regexbuf.not_eol = 0;
	  if (0 <= (start = re_search (&(cregex->patterns[i].regexbuf), beg,
				       end - beg - 1, 0,
				       end - beg - 1, &(cregex->patterns[i].regs))))
	    {
	      len = cregex->patterns[i].regs.end[0] - start;
	      if (exact)
		{
		  /* Leave through the common exit so that anything
		     allocated above is released.  */
		  *match_size = len;
		  result = start;
		  goto done;
		}
	      if ((!cregex->match_lines && !cregex->match_words)
		  || (cregex->match_lines && len == end - beg - 1))
		goto success;
	      /* If -w, check if the match aligns with word boundaries.
		 We do this iteratively because:
		 (a) the line may contain more than one occurrence of the
		 pattern, and
		 (b) Several alternatives in the pattern might be valid at a
		 given point, and we may need to consider a shorter one to
		 find a word boundary.  */
	      if (cregex->match_words)
		while (start >= 0)
		  {
		    if ((start == 0 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
			&& (len == end - beg - 1
			    || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len])))
		      goto success;
		    if (len > 0)
		      {
			/* Try a shorter length anchored at the same place. */
			--len;
			cregex->patterns[i].regexbuf.not_eol = 1;
			len = re_match (&(cregex->patterns[i].regexbuf), beg,
					start + len, start,
					&(cregex->patterns[i].regs));
		      }
		    if (len <= 0)
		      {
			/* Try looking further on. */
			if (start == end - beg - 1)
			  break;
			++start;
			cregex->patterns[i].regexbuf.not_eol = 0;
			start = re_search (&(cregex->patterns[i].regexbuf), beg,
					   end - beg - 1,
					   start, end - beg - 1 - start,
					   &(cregex->patterns[i].regs));
			len = cregex->patterns[i].regs.end[0] - start;
		      }
		  }
	    }
	} /* for Regex patterns.  */
    } /* for (beg = end ..) */
  /* Nothing matched; RESULT still holds (size_t) -1.  */
  goto done;

 success:
  /* BEG..END delimit the matching line.  */
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}
Beispiel #2
0
/* Search the BUF_SIZE bytes at BUF for any keyword in the keyword set
   of COMPILED_PATTERN (a struct compiled_kwset).

   When EXACT is true, the first keyword hit is reported as is: the
   return value is its offset from BUF and *MATCH_SIZE is its length.

   Otherwise a hit is accepted only if it satisfies the match_lines /
   match_words restrictions of the pattern (when set); the return value
   is then the offset from BUF of the start of the line containing the
   hit and *MATCH_SIZE is the length of that line, including its
   terminating eol byte when one is present.

   Returns (size_t) -1 when nothing matches.  */
static size_t
Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
	  size_t *match_size, bool exact)
{
  struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
  register const char *beg, *curr, *end;
  register size_t len;
  char eol = ckwset->eolbyte;
  struct kwsmatch kwsmatch;
  size_t result = (size_t) -1;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;
  if (MB_CUR_MAX > 1)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  for (beg = buf; beg <= buf + buf_size; ++beg)
    {
      size_t offset =
	kwsexec (ckwset->kwset, beg, buf + buf_size - beg, &kwsmatch);
      if (offset == (size_t) -1)
	goto done;
      /* Move to the hit before vetting it, so that a rejected hit makes
	 the next search start just past it instead of rediscovering the
	 same hit once for every byte in between.  */
      beg += offset;
#ifdef MBS_SUPPORT
      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
	continue; /* It is a part of multibyte character.  */
#endif /* MBS_SUPPORT */
      len = kwsmatch.size[0];
      if (exact)
	{
	  *match_size = len;
	  result = beg - buf;
	  goto done;
	}
      if (ckwset->match_lines)
	{
	  /* The hit must span a whole line.  */
	  if (beg > buf && beg[-1] != eol)
	    continue;
	  if (beg + len < buf + buf_size && beg[len] != eol)
	    continue;
	  goto success;
	}
      else if (ckwset->match_words)
	for (curr = beg; len; )
	  {
	    if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
	      break;
	    if (curr + len < buf + buf_size
		&& IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
	      {
		/* The hit runs into a word character; look for a shorter
		   keyword within the same span.  */
		offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
		if (offset == (size_t) -1)
		  /* No shorter keyword fits here.  That only rules out
		     this position, so keep scanning the rest of the
		     buffer rather than reporting overall failure.  */
		  break;
		curr = beg + offset;
		len = kwsmatch.size[0];
	      }
	    else
	      goto success;
	  }
      else
	goto success;
    }

  /* Nothing matched; RESULT still holds (size_t) -1.  */
  goto done;

 success:
  /* Widen the hit to the whole line that contains it.  The last line of
     the buffer may lack an eol byte, in which case memchr returns NULL
     and the line ends at the end of the buffer.  */
  end = (const char *) memchr (beg + len, eol, (buf + buf_size) - (beg + len));
  if (end != NULL)
    end++;
  else
    end = buf + buf_size;
  while (buf < beg && beg[-1] != eol)
    --beg;
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}