Ejemplo n.º 1
0
void
GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
{
  const char *err;
  const char *p, *sep;
  size_t total = size;
  char *motif;

  if (match_icase)
    syntax_bits |= RE_ICASE;
  re_set_syntax (syntax_bits);
  dfasyntax (syntax_bits, match_icase, eolbyte);

  /* For GNU regex compiler we have to pass the patterns separately to detect
     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
     GNU regex should have raise a syntax error.  The same for backref, where
     the backref should have been local to each pattern.  */
  p = pattern;
  do
    {
      size_t len;
      sep = memchr (p, '\n', total);
      if (sep)
        {
          len = sep - p;
          sep++;
          total -= (len + 1);
        }
      else
        {
          len = total;
          total = 0;
        }

      patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns);
      patterns[pcount] = patterns0;

      if ((err = re_compile_pattern (p, len,
                                    &(patterns[pcount].regexbuf))) != NULL)
        error (EXIT_TROUBLE, 0, "%s", err);
      pcount++;

      p = sep;
    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
     Then if DFA succeeds we do some hairy stuff using the regex matcher
     to decide whether the match should really count. */
  if (match_words || match_lines)
    {
      static char const line_beg_no_bk[] = "^(";
      static char const line_end_no_bk[] = ")$";
      static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])(";
      static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)";
      static char const line_beg_bk[] = "^\\(";
      static char const line_end_bk[] = "\\)$";
      static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
      static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
      int bk = !(syntax_bits & RE_NO_BK_PARENS);
      char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);

      strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
                             : (bk ? word_beg_bk : word_beg_no_bk));
      total = strlen(n);
      memcpy (n + total, pattern, size);
      total += size;
      strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
                                     : (bk ? word_end_bk : word_end_no_bk));
      total += strlen (n + total);
      pattern = motif = n;
      size = total;
    }
  else
    motif = NULL;

  dfa = dfaalloc ();
  dfacomp (pattern, size, dfa, 1);
  kwsmusts ();

  free(motif);
}
Ejemplo n.º 2
0
static void *
compile (const char *pattern, size_t pattern_size,
	 bool match_icase, bool match_words, bool match_lines, char eolbyte,
	 reg_syntax_t syntax)
{
  struct compiled_regex *cregex;
  const char *err;
  const char *sep;
  size_t total = pattern_size;
  const char *motif = pattern;

  cregex = (struct compiled_regex *) xmalloc (sizeof (struct compiled_regex));
  memset (cregex, '\0', sizeof (struct compiled_regex));
  cregex->match_words = match_words;
  cregex->match_lines = match_lines;
  cregex->eolbyte = eolbyte;
  cregex->patterns = NULL;
  cregex->pcount = 0;

  re_set_syntax (syntax);
  dfasyntax (syntax, match_icase, eolbyte);

  /* For GNU regex compiler we have to pass the patterns separately to detect
     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
     GNU regex should have raise a syntax error.  The same for backref, where
     the backref should have been local to each pattern.  */
  do
    {
      size_t len;
      sep = memchr (motif, '\n', total);
      if (sep)
	{
	  len = sep - motif;
	  sep++;
	  total -= (len + 1);
	}
      else
	{
	  len = total;
	  total = 0;
	}

      cregex->patterns = xrealloc (cregex->patterns, (cregex->pcount + 1) * sizeof (struct patterns));
      memset (&cregex->patterns[cregex->pcount], '\0', sizeof (struct patterns));

      if ((err = re_compile_pattern (motif, len,
				     &(cregex->patterns[cregex->pcount].regexbuf))) != NULL)
	error (exit_failure, 0, err);
      cregex->pcount++;

      motif = sep;
    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
     Then if DFA succeeds we do some hairy stuff using the regex matcher
     to decide whether the match should really count. */
  if (match_words || match_lines)
    {
      /* In the whole-word case, we use the pattern:
	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
	 In the whole-line case, we use the pattern:
	 ^(userpattern)$.  */

      static const char line_beg[] = "^(";
      static const char line_end[] = ")$";
      static const char word_beg[] = "(^|[^[:alnum:]_])(";
      static const char word_end[] = ")([^[:alnum:]_]|$)";
      char *n = (char *) xmalloc (sizeof word_beg - 1 + pattern_size + sizeof word_end);
      size_t i;
      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen(n);
      memcpy (n + i, pattern, pattern_size);
      i += pattern_size;
      strcpy (n + i, match_lines ? line_end : word_end);
      i += strlen (n + i);
      pattern = n;
      pattern_size = i;
    }

  dfacomp (pattern, pattern_size, &cregex->dfa, 1);
  kwsmusts (cregex, match_icase, match_words, match_lines, eolbyte);

  return cregex;
}