Exemple #1
0
/* Test driver: for every entry of `tests', install the entry's syntax,
   compile its regex and run re_search over the whole subject string.
   A compilation failure or a search result different from the entry's
   `retval' is reported on stdout.

   Returns 0 when every entry behaved as expected, 1 otherwise.  */
int
main (void)
{
  struct re_pattern_buffer r;
  size_t i;
  int ret = 0;

  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
    {
      re_set_syntax (tests[i].syntax);
      /* Start from a cleared buffer so the compiler allocates everything.  */
      memset (&r, 0, sizeof (r));
      if (re_compile_pattern (tests[i].regex, strlen (tests[i].regex), &r))
	{
	  /* i is a size_t, so the matching conversion is %zu (not %zd).  */
	  printf ("re_compile_pattern %zu failed\n", i);
	  ret = 1;
	  continue;
	}
      size_t len = strlen (tests[i].string);
      int rv = re_search (&r, tests[i].string, len, 0, len, NULL);
      if (rv != tests[i].retval)
	{
	  printf ("re_search %zu unexpected value %d != %d\n",
		  i, rv, tests[i].retval);
	  ret = 1;
	}
      regfree (&r);
    }
  return ret;
}
Exemple #2
0
/* Search the text of AS for the pattern S2 (S2SIZE bytes) between the
   offsets FROM and TO.

   REGEX selects regular-expression matching (otherwise RE_PLAIN is added
   to the syntax), ICASE adds RE_ICASE, and NOTBOL / NOTEOL are copied into
   the compiled pattern's not_bol / not_eol fields.  FORWARD chooses the
   direction: forwards from FROM, or backwards starting at TO - 1.

   The compile status is stored in the global re_find_err.  Returns the end
   offset of the match for a forward search, the value of re_search for a
   backward search, and a negative value when compilation failed or nothing
   matched.  */
static int
find_substr (astr as, const char *s2, size_t s2size, size_t from, size_t to,
             bool forward, bool notbol, bool noteol, bool regex, bool icase)
{
  struct re_pattern_buffer pat;
  struct re_registers regs;
  reg_syntax_t flags = RE_SYNTAX_EMACS;
  int found = -1;

  if (!regex)
    flags |= RE_PLAIN;
  if (icase)
    flags |= RE_ICASE;
  re_set_syntax (flags);

  memset (&pat, 0, sizeof (pat));
  regs.num_regs = 1;

  re_find_err = re_compile_pattern (s2, (int) s2size, &pat);
  pat.not_bol = notbol;
  pat.not_eol = noteol;

  if (!re_find_err)
    {
      if (forward)
        found = re_search (&pat, astr_cstr (as), (int) astr_len (as),
                           from, to - from, &regs);
      else
        /* A negative range makes re_search walk backwards.  */
        found = re_search (&pat, astr_cstr (as), (int) astr_len (as),
                           to - 1, -(to - 1 - from), &regs);
    }

  if (found >= 0)
    {
      /* The register arrays were allocated by re_search; grab what we
         need from them before releasing them.  */
      if (forward)
        found = regs.end[0];
      free (regs.start);
      free (regs.end);
    }

  regfree (&pat);
  return found;
}
Exemple #3
0
/* Search MEM (MEMLEN bytes, which must be followed by a NUL) backwards
   for EXPR, one match per line, printing the line that contains each
   match.

   EXPR is compiled as a POSIX basic regex in which neither `.' nor
   `[^...]' matches a newline; ICASE non-zero adds RE_ICASE.  Fatal
   problems (allocation, compilation, internal matcher error) terminate
   the program through error().

   Returns 0 when exactly EXPECTED matches were found, 1 otherwise.  */
static int
run_test_backwards (const char *expr, const char *mem, size_t memlen,
		    int icase, int expected)
{
  regex_t re;
  const char *err;
  size_t offset;
  int cnt;

  re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
		 | RE_HAT_LISTS_NOT_NEWLINE
		 | (icase ? RE_ICASE : 0));

  memset (&re, 0, sizeof (re));
  /* The fastmap is owned by the pattern buffer and released by regfree.  */
  re.fastmap = malloc (256);
  if (re.fastmap == NULL)
    error (EXIT_FAILURE, errno, "cannot allocate fastmap");

  err = re_compile_pattern (expr, strlen (expr), &re);
  if (err != NULL)
    error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);

  if (re_compile_fastmap (&re))
    error (EXIT_FAILURE, 0, "couldn't compile fastmap");

  cnt = 0;
  offset = memlen;
  assert (mem[memlen] == '\0');
  while (offset <= memlen)
    {
      int start;
      const char *sp;
      const char *ep;

      /* A negative range searches from OFFSET back towards 0.  */
      start = re_search (&re, mem, memlen, (int) offset, -(int) offset, NULL);
      if (start == -1)
	break;

      if (start == -2)
	error (EXIT_FAILURE, 0, "internal error in re_search");

      /* Widen the match to the whole line that contains it.  */
      sp = mem + start;
      while (sp > mem && sp[-1] != '\n')
	--sp;

      ep = mem + start;
      while (*ep != '\0' && *ep != '\n')
	++ep;

      printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);

      /* The match was on the first line: nothing is left to search.
	 Stopping here avoids forming a pointer before the start of MEM.  */
      if (sp == mem)
	break;

      /* Continue from the newline that ends the previous line.  */
      offset = (size_t) (sp - mem) - 1;
    }

  regfree (&re);

  /* Return an error if the number of matches found is not what we
     expect.  */
  return cnt != expected;
}
Exemple #4
0
/* Compile r->pattern->str into r->re with the GNU regex compiler,
 * allocating r->re first when it is still NULL.
 *
 * The global re_syntax_options is switched to the fixed syntax below for
 * the duration of the compilation and restored afterwards.
 *
 * Returns 0 on success. Returns -1 when r->re cannot be allocated, or when
 * the pattern does not compile; in the latter case the problem is reported
 * through syntax_error() against r->info.
 */
int regexp_compile(struct regexp *r) {
    /* See the GNU regex manual or regex.h in gnulib for
     * an explanation of these flags. They are set so that the regex
     * matcher interprets regular expressions the same way that libfa
     * does
     */
    static const reg_syntax_t syntax =
        RE_CONTEXT_INDEP_OPS|RE_CONTEXT_INVALID_OPS|RE_DOT_NOT_NULL
        |RE_INTERVALS|RE_NO_BK_BRACES|RE_NO_BK_PARENS|RE_NO_BK_REFS
        |RE_NO_BK_VBAR|RE_NO_EMPTY_RANGES
        |RE_NO_POSIX_BACKTRACKING|RE_CONTEXT_INVALID_DUP|RE_NO_GNU_OPS;
    reg_syntax_t old_syntax = re_syntax_options;
    const char *c = NULL;

    if (r->re == NULL) {
        CALLOC(r->re, 1);
        /* Without this check an allocation failure would be dereferenced
         * by the compiler call and the regs_allocated store below. */
        if (r->re == NULL)
            return -1;
    }

    re_syntax_options = syntax;
    c = re_compile_pattern(r->pattern->str, strlen(r->pattern->str), r->re);
    re_syntax_options = old_syntax;

    /* Let the matcher (re)allocate register arrays as needed. */
    r->re->regs_allocated = REGS_REALLOCATE;
    if (c != NULL) {
        char *p = escape(r->pattern->str, -1);
        syntax_error(r->info, "invalid regexp /%s/: %s", p, c);
        free(p);
        return -1;
    }
    return 0;
}
Exemple #5
0
/* Compile a regexp and signal a Lisp error if anything goes wrong.

   PATTERN is either a Lisp string holding the regexp source or a cons
   wrapping a pre-compiled pattern.  BUFP receives the compiled form and
   TRANSLATE is stored as its translate table.  For a pre-compiled
   pattern, BACKWARD selects which half of the inner pair is installed
   (the cdr when non-zero, the car otherwise).

   Nothing is returned; failures are reported by signalling
   Qinvalid_regexp.  */
void
compile_pattern (Lisp_Object pattern, struct re_pattern_buffer *bufp, char *translate, int backward)
{
  char *val;
  Lisp_Object dummy;

  /* Nothing to do when PATTERN is the regexp compiled last time and the
     translate table, multibyte flag and table versions recorded in BUFP
     still agree with the current state.  */
  if (EQ (pattern, last_regexp)
      && translate == bufp->translate /* 92.4.10 by K.Handa */
      /* 93.7.13 by K.Handa */
      && NILP (current_buffer->mc_flag) == !bufp->mc_flag
      && (!bufp->syntax_version
	  || bufp->syntax_version == syntax_table_version)
      && (!bufp->category_version
	  || bufp->category_version == category_table_version))
    return;

  if (CONSP (pattern))			/* pre-compiled regexp */
    {
      Lisp_Object compiled;

      val = 0;
      pattern = XCONS (pattern)->car;
      /* Accept only a pair whose selected half is a 4-element vector.  */
      if (CONSP (pattern)
	  && (compiled = backward ? XCONS(pattern)->cdr : XCONS(pattern)->car)
	  && XTYPE (compiled) == Lisp_Vector
	  && XVECTOR (compiled)->size == 4) {
	/* set_pattern will set bufp->allocated to NULL */
	set_pattern (compiled, bufp, translate);
	return;
      }

      val = "Invalied pre-compiled regexp";
      goto invalid_regexp;
    }

  CHECK_STRING (pattern, 0);

  /* Forget the cached regexp until the new one has compiled.  */
  last_regexp = Qnil;
  bufp->translate = translate;
  bufp->syntax_version = bufp->category_version = 0; /* 93.7.13 by K.Handa */
  /* 92.7.10 by T.Enami
     'bufp->allocated == 0' means bufp->buffer points to pre-compiled pattern
     in a lisp string, which should not be 'realloc'ed. */
  if (bufp->allocated == 0) bufp->buffer = 0; 

  val = re_compile_pattern (XSTRING (pattern)->data,
			    XSTRING (pattern)->size,
			    bufp);

  if (val)
    {
      /* Also reached by the goto above for a bad pre-compiled pattern.  */
    invalid_regexp:
      dummy = build_string (val);
      /* Keep signalling: the loop has no exit, so control never falls
	 through to the code below.  */
      while (1)
	Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
    }
  last_regexp = pattern;
  return;
}
Exemple #6
0
/* Match the string params[1] against the regular expression params[0].
 *
 * On a successful match the sub-match strings (whole match first, at most
 * 16 entries) are stored in the REGEX_REG global array of the module,
 * replacing and discarding the previous entries. Both parameter strings
 * are discarded before returning.
 *
 * Returns the value of re_search (the match position, or a negative value
 * when nothing matched), and -1 when the pattern does not compile or
 * memory could not be allocated.
 */
static int modregex_regex (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int result = -1;
    unsigned n;

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];
    int * regex_reg;

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.fastmap = malloc(256);

    if (pb.buffer == NULL || pb.fastmap == NULL)
    {
        /* Nothing has been compiled yet, so these are plain malloc blocks. */
        free (pb.buffer);
        free (pb.fastmap);
        string_discard(params[0]);
        string_discard(params[1]);
        return result;
    }

    pb.allocated = 4096;
    /* The register arrays live on the stack: tell the matcher they are
     * caller-owned and fixed in size (16 is not a valid regs_allocated
     * mode). */
    pb.regs_allocated = REGS_FIXED;
    re.num_regs = 16;
    re.start = start;
    re.end = end;

    /* NOTE(review): REG_ICASE is a regcomp() flag, not a syntax bit; the
     * syntax-level counterpart is RE_ICASE. Left unchanged because switching
     * would alter how existing patterns are interpreted - confirm intent. */
    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED | REG_ICASE;

    /* Match the regex */

    if (re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        size_t len = strlen(str);

        result = re_search (&pb, str, len, 0, len, &re);

        /* Only a non-negative result is a match; -2 (internal error) must
         * not be treated as one. */
        if (result >= 0)
        {
            /* Fill the regex_reg global variables */
            regex_reg = (int *) &GLODWORD( mod_regex, REGEX_REG);
            for (n = 0 ; n < 16 && n <= pb.re_nsub ; n++)
            {
                int from = re.start[n];
                int size = re.end[n] - re.start[n];

                /* A group that took no part in the match is reported with
                 * negative offsets: store an empty string for it. */
                if (from < 0 || size < 0)
                {
                    from = 0;
                    size = 0;
                }

                string_discard (regex_reg[n]);
                regex_reg[n] = string_newa (str + from, size);
                string_use (regex_reg[n]);
            }
        }
    }

    /* Free the resources */
    regfree (&pb);   /* releases the compiled buffer and the fastmap */
    string_discard(params[0]);
    string_discard(params[1]);

    return result;
}
Exemple #7
0
/*
**----------------------------------------------------------------------
** .Class:        TBRegexp
**
** .Method:       compile
**
** .Description:  Initialises the search engine
**
** .Parameters: const char*,  pattern,    I, the search pattern
**              bool       , use_fastmap, I, TRUE -> use a fastmap
**
** .Return value: none
**
** ---------------------------------------------------------------------
**
** .Method description:
**      Releases the current state via free(), allocates zeroed register
**      and pattern-buffer objects and compiles `pattern` into the
**      internal form.
**      On Resumption: I am not usable until a valid pattern is set.
**-----------------------------------------------------------------
*/
void TBRegexp::compile( const char* pattern, bool use_fastmap )
{
  // Drop whatever a previous compile() left behind.
  free();

  registers_ = new re_registers;
  pattern_buf_ = new re_pattern_buffer;

  // Both structures must start out cleared.
  ZeroMemory( registers_, zsizeof( re_registers ) );
  ZeroMemory( pattern_buf_, zsizeof( re_pattern_buffer ) );

  if ( use_fastmap )
  {
    pattern_buf_->fastmap = (char*) malloc( 256 ); // 256 :-).
  }

  // NOTE(review): the compiler's result is stored but never inspected.
  const char* compile_msg =
    re_compile_pattern( pattern, lstrlen( pattern ), pattern_buf_ );
}
Exemple #8
0
void
set_word_regexp (const char *regexp)
{
  const char *msg;
  struct re_pattern_buffer new_word_regexp;

  if (!*regexp || STREQ (regexp, DEFAULT_WORD_REGEXP))
    {
      default_word_regexp = true;
      return;
    }

  /* Dry run to see whether the new expression is compilable.  */
  init_pattern_buffer (&new_word_regexp, NULL);
  msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
  regfree (&new_word_regexp);

  if (msg != NULL)
    {
      M4ERROR ((warning_status, 0,
                "bad regular expression `%s': %s", regexp, msg));
      return;
    }

  /* If compilation worked, retry using the word_regexp struct.  We
     can't rely on struct assigns working, so redo the compilation.
     The fastmap can be reused between compilations, and will be freed
     by the final regfree.  */
  if (!word_regexp.fastmap)
    word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
  msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
  assert (!msg);
  re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
  if (re_compile_fastmap (&word_regexp))
    assert (false);

  default_word_regexp = false;
}
Exemple #9
0
/* Reproducer: compile "insert into" with grep syntax plus RE_ICASE and
   search a 15-byte binary subject that contains a NUL and bytes which are
   not valid UTF-8.  The search result itself is not examined; the program
   only has to get through the search.

   Returns 0 normally and 1 when the pattern cannot be compiled.  */
int
main (void)
{
  struct re_pattern_buffer r;
  struct re_registers s;

  /* If the locale is unavailable the search simply runs in the current
     locale.  */
  setlocale (LC_ALL, "en_US.UTF-8");
  memset (&r, 0, sizeof (r));
  memset (&s, 0, sizeof (s));
  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | RE_ICASE);

  /* Searching with a buffer that failed to compile is meaningless.  */
  if (re_compile_pattern ("insert into", 11, &r) != NULL)
    return 1;

  re_search (&r, "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK",
	     15, 0, 15, &s);
  regfree (&r);
  return 0;
}
Exemple #10
0
/* Run a single regex test case.

   TEST supplies the syntax, the pattern, the subject string, the start
   offset and the expected re_search result; FAIL is a prefix put in front
   of every diagnostic.  The pattern is searched from test->start to the
   end of the string.  When that succeeds at a position greater than 0 and
   the search began at offset 0, a second search is started at the
   expected position and must find the match there as well.

   Returns 0 on success and 1 after printing a diagnostic.  */
int
do_one_test (const struct test_s *test, const char *fail)
{
  struct re_pattern_buffer regbuf;
  const char *err;
  size_t len;
  int status = 0;
  int res;

  re_set_syntax (test->syntax);
  memset (&regbuf, '\0', sizeof (regbuf));
  err = re_compile_pattern (test->pattern, strlen (test->pattern),
			    &regbuf);
  if (err != NULL)
    {
      printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail, test->pattern,
	      err);
      return 1;
    }

  len = strlen (test->string);

  res = re_search (&regbuf, test->string, len,
		   test->start, len - test->start, NULL);
  if (res != test->res)
    {
      printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
	      fail, test->pattern, test->string, res, test->res);
      status = 1;
    }
  else if (test->res > 0 && test->start == 0)
    {
      /* Searching again from the match position must find it there.  */
      res = re_search (&regbuf, test->string, len,
		       test->res, len - test->res, NULL);
      if (res != test->res)
	{
	  printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
		  fail, test->pattern, test->string, res, test->res);
	  status = 1;
	}
    }

  /* Single release point for the compiled pattern.  */
  regfree (&regbuf);
  return status;
}
Exemple #11
0
/* Regression check: in the en_US.UTF-8 locale, search a string of
   three-byte UTF-8 characters followed by `x' for the pattern "[^x]x".
   The search result is not examined; the search only has to complete.

   Returns 0 on completion and 1 when the locale cannot be selected or
   the pattern cannot be compiled.  */
static int
do_test (void)
{
  struct re_pattern_buffer r;
  /* ကျွန်ုပ်x */
  const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax";
  size_t len;

  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
    {
      puts ("setlocale failed");
      return 1;
    }
  memset (&r, 0, sizeof (r));

  /* Do not search with a buffer that failed to compile.  */
  if (re_compile_pattern ("[^x]x", 5, &r) != NULL)
    {
      puts ("re_compile_pattern failed");
      return 1;
    }

  len = strlen (s);
  /* This was triggering a buffer overflow.  */
  re_search (&r, s, len, 0, len, NULL);
  regfree (&r);
  return 0;
}
Exemple #12
0
/**
 * Compile the regular expression @p re into the regex_t stored in _data.
 *
 * The syntax bits come from @p flags. The translate table, buffer and
 * allocated size of the pattern buffer are reset and a 256-byte fastmap is
 * allocated before compiling.
 *
 * @return true when both the pattern and its fastmap compiled; false (after
 *         logging a warning) otherwise.
 */
bool
Regexp::compile(vespalib::stringref re, Flags flags)
{
    re_set_syntax(flags.flags());

    regex_t *pat = (regex_t *)_data;
    pat->buffer = NULL;
    pat->allocated = 0;
    pat->translate = NULL;
    pat->fastmap = static_cast<char *>(malloc(256));

    const char *msg = re_compile_pattern(re.data(), re.size(), pat);
    if (msg != NULL) {
        LOG(warning, "invalid regexp '%s': %s", vespalib::string(re).c_str(), msg);
        return false;
    }

    const bool fastmap_ok = (re_compile_fastmap(pat) == 0);
    if (!fastmap_ok) {
        LOG(warning, "re_compile_fastmap failed for regexp '%s'", vespalib::string(re).c_str());
    }
    return fastmap_ok;
}
Exemple #13
0
int
main (void)
{
  struct re_pattern_buffer regbuf;
  const char *err;
  size_t i;
  int ret = 0;

#ifdef HAVE_MCHECK_H
  mtrace ();
#endif

  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
    {
      int start;
      re_set_syntax (tests[i].syntax);
      memset (&regbuf, '\0', sizeof (regbuf));
      err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern),
                                &regbuf);
      if (err != NULL)
	{
	  printf ("re_compile_pattern failed: %s\n", err);
	  ret = 1;
	  continue;
	}

      start = re_search (&regbuf, tests[i].string, strlen (tests[i].string),
                         0, strlen (tests[i].string), NULL);
      if (start != tests[i].start)
	{
	  printf ("re_search failed %d\n", start);
	  ret = 1;
	  regfree (&regbuf);
	  continue;
	}
      regfree (&regbuf);
    }

  return ret;
}
Exemple #14
0
/* Install REGEXP as the regular expression used to recognise words.

   DEFAULT_WORD_REGEXP selects the default behaviour.  Any other
   expression is first compiled into a scratch buffer; if that fails the
   problem is reported through M4ERROR and the current settings are left
   untouched.  Otherwise the global word_regexp is recompiled and
   word_start is rebuilt as the NUL-terminated list of every character
   (1..255) that re_search accepts as a one-character subject.  */
void
set_word_regexp (const char *regexp)
{
  int i;
  size_t nstart;
  char test[2];
  const char *msg;
  struct re_pattern_buffer trial;

  if (!strcmp (regexp, DEFAULT_WORD_REGEXP))
    {
      default_word_regexp = TRUE;
      return;
    }

  /* Dry run: a bad expression must neither clobber word_regexp nor
     switch off the default behaviour.  */
  memset (&trial, 0, sizeof (trial));
  msg = re_compile_pattern (regexp, strlen (regexp), &trial);
  regfree (&trial);

  if (msg != NULL)
    {
      M4ERROR ((warning_status, 0,
		"Bad regular expression `%s': %s", regexp, msg));
      return;
    }

  msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);

  if (msg != NULL)
    {
      /* Not expected after a successful dry run.  word_regexp can no
	 longer be trusted, so fall back to the default behaviour.  */
      M4ERROR ((warning_status, 0,
		"Bad regular expression `%s': %s", regexp, msg));
      default_word_regexp = TRUE;
      return;
    }

  default_word_regexp = FALSE;

  if (word_start == NULL)
    word_start = xmalloc (256);

  /* At most 255 characters are stored, followed by the terminator.  */
  nstart = 0;
  test[1] = '\0';
  for (i = 1; i < 256; i++)
    {
      test[0] = i;
      if (re_search (&word_regexp, test, 1, 0, 0, &regs) >= 0)
	word_start[nstart++] = test[0];
    }
  word_start[nstart] = '\0';
}
Exemple #15
0
/* Check re_match with a non-zero start offset: "[abc]*d" matched against
   "foacabdxy" from offset 2 must consume 5 characters and report the
   whole match in register 0 as 2..7.

   Returns 0 on success and 1 after printing a diagnostic.  */
int
main (void)
{
  struct re_pattern_buffer regex;
  struct re_registers regs;
  const char *s;
  int match;
  int result = 0;

  /* Clear both structures.  With a cleared pattern buffer the matcher
     allocates the register arrays itself, so regs must not carry stale
     pointers.  */
  memset (&regs, '\0', sizeof (regs));
  memset (&regex, '\0', sizeof (regex));
  s = re_compile_pattern ("[abc]*d", 7, &regex);
  if (s != NULL)
    {
      puts ("re_compile_pattern return non-NULL value");
      result = 1;
    }
  else
    {
      match = re_match (&regex, "foacabdxy", 9, 2, &regs);
      if (match != 5)
	{
	  printf ("re_match returned %d, expected 5\n", match);
	  result = 1;
	}
      else if (regs.start[0] != 2 || regs.end[0] != 7)
	{
	  printf ("re_match returned %d..%d, expected 2..7\n",
		  regs.start[0], regs.end[0]);
	  result = 1;
	}
      else
	/* Only claim success when both checks passed.  */
	puts (" -> OK");

      regfree (&regex);
    }

  return result;
}
Exemple #16
0
Fichier : search.c Projet : dmt4/ne
/* Searches buffer b for a regular expression, starting at the current
	position and moving forwards or backwards according to
	b->opt.search_back.

	regex is the expression to look for; when it is NULL, b->find_string is
	used and is recompiled only if it changed or the previous search was not
	a regexp search. skip_first moves the starting position by one character
	in the search direction.

	The pattern is compiled into the file-level re_pb; matches are reported
	through re_reg. For UTF-8 buffers the expression is rewritten first (see
	the comment inside).

	Returns OK after moving the cursor to the match, NOT_FOUND, STOPPED if
	the stop flag was raised during the scan, ERROR for an empty or
	uncompilable expression, OUT_OF_MEMORY, or
	UTF8_REGEXP_CHARACTER_CLASS_NOT_SUPPORTED. */
int find_regexp(buffer * const b, const char *regex, const bool skip_first) {

	const unsigned char * const up_case = b->encoding == ENC_UTF8 ? ascii_up_case : localised_up_case;
	bool recompile_string;

	if (!regex) {
		regex = b->find_string;
		recompile_string = b->find_string_changed || !b->last_was_regexp;
	}
	else recompile_string = true;

	if (!regex || !strlen(regex)) return ERROR;

	/* The pattern buffer is allocated once, on first use (the assignment
		inside the condition is intentional). */
	if (re_pb.buffer == NULL) {
		if (re_pb.buffer = malloc(START_BUFFER_SIZE)) re_pb.allocated = START_BUFFER_SIZE;
		else return OUT_OF_MEMORY;
	}

	re_pb.fastmap = (void *)d;

	/* We have to be careful: even if the search string has not changed, it
	is possible that case sensitivity has. In this case, we force recompilation. */

	if (b->opt.case_search) {
		if (re_pb.translate != 0) recompile_string = true;
		re_pb.translate = 0;
	}
	else {
		if (re_pb.translate != up_case) recompile_string = true;
		re_pb.translate = (unsigned char *)up_case;
	}

	if (recompile_string) {
		const char *actual_regex = regex;

		/* If the buffer encoding is UTF-8, we need to replace dots with UTF8DOT,
			non-word-constituents (\W) with UTF8NONWORD, and embed complemented
			character classes in UTF8COMP, so that they do not match UTF-8
			subsequences. Moreover, we must compute the remapping from the virtual
			to the actual groups caused by the new groups thus introduced. */

		if (b->encoding == ENC_UTF8) {
			const char *s;
			char *q;
			bool escape = false;
			int virtual_group = 0, real_group = 0, dots = 0, comps = 0, nonwords = 0;

			s = regex;

			/* We first scan regex to compute the exact number of characters of
				the actual (i.e., after substitutions) regex. */

			do {
				if (!escape) {
					if (*s == '.') dots++;
					else if (*s == '[') {
						if (*(s+1) == '^') {
							comps++;
							s++;
						}

						if (*(s+1) == ']') s++; /* A literal ]. */

						/* We scan the list up to ] and check that no non-US-ASCII characters appear. */
						do if (utf8len(*(++s)) != 1) return UTF8_REGEXP_CHARACTER_CLASS_NOT_SUPPORTED; while(*s && *s != ']');
					}
					else if (*s == '\\') {
						escape = true;
						continue;
					}
				}
				else if (*s == 'W') nonwords++;
				escape = false;
			} while(*(++s));

			/* Each substitution replaces one character (two for \W) of the
				original, hence the -1 / -2 corrections. */
			actual_regex = q = malloc(strlen(regex) + 1 + (strlen(UTF8DOT) - 1) * dots + (strlen(UTF8NONWORD) - 2) * nonwords + (strlen(UTF8COMP) - 1) * comps);
			if (!actual_regex) return OUT_OF_MEMORY;
			s = regex;
			escape = false;

			/* Second pass: copy regex into the new buffer, performing the
				substitutions and recording the group remapping in map_group.
				The loop also copies the terminating NUL. */
			do {
				if (escape || *s != '.' && *s != '(' && *s != '[' && *s != '\\') {
					if (escape && *s == 'W') {
						/* Overwrite the backslash copied on the previous iteration. */
						q--;
						strcpy(q, UTF8NONWORD);
						q += strlen(UTF8NONWORD);
						real_group++;
					}
					else *(q++) = *s;
				}
				else {
					if (*s == '\\') {
						escape = true;
						*(q++) = '\\';
						continue;
					}

					if (*s == '.') {
						strcpy(q, UTF8DOT);
						q += strlen(UTF8DOT);
						real_group++;
					}
					else if (*s == '(') {
						*(q++) = '(';
						if (virtual_group < RE_NREGS - 1) map_group[++virtual_group] = ++real_group;
					}
					else if (*s == '[') {
						if (*(s+1) == '^') {
							strcpy(q, UTF8COMP);
							q += strlen(UTF8COMP);
							s++;
							if (*(s+1) == ']') *(q++) = *(++s); /* A literal ]. */
							do	*(q++) = *(++s); while (*s && *s != ']');
							if (*s) *(q++) = ')';
							real_group++;
						}
						else {
							*(q++) = '[';
							if (*(s+1) == ']') *(q++) = *(++s); /* A literal ]. */
							do	*(q++) = *(++s); while (*s && *s != ']');
						}
					}
				}

				escape = false;
			} while(*(s++));

			/* This assert may be false if a [ is not closed. */
			assert(strlen(actual_regex) == strlen(regex) + (strlen(UTF8DOT) - 1) * dots + (strlen(UTF8NONWORD) - 2) * nonwords + (strlen(UTF8COMP) - 1) * comps);
		}

		const char * p = re_compile_pattern(actual_regex, strlen(actual_regex), &re_pb);

		/* The rewritten expression was allocated above only in the UTF-8 case. */
		if (b->encoding == ENC_UTF8) free((void*)actual_regex);

		if (p) {
			/* Here we have a very dirty hack: since we cannot return the error of
				regex, we print it here. Which means that we access term.c's
				functions. 8^( */
			print_message(p);
			alert();
			return ERROR;
		}

	}

	b->find_string_changed = 0;

	line_desc *ld = b->cur_line_desc;
	int64_t y = b->cur_line;
	stop = false;

	if (! b->opt.search_back) {

		/* Forward search: the rest of the current line first, then every
			following line from its beginning. */
		int64_t start_pos = b->cur_pos + (skip_first ? 1 : 0);

		while(y < b->num_lines && !stop) {
			assert(ld->ld_node.next != NULL);

			int64_t pos;
			if (start_pos <= ld->line_len &&
				 (pos = re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, ld->line_len - start_pos, &re_reg)) >= 0) {
				goto_line(b, y);
				goto_pos(b, pos);
				return OK;
			}

			ld = (line_desc *)ld->ld_node.next;
			start_pos = 0;
			y++;
		}
	}
	else {

		/* Backward search: a negative range makes re_search scan from
			start_pos down to the beginning of the line. */
		int64_t start_pos = b->cur_pos + (skip_first ? -1 : 0);

		while(y >= 0 && !stop) {

			assert(ld->ld_node.prev != NULL);

			int64_t pos;
			if (start_pos >= 0 &&
				 (pos = re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, -start_pos - 1, &re_reg)) >= 0) {
				goto_line(b, y);
				goto_pos(b, pos);
				return OK;
			}

			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) start_pos = ld->line_len;
			y--;
		}
	}

	return stop ? STOPPED : NOT_FOUND;
}
Exemple #17
0
/* Probe the host's GNU regex implementation for a series of known bugs.

   Each check compiles a pattern into a freshly zeroed pattern buffer and,
   where relevant, runs re_search or re_match on a small subject.  A failed
   check sets a bit in the result:
     1   the two checks run under the en_US.UTF-8 locale
     2   "a[^x]b" must not match "a\nb" (glibc bug 3957)
     4   an invalid character class name must be rejected
     8   empty range, "{1", "[an\371]*n" and negative-range search checks
     16  RE_ICASE must be honoured
     32  "[[:alnum:]_-]\\\\+$" must compile
     64  REG_STARTEND must be non-zero

   Returns the accumulated bit mask (0 when every check passed), or 1
   immediately when the "C" locale cannot be restored.  */
int
main (void)
{
  int result = 0;
  static struct re_pattern_buffer regex;
  unsigned char folded_chars[UCHAR_MAX + 1];
  int i;
  const char *s;
  struct re_registers regs;

#if HAVE_DECL_ALARM
  /* Some builds of glibc go into an infinite loop on this test.  */
  int alarm_value = 2;
  signal (SIGALRM, SIG_DFL);
  alarm (alarm_value);
#endif
  /* The locale-dependent checks are skipped when the locale is missing.  */
  if (setlocale (LC_ALL, "en_US.UTF-8"))
    {
      {
        /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
           This test needs valgrind to catch the bug on Debian
           GNU/Linux 3.1 x86, but it might catch the bug better
           on other platforms and it shouldn't hurt to try the
           test here.  */
        static char const pat[] = "insert into";
        static char const data[] =
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
                       | RE_ICASE);
        memset (&regex, 0, sizeof regex);
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
        if (s)
          result |= 1;
        else if (re_search (&regex, data, sizeof data - 1,
                            0, sizeof data - 1, &regs)
                 != -1)
          result |= 1;
      }

      /* Check whether it's really a UTF-8 locale.
         On mingw, the setlocale call succeeds but returns
         "English_United States.1252", with locale_charset() returning
         "CP1252".  */
      if (strcmp (locale_charset (), "UTF-8") == 0)
        {
          /* This test is from glibc bug 15078.
             The test case is from Andreas Schwab in
             <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
          */
          static char const pat[] = "[^x]x";
          static char const data[] =
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
            "\xe1\x80\x80"
            "\xe1\x80\xbb"
            "\xe1\x80\xbd"
            "\xe1\x80\x94"
            "\xe1\x80\xba"
            "\xe1\x80\xaf"
            "\xe1\x80\x95"
            "\xe1\x80\xba"
            "x";
          re_set_syntax (0);
          memset (&regex, 0, sizeof regex);
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
          if (s)
            result |= 1;
          else
            {
              i = re_search (&regex, data, sizeof data - 1,
                             0, sizeof data - 1, 0);
              /* Either of these two positions is accepted.  */
              if (i != 0 && i != 21)
                result |= 1;
            }
        }

      /* The remaining checks run in the "C" locale.  */
      if (! setlocale (LC_ALL, "C"))
        return 1;
    }

  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[^x]b", 6, &regex);
  if (s)
    result |= 2;
  /* This should fail, but succeeds for glibc-2.5.  */
  else if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
    result |= 2;

  /* This regular expression is from Spencer ere test number 75
     in grep-2.3.  */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
  memset (&regex, 0, sizeof regex);
  /* Identity translate table: every byte maps to itself.  */
  for (i = 0; i <= UCHAR_MAX; i++)
    folded_chars[i] = i;
  regex.translate = folded_chars;
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
  /* This should fail with _Invalid character class name_ error.  */
  if (!s)
    result |= 4;

  /* Ensure that [b-a] is diagnosed as invalid, when
     using RE_NO_EMPTY_RANGES. */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[b-a]", 6, &regex);
  if (s == 0)
    result |= 8;

  /* This should succeed, but does not for glibc-2.1.3.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("{1", 2, &regex);
  if (s)
    result |= 8;

  /* The following example is derived from a problem report
     against gawk from Jorge Stolfi <*****@*****.**>.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
  if (s)
    result |= 8;
  /* This should match, but does not for glibc-2.2.1.  */
  else if (re_match (&regex, "an", 2, 0, &regs) != 2)
    result |= 8;

  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 8;
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
  else if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
    result |= 8;

  /* The version of regex.c in older versions of gnulib
     ignored RE_ICASE.  Detect that problem too.  */
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 16;
  else if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
    result |= 16;

  /* Catch a bug reported by Vin Shelton in
     http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
     */
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
                 & ~RE_CONTEXT_INVALID_DUP
                 & ~RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
  if (s)
    result |= 32;

  /* REG_STARTEND was added to glibc on 2004-01-15.
     Reject older versions.  */
  if (! REG_STARTEND)
    result |= 64;

#if 0
  /* It would be nice to reject hosts whose regoff_t values are too
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
     32-bit int), but we should wait until glibc implements this
     feature.  Otherwise, support for equivalence classes and
     multibyte collation symbols would always be broken except
     when compiling --without-included-regex.   */
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
      || sizeof (regoff_t) < sizeof (ssize_t))
    result |= 64;
#endif

  return result;
}
Exemple #18
0
void
GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
{
  const char *err;
  const char *p, *sep;
  size_t total = size;
  char *motif;

  if (match_icase)
    syntax_bits |= RE_ICASE;
  re_set_syntax (syntax_bits);
  dfasyntax (syntax_bits, match_icase, eolbyte);

  /* For GNU regex compiler we have to pass the patterns separately to detect
     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
     GNU regex should have raise a syntax error.  The same for backref, where
     the backref should have been local to each pattern.  */
  p = pattern;
  do
    {
      size_t len;
      sep = memchr (p, '\n', total);
      if (sep)
        {
          len = sep - p;
          sep++;
          total -= (len + 1);
        }
      else
        {
          len = total;
          total = 0;
        }

      patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns);
      patterns[pcount] = patterns0;

      if ((err = re_compile_pattern (p, len,
                                    &(patterns[pcount].regexbuf))) != NULL)
        error (EXIT_TROUBLE, 0, "%s", err);
      pcount++;

      p = sep;
    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
     Then if DFA succeeds we do some hairy stuff using the regex matcher
     to decide whether the match should really count. */
  if (match_words || match_lines)
    {
      static char const line_beg_no_bk[] = "^(";
      static char const line_end_no_bk[] = ")$";
      static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])(";
      static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)";
      static char const line_beg_bk[] = "^\\(";
      static char const line_end_bk[] = "\\)$";
      static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
      static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
      int bk = !(syntax_bits & RE_NO_BK_PARENS);
      char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);

      strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
                             : (bk ? word_beg_bk : word_beg_no_bk));
      total = strlen(n);
      memcpy (n + total, pattern, size);
      total += size;
      strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
                                     : (bk ? word_end_bk : word_end_no_bk));
      total += strlen (n + total);
      pattern = motif = n;
      size = total;
    }
  else
    motif = NULL;

  dfa = dfaalloc ();
  dfacomp (pattern, size, dfa, 1);
  kwsmusts ();

  free(motif);
}
Exemple #19
0
/* Compile the regular expression described by NEW_REGEX (source text in
   ->re, length ->sz, flags in ->flags) into NEW_REGEX->pattern.

   NEEDED_SUB is zero when no sub-expression information is required;
   otherwise needed_sub - 1 is the highest sub-expression number that is
   referenced, which is checked against the compiled pattern at the end.

   When REG_PERL is defined the pattern is compiled with regncomp();
   otherwise the GNU re_compile_pattern() interface is used with a syntax
   derived from the global flags and from `posixicity'.  All errors are
   reported through bad_prog().  */
static void
compile_regex_1 (struct regex *new_regex, int needed_sub)
{
#ifdef REG_PERL
  int errcode;
  errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz,
                     (needed_sub ? 0 : REG_NOSUB)
                     | new_regex->flags
                     | extended_regexp_flags);

  if (errcode)
    {
      char errorbuf[200];
      regerror(errcode, NULL, errorbuf, 200);
      bad_prog(gettext(errorbuf));
    }
#else
  const char *error;
  /* Start from the POSIX extended or basic syntax, as requested.  */
  int syntax = ((extended_regexp_flags & REG_EXTENDED)
                 ? RE_SYNTAX_POSIX_EXTENDED
                 : RE_SYNTAX_POSIX_BASIC);

  syntax &= ~RE_DOT_NOT_NULL;
  syntax |= RE_NO_POSIX_BACKTRACKING;

  switch (posixicity)
    {
    case POSIXLY_EXTENDED:
      syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_CORRECT:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_BASIC:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS;
      break;
    }

  /* Use the syntax bits for case folding and sub-expression suppression
     when the regex library provides them.  */
#ifdef RE_ICASE
  syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0;
#endif
#ifdef RE_NO_SUB
  syntax |= needed_sub ? 0 : RE_NO_SUB;
#endif

  /* One fastmap entry per possible char value.
     NOTE(review): the allocation result is not checked.  */
  new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));

  /* If REG_NEWLINE is set, newlines are treated differently.  */
  if (new_regex->flags & REG_NEWLINE)
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
    }

  re_set_syntax (syntax);
  error = re_compile_pattern (new_regex->re, new_regex->sz,
                              &new_regex->pattern);
  new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0;

  new_regex->pattern.translate = NULL;
#ifndef RE_ICASE
  /* Without RE_ICASE, case-insensitive matching is emulated with a
     translate table that maps every character to lower case.  The table
     is static, so it is shared by all patterns compiled this way.  */
  if (new_regex->flags & REG_ICASE)
    {
      static char translate[1 << (sizeof(char) * 8)];
      int i;
      for (i = 0; i < sizeof(translate) / sizeof(char); i++)
        translate[i] = tolower (i);

      new_regex->pattern.translate = translate;
    }
#endif

  /* The compile status is examined only after the pattern fields above
     have been filled in.  */
  if (error)
    bad_prog(error);
#endif

  /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
  if (needed_sub
      && new_regex->pattern.re_nsub < needed_sub - 1
      && posixicity == POSIXLY_EXTENDED)
    {
      char buf[200];
      sprintf(buf, _("invalid reference \\%d on `s' command's RHS"),
              needed_sub - 1);
      bad_prog(buf);
    }
}
Exemple #20
0
Fichier : re.c Projet : WndSks/msys
/*
 * make_regexp --- build a Regexp from the LEN bytes starting at S.
 *
 * The text is first copied into a scratch buffer with backslash escapes
 * processed (see the loop below); that copy is what is handed to
 * re_compile_pattern() and, when DFA is nonzero and the GAWK_NO_DFA
 * environment variable is not set, to dfacomp() as well.  A nonzero
 * IGNORECASE selects case-insensitive matching, either through RE_ICASE
 * or through the casetable translation table.  A pattern that does not
 * compile is reported with fatal().  The scratch buffer is freed before
 * returning; the newly allocated Regexp is returned to the caller.
 */
Regexp *
make_regexp(const char *s, size_t len, int ignorecase, int dfa)
{
	Regexp *rp;
	const char *rerr;
	const char *src = s;
	char *temp;
	const char *end = s + len;
	register char *dest;
	register int c, c2;
	static short first = TRUE;
	static short no_dfa = FALSE;
	int has_anchor = FALSE;

	/* The number of bytes in the current multibyte character.
	   It is 0, when the current character is a singlebyte character.  */
	size_t is_multibyte = 0;
#ifdef MBS_SUPPORT
	mbstate_t mbs;

	if (gawk_mb_cur_max > 1)
		memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize.  */
#endif

	/* The environment is consulted only on the first call. */
	if (first) {
		first = FALSE;
		no_dfa = (getenv("GAWK_NO_DFA") != NULL);	/* for debugging and testing */
	}

	/* Handle escaped characters first. */

	/*
	 * Build a copy of the string (in dest) with the
	 * escaped characters translated, and generate the regex
	 * from that.
	 */
	emalloc(dest, char *, len + 2, "make_regexp");
	temp = dest;	/* temp keeps the start of the buffer; dest is the write cursor */

	while (src < end) {
#ifdef MBS_SUPPORT
		if (gawk_mb_cur_max > 1 && ! is_multibyte) {
			/* The previous byte is a singlebyte character, or last byte
			   of a multibyte character.  We check the next character.  */
			is_multibyte = mbrlen(src, end - src, &mbs);
			if ((is_multibyte == 1) || (is_multibyte == (size_t) -1)
				|| (is_multibyte == (size_t) -2 || (is_multibyte == 0))) {
				/* We treat it as a singlebyte character.  */
				is_multibyte = 0;
			}
		}
#endif

		/* We skip multibyte character, since it must not be a special
		   character.  */
		if ((gawk_mb_cur_max == 1 || ! is_multibyte) &&
		    (*src == '\\')) {
			/* c is the character that follows the backslash */
			c = *++src;
			switch (c) {
			case 'a':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
			case 'v':
			case 'x':
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				/* parse_escape() is given &src so that it can advance it */
				c2 = parse_escape(&src);
				if (c2 < 0)
					cant_happen();
				/*
				 * Unix awk treats octal (and hex?) chars
				 * literally in re's, so escape regexp
				 * metacharacters.
				 */
				if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
				    && strchr("()|*+?.^$\\[]", c2) != NULL)
					*dest++ = '\\';
				*dest++ = (char) c2;
				break;
			case '8':
			case '9':	/* a\9b not valid */
				/* the digit is copied without its backslash */
				*dest++ = c;
				src++;
				break;
			case 'y':	/* normally \b */
				/* gnu regex op */
				if (! do_traditional) {
					/* \y is rewritten as \b for the regex compiler */
					*dest++ = '\\';
					*dest++ = 'b';
					src++;
					break;
				}
				/* else, fall through */
			default:
				/* any other escape is copied through unchanged */
				*dest++ = '\\';
				*dest++ = (char) c;
				src++;
				break;
			} /* switch */
		} else {
			c = *src;
			/* note that a ^ or $ was copied by this branch */
			if (c == '^' || c == '$')
				has_anchor = TRUE;
			*dest++ = *src++;	/* not '\\' */
		}
		/* one more byte of the current multibyte character has been consumed */
		if (gawk_mb_cur_max > 1 && is_multibyte)
			is_multibyte--;
	} /* while */

	*dest = '\0' ;	/* Only necessary if we print dest ? */
	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
	memset((char *) rp, 0, sizeof(*rp));
	rp->pat.allocated = 0;	/* regex will allocate the buffer */
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");

	/*
	 * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
	 * was going to turn out to be, I wouldn't have bothered with it.
	 *
	 * In the case where we have a multibyte character set, we have no
	 * choice but to use RE_ICASE, since the casetable is for single-byte
	 * character sets only.
	 *
	 * On the other hand, if we do have a single-byte character set,
	 * using the casetable should give  a performance improvement, since
	 * it's computed only once, not each time a regex is compiled.  We
	 * also think it's probably better for portability.  See the
	 * discussion by the definition of casetable[] in eval.c.
	 */

	/*
	 * NOTE(review): syn is not declared in this function; presumably it
	 * is a file-level regex syntax setting --- confirm.  It is modified
	 * here, so the RE_ICASE bit persists after this call returns.
	 */
	if (ignorecase) {
		if (gawk_mb_cur_max > 1) {
			syn |= RE_ICASE;
			rp->pat.translate = NULL;
		} else {
			syn &= ~RE_ICASE;
			rp->pat.translate = (char *) casetable;
		}
	} else {
		rp->pat.translate = NULL;
		syn &= ~RE_ICASE;
	}

	/* the dfa code is always told about case folding through RE_ICASE */
	dfasyntax(syn | (ignorecase ? RE_ICASE : 0), ignorecase ? TRUE : FALSE, '\n');
	re_set_syntax(syn);

	/* from here on len is the length of the translated copy */
	len = dest - temp;
	if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
		fatal("%s: /%s/", rerr, temp);	/* rerr already gettextized inside regex routines */

	/* gack. this must be done *after* re_compile_pattern */
	rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
	if (dfa && ! no_dfa) {
		dfacomp(temp, len, &(rp->dfareg), TRUE);
		rp->dfa = TRUE;
	} else
		rp->dfa = FALSE;
	rp->has_anchor = has_anchor;

	free(temp);
	return rp;
}
Exemple #21
0
static void *
compile (const char *pattern, size_t pattern_size,
	 bool match_icase, bool match_words, bool match_lines, char eolbyte,
	 reg_syntax_t syntax)
{
  struct compiled_regex *cregex;
  const char *err;
  const char *sep;
  size_t total = pattern_size;
  const char *motif = pattern;

  cregex = (struct compiled_regex *) xmalloc (sizeof (struct compiled_regex));
  memset (cregex, '\0', sizeof (struct compiled_regex));
  cregex->match_words = match_words;
  cregex->match_lines = match_lines;
  cregex->eolbyte = eolbyte;
  cregex->patterns = NULL;
  cregex->pcount = 0;

  re_set_syntax (syntax);
  dfasyntax (syntax, match_icase, eolbyte);

  /* For GNU regex compiler we have to pass the patterns separately to detect
     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
     GNU regex should have raise a syntax error.  The same for backref, where
     the backref should have been local to each pattern.  */
  do
    {
      size_t len;
      sep = memchr (motif, '\n', total);
      if (sep)
	{
	  len = sep - motif;
	  sep++;
	  total -= (len + 1);
	}
      else
	{
	  len = total;
	  total = 0;
	}

      cregex->patterns = xrealloc (cregex->patterns, (cregex->pcount + 1) * sizeof (struct patterns));
      memset (&cregex->patterns[cregex->pcount], '\0', sizeof (struct patterns));

      if ((err = re_compile_pattern (motif, len,
				     &(cregex->patterns[cregex->pcount].regexbuf))) != NULL)
	error (exit_failure, 0, err);
      cregex->pcount++;

      motif = sep;
    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
     Then if DFA succeeds we do some hairy stuff using the regex matcher
     to decide whether the match should really count. */
  if (match_words || match_lines)
    {
      /* In the whole-word case, we use the pattern:
	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
	 In the whole-line case, we use the pattern:
	 ^(userpattern)$.  */

      static const char line_beg[] = "^(";
      static const char line_end[] = ")$";
      static const char word_beg[] = "(^|[^[:alnum:]_])(";
      static const char word_end[] = ")([^[:alnum:]_]|$)";
      char *n = (char *) xmalloc (sizeof word_beg - 1 + pattern_size + sizeof word_end);
      size_t i;
      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen(n);
      memcpy (n + i, pattern, pattern_size);
      i += pattern_size;
      strcpy (n + i, match_lines ? line_end : word_end);
      i += strlen (n + i);
      pattern = n;
      pattern_size = i;
    }

  dfacomp (pattern, pattern_size, &cregex->dfa, 1);
  kwsmusts (cregex, match_icase, match_words, match_lines, eolbyte);

  return cregex;
}
Exemple #22
0
string *grep(char *regexp, char *line, int num_vars) 
{
  struct re_pattern_buffer *rc;
  struct re_registers *p;
  const_string ok;
  string *vars = NULL;
  string *lookup;
  int i;

  if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    fprintf(stderr, "Grep\n\t%s\n\tin\n\t%s\n", regexp, line);
  }

  if (test_file('z', line))
    return NULL;

  /* This will retrieve the precompiled regexp or compile it and
     remember it. vars contains the strings matched, num_vars the number
     of these strings. */
#if 0
  if ((lookup = hash_lookup(symtab, regexp)))
    rc = (struct re_pattern_buffer *)lookup[0];
  else
    rc = NULL;
  if (rc == NULL) {
#endif
    /* Compile the regexp and stores the result */

    if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
      fprintf(stderr, "\tCompiling the regexp\n");
    }

    re_syntax_options = RE_SYNTAX_POSIX_EGREP;
    rc = (struct re_pattern_buffer *) calloc(1, sizeof(struct re_pattern_buffer));
    rc->regs_allocated = REGS_UNALLOCATED;
    if ((ok = re_compile_pattern(regexp, strlen(regexp), rc)) != 0)
      FATAL1("Can't compile regex %s\n", regexp);
#if 0
    hash_remove_all(symtab, regexp);
    hash_insert(symtab, regexp, (char *)rc);
  }
  else   if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    fprintf(stderr, "\tAlready compiled\n");
  }
#endif

  p = (struct re_registers *) calloc(1, sizeof(struct re_registers));
  p->num_regs = num_vars;
  if ((re_match(rc, line, strlen(line), 0, p)) > 0) {
    vars = (char **) xmalloc ((num_vars+1) * sizeof(char *));
    for (i = 0; i <= num_vars; i++) {
      vars[i] = malloc((p->end[i] - p->start[i] + 1)*sizeof(char));
      strncpy(vars[i], line+p->start[i], p->end[i] - p->start[i]);
      vars[i][p->end[i] - p->start[i]] = '\0';
    }
  }
  free (p);
  if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    if (vars)
      for(i = 0; i <= num_vars; i++)
	fprintf(stderr, "String %d matches %s\n", i, vars[i]);
  }
  return vars;
}
Exemple #23
0
/* Append the N bytes at SRC to the NUL-terminated text held in *BUF.
   *LEN is the current text length and *CAP the allocated size; both are
   updated.  Returns 1 on success, or 0 when the buffer could not be
   grown (*BUF, *CAP and *LEN are then left as they were). */

static int modregex_append (char ** buf, unsigned * cap, unsigned * len,
                            const char * src, unsigned n)
{
    if (*cap - *len <= n)
    {
        /* Room for the text, the new bytes and the terminator,
           rounded up to a multiple of 128 */
        unsigned wanted = (*len + n + 1 + 127) & ~127u;
        char * grown = realloc (*buf, wanted);

        if (!grown) return 0;
        *buf = grown;
        *cap = wanted;
    }
    memcpy (*buf + *len, src, n);
    *len += n;
    (*buf)[*len] = 0;
    return 1;
}

/* Replace every match of the regular expression params[0] found in the
   string params[2] by params[1], and return the id of the new string.

   In the replacement text, a backslash followed by a digit N is replaced
   by what register N captured; other backslashes are kept as they are.
   The registers of the first match are also stored in the REGEX_REG
   globals.  If the expression does not compile, the result is a copy of
   params[2].  The three parameter strings are discarded.

   A match of zero length is replaced too; the character after it is then
   copied to the result before the search goes on, so that no input is
   lost and the position never runs past the end of the subject. */

static int modregex_regex_replace (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * rep = string_get(params[1]);
    const char * str = string_get(params[2]);

    unsigned reg_len = strlen(reg);
    unsigned str_len = strlen(str);
    unsigned rep_len = strlen(rep);
    char * replacement;
    unsigned replacement_len;
    int fixed_replacement = strchr(rep, '\\') ? 0:1;

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];

    unsigned startpos = 0;
    unsigned nextpos;
    int found;
    int matched;
    int regex_filled = 0;
    int ok = 1;

    char * result = 0;
    unsigned result_allocated = 0;
    unsigned result_len = 0;
    int result_string = 0;

    unsigned n;
    int * regex_reg;

    /* Alloc a buffer for the resulting string */

    result = malloc(128);
    if (result)
    {
        result_allocated = 128;
        *result = 0;
    }

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = pb.buffer ? 4096 : 0;
    pb.used = 0;
    pb.fastmap = malloc(256);
    pb.translate = NULL;
    pb.fastmap_accurate = 0;

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Run the regex */

    if (re_compile_pattern (reg, reg_len, &pb) == 0)
    {
        /* Have the matcher fill our 16 local registers. This is set
           after compiling, since compiling may reset regs_allocated;
           REGS_FIXED is the value that stands for caller-owned arrays */

        pb.regs_allocated = REGS_FIXED;
        re.num_regs = 16;
        re.start = start;
        re.end = end;

        startpos = 0;

        while (startpos < str_len)
        {
            found = re_search (&pb, str, str_len, startpos,
                str_len - startpos, &re);
            if (found < 0) break;
            nextpos = found;

            /* Fill the REGEX_REG global variables */

            if (regex_filled == 0)
            {
                regex_filled = 1;
                regex_reg = (int *)&GLODWORD( mod_regex, REGEX_REG);
                for (n = 0 ; n < 16 && n <= pb.re_nsub ; n++)
                {
                    /* An unset register (-1) gives an empty string */
                    int from = re.start[n] < 0 ? 0 : re.start[n];
                    int len  = re.start[n] < 0 ? 0 : re.end[n] - re.start[n];

                    string_discard (regex_reg[n]);
                    regex_reg[n] = string_newa (str + from, len);
                    string_use (regex_reg[n]);
                }
            }

            /* Prepare the replacement string */

            if (fixed_replacement == 0)
            {
                int total_length = rep_len;
                const char * bptr;
                const char * ptr;

                /* Count the size: each \N (2 characters) is replaced
                   by the text of register N */

                for (ptr = strchr(rep, '\\') ; ptr ; ptr = strchr(ptr+1, '\\'))
                {
                    if (ptr[1] >= '0' && ptr[1] <= '9')
                        total_length += re.end[ptr[1]-'0'] - re.start[ptr[1]-'0'] - 2;
                }

                /* Fill the replacement string */

                replacement = calloc (total_length+1, 1);
                if (!replacement)
                {
                    ok = 0;
                    break;
                }

                replacement_len = 0;
                bptr = rep;
                for (ptr = strchr(rep, '\\') ; ptr ; ptr = strchr(ptr+1, '\\'))
                {
                    if (ptr[1] >= '0' && ptr[1] <= '9')
                    {
                        int group = ptr[1]-'0';
                        int group_len = re.end[group] - re.start[group];

                        memcpy (replacement+replacement_len, bptr, ptr-bptr);
                        replacement_len += ptr-bptr;
                        if (group_len > 0)
                        {
                            memcpy (replacement+replacement_len, str + re.start[group], group_len);
                            replacement_len += group_len;
                        }
                        bptr = ptr+2;
                    }
                }
                strcpy (replacement+replacement_len, bptr);
                replacement_len += strlen(bptr);
            }
            else
            {
                replacement = (char *)rep;
                replacement_len = rep_len;
            }

            /* Fill the resulting string: the text before the match,
               then the replacement */

            ok = modregex_append (&result, &result_allocated, &result_len,
                                  str+startpos, nextpos-startpos)
              && modregex_append (&result, &result_allocated, &result_len,
                                  replacement, replacement_len);

            if (fixed_replacement == 0) free (replacement);
            if (!ok) break;

            /* Continue the search after the match */

            matched = re_match (&pb, str, str_len, nextpos, 0);
            if (matched < 0)
            {
                startpos = nextpos;
                break;
            }
            if (matched == 0)
            {
                /* Empty match: step over one character, keeping it */
                if (nextpos < str_len)
                    ok = modregex_append (&result, &result_allocated,
                                          &result_len, str+nextpos, 1);
                startpos = nextpos+1;
                if (!ok) break;
            }
            else
                startpos = nextpos+matched;
        }
    }

    /* Copy remaining characters (startpos may be past the end after an
       empty match at the end of the subject) */

    if (ok && startpos < str_len)
        modregex_append (&result, &result_allocated, &result_len,
                         str+startpos, str_len-startpos);

    /* Free resources (regfree releases the buffer and the fastmap) */

    regfree (&pb);
    string_discard(params[0]);
    string_discard(params[1]);
    string_discard(params[2]);

    /* Return the new string */

    result_string = string_new(result ? result : "");
    string_use(result_string);
    free(result);

    return result_string;
}
Exemple #24
0
/* Split the string params[1] at every match of the regular expression
   params[0]. The ids of the pieces are stored in the array at params[2],
   which has room for params[3] entries; the text after the last match is
   stored as a final piece if there is still room. Returns the number of
   entries written (0 if the expression does not compile or the array has
   no room). Both parameter strings are discarded. */

static int modregex_split (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int * result_array = (int *)params[2];
    int result_array_size = params[3];
    int count = 0;
    int pos, lastpos = 0;
    int str_len = strlen(str);
    int matched;

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = pb.buffer ? 4096 : 0;
    pb.fastmap = malloc(256);

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Match the regex */

    if (re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        /* Have the matcher fill our 16 local registers. This is set
           after compiling, since compiling may reset regs_allocated;
           REGS_FIXED is the value that stands for caller-owned arrays */

        pb.regs_allocated = REGS_FIXED;
        re.num_regs = 16;
        re.start = start;
        re.end = end;

        /* Never write a piece when the array has no room left */

        while (result_array_size > 0)
        {
            pos = re_search (&pb, str, str_len, lastpos, str_len, &re);
            if (pos < 0) break;     /* -1: no match, -2: internal error */
            *result_array = string_newa (str + lastpos, pos-lastpos);
            string_use(*result_array);
            result_array++;
            count++;
            result_array_size--;
            if (result_array_size == 0) break;

            matched = re_match (&pb, str, str_len, pos, 0);
            if (matched < 0)
            {
                lastpos = pos;
                break;
            }

            /* Step over the separator, or over one character when the
               separator is empty */

            lastpos = pos + (matched > 0 ? matched : 1);
            if (lastpos > str_len)
            {
                /* Empty match at the end: stay inside the string */
                lastpos = str_len;
                break;
            }
        }
        if (result_array_size > 0)
        {
            *result_array = string_new (str + lastpos);
            string_use (*result_array);
            count++;
        }
    }

    /* Free the resources (regfree releases the buffer and the fastmap) */
    regfree (&pb);
    string_discard(params[0]);
    string_discard(params[1]);

    return count;
}
// Repeatedly deletes from `string` whatever PATTERN matches, then trims
// white space.  PATTERN (compiled once into the global gRe) covers:
// leading spaces; a leading "[...]" tag followed by a word start, two or
// more spaces, or runs of 2-3 character words ending in ':'; the same
// prefixes without the tag; and a trailing "(fwd)".  The translation table
// maps a-z to upper case, so matching ignores ASCII case.
// Presumably this normalizes mail subjects ("Re:", "Fwd:" ...) for
// threading -- inferred from the name and the closing comment.
_EXPORT void SubjectToThread (BString &string)
{
// a regex that matches a non-ASCII UTF8 character:
#define U8C \
	"[\302-\337][\200-\277]" \
	"|\340[\302-\337][\200-\277]" \
	"|[\341-\357][\200-\277][\200-\277]" \
	"|\360[\220-\277][\200-\277][\200-\277]" \
	"|[\361-\367][\200-\277][\200-\277][\200-\277]" \
	"|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
	"|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
	"|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
	"|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"

#define PATTERN \
	"^ +" \
	"|^(\\[[^]]*\\])(\\<|  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
	"|^(  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
	"| *\\(fwd\\) *$"

	// Only the caller for which atomic_add() returns 0 compiles the
	// pattern; gRebuf is published last, once compilation succeeded.
	if (gRebuf == NULL && atomic_add(&gLocker,1) == 0)
	{
		// the idea is to compile the regexp once to speed up testing

		for (int i=0; i<256; ++i) gTranslation[i]=i;
		for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);

		gRe.translate = gTranslation;
		gRe.regs_allocated = REGS_FIXED;
		re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;

		const char *pattern = PATTERN;
		// count subexpressions in PATTERN
		// (a character following a backslash is skipped, so "\\(" is
		// not counted)
		for (unsigned int i=0; pattern[i] != 0; ++i)
		{
			if (pattern[i] == '\\')
				++i;
			else if (pattern[i] == '(')
				++gNsub;
		}

		const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
		if (err == NULL)
			gRebuf = &gRe;
		else
			fprintf(stderr, "Failed to compile the regex: %s\n", err);
	}
	else
	{
		// Another caller is (or was) compiling: poll gRebuf at most 200
		// times, calling snooze(10000) between polls.
		int32 tries = 200;
		while (gRebuf == NULL && tries-- > 0)
			snooze(10000);
	}

	// If the pattern is still unavailable the string is only trimmed.
	if (gRebuf)
	{
		struct re_registers regs;
		// can't be static if this function is to be thread-safe

		// NOTE(review): the arrays hold gNsub entries, i.e. registers
		// 0 .. gNsub-1; the malloc results are not checked.
		regs.num_regs = gNsub;
		regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
		regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));

		// Search again from offset 0 after every edit, until nothing
		// matches any more.
		for (int start=0;
		    (start=re_search(gRebuf, string.String(), string.Length(),
							0, string.Length(), &regs)) >= 0;
			)
		{
			//
			// we found something
			//

			// don't delete [bemaildaemon]...
			// (when the match begins with group 1, removal starts
			// at group 2 instead)
			if (start == regs.start[1])
				start = regs.start[2];

			string.Remove(start,regs.end[0]-start);
			// unless the removal was at offset 0, put a single
			// space where the text was removed
			if (start) string.Insert(' ',1,start);
		}

		free(regs.start);
		free(regs.end);
	}

	// Finally remove leading and trailing space.  Some software, like
	// tm-edit 1.8, appends a space to the subject, which would break
	// threading if we left it in.
	trim_white_space(string);
}
Exemple #26
0
Fichier : ne.c Projet : vigna/ne
/* Program entry point.  In order: sets the locale and derives io_utf8
   from it, scans argv for the "--" options (recording in skiplist the
   entries already consumed), loads menu and key-binding configuration
   unless --noconfig was given, creates the first buffer and the INT_MAX
   clip, initializes the terminal, loads standard input and/or the files
   named on the command line, optionally runs a start-up macro, and then
   enters the key-dispatch loop, which has no exit of its own. */

int main(int argc, char **argv) {

	char *locale = setlocale(LC_ALL, "");
	for(int i = 0; i < 256; i++) localised_up_case[i] = toupper(i);

	/* io_utf8 is turned on when the locale name matches LOCALE_REGEX
	   (compared through the upper-casing table) with group 1 set. */
	if (locale) {
		struct re_pattern_buffer re_pb;
		struct re_registers re_reg;
		memset(&re_pb, 0, sizeof re_pb);
		memset(&re_reg, 0, sizeof re_reg);

		re_pb.translate = localised_up_case;
		/* NOTE(review): the result of re_compile_pattern() is not
		   checked, and the compiled pattern is not released here. */
		re_compile_pattern(LOCALE_REGEX, strlen(LOCALE_REGEX), &re_pb);
		if (re_search(&re_pb, locale, strlen(locale), 0, strlen(locale), &re_reg) >= 0) {
			if (re_reg.start[1] >= 0) io_utf8 = true;
		}
		free(re_reg.start);
		free(re_reg.end);
	}

	bool no_config = false;
	char *macro_name = NULL, *key_bindings_name = NULL, *menu_conf_name = NULL, *startup_prefs_name = DEF_PREFS_NAME;

	/* One flag per argv entry; a set flag means "already consumed as an
	   option, do not treat as a file name". */
	char * const skiplist = calloc(argc, 1);
	if (!skiplist) exit(1);  /* We need this many flags. */

	for(int i = 1; i < argc; i++) {

		/* strcmp() stops at the embedded "\0", so the comparison is
		   against "-help" only; VERSION_STRING merely follows it inside
		   the same string literal. */
		if (argv[i][0] == '-' && (!strcmp(&argv[i][1], "h") || !strcmp(&argv[i][1], "-help" "\0" VERSION_STRING))) {
			puts(ARG_HELP);
			exit(0);
		}

		/* Special arguments start with two dashes. If we find one, we
		   cancel its entry in argv[], so that it will be skipped when opening
		   the specified files. The only exception is +N for skipping to the
		   N-th line. */

		if (argv[i][0] == '-' && argv[i][1] == '-') {
			if (!argv[i][2]) i++; /* You can use "--" to force the next token to be a filename */
			else if (!strcmp(&argv[i][2], "noconfig") || !strcmp(&argv[i][2], "no-config")) {
				no_config = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "noansi") || !strcmp(&argv[i][2], "no-ansi")) {
				ansi = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "no-syntax")) {
				do_syntax = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "prefs")) {
				if (i < argc-1) {
					startup_prefs_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "ansi")) {
				ansi = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "utf8")) {
				io_utf8 = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "no-utf8")) {
				io_utf8 = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "macro")) {
				if (i < argc-1) {
					macro_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "keys")) {
				if (i < argc-1) {
					key_bindings_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "menus")) {
				if (i < argc-1) {
					menu_conf_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
		}
	}

#ifdef NE_TEST
	/* Dump the builtin menu and key bindings to compare to
	   doc/default.menus and doc/default.keys. */
	int dump_config(void);
	dump_config();
#endif

	/* Unless --noconfig was specified, we try to configure the
	   menus and the keyboard. Note that these functions can exit() on error. */

	if (!no_config) {
		get_menu_configuration(menu_conf_name);
		get_key_bindings(key_bindings_name);
	}

	/* If we cannot even create a buffer, better go... */

	if (!new_buffer()) exit(1);

	/* Now that key_bindings are loaded, try to fix up the message for NOT_FOUND. */
	{
		char *repeat_last_keystroke, *new_not_found;
		if ((repeat_last_keystroke = find_key_strokes(REPEATLAST_A, 1))) {
			/* 39 = length of the two fixed pieces below (28 + 10) plus the
			   terminating NUL. */
			if ((new_not_found = malloc(39+strlen(repeat_last_keystroke)))) {
				strcat(strcat(strcpy(new_not_found, "Not Found. (RepeatLast with "), repeat_last_keystroke), " to wrap.)");
				error_msg[NOT_FOUND] = new_not_found;
			}
			free(repeat_last_keystroke);
		}
	}

	clear_buffer(cur_buffer);

	/* The INT_MAX clip always exists, and it is used by the Through command. */

	clip_desc * const cd = alloc_clip_desc(INT_MAX, 0);
	if (!cd) exit(1);

	add_head(&clips, &cd->cd_node);

	/* General terminfo and cursor motion initalization. From here onwards,
	   we cannot exit() lightly. */

	term_init();

	/* We will be always using the last line for the status bar. */

	set_terminal_window(ne_lines-1);

	/* We read in all the key capabilities. */

	read_key_capabilities();

	/* Some initializations of other modules... */

	re_set_syntax(
		RE_CONTEXT_INDEP_ANCHORS |
		RE_CONTEXT_INDEP_OPS     | RE_HAT_LISTS_NOT_NEWLINE |
		RE_NEWLINE_ALT           | RE_NO_BK_PARENS          |
		RE_NO_BK_VBAR            | RE_NO_EMPTY_RANGES
	);

	/* first_file stays true until a document has been loaded into the
	   initial buffer. */
	bool first_file = true;

	load_virtual_extensions();
	load_auto_prefs(cur_buffer, startup_prefs_name);

	/* When standard input is not a terminal, its content is loaded into
	   the current buffer and stdin is then reopened on /dev/tty. */
	buffer *stdin_buffer = NULL;
	if (!isatty(fileno(stdin))) {
		first_file = false;
		const int error = load_fd_in_buffer(cur_buffer, fileno(stdin));
		print_error(error);
		stdin_buffer = cur_buffer;

		if (!(freopen("/dev/tty", "r", stdin))) {
			fprintf(stderr, "Cannot reopen input tty\n");
			abort();
		}
	}

	/* The terminal is prepared for interactive I/O. */

	set_interactive_mode();

	clear_entire_screen();

	/* This function sets fatal_code() as signal interrupt handler
	   for all the dangerous signals (SIGILL, SIGSEGV etc.). */

	set_fatal_code();

	if (argc > 1) {

		/* The first file opened does not need a NEWDOC_A action. Note that
		   file loading can be interrupted (wildcarding can sometimes produce
		   unwanted results). */

		/* These settings accumulate from "+N[,M]", "--binary" and
		   "--read-only" arguments and apply to the next file name only;
		   they are reset after each file. */
		uint64_t first_line = 0, first_col = 0;
		bool binary = false, skip_plus = false, read_only = false;
		stop = false;

		for(int i = 1; i < argc && !stop; i++) {
			if (argv[i] && !skiplist[i]) {
				if (argv[i][0] == '+' && !skip_plus) {       /* looking for "+", or "+N" or "+N,M"  */
					uint64_t tmp_l = INT64_MAX, tmp_c = 0;
					char *d;
					errno = 0;
					if (argv[i][1]) {
						if (isdigit((unsigned char)argv[i][1])) {
							tmp_l = strtoll(argv[i]+1, &d, 10);
							if (!errno) {
								if (*d) {  /* separator between N and M */
									if (isdigit((unsigned char)d[1])) {
										tmp_c = strtoll(d+1, &d, 10);
										if (*d) errno = ERANGE;
									}
									else errno = ERANGE;
								}
							}
						}
						else errno = ERANGE;
					}
					if (!errno) {
						first_line = tmp_l;
						first_col  = tmp_c;
					}
					else {
						/* Not a valid "+N[,M]": look at this same argument
						   again, this time as a file name. */
						skip_plus = true;
						i--;
					}
				}
				else if (!strcmp(argv[i], "--binary")) {
					binary = true;
				}
				else if (!strcmp(argv[i], "--read-only") || !strcmp(argv[i], "--readonly") || !strcmp(argv[i], "--ro")) {
					read_only = true;
				}
				else {
					/* "-" names the buffer loaded from standard input, once. */
					if (!strcmp(argv[i], "-") && stdin_buffer) {
						stdin_buffer->opt.binary = binary;
						if (read_only) stdin_buffer->opt.read_only = read_only;
						if (first_line) do_action(stdin_buffer, GOTOLINE_A, first_line, NULL);
						if (first_col)  do_action(stdin_buffer, GOTOCOLUMN_A, first_col, NULL);
						stdin_buffer = NULL;
					}
					else {
						if (!strcmp(argv[i], "--")) i++;
						if (!first_file) do_action(cur_buffer, NEWDOC_A, -1, NULL);
						else first_file = false;
						cur_buffer->opt.binary = binary;
						if (i < argc) do_action(cur_buffer, OPEN_A, 0, str_dup(argv[i]));
						if (first_line) do_action(cur_buffer, GOTOLINE_A, first_line, NULL);
						if (first_col)  do_action(cur_buffer, GOTOCOLUMN_A, first_col, NULL);
						if (read_only) cur_buffer->opt.read_only = read_only;
					}
					first_line =
					first_col  = 0;
					skip_plus  =
					binary    =
					read_only  = false;
				}
			}
		}

		free(skiplist);

		/* This call makes current the first specified file. It is called
		   only if more than one buffer exist. */

		if (get_nth_buffer(1)) do_action(cur_buffer, NEXTDOC_A, -1, NULL);

	}

	/* We delay updates. In this way the macro activity does not cause display activity. */

	reset_window();
	delay_update();

	if (macro_name) do_action(cur_buffer, MACRO_A, -1, str_dup(macro_name));
	else if (first_file) {
		/* If there is no file to load, and no macro to execute, we display
		   the "NO WARRANTY" message. */
		about();
	}

	/* Main loop: redraw, read one key code, and dispatch on its class. */
	while(true) {
		/* If we are displaying the "NO WARRANTY" info, we should not refresh the
		   window now */
		if (!displaying_info) {
			refresh_window(cur_buffer);
			if (cur_buffer->opt.automatch) automatch_bracket(cur_buffer, true);
		}

		draw_status_bar();
		move_cursor(cur_buffer->cur_y, cur_buffer->cur_x);

		int c = get_key_code();

		if (window_changed_size) {
			print_error(do_action(cur_buffer, REFRESH_A, 0, NULL));
			window_changed_size = displaying_info = false;
			cur_buffer->automatch.shown = 0;
		}

		if (c == INVALID_CHAR) continue; /* Window resizing. */
		const input_class ic = CHAR_CLASS(c);

		if (displaying_info) {
			refresh_window(cur_buffer);
			displaying_info = false;
		}

		if (cur_buffer->automatch.shown) automatch_bracket(cur_buffer, false);

		switch(ic) {
		case INVALID:
			print_error(INVALID_CHARACTER);
			break;

		case ALPHA:
			print_error(do_action(cur_buffer, INSERTCHAR_A, c, NULL));
			break;

		case TAB:
			print_error(do_action(cur_buffer, INSERTTAB_A, 1, NULL));
			break;

		case RETURN:
			print_error(do_action(cur_buffer, INSERTLINE_A, -1, NULL));
			break;

		case COMMAND:
			/* A negative code c is mapped to the index -c - 1 of
			   key_binding[]. */
			if (c < 0) c = -c - 1;
			if (key_binding[c]) print_error(execute_command_line(cur_buffer, key_binding[c]));
			break;

		default:
			break;
		}
	}
}
/**
 * \brief Lookup a specific path in the list
 *
 * \param list a #GPPortInfoList
 * \param path a path
 *
 * Looks for an entry in the list with the supplied path. If no exact match
 * can be found, a regex search will be performed in the hope some driver
 * claimed ports like "serial:*". Only entries with an empty name are used
 * as patterns. When one of them matches, a copy of that entry carrying
 * \c path and the name "Generic Port" is appended to the list; both
 * strings are truncated if necessary and are always NUL-terminated.
 *
 * \return The index of the entry (not counting the entries with an empty
 * name that precede it), the result of gp_port_info_list_append() for a
 * newly appended entry, or a gphoto2 error code
 * (#GP_ERROR_UNKNOWN_PORT if nothing matched).
 **/
int
gp_port_info_list_lookup_path (GPPortInfoList *list, const char *path)
{
	int i, result, generic;
	regex_t pattern;
#ifdef HAVE_GNU_REGEX
	const char *rv;
#else
	regmatch_t match;
#endif

	CHECK_NULL (list && path);

	gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list", ngettext(
		"Looking for path '%s' (%i entry available)...",
		"Looking for path '%s' (%i entries available)...",
		list->count
	), path, list->count);

	/* Exact match? Entries with an empty name are counted in 'generic'
	 * and left out of the returned index. */
	for (generic = i = 0; i < list->count; i++)
		if (!strlen (list->info[i].name))
			generic++;
		else if (!strcmp (list->info[i].path, path))
			return (i - generic);

	/* Regex match? */
	gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
		_("Starting regex search for '%s'..."), path);
	for (i = 0; i < list->count; i++) {
		GPPortInfo newinfo;

		/* Only the entries with an empty name hold patterns */
		if (strlen (list->info[i].name))
			continue;

		gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
			_("Trying '%s'..."), list->info[i].path);

		/* Compile the pattern */
#ifdef HAVE_GNU_REGEX
		memset (&pattern, 0, sizeof (pattern));
		rv = re_compile_pattern (list->info[i].path,
					 strlen (list->info[i].path), &pattern);
		if (rv) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				"%s", rv);
			continue;
		}
#else
		result = regcomp (&pattern, list->info[i].path, REG_ICASE);
		if (result) {
			char buf[1024];
			if (regerror (result, &pattern, buf, sizeof (buf)))
				gp_log (GP_LOG_ERROR, "gphoto2-port-info-list",
					"%s", buf);
			else
				gp_log (GP_LOG_ERROR, "gphoto2-port-info-list",
					_("regcomp failed"));
			return (GP_ERROR_UNKNOWN_PORT);
		}
#endif

		/* Try to match */
#ifdef HAVE_GNU_REGEX
		result = re_match (&pattern, path, strlen (path), 0, NULL);
		regfree (&pattern);
		if (result < 0) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				_("re_match failed (%i)"), result);
			continue;
		}
#else
		result = regexec (&pattern, path, 1, &match, 0);
		regfree (&pattern);
		if (result) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				_("regexec failed"));
			continue;
		}
#endif
		memcpy (&newinfo, &list->info[i], sizeof(newinfo));
		/* strncpy() does not terminate the destination when the source
		 * fills it, so copy one byte less and terminate explicitly. */
		strncpy (newinfo.path, path, sizeof (newinfo.path) - 1);
		newinfo.path[sizeof (newinfo.path) - 1] = '\0';
		strncpy (newinfo.name, _("Generic Port"), sizeof (newinfo.name) - 1);
		newinfo.name[sizeof (newinfo.name) - 1] = '\0';
		CR (result = gp_port_info_list_append (list, newinfo));
		return result;
	}

	return (GP_ERROR_UNKNOWN_PORT);
}
Exemple #28
0
/*
 * make_regexp --- build a Regexp from the LEN bytes starting at S.
 *
 * A scratch copy of the text is made with backslash escapes processed,
 * and that copy is compiled with re_compile_pattern(); a compile error is
 * reported with fatal().  A nonzero IGNORECASE installs casetable as the
 * translation table.  The dfa matcher is set up as well when DFA is
 * nonzero and IGNORECASE is zero.  The scratch copy is freed before the
 * new Regexp is returned.
 */
Regexp *
make_regexp(char *s, size_t len, int ignorecase, int dfa)
{
	Regexp *rp;
	const char *rerr;
	char *src;
	char *buf;
	char *end = s + len;
	register char *dest;
	register int c, c2;

	/* Scratch buffer for the translated text; buf remembers its start. */
	emalloc(dest, char *, len + 2, "make_regexp");
	buf = dest;

	for (src = s; src < end; ) {
		/* Anything but a backslash is copied as is. */
		if (*src != '\\') {
			*dest++ = *src++;
			continue;
		}

		/* c is the character that follows the backslash. */
		c = *++src;
		if (c != '\0' && strchr("abfnrtvx01234567", c) != NULL) {
			/* An escape understood by parse_escape(), which advances src. */
			c2 = parse_escape(&src);
			if (c2 < 0)
				cant_happen();
			/*
			 * In traditional (non-POSIX) mode a character given in
			 * octal or hex is taken literally, so a regexp
			 * metacharacter gets a backslash in front of it.
			 */
			if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
			    && strchr("()|*+?.^$\\[]", c2) != NULL)
				*dest++ = '\\';
			*dest++ = (char) c2;
		} else if (c == '8' || c == '9') {
			/* a\9b is not valid: keep the digit, drop the backslash */
			*dest++ = c;
			src++;
		} else {
			/*
			 * Everything else keeps its backslash, except that \y
			 * becomes the GNU regex operator \b unless running in
			 * traditional mode.
			 */
			*dest++ = '\\';
			*dest++ = (c == 'y' && ! do_traditional) ? 'b' : (char) c;
			src++;
		}
	}

	*dest = '\0';	/* terminated for the benefit of the fatal() message */

	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
	memset((char *) rp, 0, sizeof(*rp));
	rp->pat.allocated = 0;	/* the regex code allocates the buffer itself */
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
	rp->pat.translate = ignorecase ? casetable : NULL;

	/* len now describes the translated text */
	len = dest - buf;
	rerr = re_compile_pattern(buf, len, &(rp->pat));
	if (rerr != NULL)
		fatal("%s: /%s/", gettext(rerr), buf);

	/* this has to come *after* re_compile_pattern */
	rp->pat.newline_anchor = FALSE;	/* no anchoring at a \n inside the string */

	rp->dfa = FALSE;
	if (dfa && ! ignorecase) {
		dfacomp(buf, len, &(rp->dfareg), TRUE);
		rp->dfa = TRUE;
	}

	free(buf);
	return rp;
}
/* Search STRING with REGBUF forwards from the start and backwards from
   the end, and compare both results with EXPECTED.  I is the index of the
   test and PREFIX the text printed in front of a failure message.
   Return 0 if both searches returned EXPECTED; otherwise print a message
   for the first one that did not and return 1.  */
static int
check_searches (struct re_pattern_buffer *regbuf, const char *string,
		int expected, size_t i, const char *prefix)
{
  int str_len = strlen (string);
  int res = re_search (regbuf, string, str_len, 0, str_len, NULL);

  if (res != expected)
    {
      printf ("%sre_search %zu failed: %d\n", prefix, i, res);
      return 1;
    }

  res = re_search (regbuf, string, str_len, str_len, -str_len, NULL);
  if (res != expected)
    {
      printf ("%sbackward re_search %zu failed: %d\n", prefix, i, res);
      return 1;
    }

  return 0;
}

/* Compile every entry of TESTS twice in the de_DE.UTF-8 locale, once with
   its own syntax and once with RE_ICASE added.  Each time, check whether
   the compiled pattern is marked single-byte (mb_cur_max == 1): this must
   agree with the entry's `optimize' field for the plain pattern and must
   not be the case with RE_ICASE.  Then check that forward and backward
   searches return the expected offset.  A failure in the first pass skips
   the RE_ICASE pass of that entry.  Returns 0 if everything passed and 1
   otherwise.  */
int
main (void)
{
  struct re_pattern_buffer regbuf;
  const char *err;
  size_t i;
  int ret = 0;

  mtrace ();

  setlocale (LC_ALL, "de_DE.UTF-8");
  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
    {
      int optimized, failed;

      re_set_syntax (tests[i].syntax);
      memset (&regbuf, '\0', sizeof (regbuf));
      err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern),
                                &regbuf);
      if (err != NULL)
	{
	  printf ("re_compile_pattern failed: %s\n", err);
	  ret = 1;
	  continue;
	}

      /* Check if re_search will be done as multi-byte or single-byte.  */
      optimized = ((re_dfa_t *) regbuf.buffer)->mb_cur_max == 1;
      if (optimized != tests[i].optimize)
        {
          printf ("pattern %zu %soptimized while it should%s be\n",
		  i, optimized ? "" : "not ", tests[i].optimize ? "" : " not");
	  ret = 1;
        }

      failed = check_searches (&regbuf, tests[i].string, tests[i].res, i, "");
      regfree (&regbuf);
      if (failed)
	{
	  ret = 1;
	  continue;
	}

      /* Same pattern again, this time case-insensitive.  */
      re_set_syntax (tests[i].syntax | RE_ICASE);
      memset (&regbuf, '\0', sizeof (regbuf));
      err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern),
                                &regbuf);
      if (err != NULL)
	{
	  printf ("re_compile_pattern failed: %s\n", err);
	  ret = 1;
	  continue;
	}

      /* Check if re_search will be done as multi-byte or single-byte.  */
      optimized = ((re_dfa_t *) regbuf.buffer)->mb_cur_max == 1;
      if (optimized)
        {
          printf ("pattern %zu optimized while it should not be when case insensitive\n",
		  i);
	  ret = 1;
        }

      if (check_searches (&regbuf, tests[i].string, tests[i].res, i, "ICASE "))
	ret = 1;
      regfree (&regbuf);
    }

  return ret;
}
Exemple #30
0
static void
compile_regex_1 (struct regex *new_regex, int needed_sub)
{
  const char *error;
  int syntax = ((extended_regexp_flags & REG_EXTENDED)
                 ? RE_SYNTAX_POSIX_EXTENDED
                 : RE_SYNTAX_POSIX_BASIC);

  syntax &= ~RE_DOT_NOT_NULL;
  syntax |= RE_NO_POSIX_BACKTRACKING;

  switch (posixicity)
    {
    case POSIXLY_EXTENDED:
      syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_CORRECT:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_BASIC:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS;
      if (!(extended_regexp_flags & REG_EXTENDED))
        syntax |= RE_LIMITED_OPS;
      break;
    }

  if (new_regex->flags & REG_ICASE)
    syntax |= RE_ICASE;
  else
    new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));
  syntax |= needed_sub ? 0 : RE_NO_SUB;

  /* If REG_NEWLINE is set, newlines are treated differently.  */
  if (new_regex->flags & REG_NEWLINE)
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
    }

  re_set_syntax (syntax);
  error = re_compile_pattern (new_regex->re, new_regex->sz,
                              &new_regex->pattern);
  new_regex->pattern.newline_anchor =
    buffer_delimiter == '\n' && (new_regex->flags & REG_NEWLINE) != 0;

  new_regex->pattern.translate = NULL;
#ifndef RE_ICASE
  if (new_regex->flags & REG_ICASE)
    {
      static char translate[1 << (sizeof (char) * 8)];
      int i;
      for (i = 0; i < sizeof (translate) / sizeof (char); i++)
        translate[i] = tolower (i);

      new_regex->pattern.translate = translate;
    }
#endif

  if (error)
    bad_prog (error);

  /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
  if (needed_sub
      && new_regex->pattern.re_nsub < needed_sub - 1
      && posixicity == POSIXLY_EXTENDED)
    {
      char buf[200];
      sprintf (buf, _("invalid reference \\%d on `s' command's RHS"),
              needed_sub - 1);
      bad_prog (buf);
    }

  int dfaopts = buffer_delimiter == '\n' ? 0 : DFA_EOL_NUL;
  new_regex->dfa = dfaalloc ();
  dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts);
  dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1);

  /* The patterns which consist of only ^ or $ often appear in
     substitution, but regex and dfa are not good at them, as regex does
     not build fastmap, and as all in buffer must be scanned for $.  So
     we mark them to handle manually.  */
  if (new_regex->sz == 1)
    {
      if (new_regex->re[0] == '^')
        new_regex->begline = true;
      if (new_regex->re[0] == '$')
        new_regex->endline = true;
    }
}