コード例 #1
0
/*
 * Tries to compile the pattern pat (n characters long) into fg for the
 * fast matcher.  Only patterns that boil down to a literal string
 * (optionally containing '.') are accepted.
 *
 * A leading '^' is recorded in fg->bol and a final unescaped '$' in
 * fg->eol.  With REG_GNU, a pattern wrapped in "[[:<:]]" ... "[[:>:]]"
 * sets fg->word.  Escape characters are stripped while copying; an
 * unescaped '.' sets fg->hasdot and escaped dots are recorded in an
 * escape map.  Anything else (bracket expressions, repetition, grouping,
 * alternation, unknown escapes, a trailing backslash) yields REG_BADPAT,
 * which the debug message describes as falling back to the NFA.
 *
 * NOTE(review): INIT_COMP, CHECK_MATCHALL, SAVE_PATTERN, STORE_MBS_PAT and
 * the FILL_* macros are defined elsewhere; they may return early on their
 * own -- confirm that they do not leak tmp/_escmap when they do.
 *
 * Returns: REG_OK on success, error code otherwise (REG_BADPAT for
 * patterns that cannot be handled here, REG_ESPACE on allocation failure)
 */
int
tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
		 int cflags)
{
  tre_char_t *tmp;
  size_t pos = 0, hasdot = 0, whasdot = 0;
  ssize_t firstdot = -1, wfirstdot = -1;
  bool escaped = false;
  bool *_escmap = NULL;

  INIT_COMP;

  /*
   * Remove beginning-of-line character ('^').  The length check keeps us
   * from reading pat[0] of an empty pattern and from wrapping n around.
   */
  if (n > 0 && pat[0] == TRE_CHAR('^'))
    {
      fg->bol = true;
      n--;
      pat++;
    }

  CHECK_MATCHALL(false);

  /* Handle word-boundary matching when GNU extensions are enabled */
  if ((cflags & REG_GNU) && (n >= 14) &&
      (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
      (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"),
	      7 * sizeof(tre_char_t)) == 0))
    {
      n -= 14;
      pat += 7;
      fg->word = true;
    }

  /* Cannot handle word boundaries with MB string */
  if (fg->word && (TRE_MB_CUR_MAX > 1))
    return REG_BADPAT;

  /* Scratch buffer for the stripped pattern; it never exceeds n chars. */
  tmp = xmalloc((n + 1) * sizeof(tre_char_t));
  if (tmp == NULL)
    return REG_ESPACE;

/*
 * Copies the current char into the stored pattern and clears the escape
 * state.  Advancing to the next input char is done by the `continue' that
 * follows every use inside the switch below (a `continue' placed inside
 * this do/while would only leave the macro's own loop).
 */
#define STORE_CHAR							\
  do									\
    {									\
      tmp[pos++] = pat[i];						\
      escaped = false;							\
    } while (0)

  /*
   * Traverse the input pattern for processing.
   * NOTE(review): the `(cflags & REG_EXTENDED) ^ ...' tests below are
   * bitwise; they act as a logical XOR only if REG_EXTENDED == 1 -- confirm.
   */
  for (size_t i = 0; i < n; i++)
    {
      switch (pat[i])
	{
	  case TRE_CHAR('\\'):
	    if (escaped)
	      STORE_CHAR;
	    else if (i == n - 1)
	      goto badpat;	/* dangling backslash at end of pattern */
	    else
	      escaped = true;
	    continue;
	  case TRE_CHAR('['):
	    if (escaped)
	      STORE_CHAR;
	    else
	      goto badpat;
	    continue;
	  case TRE_CHAR('*'):
	    if (escaped || (!(cflags & REG_EXTENDED) && (i == 0)))
	      STORE_CHAR;
	    else
	      goto badpat;
	    continue;
	  case TRE_CHAR('+'):
	  case TRE_CHAR('?'):
	    /* With REG_EXTENDED a leading '+' or '?' is dropped. */
	    if ((cflags & REG_EXTENDED) && (i == 0))
	      continue;
	    else if ((cflags & REG_EXTENDED) ^ !escaped)
	      STORE_CHAR;
	    else
	      goto badpat;
	    continue;
	  case TRE_CHAR('.'):
	    if (escaped)
	      {
		if (!_escmap)
		  {
		    _escmap = xmalloc(n * sizeof(bool));
		    if (!_escmap)
		      {
			xfree(tmp);
			return REG_ESPACE;
		      }
		    /* Entries that are never set must read as false. */
		    memset(_escmap, 0, n * sizeof(bool));
		  }
		/*
		 * NOTE(review): the map is indexed by the position in the
		 * input pattern (i), not by the position in the stored
		 * pattern (pos) -- confirm that the consumers expect this.
		 */
		_escmap[i] = true;
		STORE_CHAR;
	      }
	    else
	      {
		/* Remember the last and the first unescaped dot. */
		whasdot = i;
		if (wfirstdot == -1)
			wfirstdot = i;
		STORE_CHAR;
	      }
	    continue;
	  case TRE_CHAR('^'):
	    STORE_CHAR;
	    continue;
	  case TRE_CHAR('$'):
	    /* Only a final, unescaped '$' acts as an anchor. */
	    if (!escaped && (i == n - 1))
	      fg->eol = true;
	    else
	      STORE_CHAR;
	    continue;
	  case TRE_CHAR('('):
	    if ((cflags & REG_EXTENDED) ^ escaped)
	      goto badpat;
	    else
	      STORE_CHAR;
	    continue;
	  case TRE_CHAR('{'):
	    if (!(cflags & REG_EXTENDED) ^ escaped)
	      STORE_CHAR;
	    else if (!(cflags & REG_EXTENDED) && (i == 0))
	      STORE_CHAR;
	    else if ((cflags & REG_EXTENDED) && (i == 0))
	      continue;
	    else
	      goto badpat;
	    continue;
	  case TRE_CHAR('|'):
	    if ((cflags & REG_EXTENDED) ^ escaped)
	      goto badpat;
	    else
	      STORE_CHAR;
	    continue;
	  default:
	    /* Any other escaped character is not handled here. */
	    if (escaped)
	      goto badpat;
	    else
	      STORE_CHAR;
	    continue;
	}
      continue;
badpat:
      xfree(tmp);
      /* The escape map may already exist if an escaped dot was seen. */
      if (_escmap != NULL)
	xfree(_escmap);
      DPRINT(("tre_compile_fast: compilation of pattern failed, falling "
	      "back to NFA\n"));
      return REG_BADPAT;
    }

  fg->hasdot = wfirstdot > -1;

  /*
   * The pattern has been processed and copied to tmp as a literal string
   * with escapes, anchors (^$) and the word boundary match character
   * classes stripped out.
   */
#ifdef TRE_WCHAR
  SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
  fg->wescmap = _escmap;
  STORE_MBS_PAT;

  /*
   * The position of dots and escaped dots is different in the MB string
   * than in the wide string so traverse the converted string, as well,
   * to store these positions.
   */
  if (fg->hasdot || (fg->wescmap != NULL))
    {
      if (fg->wescmap != NULL)
	{
	  fg->escmap = xmalloc(fg->len * sizeof(bool));
	  if (!fg->escmap)
	    {
	      /* tmp is still owned by this function at this point. */
	      xfree(tmp);
	      tre_free_fast(fg);
	      return REG_ESPACE;
	    }
	  /* Entries that are never set must read as false. */
	  memset(fg->escmap, 0, fg->len * sizeof(bool));
	}

      escaped = false;
      for (size_t i = 0; i < fg->len; i++)
	if (fg->pattern[i] == '\\')
	  escaped = !escaped;
	else if (fg->pattern[i] == '.' && escaped)
	  {
	    /*
	     * The map is only allocated when the wide pattern had an
	     * escaped dot; a stored backslash followed by a plain dot
	     * gets here without one.
	     * NOTE(review): assumes fg->escmap is NULL when it was not
	     * allocated above -- confirm against INIT_COMP.
	     */
	    if (fg->escmap != NULL)
	      fg->escmap[i] = true;
	    escaped = false;
	  }
	else if (fg->pattern[i] == '.' && !escaped)
	  {
	    hasdot = i;
	    if (firstdot == -1)
	      firstdot = i;
	  }
	else
	  escaped = false;
    }
#else
  SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);
  fg->escmap = _escmap;
#endif

  xfree(tmp);

  DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
	 "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
	 fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
	 fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
	 fg->newline ? 'y' : 'n'));

  /* Check whether reverse QS algorithm is more efficient */
  if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) &&
      fg->nosub)
    {
      fg->reversed = true;
      DPRINT(("tre_compile_fast: using reverse QS algorithm\n"));
    }

  FILL_QSBC;
  FILL_BMGS;
#ifdef TRE_WCHAR
  FILL_QSBC_WIDE;
  FILL_BMGS_WIDE;
#endif

  return REG_OK;
}
コード例 #2
0
/*
 * Thin wrapper: passes preg unchanged to tre_free_fast() and returns
 * nothing.  What exactly is released is decided by tre_free_fast().
 */
void
tre_fastfree(fastmatch_t *preg)
{
  tre_free_fast(preg);
}