コード例 #1
0
ファイル: regexp.c プロジェクト: uarka/sed
/* Build a freshly allocated `struct regex' from the pattern text in B.

   B          buffer holding the raw pattern bytes.
   FLAGS      stored verbatim in the new regex's `flags' member.
   NEEDED_SUB forwarded unchanged to compile_regex_1.

   An empty buffer is the "//" form, meaning "reuse the last RE": no
   regex is built and NULL is returned.  Modifier flags make no sense in
   that case, so a positive FLAGS is reported through bad_prog first.

   Otherwise the result comes from ck_malloc and is returned after
   compile_regex_1 has processed it.  */
struct regex *
compile_regex(struct buffer *b, int flags, int needed_sub)
{
  const size_t pattern_len = size_buffer(b);
  struct regex *compiled;

  /* Guard: empty pattern, i.e. "//".  */
  if (pattern_len == 0)
    {
      if (flags > 0)
        bad_prog(_(BAD_MODIF));
      return NULL;
    }

  /* The pattern bytes are stored inline at the tail of the structure;
     the "- 1" presumably accounts for a one-element `re' array already
     counted in sizeof (confirm against the struct definition).  */
  compiled = ck_malloc(sizeof *compiled + pattern_len - 1);
  compiled->flags = flags;
  memcpy (compiled->re, get_buffer(b), pattern_len);

#ifdef REG_PERL
  /* Keep the pattern text exactly as given.  */
  compiled->sz = pattern_len;
#else
  /* GNU regex does not process \t & co., so normalize the text in
     place and record the length normalize_text reports.  */
  compiled->sz = normalize_text(compiled->re, pattern_len, TEXT_REGEX);
#endif

  compile_regex_1 (compiled, needed_sub);
  return compiled;
}
コード例 #2
0
ファイル: regexp.c プロジェクト: agordon/sed
/* Try to match REGEX against the BUFLEN bytes at BUF, beginning the
   search at byte offset BUF_START_OFFSET.

   REGEX            pattern to use; if NULL, the regex passed on the most
                    recent call with a non-NULL REGEX is reused (it is
                    remembered in a function-local static).
   BUF, BUFLEN      the text to search and its length in bytes.
   BUF_START_OFFSET offset within BUF where the search starts.
   REGARRAY         receives match offsets (relative to BUF) when
                    REGSIZE is nonzero.
   REGSIZE          zero means the caller does not want registers; in
                    that case REGARRAY is never handed to re_search.

   Returns 1 if a match was found, 0 otherwise.

   Side effects visible here: updates the remembered "last regex", may
   recompile REGEX in place, and sets REGEX->pattern.regs_allocated.  */
int
match_regex (struct regex *regex, char *buf, size_t buflen,
            size_t buf_start_offset, struct re_registers *regarray,
            int regsize)
{
  int ret;
  static struct regex *regex_last;

  /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */

  /* Keep track of the last regexp matched.  A NULL REGEX means "use the
     previous one"; with no previous one the error is reported through
     bad_prog (presumably it does not return, since REGEX would still be
     NULL below -- confirm).  */
  if (!regex)
    {
      regex = regex_last;
      if (!regex_last)
        bad_prog (_(NO_REGEX));
    }
  else
    regex_last = regex;

  /* gnulib's re_search uses signed-int as length.  Note that a length
     exactly equal to INT_MAX is rejected as well.  */
  if (buflen >= INT_MAX)
    panic (_("regex input buffer length larger than INT_MAX"));

  /* The compiled pattern is flagged no_sub, but this caller wants
     registers: throw away the compiled state and compile again, passing
     REGSIZE to compile_regex_1.  */
  if (regex->pattern.no_sub && regsize)
    {
      /* Re-compiling an existing regex, free the previously allocated
         structures.  */
      if (regex->dfa)
        {
          dfafree (regex->dfa);
          free (regex->dfa);
          regex->dfa = NULL;
        }
      regfree (&regex->pattern);

      compile_regex_1 (regex, regsize);
    }

  regex->pattern.regs_allocated = REGS_REALLOCATE;

  /* Optimized handling for '^' and '$' patterns.  When either the
     begline or endline flag is set on REGEX, no regex engine is run:
     the position of the match is computed directly and reported as an
     empty (zero-length) match at OFFSET.  */
  if (regex->begline || regex->endline)
    {
      size_t offset;

      if (regex->endline)
        {
          const char *p = NULL;

          /* With REG_NEWLINE the match is at the first delimiter found
             at or after the start offset; otherwise (or if there is no
             delimiter) it is at the end of the buffer.  */
          if (regex->flags & REG_NEWLINE)
            p = memchr (buf + buf_start_offset, buffer_delimiter,
                        buflen - buf_start_offset);

          offset = p ? p - buf : buflen;
        }
      else if (buf_start_offset == 0)
        /* begline anchor, starting at beginning of the buffer. */
        offset = 0;
      else if (!(regex->flags & REG_NEWLINE))
        /* begline anchor, starting in the middle of the text buffer,
           and multiline regex is not specified - will never match.
           Example: seq 2 | sed 'N;s/^/X/g' */
        return 0;
      else if (buf[buf_start_offset - 1] == buffer_delimiter)
        /* begline anchor, starting in the middle of the text buffer,
           with multiline match, and the previous character
           is the line delimiter - start here.
           Example: seq 2 | sed 'N;s/^/X/mg' */
        offset = buf_start_offset;
      else
        {
          /* begline anchor, starting in the middle of the search buffer,
             all previous optimizations didn't work: search
             for the next line delimiter character in the buffer,
             and start from there if found. */
          const char *p = memchr (buf + buf_start_offset, buffer_delimiter,
                                  buflen - buf_start_offset);

          if (p == NULL)
            return 0;

          /* The match is just past the delimiter.  */
          offset = p - buf + 1;
        }

      if (regsize)
        {
          size_t i;

          /* No register arrays yet: allocate room for one register.  */
          if (!regarray->start)
            {
              regarray->start = XCALLOC (1, regoff_t);
              regarray->end = XCALLOC (1, regoff_t);
              regarray->num_regs = 1;
            }

          /* Register 0 describes the empty match at OFFSET.  */
          regarray->start[0] = offset;
          regarray->end[0] = offset;

          /* Every other register is marked as unset (-1).  */
          for (i = 1 ; i < regarray->num_regs; ++i)
            regarray->start[i] = regarray->end[i] = -1;
        }

      return 1;
    }

  /* DFA pre-checks, only applied when searching from the very start of
     the buffer.  A false result from dfaexec is treated as "no match"
     and ends the call early.  */
  if (buf_start_offset == 0)
    {
      struct dfa *superset = dfasuperset (regex->dfa);

      /* If a superset DFA is available and it rejects the buffer, the
         real pattern is taken not to match either.  */
      if (superset && !dfaexec (superset, buf, buf + buflen, true, NULL, NULL))
        return 0;

      /* Run the full DFA when the caller wants no registers and
         REG_NEWLINE is set, or when there was no superset and
         dfaisfast reports true for the DFA.  */
      if ((!regsize && (regex->flags & REG_NEWLINE))
          || (!superset && dfaisfast (regex->dfa)))
        {
          bool backref = false;

          if (!dfaexec (regex->dfa, buf, buf + buflen, true, NULL, &backref))
            return 0;

          /* The DFA accepted.  If no registers are needed, REG_NEWLINE
             is set and dfaexec did not set BACKREF, that answer is
             final; otherwise fall through to re_search.  */
          if (!regsize && (regex->flags & REG_NEWLINE) && !backref)
            return 1;
        }
    }

  /* If the buffer delimiter is not newline character, we cannot use
     newline_anchor flag of regex.  So do it line-by-line, and add offset
     value to results.  */
  if ((regex->flags & REG_NEWLINE) && buffer_delimiter != '\n')
    {
      const char *beg, *end;
      const char *start;

      /* BEG is the start of the line containing the start offset: one
         past the last delimiter found in buf[0 .. buf_start_offset),
         or BUF itself if there is none.  */
      beg = buf;

      if (buf_start_offset > 0)
        {
          const char *eol = memrchr (buf, buffer_delimiter, buf_start_offset);

          if (eol != NULL)
            beg = eol + 1;
        }

      /* START is where searching begins within the current line.  */
      start = buf + buf_start_offset;

      for (;;)
        {
          /* END is the delimiter ending the current line, or the end of
             the buffer when no delimiter remains.  */
          end = memchr (beg, buffer_delimiter, buf + buflen - beg);

          if (end == NULL)
            end = buf + buflen;

          /* Search only [BEG, END), starting at START.  */
          ret = re_search (&regex->pattern, beg, end - beg,
                           start - beg, end - start,
                           regsize ? regarray : NULL);

          if (ret > -1)
            {
              size_t i;

              /* re_search worked relative to BEG; shift the result and
                 every set register so they are relative to BUF.  */
              ret += beg - buf;

              if (regsize)
                {
                  for (i = 0; i < regarray->num_regs; ++i)
                    {
                      if (regarray->start[i] > -1)
                        regarray->start[i] += beg - buf;
                      if (regarray->end[i] > -1)
                        regarray->end[i] += beg - buf;
                    }
                }

              break;
            }

          /* No match in this line and no lines left: give up with RET
             still negative.  */
          if (end == buf + buflen)
            break;

          /* Move on to the line after the delimiter.  */
          beg = start = end + 1;
        }
    }
  else
    /* Ordinary case: a single search over the whole buffer.  */
    ret = re_search (&regex->pattern, buf, buflen, buf_start_offset,
                     buflen - buf_start_offset,
                     regsize ? regarray : NULL);

  /* Only the sign of RET matters to callers: 1 on match, 0 otherwise.  */
  return (ret > -1);
}