Пример #1
0
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
int errorcode;
int options = 0;

if ((cflags & REG_ICASE) != 0)    options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0)  options |= PCRE_MULTILINE;
if ((cflags & REG_DOTALL) != 0)   options |= PCRE_DOTALL;
if ((cflags & REG_NOSUB) != 0)    options |= PCRE_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF8) != 0)     options |= PCRE_UTF8;
if ((cflags & REG_UCP) != 0)      options |= PCRE_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;

preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
  &erroffset, NULL);
preg->re_erroffset = erroffset;

/* Safety: if the error code is too big for the translation vector (which
should not happen, but we all make mistakes), return REG_BADPAT. */

if (preg->re_pcre == NULL)
  {
  return (errorcode < sizeof(eint)/sizeof(const int))?
    eint[errorcode] : REG_BADPAT;
  }

preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
return 0;
}
Пример #2
0
static RegInfo *tf_reg_compile_fl(const char *pattern, int optimize,
    const char *file, int line)
{
    RegInfo *ri;
    const char *emsg, *s;
    int eoffset, n;
    /* PCRE_DOTALL optimizes patterns starting with ".*" */
    int options = PCRE_DOLLAR_ENDONLY | PCRE_DOTALL | PCRE_CASELESS;

    ri = dmalloc(NULL, sizeof(RegInfo), file, line);
    if (!ri) return NULL;
    ri->extra = NULL;
    ri->ovector = NULL;
    ri->Str = NULL;
    ri->links = 1;

    if (warn_curly_re && (s = estrchr(pattern, '{', '\\')) &&
	(is_digit(s[1]) || s[1] == ','))
    {
	wprintf("regexp contains '{', which has a new meaning in version 5.0.  "
	    "(This warning can be disabled with '/set warn_curly_re=off'.)");
    }
    for (s = pattern; *s; s++) {
	if (*s == '\\') {
	    if (s[1]) s++;
	} else if (is_upper(*s)) {
	    options &= ~PCRE_CASELESS;
	    break;
	}
    }

    ri->re = pcre_compile((char*)pattern, options, &emsg, &eoffset, re_tables);
    if (!ri->re) {
	/* don't trust emsg to be non-NULL or NUL-terminated */
	eprintf("regexp error: character %d: %.128s", eoffset,
	    emsg ? emsg : "unknown error");
	goto tf_reg_compile_error;
    }
    n = pcre_info(ri->re, NULL, NULL);
    if (n < 0) goto tf_reg_compile_error;
    ri->ovecsize = 3 * (n + 1);
    ri->ovector = dmalloc(NULL, sizeof(int) * ri->ovecsize, file, line);
    if (!ri->ovector) goto tf_reg_compile_error;
    if (optimize) {
	ri->extra = pcre_study(ri->re, 0, &emsg);
	if (emsg) {
	    eprintf("regexp study error: %.128s", emsg);
	    goto tf_reg_compile_error;
	}
    }
    return ri;

tf_reg_compile_error:
    tf_reg_free(ri);
    return NULL;
}
Пример #3
0
AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
int options = 0;

if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS;
if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE;

preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
preg->re_erroffset = erroffset;

if (preg->re_pcre == NULL) return AP_REG_INVARG;

preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
return 0;
}
Пример #4
0
EXPORT int
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
int options = 0;

if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;

preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
preg->re_erroffset = erroffset;

if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);

preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
return 0;
}
Пример #5
0
PCREPOSIX_EXP_DEFN int
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
int errorcode;
int options = 0;

if ((cflags & REG_ICASE) != 0)   options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
if ((cflags & REG_DOTALL) != 0)  options |= PCRE_DOTALL;
if ((cflags & REG_NOSUB) != 0)   options |= PCRE_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF8) != 0)    options |= PCRE_UTF8;

preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
  &erroffset, NULL);
preg->re_erroffset = erroffset;

if (preg->re_pcre == NULL) return eint[errorcode];

preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
return 0;
}
Пример #6
0
int main(int argc, char **argv)
{
FILE *infile = stdin;
int options = 0;
int study_options = 0;
int op = 1;
int timeit = 0;
int showinfo = 0;
int showstore = 0;
int size_offsets = 45;
int size_offsets_max;
int *offsets;
#if !defined NOPOSIX
int posix = 0;
#endif
int debug = 0;
int done = 0;
unsigned char buffer[30000];
unsigned char dbuffer[1024];

/* Static so that new_malloc can use it. */

outfile = stdout;

/* Scan options */

while (argc > 1 && argv[op][0] == '-')
  {
  char *endptr;

  if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
    showstore = 1;
  else if (strcmp(argv[op], "-t") == 0) timeit = 1;
  else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
  else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
  else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
      ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
    {
    op++;
    argc--;
    }
#if !defined NOPOSIX
  else if (strcmp(argv[op], "-p") == 0) posix = 1;
#endif
  else
    {
    printf("** Unknown or malformed option %s\n", argv[op]);
    printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
    printf("  -d     debug: show compiled code; implies -i\n"
           "  -i     show information about compiled pattern\n"
           "  -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
    printf("  -p     use POSIX interface\n");
#endif
    printf("  -s     output store information\n"
           "  -t     time compilation and execution\n");
    return 1;
    }
  op++;
  argc--;
  }

/* Get the store for the offsets vector, and remember what it was */

size_offsets_max = size_offsets;
offsets = malloc(size_offsets_max * sizeof(int));
if (offsets == NULL)
  {
  printf("** Failed to get %d bytes of memory for offsets vector\n",
    size_offsets_max * sizeof(int));
  return 1;
  }

/* Sort out the input and output files */

if (argc > 1)
  {
  infile = fopen(argv[op], "r");
  if (infile == NULL)
    {
    printf("** Failed to open %s\n", argv[op]);
    return 1;
    }
  }

if (argc > 2)
  {
  outfile = fopen(argv[op+1], "w");
  if (outfile == NULL)
    {
    printf("** Failed to open %s\n", argv[op+1]);
    return 1;
    }
  }

/* Set alternative malloc function */

pcre_malloc = new_malloc;

/* Heading line, then prompt for first regex if stdin */

fprintf(outfile, "PCRE version %s\n\n", pcre_version());

/* Main loop */

while (!done)
  {
  pcre *re = NULL;
  pcre_extra *extra = NULL;

#if !defined NOPOSIX  /* There are still compilers that require no indent */
  regex_t preg;
  int do_posix = 0;
#endif

  const char *error;
  unsigned char *p, *pp, *ppp;
  const unsigned char *tables = NULL;
  int do_study = 0;
  int do_debug = debug;
  int do_G = 0;
  int do_g = 0;
  int do_showinfo = showinfo;
  int do_showrest = 0;
  int utf8 = 0;
  int erroroffset, len, delimiter;

  if (infile == stdin) printf("  re> ");
  if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);

  p = buffer;
  while (isspace(*p)) p++;
  if (*p == 0) continue;

  /* Get the delimiter and seek the end of the pattern; if is isn't
  complete, read more. */

  delimiter = *p++;

  if (isalnum(delimiter) || delimiter == '\\')
    {
    fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
    goto SKIP_DATA;
    }

  pp = p;

  for(;;)
    {
    while (*pp != 0)
      {
      if (*pp == '\\' && pp[1] != 0) pp++;
        else if (*pp == delimiter) break;
      pp++;
      }
    if (*pp != 0) break;

    len = sizeof(buffer) - (pp - buffer);
    if (len < 256)
      {
      fprintf(outfile, "** Expression too long - missing delimiter?\n");
      goto SKIP_DATA;
      }

    if (infile == stdin) printf("    > ");
    if (fgets((char *)pp, len, infile) == NULL)
      {
      fprintf(outfile, "** Unexpected EOF\n");
      done = 1;
      goto CONTINUE;
      }
    if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
    }

  /* If the first character after the delimiter is backslash, make
  the pattern end with backslash. This is purely to provide a way
  of testing for the error message when a pattern ends with backslash. */

  if (pp[1] == '\\') *pp++ = '\\';

  /* Terminate the pattern at the delimiter */

  *pp++ = 0;

  /* Look for options after final delimiter */

  options = 0;
  study_options = 0;
  log_store = showstore;  /* default from command line */

  while (*pp != 0)
    {
    switch (*pp++)
      {
      case 'g': do_g = 1; break;
      case 'i': options |= PCRE_CASELESS; break;
      case 'm': options |= PCRE_MULTILINE; break;
      case 's': options |= PCRE_DOTALL; break;
      case 'x': options |= PCRE_EXTENDED; break;

      case '+': do_showrest = 1; break;
      case 'A': options |= PCRE_ANCHORED; break;
      case 'D': do_debug = do_showinfo = 1; break;
      case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
      case 'G': do_G = 1; break;
      case 'I': do_showinfo = 1; break;
      case 'M': log_store = 1; break;

#if !defined NOPOSIX
      case 'P': do_posix = 1; break;
#endif

      case 'S': do_study = 1; break;
      case 'U': options |= PCRE_UNGREEDY; break;
      case 'X': options |= PCRE_EXTRA; break;
      case '8': options |= PCRE_UTF8; utf8 = 1; break;

      case 'L':
      ppp = pp;
      while (*ppp != '\n' && *ppp != ' ') ppp++;
      *ppp = 0;
      if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
        {
        fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
        goto SKIP_DATA;
        }
      tables = pcre_maketables();
      pp = ppp;
      break;

      case '\n': case ' ': break;
      default:
      fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
      goto SKIP_DATA;
      }
    }

  /* Handle compiling via the POSIX interface, which doesn't support the
  timing, showing, or debugging options, nor the ability to pass over
  local character tables. */

#if !defined NOPOSIX
  if (posix || do_posix)
    {
    int rc;
    int cflags = 0;
    if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
    if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
    rc = regcomp(&preg, (char *)p, cflags);

    /* Compilation failed; go back for another re, skipping to blank line
    if non-interactive. */

    if (rc != 0)
      {
      (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
      fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
      goto SKIP_DATA;
      }
    }

  /* Handle compiling via the native interface */

  else
#endif  /* !defined NOPOSIX */

    {
    if (timeit)
      {
      register int i;
      clock_t time_taken;
      clock_t start_time = clock();
      for (i = 0; i < LOOPREPEAT; i++)
        {
        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
        if (re != NULL) free(re);
        }
      time_taken = clock() - start_time;
      fprintf(outfile, "Compile time %.3f milliseconds\n",
        ((double)time_taken * 1000.0) /
        ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
      }

    re = pcre_compile((char *)p, options, &error, &erroroffset, tables);

    /* Compilation failed; go back for another re, skipping to blank line
    if non-interactive. */

    if (re == NULL)
      {
      fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
      SKIP_DATA:
      if (infile != stdin)
        {
        for (;;)
          {
          if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
            {
            done = 1;
            goto CONTINUE;
            }
          len = (int)strlen((char *)buffer);
          while (len > 0 && isspace(buffer[len-1])) len--;
          if (len == 0) break;
          }
        fprintf(outfile, "\n");
        }
      goto CONTINUE;
      }

    /* Compilation succeeded; print data if required. There are now two
    info-returning functions. The old one has a limited interface and
    returns only limited data. Check that it agrees with the newer one. */

    if (do_showinfo)
      {
      unsigned long int get_options;
      int old_first_char, old_options, old_count;
      int count, backrefmax, first_char, need_char;
      size_t size;

      if (do_debug) print_internals(re);

      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
      new_info(re, NULL, PCRE_INFO_SIZE, &size);
      new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
      new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
      new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
      new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);

      old_count = pcre_info(re, &old_options, &old_first_char);
      if (count < 0) fprintf(outfile,
        "Error %d from pcre_info()\n", count);
      else
        {
        if (old_count != count) fprintf(outfile,
          "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
            old_count);

        if (old_first_char != first_char) fprintf(outfile,
          "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
            first_char, old_first_char);

        if (old_options != (int)get_options) fprintf(outfile,
          "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
            get_options, old_options);
        }

      if (size != gotten_store) fprintf(outfile,
        "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
        size, gotten_store);

      fprintf(outfile, "Capturing subpattern count = %d\n", count);
      if (backrefmax > 0)
        fprintf(outfile, "Max back reference = %d\n", backrefmax);
      if (get_options == 0) fprintf(outfile, "No options\n");
        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
          ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
          ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
          ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
          ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
          ((get_options & PCRE_UTF8) != 0)? " utf8" : "");

      if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
        fprintf(outfile, "Case state changes\n");

      if (first_char == -1)
        {
        fprintf(outfile, "First char at start or follows \\n\n");
        }
      else if (first_char < 0)
        {
        fprintf(outfile, "No first char\n");
        }
      else
        {
        if (isprint(first_char))
          fprintf(outfile, "First char = \'%c\'\n", first_char);
        else
          fprintf(outfile, "First char = %d\n", first_char);
        }

      if (need_char < 0)
        {
        fprintf(outfile, "No need char\n");
        }
      else
        {
        if (isprint(need_char))
          fprintf(outfile, "Need char = \'%c\'\n", need_char);
        else
          fprintf(outfile, "Need char = %d\n", need_char);
        }
      }

    /* If /S was present, study the regexp to generate additional info to
    help with the matching. */

    if (do_study)
      {
      if (timeit)
        {
        register int i;
        clock_t time_taken;
        clock_t start_time = clock();
        for (i = 0; i < LOOPREPEAT; i++)
          extra = pcre_study(re, study_options, &error);
        time_taken = clock() - start_time;
        if (extra != NULL) free(extra);
        fprintf(outfile, "  Study time %.3f milliseconds\n",
          ((double)time_taken * 1000.0)/
          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
        }

      extra = pcre_study(re, study_options, &error);
      if (error != NULL)
        fprintf(outfile, "Failed to study: %s\n", error);
      else if (extra == NULL)
        fprintf(outfile, "Study returned NULL\n");

      else if (do_showinfo)
        {
        uschar *start_bits = NULL;
        new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
        if (start_bits == NULL)
          fprintf(outfile, "No starting character set\n");
        else
          {
          int i;
          int c = 24;
          fprintf(outfile, "Starting character set: ");
          for (i = 0; i < 256; i++)
            {
            if ((start_bits[i/8] & (1<<(i%8))) != 0)
              {
              if (c > 75)
                {
                fprintf(outfile, "\n  ");
                c = 2;
                }
              if (isprint(i) && i != ' ')
                {
                fprintf(outfile, "%c ", i);
                c += 2;
                }
              else
                {
                fprintf(outfile, "\\x%02x ", i);
                c += 5;
                }
              }
            }
          fprintf(outfile, "\n");
          }
        }
      }
    }

  /* Read data lines and test them */

  for (;;)
    {
    unsigned char *q;
    unsigned char *bptr = dbuffer;
    int *use_offsets = offsets;
    int use_size_offsets = size_offsets;
    int count, c;
    int copystrings = 0;
    int getstrings = 0;
    int getlist = 0;
    int gmatched = 0;
    int start_offset = 0;
    int g_notempty = 0;

    options = 0;

    if (infile == stdin) printf("data> ");
    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
      {
      done = 1;
      goto CONTINUE;
      }
    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);

    len = (int)strlen((char *)buffer);
    while (len > 0 && isspace(buffer[len-1])) len--;
    buffer[len] = 0;
    if (len == 0) break;

    p = buffer;
    while (isspace(*p)) p++;

    q = dbuffer;
    while ((c = *p++) != 0)
      {
      int i = 0;
      int n = 0;
      if (c == '\\') switch ((c = *p++))
        {
        case 'a': c =    7; break;
        case 'b': c = '\b'; break;
        case 'e': c =   27; break;
        case 'f': c = '\f'; break;
        case 'n': c = '\n'; break;
        case 'r': c = '\r'; break;
        case 't': c = '\t'; break;
        case 'v': c = '\v'; break;

        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
        c -= '0';
        while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
          c = c * 8 + *p++ - '0';
        break;

        case 'x':

        /* Handle \x{..} specially - new Perl thing for utf8 */

        if (*p == '{')
          {
          unsigned char *pt = p;
          c = 0;
          while (isxdigit(*(++pt)))
            c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
          if (*pt == '}')
            {
            unsigned char buffer[8];
            int ii, utn;
            utn = ord2utf8(c, buffer);
            for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
            c = buffer[ii];   /* Last byte */
            p = pt + 1;
            break;
            }
          /* Not correct form; fall through */
          }

        /* Ordinary \x */

        c = 0;
        while (i++ < 2 && isxdigit(*p))
          {
          c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
          p++;
          }
        break;

        case 0:   /* Allows for an empty line */
        p--;
        continue;

        case 'A':  /* Option setting */
        options |= PCRE_ANCHORED;
        continue;

        case 'B':
        options |= PCRE_NOTBOL;
        continue;

        case 'C':
        while(isdigit(*p)) n = n * 10 + *p++ - '0';
        copystrings |= 1 << n;
        continue;

        case 'G':
        while(isdigit(*p)) n = n * 10 + *p++ - '0';
        getstrings |= 1 << n;
        continue;

        case 'L':
        getlist = 1;
        continue;

        case 'N':
        options |= PCRE_NOTEMPTY;
        continue;

        case 'O':
        while(isdigit(*p)) n = n * 10 + *p++ - '0';
        if (n > size_offsets_max)
          {
          size_offsets_max = n;
          free(offsets);
          use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
          if (offsets == NULL)
            {
            printf("** Failed to get %d bytes of memory for offsets vector\n",
              size_offsets_max * sizeof(int));
            return 1;
            }
          }
        use_size_offsets = n;
        if (n == 0) use_offsets = NULL;
        continue;

        case 'Z':
        options |= PCRE_NOTEOL;
        continue;
        }
      *q++ = c;
      }
    *q = 0;
    len = q - dbuffer;

    /* Handle matching via the POSIX interface, which does not
    support timing. */

#if !defined NOPOSIX
    if (posix || do_posix)
      {
      int rc;
      int eflags = 0;
      regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
      if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
      if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;

      rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);

      if (rc != 0)
        {
        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
        fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
        }
      else
        {
        size_t i;
        for (i = 0; i < use_size_offsets; i++)
          {
          if (pmatch[i].rm_so >= 0)
            {
            fprintf(outfile, "%2d: ", (int)i);
            pchars(dbuffer + pmatch[i].rm_so,
              pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
            fprintf(outfile, "\n");
            if (i == 0 && do_showrest)
              {
              fprintf(outfile, " 0+ ");
              pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
              fprintf(outfile, "\n");
              }
            }
          }
        }
      free(pmatch);
      }

    /* Handle matching via the native interface - repeats for /g and /G */

    else
#endif  /* !defined NOPOSIX */

    for (;; gmatched++)    /* Loop for /g or /G */
      {
      if (timeit)
        {
        register int i;
        clock_t time_taken;
        clock_t start_time = clock();
        for (i = 0; i < LOOPREPEAT; i++)
          count = pcre_exec(re, extra, (char *)bptr, len,
            start_offset, options | g_notempty, use_offsets, use_size_offsets);
        time_taken = clock() - start_time;
        fprintf(outfile, "Execute time %.3f milliseconds\n",
          ((double)time_taken * 1000.0)/
          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
        }

      count = pcre_exec(re, extra, (char *)bptr, len,
        start_offset, options | g_notempty, use_offsets, use_size_offsets);

      if (count == 0)
        {
        fprintf(outfile, "Matched, but too many substrings\n");
        count = use_size_offsets/3;
        }

      /* Matched */

      if (count >= 0)
        {
        int i;
        for (i = 0; i < count * 2; i += 2)
          {
          if (use_offsets[i] < 0)
            fprintf(outfile, "%2d: <unset>\n", i/2);
          else
            {
            fprintf(outfile, "%2d: ", i/2);
            pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
            fprintf(outfile, "\n");
            if (i == 0)
              {
              if (do_showrest)
                {
                fprintf(outfile, " 0+ ");
                pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
                fprintf(outfile, "\n");
                }
              }
            }
          }

        for (i = 0; i < 32; i++)
          {
          if ((copystrings & (1 << i)) != 0)
            {
            char copybuffer[16];
            int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
              i, copybuffer, sizeof(copybuffer));
            if (rc < 0)
              fprintf(outfile, "copy substring %d failed %d\n", i, rc);
            else
              fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
            }
          }

        for (i = 0; i < 32; i++)
          {
          if ((getstrings & (1 << i)) != 0)
            {
            const char *substring;
            int rc = pcre_get_substring((char *)bptr, use_offsets, count,
              i, &substring);
            if (rc < 0)
              fprintf(outfile, "get substring %d failed %d\n", i, rc);
            else
              {
              fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
              /* free((void *)substring); */
              pcre_free_substring(substring);
              }
            }
          }

        if (getlist)
          {
          const char **stringlist;
          int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
            &stringlist);
          if (rc < 0)
            fprintf(outfile, "get substring list failed %d\n", rc);
          else
            {
            for (i = 0; i < count; i++)
              fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
            if (stringlist[i] != NULL)
              fprintf(outfile, "string list not terminated by NULL\n");
            /* free((void *)stringlist); */
            pcre_free_substring_list(stringlist);
            }
          }
        }

      /* Failed to match. If this is a /g or /G loop and we previously set
      g_notempty after a null match, this is not necessarily the end.
      We want to advance the start offset, and continue. Fudge the offset
      values to achieve this. We won't be at the end of the string - that
      was checked before setting g_notempty. */

      else
        {
        if (g_notempty != 0)
          {
          use_offsets[0] = start_offset;
          use_offsets[1] = start_offset + 1;
          }
        else
          {
          if (gmatched == 0)   /* Error if no previous matches */
            {
            if (count == -1) fprintf(outfile, "No match\n");
              else fprintf(outfile, "Error %d\n", count);
            }
          break;  /* Out of the /g loop */
          }
        }

      /* If not /g or /G we are done */

      if (!do_g && !do_G) break;

      /* If we have matched an empty string, first check to see if we are at
      the end of the subject. If so, the /g loop is over. Otherwise, mimic
      what Perl's /g options does. This turns out to be rather cunning. First
      we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
      same point. If this fails (picked up above) we advance to the next
      character. */

      g_notempty = 0;
      if (use_offsets[0] == use_offsets[1])
        {
        if (use_offsets[0] == len) break;
        g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
        }

      /* For /g, update the start offset, leaving the rest alone */

      if (do_g) start_offset = use_offsets[1];

      /* For /G, update the pointer and length */

      else
        {
        bptr += use_offsets[1];
        len -= use_offsets[1];
        }
      }  /* End of loop for /g and /G */
    }    /* End of loop for data lines */

  CONTINUE:

#if !defined NOPOSIX
  if (posix || do_posix) regfree(&preg);
#endif

  if (re != NULL) free(re);
  if (extra != NULL) free(extra);
  if (tables != NULL)
    {
    free((void *)tables);
    setlocale(LC_CTYPE, "C");
    }
  }

fprintf(outfile, "\n");
return 0;
}
Пример #7
0
	inline int info(int *optptr, int *firstcharptr) const {
		_ASSERTE(compiled());
		return pcre_info(_Code::get(), optptr, firstcharptr);
	}
Пример #8
0
str
rxx::init (const char *pat, const char *opt)
{
  extra = NULL;
  nsubpat = 0;
  ovector = NULL;
  bool studyit = false;

  int options = 0;
  for (; *opt; opt++)
    switch (*opt) {
    case '^':
      options |= PCRE_ANCHORED;
      break;
    case 'i':
      options |= PCRE_CASELESS;
      break;
    case 's':
      options |= PCRE_DOTALL;
      break;
    case 'm':
      options |= PCRE_MULTILINE;
      break;
    case 'x':
      options |= PCRE_EXTENDED;
      break;
    case 'U':
      options |= PCRE_UNGREEDY;
      break;
    case 'X':
      options |= PCRE_EXTRA;
      break;
    case 'S':
      studyit = true;
      break;
    default:
      return strbuf ("invalid regular expression option '%c'\n", *opt);
    }

  const char *errptr;
  int erroffset;
  re = pcre_compile (pat, options, &errptr, &erroffset, NULL);
  if (!re) {
    strbuf err;
    err << "Invalid regular expression:\n"
	<< "   " << pat << "\n";
    suio_fill (err.tosuio (), ' ', erroffset);
    err << "   ^\n"
	<< errptr << ".\n";
    return err;
  }
  if (studyit) {
    str err = study ();
    if (err)
      return strbuf () << "Could not study regular expression: " << err;
  }

  int ns = pcre_info (re, NULL, NULL);
  assert (ns >= 0);
  ovecsize = (ns + 1) * 3;
  return NULL;
}
Пример #9
0
//PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
pcre *compileRegex( char *regex , int regex_len , char *msg , int msglen ) 
{
	pcre				*re = NULL;
	pcre_extra			*extra;
	int					 coptions = 0;
	int					 soptions = 0;
	const char			*error;
	int					 erroffset;
	char				 delimiter;
	char				 start_delimiter;
	char				 end_delimiter;
	char				*p, *pp;
	char				*pattern;
	int					 do_study = 0;
	int					 poptions = 0;
	unsigned const char *tables = NULL;
    char buf[ 1024 ] ;

#if HAVE_SETLOCALE
	char				*locale = setlocale(LC_CTYPE, NULL);
#endif

    //R.A.W.
	//pcre_cache_entry	*pce;
	//pcre_cache_entry	 new_entry;
    if( msglen )
    {
        *msg = '\0';
    }

	/* Try to lookup the cached regex entry, and if successful, just pass
	   back the compiled pattern, otherwise go on and compile it. */
	regex_len = strlen(regex);

//R.A.W.
#if 0  

	if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
		/*
		 * We use a quick pcre_info() check to see whether cache is corrupted, and if it
		 * is, we flush it and compile the pattern from scratch.
		 */
		if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
			zend_hash_clean(&PCRE_G(pcre_cache));
		} else {
#if HAVE_SETLOCALE
			if (!strcmp(pce->locale, locale)) {
#endif
				return pce;
#if HAVE_SETLOCALE
			}
#endif
		}
	}
#endif
	p = regex;
	
	/* Parse through the leading whitespace, and display a warning if we
	   get to the end without encountering a delimiter. */
	while (isspace((int)*(unsigned char *)p)) p++;
	if (*p == 0) {

        // R.A.W.
		//php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
        strncpy(msg, "Empty regular expression" , msglen );

		return NULL;
	}
	
	/* Get the delimiter and display a warning if it is alphanumeric
	   or a backslash. */
	delimiter = *p++;
	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
		//php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
		strncpy( msg, "Delimiter must not be alphanumeric or backslash",msglen);
		return NULL;
	}

	start_delimiter = delimiter;
	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
		delimiter = pp[5];
	end_delimiter = delimiter;

	if (start_delimiter == end_delimiter) {
		/* We need to iterate through the pattern, searching for the ending delimiter,
		   but skipping the backslashed delimiters.  If the ending delimiter is not
		   found, display a warning. */
		pp = p;
		while (*pp != 0) {
			if (*pp == '\\' && pp[1] != 0) pp++;
			else if (*pp == delimiter)
				break;
			pp++;
		}
		if (*pp == 0) {
			//php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);

            //R.A.W.
            strncpy( msg, "No ending delimiter found" ,msglen ) ;
			return NULL;
		}
	} else {
		/* We iterate through the pattern, searching for the matching ending
		 * delimiter. For each matching starting delimiter, we increment nesting
		 * level, and decrement it for each matching ending delimiter. If we
		 * reach the end of the pattern without matching, display a warning.
		 */
		int brackets = 1; 	/* brackets nesting level */
		pp = p;
		while (*pp != 0) {
			if (*pp == '\\' && pp[1] != 0) pp++;
			else if (*pp == end_delimiter && --brackets <= 0)
				break;
			else if (*pp == start_delimiter)
				brackets++;
			pp++;
		}
		if (*pp == 0) {
			//php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
			strncpy( msg,"No ending matching delimiter found",msglen ) ;
			return NULL;
		}
	}
	
	/* Make a copy of the actual pattern. */
    // R.A.W.
	//pattern = strndup(p, pp-p);
    // osx compile is complaining about strndup and since I have te
    // other function anyway and since I'll someday rewrite this fn, just
    // call that other function now :>)
    pattern = ghstrndup( p,pp-p) ;


	/* Move on to the options */
	pp++;

	/* Parse through the options, setting appropriate flags.  Display
	   a warning if we encounter an unknown modifier. */	
	while (*pp != 0) {
		switch (*pp++) {
			/* Perl compatible options */
			case 'i':	coptions |= PCRE_CASELESS;		break;
			case 'm':	coptions |= PCRE_MULTILINE;		break;
			case 's':	coptions |= PCRE_DOTALL;		break;
			case 'x':	coptions |= PCRE_EXTENDED;		break;
			
			/* PCRE specific options */
			case 'A':	coptions |= PCRE_ANCHORED;		break;
			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;break;
			case 'S':	do_study  = 1;					break;
			case 'U':	coptions |= PCRE_UNGREEDY;		break;
			case 'X':	coptions |= PCRE_EXTRA;			break;
			case 'u':	coptions |= PCRE_UTF8;			break;

                // R.A.W.
			/* Custom preg options */
                //case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
			
			case ' ':
			case '\n':
				break;

			default:
				//php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
                strncpy( msg,"Unknown modifier",msglen ) ;
				free(pattern);
				return NULL;
		}
	}

    //R.A.W.
    tables = NULL ;
#if 0 
#if HAVE_SETLOCALE
	if (strcmp(locale, "C"))
		tables = pcre_maketables();
#endif
#endif

	/* Compile pattern and display a warning if compilation failed. */
	re = pcre_compile(pattern,
					  coptions,
					  &error,
					  &erroffset,
					  tables);

	if (re == NULL) {
		//php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);

        // R.A.W.
		sprintf(buf, "Compilation of /%s/ failed: %s at offset %d",pattern, error, erroffset);
		//sprintf(buf, "Compilation failed: %s at offset %d",error, erroffset);
        strncpy( msg, buf , msglen ) ;
		free(pattern);
        // R.A.W.
		//if (tables) {
			//pefree((void*)tables, 1);
        //}
		return NULL;
	}

	/* If study option was specified, study the pattern and
	   store the result in extra for passing to pcre_exec. */
	if (do_study) {
		extra = pcre_study(re, soptions, &error);
		if (extra) {
			extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
		}
		if (error != NULL) {
			strncpy( msg, "Error while studying pattern",msglen);
		}
	} else {
		extra = NULL;
	}

	free(pattern);


//R.A.W.
#if 0 
	/*
	 * If we reached cache limit, clean out the items from the head of the list;
	 * these are supposedly the oldest ones (but not necessarily the least used
	 * ones).
	 */
	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
		int num_clean = PCRE_CACHE_SIZE / 8;
		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
	}

	/* Store the compiled pattern and extra info in the cache. */
	new_entry.re = re;
	new_entry.extra = extra;
	new_entry.preg_options = poptions;
	new_entry.compile_options = coptions;
#if HAVE_SETLOCALE
	new_entry.locale = pestrdup(locale, 1);
	new_entry.tables = tables;
#endif
	zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
						sizeof(pcre_cache_entry), (void**)&pce);

#endif

    //	return pce;
    //R.A.W.
    return re ;
}
Пример #10
0
 int getNumberOfCapturingSubpatterns() const {
     return pcre_info(re, NULL, NULL);
 }
Пример #11
0
 int getOvecSize() const {
     return 3 * (pcre_info(re, NULL, NULL) + 1);
 }