示例#1
0
文件: pcresearch.c 项目: aixoss/grep
/* Run the already-compiled PCRE pattern over SUBJECT, which is
   SEARCH_BYTES long, starting at SEARCH_OFFSET with options OPTIONS;
   match offsets are stored into SUB.  If the JIT engine reports that
   its stack limit was hit, double the stack size and try again.
   Return the value of the last pcre_exec call: nonnegative on a
   match, or a negative PCRE error number.  */
static int
jit_exec (char const *subject, int search_bytes, int search_offset,
          int options, int *sub)
{
  for (;;)
    {
      int rc = pcre_exec (cre, extra, subject, search_bytes, search_offset,
                          options, sub, NSUB);

# if PCRE_STUDY_JIT_COMPILE
      /* Retry only for JIT stack exhaustion, and only while the size
         is positive and can still be doubled without overflowing.  */
      if (rc != PCRE_ERROR_JIT_STACKLIMIT
          || jit_stack_size <= 0 || INT_MAX / 2 < jit_stack_size)
        return rc;

      {
        /* The stack created here persists between calls so that the
           previous one can be released before a bigger one is made.  */
        static pcre_jit_stack *grown_stack;
        int start_size = jit_stack_size;
        int limit_size = start_size * 2;

        jit_stack_size = limit_size;
        if (grown_stack)
          pcre_jit_stack_free (grown_stack);
        grown_stack = pcre_jit_stack_alloc (start_size, limit_size);
        if (!grown_stack)
          error (EXIT_TROUBLE, 0,
                 _("failed to allocate memory for the PCRE JIT stack"));
        pcre_assign_jit_stack (extra, NULL, grown_stack);
      }
# else
      return rc;
# endif
    }
}
示例#2
0
/*
 * Compile self->pattern into self->re and, when self->optimize is set,
 * study it into self->study.  When self->use_jit is also set, the study
 * requests JIT compilation and a JIT stack sized by self->jit_stack_init /
 * self->jit_stack_max is allocated and attached to the study data.
 *
 * Returns 1 on success.  Returns 0 with a PcreError exception set on
 * failure.
 */
static int
pcre_RegexObject_compile(pcre_RegexObject *self)
{
	/* PCRE hands back pointers to constant message text. */
	const char *error = NULL;
	int erroffset = 0;
	int options = 0;

	self->re = pcre_compile(self->pattern, self->flags, &error, &erroffset, NULL);
	if (self->re == NULL) {
		/* PyErr_Format sizes the message itself, so no fixed-size
		 * scratch buffer can be overrun. */
		PyErr_Format(PcreError, "Pattern compilation error at offset %d: %s", erroffset, error);
		return 0;
	}

	if (!self->optimize && self->use_jit) {
		PyErr_SetString(PcreError, "Invalid combination of arguments. To enable JIT you must enable pattern optimization.");
		return 0;
	}

	if (!self->optimize)
		return 1;

	if (self->use_jit) {
		if (!jit_enabled) {
			PyErr_SetString(PcreError, "Current version of libpcre is compiled without JIT support.");
			return 0;
		}

		options |= PCRE_STUDY_JIT_COMPILE;
	}

	/* pcre_study may return NULL on success, so the error string is the
	 * only reliable failure indicator. */
	self->study = pcre_study(self->re, options, &error);
	if (error != NULL) {
		PyErr_Format(PcreError, "Pattern study error: %s", error);
		return 0;
	}

	if (!self->use_jit)
		return 1;

	// TODO: check how this behaves with "unusual" stack parameters

	self->jit_stack = pcre_jit_stack_alloc(self->jit_stack_init, self->jit_stack_max);
	if (self->jit_stack == NULL) {
		PyErr_SetString(PcreError, "JIT stack allocation exited with an error.");
		return 0;
	}
	pcre_assign_jit_stack(self->study, NULL, self->jit_stack);

	return 1;
}
示例#3
0
/*
 * Manage a single JIT stack kept in a function-local static.
 *
 * With a NULL extra the stack (if any) is released and forgotten.
 * Otherwise the stack is allocated on first use and attached to extra
 * together with callback8.
 */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra) {
		if (shared_stack == NULL)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	/* Teardown request. */
	if (shared_stack != NULL)
		pcre_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
示例#4
0
文件: af.c 项目: lucy/af
/*
 * Compile pattern s into r.
 *
 * On return r->re holds the compiled pattern, r->extra the result of a
 * JIT-enabled pcre_study, and r->stack a newly allocated JIT stack.
 * PCRE_NO_AUTO_CAPTURE and PCRE_NO_AUTO_POSSESS are always added to
 * options.  Every failure is passed to die() / die_errno().
 */
void re_compile(re *r, const char *s, int options)
{
	int erroff = 0;
	const char *err = NULL;

	options |= PCRE_NO_AUTO_CAPTURE;
	options |= PCRE_NO_AUTO_POSSESS;

	r->re = pcre_compile(s, options, &err, &erroff, NULL);
	if (r->re == NULL)
		die("error at %d: %s", erroff, err);

	/* pcre_study reports no offset; printing the one left over from
	 * pcre_compile would point the user at a bogus position. */
	err = NULL;
	r->extra = pcre_study(r->re, PCRE_STUDY_JIT_COMPILE, &err);
	if (err != NULL)
		die("study error: %s", err);

	r->stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
	if (r->stack == NULL)
		die_errno("%s", "compile: pcre_jit_stack_alloc failed");
}
示例#5
0
文件: grep.c 项目: niketpathak/git
/*
 * Compile p->pattern with PCRE1 and study it, storing the results in
 * p->pcre1_regexp and p->pcre1_extra_info.
 *
 * PCRE_MULTILINE is always set.  opt->ignore_case adds PCRE_CASELESS (and
 * builds character tables for non-ASCII patterns); a non-ASCII pattern in
 * a UTF-8 locale adds PCRE_UTF8.  When GIT_PCRE1_USE_JIT is defined and
 * pcre_config reports JIT support, a JIT stack is allocated and attached
 * to the study data.
 */
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	const char *errmsg;
	int erroff;
	int flags = PCRE_MULTILINE;

	if (opt->ignore_case) {
		if (has_non_ascii(p->pattern))
			p->pcre1_tables = pcre_maketables();
		flags |= PCRE_CASELESS;
	}
	if (is_utf8_locale() && has_non_ascii(p->pattern))
		flags |= PCRE_UTF8;

	p->pcre1_regexp = pcre_compile(p->pattern, flags, &errmsg, &erroff,
				      p->pcre1_tables);
	if (p->pcre1_regexp == NULL)
		compile_regexp_failed(p, errmsg);

	/* A NULL study result is only fatal when an error message came with it. */
	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &errmsg);
	if (p->pcre1_extra_info == NULL && errmsg != NULL)
		die("%s", errmsg);

#ifdef GIT_PCRE1_USE_JIT
	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
	switch (p->pcre1_jit_on) {
	case 0:
		/* No JIT available: nothing more to set up. */
		break;
	case 1:
		p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		if (p->pcre1_jit_stack == NULL)
			die("Couldn't allocate PCRE JIT stack");
		pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
		break;
	default:
		die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d",
		    p->pcre1_jit_on);
	}
#endif
}
示例#6
0
/**
 * @brief Execute the rule.
 *
 * Runs the compiled pattern in the rule data against the field value.
 * The JIT stack (when PCRE_JIT_STACK is defined) is allocated only after
 * the input has been validated, so no early return can leak it.
 *
 * @param[in] ib Ironbee engine
 * @param[in] tx The transaction.
 * @param[in] rule The rule; its flags decide whether captures are stored.
 * @param[in,out] data User data. A @c pcre_rule_data_t.
 * @param[in] flags Operator instance flags
 * @param[in] field The field content (NULSTR or BYTESTR).
 * @param[out] result Set to 1 on a match, 0 otherwise.
 *
 * @returns IB_OK on match or no-match, IB_EALLOC if the output vector
 *          cannot be allocated, IB_EINVAL for an unsupported field type,
 *          the status of ib_field_value() if it fails, and IB_EUNKNOWN
 *          for any other pcre_exec() result.
 */
static ib_status_t pcre_operator_execute(ib_engine_t *ib,
                                         ib_tx_t *tx,
                                         const ib_rule_t *rule,
                                         void *data,
                                         ib_flags_t flags,
                                         ib_field_t *field,
                                         ib_num_t *result)
{
    IB_FTRACE_INIT();

    assert(ib!=NULL);
    assert(tx!=NULL);
    assert(tx->dpi!=NULL);
    assert(data!=NULL);

    int matches;
    ib_status_t ib_rc;
    const int ovecsize = 3 * MATCH_MAX;
    int *ovector = malloc(ovecsize * sizeof(*ovector));
    const char* subject = NULL;
    size_t subject_len = 0;
    const ib_bytestr_t* bytestr;
    pcre_rule_data_t *rule_data = (pcre_rule_data_t *)data;
    pcre_extra *edata = NULL;
#ifdef PCRE_JIT_STACK
    pcre_jit_stack *jit_stack = NULL;
#endif

    if (ovector==NULL) {
        IB_FTRACE_RET_STATUS(IB_EALLOC);
    }

    if (field->type == IB_FTYPE_NULSTR) {
        ib_rc = ib_field_value(field, ib_ftype_nulstr_out(&subject));
        if (ib_rc != IB_OK) {
            free(ovector);
            IB_FTRACE_RET_STATUS(ib_rc);
        }

        if (subject != NULL) {
            subject_len = strlen(subject);
        }
    }
    else if (field->type == IB_FTYPE_BYTESTR) {
        ib_rc = ib_field_value(field, ib_ftype_bytestr_out(&bytestr));
        if (ib_rc != IB_OK) {
            free(ovector);
            IB_FTRACE_RET_STATUS(ib_rc);
        }

        if (bytestr != NULL) {
            subject_len = ib_bytestr_length(bytestr);
            subject = (const char *) ib_bytestr_const_ptr(bytestr);
        }
    }
    else {
        free(ovector);
        IB_FTRACE_RET_STATUS(IB_EINVAL);
    }

    /* A missing value is matched as the empty string. */
    if (subject == NULL) {
        subject     = "";
    }

    /* Debug block. Escapes a string and prints it to the log.
     * Memory is freed. */
    if (ib_log_get_level(ib) >= 9) {
        char *debug_str = ib_util_hex_escape(subject, subject_len);

        if ( debug_str != NULL ) {
            ib_log_debug3_tx(tx, "Matching against: %s", debug_str);
            free( debug_str );
        }
    }

#ifdef PCRE_JIT_STACK
    /* From here on there are no early returns, so the stack is always
     * released after pcre_exec() below. */
    jit_stack = pcre_jit_stack_alloc(PCRE_JIT_MIN_STACK_SZ,
                                     PCRE_JIT_MAX_STACK_SZ);

    /* Log if we expected jit, but did not get it. */
    if (rule_data->is_jit && jit_stack == NULL) {
        ib_log_debug(ib,
                     "Failed to allocate a jit stack for a jit-compiled rule. "
                     "Not using jit for this call.");
        edata = NULL;
    }

    /* If the study data is NULL or size zero, don't use it. */
    else if (rule_data->edata == NULL || rule_data->study_data_sz <= 0) {
        edata = NULL;
    }

    /* Only if we get here do we use the study data (edata) in the rule_data. */
    else {
        edata = rule_data->edata;
        pcre_assign_jit_stack(rule_data->edata, NULL, jit_stack);
    }

#endif

    matches = pcre_exec(rule_data->cpatt,
                        edata,
                        subject,
                        subject_len,
                        0, /* Starting offset. */
                        0, /* Options. */
                        ovector,
                        ovecsize);

#ifdef PCRE_JIT_STACK
    if (jit_stack != NULL) {
        pcre_jit_stack_free(jit_stack);
    }
#endif

    if (matches > 0) {
        if (ib_flags_all(rule->flags, IB_RULE_FLAG_CAPTURE) == true) {
            pcre_set_matches(ib, tx, ovector, matches, subject);
        }
        ib_rc = IB_OK;
        *result = 1;
    }
    else if (matches == PCRE_ERROR_NOMATCH) {

        if (ib_log_get_level(ib) >= 7) {
            /* The subject need not be NUL-terminated, so log a terminated
             * copy; skip the log line if the copy cannot be allocated. */
            char* tmp_c = malloc(subject_len+1);
            if (tmp_c != NULL) {
                memcpy(tmp_c, subject, subject_len);
                tmp_c[subject_len] = '\0';
                /* No match. Return false to the caller (*result = 0). */
                ib_log_debug2_tx(tx, "No match for [%s] using pattern [%s].",
                            tmp_c,
                            rule_data->patt);
                free(tmp_c);
            }
        }

        ib_rc = IB_OK;
        *result = 0;
    }
    else {
        /* Some other error occurred. Set the status to false and
        report the error. */
        ib_rc = IB_EUNKNOWN;
        *result = 0;
    }

    free(ovector);
    IB_FTRACE_RET_STATUS(ib_rc);
}
示例#7
0
文件: regexp.c 项目: skibbipl/rspamd
/**
 * Create a new regexp object from a pattern string.
 *
 * When `flags` is NULL the pattern may carry its own delimiters and flags:
 * either "/expr/flags", or "m<sep>expr<sep>flags" (with "m{expr}flags" as
 * the paired-brace form; the "m" form also requests a full match).  If no
 * usable separator is found the whole string is used as the expression.
 * An unknown flag in this mode produces a warning and stops flag parsing.
 *
 * When `flags` is non-NULL the whole `pattern` is the expression and every
 * flag character is checked strictly; an unknown one is an error.
 *
 * Recognized flags: i (caseless), m (multiline), s (dotall), x (extended),
 * u (UTF-8, non-raw), r (raw), O (disable optimization).
 *
 * @param pattern expression, optionally with delimiters and flags
 * @param flags explicit flags or NULL to parse them from `pattern`
 * @param err set when NULL is returned
 * @return the new regexp, or NULL on a malformed pattern, an invalid
 *         strict flag, or a PCRE compilation failure
 */
rspamd_regexp_t*
rspamd_regexp_new (const gchar *pattern, const gchar *flags,
		GError **err)
{
	const gchar *start = pattern, *end, *flags_str = NULL, *err_str;
	rspamd_regexp_t *res;
	pcre *r;
	gchar sep = 0, *real_pattern;
	gint regexp_flags = 0, rspamd_flags = 0, err_off, study_flags = 0, ncaptures;
	gboolean strict_flags = FALSE;

	rspamd_regexp_library_init ();

	if (flags == NULL) {
		/* We need to parse pattern and detect flags set */
		if (*start == '/') {
			sep = '/';
		}
		else if (*start == 'm') {
			start ++;
			sep = *start;

			/* Paired braces */
			if (sep == '{') {
				sep = '}';
			}

			rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		if (sep == '\0' || g_ascii_isalnum (sep)) {
			/* We have no flags, no separators and just use all line as expr */
			start = pattern;
			end = start + strlen (pattern);
			rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		else {
			/* The last separator closes the expression; flags follow it */
			end = strrchr (pattern, sep);

			if (end == NULL || end <= start) {
				g_set_error (err, rspamd_regexp_quark(), EINVAL,
						"pattern is not enclosed with %c: %s",
						sep, pattern);
				return NULL;
			}
			flags_str = end + 1;
			start ++;
		}
	}
	else {
		/* Strictly check all flags */
		strict_flags = TRUE;
		start = pattern;
		end = pattern + strlen (pattern);
		flags_str = flags;
	}

	/* Raw (non-UTF-8) matching is the default until 'u' is seen */
	rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
	regexp_flags &= ~PCRE_UTF8;

	if (flags_str != NULL) {
		while (*flags_str) {
			switch (*flags_str) {
			case 'i':
				regexp_flags |= PCRE_CASELESS;
				break;
			case 'm':
				regexp_flags |= PCRE_MULTILINE;
				break;
			case 's':
				regexp_flags |= PCRE_DOTALL;
				break;
			case 'x':
				regexp_flags |= PCRE_EXTENDED;
				break;
			case 'u':
				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
				regexp_flags |= PCRE_UTF8;
				break;
			case 'O':
				/* We optimize all regexps by default */
				rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
				break;
			case 'r':
				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
				regexp_flags &= ~PCRE_UTF8;
				break;
			default:
				if (strict_flags) {
					g_set_error (err, rspamd_regexp_quark(), EINVAL,
							"invalid regexp flag: %c in pattern %s",
							*flags_str, pattern);
					return NULL;
				}
				msg_warn ("invalid flag '%c' in pattern %s", *flags_str, pattern);
				goto fin;
			}
			flags_str++;
		}
	}
fin:

	/* Copy just the expression, without delimiters and flags */
	real_pattern = g_malloc (end - start + 1);
	rspamd_strlcpy (real_pattern, start, end - start + 1);

	r = pcre_compile (real_pattern, regexp_flags, &err_str, &err_off, NULL);

	if (r == NULL) {
		g_set_error (err, rspamd_regexp_quark(), EINVAL,
			"invalid regexp pattern: '%s': %s at position %d",
			pattern, err_str, err_off);
		g_free (real_pattern);

		return NULL;
	}

	/* Now allocate the target structure */
	res = g_slice_alloc0 (sizeof (*res));
	REF_INIT_RETAIN (res, rspamd_regexp_dtor);
	res->flags = rspamd_flags;
	res->pattern = real_pattern;

	if (rspamd_flags & RSPAMD_REGEXP_FLAG_RAW) {
		res->raw_re = r;
	}
	else {
		res->re = r;
		/*
		 * The raw variant must be built from the same extracted expression
		 * as the UTF-8 one; the original string still contains the
		 * delimiters and the trailing flags.
		 */
		res->raw_re = pcre_compile (real_pattern, regexp_flags & ~PCRE_UTF8,
				&err_str, &err_off, NULL);

		if (res->raw_re == NULL) {
			msg_warn ("invalid raw regexp pattern: '%s': %s at position %d",
					pattern, err_str, err_off);
		}
	}

#ifdef HAVE_PCRE_JIT
	study_flags |= PCRE_STUDY_JIT_COMPILE;
#endif

	if (!(rspamd_flags & RSPAMD_REGEXP_FLAG_NOOPT)) {
		/* Optimize regexp */
		if (res->re) {
			res->extra = pcre_study (res->re, study_flags, &err_str);
			if (res->extra != NULL) {
#ifdef HAVE_PCRE_JIT
				gint jit, n;

				if (can_jit) {
					jit = 0;
					n = pcre_fullinfo (res->re, res->extra,
							PCRE_INFO_JIT, &jit);

					if (n != 0 || jit != 1) {
						msg_debug ("jit compilation of %s is not supported", pattern);
						res->jstack = NULL;
					}
					else {
						res->jstack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
						pcre_assign_jit_stack (res->extra, NULL, res->jstack);
					}
				}
#endif
			}
			else {
				msg_warn ("cannot optimize regexp pattern: '%s': %s",
						pattern, err_str);
			}
		}

		if (res->raw_re) {
			if (res->raw_re != res->re) {
				res->raw_extra = pcre_study (res->raw_re, study_flags, &err_str);
				if (res->raw_extra != NULL) {
#ifdef HAVE_PCRE_JIT
					gint jit, n;

					if (can_jit) {
						jit = 0;
						n = pcre_fullinfo (res->raw_re, res->raw_extra,
								PCRE_INFO_JIT, &jit);

						if (n != 0 || jit != 1) {
							msg_debug ("jit compilation of %s is not supported",
									pattern);
							res->raw_jstack = NULL;
						}
						else {
							res->raw_jstack = pcre_jit_stack_alloc (32 * 1024,
									512 * 1024);
							pcre_assign_jit_stack (res->raw_extra, NULL,
									res->raw_jstack);
						}
					}
#endif
				}
				else {
					msg_warn ("cannot optimize raw regexp pattern: '%s': %s",
							pattern, err_str);
				}
			}
			else {
#ifdef HAVE_PCRE_JIT
				/* Just alias pointers */
				res->raw_extra = res->extra;
				res->raw_jstack = res->jstack;
#endif
			}
		}
	}

	rspamd_regexp_generate_id (pattern, flags, res->id);

	/* Check number of captures */
	if (pcre_fullinfo (res->raw_re, res->extra, PCRE_INFO_CAPTURECOUNT,
			&ncaptures) == 0) {
		res->ncaptures = ncaptures;
	}

	return res;
}
示例#8
0
文件: pcre.c 项目: niubl/ironbee
/**
 * @brief Execute the PCRE operator
 *
 * Matches the compiled pattern held in the instance data against the
 * value of @a field.  For JIT-compiled patterns a JIT stack is created
 * for the duration of the call when PCRE_JIT_STACK is defined.
 *
 * @param[in] tx Current transaction.
 * @param[in] instance_data Instance data needed for execution.
 * @param[in] field The field to operate on.
 * @param[in] capture If non-NULL, the collection to capture to.
 * @param[out] result The result of the operator 1=true 0=false.
 * @param[in] cbdata Callback data.
 *
 * @returns IB_OK on match or no-match, IB_EALLOC if the output vector
 *          cannot be allocated, IB_EINVAL for an unsupported field type,
 *          the status of ib_field_value() if it fails, and IB_EUNKNOWN
 *          for any other pcre_exec() result.
 */
static
ib_status_t pcre_operator_execute(
    ib_tx_t *tx,
    void *instance_data,
    const ib_field_t *field,
    ib_field_t *capture,
    ib_num_t *result,
    void *cbdata
)
{
    assert(instance_data != NULL);
    assert(tx            != NULL);

    const int ovecsize = 3 * MATCH_MAX;
    int *ovector = (int *)malloc(ovecsize*sizeof(*ovector));
    modpcre_operator_data_t *operator_data =
        (modpcre_operator_data_t *)instance_data;
    const char *subject = NULL;
    size_t subject_len = 0;
    const ib_bytestr_t *bytestr;
    pcre_extra *edata = NULL;
    ib_status_t ib_rc;
    int matches;
#ifdef PCRE_JIT_STACK
    pcre_jit_stack *jit_stack = NULL;
#endif

    assert(operator_data->cpdata->is_dfa == false);

    if (ovector == NULL) {
        return IB_EALLOC;
    }

    /* Only NUL-terminated strings and byte strings can be matched. */
    if (field->type != IB_FTYPE_NULSTR && field->type != IB_FTYPE_BYTESTR) {
        free(ovector);
        return IB_EINVAL;
    }

    if (field->type == IB_FTYPE_NULSTR) {
        ib_rc = ib_field_value(field, ib_ftype_nulstr_out(&subject));
        if (ib_rc != IB_OK) {
            free(ovector);
            return ib_rc;
        }

        subject_len = (subject != NULL) ? strlen(subject) : 0;
    }
    else {
        ib_rc = ib_field_value(field, ib_ftype_bytestr_out(&bytestr));
        if (ib_rc != IB_OK) {
            free(ovector);
            return ib_rc;
        }

        if (bytestr != NULL) {
            subject_len = ib_bytestr_length(bytestr);
            subject = (const char *) ib_bytestr_const_ptr(bytestr);
        }
    }

    /* A missing value is matched as the empty string. */
    if (subject == NULL) {
        subject = "";
    }

    /* edata stays NULL unless usable study data is found below. */
    if (operator_data->cpdata->is_jit) {
#ifdef PCRE_JIT_STACK
        jit_stack = pcre_jit_stack_alloc(operator_data->cpdata->jit_stack_start,
                                         operator_data->cpdata->jit_stack_max);
        if (jit_stack == NULL) {
            /* NOTE(review): no `ib` is declared in this function as
             * shown; confirm it is visible from file scope. */
            ib_log_warn(ib,
                "Failed to allocate a jit stack for a jit-compiled rule.  "
                "Not using jit for this call."
            );
        }
        /* If the study data is NULL or size zero, don't use it. */
        else if (operator_data->cpdata->study_data_sz > 0) {
            edata = operator_data->cpdata->edata;
            if (edata != NULL) {
                pcre_assign_jit_stack(edata, NULL, jit_stack);
            }
        }
#endif
    }
    else if (operator_data->cpdata->study_data_sz > 0) {
        edata = operator_data->cpdata->edata;
    }

    matches = pcre_exec(operator_data->cpdata->cpatt,
                        edata,
                        subject,
                        subject_len,
                        0, /* Starting offset. */
                        0, /* Options. */
                        ovector,
                        ovecsize);

#ifdef PCRE_JIT_STACK
    if (jit_stack != NULL) {
        pcre_jit_stack_free(jit_stack);
    }
#endif

    if (matches > 0 && capture != NULL) {
        pcre_set_matches(tx, capture, ovector, matches, subject);
    }

    /* Anything other than a match or a plain no-match is an error. */
    if (matches > 0 || matches == PCRE_ERROR_NOMATCH) {
        ib_rc = IB_OK;
    }
    else {
        ib_rc = IB_EUNKNOWN;
    }
    *result = (matches > 0) ? 1 : 0;

    free(ovector);
    return ib_rc;
}
示例#9
0
/* Compile PATTERN, of SIZE bytes, for use with the -P (Perl regexp)
   matcher.  The compiled pattern is stored in CRE and its study data in
   EXTRA; when JIT compilation succeeded a JIT stack is allocated into
   JIT_STACK and attached to EXTRA.

   With match_lines the pattern is wrapped as "^(?:...)$"; with
   match_words it is wrapped in \w lookarounds.  If both are set,
   match_lines takes precedence for the prefix as well as the suffix,
   so the wrapper is always a matching pair.

   Any failure is reported through error (EXIT_TROUBLE, ...).  */
void
Pcompile (char const *pattern, size_t size)
{
#if !HAVE_LIBPCRE
  error (EXIT_TROUBLE, 0, "%s",
         _("support for the -P option is not compiled into "
           "this --disable-perl-regexp binary"));
#else
  int e;
  char const *ep;
  /* Each NUL byte may grow to four bytes and the wrappers add at most
     10 + 7 bytes plus the terminator; 4 * (size + 7) covers that,
     assuming xnmalloc (N, S) allocates N * S bytes.  */
  char *re = xnmalloc (4, size + 7);
  int flags = (PCRE_MULTILINE
               | (match_icase ? PCRE_CASELESS : 0)
               | (using_utf8 () ? PCRE_UTF8 : 0));
  char const *patlim = pattern + size;
  char *n = re;
  char const *p;
  char const *pnul;

  /* FIXME: Remove these restrictions.  */
  if (memchr (pattern, '\n', size))
    error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));

  /* Write the prefix.  The later strcpy overwrites the earlier one, so
     the order here must mirror the suffix below: match_lines last.  */
  *n = '\0';
  if (match_words)
    strcpy (n, "(?<!\\w)(?:");
  if (match_lines)
    strcpy (n, "^(?:");
  n += strlen (n);

  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
     replace each NUL byte in the pattern with the four characters
     "\000", removing a preceding backslash if there are an odd
     number of backslashes before the NUL.

     FIXME: This method does not work with some multibyte character
     encodings, notably Shift-JIS, where a multibyte character can end
     in a backslash byte.  */
  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
    {
      memcpy (n, p, pnul - p);
      n += pnul - p;
      /* Count the backslashes immediately before the NUL.  */
      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
        continue;
      n -= (pnul - p) & 1;
      strcpy (n, "\\000");
      n += 4;
    }

  memcpy (n, p, patlim - p);
  n += patlim - p;
  *n = '\0';
  if (match_words)
    strcpy (n, ")(?!\\w)");
  if (match_lines)
    strcpy (n, ")$");

  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
  if (!cre)
    error (EXIT_TROUBLE, 0, "%s", ep);

  extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
  if (ep)
    error (EXIT_TROUBLE, 0, "%s", ep);

# if PCRE_STUDY_JIT_COMPILE
  if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
    error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));

  if (e)
    {
      /* A 32K stack is allocated for the machine code by default, which
         can grow to 512K if necessary. Since JIT uses far less memory
         than the interpreter, this should be enough in practice.  */
      jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
      if (!jit_stack)
        error (EXIT_TROUBLE, 0,
               _("failed to allocate memory for the PCRE JIT stack"));
      pcre_assign_jit_stack (extra, NULL, jit_stack);
    }
# endif
  free (re);
#endif /* HAVE_LIBPCRE */
}
示例#10
0
文件: state.c 项目: dongh11/ymd
/*
 * Create the machine's PCRE JIT stack on first use.
 * Later calls leave the existing stack untouched.
 */
void vm_pcre_lazy(struct ymd_mach *vm) {
	if (!vm->pcre_js) {
		vm->pcre_js = pcre_jit_stack_alloc(YMD_JS_START, YMD_JS_MAX);
		assert (vm->pcre_js);
	}
}
示例#11
0
文件: pcre.c 项目: nickleroy/ironbee
/**
 * @brief Execute the rule.
 *
 * Copies the compiled pattern (and its study data, if any) so that match
 * limits can be set on the copy, then runs it against the field value.
 * Every resource acquired here is released on a single exit path.
 *
 * @param[in] ib Ironbee engine
 * @param[in] tx The transaction.
 * @param[in,out] data User data. A @c pcre_rule_data_t.
 * @param[in] flags Operator instance flags
 * @param[in] field The field content (NULSTR or BYTESTR).
 * @param[out] result Set to 1 on a match and 0 otherwise; left untouched
 *                    when the function fails before matching.
 *
 * @returns IB_OK on match or no-match, IB_EALLOC on an allocation failure
 *          or an unsupported field type, the status of ib_field_value()
 *          if it fails, and IB_EUNKNOWN for any other pcre_exec() result.
 */
static ib_status_t pcre_operator_execute(ib_engine_t *ib,
                                         ib_tx_t *tx,
                                         void *data,
                                         ib_flags_t flags,
                                         ib_field_t *field,
                                         ib_num_t *result)
{
    IB_FTRACE_INIT();

    assert(ib!=NULL);
    assert(tx!=NULL);
    assert(tx->dpi!=NULL);
    assert(data!=NULL);

    int matches;
    ib_status_t ib_rc;
    const int ovecsize = 3 * MATCH_MAX;
    int *ovector = NULL;
    const char* subject = NULL;
    size_t subject_len = 0;
    const ib_bytestr_t* bytestr = NULL;
    pcre_rule_data_t *rule_data = (pcre_rule_data_t *)data;
    pcre *regex = NULL;
    pcre_extra *regex_extra = NULL;
#ifdef PCRE_JIT_STACK
    pcre_jit_stack *jit_stack = NULL;
#endif

    ovector = malloc(ovecsize * sizeof(*ovector));
    if (ovector == NULL) {
        ib_rc = IB_EALLOC;
        goto cleanup;
    }

    if (field->type == IB_FTYPE_NULSTR) {
        ib_rc = ib_field_value(field, ib_ftype_nulstr_out(&subject));
        if (ib_rc != IB_OK) {
            goto cleanup;
        }

        if (subject != NULL) {
            subject_len = strlen(subject);
        }
    }
    else if (field->type == IB_FTYPE_BYTESTR) {
        ib_rc = ib_field_value(field, ib_ftype_bytestr_out(&bytestr));
        if (ib_rc != IB_OK) {
            goto cleanup;
        }

        if (bytestr != NULL) {
            subject_len = ib_bytestr_length(bytestr);
            subject = (const char *) ib_bytestr_const_ptr(bytestr);
        }
    }
    else {
        /* NOTE(review): an unsupported field type is reported as
         * IB_EALLOC; kept for compatibility with existing callers. */
        ib_rc = IB_EALLOC;
        goto cleanup;
    }

    /* A missing value is matched as the empty string. */
    if (subject == NULL) {
        subject = "";
        subject_len = 0;
    }

    /* Debug block. Escapes a string and prints it to the log.
     * Memory is freed. */
    if (ib_log_get_level(ib) >= 9) {
        char *debug_str = ib_util_hex_escape(subject, subject_len);

        if ( debug_str != NULL ) {
            ib_log_debug3_tx(tx, "Matching against: %s", debug_str);
            free( debug_str );
        }
    }

    /* Work on a private copy of the compiled pattern. */
    regex = malloc(rule_data->cpatt_sz);
    if (regex == NULL) {
        ib_rc = IB_EALLOC;
        goto cleanup;
    }
    memcpy(regex, rule_data->cpatt, rule_data->cpatt_sz);

    if (rule_data->study_data_sz != 0) {
        regex_extra = malloc(sizeof(*regex_extra));
        if (regex_extra == NULL) {
            ib_rc = IB_EALLOC;
            goto cleanup;
        }
        *regex_extra = *rule_data->edata;

        /* Replace the study_data pointer copied from the rule data right
         * away, so the cleanup path only ever frees memory owned by this
         * call (malloc yields NULL on failure). */
        regex_extra->study_data = malloc(rule_data->study_data_sz);
        if (regex_extra->study_data == NULL) {
            ib_rc = IB_EALLOC;
            goto cleanup;
        }
        memcpy(regex_extra->study_data,
               rule_data->edata->study_data,
               rule_data->study_data_sz);

        /* Put some modest limits on our regex. */
        regex_extra->match_limit = 1000;
        regex_extra->match_limit_recursion = 1000;
        regex_extra->flags = regex_extra->flags |
                            PCRE_EXTRA_MATCH_LIMIT |
                            PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    }

#ifdef PCRE_JIT_STACK
    jit_stack = pcre_jit_stack_alloc(PCRE_JIT_MIN_STACK_SZ,
                                     PCRE_JIT_MAX_STACK_SZ);
    if (jit_stack == NULL) {
        ib_rc = IB_EALLOC;
        goto cleanup;
    }

    pcre_assign_jit_stack(regex_extra, NULL, jit_stack);
#endif

    matches = pcre_exec(regex,
                        regex_extra,
                        subject,
                        subject_len,
                        0, /* Starting offset. */
                        0, /* Options. */
                        ovector,
                        ovecsize);

    if (matches > 0) {
        pcre_set_matches(ib, tx, "TX", ovector, matches, subject);
        ib_rc = IB_OK;
        *result = 1;
    }
    else if (matches == PCRE_ERROR_NOMATCH) {

        if (ib_log_get_level(ib) >= 7) {
            /* The subject need not be NUL-terminated, so log a terminated
             * copy; skip the log line if the copy cannot be allocated. */
            char* tmp_c = malloc(subject_len+1);
            if (tmp_c != NULL) {
                memcpy(tmp_c, subject, subject_len);
                tmp_c[subject_len] = '\0';
                /* No match. Return false to the caller (*result = 0). */
                ib_log_debug2_tx(tx, "No match for [%s] using pattern [%s].",
                            tmp_c,
                            rule_data->patt);
                free(tmp_c);
            }
        }

        ib_rc = IB_OK;
        *result = 0;
    }
    else {
        /* Some other error occurred. Set the status to false and
        report the error. */
        ib_rc = IB_EUNKNOWN;
        *result = 0;
    }

cleanup:
    /* Single exit: release whatever was acquired before the jump. */
    if (regex_extra != NULL) {
        free(regex_extra->study_data);
        free(regex_extra);
    }
#ifdef PCRE_JIT_STACK
    if (jit_stack != NULL) {
        pcre_jit_stack_free(jit_stack);
    }
#endif
    free(regex);
    free(ovector);
    IB_FTRACE_RET_STATUS(ib_rc);
}
示例#12
0
/*
 * Benchmark helper: count all matches of pattern in subject, repeat the
 * scan `repeat` times, and report the best time through printResult().
 *
 * mode 0 uses pcre_exec(), mode 1 pcre_dfa_exec(), and mode 2
 * pcre_jit_exec() with a dedicated JIT stack.  Errors are printed to
 * stdout and end the run; everything allocated here is released on every
 * exit path.
 *
 * NOTE(review): each scan advances by match[1]; a pattern that can match
 * the empty string would leave the position unchanged -- confirm callers
 * never pass such patterns.
 */
void pcre_find_all(char* pattern, char* subject, int subject_len, int repeat, int mode)
{
	pcre *re;
	const char *error;
	int err_val, match[64];
	pcre_extra *extra = NULL;
	pcre_jit_stack *stack = NULL;
	char *ptr;
	int len;
	clock_t best_time = 0, time = 0;
	int found;

	static int work_space[4096];
	re = pcre_compile(
		pattern,	/* the pattern */
		PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF,	/* options */
		&error,		/* for error message */
		&err_val,	/* for error offset */
		NULL);		/* use default character tables */

	if (!re) {
		printf("PCRE compilation failed at offset %d: %s\n", err_val, error);
		return;
	}

	error = NULL;
	extra = pcre_study(re, mode == 2 ? PCRE_STUDY_JIT_COMPILE : 0, &error);
	if (error) {
		printf("PCRE study failed: %s\n", error);
		goto cleanup;
	}
	if (mode == 2) {
		found = 0;
		pcre_fullinfo(re, extra, PCRE_INFO_JIT, &found);
		if (!found) {
			/* There is no error string here: pcre_study succeeded. */
			printf("PCRE JIT compilation failed\n");
			goto cleanup;
		}
		stack = pcre_jit_stack_alloc(65536, 65536);
		if (!stack) {
			printf("PCRE JIT stack allocation failed\n");
			goto cleanup;
		}
	}

	do {
		found = 0;
		ptr = subject;
		len = subject_len;
		switch (mode) {
		case 0:
			time = clock();
			while (1) {
				err_val = pcre_exec(
					re,		/* the compiled pattern */
					extra,		/* extra data */
					ptr,		/* the subject string */
					len,		/* the length of the subject */
					0,		/* start at offset 0 in the subject */
					0,		/* default options */
					match,		/* output vector for substring information */
					64);		/* number of elements in the output vector */

				if (err_val <= 0) {
					if (err_val == PCRE_ERROR_NOMATCH)
						break;
					printf("PCRE pcre_exec failed with: %d\n", err_val);
					break;
				}

				/* Continue after the end of this match. */
				ptr += match[1];
				len -= match[1];
				found++;
			}
			time = clock() - time;
			break;

		case 1:
			time = clock();
			while (1) {
				err_val = pcre_dfa_exec(
					re,		/* the compiled pattern */
					extra,		/* extra data */
					ptr,		/* the subject string */
					len,		/* the length of the subject */
					0,		/* start at offset 0 in the subject */
					0,		/* default options */
					match,		/* output vector for substring information */
					2,		/* number of elements in the output vector */
					work_space,	/* workspace vector */
					4096);		/* number of elements (NOT size in bytes) */

				if (err_val < 0) {
					if (err_val == PCRE_ERROR_NOMATCH)
						break;
					printf("PCRE pcre_exec failed\n");
					break;
				}

				/* Continue after the end of this match. */
				ptr += match[1];
				len -= match[1];
				found++;
			}
			time = clock() - time;
			break;

		case 2:
			time = clock();
			while (1) {
				err_val = pcre_jit_exec(
					re,		/* the compiled pattern */
					extra,		/* extra data */
					ptr,		/* the subject string */
					len,		/* the length of the subject */
					0,		/* start at offset 0 in the subject */
					0,		/* default options */
					match,		/* output vector for substring information */
					64,		/* number of elements in the output vector */
					stack);		/* jit stack */

				if (err_val <= 0) {
					if (err_val == PCRE_ERROR_NOMATCH)
						break;
					printf("PCRE pcre_exec failed with: %d\n", err_val);
					break;
				}

				/* Continue after the end of this match. */
				ptr += match[1];
				len -= match[1];
				found++;
			}
			time = clock() - time;
			break;

		default:
			break;
		}
		if (!best_time || time < best_time)
			best_time = time;
	} while (--repeat > 0);
	printResult(mode == 0 ? "pcre" : (mode == 1 ? "pcre-dfa" : "pcre-jit"), best_time * 1000 / CLOCKS_PER_SEC, found);

cleanup:
	if (extra)
		pcre_free_study(extra);
	if (stack)
		pcre_jit_stack_free(stack);
	pcre_free(re);
}
示例#13
0
/*
 * Compile re_str into regex using the engine selected at build time and
 * by flags.  Returns 0 on success and -1 on failure (after logging the
 * problem through tvherror with the given subsys).
 *
 * Without ENABLE_PCRE / ENABLE_PCRE2 the POSIX engine is always used.
 * With either of them, POSIX is used only when TVHREGEX_POSIX is set in
 * flags; otherwise the PCRE or PCRE2 branch runs.  TVHREGEX_CASELESS
 * selects case-insensitive matching in every engine.
 *
 * Note that the braces of the if/else below are split across
 * preprocessor conditionals; keep them balanced when editing.
 */
int regex_compile(tvh_regex_t *regex, const char *re_str, int flags, int subsys)
{
#if ENABLE_PCRE || ENABLE_PCRE2
  regex->is_posix = 0;
  if (flags & TVHREGEX_POSIX) {
    regex->is_posix = 1;
#endif
    /* POSIX extended regular expression path. */
    int options = REG_EXTENDED;
    if (flags & TVHREGEX_CASELESS)
      options |= REG_ICASE;
    if (!regcomp(&regex->re_posix_code, re_str, options))
      return 0;
    tvherror(subsys, "Unable to compile regex '%s'", re_str);
    return -1;
#if ENABLE_PCRE || ENABLE_PCRE2
  } else {
#if ENABLE_PCRE
    /* PCRE (version 1) path: compile, study, then try to attach a JIT stack. */
    const char *estr;
    int eoff;
    int options = PCRE_UTF8;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE_CASELESS;
#if PCRE_STUDY_JIT_COMPILE
    regex->re_jit_stack = NULL;
#endif
    regex->re_extra = NULL;
    regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
    if (regex->re_code == NULL) {
      tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
    } else {
      regex->re_extra = pcre_study(regex->re_code,
                                   PCRE_STUDY_JIT_COMPILE, &estr);
      /* A NULL study result is an error only when a message came with it. */
      if (regex->re_extra == NULL && estr)
        tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
      else {
#if PCRE_STUDY_JIT_COMPILE
        /* A failed JIT stack allocation is not treated as an error. */
        regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
        if (regex->re_jit_stack)
          pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
#endif
        return 0;
      }
    }
    return -1;
#elif ENABLE_PCRE2
    /* PCRE2 path: compile, create match data, then optionally JIT-compile. */
    PCRE2_UCHAR8 ebuf[128];
    int ecode;
    PCRE2_SIZE eoff;
    size_t jsz;
    uint32_t options;
    assert(regex->re_jit_stack == NULL);
    regex->re_jit_stack = NULL;
    regex->re_match = NULL;
    regex->re_mcontext = pcre2_match_context_create(NULL);
    options = PCRE2_UTF;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE2_CASELESS;
    /* The length argument of -1 marks re_str as zero-terminated. */
    regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, -1, options,
                                   &ecode, &eoff, NULL);
    if (regex->re_code == NULL) {
      (void)pcre2_get_error_message(ecode, ebuf, 120);
      tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
    } else {
      regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
      /* JIT is attempted only for a non-empty pattern; a JIT failure is not an error. */
      if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
        jsz = 0;
        /* Only create a JIT stack when JIT code was actually generated. */
        if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
          regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
          if (regex->re_jit_stack)
            pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
        }
      }
      return 0;
    }
    return -1;
#endif
  }
#endif
}