Ejemplo n.º 1
0
/*
 * Compile `pattern` (a NUL-terminated string) into a PCRE2 code object using
 * default options and the default compile context, then attempt to JIT
 * compile the result.
 *
 * On a compile error the PCRE2 error message, the error offset and the
 * pattern are printed and the process exits with status 1, so this function
 * only ever returns a non-NULL pointer.
 */
pcre2_code *compile(char *pattern) {

  PCRE2_SPTR pcre2_pattern = (PCRE2_SPTR)pattern;

  pcre2_code *re;
  int errornumber;
  PCRE2_SIZE erroroffset;


  re = pcre2_compile(
    pcre2_pattern,               /* the pattern */
    PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
    0,                     /* default options */
    &errornumber,          /* for error number */
    &erroroffset,          /* for error offset */
    NULL);                 /* use default compile context */

  /* Handle a failed compile first: `re` must not be handed to the JIT
   * compiler (or anything else) until it is known to be valid. */
  if (re == NULL) {
    PCRE2_UCHAR buffer[256];
    pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
    printf("PCRE2 compilation failed at offset %d: %s\n for pattern: %s", (int)erroroffset,
      buffer, pattern);
    exit(1);
  }

  /* The JIT result is deliberately ignored: the compiled pattern is returned
   * whether or not JIT compilation succeeded. */
  (void)pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);

  return re;
}
Ejemplo n.º 2
0
/*
 * Compile p->pattern (p->patternlen bytes) with PCRE2 and populate the
 * PCRE2-related fields of `p`: compile context, compiled pattern, match data
 * and, when the library reports JIT support, the JIT stack and match context.
 *
 * Options used:
 *   - PCRE2_MULTILINE always;
 *   - PCRE2_CASELESS when opt->ignore_case is set (with freshly built
 *     character tables when has_non_ascii() reports true for the pattern);
 *   - PCRE2_UTF when is_utf8_locale() and has_non_ascii() are both true.
 *
 * Failures are reported through compile_regexp_failed() / die().
 * NOTE(review): the code after the compile step assumes those helpers do not
 * return -- confirm against their definitions.
 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
	int error;
	PCRE2_UCHAR errbuf[256];
	PCRE2_SIZE erroffset;
	int options = PCRE2_MULTILINE;
	const uint8_t *character_tables = NULL;
	int jitret;

	assert(opt->pcre2);

	p->pcre2_compile_context = NULL;

	if (opt->ignore_case) {
		if (has_non_ascii(p->pattern)) {
			/* Build tables and attach them via a dedicated compile context. */
			character_tables = pcre2_maketables(NULL);
			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
		}
		options |= PCRE2_CASELESS;
	}
	if (is_utf8_locale() && has_non_ascii(p->pattern))
		options |= PCRE2_UTF;

	p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
					 p->patternlen, options, &error, &erroffset,
					 p->pcre2_compile_context);

	if (p->pcre2_pattern) {
		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
		if (!p->pcre2_match_data)
			die("Couldn't allocate PCRE2 match data");
	} else {
		pcre2_get_error_message(error, errbuf, sizeof(errbuf));
		/* Pass the buffer itself (array decays to a pointer), not its address. */
		compile_regexp_failed(p, (const char *)errbuf);
	}

	/* Ask the library whether JIT is available; the answer lands in pcre2_jit_on. */
	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
	if (p->pcre2_jit_on == 1) {
		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
		if (jitret)
			die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
		if (!p->pcre2_jit_stack)
			die("Couldn't allocate PCRE2 JIT stack");
		p->pcre2_match_context = pcre2_match_context_create(NULL);
		if (!p->pcre2_match_context)
			die("Couldn't allocate PCRE2 match context");
		pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
	} else if (p->pcre2_jit_on != 0) {
		/* Report the variable that was actually tested (was pcre1_jit_on). */
		die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d",
		    p->pcre2_jit_on);
	}
}
Ejemplo n.º 3
0
/* Compile a regular expression to be used later
 * Allowed flags are:
 *      - OS_CASE_SENSITIVE
 *      - OS_RETURN_SUBSTRING
 * Returns 1 on success or 0 on error
 * The error code is set on reg->error
 *
 * When OSRegex_CouldBeOptimized() accepts the pattern and it is anchored with
 * a leading '^' and/or a trailing '$', no PCRE2 object is built: the literal
 * text (anchors stripped) is stored in reg->pattern and a plain string
 * comparison function is installed in reg->exec_function.
 * Otherwise the pattern is converted with OSRegex_Convert(), compiled with
 * PCRE2 and executed through OSRegex_Execute_pcre2_match.
 *
 * On any error the partially initialised `reg` is released with
 * OSRegex_FreePattern() before returning 0.
 */
int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags)
{
    char *pattern_pcre2 = NULL;
    int flags_compile = 0;
    int error = 0;
    PCRE2_SIZE erroroffset = 0;
    size_t pattern_len = 0UL;
    char first_char, last_char;
    uint32_t count = 0; /* stays 0 if pcre2_pattern_info() does not fill it */
    uint32_t i;

    /* Check for references not initialized */
    if (reg == NULL) {
        return (0);
    }

    /* Initialize OSRegex structure */
    reg->error = 0;
    reg->sub_strings = NULL;
    reg->regex = NULL;
    reg->match_data = NULL;
    reg->pattern_len = 0UL;
    reg->pattern = NULL;
    reg->exec_function = NULL;

    /* The pattern can't be null */
    if (pattern == NULL) {
        reg->error = OS_REGEX_PATTERN_NULL;
        goto compile_error;
    }

    /* Maximum size of the pattern */
    pattern_len = strlen(pattern);
    if (pattern_len > OS_PATTERN_MAXSIZE) {
        reg->error = OS_REGEX_MAXSIZE;
        goto compile_error;
    }

    /* Fast paths; pattern_len > 0 keeps pattern[pattern_len - 1] in bounds */
    if (OSRegex_CouldBeOptimized(pattern) && pattern_len > 0) {
        first_char = pattern[0];
        last_char = pattern[pattern_len - 1];

        if (first_char == '^') {
            /* Skip the leading '^' in every anchored-at-start case */
            reg->pattern = strdup(&pattern[1]);
            if (reg->pattern == NULL) {
                reg->error = OS_REGEX_OUTOFMEMORY;
                goto compile_error;
            }

            if (last_char == '$') {
                /* "^text$": exact comparison, drop the trailing '$' too */
                reg->pattern_len = pattern_len - 2;
                reg->pattern[reg->pattern_len] = '\0';
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strcmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strcasecmp;
                }
            } else {
                /* "^text": prefix comparison */
                reg->pattern_len = pattern_len - 1;
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strncmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strncasecmp;
                }
            }
            return (1);
        } else if (last_char == '$') {
            /* "text$": suffix comparison, drop the trailing '$' */
            reg->pattern = strdup(pattern);
            if (reg->pattern == NULL) {
                reg->error = OS_REGEX_OUTOFMEMORY;
                goto compile_error;
            }
            reg->pattern_len = pattern_len - 1;
            reg->pattern[reg->pattern_len] = '\0';
            if (flags & OS_CASE_SENSITIVE) {
                reg->exec_function = OSRegex_Execute_strrcmp;
            } else {
                reg->exec_function = OSRegex_Execute_strrcasecmp;
            }
            return (1);
        }
    }

    reg->exec_function = OSRegex_Execute_pcre2_match;

    /* Ossec pattern conversion */
    if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_REGEX) == 0) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    flags_compile |= PCRE2_UTF;
    flags_compile |= PCRE2_NO_UTF_CHECK;
    flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS;
    reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED, flags_compile,
                               &error, &erroroffset, NULL);
    if (reg->regex == NULL) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
    if (reg->match_data == NULL) {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

#ifdef USE_PCRE2_JIT
    /* Just In Time compilation for faster execution */
    if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
        reg->error = OS_REGEX_NO_JIT;
        goto compile_error;
    }
#endif

    if (flags & OS_RETURN_SUBSTRING) {
        pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, (void *)&count);
        count++; // to store NULL pointer at the end
        reg->sub_strings = calloc(count, sizeof(char *));
        if (reg->sub_strings == NULL) {
            reg->error = OS_REGEX_OUTOFMEMORY;
            goto compile_error;
        }
        for (i = 0; i < count; i++) {
            reg->sub_strings[i] = NULL;
        }
    }

    free(pattern_pcre2);

    return (1);

compile_error:
    /* Error handling */

    /* free(NULL) is a no-op, so no guard is needed */
    free(pattern_pcre2);

    OSRegex_FreePattern(reg);

    return (0);
}
Ejemplo n.º 4
0
/*
 * Compile the regular expression `re_str` into `regex`.
 *
 * Backend selection:
 *   - If neither ENABLE_PCRE nor ENABLE_PCRE2 is set, POSIX regcomp() is
 *     always used.
 *   - Otherwise POSIX regcomp() is used only when `flags` contains
 *     TVHREGEX_POSIX; without it PCRE (ENABLE_PCRE) or PCRE2 (ENABLE_PCRE2)
 *     is used, PCRE taking precedence when both are enabled.
 *
 * TVHREGEX_CASELESS in `flags` requests case-insensitive matching in every
 * backend. `subsys` is passed through to tvherror() for error reporting.
 *
 * Returns 0 on success and -1 on failure (after logging via tvherror()).
 *
 * Note: the braces of the `if (flags & TVHREGEX_POSIX) { ... } else { ... }`
 * statement are split across preprocessor conditionals; each #if/#endif pair
 * below must stay balanced with them.
 */
int regex_compile(tvh_regex_t *regex, const char *re_str, int flags, int subsys)
{
#if ENABLE_PCRE || ENABLE_PCRE2
  regex->is_posix = 0;
  if (flags & TVHREGEX_POSIX) {
    regex->is_posix = 1;
#endif
    /* POSIX backend: extended syntax, optionally case-insensitive */
    int options = REG_EXTENDED;
    if (flags & TVHREGEX_CASELESS)
      options |= REG_ICASE;
    /* regcomp() returns 0 on success */
    if (!regcomp(&regex->re_posix_code, re_str, options))
      return 0;
    tvherror(subsys, "Unable to compile regex '%s'", re_str);
    return -1;
#if ENABLE_PCRE || ENABLE_PCRE2
  } else {
#if ENABLE_PCRE
    /* PCRE (version 1) backend */
    const char *estr;
    int eoff;
    int options = PCRE_UTF8;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE_CASELESS;
#if PCRE_STUDY_JIT_COMPILE
    regex->re_jit_stack = NULL;
#endif
    regex->re_extra = NULL;
    regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
    if (regex->re_code == NULL) {
      tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
    } else {
      regex->re_extra = pcre_study(regex->re_code,
                                   PCRE_STUDY_JIT_COMPILE, &estr);
      /* A NULL result is only treated as an error when estr was also set */
      if (regex->re_extra == NULL && estr)
        tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
      else {
#if PCRE_STUDY_JIT_COMPILE
        /* JIT stack is optional: allocation failure is silently tolerated */
        regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
        if (regex->re_jit_stack)
          pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
#endif
        return 0;
      }
    }
    return -1;
#elif ENABLE_PCRE2
    /* PCRE2 backend */
    PCRE2_UCHAR8 ebuf[128];
    int ecode;
    PCRE2_SIZE eoff;
    size_t jsz;
    uint32_t options;
    /* NOTE(review): this reads re_jit_stack before it is assigned here, so
     * the caller presumably must pass a zero-initialised regex -- confirm. */
    assert(regex->re_jit_stack == NULL);
    regex->re_jit_stack = NULL;
    regex->re_match = NULL;
    /* The match context is created before compilation and is not released
     * in this function when compilation fails. */
    regex->re_mcontext = pcre2_match_context_create(NULL);
    options = PCRE2_UTF;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE2_CASELESS;
    /* Length -1 converts to the same value as PCRE2_ZERO_TERMINATED */
    regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, -1, options,
                                   &ecode, &eoff, NULL);
    if (regex->re_code == NULL) {
      (void)pcre2_get_error_message(ecode, ebuf, 120);
      tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
    } else {
      regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
      /* JIT is attempted only for non-empty patterns; any JIT failure is
       * ignored and the function still reports success. */
      if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
        jsz = 0;
        /* A non-zero JIT size is required before a JIT stack is created */
        if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
          regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
          if (regex->re_jit_stack)
            pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
        }
      }
      return 0;
    }
    return -1;
#endif
  }
#endif
}
Ejemplo n.º 5
0
/*
 * Finish setting up a freshly compiled regexp `r`.
 *
 * Ensures the regexp library is initialised (global_re_cache), then:
 *   - with WITH_PCRE2: creates the match context(s) and, when HAVE_PCRE_JIT
 *     is defined, JIT-compiles r->re (and r->raw_re when it is a distinct
 *     object) and assigns the shared JIT stack from global_re_cache;
 *   - otherwise (PCRE 1): studies r->re and r->raw_re and, when JIT is
 *     available (HAVE_PCRE_JIT and can_jit), assigns the shared JIT stack.
 *
 * When JIT cannot be used, RSPAMD_REGEXP_FLAG_DISABLE_JIT is set in
 * r->flags; nothing is returned.
 */
static void
rspamd_regexp_post_process (rspamd_regexp_t *r)
{
	if (global_re_cache == NULL) {
		rspamd_regexp_library_init (NULL);
	}
#if defined(WITH_PCRE2)
	gsize jsz;
	guint jit_flags = PCRE2_JIT_COMPLETE;
	/* Create match context */

	r->mcontext = pcre2_match_context_create (NULL);

	/* The raw regexp gets its own context only when it is a distinct object */
	if (r->re != r->raw_re) {
		r->raw_mcontext = pcre2_match_context_create (NULL);
	}
	else {
		r->raw_mcontext = r->mcontext;
	}

#ifdef HAVE_PCRE_JIT
	/* Keep the return code so the log shows the actual error, not the flags */
	gint jit_rc = pcre2_jit_compile (r->re, jit_flags);

	if (jit_rc < 0) {
		msg_err ("jit compilation of %s is not supported: %d", r->pattern, jit_rc);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}
	else {
		/* A successful call must also have produced JIT code of non-zero size */
		if (!(pcre2_pattern_info (r->re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_err ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}

	if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
		pcre2_jit_stack_assign (r->mcontext, NULL, global_re_cache->jstack);
	}

	if (r->re != r->raw_re) {
		if (pcre2_jit_compile (r->raw_re, jit_flags) < 0) {
			msg_debug ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}

		if (!(pcre2_pattern_info (r->raw_re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_debug ("jit compilation of raw %s is not supported", r->pattern);
		}
		else if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
			pcre2_jit_stack_assign (r->raw_mcontext, NULL, global_re_cache->jstack);
		}
	}
#endif

#else
	const gchar *err_str = "unknown";
	gboolean try_jit = TRUE, try_raw_jit = TRUE;
	gint study_flags = 0;

#if defined(HAVE_PCRE_JIT)
	study_flags |= PCRE_STUDY_JIT_COMPILE;
#endif

	/* Pcre 1 needs study */
	if (r->re) {
		r->extra = pcre_study (r->re, study_flags, &err_str);

		if (r->extra == NULL) {
			msg_debug ("cannot optimize regexp pattern: '%s': %s",
					r->pattern, err_str);
			try_jit = FALSE;
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}
	else {
		g_assert_not_reached ();
	}

	if (r->raw_re && r->raw_re != r->re) {
		/* Study the raw regexp itself: the extra data must belong to raw_re */
		r->raw_extra = pcre_study (r->raw_re, study_flags, &err_str);
	}
	else if (r->raw_re == r->re) {
		r->raw_extra = r->extra;
	}

	if (r->raw_extra == NULL) {

		msg_debug ("cannot optimize raw regexp pattern: '%s': %s",
				r->pattern, err_str);
		try_raw_jit = FALSE;
	}
	/* JIT path */
	if (try_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {
			jit = 0;
			n = pcre_fullinfo (r->re, r->extra,
					PCRE_INFO_JIT, &jit);

			if (n != 0 || jit != 1) {
				msg_debug ("jit compilation of %s is not supported", r->pattern);
				r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
			}
			else {
				pcre_assign_jit_stack (r->extra, NULL, global_re_cache->jstack);
			}
		}
#endif
	}
	else {
		msg_debug ("cannot optimize regexp pattern: '%s': %s",
				r->pattern, err_str);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}

	if (try_raw_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {

			/* When raw_re aliases re, the stack was already assigned above */
			if (r->raw_re != r->re) {
				jit = 0;
				n = pcre_fullinfo (r->raw_re, r->raw_extra,
						PCRE_INFO_JIT, &jit);

				if (n != 0 || jit != 1) {
					msg_debug ("jit compilation of %s is not supported", r->pattern);
					r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
				}
				else {
					pcre_assign_jit_stack (r->raw_extra, NULL,
							global_re_cache->jstack);
				}
			}
		}
#endif
	}
#endif /* WITH_PCRE2 */
}