pcre2_code *compile(char *pattern) { PCRE2_SPTR pcre2_pattern = (PCRE2_SPTR)pattern; pcre2_code *re; int errornumber; PCRE2_SIZE erroroffset; re = pcre2_compile( pcre2_pattern, /* the pattern */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ 0, /* default options */ &errornumber, /* for error number */ &erroroffset, /* for error offset */ NULL); /* use default compile context */ pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); if (re == NULL) { PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); printf("PCRE2 compilation failed at offset %d: %s\n for pattern: %s", (int)erroroffset, buffer, pattern); exit(1); } return re; }
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { int error; PCRE2_UCHAR errbuf[256]; PCRE2_SIZE erroffset; int options = PCRE2_MULTILINE; const uint8_t *character_tables = NULL; int jitret; assert(opt->pcre2); p->pcre2_compile_context = NULL; if (opt->ignore_case) { if (has_non_ascii(p->pattern)) { character_tables = pcre2_maketables(NULL); p->pcre2_compile_context = pcre2_compile_context_create(NULL); pcre2_set_character_tables(p->pcre2_compile_context, character_tables); } options |= PCRE2_CASELESS; } if (is_utf8_locale() && has_non_ascii(p->pattern)) options |= PCRE2_UTF; p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, p->patternlen, options, &error, &erroffset, p->pcre2_compile_context); if (p->pcre2_pattern) { p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); if (!p->pcre2_match_data) die("Couldn't allocate PCRE2 match data"); } else { pcre2_get_error_message(error, errbuf, sizeof(errbuf)); compile_regexp_failed(p, (const char *)&errbuf); } pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); if (p->pcre2_jit_on == 1) { jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); if (jitret) die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); if (!p->pcre2_jit_stack) die("Couldn't allocate PCRE2 JIT stack"); p->pcre2_match_context = pcre2_match_context_create(NULL); if (!p->pcre2_match_context) die("Couldn't allocate PCRE2 match context"); pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); } else if (p->pcre2_jit_on != 0) { die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d", p->pcre1_jit_on); } }
/* Compile a regular expression to be used later.
 *
 * Allowed flags are:
 *      - OS_CASE_SENSITIVE
 *      - OS_RETURN_SUBSTRING
 *
 * Patterns accepted by OSRegex_CouldBeOptimized() that start with '^'
 * and/or end with '$' are not handed to PCRE2: the anchors are stripped,
 * the remaining text is stored in reg->pattern and one of the
 * OSRegex_Execute_str*cmp functions is selected instead. That fast path
 * returns before reg->sub_strings is allocated. Every other pattern is
 * converted with OSRegex_Convert() and compiled with PCRE2.
 *
 * Returns 1 on success or 0 on error.
 * The error code is set on reg->error (nothing is set when reg is NULL).
 */
int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags)
{
    char *pattern_pcre2 = NULL;
    int flags_compile = 0;
    int error = 0;
    PCRE2_SIZE erroroffset = 0;
    size_t pattern_len = 0UL;
    uint32_t count = 0;

    /* Check for references not initialized */
    if (reg == NULL) {
        return (0);
    }

    /* Initialize OSRegex structure */
    reg->error = 0;
    reg->sub_strings = NULL;
    reg->regex = NULL;
    reg->match_data = NULL;
    reg->pattern_len = 0UL;
    reg->pattern = NULL;
    reg->exec_function = NULL;

    /* The pattern can't be null */
    if (pattern == NULL) {
        reg->error = OS_REGEX_PATTERN_NULL;
        goto compile_error;
    }

    /* Maximum size of the pattern */
    pattern_len = strlen(pattern);
    if (pattern_len > OS_PATTERN_MAXSIZE) {
        reg->error = OS_REGEX_MAXSIZE;
        goto compile_error;
    }

    /* The length guard keeps pattern[pattern_len - 1] in bounds for "". */
    if (pattern_len > 0 && OSRegex_CouldBeOptimized(pattern)) {
        const int anchored_start = (pattern[0] == '^');
        const int anchored_end = (pattern[pattern_len - 1] == '$');

        if (anchored_start || anchored_end) {
            /* Copy the pattern without the leading '^', if any */
            reg->pattern = strdup(anchored_start ? &pattern[1] : pattern);
            if (reg->pattern == NULL) {
                reg->error = OS_REGEX_OUTOFMEMORY;
                goto compile_error;
            }

            /* Drop the trailing '$', if any (otherwise this rewrites the
             * existing terminator) */
            reg->pattern_len = pattern_len - (size_t)anchored_start - (size_t)anchored_end;
            reg->pattern[reg->pattern_len] = '\0';

            if (anchored_start && anchored_end) {
                /* ^text$ : whole-string comparison */
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strcmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strcasecmp;
                }
            } else if (anchored_start) {
                /* ^text : prefix comparison */
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strncmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strncasecmp;
                }
            } else {
                /* text$ : suffix comparison */
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strrcmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strrcasecmp;
                }
            }

            return (1);
        }
    }

    reg->exec_function = OSRegex_Execute_pcre2_match;

    /* Ossec pattern conversion */
    if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_REGEX) == 0) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    flags_compile |= PCRE2_UTF;
    flags_compile |= PCRE2_NO_UTF_CHECK;
    flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS;

    reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED,
                               flags_compile, &error, &erroroffset, NULL);
    if (reg->regex == NULL) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
    if (reg->match_data == NULL) {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

#ifdef USE_PCRE2_JIT
    /* Just In Time compilation for faster execution */
    if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
        reg->error = OS_REGEX_NO_JIT;
        goto compile_error;
    }
#endif

    if (flags & OS_RETURN_SUBSTRING) {
        pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, (void *)&count);
        count++; // to store NULL pointer at the end

        /* calloc() hands back zeroed storage, so no explicit clearing of
         * the slots is needed */
        reg->sub_strings = calloc(count, sizeof(char *));
        if (reg->sub_strings == NULL) {
            reg->error = OS_REGEX_OUTOFMEMORY;
            goto compile_error;
        }
    }

    free(pattern_pcre2);

    return (1);

compile_error:
    /* Error handling */
    free(pattern_pcre2);

    OSRegex_FreePattern(reg);

    return (0);
}
/*
 * Compile re_str into *regex.
 *
 * regex:  destination; the members matching the selected backend are set.
 * re_str: NUL-terminated pattern text.
 * flags:  TVHREGEX_POSIX selects the POSIX backend when a PCRE backend is
 *         compiled in; TVHREGEX_CASELESS requests case-insensitive matching.
 * subsys: subsystem identifier passed to tvherror() for diagnostics.
 *
 * Without ENABLE_PCRE/ENABLE_PCRE2 the POSIX backend is always used.
 *
 * Returns 0 on success, -1 when the pattern cannot be compiled (or, for
 * PCRE1, when pcre_study() reports an error).
 */
int regex_compile(tvh_regex_t *regex, const char *re_str, int flags, int subsys)
{
#if ENABLE_PCRE || ENABLE_PCRE2
  regex->is_posix = 0;
  if (flags & TVHREGEX_POSIX) {
    regex->is_posix = 1;
#endif
    int options = REG_EXTENDED;
    if (flags & TVHREGEX_CASELESS)
      options |= REG_ICASE;
    if (!regcomp(&regex->re_posix_code, re_str, options))
      return 0;
    tvherror(subsys, "Unable to compile regex '%s'", re_str);
    return -1;
#if ENABLE_PCRE || ENABLE_PCRE2
  } else {
#if ENABLE_PCRE
    const char *estr;
    int eoff;
    int options = PCRE_UTF8;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE_CASELESS;
#if PCRE_STUDY_JIT_COMPILE
    regex->re_jit_stack = NULL;
#endif
    regex->re_extra = NULL;
    regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
    if (regex->re_code == NULL) {
      tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
    } else {
      regex->re_extra = pcre_study(regex->re_code,
                                   PCRE_STUDY_JIT_COMPILE, &estr);
      /* A NULL result is only treated as an error when estr was set */
      if (regex->re_extra == NULL && estr)
        tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
      else {
#if PCRE_STUDY_JIT_COMPILE
        regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
        if (regex->re_jit_stack)
          pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
#endif
        return 0;
      }
    }
    return -1;
#elif ENABLE_PCRE2
    PCRE2_UCHAR8 ebuf[128];
    int ecode;
    PCRE2_SIZE eoff;
    size_t jsz;
    uint32_t options;
    assert(regex->re_jit_stack == NULL);
    regex->re_jit_stack = NULL;
    regex->re_match = NULL;
    regex->re_mcontext = pcre2_match_context_create(NULL);
    options = PCRE2_UTF;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE2_CASELESS;
    /* Spell out the "zero-terminated" length instead of a bare -1 */
    regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, PCRE2_ZERO_TERMINATED,
                                   options, &ecode, &eoff, NULL);
    if (regex->re_code == NULL) {
      (void)pcre2_get_error_message(ecode, ebuf, 120);
      tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
    } else {
      regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
      /* JIT is attempted only for non-empty patterns; failure is not an error */
      if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
        jsz = 0;
        /* Only set up a JIT stack when JIT code was actually produced */
        if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
          regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
          if (regex->re_jit_stack)
            pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
        }
      }
      return 0;
    }
    return -1;
#endif
  }
#endif
}
/*
 * Finish setting up a freshly compiled regexp: create the match contexts
 * (PCRE2) or study the patterns (PCRE1) and, when JIT support is compiled
 * in, JIT-compile the patterns and attach the shared JIT stack taken from
 * global_re_cache.
 *
 * r: regexp whose `re` and `raw_re` have already been compiled; its
 *    contexts/extra data and flags are updated in place.
 *    RSPAMD_REGEXP_FLAG_DISABLE_JIT is set in r->flags whenever JIT turns
 *    out not to be usable.
 *
 * The regexp library is initialised on demand if global_re_cache is NULL.
 */
static void
rspamd_regexp_post_process (rspamd_regexp_t *r)
{
	if (global_re_cache == NULL) {
		rspamd_regexp_library_init (NULL);
	}
#if defined(WITH_PCRE2)
	gsize jsz;
	guint jit_flags = PCRE2_JIT_COMPLETE;
	/* Create match context */

	r->mcontext = pcre2_match_context_create (NULL);

	/* The raw pattern shares the context when it is the same object */
	if (r->re != r->raw_re) {
		r->raw_mcontext = pcre2_match_context_create (NULL);
	}
	else {
		r->raw_mcontext = r->mcontext;
	}

#ifdef HAVE_PCRE_JIT
	/* Keep the return code so the log shows the error, not the flags */
	gint jit_rc = pcre2_jit_compile (r->re, jit_flags);

	if (jit_rc < 0) {
		msg_err ("jit compilation of %s is not supported: %d", r->pattern, jit_rc);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}
	else {
		/* JIT "succeeded" but produced no code: treat as unsupported */
		if (!(pcre2_pattern_info (r->re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_err ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}

	if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
		pcre2_jit_stack_assign (r->mcontext, NULL, global_re_cache->jstack);
	}

	if (r->re != r->raw_re) {
		if (pcre2_jit_compile (r->raw_re, jit_flags) < 0) {
			msg_debug ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}

		if (!(pcre2_pattern_info (r->raw_re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_debug ("jit compilation of raw %s is not supported", r->pattern);
		}
		else if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
			pcre2_jit_stack_assign (r->raw_mcontext, NULL, global_re_cache->jstack);
		}
	}
#endif

#else
	const gchar *err_str = "unknown";
	gboolean try_jit = TRUE, try_raw_jit = TRUE;
	gint study_flags = 0;

#if defined(HAVE_PCRE_JIT)
	study_flags |= PCRE_STUDY_JIT_COMPILE;
#endif

	/* Pcre 1 needs study */
	if (r->re) {
		r->extra = pcre_study (r->re, study_flags, &err_str);

		if (r->extra == NULL) {
			msg_debug ("cannot optimize regexp pattern: '%s': %s",
					r->pattern, err_str);
			try_jit = FALSE;
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}
	else {
		g_assert_not_reached ();
	}

	if (r->raw_re && r->raw_re != r->re) {
		/* Study the raw pattern itself, not the primary pattern */
		r->raw_extra = pcre_study (r->raw_re, study_flags, &err_str);
	}
	else if (r->raw_re == r->re) {
		r->raw_extra = r->extra;
	}

	if (r->raw_extra == NULL) {
		msg_debug ("cannot optimize raw regexp pattern: '%s': %s",
				r->pattern, err_str);
		try_raw_jit = FALSE;
	}

	/* JIT path */
	if (try_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {
			jit = 0;
			n = pcre_fullinfo (r->re, r->extra, PCRE_INFO_JIT, &jit);

			if (n != 0 || jit != 1) {
				msg_debug ("jit compilation of %s is not supported", r->pattern);
				r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
			}
			else {
				pcre_assign_jit_stack (r->extra, NULL, global_re_cache->jstack);
			}
		}
#endif
	}
	else {
		msg_debug ("cannot optimize regexp pattern: '%s': %s",
				r->pattern, err_str);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}

	if (try_raw_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {
			/* A shared raw pattern was already handled above */
			if (r->raw_re != r->re) {
				jit = 0;
				n = pcre_fullinfo (r->raw_re, r->raw_extra, PCRE_INFO_JIT, &jit);

				if (n != 0 || jit != 1) {
					msg_debug ("jit compilation of %s is not supported", r->pattern);
					r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
				}
				else {
					pcre_assign_jit_stack (r->raw_extra, NULL, global_re_cache->jstack);
				}
			}
		}
#endif
	}
#endif /* WITH_PCRE2 */
}