static int pcre1match(struct grep_pat *p, const char *line, const char *eol, regmatch_t *match, int eflags) { int ovector[30], ret, flags = 0; if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; #ifdef GIT_PCRE1_USE_JIT if (p->pcre1_jit_on) { ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, eol - line, 0, flags, ovector, ARRAY_SIZE(ovector), p->pcre1_jit_stack); } else #endif { ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, eol - line, 0, flags, ovector, ARRAY_SIZE(ovector)); } if (ret < 0 && ret != PCRE_ERROR_NOMATCH) die("pcre_exec failed with error code %d", ret); if (ret > 0) { ret = 0; match->rm_so = ovector[0]; match->rm_eo = ovector[1]; } return ret; }
gboolean rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, const gchar **start, const gchar **end, gboolean raw, GArray *captures) { pcre *r; pcre_extra *ext; #if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) pcre_jit_stack *st = NULL; #endif const gchar *mt; gsize remain = 0; gint rc, match_flags = 0, *ovec, ncaptures, i; g_assert (re != NULL); g_assert (text != NULL); if (len == 0) { len = strlen (text); } if (end != NULL && *end != NULL) { /* Incremental search */ mt = (*end); if ((gint)len > (mt - text)) { remain = len - (mt - text); } } else { mt = text; remain = len; } if (remain == 0) { return FALSE; } match_flags = PCRE_NEWLINE_ANYCRLF; if ((re->flags & RSPAMD_REGEXP_FLAG_RAW) || raw) { r = re->raw_re; ext = re->raw_extra; #if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) st = re->raw_jstack; #endif } else { r = re->re; ext = re->extra; #if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) if (g_utf8_validate (mt, remain, NULL)) { st = re->jstack; } #endif } g_assert (r != NULL); ncaptures = (re->ncaptures + 1) * 3; ovec = g_alloca (sizeof (gint) * ncaptures); if (!(re->flags & RSPAMD_REGEXP_FLAG_NOOPT)) { #ifdef HAVE_PCRE_JIT # ifdef HAVE_PCRE_JIT_FAST /* XXX: flags seems to be broken with jit fast path */ g_assert (remain > 0); g_assert (mt != NULL); if (st != NULL) { rc = pcre_jit_exec (r, ext, mt, remain, 0, 0, ovec, ncaptures, st); } else { rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, ncaptures); } # else rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, ncaptures); #endif #else rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, ncaptures); #endif } else { rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, ncaptures); } if (rc >= 0) { if (start) { *start = mt + ovec[0]; } if (end) { *end = mt + ovec[1]; } if (captures != NULL && rc > 1) { struct rspamd_re_capture *elt; g_assert (g_array_get_element_size (captures) == sizeof (struct rspamd_re_capture)); g_array_set_size (captures, rc); for (i = 0; i < rc; i ++) { elt = &g_array_index (captures, struct rspamd_re_capture, i); elt->p = mt + ovec[i * 2]; elt->len = (mt + ovec[i * 2 + 1]) - elt->p; } } if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) { /* We also ensure that the match is full */ if (ovec[0] != 0 || (guint)ovec[1] < len) { return FALSE; } } return TRUE; } return FALSE; }
void pcre_find_all(char* pattern, char* subject, int subject_len, int repeat, int mode) { pcre *re; const char *error; int err_val, match[64]; pcre_extra *extra; pcre_jit_stack *stack = NULL; char *ptr; int len; clock_t best_time = 0, time = 0; int found; static int work_space[4096]; re = pcre_compile( pattern, /* the pattern */ PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF, /* options */ &error, /* for error message */ &err_val, /* for error offset */ NULL); /* use default character tables */ if (!re) { printf("PCRE compilation failed at offset %d: %s\n", err_val, error); return; } error = NULL; extra = pcre_study(re, mode == 2 ? PCRE_STUDY_JIT_COMPILE : 0, &error); if (error) { printf("PCRE study failed: %s\n", error); return; } if (mode == 2) { found = 0; pcre_fullinfo(re, extra, PCRE_INFO_JIT, &found); if (!found) { printf("PCRE JIT compilation failed: %s\n", error); return; } stack = pcre_jit_stack_alloc(65536, 65536); } do { found = 0; ptr = subject; len = subject_len; switch (mode) { case 0: time = clock(); while (1) { err_val = pcre_exec( re, /* the compiled pattern */ extra, /* extra data */ ptr, /* the subject string */ len, /* the length of the subject */ 0, /* start at offset 0 in the subject */ 0, /* default options */ match, /* output vector for substring information */ 64); /* number of elements in the output vector */ if (err_val <= 0) { if (err_val == PCRE_ERROR_NOMATCH) break; printf("PCRE pcre_exec failed with: %d\n", err_val); break; } // printf("match: %d %d\n", (ptr - subject) + match[0], (ptr - subject) + match[1]); ptr += match[1]; len -= match[1]; found++; } time = clock() - time; break; case 1: time = clock(); while (1) { err_val = pcre_dfa_exec( re, /* the compiled pattern */ extra, /* extra data */ ptr, /* the subject string */ len, /* the length of the subject */ 0, /* start at offset 0 in the subject */ 0, /* default options */ match, /* output vector for substring information */ 2, /* number of elements in the output vector */ work_space, /* number of elements (NOT size in bytes) */ 4096); if (err_val < 0) { if (err_val == PCRE_ERROR_NOMATCH) break; printf("PCRE pcre_exec failed\n"); break; } // printf("match: %d %d\n", (ptr - subject) + match[0], (ptr - subject) + match[1]); ptr += match[1]; len -= match[1]; found++; } time = clock() - time; break; case 2: time = clock(); while (1) { err_val = pcre_jit_exec( re, /* the compiled pattern */ extra, /* extra data */ ptr, /* the subject string */ len, /* the length of the subject */ 0, /* start at offset 0 in the subject */ 0, /* default options */ match, /* output vector for substring information */ 64, /* number of elements in the output vector */ stack); /* jit stack */ if (err_val <= 0) { if (err_val == PCRE_ERROR_NOMATCH) break; printf("PCRE pcre_exec failed with: %d\n", err_val); break; } // printf("match: %d %d\n", (ptr - subject) + match[0], (ptr - subject) + match[1]); ptr += match[1]; len -= match[1]; found++; } time = clock() - time; break; } if (!best_time || time < best_time) best_time = time; } while (--repeat > 0); printResult(mode == 0 ? "pcre" : (mode == 1 ? "pcre-dfa" : "pcre-jit"), best_time * 1000 / CLOCKS_PER_SEC, found); if (extra) pcre_free_study(extra); if (stack) pcre_jit_stack_free(stack); pcre_free(re); }