gint rspamd_re_cache_process (struct rspamd_task *task, struct rspamd_re_runtime *rt, rspamd_regexp_t *re, enum rspamd_re_type type, gpointer type_data, gsize datalen, gboolean is_strong) { guint64 re_id; struct rspamd_re_class *re_class; struct rspamd_re_cache *cache; g_assert (rt != NULL); g_assert (task != NULL); g_assert (re != NULL); cache = rt->cache; re_id = rspamd_regexp_get_cache_id (re); if (re_id == RSPAMD_INVALID_ID || re_id > cache->nre) { msg_err_task ("re '%s' has no valid id for the cache", rspamd_regexp_get_pattern (re)); return 0; } if (isset (rt->checked, re_id)) { /* Fast path */ rt->stat.regexp_fast_cached ++; return rt->results[re_id]; } else { /* Slow path */ re_class = rspamd_regexp_get_class (re); if (re_class == NULL) { msg_err_task ("cannot find re class for regexp '%s'", rspamd_regexp_get_pattern (re)); return 0; } return rspamd_re_cache_exec_re (task, rt, re, re_class, is_strong); } return 0; }
void rspamd_re_cache_replace (struct rspamd_re_cache *cache, rspamd_regexp_t *what, rspamd_regexp_t *with) { guint64 re_id; struct rspamd_re_class *re_class; rspamd_regexp_t *src; struct rspamd_re_cache_elt *elt; g_assert (cache != NULL); g_assert (what != NULL); g_assert (with != NULL); re_class = rspamd_regexp_get_class (what); if (re_class != NULL) { re_id = rspamd_regexp_get_cache_id (what); g_assert (re_id != RSPAMD_INVALID_ID); src = g_hash_table_lookup (re_class->re, rspamd_regexp_get_id (what)); elt = g_ptr_array_index (cache->re, re_id); g_assert (elt != NULL); g_assert (src != NULL); rspamd_regexp_set_cache_id (what, RSPAMD_INVALID_ID); rspamd_regexp_set_class (what, NULL); rspamd_regexp_set_cache_id (with, re_id); rspamd_regexp_set_class (with, re_class); /* * On calling of this function, we actually unref old re (what) */ g_hash_table_insert (re_class->re, rspamd_regexp_get_id (what), rspamd_regexp_ref (with)); rspamd_regexp_unref (elt->re); elt->re = rspamd_regexp_ref (with); /* XXX: do not touch match type here */ } }
static guint rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, rspamd_regexp_t *re, rspamd_mempool_t *pool, const guchar **in, guint *lens, guint count, gboolean is_raw) { guint64 re_id; guint ret = 0; guint i; re_id = rspamd_regexp_get_cache_id (re); if (count == 0 || in == NULL) { /* We assume this as absence of the specified data */ setbit (rt->checked, re_id); rt->results[re_id] = ret; return ret; } #ifndef WITH_HYPERSCAN for (i = 0; i < count; i++) { ret = rspamd_re_cache_process_pcre (rt, re, pool, in[i], lens[i], is_raw); } setbit (rt->checked, re_id); rt->results[re_id] = ret; #else struct rspamd_re_cache_elt *elt; struct rspamd_re_class *re_class; struct rspamd_re_hyperscan_cbdata cbdata; elt = g_ptr_array_index (rt->cache->re, re_id); re_class = rspamd_regexp_get_class (re); if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) { for (i = 0; i < count; i++) { ret = rspamd_re_cache_process_pcre (rt, re, pool, in[i], lens[i], is_raw); } setbit (rt->checked, re_id); rt->results[re_id] = ret; } else { for (i = 0; i < count; i ++) { if (rt->cache->max_re_data > 0 && lens[i] > rt->cache->max_re_data) { lens[i] = rt->cache->max_re_data; } rt->stat.bytes_scanned += lens[i]; } g_assert (re_class->hs_scratch != NULL); g_assert (re_class->hs_db != NULL); /* Go through hyperscan API */ if (!rt->cache->vectorized_hyperscan) { for (i = 0; i < count; i++) { cbdata.ins = &in[i]; cbdata.re = re; cbdata.rt = rt; cbdata.lens = &lens[i]; cbdata.count = 1; cbdata.pool = pool; if ((hs_scan (re_class->hs_db, in[i], lens[i], 0, re_class->hs_scratch, rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) { ret = 0; } else { ret = rt->results[re_id]; } } } else { cbdata.ins = in; cbdata.re = re; cbdata.rt = rt; cbdata.lens = lens; cbdata.count = 1; cbdata.pool = pool; if ((hs_scan_vector (re_class->hs_db, (const char **)in, lens, count, 0, re_class->hs_scratch, rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) { ret = 0; } else { ret = rt->results[re_id]; } } } #endif return ret; }
void rspamd_re_cache_init (struct rspamd_re_cache *cache, struct rspamd_config *cfg) { guint i, fl; GHashTableIter it; gpointer k, v; struct rspamd_re_class *re_class; rspamd_cryptobox_hash_state_t st_global; rspamd_regexp_t *re; struct rspamd_re_cache_elt *elt; guchar hash_out[rspamd_cryptobox_HASHBYTES]; g_assert (cache != NULL); rspamd_cryptobox_hash_init (&st_global, NULL, 0); /* Resort all regexps */ g_ptr_array_sort (cache->re, rspamd_re_cache_sort_func); for (i = 0; i < cache->re->len; i ++) { elt = g_ptr_array_index (cache->re, i); re = elt->re; re_class = rspamd_regexp_get_class (re); g_assert (re_class != NULL); rspamd_regexp_set_cache_id (re, i); if (re_class->st == NULL) { re_class->st = g_slice_alloc (sizeof (*re_class->st)); rspamd_cryptobox_hash_init (re_class->st, NULL, 0); } /* Update hashes */ rspamd_cryptobox_hash_update (re_class->st, (gpointer) &re_class->id, sizeof (re_class->id)); rspamd_cryptobox_hash_update (&st_global, (gpointer) &re_class->id, sizeof (re_class->id)); rspamd_cryptobox_hash_update (re_class->st, rspamd_regexp_get_id (re), rspamd_cryptobox_HASHBYTES); rspamd_cryptobox_hash_update (&st_global, rspamd_regexp_get_id (re), rspamd_cryptobox_HASHBYTES); fl = rspamd_regexp_get_pcre_flags (re); rspamd_cryptobox_hash_update (re_class->st, (const guchar *)&fl, sizeof (fl)); rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl, sizeof (fl)); fl = rspamd_regexp_get_flags (re); rspamd_cryptobox_hash_update (re_class->st, (const guchar *) &fl, sizeof (fl)); rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl, sizeof (fl)); fl = rspamd_regexp_get_maxhits (re); rspamd_cryptobox_hash_update (re_class->st, (const guchar *) &fl, sizeof (fl)); rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl, sizeof (fl)); } rspamd_cryptobox_hash_final (&st_global, hash_out); rspamd_snprintf (cache->hash, sizeof (cache->hash), "%*xs", (gint) rspamd_cryptobox_HASHBYTES, hash_out); /* Now finalize all classes */ g_hash_table_iter_init (&it, cache->re_classes); while (g_hash_table_iter_next (&it, &k, &v)) { re_class = v; if (re_class->st) { /* * We finally update all classes with the number of expressions * in the cache to ensure that if even a single re has been changed * we won't be broken due to id mismatch */ rspamd_cryptobox_hash_update (re_class->st, (gpointer)&cache->re->len, sizeof (cache->re->len)); rspamd_cryptobox_hash_final (re_class->st, hash_out); rspamd_snprintf (re_class->hash, sizeof (re_class->hash), "%*xs", (gint) rspamd_cryptobox_HASHBYTES, hash_out); g_slice_free1 (sizeof (*re_class->st), re_class->st); re_class->st = NULL; } } #ifdef WITH_HYPERSCAN const gchar *platform = "generic"; rspamd_fstring_t *features = rspamd_fstring_new (); cache->disable_hyperscan = cfg->disable_hyperscan; cache->vectorized_hyperscan = cfg->vectorized_hyperscan; g_assert (hs_populate_platform (&cache->plt) == HS_SUCCESS); /* Now decode what we do have */ switch (cache->plt.tune) { case HS_TUNE_FAMILY_HSW: platform = "haswell"; break; case HS_TUNE_FAMILY_SNB: platform = "sandy"; break; case HS_TUNE_FAMILY_BDW: platform = "broadwell"; break; case HS_TUNE_FAMILY_IVB: platform = "ivy"; break; default: break; } if (cache->plt.cpu_features & HS_CPU_FEATURES_AVX2) { features = rspamd_fstring_append (features, "AVX2", 4); } hs_set_allocator (g_malloc, g_free); msg_info_re_cache ("loaded hyperscan engine witch cpu tune '%s' and features '%V'", platform, features); rspamd_fstring_free (features); #endif }
static guint rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, rspamd_regexp_t *re, rspamd_mempool_t *pool, const guchar *in, gsize len, gboolean is_raw) { guint64 re_id; guint ret = 0; re_id = rspamd_regexp_get_cache_id (re); if (len == 0 || in == NULL) { /* We assume this as absence of the specified data */ setbit (rt->checked, re_id); rt->results[re_id] = ret; return ret; } #ifndef WITH_HYPERSCAN ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw); setbit (rt->checked, re_id); rt->results[re_id] = ret; #else struct rspamd_re_cache_elt *elt; struct rspamd_re_class *re_class; struct rspamd_re_hyperscan_cbdata cbdata; elt = g_ptr_array_index (rt->cache->re, re_id); re_class = rspamd_regexp_get_class (re); if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) { ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw); setbit (rt->checked, re_id); rt->results[re_id] = ret; } else { if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) { len = rt->cache->max_re_data; } g_assert (re_class->hs_scratch != NULL); g_assert (re_class->hs_db != NULL); /* Go through hyperscan API */ cbdata.in = in; cbdata.re = re; cbdata.rt = rt; cbdata.len = len; cbdata.pool = pool; rt->stat.bytes_scanned += len; if ((hs_scan (re_class->hs_db, in, len, 0, re_class->hs_scratch, rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) { ret = 0; } else { ret = rt->results[re_id]; } } #endif return ret; }