Ejemplo n.º 1
0
gint
rspamd_re_cache_process (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		enum rspamd_re_type type,
		gpointer type_data,
		gsize datalen,
		gboolean is_strong)
{
	guint64 re_id;
	struct rspamd_re_class *re_class;
	struct rspamd_re_cache *cache;

	g_assert (rt != NULL);
	g_assert (task != NULL);
	g_assert (re != NULL);

	cache = rt->cache;
	re_id = rspamd_regexp_get_cache_id (re);

	if (re_id == RSPAMD_INVALID_ID || re_id > cache->nre) {
		msg_err_task ("re '%s' has no valid id for the cache",
				rspamd_regexp_get_pattern (re));
		return 0;
	}

	if (isset (rt->checked, re_id)) {
		/* Fast path */
		rt->stat.regexp_fast_cached ++;
		return rt->results[re_id];
	}
	else {
		/* Slow path */
		re_class = rspamd_regexp_get_class (re);

		if (re_class == NULL) {
			msg_err_task ("cannot find re class for regexp '%s'",
					rspamd_regexp_get_pattern (re));
			return 0;
		}

		return rspamd_re_cache_exec_re (task, rt, re, re_class,
				is_strong);
	}

	return 0;
}
Ejemplo n.º 2
0
void
rspamd_re_cache_replace (struct rspamd_re_cache *cache,
		rspamd_regexp_t *what,
		rspamd_regexp_t *with)
{
	guint64 re_id;
	struct rspamd_re_class *re_class;
	rspamd_regexp_t *src;
	struct rspamd_re_cache_elt *elt;

	g_assert (cache != NULL);
	g_assert (what != NULL);
	g_assert (with != NULL);

	re_class = rspamd_regexp_get_class (what);

	if (re_class != NULL) {
		re_id = rspamd_regexp_get_cache_id (what);

		g_assert (re_id != RSPAMD_INVALID_ID);
		src = g_hash_table_lookup (re_class->re, rspamd_regexp_get_id (what));
		elt = g_ptr_array_index (cache->re, re_id);
		g_assert (elt != NULL);
		g_assert (src != NULL);

		rspamd_regexp_set_cache_id (what, RSPAMD_INVALID_ID);
		rspamd_regexp_set_class (what, NULL);
		rspamd_regexp_set_cache_id (with, re_id);
		rspamd_regexp_set_class (with, re_class);
		/*
		 * On calling of this function, we actually unref old re (what)
		 */
		g_hash_table_insert (re_class->re,
				rspamd_regexp_get_id (what),
				rspamd_regexp_ref (with));

		rspamd_regexp_unref (elt->re);
		elt->re = rspamd_regexp_ref (with);
		/* XXX: do not touch match type here */
	}
}
Ejemplo n.º 3
0
static guint
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re, rspamd_mempool_t *pool,
		const guchar **in, guint *lens,
		guint count,
		gboolean is_raw)
{

	guint64 re_id;
	guint ret = 0;
	guint i;

	re_id = rspamd_regexp_get_cache_id (re);

	if (count == 0 || in == NULL) {
		/* We assume this as absence of the specified data */
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
		return ret;
	}

#ifndef WITH_HYPERSCAN
	for (i = 0; i < count; i++) {
		ret = rspamd_re_cache_process_pcre (rt,
				re,
				pool,
				in[i],
				lens[i],
				is_raw);
	}
	setbit (rt->checked, re_id);
	rt->results[re_id] = ret;
#else
	struct rspamd_re_cache_elt *elt;
	struct rspamd_re_class *re_class;
	struct rspamd_re_hyperscan_cbdata cbdata;

	elt = g_ptr_array_index (rt->cache->re, re_id);
	re_class = rspamd_regexp_get_class (re);

	if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) {
		for (i = 0; i < count; i++) {
			ret = rspamd_re_cache_process_pcre (rt,
					re,
					pool,
					in[i],
					lens[i],
					is_raw);
		}

		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
	}
	else {
		for (i = 0; i < count; i ++) {
			if (rt->cache->max_re_data > 0 && lens[i] > rt->cache->max_re_data) {
				lens[i] = rt->cache->max_re_data;
			}
			rt->stat.bytes_scanned += lens[i];
		}

		g_assert (re_class->hs_scratch != NULL);
		g_assert (re_class->hs_db != NULL);

		/* Go through hyperscan API */
		if (!rt->cache->vectorized_hyperscan) {
			for (i = 0; i < count; i++) {
				cbdata.ins = &in[i];
				cbdata.re = re;
				cbdata.rt = rt;
				cbdata.lens = &lens[i];
				cbdata.count = 1;
				cbdata.pool = pool;

				if ((hs_scan (re_class->hs_db, in[i], lens[i], 0,
						re_class->hs_scratch,
						rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
					ret = 0;
				}
				else {
					ret = rt->results[re_id];
				}
			}
		}
		else {
			cbdata.ins = in;
			cbdata.re = re;
			cbdata.rt = rt;
			cbdata.lens = lens;
			cbdata.count = 1;
			cbdata.pool = pool;

			if ((hs_scan_vector (re_class->hs_db, (const char **)in, lens, count, 0,
					re_class->hs_scratch,
					rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
				ret = 0;
			}
			else {
				ret = rt->results[re_id];
			}
		}
	}
#endif

	return ret;
}
Ejemplo n.º 4
0
void
rspamd_re_cache_init (struct rspamd_re_cache *cache, struct rspamd_config *cfg)
{
	guint i, fl;
	GHashTableIter it;
	gpointer k, v;
	struct rspamd_re_class *re_class;
	rspamd_cryptobox_hash_state_t st_global;
	rspamd_regexp_t *re;
	struct rspamd_re_cache_elt *elt;
	guchar hash_out[rspamd_cryptobox_HASHBYTES];

	g_assert (cache != NULL);

	rspamd_cryptobox_hash_init (&st_global, NULL, 0);
	/* Resort all regexps */
	g_ptr_array_sort (cache->re, rspamd_re_cache_sort_func);

	for (i = 0; i < cache->re->len; i ++) {
		elt = g_ptr_array_index (cache->re, i);
		re = elt->re;
		re_class = rspamd_regexp_get_class (re);
		g_assert (re_class != NULL);
		rspamd_regexp_set_cache_id (re, i);

		if (re_class->st == NULL) {
			re_class->st = g_slice_alloc (sizeof (*re_class->st));
			rspamd_cryptobox_hash_init (re_class->st, NULL, 0);
		}

		/* Update hashes */
		rspamd_cryptobox_hash_update (re_class->st, (gpointer) &re_class->id,
				sizeof (re_class->id));
		rspamd_cryptobox_hash_update (&st_global, (gpointer) &re_class->id,
				sizeof (re_class->id));
		rspamd_cryptobox_hash_update (re_class->st, rspamd_regexp_get_id (re),
				rspamd_cryptobox_HASHBYTES);
		rspamd_cryptobox_hash_update (&st_global, rspamd_regexp_get_id (re),
				rspamd_cryptobox_HASHBYTES);
		fl = rspamd_regexp_get_pcre_flags (re);
		rspamd_cryptobox_hash_update (re_class->st, (const guchar *)&fl,
				sizeof (fl));
		rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl,
				sizeof (fl));
		fl = rspamd_regexp_get_flags (re);
		rspamd_cryptobox_hash_update (re_class->st, (const guchar *) &fl,
				sizeof (fl));
		rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl,
				sizeof (fl));
		fl = rspamd_regexp_get_maxhits (re);
		rspamd_cryptobox_hash_update (re_class->st, (const guchar *) &fl,
				sizeof (fl));
		rspamd_cryptobox_hash_update (&st_global, (const guchar *) &fl,
				sizeof (fl));
	}

	rspamd_cryptobox_hash_final (&st_global, hash_out);
	rspamd_snprintf (cache->hash, sizeof (cache->hash), "%*xs",
			(gint) rspamd_cryptobox_HASHBYTES, hash_out);

	/* Now finalize all classes */
	g_hash_table_iter_init (&it, cache->re_classes);

	while (g_hash_table_iter_next (&it, &k, &v)) {
		re_class = v;

		if (re_class->st) {
			/*
			 * We finally update all classes with the number of expressions
			 * in the cache to ensure that if even a single re has been changed
			 * we won't be broken due to id mismatch
			 */
			rspamd_cryptobox_hash_update (re_class->st,
					(gpointer)&cache->re->len,
					sizeof (cache->re->len));
			rspamd_cryptobox_hash_final (re_class->st, hash_out);
			rspamd_snprintf (re_class->hash, sizeof (re_class->hash), "%*xs",
					(gint) rspamd_cryptobox_HASHBYTES, hash_out);
			g_slice_free1 (sizeof (*re_class->st), re_class->st);
			re_class->st = NULL;
		}
	}

#ifdef WITH_HYPERSCAN
	const gchar *platform = "generic";
	rspamd_fstring_t *features = rspamd_fstring_new ();

	cache->disable_hyperscan = cfg->disable_hyperscan;
	cache->vectorized_hyperscan = cfg->vectorized_hyperscan;

	g_assert (hs_populate_platform (&cache->plt) == HS_SUCCESS);

	/* Now decode what we do have */
	switch (cache->plt.tune) {
	case HS_TUNE_FAMILY_HSW:
		platform = "haswell";
		break;
	case HS_TUNE_FAMILY_SNB:
		platform = "sandy";
		break;
	case HS_TUNE_FAMILY_BDW:
		platform = "broadwell";
		break;
	case HS_TUNE_FAMILY_IVB:
		platform = "ivy";
		break;
	default:
		break;
	}

	if (cache->plt.cpu_features & HS_CPU_FEATURES_AVX2) {
		features = rspamd_fstring_append (features, "AVX2", 4);
	}

	hs_set_allocator (g_malloc, g_free);

	msg_info_re_cache ("loaded hyperscan engine witch cpu tune '%s' and features '%V'",
			platform, features);

	rspamd_fstring_free (features);
#endif
}
Ejemplo n.º 5
0
static guint
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re, rspamd_mempool_t *pool,
		const guchar *in, gsize len,
		gboolean is_raw)
{

	guint64 re_id;
	guint ret = 0;

	re_id = rspamd_regexp_get_cache_id (re);

	if (len == 0 || in == NULL) {
		/* We assume this as absence of the specified data */
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
		return ret;
	}

#ifndef WITH_HYPERSCAN
	ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw);
	setbit (rt->checked, re_id);
	rt->results[re_id] = ret;
#else
	struct rspamd_re_cache_elt *elt;
	struct rspamd_re_class *re_class;
	struct rspamd_re_hyperscan_cbdata cbdata;

	elt = g_ptr_array_index (rt->cache->re, re_id);
	re_class = rspamd_regexp_get_class (re);

	if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) {
		ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw);
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
	}
	else {
		if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
			len = rt->cache->max_re_data;
		}

		g_assert (re_class->hs_scratch != NULL);
		g_assert (re_class->hs_db != NULL);

		/* Go through hyperscan API */
		cbdata.in = in;
		cbdata.re = re;
		cbdata.rt = rt;
		cbdata.len = len;
		cbdata.pool = pool;
		rt->stat.bytes_scanned += len;

		if ((hs_scan (re_class->hs_db, in, len, 0, re_class->hs_scratch,
				rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
			ret = 0;
		}
		else {
			ret = rt->results[re_id];
		}
	}
#endif

	return ret;
}