Exemple #1
0
static guint
rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re, rspamd_mempool_t *pool,
		const guchar *in, gsize len,
		gboolean is_raw)
{
	guint r = 0;
	const gchar *start = NULL, *end = NULL;
	guint max_hits = rspamd_regexp_get_maxhits (re);
	guint64 id = rspamd_regexp_get_cache_id (re);
	gdouble t1, t2;
	const gdouble slow_time = 0.1;

	if (len == 0) {
		len = strlen (in);
	}

	if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
		len = rt->cache->max_re_data;
	}

	r = rt->results[id];

	t1 = rspamd_get_ticks ();

	if (max_hits == 0 || r < max_hits) {
		while (rspamd_regexp_search (re,
				in,
				len,
				&start,
				&end,
				is_raw,
				NULL)) {
			r++;

			if (max_hits > 0 && r >= max_hits) {
				break;
			}
		}

		rt->stat.regexp_checked++;
		rt->stat.bytes_scanned_pcre += len;
		rt->stat.bytes_scanned += len;

		if (r > 0) {
			rt->stat.regexp_matched += r;
		}
	}

	t2 = rspamd_get_ticks ();

	if (t2 - t1 > slow_time) {
		msg_info_pool ("regexp '%16s' took %.2f seconds to execute",
				rspamd_regexp_get_pattern (re), t2 - t1);
	}

	return r;
}
Exemple #2
0
gint
rspamd_re_cache_process (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		enum rspamd_re_type type,
		gpointer type_data,
		gsize datalen,
		gboolean is_strong)
{
	guint64 re_id;
	struct rspamd_re_class *re_class;
	struct rspamd_re_cache *cache;

	g_assert (rt != NULL);
	g_assert (task != NULL);
	g_assert (re != NULL);

	cache = rt->cache;
	re_id = rspamd_regexp_get_cache_id (re);

	if (re_id == RSPAMD_INVALID_ID || re_id > cache->nre) {
		msg_err_task ("re '%s' has no valid id for the cache",
				rspamd_regexp_get_pattern (re));
		return 0;
	}

	if (isset (rt->checked, re_id)) {
		/* Fast path */
		rt->stat.regexp_fast_cached ++;
		return rt->results[re_id];
	}
	else {
		/* Slow path */
		re_class = rspamd_regexp_get_class (re);

		if (re_class == NULL) {
			msg_err_task ("cannot find re class for regexp '%s'",
					rspamd_regexp_get_pattern (re));
			return 0;
		}

		return rspamd_re_cache_exec_re (task, rt, re, re_class,
				is_strong);
	}

	return 0;
}
Exemple #3
0
void
rspamd_re_cache_replace (struct rspamd_re_cache *cache,
		rspamd_regexp_t *what,
		rspamd_regexp_t *with)
{
	guint64 re_id;
	struct rspamd_re_class *re_class;
	rspamd_regexp_t *src;
	struct rspamd_re_cache_elt *elt;

	g_assert (cache != NULL);
	g_assert (what != NULL);
	g_assert (with != NULL);

	re_class = rspamd_regexp_get_class (what);

	if (re_class != NULL) {
		re_id = rspamd_regexp_get_cache_id (what);

		g_assert (re_id != RSPAMD_INVALID_ID);
		src = g_hash_table_lookup (re_class->re, rspamd_regexp_get_id (what));
		elt = g_ptr_array_index (cache->re, re_id);
		g_assert (elt != NULL);
		g_assert (src != NULL);

		rspamd_regexp_set_cache_id (what, RSPAMD_INVALID_ID);
		rspamd_regexp_set_class (what, NULL);
		rspamd_regexp_set_cache_id (with, re_id);
		rspamd_regexp_set_class (with, re_class);
		/*
		 * On calling of this function, we actually unref old re (what)
		 */
		g_hash_table_insert (re_class->re,
				rspamd_regexp_get_id (what),
				rspamd_regexp_ref (with));

		rspamd_regexp_unref (elt->re);
		elt->re = rspamd_regexp_ref (with);
		/* XXX: do not touch match type here */
	}
}
Exemple #4
0
/*
 * Calculates the specified regexp for the specified class if it's not calculated
 */
static guint
rspamd_re_cache_exec_re (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		struct rspamd_re_class *re_class,
		gboolean is_strong)
{
	guint ret = 0, i, re_id;
	GPtrArray *headerlist;
	GHashTableIter it;
	struct raw_header *rh;
	const gchar *in, *end;
	const guchar **scvec;
	guint *lenvec;
	gboolean raw = FALSE;
	struct mime_text_part *part;
	struct rspamd_url *url;
	struct rspamd_re_cache *cache = rt->cache;
	gpointer k, v;
	guint len, cnt;

	msg_debug_re_cache ("get to the slow path for re type: %s: %s",
			rspamd_re_cache_type_to_string (re_class->type),
			rspamd_regexp_get_pattern (re));
	re_id = rspamd_regexp_get_cache_id (re);

	switch (re_class->type) {
	case RSPAMD_RE_HEADER:
	case RSPAMD_RE_RAWHEADER:
		/* Get list of specified headers */
		headerlist = rspamd_message_get_header_array (task,
				re_class->type_data,
				is_strong);

		if (headerlist) {
			scvec = g_malloc (sizeof (*scvec) * headerlist->len);
			lenvec = g_malloc (sizeof (*lenvec) * headerlist->len);

			for (i = 0; i < headerlist->len; i ++) {
				rh = g_ptr_array_index (headerlist, i);

				if (re_class->type == RSPAMD_RE_RAWHEADER) {
					in = rh->value;
					raw = TRUE;
					lenvec[i] = strlen (rh->value);
				}
				else {
					in = rh->decoded;
					/* Validate input */
					if (!in || !g_utf8_validate (in, -1, &end)) {
						lenvec[i] = 0;
						scvec[i] = (guchar *)"";
						continue;
					}
					lenvec[i] = end - in;
				}

				scvec[i] = (guchar *)in;
			}

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, headerlist->len, raw);
			debug_task ("checking header %s regexp: %s -> %d",
					re_class->type_data,
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_ALLHEADER:
		raw = TRUE;
		in = task->raw_headers_content.begin;
		len = task->raw_headers_content.len;
		ret = rspamd_re_cache_process_regexp_data (rt, re,
				task->task_pool, (const guchar **)&in, &len, 1, raw);
		debug_task ("checking allheader regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MIME:
	case RSPAMD_RE_RAWMIME:
		/* Iterate through text parts */
		if (task->text_parts->len > 0) {
			scvec = g_malloc (sizeof (*scvec) * task->text_parts->len);
			lenvec = g_malloc (sizeof (*lenvec) * task->text_parts->len);

			for (i = 0; i < task->text_parts->len; i++) {
				part = g_ptr_array_index (task->text_parts, i);

				/* Skip empty parts */
				if (IS_PART_EMPTY (part)) {
					lenvec[i] = 0;
					scvec[i] = (guchar *) "";
					continue;
				}

				/* Check raw flags */
				if (!IS_PART_UTF (part)) {
					raw = TRUE;
				}
				/* Select data for regexp */
				if (re_class->type == RSPAMD_RE_RAWMIME) {
					in = part->orig->data;
					len = part->orig->len;
					raw = TRUE;
				}
				else {
					in = part->content->data;
					len = part->content->len;
				}

				scvec[i] = (guchar *) in;
				lenvec[i] = len;
			}

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, task->text_parts->len, raw);
			debug_task ("checking mime regexp: %s -> %d",
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_URL:
		cnt = g_hash_table_size (task->urls) + g_hash_table_size (task->emails);

		if (cnt > 0) {
			scvec = g_malloc (sizeof (*scvec) * cnt);
			lenvec = g_malloc (sizeof (*lenvec) * cnt);
			g_hash_table_iter_init (&it, task->urls);
			i = 0;

			while (g_hash_table_iter_next (&it, &k, &v)) {
				url = v;
				in = url->string;
				len = url->urllen;
				raw = FALSE;

				scvec[i] = (guchar *)in;
				lenvec[i++] = len;
			}

			g_hash_table_iter_init (&it, task->emails);

			while (g_hash_table_iter_next (&it, &k, &v)) {
				url = v;
				in = url->string;
				len = url->urllen;
				raw = FALSE;

				scvec[i] = (guchar *) in;
				lenvec[i++] = len;
			}

			g_assert (i == cnt);

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, i, raw);
			debug_task ("checking url regexp: %s -> %d",
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_BODY:
		raw = TRUE;
		in = task->msg.begin;
		len = task->msg.len;

		ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool,
				(const guchar **)&in, &len, 1, raw);
		debug_task ("checking rawbody regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MAX:
		msg_err_task ("regexp of class invalid has been called: %s",
				rspamd_regexp_get_pattern (re));
		break;
	}

#if WITH_HYPERSCAN
	if (!rt->cache->disable_hyperscan) {
		rspamd_re_cache_finish_class (rt, re_class);
	}
#endif

	setbit (rt->checked, re_id);

	return rt->results[re_id];
}
Exemple #5
0
static guint
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re, rspamd_mempool_t *pool,
		const guchar **in, guint *lens,
		guint count,
		gboolean is_raw)
{

	guint64 re_id;
	guint ret = 0;
	guint i;

	re_id = rspamd_regexp_get_cache_id (re);

	if (count == 0 || in == NULL) {
		/* We assume this as absence of the specified data */
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
		return ret;
	}

#ifndef WITH_HYPERSCAN
	for (i = 0; i < count; i++) {
		ret = rspamd_re_cache_process_pcre (rt,
				re,
				pool,
				in[i],
				lens[i],
				is_raw);
	}
	setbit (rt->checked, re_id);
	rt->results[re_id] = ret;
#else
	struct rspamd_re_cache_elt *elt;
	struct rspamd_re_class *re_class;
	struct rspamd_re_hyperscan_cbdata cbdata;

	elt = g_ptr_array_index (rt->cache->re, re_id);
	re_class = rspamd_regexp_get_class (re);

	if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) {
		for (i = 0; i < count; i++) {
			ret = rspamd_re_cache_process_pcre (rt,
					re,
					pool,
					in[i],
					lens[i],
					is_raw);
		}

		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
	}
	else {
		for (i = 0; i < count; i ++) {
			if (rt->cache->max_re_data > 0 && lens[i] > rt->cache->max_re_data) {
				lens[i] = rt->cache->max_re_data;
			}
			rt->stat.bytes_scanned += lens[i];
		}

		g_assert (re_class->hs_scratch != NULL);
		g_assert (re_class->hs_db != NULL);

		/* Go through hyperscan API */
		if (!rt->cache->vectorized_hyperscan) {
			for (i = 0; i < count; i++) {
				cbdata.ins = &in[i];
				cbdata.re = re;
				cbdata.rt = rt;
				cbdata.lens = &lens[i];
				cbdata.count = 1;
				cbdata.pool = pool;

				if ((hs_scan (re_class->hs_db, in[i], lens[i], 0,
						re_class->hs_scratch,
						rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
					ret = 0;
				}
				else {
					ret = rt->results[re_id];
				}
			}
		}
		else {
			cbdata.ins = in;
			cbdata.re = re;
			cbdata.rt = rt;
			cbdata.lens = lens;
			cbdata.count = 1;
			cbdata.pool = pool;

			if ((hs_scan_vector (re_class->hs_db, (const char **)in, lens, count, 0,
					re_class->hs_scratch,
					rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
				ret = 0;
			}
			else {
				ret = rt->results[re_id];
			}
		}
	}
#endif

	return ret;
}
Exemple #6
0
gint
rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
		const char *cache_dir, gdouble max_time, gboolean silent,
		GError **err)
{
	g_assert (cache != NULL);
	g_assert (cache_dir != NULL);

#ifndef WITH_HYPERSCAN
	g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
	return -1;
#else
	GHashTableIter it, cit;
	gpointer k, v;
	struct rspamd_re_class *re_class;
	gchar path[PATH_MAX];
	hs_database_t *test_db;
	gint fd, i, n, *hs_ids = NULL, pcre_flags, re_flags;
	guint64 crc;
	rspamd_regexp_t *re;
	hs_compile_error_t *hs_errors;
	guint *hs_flags = NULL;
	const gchar **hs_pats = NULL;
	gchar *hs_serialized;
	gsize serialized_len, total = 0;
	struct iovec iov[7];

	g_hash_table_iter_init (&it, cache->re_classes);

	while (g_hash_table_iter_next (&it, &k, &v)) {
		re_class = v;
		rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
				G_DIR_SEPARATOR, re_class->hash);

		if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, TRUE, TRUE)) {

			fd = open (path, O_RDONLY, 00600);

			/* Read number of regexps */
			g_assert (fd != -1);
			lseek (fd, RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt), SEEK_SET);
			read (fd, &n, sizeof (n));
			close (fd);

			if (re_class->type_len > 0) {
				if (!silent) {
					msg_info_re_cache (
							"skip already valid class %s(%*s) to cache %6s, %d regexps",
							rspamd_re_cache_type_to_string (re_class->type),
							(gint) re_class->type_len - 1,
							re_class->type_data,
							re_class->hash,
							n);
				}
			}
			else {
				if (!silent) {
					msg_info_re_cache (
							"skip already valid class %s to cache %6s, %d regexps",
							rspamd_re_cache_type_to_string (re_class->type),
							re_class->hash,
							n);
				}
			}

			continue;
		}

		fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);

		if (fd == -1) {
			g_set_error (err, rspamd_re_cache_quark (), errno, "cannot open file "
					"%s: %s", path, strerror (errno));
			return -1;
		}

		g_hash_table_iter_init (&cit, re_class->re);
		n = g_hash_table_size (re_class->re);
		hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
		hs_ids = g_malloc (sizeof (*hs_ids) * n);
		hs_pats = g_malloc (sizeof (*hs_pats) * n);
		i = 0;

		while (g_hash_table_iter_next (&cit, &k, &v)) {
			re = v;

			pcre_flags = rspamd_regexp_get_pcre_flags (re);
			re_flags = rspamd_regexp_get_flags (re);

			if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
				/* Do not try to compile bad regexp */
				msg_info_re_cache (
						"do not try compile %s to hyperscan as it is PCRE only",
						rspamd_regexp_get_pattern (re));
				continue;
			}

			hs_flags[i] = 0;
#ifndef WITH_PCRE2
			if (pcre_flags & PCRE_FLAG(UTF8)) {
				hs_flags[i] |= HS_FLAG_UTF8;
			}
#else
			if (pcre_flags & PCRE_FLAG(UTF)) {
				hs_flags[i] |= HS_FLAG_UTF8;
			}
#endif
			if (pcre_flags & PCRE_FLAG(CASELESS)) {
				hs_flags[i] |= HS_FLAG_CASELESS;
			}
			if (pcre_flags & PCRE_FLAG(MULTILINE)) {
				hs_flags[i] |= HS_FLAG_MULTILINE;
			}
			if (pcre_flags & PCRE_FLAG(DOTALL)) {
				hs_flags[i] |= HS_FLAG_DOTALL;
			}
			if (rspamd_regexp_get_maxhits (re) == 1) {
				hs_flags[i] |= HS_FLAG_SINGLEMATCH;
			}

			if (hs_compile (rspamd_regexp_get_pattern (re),
					hs_flags[i],
					cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
					&cache->plt,
					&test_db,
					&hs_errors) != HS_SUCCESS) {
				msg_info_re_cache ("cannot compile %s to hyperscan, try prefilter match",
						rspamd_regexp_get_pattern (re));
				hs_free_compile_error (hs_errors);

				/* The approximation operation might take a significant
				 * amount of time, so we need to check if it's finite
				 */
				if (rspamd_re_cache_is_finite (cache, re, hs_flags[i], max_time)) {
					hs_flags[i] |= HS_FLAG_PREFILTER;
					hs_ids[i] = rspamd_regexp_get_cache_id (re);
					hs_pats[i] = rspamd_regexp_get_pattern (re);
					i++;
				}
			}
			else {
				hs_ids[i] = rspamd_regexp_get_cache_id (re);
				hs_pats[i] = rspamd_regexp_get_pattern (re);
				i ++;
				hs_free_database (test_db);
			}
		}
		/* Adjust real re number */
		n = i;

		if (n > 0) {
			/* Create the hs tree */
			if (hs_compile_multi (hs_pats,
					hs_flags,
					hs_ids,
					n,
					cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
					&cache->plt,
					&test_db,
					&hs_errors) != HS_SUCCESS) {

				g_set_error (err, rspamd_re_cache_quark (), EINVAL,
						"cannot create tree of regexp when processing '%s': %s",
						hs_pats[hs_errors->expression], hs_errors->message);
				g_free (hs_flags);
				g_free (hs_ids);
				g_free (hs_pats);
				close (fd);
				hs_free_compile_error (hs_errors);

				return -1;
			}

			g_free (hs_pats);

			if (hs_serialize_database (test_db, &hs_serialized,
					&serialized_len) != HS_SUCCESS) {
				g_set_error (err,
						rspamd_re_cache_quark (),
						errno,
						"cannot serialize tree of regexp for %s",
						re_class->hash);

				close (fd);
				g_free (hs_ids);
				g_free (hs_flags);
				hs_free_database (test_db);

				return -1;
			}

			hs_free_database (test_db);

			/*
			 * Magic - 8 bytes
			 * Platform - sizeof (platform)
			 * n - number of regexps
			 * n * <regexp ids>
			 * n * <regexp flags>
			 * crc - 8 bytes checksum
			 * <hyperscan blob>
			 */
			crc = XXH64 (hs_serialized, serialized_len, 0xdeadbabe);

			if (cache->vectorized_hyperscan) {
				iov[0].iov_base = (void *) rspamd_hs_magic_vector;
			}
			else {
				iov[0].iov_base = (void *) rspamd_hs_magic;
			}
			iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
			iov[1].iov_base = &cache->plt;
			iov[1].iov_len = sizeof (cache->plt);
			iov[2].iov_base = &n;
			iov[2].iov_len = sizeof (n);
			iov[3].iov_base = hs_ids;
			iov[3].iov_len = sizeof (*hs_ids) * n;
			iov[4].iov_base = hs_flags;
			iov[4].iov_len = sizeof (*hs_flags) * n;
			iov[5].iov_base = &crc;
			iov[5].iov_len = sizeof (crc);
			iov[6].iov_base = hs_serialized;
			iov[6].iov_len = serialized_len;

			if (writev (fd, iov, G_N_ELEMENTS (iov)) == -1) {
				g_set_error (err,
						rspamd_re_cache_quark (),
						errno,
						"cannot serialize tree of regexp to %s: %s",
						path, strerror (errno));
				close (fd);
				g_free (hs_ids);
				g_free (hs_flags);
				g_free (hs_serialized);

				return -1;
			}

			if (re_class->type_len > 0) {
				msg_info_re_cache (
						"compiled class %s(%*s) to cache %6s, %d regexps",
						rspamd_re_cache_type_to_string (re_class->type),
						(gint) re_class->type_len - 1,
						re_class->type_data,
						re_class->hash,
						n);
			}
			else {
				msg_info_re_cache (
						"compiled class %s to cache %6s, %d regexps",
						rspamd_re_cache_type_to_string (re_class->type),
						re_class->hash,
						n);
			}

			total += n;

			g_free (hs_serialized);
			g_free (hs_ids);
			g_free (hs_flags);
		}

		close (fd);
	}

	return total;
#endif
}
Exemple #7
0
/*
 * Calculates the specified regexp for the specified class if it's not calculated
 */
static guint
rspamd_re_cache_exec_re (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		struct rspamd_re_class *re_class,
		gboolean is_strong)
{
	guint ret = 0, i, re_id;
	GList *cur, *headerlist;
	GHashTableIter it;
	struct raw_header *rh;
	const gchar *in;
	gboolean raw = FALSE;
	struct mime_text_part *part;
	struct rspamd_url *url;
	struct rspamd_re_cache *cache = rt->cache;
	gpointer k, v;
	gsize len;

	msg_debug_re_cache ("get to the slow path for re type: %s: %s",
			rspamd_re_cache_type_to_string (re_class->type),
			rspamd_regexp_get_pattern (re));
	re_id = rspamd_regexp_get_cache_id (re);

	switch (re_class->type) {
	case RSPAMD_RE_HEADER:
	case RSPAMD_RE_RAWHEADER:
		/* Get list of specified headers */
		headerlist = rspamd_message_get_header (task,
				re_class->type_data,
				is_strong);

		if (headerlist) {
			cur = headerlist;

			while (cur) {
				rh = cur->data;
				if (re_class->type == RSPAMD_RE_RAWHEADER) {
					in = rh->value;
					raw = TRUE;
				}
				else {
					in = rh->decoded;
					/* Validate input */
					if (!in || !g_utf8_validate (in, -1, NULL)) {
						cur = g_list_next (cur);
						continue;
					}
				}

				/* Match re */
				if (in) {
					ret = rspamd_re_cache_process_regexp_data (rt, re,
							task->task_pool, in, strlen (in), raw);
					debug_task ("checking header %s regexp: %s -> %d",
							re_class->type_data,
							rspamd_regexp_get_pattern (re), ret);
				}

				cur = g_list_next (cur);
			}
		}
		break;
	case RSPAMD_RE_ALLHEADER:
		raw = TRUE;
		in = task->raw_headers_content.begin;
		len = task->raw_headers_content.len;
		ret = rspamd_re_cache_process_regexp_data (rt, re,
				task->task_pool, in, len, raw);
		debug_task ("checking allheader regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MIME:
	case RSPAMD_RE_RAWMIME:
		/* Iterate throught text parts */
		for (i = 0; i < task->text_parts->len; i++) {
			part = g_ptr_array_index (task->text_parts, i);

			/* Skip empty parts */
			if (IS_PART_EMPTY (part)) {
				continue;
			}

			/* Check raw flags */
			if (!IS_PART_UTF (part)) {
				raw = TRUE;
			}
			/* Select data for regexp */
			if (re_class->type == RSPAMD_RE_RAWMIME) {
				in = part->orig->data;
				len = part->orig->len;
				raw = TRUE;
			}
			else {
				in = part->content->data;
				len = part->content->len;
			}

			if (len > 0) {
				ret = rspamd_re_cache_process_regexp_data (rt, re,
						task->task_pool, in, len, raw);
				debug_task ("checking mime regexp: %s -> %d",
						rspamd_regexp_get_pattern (re), ret);
			}
		}
		break;
	case RSPAMD_RE_URL:
		g_hash_table_iter_init (&it, task->urls);

		while (g_hash_table_iter_next (&it, &k, &v)) {
			url = v;
			in = url->string;
			len = url->urllen;
			raw = FALSE;

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, in, len, raw);
		}

		g_hash_table_iter_init (&it, task->emails);

		while (g_hash_table_iter_next (&it, &k, &v)) {
			url = v;
			in = url->string;
			len = url->urllen;
			raw = FALSE;

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, in, len, raw);
		}

		debug_task ("checking url regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_BODY:
		raw = TRUE;
		in = task->msg.begin;
		len = task->msg.len;

		ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in,
				len, raw);
		debug_task ("checking rawbody regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MAX:
		msg_err_task ("regexp of class invalid has been called: %s",
				rspamd_regexp_get_pattern (re));
		break;
	}

#if WITH_HYPERSCAN
	if (!rt->cache->disable_hyperscan) {
		rspamd_re_cache_finish_class (rt, re_class);
	}
#endif

	setbit (rt->checked, re_id);

	return rt->results[re_id];
}
Exemple #8
0
static guint
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re, rspamd_mempool_t *pool,
		const guchar *in, gsize len,
		gboolean is_raw)
{

	guint64 re_id;
	guint ret = 0;

	re_id = rspamd_regexp_get_cache_id (re);

	if (len == 0 || in == NULL) {
		/* We assume this as absence of the specified data */
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
		return ret;
	}

#ifndef WITH_HYPERSCAN
	ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw);
	setbit (rt->checked, re_id);
	rt->results[re_id] = ret;
#else
	struct rspamd_re_cache_elt *elt;
	struct rspamd_re_class *re_class;
	struct rspamd_re_hyperscan_cbdata cbdata;

	elt = g_ptr_array_index (rt->cache->re, re_id);
	re_class = rspamd_regexp_get_class (re);

	if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE) {
		ret = rspamd_re_cache_process_pcre (rt, re, pool, in, len, is_raw);
		setbit (rt->checked, re_id);
		rt->results[re_id] = ret;
	}
	else {
		if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
			len = rt->cache->max_re_data;
		}

		g_assert (re_class->hs_scratch != NULL);
		g_assert (re_class->hs_db != NULL);

		/* Go through hyperscan API */
		cbdata.in = in;
		cbdata.re = re;
		cbdata.rt = rt;
		cbdata.len = len;
		cbdata.pool = pool;
		rt->stat.bytes_scanned += len;

		if ((hs_scan (re_class->hs_db, in, len, 0, re_class->hs_scratch,
				rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
			ret = 0;
		}
		else {
			ret = rt->results[re_id];
		}
	}
#endif

	return ret;
}