Esempio n. 1
0
static gint
lua_textpart_is_utf (lua_State * L)
{
	struct mime_text_part *part = lua_check_textpart (L);

	if (part == NULL || IS_PART_EMPTY (part)) {
		lua_pushboolean (L, FALSE);
		return 1;
	}

	lua_pushboolean (L, IS_PART_UTF (part));

	return 1;
}
Esempio n. 2
0
/*
 * Calculates the specified regexp for the specified class if it's not calculated
 */
static guint
rspamd_re_cache_exec_re (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		struct rspamd_re_class *re_class,
		gboolean is_strong)
{
	guint ret = 0, i, re_id;
	GPtrArray *headerlist;
	GHashTableIter it;
	struct raw_header *rh;
	const gchar *in, *end;
	const guchar **scvec;
	guint *lenvec;
	gboolean raw = FALSE;
	struct mime_text_part *part;
	struct rspamd_url *url;
	struct rspamd_re_cache *cache = rt->cache;
	gpointer k, v;
	guint len, cnt;

	msg_debug_re_cache ("get to the slow path for re type: %s: %s",
			rspamd_re_cache_type_to_string (re_class->type),
			rspamd_regexp_get_pattern (re));
	re_id = rspamd_regexp_get_cache_id (re);

	switch (re_class->type) {
	case RSPAMD_RE_HEADER:
	case RSPAMD_RE_RAWHEADER:
		/* Get list of specified headers */
		headerlist = rspamd_message_get_header_array (task,
				re_class->type_data,
				is_strong);

		if (headerlist) {
			scvec = g_malloc (sizeof (*scvec) * headerlist->len);
			lenvec = g_malloc (sizeof (*lenvec) * headerlist->len);

			for (i = 0; i < headerlist->len; i ++) {
				rh = g_ptr_array_index (headerlist, i);

				if (re_class->type == RSPAMD_RE_RAWHEADER) {
					in = rh->value;
					raw = TRUE;
					lenvec[i] = strlen (rh->value);
				}
				else {
					in = rh->decoded;
					/* Validate input */
					if (!in || !g_utf8_validate (in, -1, &end)) {
						lenvec[i] = 0;
						scvec[i] = (guchar *)"";
						continue;
					}
					lenvec[i] = end - in;
				}

				scvec[i] = (guchar *)in;
			}

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, headerlist->len, raw);
			debug_task ("checking header %s regexp: %s -> %d",
					re_class->type_data,
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_ALLHEADER:
		raw = TRUE;
		in = task->raw_headers_content.begin;
		len = task->raw_headers_content.len;
		ret = rspamd_re_cache_process_regexp_data (rt, re,
				task->task_pool, (const guchar **)&in, &len, 1, raw);
		debug_task ("checking allheader regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MIME:
	case RSPAMD_RE_RAWMIME:
		/* Iterate through text parts */
		if (task->text_parts->len > 0) {
			scvec = g_malloc (sizeof (*scvec) * task->text_parts->len);
			lenvec = g_malloc (sizeof (*lenvec) * task->text_parts->len);

			for (i = 0; i < task->text_parts->len; i++) {
				part = g_ptr_array_index (task->text_parts, i);

				/* Skip empty parts */
				if (IS_PART_EMPTY (part)) {
					lenvec[i] = 0;
					scvec[i] = (guchar *) "";
					continue;
				}

				/* Check raw flags */
				if (!IS_PART_UTF (part)) {
					raw = TRUE;
				}
				/* Select data for regexp */
				if (re_class->type == RSPAMD_RE_RAWMIME) {
					in = part->orig->data;
					len = part->orig->len;
					raw = TRUE;
				}
				else {
					in = part->content->data;
					len = part->content->len;
				}

				scvec[i] = (guchar *) in;
				lenvec[i] = len;
			}

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, task->text_parts->len, raw);
			debug_task ("checking mime regexp: %s -> %d",
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_URL:
		cnt = g_hash_table_size (task->urls) + g_hash_table_size (task->emails);

		if (cnt > 0) {
			scvec = g_malloc (sizeof (*scvec) * cnt);
			lenvec = g_malloc (sizeof (*lenvec) * cnt);
			g_hash_table_iter_init (&it, task->urls);
			i = 0;

			while (g_hash_table_iter_next (&it, &k, &v)) {
				url = v;
				in = url->string;
				len = url->urllen;
				raw = FALSE;

				scvec[i] = (guchar *)in;
				lenvec[i++] = len;
			}

			g_hash_table_iter_init (&it, task->emails);

			while (g_hash_table_iter_next (&it, &k, &v)) {
				url = v;
				in = url->string;
				len = url->urllen;
				raw = FALSE;

				scvec[i] = (guchar *) in;
				lenvec[i++] = len;
			}

			g_assert (i == cnt);

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, scvec, lenvec, i, raw);
			debug_task ("checking url regexp: %s -> %d",
					rspamd_regexp_get_pattern (re), ret);
			g_free (scvec);
			g_free (lenvec);
		}
		break;
	case RSPAMD_RE_BODY:
		raw = TRUE;
		in = task->msg.begin;
		len = task->msg.len;

		ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool,
				(const guchar **)&in, &len, 1, raw);
		debug_task ("checking rawbody regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MAX:
		msg_err_task ("regexp of class invalid has been called: %s",
				rspamd_regexp_get_pattern (re));
		break;
	}

#if WITH_HYPERSCAN
	if (!rt->cache->disable_hyperscan) {
		rspamd_re_cache_finish_class (rt, re_class);
	}
#endif

	setbit (rt->checked, re_id);

	return rt->results[re_id];
}
Esempio n. 3
0
static gboolean
check_part (struct mime_text_part *part, gboolean raw_mode)
{
	guchar *p, *p1;
	gunichar c, t;
	GUnicodeScript scc, sct;
	guint32 mark = 0, total = 0, max = 0, i;
	guint32 remain = part->content->len;
	guint32 scripts[G_UNICODE_SCRIPT_NKO];
	GUnicodeScript sel = 0;

	p = part->content->data;

	if (IS_PART_UTF (part) || raw_mode) {
		while (remain > 1) {
			if ((g_ascii_isalpha (*p) &&
				(*(p + 1) & 0x80)) ||
				((*p & 0x80) && g_ascii_isalpha (*(p + 1)))) {
				mark++;
				total++;
			}
			/* Current and next symbols are of one class */
			else if (((*p & 0x80) &&
				(*(p + 1) & 0x80)) ||
				(g_ascii_isalpha (*p) && g_ascii_isalpha (*(p + 1)))) {
				total++;
			}
			p++;
			remain--;
		}
	}
	else {
		memset (&scripts, 0, sizeof (scripts));
		while (remain > 0) {
			c = g_utf8_get_char_validated (p, remain);
			if (c == (gunichar) - 2 || c == (gunichar) - 1) {
				/* Invalid characters detected, stop processing */
				return FALSE;
			}

			scc = g_unichar_get_script (c);
			if (scc < (gint)G_N_ELEMENTS (scripts)) {
				scripts[scc]++;
			}
			p1 = g_utf8_next_char (p);
			remain -= p1 - p;
			p = p1;

			if (remain > 0) {
				t = g_utf8_get_char_validated (p, remain);
				if (t == (gunichar) - 2 || t == (gunichar) - 1) {
					/* Invalid characters detected, stop processing */
					return FALSE;
				}
				sct = g_unichar_get_script (t);
				if (g_unichar_isalpha (c) && g_unichar_isalpha (t)) {
					/* We have two unicode alphanumeric characters, so we can check its script */
					if (sct != scc) {
						mark++;
					}
					total++;
				}
				p1 = g_utf8_next_char (p);
				remain -= p1 - p;
				p = p1;
			}
		}
		/* Detect the mostly charset of this part */
		for (i = 0; i < G_N_ELEMENTS (scripts); i++) {
			if (scripts[i] > max) {
				max = scripts[i];
				sel = i;
			}
		}
		part->script = sel;
	}

	if (total == 0) {
		return 0;
	}

	return ((double)mark / (double)total) > chartable_module_ctx->threshold;
}
Esempio n. 4
0
/*
 * Calculates the specified regexp for the specified class if it's not calculated
 */
static guint
rspamd_re_cache_exec_re (struct rspamd_task *task,
		struct rspamd_re_runtime *rt,
		rspamd_regexp_t *re,
		struct rspamd_re_class *re_class,
		gboolean is_strong)
{
	guint ret = 0, i, re_id;
	GList *cur, *headerlist;
	GHashTableIter it;
	struct raw_header *rh;
	const gchar *in;
	gboolean raw = FALSE;
	struct mime_text_part *part;
	struct rspamd_url *url;
	struct rspamd_re_cache *cache = rt->cache;
	gpointer k, v;
	gsize len;

	msg_debug_re_cache ("get to the slow path for re type: %s: %s",
			rspamd_re_cache_type_to_string (re_class->type),
			rspamd_regexp_get_pattern (re));
	re_id = rspamd_regexp_get_cache_id (re);

	switch (re_class->type) {
	case RSPAMD_RE_HEADER:
	case RSPAMD_RE_RAWHEADER:
		/* Get list of specified headers */
		headerlist = rspamd_message_get_header (task,
				re_class->type_data,
				is_strong);

		if (headerlist) {
			cur = headerlist;

			while (cur) {
				rh = cur->data;
				if (re_class->type == RSPAMD_RE_RAWHEADER) {
					in = rh->value;
					raw = TRUE;
				}
				else {
					in = rh->decoded;
					/* Validate input */
					if (!in || !g_utf8_validate (in, -1, NULL)) {
						cur = g_list_next (cur);
						continue;
					}
				}

				/* Match re */
				if (in) {
					ret = rspamd_re_cache_process_regexp_data (rt, re,
							task->task_pool, in, strlen (in), raw);
					debug_task ("checking header %s regexp: %s -> %d",
							re_class->type_data,
							rspamd_regexp_get_pattern (re), ret);
				}

				cur = g_list_next (cur);
			}
		}
		break;
	case RSPAMD_RE_ALLHEADER:
		raw = TRUE;
		in = task->raw_headers_content.begin;
		len = task->raw_headers_content.len;
		ret = rspamd_re_cache_process_regexp_data (rt, re,
				task->task_pool, in, len, raw);
		debug_task ("checking allheader regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MIME:
	case RSPAMD_RE_RAWMIME:
		/* Iterate throught text parts */
		for (i = 0; i < task->text_parts->len; i++) {
			part = g_ptr_array_index (task->text_parts, i);

			/* Skip empty parts */
			if (IS_PART_EMPTY (part)) {
				continue;
			}

			/* Check raw flags */
			if (!IS_PART_UTF (part)) {
				raw = TRUE;
			}
			/* Select data for regexp */
			if (re_class->type == RSPAMD_RE_RAWMIME) {
				in = part->orig->data;
				len = part->orig->len;
				raw = TRUE;
			}
			else {
				in = part->content->data;
				len = part->content->len;
			}

			if (len > 0) {
				ret = rspamd_re_cache_process_regexp_data (rt, re,
						task->task_pool, in, len, raw);
				debug_task ("checking mime regexp: %s -> %d",
						rspamd_regexp_get_pattern (re), ret);
			}
		}
		break;
	case RSPAMD_RE_URL:
		g_hash_table_iter_init (&it, task->urls);

		while (g_hash_table_iter_next (&it, &k, &v)) {
			url = v;
			in = url->string;
			len = url->urllen;
			raw = FALSE;

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, in, len, raw);
		}

		g_hash_table_iter_init (&it, task->emails);

		while (g_hash_table_iter_next (&it, &k, &v)) {
			url = v;
			in = url->string;
			len = url->urllen;
			raw = FALSE;

			ret = rspamd_re_cache_process_regexp_data (rt, re,
					task->task_pool, in, len, raw);
		}

		debug_task ("checking url regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_BODY:
		raw = TRUE;
		in = task->msg.begin;
		len = task->msg.len;

		ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in,
				len, raw);
		debug_task ("checking rawbody regexp: %s -> %d",
				rspamd_regexp_get_pattern (re), ret);
		break;
	case RSPAMD_RE_MAX:
		msg_err_task ("regexp of class invalid has been called: %s",
				rspamd_regexp_get_pattern (re));
		break;
	}

#if WITH_HYPERSCAN
	if (!rt->cache->disable_hyperscan) {
		rspamd_re_cache_finish_class (rt, re_class);
	}
#endif

	setbit (rt->checked, re_id);

	return rt->results[re_id];
}