Beispiel #1
0
rspamd_regexp_t*
rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
		const gchar *pattern,
		const gchar *flags, GError **err)
{
	rspamd_regexp_t *res;

	if (cache == NULL) {
		rspamd_regexp_library_init ();
		cache = global_re_cache;
	}

	g_assert (cache != NULL);
	res = rspamd_regexp_cache_query (cache, pattern, flags);

	if (res != NULL) {
		return res;
	}

	res = rspamd_regexp_new (pattern, flags, err);

	if (res) {
		REF_RETAIN (res);
		g_hash_table_insert (cache->tbl, res->id, res);
	}

	return res;
}
Beispiel #2
0
void rspamd_regexp_cache_insert (struct rspamd_regexp_cache* cache,
		const gchar *pattern,
		const gchar *flags, rspamd_regexp_t *re)
{
	g_assert (re != NULL);
	g_assert (pattern != NULL);

	if (cache == NULL) {
		rspamd_regexp_library_init ();
		cache = global_re_cache;
	}

	g_assert (cache != NULL);
	/* Generate custom id */
	rspamd_regexp_generate_id (pattern, flags, re->id);

	REF_RETAIN (re);
	g_hash_table_insert (cache->tbl, re->id, re);
}
Beispiel #3
0
rspamd_regexp_t*
rspamd_regexp_cache_query (struct rspamd_regexp_cache* cache,
		const gchar *pattern,
		const gchar *flags)
{
	rspamd_regexp_t *res = NULL;
	regexp_id_t id;

	if (cache == NULL) {
		rspamd_regexp_library_init ();
		cache = global_re_cache;
	}

	g_assert (cache != NULL);
	rspamd_regexp_generate_id (pattern, flags, id);

	res = g_hash_table_lookup (cache->tbl, id);

	return res;
}
Beispiel #4
0
rspamd_regexp_t*
rspamd_regexp_new (const gchar *pattern, const gchar *flags,
		GError **err)
{
	const gchar *start = pattern, *end, *flags_str = NULL, *err_str;
	rspamd_regexp_t *res;
	pcre *r;
	gchar sep = 0, *real_pattern;
	gint regexp_flags = 0, rspamd_flags = 0, err_off, study_flags = 0, ncaptures;
	gboolean strict_flags = FALSE;

	rspamd_regexp_library_init ();

	if (flags == NULL) {
		/* We need to parse pattern and detect flags set */
		if (*start == '/') {
			sep = '/';
		}
		else if (*start == 'm') {
			start ++;
			sep = *start;

			/* Paired braces */
			if (sep == '{') {
				sep = '}';
			}

			rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		if (sep == '\0' || g_ascii_isalnum (sep)) {
			/* We have no flags, no separators and just use all line as expr */
			start = pattern;
			end = start + strlen (pattern);
			rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		else {
			end = strrchr (pattern, sep);

			if (end == NULL || end <= start) {
				g_set_error (err, rspamd_regexp_quark(), EINVAL,
						"pattern is not enclosed with %c: %s",
						sep, pattern);
				return NULL;
			}
			flags_str = end + 1;
			start ++;
		}
	}
	else {
		/* Strictly check all flags */
		strict_flags = TRUE;
		start = pattern;
		end = pattern + strlen (pattern);
		flags_str = flags;
	}

	rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
	regexp_flags &= ~PCRE_UTF8;

	if (flags_str != NULL) {
		while (*flags_str) {
			switch (*flags_str) {
			case 'i':
				regexp_flags |= PCRE_CASELESS;
				break;
			case 'm':
				regexp_flags |= PCRE_MULTILINE;
				break;
			case 's':
				regexp_flags |= PCRE_DOTALL;
				break;
			case 'x':
				regexp_flags |= PCRE_EXTENDED;
				break;
			case 'u':
				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
				regexp_flags |= PCRE_UTF8;
				break;
			case 'O':
				/* We optimize all regexps by default */
				rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
				break;
			case 'r':
				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
				regexp_flags &= ~PCRE_UTF8;
				break;
			default:
				if (strict_flags) {
					g_set_error (err, rspamd_regexp_quark(), EINVAL,
							"invalid regexp flag: %c in pattern %s",
							*flags_str, pattern);
					return NULL;
				}
				msg_warn ("invalid flag '%c' in pattern %s", *flags_str, pattern);
				goto fin;
				break;
			}
			flags_str++;
		}
	}
fin:

	real_pattern = g_malloc (end - start + 1);
	rspamd_strlcpy (real_pattern, start, end - start + 1);

	r = pcre_compile (real_pattern, regexp_flags, &err_str, &err_off, NULL);

	if (r == NULL) {
		g_set_error (err, rspamd_regexp_quark(), EINVAL,
			"invalid regexp pattern: '%s': %s at position %d",
			pattern, err_str, err_off);
		g_free (real_pattern);

		return NULL;
	}

	/* Now allocate the target structure */
	res = g_slice_alloc0 (sizeof (*res));
	REF_INIT_RETAIN (res, rspamd_regexp_dtor);
	res->flags = rspamd_flags;
	res->pattern = real_pattern;

	if (rspamd_flags & RSPAMD_REGEXP_FLAG_RAW) {
		res->raw_re = r;
	}
	else {
		res->re = r;
		res->raw_re = pcre_compile (pattern, regexp_flags & ~PCRE_UTF8,
				&err_str, &err_off, NULL);

		if (res->raw_re == NULL) {
			msg_warn ("invalid raw regexp pattern: '%s': %s at position %d",
					pattern, err_str, err_off);
		}
	}

#ifdef HAVE_PCRE_JIT
	study_flags |= PCRE_STUDY_JIT_COMPILE;
#endif

	if (!(rspamd_flags & RSPAMD_REGEXP_FLAG_NOOPT)) {
		/* Optimize regexp */
		if (res->re) {
			res->extra = pcre_study (res->re, study_flags, &err_str);
			if (res->extra != NULL) {
#ifdef HAVE_PCRE_JIT
				gint jit, n;

				if (can_jit) {
					jit = 0;
					n = pcre_fullinfo (res->re, res->extra,
							PCRE_INFO_JIT, &jit);

					if (n != 0 || jit != 1) {
						msg_debug ("jit compilation of %s is not supported", pattern);
						res->jstack = NULL;
					}
					else {
						res->jstack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
						pcre_assign_jit_stack (res->extra, NULL, res->jstack);
					}
				}
#endif
			}
			else {
				msg_warn ("cannot optimize regexp pattern: '%s': %s",
						pattern, err_str);
			}
		}

		if (res->raw_re) {
			if (res->raw_re != res->re) {
				res->raw_extra = pcre_study (res->raw_re, study_flags, &err_str);
				if (res->raw_extra != NULL) {
#ifdef HAVE_PCRE_JIT
					gint jit, n;

					if (can_jit) {
						jit = 0;
						n = pcre_fullinfo (res->raw_re, res->raw_extra,
								PCRE_INFO_JIT, &jit);

						if (n != 0 || jit != 1) {
							msg_debug ("jit compilation of %s is not supported",
									pattern);
							res->raw_jstack = NULL;
						}
						else {
							res->raw_jstack = pcre_jit_stack_alloc (32 * 1024,
									512 * 1024);
							pcre_assign_jit_stack (res->raw_extra, NULL,
									res->raw_jstack);
						}
					}
#endif
				}
				else {
					msg_warn ("cannot optimize raw regexp pattern: '%s': %s",
							pattern, err_str);
				}
			}
			else {
#ifdef HAVE_PCRE_JIT
				/* Just alias pointers */
				res->raw_extra = res->extra;
				res->raw_jstack = res->jstack;
#endif
			}
		}
	}

	rspamd_regexp_generate_id (pattern, flags, res->id);

	/* Check number of captures */
	if (pcre_fullinfo (res->raw_re, res->extra, PCRE_INFO_CAPTURECOUNT,
			&ncaptures) == 0) {
		res->ncaptures = ncaptures;
	}

	return res;
}
Beispiel #5
0
/*
 * Perform post load actions
 */
void
rspamd_config_post_load (struct rspamd_config *cfg)
{
#ifdef HAVE_CLOCK_GETTIME
	struct timespec ts;
#endif
	struct metric *def_metric;

#ifdef HAVE_CLOCK_GETTIME
#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
	clock_getres (CLOCK_PROCESS_CPUTIME_ID, &ts);
# elif defined(HAVE_CLOCK_VIRTUAL)
	clock_getres (CLOCK_VIRTUAL,			&ts);
# else
	clock_getres (CLOCK_REALTIME,			&ts);
# endif

	cfg->clock_res = log10 (1000000. / ts.tv_nsec);
	if (cfg->clock_res < 0) {
		cfg->clock_res = 0;
	}
	if (cfg->clock_res > 3) {
		cfg->clock_res = 3;
	}
#else
	/* For gettimeofday */
	cfg->clock_res = 1;
#endif

	rspamd_regexp_library_init ();

	if ((def_metric =
		g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) {
		def_metric = rspamd_config_new_metric (cfg, NULL, DEFAULT_METRIC);
		def_metric->actions[METRIC_ACTION_REJECT].score = DEFAULT_SCORE;
	}

	if (cfg->tld_file == NULL) {
		/* Try to guess tld file */
		GString *fpath = g_string_new (NULL);

		rspamd_printf_gstring (fpath, "%s%c%s", RSPAMD_PLUGINSDIR,
				G_DIR_SEPARATOR, "effective_tld_names.dat");

		if (access (fpath->str, R_OK)) {
			msg_warn_config ("url_tld option is not specified but %s is available,"
					" therefore this file is assumed as TLD file for URL"
					" extraction", fpath->str);
			cfg->tld_file = rspamd_mempool_strdup (cfg->cfg_pool, fpath->str);
		}
		else {
			msg_err_config ("no url_tld option has been specified, URL's detection "
					"will be awfully broken");
		}

		g_string_free (fpath, TRUE);
	}

	/* Lua options */
	(void)rspamd_lua_post_load_config (cfg);
	init_dynamic_config (cfg);

	rspamd_url_init (cfg->tld_file);

	/* Insert classifiers symbols */
	(void)rspamd_config_insert_classify_symbols (cfg);
}
Beispiel #6
0
rspamd_regexp_t*
rspamd_regexp_new (const gchar *pattern, const gchar *flags,
		GError **err)
{
	const gchar *start = pattern, *end, *flags_str = NULL;
	gchar *err_str;
	rspamd_regexp_t *res;
	PCRE_T *r;
	gchar sep = 0, *real_pattern;
#ifndef WITH_PCRE2
	gint err_off;
#else
	gsize err_off;
#endif
	gint regexp_flags = 0, rspamd_flags = 0, err_code, ncaptures;
	gboolean strict_flags = FALSE;

	rspamd_regexp_library_init (NULL);

	if (flags == NULL) {
		/* We need to parse pattern and detect flags set */
		if (*start == '/') {
			sep = '/';
		}
		else if (*start == 'm') {
			start ++;
			sep = *start;

			/* Paired braces */
			if (sep == '{') {
				sep = '}';
			}

			rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		if (sep == '\0' || g_ascii_isalnum (sep)) {
			/* We have no flags, no separators and just use all line as expr */
			start = pattern;
			end = start + strlen (pattern);
			rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH;
		}
		else {
			end = strrchr (pattern, sep);

			if (end == NULL || end <= start) {
				g_set_error (err, rspamd_regexp_quark(), EINVAL,
						"pattern is not enclosed with %c: %s",
						sep, pattern);
				return NULL;
			}
			flags_str = end + 1;
			start ++;
		}
	}
	else {
		/* Strictly check all flags */
		strict_flags = TRUE;
		start = pattern;
		end = pattern + strlen (pattern);
		flags_str = flags;
	}

	rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;

#ifndef WITH_PCRE2
	regexp_flags &= ~PCRE_FLAG(UTF8);
	regexp_flags |= PCRE_FLAG(NEWLINE_ANYCRLF);
#else
	regexp_flags &= ~PCRE_FLAG(UTF);
#endif

	if (flags_str != NULL) {
		while (*flags_str) {
			switch (*flags_str) {
			case 'i':
				regexp_flags |= PCRE_FLAG(CASELESS);
				break;
			case 'm':
				regexp_flags |= PCRE_FLAG(MULTILINE);
				break;
			case 's':
				regexp_flags |= PCRE_FLAG(DOTALL);
				break;
			case 'x':
				regexp_flags |= PCRE_FLAG(EXTENDED);
				break;
			case 'u':
				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
#ifndef WITH_PCRE2
				regexp_flags |= PCRE_FLAG(UTF8);
#else
				regexp_flags |= PCRE_FLAG(UTF);
#endif
				break;
			case 'O':
				/* We optimize all regexps by default */
				rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
				break;
			case 'r':
				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
#ifndef WITH_PCRE2
				regexp_flags &= ~PCRE_FLAG(UTF8);
#else
				regexp_flags &= ~PCRE_FLAG(UTF);
#endif
				break;
			default:
				if (strict_flags) {
					g_set_error (err, rspamd_regexp_quark(), EINVAL,
							"invalid regexp flag: %c in pattern %s",
							*flags_str, pattern);
					return NULL;
				}
				msg_warn ("invalid flag '%c' in pattern %s", *flags_str, pattern);
				goto fin;
				break;
			}
			flags_str++;
		}
	}
fin:

	real_pattern = g_malloc (end - start + 1);
	rspamd_strlcpy (real_pattern, start, end - start + 1);

#ifndef WITH_PCRE2
	r = pcre_compile (real_pattern, regexp_flags,
			(const char **)&err_str, &err_off, NULL);
	(void)err_code;
#else
	r = pcre2_compile (real_pattern, PCRE2_ZERO_TERMINATED,
			regexp_flags,
			&err_code, &err_off, pcre2_ctx);

	if (r == NULL) {
		err_str = g_alloca (1024);
		memset (err_str, 0, 1024);
		pcre2_get_error_message (err_code, err_str, 1024);
	}
#endif

	if (r == NULL) {
		g_set_error (err, rspamd_regexp_quark(), EINVAL,
			"regexp parsing error: '%s' at position %d",
			err_str, (gint)err_off);
		g_free (real_pattern);

		return NULL;
	}

	/* Now allocate the target structure */
	res = g_malloc0 (sizeof (*res));
	REF_INIT_RETAIN (res, rspamd_regexp_dtor);
	res->flags = rspamd_flags;
	res->pattern = real_pattern;
	res->cache_id = RSPAMD_INVALID_ID;
	res->pcre_flags = regexp_flags;
	res->max_hits = 0;
	res->re = r;

	if (rspamd_flags & RSPAMD_REGEXP_FLAG_RAW) {
		res->raw_re = r;
	}
	else {
#ifndef WITH_PCRE2
		res->raw_re = pcre_compile (real_pattern, regexp_flags & ~PCRE_FLAG(UTF8),
				(const char **)&err_str, &err_off, NULL);
		(void)err_code;
#else
		res->raw_re = pcre2_compile (real_pattern, PCRE2_ZERO_TERMINATED,
					regexp_flags & ~PCRE_FLAG(UTF),
					&err_code, &err_off, pcre2_ctx);
		if (res->raw_re == NULL) {
			err_str = g_alloca (1024);
			memset (err_str, 0, 1024);
			pcre2_get_error_message (err_code, err_str, 1024);
		}
#endif
		if (res->raw_re == NULL) {
			msg_warn ("raw regexp parsing error: '%s': '%s' at position %d",
					err_str, real_pattern, (gint)err_off);
		}
	}

	rspamd_regexp_post_process (res);
	rspamd_regexp_generate_id (pattern, flags, res->id);

#ifndef WITH_PCRE2
	/* Check number of captures */
	if (pcre_fullinfo (res->raw_re, res->extra, PCRE_INFO_CAPTURECOUNT,
			&ncaptures) == 0) {
		res->ncaptures = ncaptures;
	}

	/* Check number of backrefs */
	if (pcre_fullinfo (res->raw_re, res->extra, PCRE_INFO_BACKREFMAX,
			&ncaptures) == 0) {
		res->nbackref = ncaptures;
	}
#else
	/* Check number of captures */
	if (pcre2_pattern_info (res->raw_re, PCRE2_INFO_CAPTURECOUNT,
			&ncaptures) == 0) {
		res->ncaptures = ncaptures;
	}

	/* Check number of backrefs */
	if (pcre2_pattern_info (res->raw_re, PCRE2_INFO_BACKREFMAX,
			&ncaptures) == 0) {
		res->nbackref = ncaptures;
	}
#endif

	return res;
}
Beispiel #7
0
static void
rspamd_regexp_post_process (rspamd_regexp_t *r)
{
	if (global_re_cache == NULL) {
		rspamd_regexp_library_init (NULL);
	}
#if defined(WITH_PCRE2)
	gsize jsz;
	guint jit_flags = PCRE2_JIT_COMPLETE;
	/* Create match context */

	r->mcontext = pcre2_match_context_create (NULL);

	if (r->re != r->raw_re) {
		r->raw_mcontext = pcre2_match_context_create (NULL);
	}
	else {
		r->raw_mcontext = r->mcontext;
	}

#ifdef HAVE_PCRE_JIT
	if (pcre2_jit_compile (r->re, jit_flags) < 0) {
		msg_err ("jit compilation of %s is not supported: %d", r->pattern, jit_flags);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}
	else {
		if (!(pcre2_pattern_info (r->re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_err ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}

	if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
		pcre2_jit_stack_assign (r->mcontext, NULL, global_re_cache->jstack);
	}

	if (r->re != r->raw_re) {
		if (pcre2_jit_compile (r->raw_re, jit_flags) < 0) {
			msg_debug ("jit compilation of %s is not supported", r->pattern);
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}

		if (!(pcre2_pattern_info (r->raw_re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
			msg_debug ("jit compilation of raw %s is not supported", r->pattern);
		}
		else if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
			pcre2_jit_stack_assign (r->raw_mcontext, NULL, global_re_cache->jstack);
		}
	}
#endif

#else
	const gchar *err_str = "unknown";
	gboolean try_jit = TRUE, try_raw_jit = TRUE;
	gint study_flags = 0;

#if defined(HAVE_PCRE_JIT)
	study_flags |= PCRE_STUDY_JIT_COMPILE;
#endif

	/* Pcre 1 needs study */
	if (r->re) {
		r->extra = pcre_study (r->re, study_flags, &err_str);

		if (r->extra == NULL) {
			msg_debug ("cannot optimize regexp pattern: '%s': %s",
					r->pattern, err_str);
			try_jit = FALSE;
			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
		}
	}
	else {
		g_assert_not_reached ();
	}

	if (r->raw_re && r->raw_re != r->re) {
		r->raw_extra = pcre_study (r->re, study_flags, &err_str);
	}
	else if (r->raw_re == r->re) {
		r->raw_extra = r->extra;
	}

	if (r->raw_extra == NULL) {

		msg_debug ("cannot optimize raw regexp pattern: '%s': %s",
				r->pattern, err_str);
		try_raw_jit = FALSE;
	}
	/* JIT path */
	if (try_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {
			jit = 0;
			n = pcre_fullinfo (r->re, r->extra,
					PCRE_INFO_JIT, &jit);

			if (n != 0 || jit != 1) {
				msg_debug ("jit compilation of %s is not supported", r->pattern);
				r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
			}
			else {
				pcre_assign_jit_stack (r->extra, NULL, global_re_cache->jstack);
			}
		}
#endif
	}
	else {
		msg_debug ("cannot optimize regexp pattern: '%s': %s",
				r->pattern, err_str);
		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
	}

	if (try_raw_jit) {
#ifdef HAVE_PCRE_JIT
		gint jit, n;

		if (can_jit) {

			if (r->raw_re != r->re) {
				jit = 0;
				n = pcre_fullinfo (r->raw_re, r->raw_extra,
						PCRE_INFO_JIT, &jit);

				if (n != 0 || jit != 1) {
					msg_debug ("jit compilation of %s is not supported", r->pattern);
					r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
				}
				else {
					pcre_assign_jit_stack (r->raw_extra, NULL,
							global_re_cache->jstack);
				}
			}
		}
#endif
	}
#endif /* WITH_PCRE2 */
}