예제 #1
0
파일: bloom.c 프로젝트: bryongloden/rspamd
gboolean
rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
{
	size_t n, len;
	u_char t;
	guint v;

	if (s == NULL) {
		return FALSE;
	}
	len = strlen (s);
	for (n = 0; n < bloom->nfuncs; ++n) {
		v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
				s, len, bloom->seeds[n]) % bloom->asize;
		INCBIT (bloom->a, v, t);
	}

	return TRUE;
}
예제 #2
0
파일: filter.c 프로젝트: cequencer/rspamd
gboolean
rspamd_action_from_str (const gchar *data, gint *result)
{
	guint64 h;

	h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
			data, strlen (data), 0xdeadbabe);

	switch (h) {
	case 0x9917BFDB46332B8CULL: /* reject */
		*result = METRIC_ACTION_REJECT;
		break;
	case 0x7130EE37D07B3715ULL: /* greylist */
		*result = METRIC_ACTION_GREYLIST;
		break;
	case 0xCA6087E05480C60CULL: /* add_header */
	case 0x87A3D27783B16241ULL: /* add header */
		*result = METRIC_ACTION_ADD_HEADER;
		break;
	case 0x4963374ED8B90449ULL: /* rewrite_subject */
	case 0x5C9FC4679C025948ULL: /* rewrite subject */
		*result = METRIC_ACTION_REWRITE_SUBJECT;
		break;
	case 0xFC7D6502EE71FDD9ULL: /* soft reject */
	case 0x73576567C262A82DULL: /* soft_reject */
		*result = METRIC_ACTION_SOFT_REJECT;
		break;
	case 0x207091B927D1EC0DULL: /* no action */
	case 0xB7D92D002CD46325ULL: /* no_action */
	case 0x167C0DF4BAA9BCECULL: /* accept */
		*result = METRIC_ACTION_NOACTION;
		break;
	default:
		return FALSE;
	}

	return TRUE;
}
예제 #3
0
파일: shingles.c 프로젝트: croessner/rspamd
rspamd_shingles_from_text (GArray *input,
		const guchar key[16],
		rspamd_mempool_t *pool,
		rspamd_shingles_filter filter,
		gpointer filterd,
		enum rspamd_shingle_alg alg)
{
	struct rspamd_shingle *res;
	guint64 **hashes;
	guchar **keys;
	rspamd_fstring_t *row;
	rspamd_stat_token_t *word;
	guint64 val;
	gint i, j, k;
	gsize hlen, beg = 0;
	enum rspamd_cryptobox_fast_hash_type ht;

	if (pool != NULL) {
		res = rspamd_mempool_alloc (pool, sizeof (*res));
	}
	else {
		res = g_malloc (sizeof (*res));
	}

	row = rspamd_fstring_sized_new (256);

	/* Init hashes pipes and keys */
	hashes = g_malloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
	hlen = input->len > SHINGLES_WINDOW ?
			(input->len - SHINGLES_WINDOW + 1) : 1;
	keys = rspamd_shingles_get_keys_cached (key);

	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
		hashes[i] = g_malloc (hlen * sizeof (guint64));
	}

	/* Now parse input words into a vector of hashes using rolling window */
	if (alg == RSPAMD_SHINGLES_OLD) {
		for (i = 0; i <= (gint)input->len; i ++) {
			if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
				for (j = beg; j < i; j ++) {
					word = &g_array_index (input, rspamd_stat_token_t, j);
					row = rspamd_fstring_append (row, word->begin, word->len);
				}

				/* Now we need to create a new row here */
				for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
					rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
							keys[j]);
					g_assert (hlen > beg);
					hashes[j][beg] = val;
				}

				beg++;

				row = rspamd_fstring_assign (row, "", 0);
			}
		}
	}
	else {
		guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE], seed;

		switch (alg) {
		case RSPAMD_SHINGLES_XXHASH:
			ht = RSPAMD_CRYPTOBOX_XXHASH64;
			break;
		case RSPAMD_SHINGLES_MUMHASH:
			ht = RSPAMD_CRYPTOBOX_MUMHASH;
			break;
		default:
			ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT;
			break;
		}

		memset (res, 0, sizeof (res));
		for (i = 0; i <= (gint)input->len; i ++) {
			if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {

				for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
					/* Shift hashes window to right */
					for (k = 0; k < SHINGLES_WINDOW - 1; k ++) {
						res[j * SHINGLES_WINDOW + k] =
								res[j * SHINGLES_WINDOW + k + 1];
					}

					word = &g_array_index (input, rspamd_stat_token_t, beg);
					/* Insert the last element to the pipe */
					memcpy (&seed, keys[j], sizeof (seed));
					res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
							rspamd_cryptobox_fast_hash_specific (ht,
									word->begin, word->len,
									seed);
					val = 0;
					for (k = 0; k < SHINGLES_WINDOW; k ++) {
						val ^= res[j * SHINGLES_WINDOW + k] >>
								(8 * (SHINGLES_WINDOW - k - 1));
					}

					g_assert (hlen > beg);
					hashes[j][beg] = val;
				}
				beg++;
			}
		}
	}

	/* Now we need to filter all hashes and make a shingles result */
	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
		res->hashes[i] = filter (hashes[i], hlen,
				i, key, filterd);
		g_free (hashes[i]);
	}

	g_free (hashes);

	rspamd_fstring_free (row);

	return res;
}
예제 #4
0
파일: osb.c 프로젝트: croessner/rspamd
gint
rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx,
		rspamd_mempool_t *pool,
		GArray *words,
		gboolean is_utf,
		const gchar *prefix,
		GPtrArray *result)
{
	rspamd_token_t *new_tok = NULL;
	rspamd_stat_token_t *token;
	struct rspamd_osb_tokenizer_config *osb_cf;
	guint64 cur, seed;
	struct token_pipe_entry *hashpipe;
	guint32 h1, h2;
	gsize token_size;
	guint processed = 0, i, w, window_size, token_flags = 0;

	if (words == NULL) {
		return FALSE;
	}

	osb_cf = ctx->tkcf;
	window_size = osb_cf->window_size;

	if (prefix) {
		seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
				prefix, strlen (prefix), osb_cf->seed);
	}
	else {
		seed = osb_cf->seed;
	}

	hashpipe = g_alloca (window_size * sizeof (hashpipe[0]));
	for (i = 0; i < window_size; i++) {
		hashpipe[i].h = 0xfe;
		hashpipe[i].t = NULL;
	}

	token_size = sizeof (rspamd_token_t) +
			sizeof (gdouble) * ctx->statfiles->len;
	g_assert (token_size > 0);

	for (w = 0; w < words->len; w ++) {
		token = &g_array_index (words, rspamd_stat_token_t, w);
		token_flags = token->flags;

		if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) {
			rspamd_ftok_t ftok;

			ftok.begin = token->begin;
			ftok.len = token->len;
			cur = rspamd_fstrhash_lc (&ftok, is_utf);
		}
		else {
			/* We know that the words are normalized */
			if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) {
				cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
						token->begin, token->len, osb_cf->seed);
			}
			else {
				rspamd_cryptobox_siphash ((guchar *)&cur, token->begin,
						token->len, osb_cf->sk);

				if (prefix) {
					cur ^= seed;
				}
			}
		}

		if (token_flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) {
			new_tok = rspamd_mempool_alloc0 (pool, token_size);
			new_tok->flags = token_flags;
			new_tok->t1 = token;
			new_tok->t2 = token;
			new_tok->data = cur;
			new_tok->window_idx = 0;
			g_ptr_array_add (result, new_tok);

			continue;
		}

#define ADD_TOKEN do {\
    new_tok = rspamd_mempool_alloc0 (pool, token_size); \
    new_tok->flags = token_flags; \
    new_tok->t1 = hashpipe[0].t; \
    new_tok->t2 = hashpipe[i].t; \
    if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \
        h1 = ((guint32)hashpipe[0].h) * primes[0] + \
            ((guint32)hashpipe[i].h) * primes[i << 1]; \
        h2 = ((guint32)hashpipe[0].h) * primes[1] + \
            ((guint32)hashpipe[i].h) * primes[(i << 1) - 1]; \
        memcpy((guchar *)&new_tok->data, &h1, sizeof (h1)); \
        memcpy(((guchar *)&new_tok->data) + sizeof (h1), &h2, sizeof (h2)); \
    } \
    else { \
        new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \
    } \
    new_tok->window_idx = i + 1; \
    g_ptr_array_add (result, new_tok); \
  } while(0)

		if (processed < window_size) {
			/* Just fill a hashpipe */
			++processed;
			hashpipe[window_size - processed].h = cur;
			hashpipe[window_size - processed].t = token;
		}
		else {
			/* Shift hashpipe */
			for (i = window_size - 1; i > 0; i--) {
				hashpipe[i] = hashpipe[i - 1];
			}
			hashpipe[0].h = cur;
			hashpipe[0].t = token;

			processed++;

			for (i = 1; i < window_size; i++) {
				ADD_TOKEN;
			}
		}
	}

	if (processed > 1 && processed <= window_size) {
		processed --;
		memmove (hashpipe, &hashpipe[window_size - processed],
				processed * sizeof (hashpipe[0]));

		for (i = 1; i < processed; i++) {
			ADD_TOKEN;
		}
	}

#undef ADD_TOKEN

	return TRUE;
}