/**
 * Register a NUL-terminated string in the bloom filter.
 *
 * For each of the filter's `nfuncs` seeds the string is hashed with XXHASH64,
 * the digest is reduced modulo `asize`, and INCBIT is applied to the
 * resulting slot of `bloom->a`.
 *
 * @param bloom filter to update (dereferenced without a NULL check)
 * @param s string to add
 * @return FALSE when `s` is NULL, TRUE otherwise
 */
gboolean
rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
{
	size_t fn_idx = 0, slen;
	u_char acc;
	guint slot;

	if (s != NULL) {
		/* Length is loop-invariant, so measure the string once */
		slen = strlen (s);

		while (fn_idx < bloom->nfuncs) {
			slot = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
					s, slen, bloom->seeds[fn_idx]) % bloom->asize;
			/* INCBIT is defined elsewhere; `acc` is its scratch accumulator */
			INCBIT (bloom->a, slot, acc);
			fn_idx ++;
		}

		return TRUE;
	}

	return FALSE;
}
/**
 * Map a textual action name onto its metric action code.
 *
 * The name is hashed with XXHASH64 (seed 0xdeadbabe) and the digest is looked
 * up among a fixed set of constants; the strings themselves are never
 * compared. Each constant below is annotated with the spelling it is expected
 * to correspond to.
 *
 * @param data NUL-terminated action name (passed to strlen, must not be NULL)
 * @param result receives the action code; left untouched when nothing matches
 * @return TRUE when the digest is recognised, FALSE otherwise
 */
gboolean
rspamd_action_from_str (const gchar *data, gint *result)
{
	static const struct {
		guint64 digest;
		gint action;
	} known_actions[] = {
		{ 0x9917BFDB46332B8CULL, METRIC_ACTION_REJECT },          /* reject */
		{ 0x7130EE37D07B3715ULL, METRIC_ACTION_GREYLIST },        /* greylist */
		{ 0xCA6087E05480C60CULL, METRIC_ACTION_ADD_HEADER },      /* add_header */
		{ 0x87A3D27783B16241ULL, METRIC_ACTION_ADD_HEADER },      /* add header */
		{ 0x4963374ED8B90449ULL, METRIC_ACTION_REWRITE_SUBJECT }, /* rewrite_subject */
		{ 0x5C9FC4679C025948ULL, METRIC_ACTION_REWRITE_SUBJECT }, /* rewrite subject */
		{ 0xFC7D6502EE71FDD9ULL, METRIC_ACTION_SOFT_REJECT },     /* soft reject */
		{ 0x73576567C262A82DULL, METRIC_ACTION_SOFT_REJECT },     /* soft_reject */
		{ 0x207091B927D1EC0DULL, METRIC_ACTION_NOACTION },        /* no action */
		{ 0xB7D92D002CD46325ULL, METRIC_ACTION_NOACTION },        /* no_action */
		{ 0x167C0DF4BAA9BCECULL, METRIC_ACTION_NOACTION },        /* accept */
	};
	const size_t n_known = sizeof (known_actions) / sizeof (known_actions[0]);
	guint64 digest;
	size_t idx;

	digest = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
			data, strlen (data), 0xdeadbabe);

	for (idx = 0; idx < n_known; idx ++) {
		if (known_actions[idx].digest == digest) {
			*result = known_actions[idx].action;
			return TRUE;
		}
	}

	return FALSE;
}
rspamd_shingles_from_text (GArray *input, const guchar key[16], rspamd_mempool_t *pool, rspamd_shingles_filter filter, gpointer filterd, enum rspamd_shingle_alg alg) { struct rspamd_shingle *res; guint64 **hashes; guchar **keys; rspamd_fstring_t *row; rspamd_stat_token_t *word; guint64 val; gint i, j, k; gsize hlen, beg = 0; enum rspamd_cryptobox_fast_hash_type ht; if (pool != NULL) { res = rspamd_mempool_alloc (pool, sizeof (*res)); } else { res = g_malloc (sizeof (*res)); } row = rspamd_fstring_sized_new (256); /* Init hashes pipes and keys */ hashes = g_malloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE); hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1; keys = rspamd_shingles_get_keys_cached (key); for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { hashes[i] = g_malloc (hlen * sizeof (guint64)); } /* Now parse input words into a vector of hashes using rolling window */ if (alg == RSPAMD_SHINGLES_OLD) { for (i = 0; i <= (gint)input->len; i ++) { if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { for (j = beg; j < i; j ++) { word = &g_array_index (input, rspamd_stat_token_t, j); row = rspamd_fstring_append (row, word->begin, word->len); } /* Now we need to create a new row here */ for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len, keys[j]); g_assert (hlen > beg); hashes[j][beg] = val; } beg++; row = rspamd_fstring_assign (row, "", 0); } } } else { guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE], seed; switch (alg) { case RSPAMD_SHINGLES_XXHASH: ht = RSPAMD_CRYPTOBOX_XXHASH64; break; case RSPAMD_SHINGLES_MUMHASH: ht = RSPAMD_CRYPTOBOX_MUMHASH; break; default: ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT; break; } memset (res, 0, sizeof (res)); for (i = 0; i <= (gint)input->len; i ++) { if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { /* Shift hashes window to right */ for (k = 0; k < SHINGLES_WINDOW - 1; k ++) { res[j * 
SHINGLES_WINDOW + k] = res[j * SHINGLES_WINDOW + k + 1]; } word = &g_array_index (input, rspamd_stat_token_t, beg); /* Insert the last element to the pipe */ memcpy (&seed, keys[j], sizeof (seed)); res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] = rspamd_cryptobox_fast_hash_specific (ht, word->begin, word->len, seed); val = 0; for (k = 0; k < SHINGLES_WINDOW; k ++) { val ^= res[j * SHINGLES_WINDOW + k] >> (8 * (SHINGLES_WINDOW - k - 1)); } g_assert (hlen > beg); hashes[j][beg] = val; } beg++; } } } /* Now we need to filter all hashes and make a shingles result */ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { res->hashes[i] = filter (hashes[i], hlen, i, key, filterd); g_free (hashes[i]); } g_free (hashes); rspamd_fstring_free (row); return res; }
gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, rspamd_mempool_t *pool, GArray *words, gboolean is_utf, const gchar *prefix, GPtrArray *result) { rspamd_token_t *new_tok = NULL; rspamd_stat_token_t *token; struct rspamd_osb_tokenizer_config *osb_cf; guint64 cur, seed; struct token_pipe_entry *hashpipe; guint32 h1, h2; gsize token_size; guint processed = 0, i, w, window_size, token_flags = 0; if (words == NULL) { return FALSE; } osb_cf = ctx->tkcf; window_size = osb_cf->window_size; if (prefix) { seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, prefix, strlen (prefix), osb_cf->seed); } else { seed = osb_cf->seed; } hashpipe = g_alloca (window_size * sizeof (hashpipe[0])); for (i = 0; i < window_size; i++) { hashpipe[i].h = 0xfe; hashpipe[i].t = NULL; } token_size = sizeof (rspamd_token_t) + sizeof (gdouble) * ctx->statfiles->len; g_assert (token_size > 0); for (w = 0; w < words->len; w ++) { token = &g_array_index (words, rspamd_stat_token_t, w); token_flags = token->flags; if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { rspamd_ftok_t ftok; ftok.begin = token->begin; ftok.len = token->len; cur = rspamd_fstrhash_lc (&ftok, is_utf); } else { /* We know that the words are normalized */ if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) { cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, token->begin, token->len, osb_cf->seed); } else { rspamd_cryptobox_siphash ((guchar *)&cur, token->begin, token->len, osb_cf->sk); if (prefix) { cur ^= seed; } } } if (token_flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) { new_tok = rspamd_mempool_alloc0 (pool, token_size); new_tok->flags = token_flags; new_tok->t1 = token; new_tok->t2 = token; new_tok->data = cur; new_tok->window_idx = 0; g_ptr_array_add (result, new_tok); continue; } #define ADD_TOKEN do {\ new_tok = rspamd_mempool_alloc0 (pool, token_size); \ new_tok->flags = token_flags; \ new_tok->t1 = hashpipe[0].t; \ new_tok->t2 = hashpipe[i].t; \ if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \ h1 = 
((guint32)hashpipe[0].h) * primes[0] + \ ((guint32)hashpipe[i].h) * primes[i << 1]; \ h2 = ((guint32)hashpipe[0].h) * primes[1] + \ ((guint32)hashpipe[i].h) * primes[(i << 1) - 1]; \ memcpy((guchar *)&new_tok->data, &h1, sizeof (h1)); \ memcpy(((guchar *)&new_tok->data) + sizeof (h1), &h2, sizeof (h2)); \ } \ else { \ new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \ } \ new_tok->window_idx = i + 1; \ g_ptr_array_add (result, new_tok); \ } while(0) if (processed < window_size) { /* Just fill a hashpipe */ ++processed; hashpipe[window_size - processed].h = cur; hashpipe[window_size - processed].t = token; } else { /* Shift hashpipe */ for (i = window_size - 1; i > 0; i--) { hashpipe[i] = hashpipe[i - 1]; } hashpipe[0].h = cur; hashpipe[0].t = token; processed++; for (i = 1; i < window_size; i++) { ADD_TOKEN; } } } if (processed > 1 && processed <= window_size) { processed --; memmove (hashpipe, &hashpipe[window_size - processed], processed * sizeof (hashpipe[0])); for (i = 1; i < processed; i++) { ADD_TOKEN; } } #undef ADD_TOKEN return TRUE; }