static void xxHash64_test(const void *key, int len, uint32_t seed, void *out) { #if 0 if (! state) state = XXH64_createState (); XXH64_reset (state, seed); XXH64_update (state, key, len); *(uint64_t*)out = XXH64_digest (state); #else *(uint64_t*)out = XXH64 (key, len, seed); #endif }
/* * Class: net_jpountz_xxhash_XXHashJNI * Method: XXH64_init * Signature: (J)J */ JNIEXPORT jlong JNICALL Java_net_jpountz_xxhash_XXHashJNI_XXH64_1init (JNIEnv *env, jclass cls, jlong seed) { XXH64_state_t *state = XXH64_createState(); if (XXH64_reset(state, seed) != XXH_OK) { XXH64_freeState(state); throw_OOM(env); return 0; } return (jlong) state; }
GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, struct rspamd_config *cfg, GList *exceptions, gboolean compat, guint64 *hash) { rspamd_ftok_t token, buf; const gchar *pos = NULL; gsize l; GArray *res; GList *cur = exceptions; token_get_function func; guint min_len = 0, max_len = 0, word_decay = 0, initial_size = 128; guint64 hv = 0; XXH64_state_t *st; gboolean decay = FALSE; guint64 prob; if (text == NULL) { return NULL; } buf.begin = text; buf.len = len; token.begin = NULL; token.len = 0; if (compat || !is_utf) { func = rspamd_tokenizer_get_word_compat; } else { func = rspamd_tokenizer_get_word; } if (cfg != NULL) { min_len = cfg->min_word_len; max_len = cfg->max_word_len; word_decay = cfg->words_decay; initial_size = word_decay * 2; } res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_ftok_t), initial_size); st = XXH64_createState (); XXH64_reset (st, 0); while (func (&buf, &pos, &token, &cur, is_utf, &l, FALSE)) { if (l == 0 || (min_len > 0 && l < min_len) || (max_len > 0 && l > max_len)) { token.begin = pos; continue; } if (!decay) { XXH64_update (st, token.begin, token.len); /* Check for decay */ if (word_decay > 0 && res->len > word_decay && pos - text < (gssize)len) { /* Start decay */ gdouble decay_prob; decay = TRUE; hv = XXH64_digest (st); /* We assume that word is 6 symbols length in average */ decay_prob = (gdouble)word_decay / ((len - (pos - text)) / 6.0); if (decay_prob >= 1.0) { prob = G_MAXUINT64; } else { prob = decay_prob * G_MAXUINT64; } } } else { /* Decaying probability */ /* LCG64 x[n] = a x[n - 1] + b mod 2^64 */ hv = 2862933555777941757ULL * hv + 3037000493ULL; if (hv > prob) { token.begin = pos; continue; } } g_array_append_val (res, token); token.begin = pos; } if (!decay) { hv = XXH64_digest (st); } if (hash) { *hash = hv; } XXH64_freeState (st); return res; }