gboolean rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache, const char *cache_dir) { g_assert (cache != NULL); g_assert (cache_dir != NULL); #ifndef WITH_HYPERSCAN return FALSE; #else gchar path[PATH_MAX]; gint fd, i, n, *hs_ids = NULL, *hs_flags = NULL, total = 0, ret; GHashTableIter it; gpointer k, v; guint8 *map, *p, *end; struct rspamd_re_class *re_class; struct rspamd_re_cache_elt *elt; struct stat st; g_hash_table_iter_init (&it, cache->re_classes); while (g_hash_table_iter_next (&it, &k, &v)) { re_class = v; rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir, G_DIR_SEPARATOR, re_class->hash); if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, FALSE, FALSE)) { msg_debug_re_cache ("load hyperscan database from '%s'", re_class->hash); fd = open (path, O_RDONLY); /* Read number of regexps */ g_assert (fd != -1); fstat (fd, &st); map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { msg_err_re_cache ("cannot mmap %s: %s", path, strerror (errno)); close (fd); return FALSE; } close (fd); end = map + st.st_size; p = map + RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt); n = *(gint *)p; if (n <= 0 || 2 * n * sizeof (gint) + /* IDs + flags */ sizeof (guint64) + /* crc */ RSPAMD_HS_MAGIC_LEN + /* header */ sizeof (cache->plt) > (gsize)st.st_size) { /* Some wrong amount of regexps */ msg_err_re_cache ("bad number of expressions in %s: %d", path, n); munmap (map, st.st_size); return FALSE; } total += n; p += sizeof (n); hs_ids = g_malloc (n * sizeof (*hs_ids)); memcpy (hs_ids, p, n * sizeof (*hs_ids)); p += n * sizeof (*hs_ids); hs_flags = g_malloc (n * sizeof (*hs_flags)); memcpy (hs_flags, p, n * sizeof (*hs_flags)); /* Skip crc */ p += n * sizeof (*hs_ids) + sizeof (guint64); /* Cleanup */ if (re_class->hs_scratch != NULL) { hs_free_scratch (re_class->hs_scratch); } if (re_class->hs_db != NULL) { hs_free_database (re_class->hs_db); } if (re_class->hs_ids) { g_free (re_class->hs_ids); } re_class->hs_ids = NULL; 
re_class->hs_scratch = NULL; re_class->hs_db = NULL; if ((ret = hs_deserialize_database (p, end - p, &re_class->hs_db)) != HS_SUCCESS) { msg_err_re_cache ("bad hs database in %s: %d", path, ret); munmap (map, st.st_size); g_free (hs_ids); g_free (hs_flags); return FALSE; } munmap (map, st.st_size); g_assert (hs_alloc_scratch (re_class->hs_db, &re_class->hs_scratch) == HS_SUCCESS); /* * Now find hyperscan elts that are successfully compiled and * specify that they should be matched using hyperscan */ for (i = 0; i < n; i ++) { g_assert ((gint)cache->re->len > hs_ids[i] && hs_ids[i] >= 0); elt = g_ptr_array_index (cache->re, hs_ids[i]); if (hs_flags[i] & HS_FLAG_PREFILTER) { elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN_PRE; } else { elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN; } } re_class->hs_ids = hs_ids; g_free (hs_flags); re_class->nhs = n; } else { msg_err_re_cache ("invalid hyperscan hash file '%s'", path); return FALSE; } } msg_info_re_cache ("hyperscan database of %d regexps has been loaded", total); cache->hyperscan_loaded = TRUE; return TRUE; #endif }
/**
 * Check that a pattern can be compiled by hyperscan in prefilter mode within
 * a bounded amount of time.
 *
 * The compilation runs in a forked child so that a runaway compile can be
 * killed. The parent polls the child without blocking, sleeping
 * `max_time / 10` between polls.
 *
 * SIGCHLD handling is set to SIG_DFL for the duration of the check and to
 * SIG_IGN afterwards.
 *
 * @param cache regexp cache (supplies platform info and the vectored flag)
 * @param re regexp whose pattern is tested
 * @param flags hyperscan compile flags; HS_FLAG_PREFILTER is always added
 * @param max_time total time budget (presumably seconds, as consumed by
 *        double_to_ts - confirm against that helper)
 * @return TRUE if the child compiled the pattern and exited successfully,
 *         FALSE on compile failure, timeout or when the child cannot be
 *         started
 */
static gboolean
rspamd_re_cache_is_finite (struct rspamd_re_cache *cache,
		rspamd_regexp_t *re, gint flags, gdouble max_time)
{
	pid_t cld;
	gint status;
	struct timespec ts;
	hs_compile_error_t *hs_errors;
	hs_database_t *test_db;
	gdouble wait_time;
	const gint max_tries = 10;
	gint tries = 0, rc;

	wait_time = max_time / max_tries;
	/* We need to restore SIGCHLD processing */
	signal (SIGCHLD, SIG_DFL);
	cld = fork ();

	if (cld == -1) {
		/* Not being able to test the pattern is treated as "not finite" */
		msg_err_re_cache (
				"cannot approximate %s to hyperscan: fork failed: %s",
				rspamd_regexp_get_pattern (re), strerror (errno));
		signal (SIGCHLD, SIG_IGN);

		return FALSE;
	}

	if (cld == 0) {
		/* Child: try to compile pattern and report via exit status */
		if (hs_compile (rspamd_regexp_get_pattern (re),
				flags | HS_FLAG_PREFILTER,
				cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
				&cache->plt,
				&test_db,
				&hs_errors) != HS_SUCCESS) {
			exit (EXIT_FAILURE);
		}

		exit (EXIT_SUCCESS);
	}

	/* Parent: poll the child until it exits or the budget is exhausted */
	double_to_ts (wait_time, &ts);

	while ((rc = waitpid (cld, &status, WNOHANG)) == 0 &&
			tries ++ < max_tries) {
		(void)nanosleep (&ts, NULL);
	}

	if (rc > 0) {
		/* Child has been terminated; forget about SIGCHLD after this point */
		signal (SIGCHLD, SIG_IGN);

		if (WIFEXITED (status) && WEXITSTATUS (status) == EXIT_SUCCESS) {
			return TRUE;
		}

		msg_err_re_cache (
				"cannot approximate %s to hyperscan",
				rspamd_regexp_get_pattern (re));

		return FALSE;
	}

	/* We consider that as timeout */
	kill (cld, SIGKILL);

	/*
	 * Reap the child outside of any assertion (assertions can be compiled
	 * out, which would leave a zombie) and retry if a signal interrupts us.
	 */
	while (waitpid (cld, &status, 0) == -1 && errno == EINTR) {
		/* Retry */
	}

	msg_err_re_cache (
			"cannot approximate %s to hyperscan: timeout waiting",
			rspamd_regexp_get_pattern (re));
	signal (SIGCHLD, SIG_IGN);

	return FALSE;
}
/**
 * Check whether a file looks like a hyperscan cache file that belongs to one
 * of the regexp classes of the cache.
 *
 * The path must end with `<class hash>.hs`. The file must start with the
 * magic matching the cache mode (vectored or block), followed by platform
 * info identical to the platform info of the cache.
 *
 * @param cache regexp cache (must not be NULL)
 * @param path path of the candidate file (must not be NULL)
 * @param silent if TRUE, do not log a failure to open the file nor an
 *        unknown file name; other errors are always logged
 * @param try_load if TRUE, additionally map the file and check that the
 *        serialized database can be deserialized
 * @return TRUE if the file passed all checks, FALSE otherwise or when built
 *         without hyperscan support
 */
gboolean
rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache,
		const char *path, gboolean silent, gboolean try_load)
{
	g_assert (cache != NULL);
	g_assert (path != NULL);

#ifndef WITH_HYPERSCAN
	return FALSE;
#else
	gint fd, n, ret;
	guchar magicbuf[RSPAMD_HS_MAGIC_LEN];
	const guchar *mb;
	GHashTableIter it;
	gpointer k, v;
	struct rspamd_re_class *re_class;
	gsize len;
	const gchar *hash_pos;
	hs_platform_info_t test_plt;
	hs_database_t *test_db = NULL;
	guchar *map, *p, *end;
	/* Length of a class hash as it appears in the file name */
	const gsize hash_len = sizeof (re_class->hash) - 1;
	/* Bytes present regardless of the regexp count: header + count + crc */
	const gsize fixed_len = RSPAMD_HS_MAGIC_LEN + sizeof (test_plt) +
			sizeof (gint) + sizeof (guint64);

	len = strlen (path);

	/*
	 * The name must hold the full hash plus ".hs", otherwise hash_pos below
	 * would point before the start of the string.
	 */
	if (len < hash_len + 3) {
		return FALSE;
	}

	if (memcmp (path + len - 3, ".hs", 3) != 0) {
		return FALSE;
	}

	hash_pos = path + len - 3 - hash_len;
	g_hash_table_iter_init (&it, cache->re_classes);

	while (g_hash_table_iter_next (&it, &k, &v)) {
		re_class = v;

		if (memcmp (hash_pos, re_class->hash, hash_len) != 0) {
			continue;
		}

		/* Open file and check magic */
		fd = open (path, O_RDONLY);

		if (fd == -1) {
			if (!silent) {
				msg_err_re_cache ("cannot open hyperscan cache file %s: %s",
						path, strerror (errno));
			}

			return FALSE;
		}

		if (read (fd, magicbuf, sizeof (magicbuf)) != sizeof (magicbuf)) {
			msg_err_re_cache ("cannot read hyperscan cache file %s: %s",
					path, strerror (errno));
			close (fd);

			return FALSE;
		}

		/* Vectored and block databases carry different magic */
		if (cache->vectorized_hyperscan) {
			mb = rspamd_hs_magic_vector;
		}
		else {
			mb = rspamd_hs_magic;
		}

		if (memcmp (magicbuf, mb, sizeof (magicbuf)) != 0) {
			msg_err_re_cache ("cannot open hyperscan cache file %s: "
					"bad magic ('%*xs', '%*xs' expected)",
					path, (int) RSPAMD_HS_MAGIC_LEN, magicbuf,
					(int) RSPAMD_HS_MAGIC_LEN, mb);
			close (fd);

			return FALSE;
		}

		if (read (fd, &test_plt, sizeof (test_plt)) != sizeof (test_plt)) {
			msg_err_re_cache ("cannot read hyperscan cache file %s: %s",
					path, strerror (errno));
			close (fd);

			return FALSE;
		}

		if (memcmp (&test_plt, &cache->plt, sizeof (test_plt)) != 0) {
			msg_err_re_cache ("cannot open hyperscan cache file %s: "
					"compiled for a different platform",
					path);
			close (fd);

			return FALSE;
		}

		close (fd);

		if (try_load) {
			/* From here on len is the size of the mapping, not of the path */
			map = rspamd_file_xmap (path, PROT_READ, &len);

			if (map == NULL) {
				msg_err_re_cache ("cannot mmap hyperscan cache file %s: "
						"%s",
						path, strerror (errno));

				return FALSE;
			}

			p = map + RSPAMD_HS_MAGIC_LEN + sizeof (test_plt);
			end = map + len;

			/*
			 * Read the count only when the file can hold it; memcpy avoids
			 * an unaligned access through a cast pointer.
			 */
			n = 0;

			if (len >= fixed_len) {
				memcpy (&n, p, sizeof (n));
			}

			/* Division keeps the bound check free of integer overflow */
			if (n <= 0 ||
					(gsize)n > (len - fixed_len) / (2 * sizeof (gint))) {
				/* Some wrong amount of regexps */
				msg_err_re_cache ("bad number of expressions in %s: %d",
						path, n);
				munmap (map, len);

				return FALSE;
			}

			/* Skip count, IDs + flags and crc */
			p += sizeof (gint);
			p += (gsize)n * sizeof (gint) * 2 + sizeof (guint64);

			if ((ret = hs_deserialize_database (p, end - p, &test_db))
					!= HS_SUCCESS) {
				msg_err_re_cache ("bad hs database in %s: %d", path, ret);
				munmap (map, len);

				return FALSE;
			}

			hs_free_database (test_db);
			munmap (map, len);
		}

		/* XXX: add crc check */
		return TRUE;
	}

	if (!silent) {
		msg_warn_re_cache ("unknown hyperscan cache file %s", path);
	}

	return FALSE;
#endif
}
/**
 * Check whether a file looks like a hyperscan cache file that belongs to one
 * of the regexp classes of the cache.
 *
 * The path must end with `<class hash>.hs`. The file must start with the
 * hyperscan magic, followed by platform info identical to the platform info
 * of the cache.
 *
 * @param cache regexp cache (must not be NULL)
 * @param path path of the candidate file (must not be NULL)
 * @param silent if TRUE, do not log a failure to open the file nor an
 *        unknown file name; other errors are always logged
 * @return TRUE if the file passed all checks, FALSE otherwise or when built
 *         without hyperscan support
 */
gboolean
rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache,
		const char *path, gboolean silent)
{
	g_assert (cache != NULL);
	g_assert (path != NULL);

#ifndef WITH_HYPERSCAN
	return FALSE;
#else
	gint fd;
	guchar magicbuf[RSPAMD_HS_MAGIC_LEN];
	GHashTableIter it;
	gpointer k, v;
	struct rspamd_re_class *re_class;
	gsize len;
	const gchar *hash_pos;
	hs_platform_info_t test_plt;
	/* Length of a class hash as it appears in the file name */
	const gsize hash_len = sizeof (re_class->hash) - 1;

	len = strlen (path);

	/*
	 * The name must hold the full hash plus ".hs", otherwise hash_pos below
	 * would point before the start of the string.
	 */
	if (len < hash_len + 3) {
		return FALSE;
	}

	if (memcmp (path + len - 3, ".hs", 3) != 0) {
		return FALSE;
	}

	hash_pos = path + len - 3 - hash_len;
	g_hash_table_iter_init (&it, cache->re_classes);

	while (g_hash_table_iter_next (&it, &k, &v)) {
		re_class = v;

		if (memcmp (hash_pos, re_class->hash, hash_len) != 0) {
			continue;
		}

		/* Open file and check magic */
		fd = open (path, O_RDONLY);

		if (fd == -1) {
			if (!silent) {
				msg_err_re_cache ("cannot open hyperscan cache file %s: %s",
						path, strerror (errno));
			}

			return FALSE;
		}

		if (read (fd, magicbuf, sizeof (magicbuf)) != sizeof (magicbuf)) {
			msg_err_re_cache ("cannot read hyperscan cache file %s: %s",
					path, strerror (errno));
			close (fd);

			return FALSE;
		}

		if (memcmp (magicbuf, rspamd_hs_magic, sizeof (magicbuf)) != 0) {
			msg_err_re_cache ("cannot open hyperscan cache file %s: "
					"bad magic ('%*xs', '%*xs' expected)",
					path, (int) RSPAMD_HS_MAGIC_LEN, magicbuf,
					(int) RSPAMD_HS_MAGIC_LEN, rspamd_hs_magic);
			close (fd);

			return FALSE;
		}

		if (read (fd, &test_plt, sizeof (test_plt)) != sizeof (test_plt)) {
			msg_err_re_cache ("cannot read hyperscan cache file %s: %s",
					path, strerror (errno));
			close (fd);

			return FALSE;
		}

		if (memcmp (&test_plt, &cache->plt, sizeof (test_plt)) != 0) {
			msg_err_re_cache ("cannot open hyperscan cache file %s: "
					"compiled for a different platform",
					path);
			close (fd);

			return FALSE;
		}

		/* XXX: add crc check */
		close (fd);

		return TRUE;
	}

	if (!silent) {
		msg_warn_re_cache ("unknown hyperscan cache file %s", path);
	}

	return FALSE;
#endif
}