/* * Calculates the specified regexp for the specified class if it's not calculated */ static guint rspamd_re_cache_exec_re (struct rspamd_task *task, struct rspamd_re_runtime *rt, rspamd_regexp_t *re, struct rspamd_re_class *re_class, gboolean is_strong) { guint ret = 0, i, re_id; GPtrArray *headerlist; GHashTableIter it; struct raw_header *rh; const gchar *in, *end; const guchar **scvec; guint *lenvec; gboolean raw = FALSE; struct mime_text_part *part; struct rspamd_url *url; struct rspamd_re_cache *cache = rt->cache; gpointer k, v; guint len, cnt; msg_debug_re_cache ("get to the slow path for re type: %s: %s", rspamd_re_cache_type_to_string (re_class->type), rspamd_regexp_get_pattern (re)); re_id = rspamd_regexp_get_cache_id (re); switch (re_class->type) { case RSPAMD_RE_HEADER: case RSPAMD_RE_RAWHEADER: /* Get list of specified headers */ headerlist = rspamd_message_get_header_array (task, re_class->type_data, is_strong); if (headerlist) { scvec = g_malloc (sizeof (*scvec) * headerlist->len); lenvec = g_malloc (sizeof (*lenvec) * headerlist->len); for (i = 0; i < headerlist->len; i ++) { rh = g_ptr_array_index (headerlist, i); if (re_class->type == RSPAMD_RE_RAWHEADER) { in = rh->value; raw = TRUE; lenvec[i] = strlen (rh->value); } else { in = rh->decoded; /* Validate input */ if (!in || !g_utf8_validate (in, -1, &end)) { lenvec[i] = 0; scvec[i] = (guchar *)""; continue; } lenvec[i] = end - in; } scvec[i] = (guchar *)in; } ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, scvec, lenvec, headerlist->len, raw); debug_task ("checking header %s regexp: %s -> %d", re_class->type_data, rspamd_regexp_get_pattern (re), ret); g_free (scvec); g_free (lenvec); } break; case RSPAMD_RE_ALLHEADER: raw = TRUE; in = task->raw_headers_content.begin; len = task->raw_headers_content.len; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, (const guchar **)&in, &len, 1, raw); debug_task ("checking allheader regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_MIME: case RSPAMD_RE_RAWMIME: /* Iterate through text parts */ if (task->text_parts->len > 0) { scvec = g_malloc (sizeof (*scvec) * task->text_parts->len); lenvec = g_malloc (sizeof (*lenvec) * task->text_parts->len); for (i = 0; i < task->text_parts->len; i++) { part = g_ptr_array_index (task->text_parts, i); /* Skip empty parts */ if (IS_PART_EMPTY (part)) { lenvec[i] = 0; scvec[i] = (guchar *) ""; continue; } /* Check raw flags */ if (!IS_PART_UTF (part)) { raw = TRUE; } /* Select data for regexp */ if (re_class->type == RSPAMD_RE_RAWMIME) { in = part->orig->data; len = part->orig->len; raw = TRUE; } else { in = part->content->data; len = part->content->len; } scvec[i] = (guchar *) in; lenvec[i] = len; } ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, scvec, lenvec, task->text_parts->len, raw); debug_task ("checking mime regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); g_free (scvec); g_free (lenvec); } break; case RSPAMD_RE_URL: cnt = g_hash_table_size (task->urls) + g_hash_table_size (task->emails); if (cnt > 0) { scvec = g_malloc (sizeof (*scvec) * cnt); lenvec = g_malloc (sizeof (*lenvec) * cnt); g_hash_table_iter_init (&it, task->urls); i = 0; while (g_hash_table_iter_next (&it, &k, &v)) { url = v; in = url->string; len = url->urllen; raw = FALSE; scvec[i] = (guchar *)in; lenvec[i++] = len; } g_hash_table_iter_init (&it, task->emails); while (g_hash_table_iter_next (&it, &k, &v)) { url = v; in = url->string; len = url->urllen; raw = FALSE; scvec[i] = (guchar *) in; lenvec[i++] = len; } g_assert (i == cnt); ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, scvec, lenvec, i, raw); debug_task ("checking url regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); g_free (scvec); g_free (lenvec); } break; case RSPAMD_RE_BODY: raw = TRUE; in = task->msg.begin; len = task->msg.len; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, (const guchar **)&in, &len, 1, raw); debug_task ("checking rawbody regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_MAX: msg_err_task ("regexp of class invalid has been called: %s", rspamd_regexp_get_pattern (re)); break; } #if WITH_HYPERSCAN if (!rt->cache->disable_hyperscan) { rspamd_re_cache_finish_class (rt, re_class); } #endif setbit (rt->checked, re_id); return rt->results[re_id]; }
static void insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol, double flag, GList * opts, gboolean single) { struct metric_result *metric_res; struct symbol *s; gdouble w, *gr_score = NULL; struct rspamd_symbol_def *sdef; struct rspamd_symbols_group *gr = NULL; const ucl_object_t *mobj, *sobj; metric_res = rspamd_create_metric_result (task, metric->name); sdef = g_hash_table_lookup (metric->symbols, symbol); if (sdef == NULL) { w = 0.0; } else { w = (*sdef->weight_ptr) * flag; gr = sdef->gr; if (gr != NULL) { gr_score = g_hash_table_lookup (metric_res->sym_groups, gr); if (gr_score == NULL) { gr_score = rspamd_mempool_alloc (task->task_pool, sizeof (gdouble)); *gr_score = 0; g_hash_table_insert (metric_res->sym_groups, gr, gr_score); } } } if (task->settings) { mobj = ucl_object_find_key (task->settings, metric->name); if (mobj) { gdouble corr; sobj = ucl_object_find_key (mobj, symbol); if (sobj != NULL && ucl_object_todouble_safe (sobj, &corr)) { msg_debug ("settings: changed weight of symbol %s from %.2f to %.2f", symbol, w, corr); w = corr * flag; } } } /* XXX: does not take grow factor into account */ if (gr != NULL && gr_score != NULL && gr->max_score > 0.0) { if (*gr_score >= gr->max_score) { msg_info ("maximum group score %.2f for group %s has been reached," " ignoring symbol %s with weight %.2f", gr->max_score, gr->name, symbol, w); return; } else if (*gr_score + w > gr->max_score) { w = gr->max_score - *gr_score; } *gr_score += w; } /* Add metric score */ if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) { if (sdef && sdef->one_shot) { /* * For one shot symbols we do not need to add them again, so * we just force single behaviour here */ single = TRUE; } if (s->options && opts && opts != s->options) { /* Append new options */ s->options = g_list_concat (s->options, g_list_copy (opts)); /* * Note that there is no need to add new destructor of GList as elements of appended * GList are used directly, so just free initial GList */ } else if (opts) { s->options = g_list_copy (opts); rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); } if (!single) { /* Handle grow factor */ if (metric_res->grow_factor && w > 0) { w *= metric_res->grow_factor; metric_res->grow_factor *= metric->grow_factor; } s->score += w; metric_res->score += w; } else { if (fabs (s->score) < fabs (w)) { /* Replace less weight with a bigger one */ metric_res->score = metric_res->score - s->score + w; s->score = w; } } } else { s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol)); /* Handle grow factor */ if (metric_res->grow_factor && w > 0) { w *= metric_res->grow_factor; metric_res->grow_factor *= metric->grow_factor; } else if (w > 0) { metric_res->grow_factor = metric->grow_factor; } s->score = w; s->name = symbol; s->def = sdef; metric_res->score += w; if (opts) { s->options = g_list_copy (opts); rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); } else { s->options = NULL; } g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s); } debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w); }
static void dkim_symbol_callback (struct rspamd_task *task, void *unused) { GList *hlist; rspamd_dkim_context_t *ctx; rspamd_dkim_key_t *key; GError *err = NULL; struct raw_header *rh; struct dkim_check_result *res = NULL, *cur; /* First check if a message has its signature */ hlist = rspamd_message_get_header (task, DKIM_SIGNHEADER, FALSE); if (hlist != NULL) { /* Check whitelist */ msg_debug ("dkim signature found"); if (radix_find_compressed_addr (dkim_module_ctx->whitelist_ip, task->from_addr) == RADIX_NO_VALUE) { /* Parse signature */ msg_debug ("create dkim signature"); while (hlist != NULL) { rh = (struct raw_header *)hlist->data; if (res == NULL) { res = rspamd_mempool_alloc0 (task->task_pool, sizeof (*res)); res->prev = res; res->w = rspamd_session_get_watcher (task->s); cur = res; } else { cur = rspamd_mempool_alloc0 (task->task_pool, sizeof (*res)); } cur->first = res; cur->res = -1; cur->task = task; cur->mult_allow = 1.0; cur->mult_deny = 1.0; ctx = rspamd_create_dkim_context (rh->decoded, task->task_pool, dkim_module_ctx->time_jitter, &err); if (ctx == NULL) { if (err != NULL) { msg_info ("<%s> cannot parse DKIM context: %s", task->message_id, err->message); g_error_free (err); } else { msg_info ("<%s> cannot parse DKIM context: unknown error", task->message_id); } hlist = g_list_next (hlist); continue; } else { /* Get key */ cur->ctx = ctx; if (dkim_module_ctx->trusted_only && (dkim_module_ctx->dkim_domains == NULL || g_hash_table_lookup (dkim_module_ctx->dkim_domains, ctx->domain) == NULL)) { msg_debug ("skip dkim check for %s domain", ctx->domain); hlist = g_list_next (hlist); continue; } key = rspamd_lru_hash_lookup (dkim_module_ctx->dkim_hash, ctx->dns_key, task->tv.tv_sec); if (key != NULL) { debug_task ("found key for %s in cache", ctx->dns_key); cur->key = key; } else { debug_task ("request key for %s from DNS", ctx->dns_key); rspamd_get_dkim_key (ctx, task, dkim_module_key_handler, cur); } } if (res != cur) { DL_APPEND (res, cur); } hlist = g_list_next (hlist); } } } if (res != NULL) { rspamd_session_watcher_push (task->s); dkim_module_check (res); } }
gboolean rspamd_task_load_message (struct rspamd_task *task, struct rspamd_http_message *msg, const gchar *start, gsize len) { guint control_len, r; struct ucl_parser *parser; ucl_object_t *control_obj; gchar filepath[PATH_MAX], *fp; gint fd, flen; gpointer map; struct stat st; if (msg) { rspamd_protocol_handle_headers (task, msg); } if (task->flags & RSPAMD_TASK_FLAG_FILE) { g_assert (task->msg.len > 0); r = rspamd_strlcpy (filepath, task->msg.start, MIN (sizeof (filepath), task->msg.len + 1)); rspamd_decode_url (filepath, filepath, r + 1); flen = strlen (filepath); if (filepath[0] == '"' && flen > 2) { /* We need to unquote filepath */ fp = &filepath[1]; fp[flen - 2] = '\0'; } else { fp = &filepath[0]; } if (access (fp, R_OK) == -1 || stat (fp, &st) == -1) { g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Invalid file (%s): %s", fp, strerror (errno)); return FALSE; } fd = open (fp, O_RDONLY); if (fd == -1) { g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Cannot open file (%s): %s", fp, strerror (errno)); return FALSE; } map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { close (fd); g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Cannot mmap file (%s): %s", fp, strerror (errno)); return FALSE; } close (fd); task->msg.start = map; task->msg.len = st.st_size; rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, task); } else { debug_task ("got input of length %z", task->msg.len); task->msg.start = start; task->msg.len = len; if (task->msg.len == 0) { msg_warn_task ("message has invalid message length: %ud", task->msg.len); g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Invalid length"); return FALSE; } if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) { /* We have control chunk, so we need to process it separately */ if (task->msg.len < task->message_len) { msg_warn_task ("message has invalid message length: %ud and total len: %ud", task->message_len, task->msg.len); g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Invalid length"); return FALSE; } control_len = task->msg.len - task->message_len; if (control_len > 0) { parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); if (!ucl_parser_add_chunk (parser, task->msg.start, control_len)) { msg_warn_task ("processing of control chunk failed: %s", ucl_parser_get_error (parser)); ucl_parser_free (parser); } else { control_obj = ucl_parser_get_object (parser); ucl_parser_free (parser); rspamd_protocol_handle_control (task, control_obj); ucl_object_unref (control_obj); } task->msg.start += control_len; task->msg.len -= control_len; } } } return TRUE; }
/* * Free all structures of worker_task */ void rspamd_task_free (struct rspamd_task *task, gboolean is_soft) { struct mime_part *p; struct mime_text_part *tp; guint i; if (task) { debug_task ("free pointer %p", task); for (i = 0; i < task->parts->len; i ++) { p = g_ptr_array_index (task->parts, i); g_byte_array_free (p->content, TRUE); } for (i = 0; i < task->text_parts->len; i ++) { tp = g_ptr_array_index (task->text_parts, i); if (tp->words) { g_array_free (tp->words, TRUE); } if (tp->normalized_words) { g_array_free (tp->normalized_words, TRUE); } } if (task->images) { g_list_free (task->images); } if (task->messages) { g_list_free (task->messages); } if (task->http_conn != NULL) { rspamd_http_connection_unref (task->http_conn); } if (task->sock != -1) { close (task->sock); } if (task->settings != NULL) { ucl_object_unref (task->settings); } if (task->client_addr) { rspamd_inet_address_destroy (task->client_addr); } if (task->from_addr) { rspamd_inet_address_destroy (task->from_addr); } if (task->err) { g_error_free (task->err); } rspamd_mempool_delete (task->task_pool); g_slice_free1 (sizeof (struct rspamd_task), task); } }
/* * Calculates the specified regexp for the specified class if it's not calculated */ static guint rspamd_re_cache_exec_re (struct rspamd_task *task, struct rspamd_re_runtime *rt, rspamd_regexp_t *re, struct rspamd_re_class *re_class, gboolean is_strong) { guint ret = 0, i, re_id; GList *cur, *headerlist; GHashTableIter it; struct raw_header *rh; const gchar *in; gboolean raw = FALSE; struct mime_text_part *part; struct rspamd_url *url; struct rspamd_re_cache *cache = rt->cache; gpointer k, v; gsize len; msg_debug_re_cache ("get to the slow path for re type: %s: %s", rspamd_re_cache_type_to_string (re_class->type), rspamd_regexp_get_pattern (re)); re_id = rspamd_regexp_get_cache_id (re); switch (re_class->type) { case RSPAMD_RE_HEADER: case RSPAMD_RE_RAWHEADER: /* Get list of specified headers */ headerlist = rspamd_message_get_header (task, re_class->type_data, is_strong); if (headerlist) { cur = headerlist; while (cur) { rh = cur->data; if (re_class->type == RSPAMD_RE_RAWHEADER) { in = rh->value; raw = TRUE; } else { in = rh->decoded; /* Validate input */ if (!in || !g_utf8_validate (in, -1, NULL)) { cur = g_list_next (cur); continue; } } /* Match re */ if (in) { ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, strlen (in), raw); debug_task ("checking header %s regexp: %s -> %d", re_class->type_data, rspamd_regexp_get_pattern (re), ret); } cur = g_list_next (cur); } } break; case RSPAMD_RE_ALLHEADER: raw = TRUE; in = task->raw_headers_content.begin; len = task->raw_headers_content.len; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, len, raw); debug_task ("checking allheader regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_MIME: case RSPAMD_RE_RAWMIME: /* Iterate throught text parts */ for (i = 0; i < task->text_parts->len; i++) { part = g_ptr_array_index (task->text_parts, i); /* Skip empty parts */ if (IS_PART_EMPTY (part)) { continue; } /* Check raw flags */ if (!IS_PART_UTF (part)) { raw = TRUE; } /* Select data for regexp */ if (re_class->type == RSPAMD_RE_RAWMIME) { in = part->orig->data; len = part->orig->len; raw = TRUE; } else { in = part->content->data; len = part->content->len; } if (len > 0) { ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, len, raw); debug_task ("checking mime regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); } } break; case RSPAMD_RE_URL: g_hash_table_iter_init (&it, task->urls); while (g_hash_table_iter_next (&it, &k, &v)) { url = v; in = url->string; len = url->urllen; raw = FALSE; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, len, raw); } g_hash_table_iter_init (&it, task->emails); while (g_hash_table_iter_next (&it, &k, &v)) { url = v; in = url->string; len = url->urllen; raw = FALSE; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, len, raw); } debug_task ("checking url regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_BODY: raw = TRUE; in = task->msg.begin; len = task->msg.len; ret = rspamd_re_cache_process_regexp_data (rt, re, task->task_pool, in, len, raw); debug_task ("checking rawbody regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_MAX: msg_err_task ("regexp of class invalid has been called: %s", rspamd_regexp_get_pattern (re)); break; } #if WITH_HYPERSCAN if (!rt->cache->disable_hyperscan) { rspamd_re_cache_finish_class (rt, re_class); } #endif setbit (rt->checked, re_id); return rt->results[re_id]; }
static void dkim_symbol_callback (struct rspamd_task *task, void *unused) { GList *hlist; rspamd_dkim_context_t *ctx; rspamd_dkim_key_t *key; GError *err = NULL; struct raw_header *rh; /* First check if a message has its signature */ hlist = message_get_header (task, DKIM_SIGNHEADER, FALSE); if (hlist != NULL) { /* Check whitelist */ msg_debug ("dkim signature found"); if (radix_find_compressed_addr (dkim_module_ctx->whitelist_ip, &task->from_addr) == RADIX_NO_VALUE) { /* Parse signature */ msg_debug ("create dkim signature"); /* * Check only last signature as there is no way to check embeded signatures after * resend or something like this */ if (dkim_module_ctx->skip_multi) { if (hlist->next != NULL) { msg_info ( "<%s> skip dkim check as it has several dkim signatures", task->message_id); return; } } hlist = g_list_last (hlist); rh = (struct raw_header *)hlist->data; ctx = rspamd_create_dkim_context (rh->decoded, task->task_pool, dkim_module_ctx->time_jitter, &err); if (ctx == NULL) { msg_info ("cannot parse DKIM context: %s", err->message); g_error_free (err); } else { /* Get key */ if (dkim_module_ctx->trusted_only && (dkim_module_ctx->dkim_domains == NULL || g_hash_table_lookup (dkim_module_ctx->dkim_domains, ctx->domain) == NULL)) { msg_debug ("skip dkim check for %s domain", ctx->domain); return; } key = rspamd_lru_hash_lookup (dkim_module_ctx->dkim_hash, ctx->dns_key, task->tv.tv_sec); if (key != NULL) { debug_task ("found key for %s in cache", ctx->dns_key); dkim_module_check (task, ctx, key); } else { debug_task ("request key for %s from DNS", ctx->dns_key); task->dns_requests++; rspamd_get_dkim_key (ctx, task->resolver, task->s, dkim_module_key_handler, task); } } } } }