/**
 * Create a standalone email address object from an SMTP address string.
 *
 * The input is handed to rspamd_smtp_addr_parse; unless the parsed result
 * carries RSPAMD_EMAIL_ADDR_VALID, nothing is allocated and NULL is returned.
 * For a valid address the parsed structure is copied into a block obtained
 * from g_slice_alloc. When the address is flagged as quoted and really starts
 * with a double quote, a fresh "user@domain" string is allocated with g_malloc
 * and stored in `addr` (RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED is set to record it).
 *
 * @param str address text, may be NULL
 * @param len number of bytes in `str`
 * @return new address object, or NULL if `str` is NULL, `len` is 0 or the
 *         address is not valid. REF_INIT_RETAIN is applied to the result with
 *         rspamd_email_addr_dtor (presumably refcount + destructor setup --
 *         confirm against the macro definition).
 */
struct rspamd_email_address *
rspamd_email_address_from_smtp (const gchar *str, guint len)
{
	struct rspamd_email_address parsed, *result;
	gsize buf_len;

	if (str == NULL || len == 0) {
		return NULL;
	}

	rspamd_smtp_addr_parse (str, len, &parsed);

	if (!(parsed.flags & RSPAMD_EMAIL_ADDR_VALID)) {
		/* Not a valid SMTP address: nothing to return */
		return NULL;
	}

	result = g_slice_alloc (sizeof (*result));
	memcpy (result, &parsed, sizeof (parsed));

	if ((result->flags & RSPAMD_EMAIL_ADDR_QUOTED) && result->addr[0] == '"') {
		if (result->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
			/* The user part contains backslashes, so it is unescaped first */
			rspamd_email_address_unescape (result);
		}

		/*
		 * Rebuild the address without quotes: user + '@' + domain.
		 * The buffer is one byte larger than the limit given to
		 * rspamd_snprintf.
		 */
		buf_len = result->domain_len + result->user_len + 2;
		result->addr = g_malloc (buf_len + 1);
		result->addr_len = rspamd_snprintf ((char *)result->addr, buf_len,
				"%*s@%*s",
				(gint)result->user_len, result->user,
				(gint)result->domain_len, result->domain);
		result->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
	}

	REF_INIT_RETAIN (result, rspamd_email_addr_dtor);

	return result;
}
/**
 * Parse a MIME address-list header value and append one entry per address
 * found to a pointer array.
 *
 * The header is scanned byte by byte with a small state machine:
 *  - parse_name:   plain text outside quotes/brackets; ends on '"', '<',
 *                  ',' or '('
 *  - parse_quoted: inside a double-quoted part of the display name
 *  - parse_addr:   inside an angle-bracketed address, up to '>'
 *  - skip_comment: inside a (possibly nested) parenthesised comment
 *  - skip_spaces:  skip whitespace, then continue with `next_state`
 *
 * Display name text is accumulated in a temporary GString and passed to
 * rspamd_email_address_add together with the parsed address. When neither
 * rspamd_smtp_addr_parse nor rspamd_email_address_parse_heuristic accepts a
 * token, rspamd_email_address_add is called with a NULL address instead
 * (except for an unterminated '<...' at the end of input, which is dropped).
 *
 * Only the `len` bytes starting at `hdr` are read by this function; `hdr`
 * does not have to be NUL-terminated.
 *
 * @param pool memory pool passed to rspamd_email_address_add; when a new
 *             array is created, rspamd_email_address_list_destroy is
 *             registered on it for that array
 * @param hdr  header value to parse
 * @param len  number of bytes in `hdr`
 * @param src  array to append to, or NULL to create a new one
 * @return `src` when it is not NULL, otherwise the newly created array
 */
GPtrArray *
rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr,
		guint len, GPtrArray *src)
{
	GPtrArray *res = src;
	gboolean seen_at = FALSE;
	struct rspamd_email_address addr;
	/* p: cursor, c: start of the current token, t: scratch for trimming */
	const gchar *p = hdr, *end = hdr + len, *c = hdr, *t;
	GString *ns;
	/* Opening/closing parenthesis counters, reset when a comment starts */
	gint obraces = 0, ebraces = 0;
	enum {
		parse_name = 0,
		parse_quoted,
		parse_addr,
		skip_comment,
		skip_spaces
	} state = parse_name, next_state = parse_name;

	if (res == NULL) {
		res = g_ptr_array_sized_new (2);
		rspamd_mempool_add_destructor (pool,
				rspamd_email_address_list_destroy, res);
	}

	ns = g_string_sized_new (127);

	while (p < end) {
		switch (state) {
		case parse_name:
			if (*p == '"') {
				/* We need to strip last spaces and update `ns` */
				if (p > c) {
					t = p - 1;

					while (t > c && g_ascii_isspace (*t)) {
						t --;
					}

					g_string_append_len (ns, c, t - c + 1);
				}

				state = parse_quoted;
				c = p + 1;
			}
			else if (*p == '<') {
				/* Text before '<' belongs to the display name */
				if (p > c) {
					t = p - 1;

					while (t > c && g_ascii_isspace (*t)) {
						t --;
					}

					g_string_append_len (ns, c, t - c + 1);
				}

				/* The address token includes the opening '<' */
				c = p;
				state = parse_addr;
			}
			else if (*p == ',') {
				if (p > c) {
					/*
					 * Last token must be the address:
					 * e.g. Some name user@example.com
					 */
					t = p - 1;

					while (t > c && g_ascii_isspace (*t)) {
						t --;
					}

					rspamd_smtp_addr_parse (c, t - c + 1, &addr);

					if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
						rspamd_email_address_add (pool, res, &addr, ns);
					}
					else {
						/* Try heuristic */
						if (seen_at &&
							rspamd_email_address_parse_heuristic (c,
									t - c + 1, &addr)) {
							rspamd_email_address_add (pool, res, &addr, ns);
						}
						else {
							rspamd_email_address_add (pool, res, NULL, ns);
						}
					}

					/* Cleanup for the next use */
					g_string_set_size (ns, 0);
					seen_at = FALSE;
				}

				state = skip_spaces;
				next_state = parse_name;
			}
			else if (*p == '@') {
				seen_at = TRUE;
			}
			else if (*p == '(') {
				/* Flush the text collected so far before the comment */
				if (p > c) {
					t = p - 1;

					while (t > c && g_ascii_isspace (*t)) {
						t --;
					}

					g_string_append_len (ns, c, t - c + 1);
				}

				c = p;
				obraces = 1;
				ebraces = 0;
				state = skip_comment;
				next_state = parse_name;
			}

			p ++;
			break;
		case parse_quoted:
			if (*p == '"') {
				/* Closing quote: the quoted text goes to the name as is */
				if (p > c) {
					g_string_append_len (ns, c, p - c);
				}

				state = skip_spaces;
				next_state = parse_name;
			}

			p ++;
			break;
		case parse_addr:
			if (*p == '>') {
				/* Token spans from '<' to '>' inclusive */
				rspamd_smtp_addr_parse (c, p - c + 1, &addr);

				if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
					rspamd_email_address_add (pool, res, &addr, ns);
				}
				else {
					/* Try heuristic */
					if (seen_at &&
						rspamd_email_address_parse_heuristic (c,
								p - c + 1, &addr)) {
						rspamd_email_address_add (pool, res, &addr, ns);
					}
					else {
						rspamd_email_address_add (pool, res, NULL, ns);
					}
				}

				/* Cleanup for the next use */
				g_string_set_size (ns, 0);
				seen_at = FALSE;
				state = skip_spaces;
				next_state = parse_name;
			}
			else if (*p == '@') {
				seen_at = TRUE;
			}
			else if (*p == '(') {
				/* Comment inside the address: `c` is left untouched */
				obraces = 1;
				ebraces = 0;
				state = skip_comment;
				next_state = parse_addr;
			}

			p ++;
			break;
		case skip_spaces:
			if (!g_ascii_isspace (*p)) {
				/* Do not consume the byte: it is handled by next_state */
				c = p;
				state = next_state;
			}
			else {
				p ++;
			}
			break;
		case skip_comment:
			if (*p == '(') {
				obraces ++;
			}
			else if (*p == ')') {
				ebraces ++;
			}

			if (obraces == ebraces) {
				if (next_state == parse_name) {
					/* Include comment in name */
					if (p > c) {
						t = p - 1;

						while (t > c && g_ascii_isspace (*t)) {
							t --;
						}

						g_string_append_len (ns, c, t - c + 1);
					}

					c = p;
				}

				state = next_state;
			}

			p ++;
			break;
		}
	}

	/* Handle leftover */
	switch (state) {
	case parse_name:
		/* Assume the whole header as name (bad thing) */
		if (p > c) {
			/*
			 * Strip trailing whitespace. `p` equals `end` here, so the
			 * byte to inspect is the one *before* `p`: dereferencing `p`
			 * itself would read one byte past the input buffer.
			 */
			while (p > c && g_ascii_isspace (*(p - 1))) {
				p --;
			}

			if (seen_at) {
				/* The whole email is likely address */
				rspamd_smtp_addr_parse (c, p - c, &addr);

				if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
					rspamd_email_address_add (pool, res, &addr, ns);
				}
				else {
					/* Try heuristic */
					if (rspamd_email_address_parse_heuristic (c,
							p - c, &addr)) {
						rspamd_email_address_add (pool, res, &addr, ns);
					}
					else {
						rspamd_email_address_add (pool, res, NULL, ns);
					}
				}
			}
			else {
				/* No @ seen */
				g_string_append_len (ns, c, p - c);
				rspamd_email_address_add (pool, res, NULL, ns);
			}
		}
		break;
	case parse_addr:
		/* Unterminated '<...': try to use what we have, else drop it */
		if (p > c) {
			rspamd_smtp_addr_parse (c, p - c, &addr);

			if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
				rspamd_email_address_add (pool, res, &addr, ns);
			}
			else {
				/* Try heuristic */
				if (rspamd_email_address_parse_heuristic (c, p - c, &addr)) {
					rspamd_email_address_add (pool, res, &addr, ns);
				}
			}
		}
		break;
	case parse_quoted:
	case skip_comment:
		/* Unfinished quoted string or a comment */
		break;
	default:
		/* Do nothing */
		break;
	}

	g_string_free (ns, TRUE);

	return res;
}