/* Parse one message-id's "id-left@id-right" part from *msgid_p without
   tokenizing it, appending the parsed text to msgid.

   The opening '<' is not consumed here - presumably the caller has already
   skipped it (TODO confirm against the caller).

   Returns TRUE and moves *msgid_p just past the closing '>' on success.
   Returns FALSE otherwise, leaving *msgid_p untouched; msgid may already
   contain partially parsed output in that case. */
static bool get_untokenized_msgid(const char **msgid_p, string_t *msgid)
{
	struct rfc822_parser_context parser;
	const char *input = *msgid_p;

	rfc822_parser_init(&parser, (const unsigned char *)input,
			   strlen(input), NULL);

	/*
	   msg-id          = [CFWS] "<" id-left "@" id-right ">" [CFWS]
	   id-left         = dot-atom-text / no-fold-quote / obs-id-left
	   id-right        = dot-atom-text / no-fold-literal / obs-id-right
	   no-fold-quote   = DQUOTE *(qtext / quoted-pair) DQUOTE
	   no-fold-literal = "[" *(dtext / quoted-pair) "]"
	*/

	/* id-left, which must be followed by '@'. Only the dot-atom form is
	   handled here. */
	(void)rfc822_skip_lwsp(&parser);
	if (rfc822_parse_dot_atom(&parser, msgid) <= 0 ||
	    *parser.data != '@')
		return FALSE;
	str_append_c(msgid, '@');
	parser.data++;

	/* id-right, which must be followed by the terminating '>' */
	(void)rfc822_skip_lwsp(&parser);
	if (rfc822_parse_dot_atom(&parser, msgid) <= 0 ||
	    *parser.data != '>')
		return FALSE;

	*msgid_p = (const char *)parser.data + 1;
	return TRUE;
}
/* Parse the next ";key=value" parameter of a Content-* header.

   Returns 0 if the parser is already at the end of input, -1 if the
   parameter is syntactically broken before its value (or a value parser
   returned < 0), and 1 otherwise. On a return of 1, *key_r and *value_r
   point to NUL-terminated strings stored back-to-back in a buffer created
   with t_str_new(). The value may be empty when it was missing. Both
   outputs are set to NULL on entry. */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
			       const char **key_r, const char **value_r)
{
	string_t *buf;
	size_t value_offset;
	int ret;

	/* .. := *(";" parameter)
	   parameter := attribute "=" value
	   attribute := token
	   value := token / quoted-string
	*/
	*key_r = NULL;
	*value_r = NULL;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data != ';')
		return -1;
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	/* attribute, stored as "key\0" so the value can follow it in the
	   same buffer */
	buf = t_str_new(64);
	if (rfc822_parse_mime_token(ctx, buf) <= 0)
		return -1;
	str_append_c(buf, '\0');
	value_offset = str_len(buf);

	if (*ctx->data != '=')
		return -1;
	ctx->data++;

	ret = rfc822_skip_lwsp(ctx);
	if (ret > 0) {
		if (*ctx->data == '"') {
			ret = rfc822_parse_quoted_string(ctx, buf);
			(void)str_unescape(str_c_modifiable(buf) +
					   value_offset);
		} else if (ctx->data != ctx->end && *ctx->data == '=') {
			/* workaround for broken input:
			   name==?utf-8?b?...?=
			   Copy everything up to the next ';' or whitespace
			   verbatim. */
			for (; ctx->data != ctx->end; ctx->data++) {
				unsigned char c = *ctx->data;

				if (c == ';' || c == ' ' || c == '\t' ||
				    c == '\r' || c == '\n')
					break;
				str_append_c(buf, c);
			}
		} else {
			ret = rfc822_parse_mime_token(ctx, buf);
		}
	}
	/* else: broken / no value - still return the key with whatever
	   value text is in the buffer */

	*key_r = str_c(buf);
	*value_r = *key_r + value_offset;
	return ret < 0 ? -1 : 1;
}
/* Parse a "[...]" domain literal starting at ctx->data, which must point at
   the opening '['. The whole literal including the brackets is appended to
   str. Returns rfc822_skip_lwsp()'s result after the closing ']', or -1 if
   the input ends before a ']' is found. */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *literal_start;

	/*
	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
	   dcontent        = dtext / quoted-pair
	   dtext           = NO-WS-CTL /     ; Non white space controls
	                     %d33-90 /       ; The rest of the US-ASCII
	                     %d94-126        ;  characters not including "[",
	                                     ;  "]", or "\"
	*/
	i_assert(*ctx->data == '[');

	literal_start = ctx->data;
	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case '\\':
			/* quoted-pair: step over the escaped byte as well so
			   an escaped ']' doesn't terminate the literal */
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			break;
		case ']':
			ctx->data++;
			str_append_n(str, literal_start,
				     ctx->data - literal_start);
			return rfc822_skip_lwsp(ctx);
		default:
			break;
		}
		ctx->data++;
	}

	/* missing ']' */
	return -1;
}
/* Parse a phrase (a run of atoms and quoted-strings) and append it to str,
   with consecutive words separated by a single space.

   Returns 0 if the parser is already at the end of input, -1 if the phrase
   begins with '.', the <= 0 result of a word parser if one stops early, and
   otherwise rfc822_skip_lwsp()'s result after the last word. */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret, more_words;

	/*
	   phrase     = 1*word / obs-phrase
	   word       = atom / quoted-string
	   obs-phrase = word *(word / "." / CFWS)
	*/
	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data == '.')
		return -1;

	do {
		ret = *ctx->data == '"' ?
			rfc822_parse_quoted_string(ctx, str) :
			rfc822_parse_atom_or_dot(ctx, str);
		if (ret <= 0)
			return ret;

		/* another word follows only if the next byte can start one */
		more_words = IS_ATEXT(*ctx->data) ||
			*ctx->data == '"' || *ctx->data == '.';
		if (more_words)
			str_append_c(str, ' ');
	} while (more_words);

	return rfc822_skip_lwsp(ctx);
}
/* Handle a Content-Type header for the message decoder: remember the
   "type/subtype" string in ctx->content_type and the value of the first
   "charset" parameter in ctx->content_charset. Does nothing if a content
   type has already been stored, and stores nothing if
   rfc822_parse_content_type() returns < 0. */
static void parse_content_type(struct message_decoder_context *ctx,
			       struct message_header_line *hdr)
{
	struct rfc822_parser_context parser;
	const char *const *param;
	string_t *type;

	if (ctx->content_type != NULL)
		return;

	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
	rfc822_skip_lwsp(&parser);

	type = t_str_new(64);
	if (rfc822_parse_content_type(&parser, type) < 0)
		return;
	ctx->content_type = i_strdup(str_c(type));

	/* the result is a NULL-terminated list of key, value pairs */
	rfc2231_parse(&parser, &param);
	while (*param != NULL) {
		if (strcasecmp(param[0], "charset") == 0) {
			ctx->content_charset = i_strdup(param[1]);
			break;
		}
		param += 2;
	}
}
enum message_cte message_decoder_parse_cte(struct message_header_line *hdr) { struct rfc822_parser_context parser; enum message_cte message_cte; string_t *value; value = t_str_new(64); rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); rfc822_skip_lwsp(&parser); (void)rfc822_parse_mime_token(&parser, value); message_cte = MESSAGE_CTE_UNKNOWN; switch (str_len(value)) { case 4: if (i_memcasecmp(str_data(value), "7bit", 4) == 0 || i_memcasecmp(str_data(value), "8bit", 4) == 0) message_cte = MESSAGE_CTE_78BIT; break; case 6: if (i_memcasecmp(str_data(value), "base64", 6) == 0) message_cte = MESSAGE_CTE_BASE64; else if (i_memcasecmp(str_data(value), "binary", 6) == 0) message_cte = MESSAGE_CTE_BINARY; break; case 16: if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0) message_cte = MESSAGE_CTE_QP; break; } return message_cte; }
/* Parse a dot-atom ("foo.bar.baz") and append it to str. Whitespace
   around the dots is skipped and not copied.

   Returns -1 if the input doesn't start with atext, or if a '.' isn't
   followed by at least one atext character ("a..b", "a.>", "a." at the end
   of input). Returns 0 when the dot-atom ran to the end of the input, 1 when
   it was followed by something other than '.', and otherwise whatever <= 0
   value rfc822_skip_lwsp() returned after an atom. On -1 str may already
   contain the part that was parsed before the error. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;
	int ret;

	/*
	   dot-atom        = [CFWS] dot-atom-text [CFWS]
	   dot-atom-text   = 1*atext *("." 1*atext)

	   atext           =
	     ; Any character except controls, SP, and specials.

	   For RFC-822 compatibility allow LWSP around '.'
	*/
	if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
		return -1;

	for (start = ctx->data++; ctx->data != ctx->end; ) {
		if (IS_ATEXT(*ctx->data)) {
			ctx->data++;
			continue;
		}

		if (start == ctx->data) {
			/* the previous '.' wasn't followed by any atext,
			   which the grammar above doesn't allow */
			return -1;
		}
		str_append_n(str, start, ctx->data - start);

		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
			return ret;

		if (*ctx->data != '.')
			return 1;

		ctx->data++;
		str_append_c(str, '.');

		if (rfc822_skip_lwsp(ctx) <= 0) {
			/* the input ended (or was broken) right after the
			   '.', so the required atom is missing */
			return -1;
		}
		start = ctx->data;
	}

	if (start == ctx->data) {
		/* nothing followed the last '.' */
		return -1;
	}
	str_append_n(str, start, ctx->data - start);
	return 0;
}
/* Parse "type/subtype" from a Content-Type value and append it to str,
   with whitespace around the '/' dropped.

   Returns -1 if the input ends or is broken before the subtype starts, or
   if the main type isn't followed by '/'. Otherwise returns the result of
   rfc822_parse_mime_token() for the subtype. */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	/* main type */
	ret = rfc822_skip_lwsp(ctx);
	if (ret > 0)
		ret = rfc822_parse_mime_token(ctx, str);
	if (ret <= 0)
		return -1;

	/* the "/" separator; it is only copied to str once we know that
	   more input follows it */
	if (*ctx->data != '/')
		return -1;
	ctx->data++;
	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;
	str_append_c(str, '/');

	/* subtype */
	return rfc822_parse_mime_token(ctx, str);
}
static void parse_content_type(struct message_search_context *ctx, struct message_header_line *hdr) { struct rfc822_parser_context parser; string_t *content_type; rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); rfc822_skip_lwsp(&parser); content_type = t_str_new(64); (void)rfc822_parse_content_type(&parser, content_type); ctx->content_type_text = strncasecmp(str_c(content_type), "text/", 5) == 0 || strncasecmp(str_c(content_type), "message/", 8) == 0; }
/* Parse a MIME token: the longest run of bytes that are either
   IS_ATEXT_NON_TSPECIAL() or '.'. The run (possibly empty) is appended to
   str. Returns 0 if the token ran to the end of the input, otherwise
   rfc822_skip_lwsp()'s result after the token. */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *token = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, token, ctx->data - token);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
static void parse_content_type(struct message_parser_ctx *ctx, struct message_header_line *hdr) { struct rfc822_parser_context parser; const char *const *results; string_t *content_type; int ret; if (ctx->part_seen_content_type) return; ctx->part_seen_content_type = TRUE; rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); rfc822_skip_lwsp(&parser); content_type = t_str_new(64); ret = rfc822_parse_content_type(&parser, content_type); if (strcasecmp(str_c(content_type), "message/rfc822") == 0) ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; else if (strncasecmp(str_c(content_type), "text", 4) == 0 && (str_len(content_type) == 4 || str_data(content_type)[4] == '/')) ctx->part->flags |= MESSAGE_PART_FLAG_TEXT; else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) { ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART; if (strcasecmp(str_c(content_type)+10, "digest") == 0) ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST; } if (ret < 0 || (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || ctx->last_boundary != NULL) { rfc822_parser_deinit(&parser); return; } rfc2231_parse(&parser, &results); for (; *results != NULL; results += 2) { if (strcasecmp(results[0], "boundary") == 0) { ctx->last_boundary = p_strdup(ctx->parser_pool, results[1]); break; } } rfc822_parser_deinit(&parser); }
/* Parse the domain part of an address. ctx->data must point at the '@'
   preceding the domain. The domain is appended to str, either as a
   "[...]" domain literal or as a dot-atom. Returns -1 if nothing usable
   follows the '@', otherwise the result of the chosen sub-parser. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
	/*
	   domain          = dot-atom / domain-literal / obs-domain
	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
	   obs-domain      = atom *("." atom)
	*/
	i_assert(*ctx->data == '@');
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	return *ctx->data == '[' ?
		rfc822_parse_domain_literal(ctx, str) :
		rfc822_parse_dot_atom(ctx, str);
}
static void parse_content_type(struct attachment_istream *astream, const struct message_header_line *hdr) { struct rfc822_parser_context parser; string_t *content_type; if (astream->part.content_type != NULL) return; rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); rfc822_skip_lwsp(&parser); T_BEGIN { content_type = t_str_new(64); (void)rfc822_parse_content_type(&parser, content_type); astream->part.content_type = i_strdup(str_c(content_type)); } T_END; }
/* Parse a single atom (one or more atext bytes) and append it to str.
   Returns -1 if the input is empty or doesn't start with atext, 0 if the
   atom ran to the end of the input, and otherwise rfc822_skip_lwsp()'s
   result after the atom. */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *atom;

	/*
	   atom            = [CFWS] 1*atext [CFWS]
	   atext           =
	     ; Any character except controls, SP, and specials.
	*/
	if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
		return -1;

	/* the first byte is known to be atext, so start scanning after it */
	atom = ctx->data;
	do {
		ctx->data++;
	} while (ctx->data != ctx->end && IS_ATEXT(*ctx->data));

	str_append_n(str, atom, ctx->data - atom);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a quoted-string. ctx->data must point at the opening '"'. The
   contents between the quotes are appended to str with (CR)LF line folds
   removed; backslash escapes are copied as-is, including the backslash.
   Returns rfc822_skip_lwsp()'s result after the closing '"', or -1 if the
   input ends before the string is closed. */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *chunk;
	size_t chunk_len;

	i_assert(*ctx->data == '"');
	ctx->data++;

	/* chunk marks the start of the bytes not yet copied to str */
	chunk = ctx->data;
	while (ctx->data != ctx->end) {
		unsigned char c = *ctx->data;

		if (c == '"') {
			str_append_n(str, chunk, ctx->data - chunk);
			ctx->data++;
			return rfc822_skip_lwsp(ctx);
		}

		if (c == '\n') {
			/* folding whitespace, remove the (CR)LF */
			chunk_len = ctx->data - chunk;
			if (chunk_len > 0 && chunk[chunk_len-1] == '\r')
				chunk_len--;
			str_append_n(str, chunk, chunk_len);
			chunk = ctx->data + 1;
		} else if (c == '\\') {
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			/* flush up to and including the backslash; the
			   escaped byte starts the next chunk, so it can't be
			   taken for a closing quote */
			str_append_n(str, chunk, ctx->data - chunk);
			chunk = ctx->data;
		}
		ctx->data++;
	}

	/* missing '"' */
	return -1;
}
/* Parse a run of atext and '.' bytes (possibly empty) and append it to
   str. Returns 0 if the run reached the end of the input, otherwise
   rfc822_skip_lwsp()'s result after the run. */
static int
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *word = ctx->data;

	/*
	   atom            = [CFWS] 1*atext [CFWS]
	   atext           =
	     ; Any character except controls, SP, and specials.

	   The difference between this function and rfc822_parse_dot_atom()
	   is that this doesn't just silently skip over all the whitespace.
	*/
	while (ctx->data != ctx->end &&
	       (IS_ATEXT(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, word, ctx->data - word);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
static bool message_date_parser_tokens(struct message_date_parser_context *ctx, time_t *timestamp_r, int *timezone_offset_r) { struct tm tm; const unsigned char *value; size_t i, len; int ret; /* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone */ memset(&tm, 0, sizeof(tm)); rfc822_skip_lwsp(&ctx->parser); /* skip the optional weekday */ if (next_token(ctx, &value, &len) <= 0) return FALSE; if (len == 3) { if (*ctx->parser.data != ',') return FALSE; ctx->parser.data++; rfc822_skip_lwsp(&ctx->parser); if (next_token(ctx, &value, &len) <= 0) return FALSE; } /* dd */ if (len < 1 || len > 2 || !i_isdigit(value[0])) return FALSE; tm.tm_mday = value[0]-'0'; if (len == 2) { if (!i_isdigit(value[1])) return FALSE; tm.tm_mday = (tm.tm_mday * 10) + (value[1]-'0'); } /* month name */ if (next_token(ctx, &value, &len) <= 0 || len < 3) return FALSE; for (i = 0; i < 12; i++) { if (i_memcasecmp(month_names[i], value, 3) == 0) { tm.tm_mon = i; break; } } if (i == 12) return FALSE; /* [yy]yy */ if (next_token(ctx, &value, &len) <= 0 || (len != 2 && len != 4)) return FALSE; for (i = 0; i < len; i++) { if (!i_isdigit(value[i])) return FALSE; tm.tm_year = tm.tm_year * 10 + (value[i]-'0'); } if (len == 2) { /* two digit year, assume 1970+ */ if (tm.tm_year < 70) tm.tm_year += 100; } else { if (tm.tm_year < 1900) return FALSE; tm.tm_year -= 1900; } /* hh, allow also single digit */ if (next_token(ctx, &value, &len) <= 0 || len < 1 || len > 2 || !i_isdigit(value[0])) return FALSE; tm.tm_hour = value[0]-'0'; if (len == 2) { if (!i_isdigit(value[1])) return FALSE; tm.tm_hour = tm.tm_hour * 10 + (value[1]-'0'); } /* :mm (may be the last token) */ if (!IS_TIME_SEP(*ctx->parser.data)) return FALSE; ctx->parser.data++; rfc822_skip_lwsp(&ctx->parser); if (next_token(ctx, &value, &len) < 0 || len != 2 || !i_isdigit(value[0]) || !i_isdigit(value[1])) return FALSE; tm.tm_min = (value[0]-'0') * 10 + (value[1]-'0'); /* [:ss] */ if (ctx->parser.data != ctx->parser.end && 
IS_TIME_SEP(*ctx->parser.data)) { ctx->parser.data++; rfc822_skip_lwsp(&ctx->parser); if (next_token(ctx, &value, &len) <= 0 || len != 2 || !i_isdigit(value[0]) || !i_isdigit(value[1])) return FALSE; tm.tm_sec = (value[0]-'0') * 10 + (value[1]-'0'); } if ((ret = next_token(ctx, &value, &len)) < 0) return FALSE; if (ret == 0) { /* missing timezone */ *timezone_offset_r = 0; } else { /* timezone */ *timezone_offset_r = parse_timezone(value, len); } tm.tm_isdst = -1; *timestamp_r = utc_mktime(&tm); if (*timestamp_r == (time_t)-1) return FALSE; *timestamp_r -= *timezone_offset_r * 60; return TRUE; }