/*
 * is_spam:
 * @mime_message: the message whose headers are inspected
 *
 * Checks the headers written by two spam filters.
 *
 * Returns TRUE if the "X-Spam-Status" header starts with "yes", or if the
 * "X-Bogosity" header starts with "yes" or "spam" (prefix comparisons go
 * through mn_ascii_str_case_has_prefix()). Returns FALSE otherwise, and
 * also when @mime_message fails the GMIME_IS_MESSAGE() check.
 */
static gboolean
is_spam (GMimeMessage *mime_message)
{
  const char *spam;

  g_return_val_if_fail(GMIME_IS_MESSAGE(mime_message), FALSE);

  /* SpamAssassin */
  spam = g_mime_message_get_header(mime_message, "X-Spam-Status");
  if (spam && mn_ascii_str_case_has_prefix(spam, "yes"))
    return TRUE;

  /*
   * bogofilter: the classification tag is configurable. The two-state tag
   * set uses "Yes"/"No", the three-state set uses "Spam"/"Ham"/"Unsure",
   * so both positive tags have to be recognised.
   */
  spam = g_mime_message_get_header(mime_message, "X-Bogosity");
  if (spam
      && (mn_ascii_str_case_has_prefix(spam, "yes")
          || mn_ascii_str_case_has_prefix(spam, "spam")))
    return TRUE;

  return FALSE;
}
parsed_article *parse_file(const char *file_name) { static parsed_article pa; GMimeStream *stream; GMimeMessage *msg = NULL; int offset; int file; printf("%s\n", file_name); if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { fprintf(stderr, "Can't open %s\n", file_name); return NULL; } stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { hstrcpy(pa.from, g_mime_message_get_header(msg, "From")); hstrcpy(pa.subject, g_mime_message_get_subject(msg)); hstrcpy(pa.message_id, g_mime_message_get_message_id(msg)); hstrcpy(pa.references, g_mime_message_get_header(msg, "references")); hstrcpy(pa.xref, g_mime_message_get_header(msg, "xref")); hstrcpy(pa.original_message_id, g_mime_message_get_header(msg, "original-message-id")); g_mime_message_get_date(msg, &pa.date, &offset); if (pa.xref != NULL && strstr(pa.xref, "gmane.spam.detected") != NULL) pa.spamp = 1; else pa.spamp = 0; g_mime_message_foreach_part(msg, count_part, (gpointer) &pa); g_mime_object_unref(GMIME_OBJECT(msg)); } close(file); return &pa; }
document* parse_file(const char *file_name) { int tallied_length = 0; GMimeStream *stream; GMimeMessage *msg = 0; // struct stat stat_buf; const char *author, *subject, *xref, *xref_end; time_t date; int offset; int num_words = 0; int file; InternetAddress *iaddr; InternetAddressList *iaddr_list; char *address; #if DEBUG printf("%s\n", file_name); #endif /* if ((file = stat(file_name, &stat_buf)) == -1) { perror("tokenizer"); return NULL; } */ // |O_STREAMING if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { perror("tokenizer"); return NULL; } #ifdef POSIX_FADV_NOREUSE no_reuse(file); #endif stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { table = g_hash_table_new(g_str_hash, g_str_equal); bufp = buffer; dword_table[0].word = NULL; bzero(saved_body, MAX_SAVED_BODY_LENGTH); saved_body_length = 0; author = g_mime_message_get_sender(msg); subject = g_mime_message_get_subject(msg); xref = g_mime_message_get_header(msg, "Xref"); g_mime_message_get_date(msg, &date, &offset); if (author != NULL && subject != NULL && xref != NULL) { tallied_length = tally_string(author, tallied_length); strncpy(doc.author, author, MAX_HEADER_LENGTH-1); /* Get the address from the From header. 
*/ if ((iaddr_list = internet_address_parse_string(author)) != NULL) { iaddr = iaddr_list->address; internet_address_set_name(iaddr, NULL); address = internet_address_to_string(iaddr, FALSE); strncpy(doc.address, address, MAX_HEADER_LENGTH-1); downcase_string(doc.address); free(address); internet_address_list_destroy(iaddr_list); } else { *doc.address = 0; } tallied_length = tally_string(subject, tallied_length); strncpy(doc.subject, subject, MAX_HEADER_LENGTH-1); doc.time = date; if ((xref = strchr(xref, ' ')) != NULL) { xref++; xref_end = strchr(xref, ':'); *doc.group = 0; strncat(doc.group, xref, min(xref_end-xref, MAX_HEADER_LENGTH-1)); xref_end++; sscanf(xref_end, "%d", &doc.article); } g_mime_message_foreach_part(msg, partFound, (gpointer) &tallied_length); strncpy(doc.body, saved_body, MAX_SAVED_BODY_LENGTH); g_hash_table_foreach(table, add_word_to_table, (gpointer) &num_words); dword_table[num_words].word = NULL; g_hash_table_destroy(table); g_mime_object_unref(GMIME_OBJECT(msg)); } else { close(file); return NULL; } } close(file); doc.words = dword_table; doc.num_words = num_words; return &doc; }
MNMessage * mn_message_new_from_mime_message_full (GType type, MNMailbox *mailbox, GMimeMessage *mime_message, const char *mid, const char *uri, MNMessageFlags flags, gboolean handle_status) { MNMessage *message; const char *id; time_t sent_time; const char *from; const char *subject; char *decoded_from; char *decoded_subject; g_return_val_if_fail(type != 0, NULL); g_return_val_if_fail(MN_IS_MAILBOX(mailbox), NULL); g_return_val_if_fail(GMIME_IS_MESSAGE(mime_message), NULL); if (is_spam(mime_message)) return NULL; if (handle_status) { const char *status; status = g_mime_message_get_header(mime_message, "Status"); if (status && strchr(status, 'R')) return NULL; /* the message was read */ else if (status && strchr(status, 'O')) flags &= ~MN_MESSAGE_NEW; else flags |= MN_MESSAGE_NEW; } id = g_mime_message_get_message_id(mime_message); g_mime_message_get_date(mime_message, &sent_time, NULL); from = g_mime_message_get_sender(mime_message); subject = g_mime_message_get_subject(mime_message); decoded_from = from ? g_mime_utils_header_decode_text(from) : NULL; decoded_subject = subject ? g_mime_utils_header_decode_text(subject) : NULL; message = g_object_new(type, MN_MESSAGE_PROP_MAILBOX(mailbox), MN_MESSAGE_PROP_SENT_TIME(sent_time), MN_MESSAGE_PROP_ID((char *) id), MN_MESSAGE_PROP_MID((char *) mid), MN_MESSAGE_PROP_FROM(decoded_from), MN_MESSAGE_PROP_SUBJECT(decoded_subject), MN_MESSAGE_PROP_URI((char *) uri), MN_MESSAGE_PROP_FLAGS(flags), NULL); g_free(decoded_from); g_free(decoded_subject); return message; }