/*
 * Write one MIME part to stdout.
 *
 * part: the MIME object to display; cast to GMimePart for text parts and
 *       handed to display_image() for image parts.
 * ct:   the content type of `part`, used to pick the output path and to
 *       read the "charset" parameter.
 *
 * Text parts are written through a filter stream wrapped around stdout.
 * A charset filter converting from the part's declared charset to UTF-8 is
 * installed whenever one can be created.  text/plain parts additionally go
 * through the HTML filter unless the file-level `text_only` flag is non-zero.
 * Parts that are neither text/plain, text/html nor image/<anything> produce
 * no output.
 *
 * NOTE(review): `ostream` is not released before returning.  Before adding
 * an unref, confirm whether destroying the wrapped file stream would also
 * close stdout.
 */
static void display_part(GMimeObject *part, const GMimeContentType *ct) {
  GMimeStream *ostream, *fstream;
  GMimeDataWrapper *content;
  GMimeFilter *charset, *html;
  int is_plain;

  fstream = g_mime_stream_file_new(stdout);
  ostream = g_mime_stream_filter_new_with_stream(fstream);
  g_mime_stream_unref(fstream);

  /* Charset filter, installed whenever it can be created. */
  charset = g_mime_filter_charset_new(
      g_mime_content_type_get_parameter(ct, "charset"), "utf-8");
  if (charset != NULL) {
    g_mime_stream_filter_add(GMIME_STREAM_FILTER(ostream), charset);
    g_object_unref(charset);
  }

  is_plain = g_mime_content_type_is_type(ct, "text", "plain");

  if (is_plain || g_mime_content_type_is_type(ct, "text", "html")) {
    /* Only plain text is marked up; HTML parts are passed through as-is. */
    if (is_plain && text_only == 0) {
      html = g_mime_filter_html_new(GMIME_FILTER_HTML_CONVERT_SPACES |
                                    GMIME_FILTER_HTML_CONVERT_URLS |
                                    GMIME_FILTER_HTML_MARK_CITATION |
                                    GMIME_FILTER_HTML_CITE,
                                    0);
      g_mime_stream_filter_add(GMIME_STREAM_FILTER(ostream), html);
      g_object_unref(html);
    }

    content = g_mime_part_get_content_object(GMIME_PART(part));
    /* Skip the write and the unref when the part has no content object. */
    if (content != NULL) {
      g_mime_data_wrapper_write_to_stream(content, ostream);
      g_mime_stream_flush(ostream);
      g_object_unref(content);
    }
  } else if (g_mime_content_type_is_type(ct, "image", "*")) {
    /* Same matcher as the text branches, so "IMAGE/..." is accepted too. */
    display_image(part);
  }
}
parsed_article *parse_file(const char *file_name) { static parsed_article pa; GMimeStream *stream; GMimeMessage *msg = NULL; int offset; int file; printf("%s\n", file_name); if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { fprintf(stderr, "Can't open %s\n", file_name); return NULL; } stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { hstrcpy(pa.from, g_mime_message_get_header(msg, "From")); hstrcpy(pa.subject, g_mime_message_get_subject(msg)); hstrcpy(pa.message_id, g_mime_message_get_message_id(msg)); hstrcpy(pa.references, g_mime_message_get_header(msg, "references")); hstrcpy(pa.xref, g_mime_message_get_header(msg, "xref")); hstrcpy(pa.original_message_id, g_mime_message_get_header(msg, "original-message-id")); g_mime_message_get_date(msg, &pa.date, &offset); if (pa.xref != NULL && strstr(pa.xref, "gmane.spam.detected") != NULL) pa.spamp = 1; else pa.spamp = 0; g_mime_message_foreach_part(msg, count_part, (gpointer) &pa); g_mime_object_unref(GMIME_OBJECT(msg)); } close(file); return &pa; }
document* parse_file(const char *file_name) { int tallied_length = 0; GMimeStream *stream; GMimeMessage *msg = 0; // struct stat stat_buf; const char *author, *subject, *xref, *xref_end; time_t date; int offset; int num_words = 0; int file; InternetAddress *iaddr; InternetAddressList *iaddr_list; char *address; #if DEBUG printf("%s\n", file_name); #endif /* if ((file = stat(file_name, &stat_buf)) == -1) { perror("tokenizer"); return NULL; } */ // |O_STREAMING if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { perror("tokenizer"); return NULL; } #ifdef POSIX_FADV_NOREUSE no_reuse(file); #endif stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { table = g_hash_table_new(g_str_hash, g_str_equal); bufp = buffer; dword_table[0].word = NULL; bzero(saved_body, MAX_SAVED_BODY_LENGTH); saved_body_length = 0; author = g_mime_message_get_sender(msg); subject = g_mime_message_get_subject(msg); xref = g_mime_message_get_header(msg, "Xref"); g_mime_message_get_date(msg, &date, &offset); if (author != NULL && subject != NULL && xref != NULL) { tallied_length = tally_string(author, tallied_length); strncpy(doc.author, author, MAX_HEADER_LENGTH-1); /* Get the address from the From header. 
*/ if ((iaddr_list = internet_address_parse_string(author)) != NULL) { iaddr = iaddr_list->address; internet_address_set_name(iaddr, NULL); address = internet_address_to_string(iaddr, FALSE); strncpy(doc.address, address, MAX_HEADER_LENGTH-1); downcase_string(doc.address); free(address); internet_address_list_destroy(iaddr_list); } else { *doc.address = 0; } tallied_length = tally_string(subject, tallied_length); strncpy(doc.subject, subject, MAX_HEADER_LENGTH-1); doc.time = date; if ((xref = strchr(xref, ' ')) != NULL) { xref++; xref_end = strchr(xref, ':'); *doc.group = 0; strncat(doc.group, xref, min(xref_end-xref, MAX_HEADER_LENGTH-1)); xref_end++; sscanf(xref_end, "%d", &doc.article); } g_mime_message_foreach_part(msg, partFound, (gpointer) &tallied_length); strncpy(doc.body, saved_body, MAX_SAVED_BODY_LENGTH); g_hash_table_foreach(table, add_word_to_table, (gpointer) &num_words); dword_table[num_words].word = NULL; g_hash_table_destroy(table); g_mime_object_unref(GMIME_OBJECT(msg)); } else { close(file); return NULL; } } close(file); doc.words = dword_table; doc.num_words = num_words; return &doc; }