/*
 * Run process_message_callback over the parts of a parsed MIME message.
 *
 * message:  the parsed message to traverse.
 * post_dir: stored in the callback context (cbinfo.post_dir) for the
 *           callback to use.
 *
 * Returns cbinfo.count as left by the callback after the traversal
 * (presumably the number of parts it handled -- confirm against
 * process_message_callback).
 */
static int process_message(GMimeMessage *message, const char *post_dir)
{
	struct mime_cbinfo cbinfo = {
		.count = 0,
		.post_dir = post_dir,
	};

	/* The traversal entry point is selected at build time by AST_GMIME_VER_24. */
#ifdef AST_GMIME_VER_24
	g_mime_message_foreach(message, process_message_callback, &cbinfo);
#else
	g_mime_message_foreach_part(message, process_message_callback, &cbinfo);
#endif

	return cbinfo.count;
}

/*
 * Find a sequence of bytes within a binary array.
 *
 * inbuf/inlen:       buffer to search and its length in bytes.
 * matchbuf/matchlen: byte sequence to look for and its length in bytes.
 *
 * Returns the offset of the first occurrence of matchbuf in inbuf, or -1
 * when there is none. An empty (or negative-length) sequence, or one longer
 * than the buffer, is reported as not found.
 */
static int find_sequence(char * inbuf, int inlen, char * matchbuf, int matchlen)
{
	int current;
	int comp;

	if (matchlen <= 0 || inlen < matchlen) {
		return -1;
	}

	/*
	 * The last valid start offset is inlen - matchlen, so the bound must be
	 * inclusive; otherwise a match that ends exactly at the end of the buffer
	 * is never examined.
	 */
	for (current = 0; current <= inlen - matchlen; current++) {
		for (comp = 0; comp < matchlen; comp++) {
			if (inbuf[current + comp] != matchbuf[comp]) {
				break;
			}
		}
		if (comp == matchlen) {
			return current;
		}
	}

	return -1;
}
parsed_article *parse_file(const char *file_name) { static parsed_article pa; GMimeStream *stream; GMimeMessage *msg = NULL; int offset; int file; printf("%s\n", file_name); if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { fprintf(stderr, "Can't open %s\n", file_name); return NULL; } stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { hstrcpy(pa.from, g_mime_message_get_header(msg, "From")); hstrcpy(pa.subject, g_mime_message_get_subject(msg)); hstrcpy(pa.message_id, g_mime_message_get_message_id(msg)); hstrcpy(pa.references, g_mime_message_get_header(msg, "references")); hstrcpy(pa.xref, g_mime_message_get_header(msg, "xref")); hstrcpy(pa.original_message_id, g_mime_message_get_header(msg, "original-message-id")); g_mime_message_get_date(msg, &pa.date, &offset); if (pa.xref != NULL && strstr(pa.xref, "gmane.spam.detected") != NULL) pa.spamp = 1; else pa.spamp = 0; g_mime_message_foreach_part(msg, count_part, (gpointer) &pa); g_mime_object_unref(GMIME_OBJECT(msg)); } close(file); return &pa; }
document* parse_file(const char *file_name) { int tallied_length = 0; GMimeStream *stream; GMimeMessage *msg = 0; // struct stat stat_buf; const char *author, *subject, *xref, *xref_end; time_t date; int offset; int num_words = 0; int file; InternetAddress *iaddr; InternetAddressList *iaddr_list; char *address; #if DEBUG printf("%s\n", file_name); #endif /* if ((file = stat(file_name, &stat_buf)) == -1) { perror("tokenizer"); return NULL; } */ // |O_STREAMING if ((file = open(file_name, O_RDONLY|O_STREAMING)) == -1) { perror("tokenizer"); return NULL; } #ifdef POSIX_FADV_NOREUSE no_reuse(file); #endif stream = g_mime_stream_fs_new(file); msg = g_mime_parser_construct_message(stream); g_mime_stream_unref(stream); if (msg != 0) { table = g_hash_table_new(g_str_hash, g_str_equal); bufp = buffer; dword_table[0].word = NULL; bzero(saved_body, MAX_SAVED_BODY_LENGTH); saved_body_length = 0; author = g_mime_message_get_sender(msg); subject = g_mime_message_get_subject(msg); xref = g_mime_message_get_header(msg, "Xref"); g_mime_message_get_date(msg, &date, &offset); if (author != NULL && subject != NULL && xref != NULL) { tallied_length = tally_string(author, tallied_length); strncpy(doc.author, author, MAX_HEADER_LENGTH-1); /* Get the address from the From header. 
*/ if ((iaddr_list = internet_address_parse_string(author)) != NULL) { iaddr = iaddr_list->address; internet_address_set_name(iaddr, NULL); address = internet_address_to_string(iaddr, FALSE); strncpy(doc.address, address, MAX_HEADER_LENGTH-1); downcase_string(doc.address); free(address); internet_address_list_destroy(iaddr_list); } else { *doc.address = 0; } tallied_length = tally_string(subject, tallied_length); strncpy(doc.subject, subject, MAX_HEADER_LENGTH-1); doc.time = date; if ((xref = strchr(xref, ' ')) != NULL) { xref++; xref_end = strchr(xref, ':'); *doc.group = 0; strncat(doc.group, xref, min(xref_end-xref, MAX_HEADER_LENGTH-1)); xref_end++; sscanf(xref_end, "%d", &doc.article); } g_mime_message_foreach_part(msg, partFound, (gpointer) &tallied_length); strncpy(doc.body, saved_body, MAX_SAVED_BODY_LENGTH); g_hash_table_foreach(table, add_word_to_table, (gpointer) &num_words); dword_table[num_words].word = NULL; g_hash_table_destroy(table); g_mime_object_unref(GMIME_OBJECT(msg)); } else { close(file); return NULL; } } close(file); doc.words = dword_table; doc.num_words = num_words; return &doc; }