void token_init(void) { static bool fTokenInit = false; yyinit(); if ( fTokenInit) { token_clear(); } else { fTokenInit = true; if (max_multi_token_len == 0) max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN; yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING; yylval_text = (byte *) malloc( yylval_text_size+D ); yylval.leng = 0; yylval.u.text = yylval_text; /* First IP Address in Received: statement */ msg_addr = word_new( NULL, max_token_len ); /* Message ID */ msg_id = word_new( NULL, max_token_len * 3 ); /* Message's first queue ID */ queue_id = word_new( NULL, max_token_len ); ipsave = word_new( NULL, max_token_len ); /* word_new() used to avoid compiler complaints */ w_to = word_news("to:"); /* To: */ w_from = word_news("from:"); /* From: */ w_rtrn = word_news("rtrn:"); /* Return-Path: */ w_subj = word_news("subj:"); /* Subject: */ w_recv = word_news("rcvd:"); /* Received: */ w_head = word_news("head:"); /* Header: */ w_mime = word_news("mime:"); /* Mime: */ w_ip = word_news("ip:"); /* ip: */ w_url = word_news("url:"); /* url: */ nonblank_line = word_news(NONBLANK); /* do multi-word token initializations */ init_token_array(); } return; }
static void bogotune_init(void) { const char *msg_count = MSG_COUNT; w_msg_count = word_news(msg_count); train = wordhash_new(); ns_and_sp = tunelist_new("tr"); /* training lists */ ns_msglists = tunelist_new("ns"); /* non-spam scoring lists */ sp_msglists = tunelist_new("sp"); /* spam scoring lists */ return; }
/*
 * get_robx -- compute the Robinson x value for the word list at
 * bfp->filepath and either print it or store it in the word list.
 *
 * When `onlyprint` is set the value is written to stdout with "%f\n".
 * Otherwise it is scaled by 1000000, stored as the spam count of the
 * ROBX_W record (good count 0), and written with ds_write(), retrying
 * for as long as ds_write() returns DS_ABORT_RETRY.
 *
 * Returns EX_ERROR if compute_robinson_x() yields a negative value or
 * the final ds_write() result is non-zero; EX_OK otherwise.
 */
static ex_t get_robx(bfpath *bfp)
{
    double robx_value;
    int write_status;
    dsv_t record;
    word_t *robx_key;

    init_wordlist("word", bfp->filepath, 0, WL_REGULAR);
    robx_value = compute_robinson_x();
    if (robx_value < 0)
        return EX_ERROR;

    if (onlyprint) {
        printf("%f\n", robx_value);
        return EX_OK;
    }

    robx_key = word_news(ROBX_W);

    /* since compute_robinson_x() closes the wordlists,
       init_wordlist() must be called again */
    init_wordlist("word", bfp->filepath, 0, WL_REGULAR);
    open_wordlists(DS_WRITE);

    record.goodcount = 0;
    record.spamcount = (uint32_t) (robx_value * 1000000);

    for (;;) {
        write_status = ds_write(word_lists->dsh, robx_key, &record);
        if (write_status != DS_ABORT_RETRY)
            break;
        /* back off, restart the word list, then try the write again */
        rand_sleep(1000, 1000000);
        begin_wordlist(word_lists);
    }

    close_wordlists(true);
    free_wordlists();
    word_free(robx_key);

    if (write_status != 0)
        return EX_ERROR;
    return EX_OK;
}
/*
 * display_words -- look up words in the datastore at bfp and print their
 * spam/good counts to `fpo`.
 *
 * bfp              datastore location; bfp->filepath must be non-empty.
 * argc, argv       words to look up.  When argc is 0 the words are read
 *                  from stdin with get_token() until it returns non-zero.
 * show_probability also print the calc_prob() value for each word, using
 *                  the message counts from ds_get_msgcounts().
 *
 * Words for which ds_read() returns 1 produce no output.  Any other
 * non-zero ds_read() result stops the loop, aborts the transaction and
 * yields EX_ERROR.
 *
 * Returns EX_OK on success; EX_ERROR on an empty path, if the
 * transaction cannot be begun or finished, or if a read fails.
 */
static ex_t display_words(bfpath *bfp, int argc, char **argv, bool show_probability)
{
    byte buf[BUFSIZE];
    buff_t *buff = buff_new(buf, 0, BUFSIZE);
    const char *path = bfp->filepath;

    const char *head_format = !show_probability ? "%-30s %6s %6s\n"   : "%-30s %6s  %6s  %6s\n";
    const char *data_format = !show_probability ? "%-30s %6lu %6lu\n" : "%-30s %6lu  %6lu  %f\n";

    void *dsh = NULL; /* initialize to silence bogus gcc warning */
    void *dbe;

    int rv = 0;       /* non-zero once a read has failed: selects abort over commit */
    ex_t ec = EX_OK;

    dsv_t msgcnts;    /* only filled in and used when show_probability is set */

    /* protect against broken stat(2) that succeeds for empty names */
    if (path == NULL || *path == '\0') {
        fprintf(stderr, "Expecting non-empty directory or file name.\n");
        buff_free(buff);
        return EX_ERROR;
    }

    dbe = ds_init(bfp);
    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL)
        /* print error, cleanup, and exit */
        ds_open_failure(bfp, dbe);

    if (DST_OK != ds_txn_begin(dsh)) {
        ds_close(dsh);
        ds_cleanup(dbe);
        buff_free(buff);
        fprintf(stderr, "Cannot begin transaction.\n");
        return EX_ERROR;
    }

    if (show_probability)
    {
        ds_get_msgcounts(dsh, &msgcnts);
        robs = ROBS;
        robx = ROBX;
    }

    /* the fourth column title is ignored by the three-column format */
    fprintf(fpo, head_format, "", "spam", "good", "  Fisher");

    /*
     * argc == 0 : keep reading tokens from stdin.
     * argc  > 0 : consume argv; argc is forced to -1 after the last word
     *             so the loop ends instead of falling back to stdin.
     */
    while (argc >= 0)
    {
        dsv_t val;
        word_t *token;
        int rc;

        if (argc == 0)
        {
            if (get_token(buff, stdin) != 0)
                break;
            token = &buff->t;
        } else {
            const char *arg = *argv++;
            if (--argc == 0)
                argc = -1;
            token = word_news(arg);
        }

        rc = ds_read(dsh, token, &val);
        switch (rc) {
        case 0:
        {
            unsigned long spam_count = val.spamcount;
            unsigned long good_count = val.goodcount;

            if (!show_probability)
                fprintf(fpo, data_format, token->u.text, spam_count, good_count);
            else
            {
                double rob_prob = calc_prob(good_count, spam_count, msgcnts.goodcount, msgcnts.spamcount);
                fprintf(fpo, data_format, token->u.text, spam_count, good_count, rob_prob);
            }
            break;
        }
        case 1:
            /* nothing to print for this word */
            break;
        default:
            fprintf(stderr, "Cannot read from database.\n");
            ec = EX_ERROR;
            rv = rc;    /* remember the failure so the transaction is aborted */
            break;
        }

        /* tokens built from argv are heap allocated; release them on every path */
        if (token != &buff->t)
            word_free(token);

        if (rv != 0)
            break;
    }

    if ((DST_OK != rv) ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) {
        fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit");
        ec = EX_ERROR;
    }
    ds_close(dsh);
    ds_cleanup(dbe);

    buff_free(buff);

    return ec;
}