/*
 * Walk the message list in ns_or_sp->msgs and hand every message's wordhash
 * either to the global training wordhash or to one of the scoring sets.
 *
 * mode     - compared against REG_GOOD; a match means each message counts as
 *            one "good" message, anything else as one "bad" message.
 * ns_or_sp - list holder; its msgs list is consumed (every item->wh is set to
 *            NULL) and its u.sets[] bins receive the scoring messages.
 *
 * Splitting into training/scoring only happens when ds_flag == DS_RAM,
 * user_robx < EPS and msg_count_file is unset; otherwise every message goes
 * to scoring bin 0.
 *
 * The good/bad training totals are function-static, so they accumulate over
 * successive calls.
 */
static void distribute(int mode, tunelist_t *ns_or_sp)
{
    /* Running totals kept across calls. */
    static int train_good = 0;
    static int train_bad = 0;

    const int good = (mode == REG_GOOD);
    const int bad = 1 - good;

    const bool divvy = ds_flag == DS_RAM && user_robx < EPS && !msg_count_file;

    mlhead_t *msgs = ns_or_sp->msgs;
    mlitem_t *item;

    int n_train = 0;
    int n_score = 0;

    /* Training-to-scoring ratio derived from the list size.
     * NOTE(review): scale() is defined elsewhere; presumably it interpolates
     * between the small and large ratios - confirm. */
    double ratio = scale(msgs->count,
                         LIST_COUNT + TEST_COUNT,   /* small count */
                         LIST_COUNT + LIST_COUNT,   /* large count */
                         LIST_COUNT / TEST_COUNT,   /* small ratio */
                         LIST_COUNT / LIST_COUNT);  /* large ratio */

    item = msgs->head;
    while (item != NULL) {
        wordhash_t *wh = item->wh;

        /* A message is used for training while the training count, scaled
         * down by ratio, is still below the next scoring count. */
        bool to_train = divvy && n_train / ratio < n_score + 1;

        if (!to_train) {
            /* scoring set: spread over three bins when splitting */
            uint bin = divvy ? MOD(n_score, 3) : 0;

            msglist_add(ns_or_sp->u.sets[bin], wh);
            n_score += 1;
        } else {
            /* training set: merge into train, then release the hash */
            wordhash_set_counts(wh, good, bad);
            wordhash_add(train, wh, &wordprop_init);
            wordhash_free(wh);

            n_train += 1;
            train_good += good;
            train_bad += bad;
        }

        /* The list item no longer refers to the wordhash in either case. */
        item->wh = NULL;
        item = item->next;
    }

    if (divvy) {
        /* Make sure the message-count token exists in the training hash
         * and publish the accumulated totals. */
        wordhash_insert(train, w_msg_count, sizeof(wordprop_t), &wordprop_init);
        set_msg_counts(train_good, train_bad);
    }

    if (verbose > 1) {
        printf("%s: train_count = %d, score_count = %d\n",
               good ? "ns" : "sp",
               n_train, n_score);
    }
}
/*
 * Parse two space-separated decimal counts from str and pass them to
 * set_msg_counts(); afterwards refresh msg_count_header_len from
 * msg_count_header.
 *
 * str - text whose first number is the bad count and whose number after the
 *       first space is the good count.
 *
 * If str contains no space, the good count is taken as 0 rather than
 * dereferencing the NULL result of strchr().
 */
void set_msg_counts_from_str(char *str)
{
    uint b, g;
    char *sep;

    b = atoi(str);

    /* strchr() yields NULL when no separator is present; only step past
     * the space when one was actually found. */
    sep = strchr(str, ' ');
    if (sep != NULL)
        g = atoi(sep + 1);
    else
        g = 0;

    /* Note the argument order: good first, then bad. */
    set_msg_counts(g, b);

    msg_count_header_len = strlen(msg_count_header);
}
/*
 * Per-record callback: copy one database record into the global training
 * wordhash and react to the two special tokens.
 *
 * key      - token being loaded.
 * data     - record for the token; its goodcount/spamcount are stored.
 * userdata - unused.
 *
 * ".MSG_COUNT" forwards the record's counts to set_msg_counts().
 * ".ENCODING"  adopts the record's spamcount as the encoding when the
 *              encoding is still E_UNKNOWN, and terminates the program with
 *              EX_ERROR when it disagrees with the encoding already set.
 *
 * Always returns 0 when it returns at all.
 */
static int load_hook(word_t *key, dsv_t *data, void *userdata)
{
    wordprop_t *prop;

    (void) userdata;    /* callback signature requires it; not needed here */

    prop = wordhash_insert(train, key, sizeof(wordprop_t), &wordprop_init);
    prop->cnts.good = data->goodcount;
    prop->cnts.bad = data->spamcount;

    if (word_cmps(key, ".MSG_COUNT") == 0) {
        set_msg_counts(data->goodcount, data->spamcount);
    }

    /* Everything below concerns the encoding token only. */
    if (word_cmps(key, ".ENCODING") != 0) {
        return 0;
    }

    if (encoding == E_UNKNOWN) {
        encoding = data->spamcount;
    }

    if (encoding != data->spamcount) {
        fprintf(stderr, "Can't mix database encodings, i.e. utf-8 and any other.\n");
        exit(EX_ERROR);
    }

    return 0;
}