Example #1
0
/*
 * token_init - set up (or reset) the tokenizer's global state.
 *
 * yyinit() runs on every call.  The first call allocates the yylval text
 * buffer, creates the global word_t objects and runs init_token_array();
 * every later call only invokes token_clear().
 */
void token_init(void)
{
    static bool initialized = false;

    yyinit();

    /* Everything below is one-time setup; later calls just reset. */
    if (initialized) {
	token_clear();
	return;
    }

    initialized = true;

    /* Derive the multi-token length limit unless one is already set. */
    if (max_multi_token_len == 0)
	max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN;

    yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING;

    /* NOTE(review): the malloc() result is not checked before use. */
    yylval_text   = (byte *) malloc( yylval_text_size+D );
    yylval.u.text = yylval_text;
    yylval.leng   = 0;

    /* First IP Address in Received: statement */
    msg_addr = word_new( NULL, max_token_len );

    /* Message ID */
    msg_id = word_new( NULL, max_token_len * 3 );

    /* Message's first queue ID */
    queue_id = word_new( NULL, max_token_len );

    ipsave = word_new( NULL, max_token_len );

    /* Prefix words; word_news() is used to avoid compiler complaints. */
    w_to   = word_news("to:");		/* To:          */
    w_from = word_news("from:");	/* From:        */
    w_rtrn = word_news("rtrn:");	/* Return-Path: */
    w_subj = word_news("subj:");	/* Subject:     */
    w_recv = word_news("rcvd:");	/* Received:    */
    w_head = word_news("head:");	/* Header:      */
    w_mime = word_news("mime:");	/* Mime:        */
    w_ip   = word_news("ip:");		/* ip:          */
    w_url  = word_news("url:");		/* url:         */
    nonblank_line = word_news(NONBLANK);

    /* do multi-word token initializations */
    init_token_array();
}
Example #2
0
static void bogotune_init(void)
{
    const char *msg_count = MSG_COUNT;
    w_msg_count = word_news(msg_count);
    train       = wordhash_new();
    ns_and_sp   = tunelist_new("tr");		/* training lists */
    ns_msglists = tunelist_new("ns");		/* non-spam scoring lists */
    sp_msglists = tunelist_new("sp");		/* spam     scoring lists */

    return;
}
Example #3
0
/*
 * get_robx - compute the Robinson x value for the wordlist at
 * bfp->filepath, then either print it (when onlyprint is set) or write it
 * to the wordlist under the ROBX_W key.
 *
 * Returns EX_ERROR if compute_robinson_x() yields a negative value or the
 * final ds_write() result is non-zero; EX_OK otherwise.
 */
static ex_t get_robx(bfpath *bfp)
{
    double x_value;
    int rc = 0;
    dsv_t val;
    word_t *word_robx;

    init_wordlist("word", bfp->filepath, 0, WL_REGULAR);

    x_value = compute_robinson_x();
    if (x_value < 0)
	return EX_ERROR;

    /* Print-only mode: report the value and leave the wordlist untouched. */
    if (onlyprint) {
	printf("%f\n", x_value);
	return EX_OK;
    }

    word_robx = word_news(ROBX_W);

    /* compute_robinson_x() closes the wordlists, so init_wordlist()
       has to be called a second time before reopening them */
    init_wordlist("word", bfp->filepath, 0, WL_REGULAR);

    open_wordlists(DS_WRITE);

    /* The value is stored in the spam count, scaled by one million. */
    val.goodcount = 0;
    val.spamcount = (uint32_t) (x_value * 1000000);

    /* Repeat the write for as long as the data store asks for a retry. */
    for (;;) {
	rc = ds_write(word_lists->dsh, word_robx, &val);
	if (rc != DS_ABORT_RETRY)
	    break;
	rand_sleep(1000, 1000000);
	begin_wordlist(word_lists);
    }

    close_wordlists(true);
    free_wordlists();

    word_free(word_robx);

    return rc ? EX_ERROR : EX_OK;
}
Example #4
0
/*
 * display_words - look up words in the database described by bfp and print
 * their spam and good counts to fpo.
 *
 * bfp              - database location; bfp->filepath must be non-empty
 * argc, argv       - words to look up; when argc is 0 the tokens are read
 *                    from stdin with get_token() until it returns non-zero
 * show_probability - when true, an extra column with the calc_prob()
 *                    result is printed for each word found
 *
 * Returns EX_OK on success.  Returns EX_ERROR when the path is empty, the
 * transaction cannot be begun or finished, or ds_read() reports a failure.
 *
 * All resources acquired here (buff, and any token created by word_news())
 * are released on every return path.
 */
static ex_t display_words(bfpath *bfp, int argc, char **argv, bool show_probability)
{
    byte buf[BUFSIZE];
    buff_t *buff = buff_new(buf, 0, BUFSIZE);
    const byte *word;

    const char *path = bfp->filepath;

    const char *head_format = !show_probability ? "%-30s %6s %6s\n"   : "%-30s %6s  %6s  %6s\n";
    const char *data_format = !show_probability ? "%-30s %6lu %6lu\n" : "%-30s %6lu  %6lu  %f\n";

    void *dsh = NULL; /* initialize to silence bogus gcc warning */
    void *dbe;

    /* NOTE(review): rv is never assigned after this initialisation, so the
       abort/commit choice at "finish" depends only on DST_OK -- confirm
       whether a failed ds_read() was meant to set it. */
    int rv = 0;
    ex_t ec = EX_OK;

    dsv_t msgcnts;

    /* protect against broken stat(2) that succeeds for empty names */
    if (path == NULL || *path == '\0') {
	fprintf(stderr, "Expecting non-empty directory or file name.\n");
	buff_free(buff);	/* do not leak the buffer on early exit */
	return EX_ERROR;
    }

    dbe = ds_init(bfp);
    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL)
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "Cannot begin transaction.\n");
	buff_free(buff);	/* do not leak the buffer on early exit */
	return EX_ERROR;
    }

    if (show_probability)
    {
	ds_get_msgcounts(dsh, &msgcnts);
	robs = ROBS;
	robx = ROBX;
    }

    /* The three-column header format simply ignores the fourth argument. */
    fprintf(fpo, head_format, "", "spam", "good", "  Fisher");

    /*
     * argc == 0 : read tokens from stdin until get_token() fails.
     * argc  > 0 : consume argv; argc is forced to -1 after the last word
     *             so the loop ends instead of falling into stdin mode.
     */
    while (argc >= 0)
    {
	dsv_t val;
	word_t *token;
	int rc;

	unsigned long spam_count;
	unsigned long good_count;
	double rob_prob = 0.0;

	if (argc == 0)
	{
	    if (get_token(buff, stdin) != 0)
		break;
	    token = &buff->t;
	} else {
	    word = (const byte *) *argv++;
	    if (--argc == 0)
		argc = -1;
	    token = word_news((const char *)word);
	}

	rc = ds_read(dsh, token, &val);
	switch (rc) {
	    case 0:
		spam_count = val.spamcount;
		good_count = val.goodcount;

		if (!show_probability)
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count);
		else
		{
		    rob_prob = calc_prob(good_count, spam_count, msgcnts.goodcount, msgcnts.spamcount);
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count, rob_prob);
		}
		break;
	    case 1:
		/* nothing is printed for this result */
		break;
	    default:
		fprintf(stderr, "Cannot read from database.\n");
		ec = EX_ERROR;
		break;
	}

	/* Tokens made by word_news() are owned here; the stdin token
	   lives inside buff and is released with it. */
	if (token != &buff->t)
	    word_free(token);

	/* Leave only after the token is freed, so the read-error path
	   does not leak it. */
	if (ec != EX_OK)
	    goto finish;
    }

finish:
    if (DST_OK != rv ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) {
	fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit");
	ec = EX_ERROR;
    }
    ds_close(dsh);
    ds_cleanup(dbe);

    buff_free(buff);

    return ec;
}