Exemplo n.º 1
0
ex_t histogram(bfpath *bfp)
{
    ex_t rc;
    uint count;
    void *dsh, *dbe;
    dsv_t val;

    rhistogram_t hist;

    dbe = ds_init(bfp);
    if (dbe == NULL)
	return EX_ERROR;

    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL)
	return EX_ERROR;

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot begin transaction!\n");
	return EX_ERROR;
    }

    ds_get_msgcounts(dsh, &val);
    mgood = val.goodcount;
    mbad = val.spamcount;

    memset(&hist, 0, sizeof(hist));
    rc = ds_foreach(dsh, ds_histogram_hook, &hist);

    if (DST_OK != ds_txn_commit(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot commit transaction!\n");
	return EX_ERROR;
    }

    ds_close(dsh);
    ds_cleanup(dbe);

    count = print_histogram(&hist);

    if (verbose > 0) {
	printf("hapaxes:  ham %7u, spam %7u\n", ham_hapax, spam_hapax);
	printf("   pure:  ham %7u, spam %7u\n", ham_only,  spam_only);
    }
    else {
	printf("hapaxes:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n", ham_hapax, PCT(ham_hapax), spam_hapax, PCT(spam_hapax));
	printf("   pure:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n", ham_only,  PCT(ham_only),  spam_only,  PCT(spam_only));
    }

    return rc;
}
Exemplo n.º 2
0
static void ds_open_failure(bfpath *bfp, void *dbe)
{
    fprintf(stderr, "Error accessing file or directory '%s'.\n", bfp->filepath);
    if (errno != 0)
	fprintf(stderr, "error #%d - %s.\n", errno, strerror(errno));
    if (dbe != NULL)
	ds_cleanup(dbe);
    exit(EX_ERROR);
}
Exemplo n.º 3
0
static ex_t dump_wordlist(bfpath *bfp)
{
    ex_t rc;
    void *dbe;

    token_count = 0;

    dbe = ds_init(bfp);
    rc = ds_oper(dbe, bfp, DS_READ, ds_dump_hook, NULL);
    ds_cleanup(dbe);

    if (rc != EX_OK)
	fprintf(stderr, "error dumping tokens!\n");
    else
	if (verbose)
	    fprintf(dbgout, "%d tokens dumped\n", token_count);

    return rc;
}
Exemplo n.º 4
0
int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
{
    ex_t exitcode = EX_OK;

    fBogotune = true;		/* for rob_compute_spamicity() */

    dbgout = stderr;

    progtype = build_progtype(progname, DB_TYPE);

    ham_files  = filelist_new("ham");
    spam_files = filelist_new("spam");

    /* process args and read mailboxes */
    process_arglist(argc, argv);

    /* directories from command line and config file are already handled */
    if (ds_flag == DS_DSK) {

	bfpath *bfp;

	if (ds_path == NULL)
	    ds_path = get_directory(PR_ENV_BOGO);
	if (ds_path == NULL)
	    ds_path = get_directory(PR_ENV_HOME);

	if (ds_path == NULL) {
	    fprintf(stderr, "Cannot derive bogofilter directory from environment, aborting.\n");
	    exit(EX_ERROR);
	}

	set_bogohome(ds_path);
	bfp = bfpath_create(ds_path);

	if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
	    fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
	    exit(EX_ERROR);
	}

	if (bfp->exists && bfp->isdir) {
	    bfpath_free(bfp);
	    ds_path = mxcat(ds_path, DIRSEP_S, WORDLIST, NULL);	
	    bfp = bfpath_create(ds_path);
	    if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
		fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
		exit(EX_ERROR);
	    }
	}

	env = ds_init(bfp);
	
	init_wordlist("word", ds_path, 0, WL_REGULAR);
    }

    bogotune_init();

    if (ds_flag == DS_DSK)
	load_wordlist(load_hook, train);

    /* if encoding not yet set, assume old style */
    if (encoding == E_UNKNOWN)
	encoding = E_RAW;

    if (bogolex_file != NULL)
	bogolex();
    else
	bogotune();

    bogotune_free();

    if (ds_flag == DS_DSK)
	ds_cleanup(env);

    exit(exitcode);
}
Exemplo n.º 5
0
static ex_t display_words(bfpath *bfp, int argc, char **argv, bool show_probability)
{
    byte buf[BUFSIZE];
    buff_t *buff = buff_new(buf, 0, BUFSIZE);
    const byte *word;

    const char *path = bfp->filepath;

    const char *head_format = !show_probability ? "%-30s %6s %6s\n"   : "%-30s %6s  %6s  %6s\n";
    const char *data_format = !show_probability ? "%-30s %6lu %6lu\n" : "%-30s %6lu  %6lu  %f\n";

    void *dsh = NULL; /* initialize to silence bogus gcc warning */
    void *dbe;

    int rv = 0;
    ex_t ec = EX_OK;

    dsv_t msgcnts;

    /* protect against broken stat(2) that succeeds for empty names */
    if (path == NULL || *path == '\0') {
        fprintf(stderr, "Expecting non-empty directory or file name.\n");
        return EX_ERROR;
    }

    dbe = ds_init(bfp);
    dsh = ds_open(dbe, bfp, DS_READ);;
    if (dsh == NULL)
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "Cannot begin transaction.\n");
	return EX_ERROR;
    }

    if (show_probability)
    {
	ds_get_msgcounts(dsh, &msgcnts);
	robs = ROBS;
	robx = ROBX;
    }

    fprintf(fpo, head_format, "", "spam", "good", "  Fisher");
    while (argc >= 0)
    {
	dsv_t val;
	word_t *token;
	int rc;

	unsigned long spam_count;
	unsigned long good_count;
	double rob_prob = 0.0;
	
	if (argc == 0)
	{
	    if (get_token(buff, stdin) != 0)
		break;
	    token = &buff->t;
	} else {
	    word = (const byte *) *argv++;
	    if (--argc == 0)
		argc = -1;
	    token = word_news((const char *)word);
	}

	rc = ds_read(dsh, token, &val);
	switch (rc) {
	    case 0:
		spam_count = val.spamcount;
		good_count = val.goodcount;

		if (!show_probability)
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count);
		else
		{
		    rob_prob = calc_prob(good_count, spam_count, msgcnts.goodcount, msgcnts.spamcount);
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count, rob_prob);
		}
		break;
	    case 1:
		break;
	    default:
		fprintf(stderr, "Cannot read from database.\n");
		ec = EX_ERROR;
		goto finish;
	}

	if (token != &buff->t)
	    word_free(token);
    }

finish:
    if (DST_OK != rv ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) {
	fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit");
	ec = EX_ERROR;
    }
    ds_close(dsh);
    ds_cleanup(dbe);

    buff_free(buff);

    return ec;
}
Exemplo n.º 6
0
static int load_wordlist(bfpath *bfp)
{
    void *dsh;
    byte buf[BUFSIZE];
    byte *p;
    int rv = 0;
    size_t len;
    int load_count = 0;
    unsigned long line = 0;
    unsigned long count[IX_SIZE], date;
    YYYYMMDD today_save = today;

    void *dbe = ds_init(bfp);

    dsh = ds_open(dbe, bfp, (dbmode_t)(DS_WRITE | DS_LOAD));
    if (dsh == NULL)
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);

    memset(buf, '\0', BUFSIZE);

    if (DST_OK != ds_txn_begin(dsh))
	exit(EX_ERROR);

    for (;;) {
	dsv_t data;
	word_t *token;
	if (fgets((char *)buf, BUFSIZE, fpin) == NULL) {
	    if (ferror(fpin)) {
		perror(progname);
		rv = 2;
	    }
	    break;
	}

	line++;

	len = strlen((char *)buf);

	/* too short. */
	if (len < 4)
	    continue;

	p = spanword(buf);
	len = strlen((const char *)buf);

	if (max_token_len != 0 &&
	    len > max_token_len)
	    continue;		/* too long - discard */

	spamcount = (uint) atoi((const char *)p);
	if ((int) spamcount < 0)
	    spamcount = 0;
	p = spanword(p);

	goodcount = (uint) atoi((const char *)p);
	if ((int) goodcount < 0)
	    goodcount = 0;
	p = spanword(p);

	date = (uint) atoi((const char *)p);
	p = spanword(p);

	if (*p != '\0') {
	    fprintf(stderr,
		    "%s: Unexpected input [%s] on line %lu. "
		    "Expecting whitespace before count.\n",
		    progname, buf, line);
	    rv = 1;
	    break;
	}

	if (date == 0)				/* date as YYYYMMDD */
	    date = today_save;

	if (replace_nonascii_characters)
	    do_replace_nonascii_characters(buf, len);
 
 	token = word_new(buf, len);
	data.goodcount = goodcount;
	data.spamcount = spamcount;
	data.date = date;

	if (is_count((const char *)buf)
		&& !(maintain && discard_token(token, &data))) {
	    load_count += 1;
	    /* Slower, but allows multiple lists to be concatenated */
	    set_date(date);
	    switch (ds_read(dsh, token, &data)) {
		case 0:
		case 1:
		    break;
		default:
		    rv = 1;
	    }
	    data.spamcount += spamcount;
	    data.goodcount += goodcount;
	    if (ds_write(dsh, token, &data)) rv = 1;
	}
	word_free(token);
    }

    if (rv) {
	fprintf(stderr, "read or write error, aborting.\n");
	ds_txn_abort(dsh);
    } else {
	switch (ds_txn_commit(dsh)) {
	    case DST_FAILURE:
	    case DST_TEMPFAIL:
		fprintf(stderr, "commit failed\n");
		exit(EX_ERROR);
	    case DST_OK:
		break;
	}
    }

    ds_close(dsh);

    ds_cleanup(dbe);

    if (verbose)
	fprintf(dbgout, "%d tokens loaded\n", load_count);

    return rv;
}