Ejemplo n.º 1
0
/*
 * Create a store object that is backed by a disk file.
 *
 * The object is heap-allocated and zero-filled, its function table is set
 * to disk_functions, and its private handle is set to whatever
 * ds_open(filename, blockSize, blockCount) returns.
 *
 * Returns the new store.  If the allocation fails, debug builds stop in
 * assert(); builds compiled with NDEBUG return NULL instead.  This function
 * does not free the object it returns.
 */
struct store * store_open_disk(const char *filename, size_t blockSize, size_t blockCount) 
{
	struct store * store = malloc(sizeof *store);

	/* Debug builds keep trapping an allocation failure right here. */
	assert(store);
	/*
	 * assert() expands to nothing under NDEBUG, so guard explicitly:
	 * without this the memset below would write through a NULL pointer.
	 */
	if (store == NULL)
		return NULL;

	memset(store, 0, sizeof *store);
	store->functions = &disk_functions;
	/* NOTE(review): the ds_open() result is stored unchecked; confirm how it reports failure. */
	store->store_p = ds_open(filename, blockSize, blockCount);

	return store;
}
Ejemplo n.º 2
0
/*
 * Print a histogram of the datastore identified by bfp.
 *
 * The datastore is opened with DS_READ.  Inside one transaction the message
 * counts are copied into mgood/mbad and ds_histogram_hook is run over the
 * entries through ds_foreach().  After the datastore has been closed the
 * collected histogram and the hapax/pure totals are printed to stdout.
 *
 * Returns EX_ERROR if the datastore cannot be initialised or opened, or if
 * the transaction cannot be begun or committed; otherwise returns the
 * value produced by ds_foreach().
 */
ex_t histogram(bfpath *bfp)
{
    ex_t rc;
    /* NOTE(review): count is not read by name below; presumably PCT() uses it - confirm before removing. */
    uint count;
    void *dsh, *dbe;
    dsv_t val;

    rhistogram_t hist;

    dbe = ds_init(bfp);
    if (dbe == NULL)
	return EX_ERROR;

    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL) {
	/* Release what ds_init() set up, as every other exit path does. */
	ds_cleanup(dbe);
	return EX_ERROR;
    }

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot begin transaction!\n");
	return EX_ERROR;
    }

    /* Publish the message totals through the externally defined mgood/mbad. */
    ds_get_msgcounts(dsh, &val);
    mgood = val.goodcount;
    mbad = val.spamcount;

    /* Start from an all-zero histogram; the hook receives it as its argument. */
    memset(&hist, 0, sizeof(hist));
    rc = ds_foreach(dsh, ds_histogram_hook, &hist);

    if (DST_OK != ds_txn_commit(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot commit transaction!\n");
	return EX_ERROR;
    }

    ds_close(dsh);
    ds_cleanup(dbe);

    count = print_histogram(&hist);

    /* Verbose output shows raw totals only; the default output adds percentages. */
    if (verbose > 0) {
	printf("hapaxes:  ham %7u, spam %7u\n", ham_hapax, spam_hapax);
	printf("   pure:  ham %7u, spam %7u\n", ham_only,  spam_only);
    }
    else {
	printf("hapaxes:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n", ham_hapax, PCT(ham_hapax), spam_hapax, PCT(spam_hapax));
	printf("   pure:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n", ham_only,  PCT(ham_only),  spam_only,  PCT(spam_only));
    }

    return rc;
}
Ejemplo n.º 3
0
/*
 * Timing driver: repeatedly writes SECTOR-byte buffers to "/dev/disksim"
 * through the ds_* calls and logs, one line per iteration, the iteration
 * number and the time that elapsed across the second seek+write of that
 * iteration.  Results go to "ploppy.txt".
 *
 * Returns 0 on completion, 1 if the output file cannot be opened.
 */
int main(void)
{
    int count;
    int fd;
    int measurements = 10000;
    char buffer[SECTOR];
    FILE *fout;

    /*
     * Timestamps are kept in double: a float mantissa (24 bits) cannot hold
     * seconds*1000 together with sub-millisecond detail, which would make
     * the difference computed below meaningless for large second counts.
     */
    double time1;
    double time2;
    double outtime;
    struct timeval tv;

    fout = fopen("ploppy.txt", "w");
    if (fout == NULL) {
        /* Without this check the fprintf calls below would use a NULL stream. */
        perror("ploppy.txt");
        return 1;
    }

    for (count = 0; count < SECTOR; count++) {
        buffer[count] = 'X';
    }

    /* NOTE(review): the descriptor is used unchecked; confirm how ds_open() reports failure. */
    fd = ds_open("/dev/disksim", O_WRONLY);

    for (count = 0; count < measurements; count++) {
        /* First write of the iteration; it is not timed. */
        ds_lseek(fd, 0, SEEK_CUR);
        ds_write(fd, buffer, SECTOR);

        /* Sample before ... (value is sec*1000 + usec/1000) */
        ds_gettimeofday(&tv);
        time1 = (double) tv.tv_sec*1000 + (double) tv.tv_usec/1000;

        /* Skip one SECTOR forward, then write again. */
        ds_lseek(fd, SECTOR, SEEK_CUR);
        ds_write(fd, buffer, SECTOR);

        /* ... and after the second seek+write. */
        ds_gettimeofday(&tv);
        time2 = (double) tv.tv_sec*1000 + (double) tv.tv_usec/1000;

        outtime = time2 - time1;

        fprintf(fout, "%d ", count);
        fprintf(fout, "%f\n", outtime);
    }
    ds_close(fd);
    fclose(fout);

    return 0;
}
Ejemplo n.º 4
0
/*
 * Look up words in the datastore identified by bfp and print their counts.
 *
 * bfp               datastore to open (with DS_READ); bfp->filepath must be
 *                   a non-empty string.
 * argc, argv        words to look up.  When argc is 0 the words are read
 *                   from stdin with get_token() instead.
 * show_probability  when true, an extra column holding the result of
 *                   calc_prob() is printed for every word found.
 *
 * A header line and one line per word found are written to fpo.
 *
 * Returns EX_OK, or EX_ERROR when the path is empty, the transaction cannot
 * be begun or finished, or a read from the datastore fails.
 */
static ex_t display_words(bfpath *bfp, int argc, char **argv, bool show_probability)
{
    byte buf[BUFSIZE];
    buff_t *buff = buff_new(buf, 0, BUFSIZE);
    const byte *word;

    const char *path = bfp->filepath;

    const char *head_format = !show_probability ? "%-30s %6s %6s\n"   : "%-30s %6s  %6s  %6s\n";
    const char *data_format = !show_probability ? "%-30s %6lu %6lu\n" : "%-30s %6lu  %6lu  %f\n";

    void *dsh = NULL; /* initialize to silence bogus gcc warning */
    void *dbe;

    int rv = 0;
    ex_t ec = EX_OK;

    /* Only filled in, and only read, when show_probability is set. */
    dsv_t msgcnts;

    /* protect against broken stat(2) that succeeds for empty names */
    if (path == NULL || *path == '\0') {
        fprintf(stderr, "Expecting non-empty directory or file name.\n");
        /* buff was already created above; release it on this early exit too. */
        buff_free(buff);
        return EX_ERROR;
    }

    dbe = ds_init(bfp);
    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL) {
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);
    }

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	buff_free(buff);
	fprintf(stderr, "Cannot begin transaction.\n");
	return EX_ERROR;
    }

    if (show_probability)
    {
	ds_get_msgcounts(dsh, &msgcnts);
	robs = ROBS;
	robx = ROBX;
    }

    /* The fourth string is only consumed by the four-column header format. */
    fprintf(fpo, head_format, "", "spam", "good", "  Fisher");

    /*
     * argc doubles as the loop state:
     *   0   -> keep reading tokens from stdin until get_token() reports non-zero;
     *   > 0 -> take the next argv word; once the last one has been taken argc
     *          is forced to -1 so the loop stops after that word is processed.
     */
    while (argc >= 0)
    {
	dsv_t val;
	word_t *token;
	int rc;

	unsigned long spam_count;
	unsigned long good_count;
	double rob_prob = 0.0;
	
	if (argc == 0)
	{
	    if (get_token(buff, stdin) != 0)
		break;
	    /* This token lives inside buff and must not be passed to word_free(). */
	    token = &buff->t;
	} else {
	    word = (const byte *) *argv++;
	    if (--argc == 0)
		argc = -1;
	    token = word_news((const char *)word);
	}

	rc = ds_read(dsh, token, &val);
	switch (rc) {
	    case 0:
		spam_count = val.spamcount;
		good_count = val.goodcount;

		if (!show_probability)
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count);
		else
		{
		    rob_prob = calc_prob(good_count, spam_count, msgcnts.goodcount, msgcnts.spamcount);
		    fprintf(fpo, data_format, token->u.text, spam_count, good_count, rob_prob);
		}
		break;
	    case 1:
		/* Nothing is printed for this result (presumably "not found" - confirm). */
		break;
	    default:
		fprintf(stderr, "Cannot read from database.\n");
		ec = EX_ERROR;
		/* The jump skips the word_free() at the loop bottom, so free here. */
		if (token != &buff->t)
		    word_free(token);
		goto finish;
	}

	if (token != &buff->t)
	    word_free(token);
    }

finish:
    /*
     * NOTE(review): rv is never assigned after its initialisation, so which
     * of abort/commit runs depends only on how DST_OK compares with 0; the
     * read-error path above does not change it.  Confirm the intent.
     */
    if (DST_OK != rv ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) {
	fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit");
	ec = EX_ERROR;
    }
    ds_close(dsh);
    ds_cleanup(dbe);

    buff_free(buff);

    return ec;
}
Ejemplo n.º 5
0
/*
 * Load word records from the stream fpin into the datastore identified by
 * bfp, inside a single transaction.
 *
 * Each input line is parsed as a word followed by three numeric fields that
 * are stored into spamcount, goodcount and date, in that order.  Counts
 * read from the line are added to whatever ds_read() leaves in the record
 * before it is written back.
 *
 * Returns 0 on success, 1 after a parse error or a datastore read/write
 * error, and 2 after a read error on fpin.  A non-zero result means the
 * transaction was aborted.  The process exits with EX_ERROR if the
 * transaction cannot be begun or committed.
 *
 * NOTE(review): spamcount and goodcount are not declared in this function,
 * and the local count[] array is never referenced by name; they may be tied
 * together by macros defined elsewhere - confirm before touching either.
 */
static int load_wordlist(bfpath *bfp)
{
    void *dsh;
    byte buf[BUFSIZE];
    byte *p;
    int rv = 0;
    size_t len;
    int load_count = 0;
    unsigned long line = 0;
    unsigned long count[IX_SIZE], date;
    /* Snapshot of today, used as the default date for records that carry none. */
    YYYYMMDD today_save = today;

    void *dbe = ds_init(bfp);

    dsh = ds_open(dbe, bfp, (dbmode_t)(DS_WRITE | DS_LOAD));
    if (dsh == NULL)
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);

    memset(buf, '\0', BUFSIZE);

    /* No cleanup is attempted here: the process terminates immediately. */
    if (DST_OK != ds_txn_begin(dsh))
	exit(EX_ERROR);

    /* One iteration per input line; leaves the loop on EOF, read error or parse error. */
    for (;;) {
	dsv_t data;
	word_t *token;
	if (fgets((char *)buf, BUFSIZE, fpin) == NULL) {
	    /* NULL means EOF or error; only a stream error changes the result. */
	    if (ferror(fpin)) {
		perror(progname);
		rv = 2;
	    }
	    break;
	}

	line++;

	len = strlen((char *)buf);

	/* too short. */
	if (len < 4)
	    continue;

	/*
	 * presumably spanword() terminates the leading word in place and
	 * returns a pointer to the next field - the strlen() on buf right
	 * after it is used as the token length.  TODO confirm.
	 */
	p = spanword(buf);
	len = strlen((const char *)buf);

	/* A max_token_len of 0 disables the length limit. */
	if (max_token_len != 0 &&
	    len > max_token_len)
	    continue;		/* too long - discard */

	/* Values that read back as negative ints are clamped to 0. */
	spamcount = (uint) atoi((const char *)p);
	if ((int) spamcount < 0)
	    spamcount = 0;
	p = spanword(p);

	goodcount = (uint) atoi((const char *)p);
	if ((int) goodcount < 0)
	    goodcount = 0;
	p = spanword(p);

	date = (uint) atoi((const char *)p);
	p = spanword(p);

	/* Anything left after the third numeric field makes the line invalid. */
	if (*p != '\0') {
	    fprintf(stderr,
		    "%s: Unexpected input [%s] on line %lu. "
		    "Expecting whitespace before count.\n",
		    progname, buf, line);
	    rv = 1;
	    break;
	}

	if (date == 0)				/* date as YYYYMMDD */
	    date = today_save;

	if (replace_nonascii_characters)
	    do_replace_nonascii_characters(buf, len);
 
 	token = word_new(buf, len);
	data.goodcount = goodcount;
	data.spamcount = spamcount;
	data.date = date;

	/* The record is skipped when maintain is set and discard_token() rejects it. */
	if (is_count((const char *)buf)
		&& !(maintain && discard_token(token, &data))) {
	    load_count += 1;
	    /* Slower, but allows multiple lists to be concatenated */
	    set_date(date);
	    /* Results 0 and 1 are accepted; any other result marks the load as failed. */
	    switch (ds_read(dsh, token, &data)) {
		case 0:
		case 1:
		    break;
		default:
		    rv = 1;
	    }
	    /*
	     * NOTE(review): data was pre-filled with this line's counts above;
	     * whether ds_read() resets it for result 1 is not visible here.
	     */
	    data.spamcount += spamcount;
	    data.goodcount += goodcount;
	    if (ds_write(dsh, token, &data)) rv = 1;
	}
	word_free(token);
    }

    /* Any recorded failure discards the whole load; otherwise commit it. */
    if (rv) {
	fprintf(stderr, "read or write error, aborting.\n");
	ds_txn_abort(dsh);
    } else {
	switch (ds_txn_commit(dsh)) {
	    case DST_FAILURE:
	    case DST_TEMPFAIL:
		fprintf(stderr, "commit failed\n");
		exit(EX_ERROR);
	    case DST_OK:
		break;
	}
    }

    ds_close(dsh);

    ds_cleanup(dbe);

    if (verbose)
	fprintf(dbgout, "%d tokens loaded\n", load_count);

    return rv;
}