/** *************************************************************************** * A few simple tests to check if it works at all. * */ static void basic() { (void)printf("----- basic -----\n"); struct bloom bloom; assert(bloom_init(&bloom, 0, 1.0) == 1); assert(bloom_init(&bloom, 10, 0) == 1); assert(bloom.ready == 0); assert(bloom_add(&bloom, "hello world", 11) == -1); assert(bloom_check(&bloom, "hello world", 11) == -1); bloom_free(&bloom); assert(bloom_init(&bloom, 102, 0.1) == 0); assert(bloom.ready == 1); bloom_print(&bloom); assert(bloom_check(&bloom, "hello world", 11) == 0); assert(bloom_add(&bloom, "hello world", 11) == 0); assert(bloom_check(&bloom, "hello world", 11) == 1); assert(bloom_add(&bloom, "hello world", 11) > 0); assert(bloom_add(&bloom, "hello", 5) == 0); assert(bloom_add(&bloom, "hello", 5) > 0); assert(bloom_check(&bloom, "hello", 5) == 1); bloom_free(&bloom); }
int main() { //Part 1. Evaluating Hash Functions int bloomsize = 100; int x; bloom_filter_t bloomfilter; bloom_init(&bloomfilter, bloomsize); printf ("Hash1: %i %i %i %i %i %i\n",hash1(&bloomfilter, 0),hash1(&bloomfilter, 1), hash1(&bloomfilter, 2),hash1(&bloomfilter, 3),hash1(&bloomfilter, 13), hash1(&bloomfilter, 97)); printf ("Hash2: %i %i %i %i %i %i\n",hash2(&bloomfilter, 0),hash2(&bloomfilter, 1), hash2(&bloomfilter, 2),hash2(&bloomfilter, 3),hash2(&bloomfilter, 13), hash2(&bloomfilter, 97)); bloom_destroy(&bloomfilter); //Part 2: printf("\nDoing Smoke Test.\n"); bloomsize = 1000; bloom_init(&bloomfilter, bloomsize); for (x= 0; x< 70; x++) { bloom_add(&bloomfilter, x); } int totalbits = 0; for (x = 0; x< bloomsize; x++) { totalbits += get_bit(&bloomfilter, x); } printf("Total bits set: %i\n",totalbits); bloom_destroy(&bloomfilter); //Part 3 printf("\nDoing N_HASHES Test.\n"); int array1[100]; int array2[100]; gen_rand(array1, 100, 1000000); gen_rand(array2, 100, 1000000); run_test3(array1, array2, 100); }
/** *************************************************************************** * Create a bloom filter with given parameters and add 'count' random elements * into it to see if collission rates are within expectations. * */ static void add_random(int entries, double error, int count) { (void)printf("----- add_random(%d, %f, %d) -----\n", entries, error, count); struct bloom bloom; assert(bloom_init(&bloom, entries, error) == 0); bloom_print(&bloom); char block[32]; int collisions = 0; int fd = open("/dev/urandom", O_RDONLY); int n; for (n = 0; n < count; n++) { assert(read(fd, block, 32) == 32); if (bloom_add(&bloom, (void *)block, 32)) { collisions++; } } (void)close(fd); bloom_free(&bloom); (void)printf("added %d elements, got %d collisions\n", count, collisions); if (count <= entries) { assert(collisions <= (entries * error)); } else if (count <= entries * 2) { assert(collisions < (2 * entries * error)); } }
Datum pgbloomfun_init(PG_FUNCTION_ARGS) { int capacity = PG_GETARG_INT32(0); int growth_factor = PG_GETARG_INT32(1); double error_rate = PG_GETARG_FLOAT8(2); pgbloom_t pgbloom; size_t bloom_size; bytea *res; if (capacity <= 0) elog(ERROR, "pgbloomfun: bloom filter capacity must be positive"); if (growth_factor < 0 || growth_factor > 1000) elog(ERROR, "pgbloomfun: growth factor must be between 0 and 1000"); if (error_rate <= 0.0 || error_rate >= 1.0) elog(ERROR, "pgbloomfun: error rate must be higher than 0.0 and lower than 1.0"); pgbloom.version = PGBLOOM_VERSION; pgbloom.total_entries = pgbloom.last_entries = 0; pgbloom.total_capacity = pgbloom.last_capacity = capacity; pgbloom.growth_factor = growth_factor; pgbloom.error_rate = error_rate; pgbloom.filters = 1; bloom_init(&pgbloom.bloom, capacity, error_rate); bloom_size = sizeof(pgbloom) + pgbloom.bloom.bits / 8; res = palloc(VARHDRSZ + bloom_size); SET_VARSIZE(res, VARHDRSZ + bloom_size); memset(VARDATA(res), 0, bloom_size); memcpy(VARDATA(res), &pgbloom, sizeof(pgbloom)); PG_RETURN_BYTEA_P(res); }
/*
 * Adds every element of array1 to a 1000-bit filter, prints how many
 * bits are set, then prints the sum of bloom_check() results over the
 * elements of array2.
 */
void run_test3(int* array1, int* array2, int arraysize)
{
    const int nbits = 1000;
    bloom_filter_t filter;
    int bits_set = 0;
    int matches = 0;
    int i;

    bloom_init(&filter, nbits);

    /* Populate the filter from the first array. */
    i = 0;
    while (i < arraysize) {
        bloom_add(&filter, array1[i]);
        i++;
    }

    /* Count every bit that is set. */
    i = 0;
    while (i < nbits) {
        bits_set += get_bit(&filter, i);
        i++;
    }
    printf("Total bits set: %i\n", bits_set);

    /* Accumulate the check results for the second array. */
    i = 0;
    while (i < arraysize) {
        matches += bloom_check(&filter, array2[i]);
        i++;
    }
    printf("Array2 bits set: %i\n", matches);

    bloom_destroy(&filter);
}
/** *************************************************************************** * Simple loop to compare performance. * */ static void perf_loop(int entries, int count) { (void)printf("----- perf_loop -----\n"); struct bloom bloom; assert(bloom_init(&bloom, entries, 0.001) == 0); bloom_print(&bloom); int i; int collisions = 0; struct timeval tp; (void)gettimeofday(&tp, NULL); long before = (tp.tv_sec * 1000L) + (tp.tv_usec / 1000L); for (i = 0; i < count; i++) { if (bloom_add(&bloom, (void *)&i, sizeof(int))) { collisions++; } } (void)gettimeofday(&tp, NULL); long after = (tp.tv_sec * 1000L) + (tp.tv_usec / 1000L); (void)printf("Added %d elements of size %d, took %d ms (collisions=%d)\n", count, (int)sizeof(int), (int)(after - before), collisions); (void)printf("%d,%d,%ld\n", entries, bloom.bytes, after - before); bloom_free(&bloom); }
Datum pgbloomfun_add(PG_FUNCTION_ARGS) { bytea *newbloomba, *bloomba = PG_GETARG_BYTEA_P(0); text *key = PG_GETARG_TEXT_P(1); pgbloom_t *pgbloom = get_pgbloom(bloomba); bloom_t newbloom, *bloom = NULL; size_t newbloom_size; int space_left, i; space_left = (pgbloom->last_capacity > pgbloom->last_entries) || (pgbloom->growth_factor == 0); for (i=0; i<pgbloom->filters; i++) { bloom = next_bloom(bloomba, bloom); if (bloom == NULL) { elog(ERROR, "pgbloomfun: missing filter in bloom object"); } if (i == pgbloom->filters - 1 && space_left) { if (bloom_add(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ) == 0) { pgbloom->total_entries ++; pgbloom->last_entries ++; } PG_RETURN_BYTEA_P(bloomba); } else if (bloom_check(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ)) { PG_RETURN_BYTEA_P(bloomba); /* key already exists */ } } /* create a new filter */ pgbloom->filters += 1; pgbloom->total_entries += 1; pgbloom->last_entries = 1; pgbloom->last_capacity *= pgbloom->growth_factor; pgbloom->total_capacity += pgbloom->last_capacity; /* calculate and allocate space */ bloom_init(&newbloom, pgbloom->last_capacity, pgbloom->error_rate); newbloom_size = sizeof(newbloom) + newbloom.bits / 8; newbloomba = palloc(VARSIZE(bloomba) + newbloom_size); memcpy(newbloomba, bloomba, VARSIZE(bloomba)); SET_VARSIZE(newbloomba, VARSIZE(bloomba) + newbloom_size); /* initialize the new bloom filter and add the new key to it */ bloom = (bloom_t *) (((unsigned char *) newbloomba) + VARSIZE(bloomba)); memset(bloom, 0, newbloom_size); memcpy(bloom, &newbloom, sizeof(newbloom)); bloom_add(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ); PG_RETURN_BYTEA_P(newbloomba); }
/*
 * Create a filter with bloom_init() and populate it with bloom_read().
 *
 * Returns the filter, or NULL (after printing a message to stderr) when
 * bloom_init() yields NULL or bloom_read() returns 0.
 */
BLOOM *bloom_load(uint32_t size, double error_rate)
{
    BLOOM *bloom = bloom_init(size, error_rate);

    /* Do not hand a NULL filter to bloom_read(). */
    if (bloom == NULL || bloom_read(bloom) == 0) {
        /* NOTE(review): when bloom_read() fails the filter created above is
         * not released here; the matching destructor is not visible from
         * this function - confirm and free it. */
        fprintf(stderr, "Error, could not alloc a new bloom filter\n");
        return NULL;
    }

    return bloom;
}
int ppbloom_init(int n, double e) { int err; entries = n / 2; error = e; err = bloom_init(ppbloom + PING, entries, error); if (err) return err; err = bloom_init(ppbloom + PONG, entries, error); if (err) return err; bloom_count[PING] = 0; bloom_count[PONG] = 0; current = PING; return 0; }
/*
 * Allocate and initialise a bloom filter.
 *
 * size   - length of the filter in BITS.
 * nfuncs - count passed to bloom_init() along with the variadic
 *          arguments (presumably the number of hash functions that
 *          follow - confirm against bloom_init()).
 *
 * Returns the new filter, or NULL if allocation or initialisation fails.
 */
bloom_t* bloom_create(size_t size, uint nfuncs, ...)
{
    bloom_t* bloom;
    va_list arg_list;
    int initialised;

    if ((bloom = bloom_new()) == NULL) {
        return NULL;
    }

    /* A va_list must be started before it is handed to bloom_init() and
     * ended before this function returns; using it without va_start() is
     * undefined behaviour. */
    va_start(arg_list, nfuncs);
    initialised = bloom_init(bloom, size, nfuncs, arg_list) ? 1 : 0;
    va_end(arg_list);

    if (!initialised) {
        free(bloom);
        return NULL;
    }

    return bloom;
}
/*
 * Build a bloom filter from the reference file `ref_name` and save it.
 *
 * k_mer == 0 means "pick a k-mer size from the file size" via
 * kmer_suggestion(). Always returns 0.
 */
int build (char *ref_name, char *target_path, int k_mer, double error_rate, char *prefix)
{
  char *ref_data = mmaping (ref_name);
  bloom *filter = NEW (bloom);

  /* Caller-supplied k-mer size, or a suggested one when none was given. */
  filter->k_mer = k_mer;
  if (k_mer == 0)
    {
      filter->k_mer = kmer_suggestion (get_size (ref_name));
    }

  filter->stat.e = error_rate;
  filter->dx = filter->k_mer * filter->k_mer;
  filter->stat.capacity = strlen (ref_data);

  /* get_rec() is given the stat block before its fields are passed on. */
  get_rec (&filter->stat);
  bloom_init (filter,
              filter->stat.elements,
              filter->stat.capacity,
              filter->stat.e,
              filter->stat.ideal_hashes,
              NULL,
              3);

  ref_add (filter, ref_data);
  save_bloom (ref_name, filter, prefix, target_path);

  return 0;
}
/*
 * Initialise `bl` for `capacity` elements at `error_rate`.
 *
 * Sizing comes from get_suggestion(); the k-mer size is `k_mer`, or a
 * value derived from the size of `filename` when k_mer is 0.
 */
void init_bloom(bloom *bl, BIGNUM capacity,float error_rate,int k_mer,char *filename)
{
  const int init_flags = 3;

  get_suggestion (&bl->stat, capacity, error_rate);

#ifdef DEBUG
  printf ("Capacity: %lld\n", bl->stat.capacity);
  printf ("Vector size: %lld\n", bl->stat.elements);
  printf ("Ideal hashes: %d\n", bl->stat.ideal_hashes);
  printf ("Error rate: %f\n", bl->stat.e);
  printf ("Real size: %lld\n", bl->stat.elements / 8);
#endif

  bloom_init (bl,
              bl->stat.elements,
              bl->stat.capacity,
              bl->stat.e,
              bl->stat.ideal_hashes,
              NULL,
              init_flags);

  /* The k-mer fields are set after bloom_init(), as in the original flow. */
  bl->k_mer = k_mer;
  if (k_mer == 0)
    {
      bl->k_mer = kmer_suggestion (get_size (filename));
    }
  bl->dx = bl->k_mer * bl->k_mer;
}
int ppbloom_add(const void *buffer, int len) { int err; err = bloom_add(ppbloom + current, buffer, len); if (err == -1) return err; bloom_count[current]++; if (bloom_count[current] >= entries) { bloom_count[current] = 0; current = current == PING ? PONG : PING; bloom_free(ppbloom + current); bloom_init(ppbloom + current, entries, error); } return 0; }
static void runtest (void) { unsigned char md1[SHA256_DIGEST_LENGTH]; unsigned char md2[SHA256_DIGEST_LENGTH]; sha256_Raw((unsigned char *)data1, strlen(data1), md1); sha256_Raw((unsigned char *)data2, strlen(data2), md2); struct bloom bloom; assert(bloom_init(&bloom, 1000, 0.001) == true); bloom_insert(&bloom, md1, sizeof(md1)); assert(bloom_contains(&bloom, md1, sizeof(md1)) == true); assert(bloom_contains(&bloom, md2, sizeof(md2)) == false); cstring *ser = cstr_new_sz(1024); ser_bloom(ser, &bloom); struct bloom bloom2; __bloom_init(&bloom2); struct const_buffer buf = { ser->str, ser->len }; assert(deser_bloom(&bloom2, &buf) == true); assert(bloom.nHashFuncs == bloom2.nHashFuncs); assert(bloom.vData->len == bloom2.vData->len); assert(memcmp(bloom.vData->str, bloom2.vData->str, bloom2.vData->len) == 0); assert(bloom_contains(&bloom2, md1, sizeof(md1)) == true); assert(bloom_contains(&bloom2, md2, sizeof(md2)) == false); bloom_free(&bloom2); bloom_free(&bloom); cstr_free(ser, true); }
/*
 * Test fixture: initialise the file-level `bloom` object from the test
 * constants and the file-level `bf` and `hashes` objects.
 */
static void set_up_bloom(void)
{
    bloom_init(&bloom,
               TESTS_BLOOM_BITS,
               bf,
               hashes,
               TESTS_BLOOM_HASHF);
}
int main (int argc, char *argv[]) { int count; unsigned long maxitems=0; int c; int index; FILE *fp; unsigned long items; char line[MAX_LINE_SIZE]; char pline[MAX_LINE_SIZE]; char unhex[MAX_LINE_SIZE]; char *toprocess; int size; int found=0; /* safe defaults */ opt_errorrate=0.01; opt_bloomfile=NULL; /* load config */ loadconfig(); while ((c = getopt (argc, argv, "huicp:svde:b:f:")) != -1) switch (c) { case 'h': displayhelp(); exit(0); break; case 'u': opt_unhex = 1; break; case 'i': opt_ignorecase = 1; break; case 'c': opt_init = 1; break; case 'p': opt_progressitems = atoi(optarg); break; case 'e': opt_errorrate = atof(optarg); break; case 'b': opt_bloomfile = optarg; break; case 'f': opt_readfromfile = optarg; break; case 's': opt_search = 1; break; case 'v': opt_verbose++; break; case 'd': opt_debug++; break; case '?': if (optopt == 'b') fprintf (stderr, "Option -%c requires an argument.\n", optopt); else if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort (); } if (opt_debug) { printf ("opt_init = %d, opt_search = %d, opt_bloomfile = %s\n", opt_init, opt_search, opt_bloomfile); for (count = 1; count < argc; count++) { printf("argv[%d] = %s\n", count, argv[count]); } for (index = optind; index < argc; index++) printf ("Non-option argument %s\n", argv[index]); } if (opt_init) { for (index = optind; index < argc; index++) { if (opt_verbose) fprintf(stderr,"[i] Counting lines for %s\n", argv[index]); fp=fopen(argv[index],"r"); if (fp==NULL) { fprintf(stderr,"Error opening %s\n",argv[index]); break; } items=getlinecount(fp); if (opt_verbose) fprintf(stderr,"[i] %s have %lu lines/items\n",argv[index],items); maxitems=maxitems+items; fclose(fp); } if (opt_verbose) fprintf(stderr,"[i] Maximum number of items: %lu\n",maxitems); bloom_init(&bloom, maxitems, opt_errorrate); items=0; for (index = optind; index < argc; index++) { if (opt_verbose) 
fprintf(stderr,"[i] Processing %s\n", argv[index]); fp=fopen(argv[index],"r"); if (fp==NULL) { fprintf(stderr,"Error opening %s\n",argv[index]); break; } /* read line by line */ while (fgets (line, sizeof(line), fp)) { toprocess=line; size=strlen(line); if (line[size-1]=='\n') line[--size]='\0'; if (line[size-1]=='\r') line[--size]='\0'; if (opt_debug) fprintf(stderr,"Line (%d): %s \n",size,line); if (opt_verbose && (items++ % opt_progressitems==0)) fprintf(stderr,"\r[i] Line %lu of %lu", items, maxitems); if (opt_ignorecase) { toprocess=str2upper(toprocess,pline); } if (opt_unhex) { size=hexstr2char(toprocess,unhex,MAX_LINE_SIZE); toprocess=unhex; } bloom_add(&bloom, toprocess, size); } if (opt_verbose) fprintf(stderr,"\n[i] Done for %s!\n",argv[index]); fclose(fp); } if (opt_bloomfile==NULL) { fprintf(stderr,"No bloom file specified for init. Not saving.\n"); } else { if (opt_verbose) fprintf(stderr,"[i] Saving to %s\n",opt_bloomfile); bloom_save(&bloom,opt_bloomfile); /* if (opt_verbose) bloom_print(&bloom); */ } } if (opt_search || (!opt_init)) { if (opt_bloomfile==NULL) { fprintf(stderr,"No bloom file specified.\n"); } else { if (opt_verbose) fprintf(stderr,"[i] Opening bloom file: %s\n", opt_bloomfile); if (bloom_load(&bloom, opt_bloomfile)) { fprintf(stderr,"[i] Error loading bloom file: %s\n", opt_bloomfile); return (1); } } if (opt_verbose) fprintf(stderr,"[i] Searching patterns\n"); for (index = optind; index < argc; index++) { toprocess=argv[index]; if (opt_verbose) fprintf(stderr,"[i] Processing %s\n", toprocess); if (searchpattern(toprocess)) { fprintf(stdout,"%s found\n", argv[index]); } else { fprintf(stdout,"%s not found\n", argv[index]); } } if (opt_readfromfile!=NULL) { if (opt_verbose) fprintf(stderr,"[v] Reading from file %s\n",opt_readfromfile); if (strcmp(opt_readfromfile,"-")==0) { fprintf (stderr,"[i] Reading from standard input. 
Specify pattern separated by new line.\n"); fp=stdin; } else { fp=fopen(opt_readfromfile,"r"); } if (fp==NULL) { fprintf(stderr,"[!] Error opening file: %s\n",opt_readfromfile); exit(1); } while (fgets (line, sizeof(line), fp)) { toprocess=line; size=strlen(line); if (line[size-1]=='\n') line[--size]='\0'; if (line[size-1]=='\r') line[--size]='\0'; if (opt_debug) fprintf(stderr,"[d] Line in pattern (%d): %s \n",size,line); if (opt_verbose) fprintf(stderr,"[v] Processing from file %s\n", toprocess); if (searchpattern(toprocess)) { fprintf(stdout,"%s found\n", toprocess); } else { fprintf(stdout,"%s not found\n", toprocess); } } if (fp!=stdin) fclose (fp); } } }
int dbfile_populate_hashes(struct rb_root *d_tree) { int ret; sqlite3 *db; sqlite3_stmt *stmt = NULL; char *filename; uint64_t ino, subvolid; uint64_t num_hashes; struct filerec *file; struct bloom_cb_priv priv; db = dbfile_get_handle(); if (!db) return ENOENT; ret = dbfile_count_rows(db, &num_hashes, NULL); if (ret) return ret; priv.d_tree = d_tree; ret = bloom_init(&priv.bloom, num_hashes, 0.01); if (ret) return ret; ret = sqlite3_prepare_v2(db, "SELECT ino, subvol, filename from files;", -1, &stmt, NULL); if (ret) { perror_sqlite(ret, "preparing statement"); goto out_bloom; } while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) { ino = sqlite3_column_int64(stmt, 0); subvolid = sqlite3_column_int64(stmt, 1); filename = (char *)sqlite3_column_text(stmt, 2); file = filerec_new(filename, ino, subvolid); if (!file) { ret = ENOMEM; goto out_finalize; } ret = dbfile_walk_file_hashes(db, file, load_into_bloom_cb, &priv); if (ret) goto out_finalize; } if (ret != SQLITE_DONE) { perror_sqlite(ret, "retrieving file info from table"); goto out_finalize; } ret = 0; out_finalize: sqlite3_finalize(stmt); out_bloom: bloom_free(&priv.bloom); return ret; }
/*
 * bloom_test <bitmap_size> [random_num_seed]
 *
 * Inserts bitmap_size / 10 random 64-bit numbers into a filter of
 * bitmap_size, verifies that every inserted number is reported present,
 * then probes 100 times as many fresh random numbers and prints how many
 * of them matched. Finally prints the first 1024 bits of the filter.
 *
 * Exits with status 1 on bad usage, allocation failure, or a false
 * negative.
 */
int main(int argc, char **argv)
{
	bloom_filter bf;
	int bsz;
	long long rll;
	int n_inserted;
	long long *testnums = NULL;
	int matched = 0;
	int i;

	if (argc < 2) {
		printf("Usage:\n ./bloom_test <bitmap_size> <random_num_seed>\n");
		exit(1);
	}
	bsz = atoi(argv[1]);

	/* initialize random number generator's seed */
	if (argc > 2) {
		srandom(atoi(argv[2]));
	}

	n_inserted = bsz / 10;

	/* Only allocate when there is something to remember, and never use
	 * the array if the allocation failed. */
	if (n_inserted > 0) {
		testnums = malloc(sizeof *testnums * (size_t)n_inserted);
		if (testnums == NULL) {
			fprintf(stderr, "bloom_test: out of memory\n");
			exit(1);
		}
	}

	/* generate n_inserted random numbers (of long long type) and
	 * insert them into the bloom filter */
	bf = bloom_init(bsz);
	for (i = 0; i < n_inserted; i++) {
		rll = (long long) random();
		rll = rll << 31 | random();
		testnums[i] = rll;
		bloom_add(bf, rll);
	}

	/* every inserted number must be reported present by bloom_query */
	for (i = 0; i < n_inserted; i++) {
		if (!bloom_query(bf, testnums[i])) {
			printf("%lld inserted, but not present according to bloom_query\n", testnums[i]);
			exit(1);
		}
	}

	/* probe n_inserted*100 fresh random numbers and count matches */
	for (i = 0; i < n_inserted*100; i++) {
		rll = (long long) random();
		rll = rll << 31 | random();
		if (bloom_query(bf, rll)) {
			matched++;
		}
	}
	printf("false positive %d/%d\n", matched, n_inserted*100);

	/* print the first 1024 bits of bloom filter */
	bloom_print(bf, 1024);

	free(testnums);
	return 0;
}
/*
 * Split `line` in place on '|' into at most max_fields - 1 tokens.
 * fields[] is always NULL-terminated; tokens beyond the limit are
 * ignored. Returns the number of tokens stored.
 */
static int split_pipe_fields(char *line, char *fields[], int max_fields)
{
    int n = 0;
    char *tok = strtok(line, "|");

    while (tok != NULL && n < max_fields - 1) {
        fields[n++] = tok;
        tok = strtok(NULL, "|");
    }
    fields[n] = NULL;
    return n;
}

/*
 * Filter the list of updated files in f1 against the hash catalog f2
 * and the cache f3.
 *
 * f1     - updated files, '|'-separated records; field 1 is the hash
 *          and field 2 a size
 * f2     - hash catalog, same record shape; new records are appended
 * f3     - cache, one hash per line
 * buffer - path of the output file, which receives every record that
 *          was new to the bloom filter
 *
 * Records whose size field is below 8000, whose hash is found in the
 * cache, or whose hash the bloom filter has already seen are skipped.
 * Malformed records (too few fields) are skipped as well.
 */
void masterServer(FILE *f1, FILE *f2, FILE *f3, char *buffer)
{
    enum { MS_LINE_LEN = 300, MS_MAX_FIELDS = 10 };
    char line[MS_LINE_LEN] = "", fullline[MS_LINE_LEN] = "", cacheline[MS_LINE_LEN] = "";
    char *splithash[MS_MAX_FIELDS] = { NULL };
    long num, entries = 0, hashcatalogentries = 0;
    int in_cache;
    struct bloom bloom;
    FILE *f4;

    f4 = fopen(buffer, "w+");
    if (f4 == NULL) {
        /* Without the output file nothing useful can be produced. */
        perror(buffer);
        return;
    }

    /* count the records in the new input */
    while (fgets(line, MS_LINE_LEN, f1) != NULL) {
        entries++;
    }

    /* count the records in the hash catalog */
    while (fgets(line, MS_LINE_LEN, f2) != NULL) {
        hashcatalogentries++;
    }

    rewind(f1);
    rewind(f2);

    /* NOTE(review): the result of bloom_init() is not checked and the
     * filter is never released in this function - confirm against the
     * bloom library in use. */
    bloom_init(&bloom, hashcatalogentries + entries, 0.01);

    /* add hash catalog entries to the bloom filter */
    while (fgets(line, MS_LINE_LEN, f2) != NULL) {
        if (split_pipe_fields(line, splithash, MS_MAX_FIELDS) < 2) {
            continue;   /* no hash field on this line */
        }
        bloom_add(&bloom, splithash[1], strlen(splithash[1]));
    }

    /* read the list of updated files */
    while (fgets(line, MS_LINE_LEN, f1) != NULL) {
        strcpy(fullline, line);
        if (split_pipe_fields(line, splithash, MS_MAX_FIELDS) < 3) {
            continue;   /* hash or size field missing */
        }

        /* size filter */
        num = atoi(splithash[2]);
        if (num < 8000) {
            continue;
        }

        /* Cache check. NOTE: only the hash is stored in the cache.
         * The cache is scanned from the start for every record;
         * without the rewind only the first record would ever be
         * compared against it. The scan always runs to end of file so
         * the stream position afterwards is the same as before. */
        in_cache = 0;
        rewind(f3);
        while (fgets(cacheline, MS_LINE_LEN, f3) != NULL) {
            cacheline[strcspn(cacheline, "\n")] = '\0';
            if (strcmp(splithash[1], cacheline) == 0) {
                in_cache = 1;
            }
        }

        /* already in the cache: no need to consult the bloom filter */
        if (in_cache) {
            continue;
        }

        /* bloom filter: 0 means the hash was not seen before */
        if (bloom_add(&bloom, splithash[1], strlen(splithash[1])) == 0) {
            folderCache(splithash[0], splithash[1], f2, f3);
            fprintf(f2, "%s", fullline);
            fprintf(f4, "%s", fullline);
        }
    }

    fclose(f4);
}