Example #1
0
/** ***************************************************************************
 * A few simple tests to check if it works at all.
 *
 * First calls bloom_init() with parameters that are expected to be rejected,
 * then runs add/check calls against a filter created with accepted
 * parameters. Every expectation is checked with assert().
 *
 * NOTE(review): the calls under test sit inside assert(), so building with
 * NDEBUG would remove them entirely.
 *
 */
static void basic()
{
  (void)printf("----- basic -----\n");

  struct bloom bloom;

  /* Both inits are expected to return 1 and leave bloom.ready at 0;
   * add/check on that filter are then expected to return -1. */
  assert(bloom_init(&bloom, 0, 1.0) == 1);
  assert(bloom_init(&bloom, 10, 0) == 1);
  assert(bloom.ready == 0);
  assert(bloom_add(&bloom, "hello world", 11) == -1);
  assert(bloom_check(&bloom, "hello world", 11) == -1);
  bloom_free(&bloom);

  /* Accepted parameters: init returns 0 and marks the filter ready. */
  assert(bloom_init(&bloom, 102, 0.1) == 0);
  assert(bloom.ready == 1);
  bloom_print(&bloom);

  /* Expected sequence: check returns 0 before the first add, the first add
   * of a key returns 0, a repeated add returns a positive value, and check
   * returns 1 once the key has been added. */
  assert(bloom_check(&bloom, "hello world", 11) == 0);
  assert(bloom_add(&bloom, "hello world", 11) == 0);
  assert(bloom_check(&bloom, "hello world", 11) == 1);
  assert(bloom_add(&bloom, "hello world", 11) > 0);
  assert(bloom_add(&bloom, "hello", 5) == 0);
  assert(bloom_add(&bloom, "hello", 5) > 0);
  assert(bloom_check(&bloom, "hello", 5) == 1);
  bloom_free(&bloom);
}
Example #2
0
/*
 * Add a text key (argument 1) to the serialized bloom object passed as a
 * bytea (argument 0) and return the resulting bytea.
 *
 * The object holds pgbloom->filters filters, walked with next_bloom().
 * Filters before the last one are only checked; the key is added to the
 * last filter while space_left is true. Otherwise a new filter is appended
 * to a freshly palloc'd copy of the object and the key is added to it.
 */
Datum pgbloomfun_add(PG_FUNCTION_ARGS)
{
  bytea *newbloomba, *bloomba = PG_GETARG_BYTEA_P(0);
  text *key = PG_GETARG_TEXT_P(1);
  pgbloom_t *pgbloom = get_pgbloom(bloomba);
  bloom_t newbloom, *bloom = NULL;
  size_t newbloom_size;
  int space_left, i;

  /* The last filter accepts keys while it is under capacity, or
   * unconditionally when growth_factor is 0. */
  space_left = (pgbloom->last_capacity > pgbloom->last_entries) ||
               (pgbloom->growth_factor == 0);
  for (i=0; i<pgbloom->filters; i++)
    {
      bloom = next_bloom(bloomba, bloom);
      if (bloom == NULL)
        {
          elog(ERROR, "pgbloomfun: missing filter in bloom object");
        }
      if (i == pgbloom->filters - 1 && space_left)
        {
          /* Counters are bumped only when bloom_add() returns 0
           * (presumably "key was not present before" -- TODO confirm). */
          if (bloom_add(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ) == 0)
            {
              pgbloom->total_entries ++;
              pgbloom->last_entries ++;
            }
          PG_RETURN_BYTEA_P(bloomba);
        }
      else if (bloom_check(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ))
        {
          PG_RETURN_BYTEA_P(bloomba);  /* key already exists */
        }
    }

  /* create a new filter */
  /* The header fields are updated inside the input bytea on purpose before
   * the memcpy below, so the copy carries the new values. */
  pgbloom->filters += 1;
  pgbloom->total_entries += 1;
  pgbloom->last_entries = 1;
  pgbloom->last_capacity *= pgbloom->growth_factor;
  pgbloom->total_capacity += pgbloom->last_capacity;

  /* calculate and allocate space */
  /* NOTE(review): the return value of bloom_init() is not checked. */
  bloom_init(&newbloom, pgbloom->last_capacity, pgbloom->error_rate);
  newbloom_size = sizeof(newbloom) + newbloom.bits / 8;
  newbloomba = palloc(VARSIZE(bloomba) + newbloom_size);
  memcpy(newbloomba, bloomba, VARSIZE(bloomba));
  SET_VARSIZE(newbloomba, VARSIZE(bloomba) + newbloom_size);

  /* initialize the new bloom filter and add the new key to it */
  /* The new filter starts right after the copied old contents: zero the
   * whole region, then overlay the initialized header. */
  bloom = (bloom_t *) (((unsigned char *) newbloomba) + VARSIZE(bloomba));
  memset(bloom, 0, newbloom_size);
  memcpy(bloom, &newbloom, sizeof(newbloom));
  bloom_add(bloom, VARDATA(key), VARSIZE(key) - VARHDRSZ);

  PG_RETURN_BYTEA_P(newbloomba);
}
Example #3
0
/*
 * TAP test for the counting bloom filter: add and check a few keys, then
 * add and delete a block of two-letter keys and verify that bloom_stat()
 * and bloom_over() report the same values as before the block was added.
 */
int
main(void)
{
    BLOOM *bp;
    int rc;
    int stat_before, over_before;
    int stat_after, over_after;
    char key[] = "__";
    char first, second;

    plan_tests(6);
    setvbuf(stdout, 0, _IOLBF, 0);

    bp = bloom_create(12, 16, 2);
    ok(bp, "created");

    rc = bloom_chk(bp, "AB");
    ok(rc == 0, "Check 'AB' not in empty table: %d", rc);

    printf("# Add AB\n");
    bloom_add(bp, "AB");

    rc = bloom_chk(bp, "AB");
    ok(rc != 0, "Check 'AB' after add: %d", rc);

    printf("# Add AC\n");
    bloom_add(bp, "AC");

    /* "!@" was never added; the test expects a hit here anyway. */
    rc = bloom_chk(bp, "!@");
    ok(rc != 0, "Check '!@' without add: %d (false positive)", rc);

    printf("# Before adding CA...DZ\n");
    bloom_dump(bp, stdout);

    stat_before = bloom_stat(bp);
    over_before = bloom_over(bp);

    /* Second letter is the outer loop, first letter the inner one. */
    for (second = 'A'; second <= 'D'; ++second) {
        for (first = 'C'; first <= 'Z'; ++first) {
            key[0] = first;
            key[1] = second;
            bloom_add(bp, key);
        }
    }

    printf("# After adding CA...DZ:\n");
    bloom_dump(bp, stdout);

    /* Remove the same keys in the same order. */
    for (second = 'A'; second <= 'D'; ++second) {
        for (first = 'C'; first <= 'Z'; ++first) {
            key[0] = first;
            key[1] = second;
            bloom_del(bp, key);
        }
    }

    printf("# After deleting CA...DZ:\n");
    bloom_dump(bp, stdout);

    stat_after = bloom_stat(bp);
    over_after = bloom_over(bp);
    ok(stat_after == stat_before, "stat %d -> %d", stat_before, stat_after);
    ok(over_after == over_before, "over %d -> %d", over_before, over_after);

    bloom_destroy(bp);

    return exit_status();
}
/*
 * Handle a received beacon.
 *
 * Packets with the wrong size or magic key are logged and dropped. For a
 * beacon whose id is not yet in dow_neighbors, the id is added, dow_size is
 * incremented, and dow_position is incremented when the id is lower than
 * dow_my_id. The packet is released on every path.
 */
static void _handle_beacon(gnrc_pktsnip_t *pkt)
{
    if (pkt->size == sizeof(beacon_t)) {
        beacon_t beacon = *((beacon_t*) pkt->data);

        if (beacon.magic_key == BEACONING_MK) {
            LOG_DEBUG("beaconing: received a beacon, id is %" PRIu32 "\n", beacon.id);

            if (!bloom_check(&dow_neighbors, (uint8_t*) &(beacon.id), sizeof(beacon.id))) {
                /* unknown neighbor: remember it and analyze its ID */
                bloom_add(&dow_neighbors, (uint8_t*) &(beacon.id), sizeof(beacon.id));
                dow_size++;
                if (beacon.id < dow_my_id) {
                    dow_position++;
                }
            }
            else {
                LOG_DEBUG("beaconing: already know this neighbor\n");
            }
        }
        else {
            LOG_WARNING("beaconing: received packet doesn't seem to be a beacon - wrong magic key\n");
        }
    }
    else {
        LOG_WARNING("beaconing: received packet doesn't seem to be a beacon - wrong size\n");
    }

    /* single release point for every outcome */
    gnrc_pktbuf_release(pkt);
}
Example #5
0
/*
 * Bloom filter demo: add every string of B to a filter, then count how many
 * strings of A the filter reports as present and print that share as the
 * false positive rate.
 *
 * Returns 0 on success, 1 if the filter could not be created.
 */
int main(void)
{
    bloom_t *bloom = bloom_new(1 << 7, 6, fnv_hash, sax_hash, sdbm_hash,
                                      djb2_hash, kr_hash, dek_hash, rotating_hash, one_at_a_time_hash);

    /* bloom is dereferenced right below, so a failed creation must stop
     * here (assumes bloom_new() reports failure as NULL -- TODO confirm). */
    if (bloom == NULL) {
        printf("Could not create Bloom filter.\n");
        return 1;
    }

    printf("Testing Bloom filter.\n\n");
    /* Sizes are unsigned: print them with %zu after an explicit cast. */
    printf("m: %zu\nk: %zu\n\n", (size_t) bloom->m, (size_t) bloom->k);

    for (int i = 0; i < lenB; i++) {
        bloom_add(bloom, (const uint8_t *) B[i], strlen(B[i]));
        printf("Added \"%s\"\n", B[i]);
    }

    int in = 0;
    int not_in = 0;

    for (int i = 0; i < lenA; i++) {
        if (bloom_check(bloom, (const uint8_t *) A[i], strlen(A[i]))) {
            in++;
        }
        else {
            not_in++;
        }
    }

    printf("\n");
    printf("%d elements probably in the filter.\n", in);
    printf("%d elements not in the filter.\n", not_in);
    /* Avoid 0/0 (NaN) when A is empty. */
    double false_positive_rate = (lenA > 0) ? (double) in / (double) lenA : 0.0;
    printf("%f false positive rate.\n", false_positive_rate);

    bloom_del(bloom);
    printf("\nAll done!\n");
    return 0;
}
Example #6
0
int get_rating(unsigned char *info_hash, const struct sockaddr_storage *target,
		const struct sockaddr *from, int fromlen) {
	struct dht_rating_entry * entry = list;
	while (entry) {
		if (memcmp(entry->key, info_hash, SHA_DIGEST_LENGTH) == 0) {
			break;
		}
		entry = entry->next;
	}
	if (entry) {
		struct dht_result_rating * pos = entry->ratings;
		struct dht_result_rating * prev = NULL;
		while (pos) {
			if (sockaddr_storage_equals(pos->ss, target)) {
				entry->updated = time(NULL);
				if (pos->rating < 10) {
					unsigned char md[SHA_DIGEST_LENGTH];
					SHA1((const unsigned char*) from, fromlen, md);
					if (!bloom_check(pos->frombloom, (const char*) md)) {
						bloom_add(pos->frombloom,(const char*) md);
						pos->rating++;
					}
				}
				return pos->rating;
			}
			prev = pos;
			pos = pos->next;
		}
		prev->next = rating_create(target, from, fromlen);
		return 0;
	} else {
		rating_create_entry(info_hash, target, from, fromlen);
		return 0;
	}
}
Example #7
0
/*
 * Bloom filter timing demo: add lenB generated buffers tagged MAGIC_B, then
 * check lenA generated buffers tagged MAGIC_A, printing how long each phase
 * took and the share of checked buffers reported as present.
 *
 * Returns 0 on success, 1 if the filter could not be created.
 */
int main(void)
{
    xtimer_init();

    bloom_t *bloom = bloom_new(1 << 12, 8, fnv_hash, sax_hash, sdbm_hash,
                                      djb2_hash, kr_hash, dek_hash, rotating_hash, one_at_a_time_hash);

    /* bloom is dereferenced right below, so a failed creation must stop
     * here (assumes bloom_new() reports failure as NULL -- TODO confirm). */
    if (bloom == NULL) {
        printf("Could not create Bloom filter.\n");
        return 1;
    }

    printf("Testing Bloom filter.\n\n");
    printf("m: %" PRIu32 " k: %" PRIu32 "\n\n", (uint32_t) bloom->m,
           (uint32_t) bloom->k);

    genrand_init(myseed);

    unsigned long t1 = xtimer_now();

    for (int i = 0; i < lenB; i++) {
        buf_fill(buf, BUF_SIZE);
        buf[0] = MAGIC_B;
        bloom_add(bloom,
                  (uint8_t *) buf,
                  BUF_SIZE * sizeof(uint32_t) / sizeof(uint8_t));
    }

    unsigned long t2 = xtimer_now();
    /* the timer difference is divided by 1000 before being labelled "ms" */
    printf("adding %d elements took %" PRIu32 "ms\n", lenB,
           (uint32_t) (t2 - t1) / 1000);

    int in = 0;
    int not_in = 0;

    unsigned long t3 = xtimer_now();

    for (int i = 0; i < lenA; i++) {
        buf_fill(buf, BUF_SIZE);
        buf[0] = MAGIC_A;

        if (bloom_check(bloom,
                        (uint8_t *) buf,
                        BUF_SIZE * sizeof(uint32_t) / sizeof(uint8_t))) {
            in++;
        }
        else {
            not_in++;
        }
    }

    unsigned long t4 = xtimer_now();
    printf("checking %d elements took %" PRIu32 "ms\n", lenA,
           (uint32_t) (t4 - t3) / 1000);

    printf("\n");
    printf("%d elements probably in the filter.\n", in);
    printf("%d elements not in the filter.\n", not_in);
    /* Avoid 0/0 (NaN) when nothing was checked. */
    double false_positive_rate = (lenA > 0) ? (double) in / (double) lenA : 0.0;
    printf("%f false positive rate.\n", false_positive_rate);

    bloom_del(bloom);
    printf("\nAll done!\n");
    return 0;
}
Example #8
0
/*
 * Fill a 1000-bit bloom filter from array1, print how many bits ended up
 * set, then print the sum of bloom_check() results over array2.
 * Both arrays hold arraysize elements.
 */
void run_test3(int* array1, int* array2, int arraysize)
{
	const int bloomsize = 1000;
	bloom_filter_t bloomfilter;
	int totalbits = 0;
	int array2bits = 0;
	int i;

	bloom_init(&bloomfilter, bloomsize);

	// populate the filter from array1
	i = 0;
	while (i < arraysize)
	{
		bloom_add(&bloomfilter, array1[i]);
		i++;
	}

	// tally every bit position that is set
	i = 0;
	while (i < bloomsize)
	{
		totalbits += get_bit(&bloomfilter, i);
		i++;
	}
	printf("Total bits set: %i\n",totalbits);	

	// sum the check results for the second array
	i = 0;
	while (i < arraysize)
	{
		array2bits += bloom_check(&bloomfilter, array2[i]);
		i++;
	}
	printf("Array2 bits set: %i\n",array2bits);

	bloom_destroy(&bloomfilter);
}
Example #9
0
// Add every key of the fl_hash_index table to bloom filter b.
void fl_local_bloom(bloom_t *b) {
  GHashTableIter it;
  gpointer key;

  g_hash_table_iter_init(&it, fl_hash_index);
  // Only the keys are needed; the value output is left NULL.
  while(g_hash_table_iter_next(&it, &key, NULL)) {
    bloom_add(b, (const char *)key);
  }
}
Example #10
0
/**
 * Create a bloom filter from a mail file.
 *
 * Every line of the file, with its trailing "\n" / "\r\n" removed, is added
 * to the filter with bloom_add().
 *
 * param: filename     name of the mail file, read in text mode
 * param: vector_size  size stored in bf->size; vector_size + 1 zeroed bytes
 *                     are allocated for the vector
 * return: the new filter, or NULL when an argument is invalid, memory cannot
 *         be allocated, or the file cannot be opened. The caller owns the
 *         returned filter.
 */
BF *bloom_create(char *filename, int vector_size)
{
	if (filename == NULL || vector_size < 0)
		return NULL;

	//initialize the BF
	BF *bf = malloc(sizeof *bf);
	if (bf == NULL)
		return NULL;

	bf->size = vector_size;
	//calloc zeroes the whole vector, including the extra trailing byte
	bf->bf_vector = calloc((size_t)vector_size + 1, sizeof(char));
	if (bf->bf_vector == NULL)
	{
		free(bf);
		return NULL;
	}

	//read the mailfile line by line
	FILE *fp = fopen(filename, "r");
	if (fp == NULL)
	{
		//reading from a NULL stream would crash, so give up here
		fprintf(stderr, "File %s open error!\n", filename);
		free(bf->bf_vector);
		free(bf);
		return NULL;
	}

	char line[MAXLINE+1];
	while (fgets(line, MAXLINE, fp) != NULL)
	{
		//remove the line ending; the length checks keep the index inside
		//the buffer, and '\r' is only dropped when it is the last char
		size_t line_len = strlen(line);
		if (line_len > 0 && line[line_len-1] == '\n')
			line[--line_len] = '\0';
		if (line_len > 0 && line[line_len-1] == '\r')
			line[--line_len] = '\0';
		//set the vector
		bloom_add(bf, line);
	}

	fclose(fp);
	return bf;
}
Example #11
0
/** ***************************************************************************
 * Build a filter for 'entries' elements at the given 'error' rate, insert
 * 'count' 32-byte blocks read from /dev/urandom, and assert that the number
 * of non-zero bloom_add() results stays within the expected bound.
 *
 */
static void add_random(int entries, double error, int count)
{
  struct bloom bloom;
  char block[32];
  int collisions = 0;
  int fd;
  int n = 0;

  (void)printf("----- add_random(%d, %f, %d) -----\n", entries, error, count);

  assert(bloom_init(&bloom, entries, error) == 0);
  bloom_print(&bloom);

  fd = open("/dev/urandom", O_RDONLY);

  while (n < count) {
    assert(read(fd, block, 32) == 32);
    /* any non-zero result from bloom_add() counts as a collision */
    collisions += (bloom_add(&bloom, (void *)block, 32) != 0);
    n++;
  }
  (void)close(fd);
  bloom_free(&bloom);

  (void)printf("added %d elements, got %d collisions\n", count, collisions);

  /* Bounds are only asserted up to twice the configured capacity. */
  if (count <= entries) {
    assert(collisions <= (entries * error));
    return;
  }
  if (count <= entries * 2) {
    assert(collisions < (2 * entries * error));
  }
}
Example #12
0
/* Current wall-clock time from gettimeofday(), in milliseconds. */
static long perf_now_ms(void)
{
  struct timeval tv;

  (void)gettimeofday(&tv, NULL);
  return (tv.tv_sec * 1000L) + (tv.tv_usec / 1000L);
}

/** ***************************************************************************
 * Timing loop: add the integers 0..count-1 to a filter sized for 'entries'
 * elements (error rate 0.001) and print the elapsed milliseconds.
 *
 */
static void perf_loop(int entries, int count)
{
  struct bloom bloom;
  int collisions = 0;
  int i = 0;
  long before;
  long after;

  (void)printf("----- perf_loop -----\n");

  assert(bloom_init(&bloom, entries, 0.001) == 0);
  bloom_print(&bloom);

  before = perf_now_ms();

  while (i < count) {
    /* the key is the raw bytes of the loop counter */
    if (bloom_add(&bloom, (void *)&i, sizeof(int))) {
      collisions++;
    }
    i++;
  }

  after = perf_now_ms();

  (void)printf("Added %d elements of size %d, took %d ms (collisions=%d)\n",
               count, (int)sizeof(int), (int)(after - before), collisions);

  /* CSV line: entries, filter size in bytes, elapsed ms */
  (void)printf("%d,%d,%ld\n", entries, bloom.bytes, after - before);
  bloom_free(&bloom);
}
Example #13
0
/*
 * Allocate a rating record for address `ss`, starting at rating 0.
 *
 * The record gets its own bloom filter, seeded with the SHA1 digest of the
 * `from` address, and its own copy of the family, port and address of `ss`
 * (AF_INET and AF_INET6 only; for other families only the family is set and
 * the rest of the storage stays zeroed).
 *
 * Returns the new record, or NULL if any allocation fails. The caller owns
 * the result.
 */
struct dht_result_rating * rating_create(const struct sockaddr_storage *ss,
		const struct sockaddr *from, int fromlen) {
	unsigned char md[SHA_DIGEST_LENGTH];
	struct dht_result_rating *result;

	result = malloc(sizeof(struct dht_result_rating));
	if (!result) {
		return NULL;
	}
	/* zeroed so that no field of the stored address is left indeterminate */
	result->ss = calloc(1, sizeof(struct sockaddr_storage));
	if (!result->ss) {
		free(result);
		return NULL;
	}
	result->frombloom = bloom_create(96, 7, hash1, hash2, hash3, hash4, hash5,
			hash6, hash7);
	if (!result->frombloom) {
		free(result->ss);
		free(result);
		return NULL;
	}

	SHA1((const unsigned char*) from, fromlen, md);
	bloom_add(result->frombloom, (const char*) md);
	result->next = NULL;
	result->rating = 0;

	result->ss->ss_family = ss->ss_family;
	if (ss->ss_family == AF_INET) {
		struct sockaddr_in *dest = (struct sockaddr_in*) result->ss;
		const struct sockaddr_in *src = (const struct sockaddr_in*) ss;

		dest->sin_port = src->sin_port;
		memcpy(&dest->sin_addr, &src->sin_addr, sizeof dest->sin_addr);
	} else if (ss->ss_family == AF_INET6) {
		struct sockaddr_in6 *dest = (struct sockaddr_in6*) result->ss;
		const struct sockaddr_in6 *src = (const struct sockaddr_in6*) ss;

		dest->sin6_port = src->sin6_port;
		memcpy(&dest->sin6_addr, &src->sin6_addr, sizeof dest->sin6_addr);
	}
	return result;
}
Example #14
0
/* Test fixture: add each of the lenB strings in B to the shared filter. */
static void load_dictionary_fixture(void)
{
    int idx = 0;

    while (idx < lenB) {
        bloom_add(&bloom, (const uint8_t *) B[idx], strlen(B[idx]));
        idx++;
    }
}
Example #15
0
/*
 * Per-key callback: add the key's public key bytes to the bloom filter
 * passed as cbData. The `key` and `len` arguments are not used.
 */
static void
wallet_update_filter_cb(const void *key,
                        size_t len,
                        void *cbData,
                        void *keyData)
{
   struct wallet_key *entry = keyData;
   struct bloom_filter *target = cbData;

   bloom_add(target, &entry->pub_key, sizeof entry->pub_key);
}
Example #16
0
/**
 cs_setopt_url : queue a url as a new task unless the bloom filter already
                 reports it
 @cspider : the cspider
 @url : new task's url; a heap copy of it is handed to the task queue
**/
void cs_setopt_url(cspider_t *cspider, char *url){
  PANIC(cspider);
  PANIC(url);

  if (bloom_check(cspider->bloom, url)) {
    // url already recorded: nothing to queue
    return;
  }

  bloom_add(cspider->bloom, url);

  // duplicate the url, including its terminating NUL
  unsigned int len = strlen(url);
  char *reUrl = (char*)malloc(len + 1);
  PANIC(reUrl);
  strncpy(reUrl, url, len + 1);

  createTask(cspider->task_queue, reUrl);
}
Example #17
0
/*
 * Callback: add `digest` to the bloom filter in `priv`. When bloom_add()
 * returns 1 the digest is also inserted into the digest tree and that
 * insert's result is returned; otherwise bloom_add()'s own result is
 * returned. `file`, `loff` and `flags` are not used.
 */
static int load_into_bloom_cb(struct filerec *file, unsigned char *digest,
			      uint64_t loff, int flags, void *priv)
{
	struct bloom_cb_priv *ctx = priv;
	int rc = bloom_add(&ctx->bloom, digest, digest_len);

	if (rc != 1)
		return rc;

	return digest_insert(ctx->d_tree, digest);
}
Example #18
0
/*
 * Spell-check style demo: load every line of the word file argv[1] into a
 * bloom filter, then split each line of standard input into words and
 * report the words the filter does not contain.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the word file is missing or
 * cannot be opened, or the filter cannot be created.
 */
int main(int argc, char *argv[])
{
	static const char delims[] = " \t,.;:\r\n?!-/()";
	FILE *fp;
	char line[1024];
	char *p;
	BLOOM *bloom;
	
	if(argc<2) {
		fprintf(stderr, "ERROR: No word file specified\n");
		return EXIT_FAILURE;
	}

	if(!(bloom=bloom_create(2500000, 2, sax_hash, sdbm_hash))) {
		fprintf(stderr, "ERROR: Could not create bloom filter\n");
		return EXIT_FAILURE;
	}

	if(!(fp=fopen(argv[1], "r"))) {
		fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
		/* release the filter instead of leaking it on this path */
		bloom_destroy(bloom);
		return EXIT_FAILURE;
	}

	/* one dictionary word per line, line ending removed */
	while(fgets(line, sizeof line, fp)) {
		if((p=strchr(line, '\r'))) *p='\0';
		if((p=strchr(line, '\n'))) *p='\0';

		bloom_add(bloom, line);
	}

	fclose(fp);

	while(fgets(line, sizeof line, stdin)) {
		if((p=strchr(line, '\r'))) *p='\0';
		if((p=strchr(line, '\n'))) *p='\0';

		for(p=strtok(line, delims); p; p=strtok(NULL, delims)) {
			if(!bloom_check(bloom, p)) {
				/* message previously read "ford" instead of "word" */
				printf("No match for word \"%s\"\n", p);
			}
		}
	}

	bloom_destroy(bloom);

	return EXIT_SUCCESS;
}
Example #19
0
/*
 * Three-part exercise of the integer bloom filter:
 *   1. print hash1/hash2 for six sample inputs on a 100-bit filter,
 *   2. add 0..69 to a 1000-bit filter and print the number of set bits,
 *   3. run run_test3() on two arrays of 100 generated numbers.
 */
int main()
{
	static const int samples[6] = { 0, 1, 2, 3, 13, 97 };
	int values[6];
	int bloomsize = 100;
	int totalbits = 0;
	int array1[100];
	int array2[100];
	int x;
	bloom_filter_t bloomfilter;

	//Part 1. Evaluating Hash Functions
	bloom_init(&bloomfilter, bloomsize);

	for (x = 0; x < 6; x++)
	{
		values[x] = hash1(&bloomfilter, samples[x]);
	}
	printf ("Hash1: %i %i %i %i %i %i\n", values[0], values[1], values[2],
		values[3], values[4], values[5]);

	for (x = 0; x < 6; x++)
	{
		values[x] = hash2(&bloomfilter, samples[x]);
	}
	printf ("Hash2: %i %i %i %i %i %i\n", values[0], values[1], values[2],
		values[3], values[4], values[5]);

	bloom_destroy(&bloomfilter);

	//Part 2: smoke test on a larger filter
	printf("\nDoing Smoke Test.\n");
	bloomsize = 1000;
	bloom_init(&bloomfilter, bloomsize);

	x = 0;
	while (x < 70)
	{
		bloom_add(&bloomfilter, x);
		x++;
	}

	x = 0;
	while (x < bloomsize)
	{
		totalbits += get_bit(&bloomfilter, x);
		x++;
	}
	printf("Total bits set: %i\n",totalbits);	
	bloom_destroy(&bloomfilter);

	//Part 3
	printf("\nDoing N_HASHES Test.\n");

	gen_rand(array1, 100, 1000000);
	gen_rand(array2, 100, 1000000);
	run_test3(array1, array2, 100);

}
Example #20
0
/*
 * Load every line of fp1 into a bloom filter, then write to fp3 the
 * zero-based index of each fp2 line that the filter does not contain.
 *
 * Lines are used exactly as fgets() returns them (line ending included) on
 * both sides, so the two inputs are compared consistently.
 *
 * NOTE(review): the filter is never released; no destroy function is
 * visible from this block, so that is left for the owner of the BLOOM API.
 */
void bf_check(FILE* fp1,FILE* fp2,FILE *fp3) {
	BLOOM *bloom;
	char line[1024];
	int pos = -1;

	bloom = bloom_create(239620000);
	if (bloom == NULL) {
		/* bloom_add()/bloom_check() below need a valid filter */
		fprintf(stderr, "bf_check: could not create bloom filter\n");
		return;
	}
	printf("Create done\n");

	while (fgets(line, sizeof line, fp1)) {
		bloom_add(bloom, line);
	}
	printf("add done\n");

	fprintf(fp3,"-------------------------------------Bloom Filter Match-------------------------------------------\n");
	while (fgets(line, sizeof line, fp2)) {
		pos++;
		if (!bloom_check(bloom, line)) {
			fprintf(fp3, "%d no\n", pos);
		}
	}
}
Example #21
0
int
ppbloom_add(const void *buffer, int len)
{
    int err;
    err = bloom_add(ppbloom + current, buffer, len);
    if (err == -1)
        return err;

    bloom_count[current]++;

    if (bloom_count[current] >= entries) {
        bloom_count[current] = 0;
        current              = current == PING ? PONG : PING;
        bloom_free(ppbloom + current);
        bloom_init(ppbloom + current, entries, error);
    }

    return 0;
}
Example #22
0
/*
 * Build a bloom-filter summary of the content store: a new filter is
 * created in *filter_ptr and the key of every valid table entry is added to
 * it while _cs.lock is held.
 *
 * Returns 0 on success, -1 when filter_ptr is NULL or the filter cannot be
 * created. On success the caller owns *filter_ptr.
 */
int CS_summary(struct bloom ** filter_ptr)
{
    if (!filter_ptr) return -1;

    *filter_ptr = bloom_create(_cs.summary_size, BLOOM_ARGS);
    /* assumes bloom_create() reports failure as NULL -- TODO confirm */
    if (!*filter_ptr) return -1;

    int i;

    pthread_mutex_lock(&_cs.lock);

    for (i = 0; i < _cs.table->size; i++) {
        struct hash_entry * entry = _cs.table->entries[i];
        if (entry && entry->valid) {
            char * name = entry->key;
            bloom_add(*filter_ptr, name);
        }
    }

    pthread_mutex_unlock(&_cs.lock);

    return 0;
}
Example #23
0
/*-------------------------------------*/
/*
 * Add every k-mer of every sequence line in a FASTQ text buffer to bl.
 *
 * The buffer is read as repeated groups of four lines; only the second line
 * of each group is scanned, with a window of bl->k_mer characters sliding
 * one character at a time. Sequence lines shorter than k_mer contribute
 * nothing. Scanning stops quietly at the first group that is cut short.
 */
void fastq_add (bloom * bl, char *position)
{
  char *key;
  char *eol;

  if (bl->k_mer < 1)
    return;
  key = (char *) calloc (1, bl->k_mer * sizeof (char) + 1);
  if (!key)
    return;

  while (position[0] != '\0')
  {
      /* line 1: header -- skip it */
      eol = strchr (position, '\n');
      if (!eol)
	break;
      position = eol + 1;

      /* line 2: sequence -- slide the window while it fits before '\n' */
      eol = strchr (position, '\n');
      if (!eol)
	break;
      while (eol - position >= bl->k_mer)
      {
	  memcpy (key, position, sizeof (char) * bl->k_mer);
	  key[bl->k_mer] = '\0';
	  bloom_add (bl, key);
	  position++;
      }
      position = eol + 1;

      /* line 3: separator -- skip it */
      eol = strchr (position, '\n');
      if (!eol)
	break;
      position = eol + 1;

      /* line 4: quality -- skip it; no newline means end of data */
      eol = strchr (position, '\n');
      if (!eol)
	break;
      position = eol + 1;
  }
  free (key);
}
Example #24
0
/*-------------------------------------*/
/*
 * Add the k-mers of one FASTA sequence body to bl_2.
 *
 * For every start position in data, up to the next '>' or the end of the
 * string, up to bl_2->k_mer characters are collected (skipping '\r' and
 * '\n') and the collected key is added to the filter. Near the end of the
 * sequence the collected key is shorter than k_mer and is still added.
 *
 * Returns a pointer to the '>' or '\0' that stopped the scan.
 */
char *fasta_data (bloom * bl_2, char *data)
{
  char *key = (char *)calloc(1,bl_2->k_mer * sizeof (char) + 1);
  char *start;

  for (start = data; *start != '>' && *start != '\0'; start++)
  {
      int filled = 0;
      int offset;

      for (offset = 0; filled < bl_2->k_mer; offset++)
      {
	  char c = start[offset];

	  /* the next record header or the end of data ends this window */
	  if (c == '>' || c == '\0')
	    break;
	  if (c != '\r' && c != '\n')
	    key[filled++] = c;
      }
      key[filled] = '\0';
      bloom_add (bl_2, key);
  }
  free (key);
  return start;
}
Example #25
0
/* Strip one trailing '\n' and then one trailing '\r' from line, in place.
 * Returns the resulting length. The length checks keep the index inside
 * the buffer even when the line is just "\n". */
static int strip_line_ending (char *line)
{
	int size = (int) strlen(line);

	if (size > 0 && line[size-1]=='\n') line[--size]='\0';
	if (size > 0 && line[size-1]=='\r') line[--size]='\0';
	return size;
}

/*
 * Command-line entry point.
 *
 * With -c (init) every non-option argument is treated as a file: lines are
 * counted to size the filter, then each line is added (optionally
 * upper-cased with -i and/or hex-decoded with -u) and the filter is saved
 * to the -b file. With -s, or when -c is absent, the -b file is loaded and
 * each non-option argument, plus each line of the -f file ("-" for stdin),
 * is looked up with searchpattern().
 *
 * Returns 0 normally and 1 on an option error or when the bloom file fails
 * to load; exits with 1 when the -f file cannot be opened.
 */
int main (int argc, char *argv[]) {
	int count;
	unsigned long maxitems=0;
	int c;
	int index;
	FILE *fp;
	unsigned long items;
	char line[MAX_LINE_SIZE];
	char pline[MAX_LINE_SIZE];
	char unhex[MAX_LINE_SIZE];
	char *toprocess;
	int size;

	/* safe defaults */
	opt_errorrate=0.01;
	opt_bloomfile=NULL;

	/* load config */
	loadconfig();

	while ((c = getopt (argc, argv, "huicp:svde:b:f:")) != -1)
		switch (c)
		{
		case 'h':
			displayhelp();
			exit(0);
			break;
		case 'u':
			opt_unhex = 1;
			break;
		case 'i':
			opt_ignorecase = 1;
			break;
		case 'c':
			opt_init = 1;
			break;
		case 'p':
			opt_progressitems = atoi(optarg);
			break;
		case 'e':
			opt_errorrate = atof(optarg);
			break;
		case 'b':
			opt_bloomfile = optarg;
			break;
		case 'f':
			opt_readfromfile = optarg;
			break;
		case 's':
			opt_search = 1;
			break;
		case 'v':
			opt_verbose++;
			break;
		case 'd':
			opt_debug++;
			break;
		case '?':
			/* every option that takes an argument gets the
			 * "requires an argument" message, not just -b */
			if (optopt == 'p' || optopt == 'e' || optopt == 'b' || optopt == 'f')
				fprintf (stderr, "Option -%c requires an argument.\n", optopt);
			else if (isprint (optopt))
				fprintf (stderr, "Unknown option `-%c'.\n", optopt);
			else
				fprintf (stderr,
				         "Unknown option character `\\x%x'.\n",
				         optopt);
			return 1;
		default:
			abort ();
		}

	if (opt_debug) {
		printf ("opt_init = %d, opt_search = %d, opt_bloomfile = %s\n",
			opt_init, opt_search, opt_bloomfile);

		for (count = 1; count < argc; count++) {
			printf("argv[%d] = %s\n", count, argv[count]);
		}

		for (index = optind; index < argc; index++)
			printf ("Non-option argument %s\n", argv[index]);
	}

	if (opt_init) {
		/* first pass: count lines so the filter can be sized */
		for (index = optind; index < argc; index++) {
			if (opt_verbose) fprintf(stderr,"[i] Counting lines for %s\n", argv[index]);
			fp=fopen(argv[index],"r");
			if (fp==NULL) {
				fprintf(stderr,"Error opening %s\n",argv[index]);
				break;
			}
			items=getlinecount(fp);
			if (opt_verbose) fprintf(stderr,"[i] %s have %lu lines/items\n",argv[index],items);
			maxitems=maxitems+items;
			fclose(fp);
		}
		if (opt_verbose) fprintf(stderr,"[i] Maximum number of items: %lu\n",maxitems);
		bloom_init(&bloom, maxitems, opt_errorrate);

		/* second pass: add every line to the filter */
		items=0;
		for (index = optind; index < argc; index++) {
			if (opt_verbose) fprintf(stderr,"[i] Processing %s\n", argv[index]);
			fp=fopen(argv[index],"r");
			if (fp==NULL) {
				fprintf(stderr,"Error opening %s\n",argv[index]);
				break;
			}
			/* read line by line */
			while (fgets (line, sizeof(line), fp)) {
				toprocess=line;
				size=strip_line_ending(line);
				if (opt_debug) fprintf(stderr,"Line (%d): %s \n",size,line);
				if (opt_verbose) {
					unsigned long seen = items++;
					/* an interval of 0 (e.g. "-p 0") would divide by zero */
					if (opt_progressitems != 0 && seen % opt_progressitems == 0)
						fprintf(stderr,"\r[i] Line %lu of %lu", items, maxitems);
				}

				if (opt_ignorecase) {
					toprocess=str2upper(toprocess,pline);
				}
				if (opt_unhex) {
					size=hexstr2char(toprocess,unhex,MAX_LINE_SIZE);
					toprocess=unhex;
				}
				bloom_add(&bloom, toprocess, size);
			}
			if (opt_verbose) fprintf(stderr,"\n[i] Done for %s!\n",argv[index]);
			fclose(fp);
		}

		if (opt_bloomfile==NULL) {
			fprintf(stderr,"No bloom file specified for init. Not saving.\n");
		} else {
			if (opt_verbose) fprintf(stderr,"[i] Saving to %s\n",opt_bloomfile);
			bloom_save(&bloom,opt_bloomfile);
			/* if (opt_verbose) bloom_print(&bloom); */
		}
	}

	if (opt_search || (!opt_init)) {
		if (opt_bloomfile==NULL) {
			fprintf(stderr,"No bloom file specified.\n");
		} else {
			if (opt_verbose) fprintf(stderr,"[i] Opening bloom file: %s\n", opt_bloomfile);
			if (bloom_load(&bloom, opt_bloomfile)) {
				fprintf(stderr,"[i] Error loading bloom file: %s\n", opt_bloomfile);
				return (1);
			}
		}

		if (opt_verbose) fprintf(stderr,"[i] Searching patterns\n");

		for (index = optind; index < argc; index++) {
			toprocess=argv[index];
			if (opt_verbose) fprintf(stderr,"[i] Processing %s\n", toprocess);
			if (searchpattern(toprocess)) {
				fprintf(stdout,"%s found\n", argv[index]);
			} else {
				fprintf(stdout,"%s not found\n", argv[index]);
			}
		}

		if (opt_readfromfile!=NULL) {
			if (opt_verbose) fprintf(stderr,"[v] Reading from file %s\n",opt_readfromfile);
			if (strcmp(opt_readfromfile,"-")==0) {
				fprintf (stderr,"[i] Reading from standard input. Specify pattern separated by new line.\n");
				fp=stdin;
			} else {
				fp=fopen(opt_readfromfile,"r");
			}
			if (fp==NULL) {
				fprintf(stderr,"[!] Error opening file: %s\n",opt_readfromfile);
				exit(1);
			}
			while (fgets (line, sizeof(line), fp)) {
				toprocess=line;
				size=strip_line_ending(line);
				if (opt_debug) fprintf(stderr,"[d] Line in pattern (%d): %s \n",size,line);
				if (opt_verbose) fprintf(stderr,"[v] Processing from file %s\n", toprocess);
				if (searchpattern(toprocess)) {
					fprintf(stdout,"%s found\n", toprocess);
				} else {
					fprintf(stdout,"%s not found\n", toprocess);
				}
			}
			if (fp!=stdin) fclose (fp);
		}
	}

	return 0;
}
Example #26
0
/*
 * Build a bloom filter from the names in argv[1], dump it twice (to
 * bloomfilter/filter.bin and to a file whose name carries the filter's
 * timestamp), then read names from standard input and report those the
 * filter does not contain.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the name file is missing or
 * cannot be opened, or the filter cannot be created. A failed dump is
 * reported but does not stop the program.
 */
int main(int argc, char *argv[])
{
	/* Domain names may contain ".", "/", ":" and "-", so those are not
	 * separators. The same set is used for every strtok() call; the
	 * continuation call used to split on them anyway. */
	static const char delims[] = " \t,;\r\n?!()";
	static const char filepath[] = "bloomfilter/filter.bin";
	FILE *fp;
	/* No domain more than 1024 characters */
	char line[1024];
	char filepath2[64];
	char *p;
	BLOOM *bloom;
	int written;

	if(argc<2) {
		fprintf(stderr, "ERROR: No word file specified\n");
		return EXIT_FAILURE;
	}

	if(!(bloom=bloom_create(2500000, 2, sax_hash, sdbm_hash))) {
		fprintf(stderr, "ERROR: Could not create bloom filter\n");
		return EXIT_FAILURE;
	}

	if(!(fp=fopen(argv[1], "r"))) {
		fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
		/* release the filter instead of leaking it on this path */
		bloom_destroy(bloom);
		return EXIT_FAILURE;
	}

	/* set block - read names from file argv[1] and add to filter */
	while(fgets(line, sizeof line, fp)) {
		if((p=strchr(line, '\r'))) *p='\0';
		if((p=strchr(line, '\n'))) *p='\0';

		bloom_add(bloom, line);
	}

	fclose(fp);

	/* TODO Dump twice - filter.bin and filter_version.bin */
	if (!bloom_dump(bloom, filepath)) {
		fprintf(stderr, "ERROR: Could not dump bloom filter %s\n", filepath);
	}

	/* The prefix, a 10-digit timestamp and ".bin" need 34 bytes, which
	 * overflowed the old 30-byte buffer; snprintf() bounds the write. */
	written = snprintf(filepath2, sizeof filepath2, "%s%d.bin",
	                   "bloomfilter/filter_", bloom->timestamp);
	if (written < 0 || (size_t)written >= sizeof filepath2) {
		fprintf(stderr, "ERROR: Could not build versioned filter file name\n");
	} else if (!bloom_dump(bloom, filepath2)) {
		fprintf(stderr, "ERROR: Could not dump bloom filter %s\n", filepath2);
	}

	/* check block - enter name to check */
	while(fgets(line, sizeof line, stdin)) {
		if((p=strchr(line, '\r'))) *p='\0';
		if((p=strchr(line, '\n'))) *p='\0';

		/* Divide line on symbols to check each word */
		for(p=strtok(line, delims); p; p=strtok(NULL, delims)) {
			if(!bloom_check(bloom, p)) {
				printf("No match for word \"%s\"\n", p);
			}
		}
	}

	bloom_destroy(bloom);

	return EXIT_SUCCESS;
}
Example #27
0
/*
 * Bloom filter lookup benchmark.
 *
 * Must be started from the command line with two arguments: the file of
 * source strings (argv[1]) and the file of strings to query (argv[2]).
 * Writes "yes"/"no" per query word to result_bf.dat and appends the elapsed
 * CPU time in seconds to computer.time.
 *
 * Returns 0 on success, -1 on a usage error or when a file cannot be
 * opened or the filter cannot be created.
 */
int main(int argc, char *argv[]) {
	clock_t start=clock();
	char line[1024];
	if (argc < 2) {
		fprintf(stderr, "ERROR: No word file specified\n");
		return -1;
	}
	/* argv[2] is used below, so it has to be present as well */
	if (argc < 3) {
		fprintf(stderr, "ERROR: No query file specified\n");
		return -1;
	}

	int big_count = 0;
	int count = 0;

	//bloom filter algorithm

	BF *bloomfilter = bloom_create(BIG_PRIME);
	if (bloomfilter == NULL) {
		fprintf(stderr, "ERROR: Could not create bloom filter\n");
		return -1;
	}

	if (freopen(argv[1], "r", stdin) == NULL) {
		fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
		bloom_destroy(bloomfilter);
		return -1;
	}
	/* the field width keeps scanf() inside line[] */
	while (scanf("%1023s", line) != EOF) {
		upper_string(line);
		if (check_string(line) == 1) {
			bloom_add(bloomfilter, line);
			big_count++;
		}
	}

	/* freopen() closes the old stream itself; calling fclose() first would
	 * hand it a stream that is no longer valid. */
	if (freopen(argv[2], "r", stdin) == NULL) {
		fprintf(stderr, "ERROR: Could not open file %s\n", argv[2]);
		bloom_destroy(bloomfilter);
		return -1;
	}
	if (freopen("result_bf.dat", "w", stdout) == NULL) {
		fprintf(stderr, "ERROR: Could not open file %s\n", "result_bf.dat");
		bloom_destroy(bloomfilter);
		return -1;
	}
	while (scanf("%1023s", line) != EOF) {
		upper_string(line);
		if (check_string(line) == 1 && bloom_check(bloomfilter, line) == 1) {
			printf("yes\n");
			count++;
		} else {
			printf("no\n");
		}
	}

	/* point stdin back at /dev/stdin; failure here is not fatal */
	if (freopen("/dev/stdin", "r", stdin) == NULL) {
		fprintf(stderr, "WARNING: Could not reopen /dev/stdin\n");
	}
	if (freopen("computer.time", "a", stdout) != NULL) {
		printf("%f\n",(double)(clock()-start)/CLOCKS_PER_SEC);
	} else {
		fprintf(stderr, "ERROR: Could not open file %s\n", "computer.time");
	}
	bloom_destroy(bloomfilter);

	/*****************************************************************************/
/*
	//compressed trie
	trie_Node* root;
	root = trie_create();
	//read the file and build the compressed trie
	freopen(argv[1], "r", stdin);
	while (scanf("%s", line) != EOF) {
		upper_string(line);
		if (check_string(line) == 1) {
			trie_add(line, root);
		}
	}
	fclose(stdin);

	//query whether each word is in the trie
	count = 0;
	freopen(argv[2], "r", stdin);
	freopen("result_trie.dat", "w", stdout);
	while (scanf("%s", line) != EOF) {
		upper_string(line);
		if (check_string(line) == 1 && trie_check(line, root) == 1) {
			printf("yes\n");
			count++;
		} else {
			printf("no\n");
		}
	}
	fclose(stdin);
	fclose(stdout);
	freopen("/dev/pts/3", "r", stdin);
	freopen("computer.time", "a", stdout);
	//printf("found %d\n", count);
*/	
//	trie_destroy(root);//the process exits right after this and its memory is released automatically, so destroying the trie is optional (uncomment to run destroy)
	return 0;
}
/*
 * bloom_test <bitmap_size> [random_num_seed]
 *
 * Inserts bitmap_size/10 random 64-bit numbers into a bloom filter, checks
 * that bloom_query() finds every one of them, then queries 100 times as
 * many fresh random numbers and prints how many matched (false positives).
 * Finally prints the first 1024 bits of the filter.
 *
 * Exits with 1 on a usage error, an allocation failure, or when an inserted
 * number is reported missing; returns 0 otherwise.
 */
int
main(int argc, char **argv)
{
	bloom_filter bf;
	int bsz;
	long long rll;
	int n_inserted;
	long long n_probes;
	long long *testnums;
	int matched  = 0;
	int i;
	long long j;

	if(argc < 2) {
		printf("Usage:\n ./bloom_test <bitmap_size> <random_num_seed>\n");
		exit(1);
	}

	bsz = atoi(argv[1]);
	if (bsz <= 0) {
		printf("bitmap_size must be a positive integer\n");
		exit(1);
	}

	/* initialize random number generator's seed*/
	if (argc > 2) {
		srandom(atoi(argv[2]));
	}

	n_inserted = bsz/10;
	/* allocate at least one element so a NULL result always means failure,
	   even when bitmap_size is below 10 */
	testnums = malloc(sizeof(long long) * (size_t)(n_inserted > 0 ? n_inserted : 1));
	if (testnums == NULL) {
		printf("out of memory\n");
		exit(1);
	}

	/*generate n_inserted random numbers (of long long type)
	  insert them into bloom filter*/
	bf = bloom_init(bsz);
	for (i = 0; i < n_inserted; i++) {
		rll = (long long) random();
		rll = rll << 31 | random();
		testnums[i] = rll;
		bloom_add(bf, rll);
	}

	/*check if all the n_inserted numbers are present in the 
	  bloom filter using bloom_query*/
	for (i = 0; i < n_inserted; i++) {
		if (!bloom_query(bf, testnums[i])) {
			printf("%lld inserted, but not present according to bloom_query\n", testnums[i]);
			exit(1);
		}
	}

	/*generate n_inserted*100 random numbers and check if any of them is 
	  in the bloom filter; the count is computed in long long so that it
	  cannot overflow int for large bitmap sizes*/
	n_probes = (long long) n_inserted * 100;
	for (j = 0; j < n_probes; j++) {
		rll = (long long) random();
		rll = rll << 31 | random();
		if (bloom_query(bf, rll)) {
			matched++;
		}
	}

	printf("false positive %d/%lld\n", matched, n_probes);

	/* print the first 1024 bits of bloom filter*/
	bloom_print(bf, 1024);

	free(testnums);
	return 0;
}
Example #29
0
/*
 * Load every line of argv[1] into a bloom filter, then write "yes" or "no"
 * to checkedemailresult.dat for each non-empty line of argv[2], depending
 * on whether the filter reports it as present.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when an argument is missing, a file
 * cannot be opened, or the filter cannot be created.
 */
int main(int argc, char *argv[])
{
    FILE *fp1;
    FILE *fp2;
    FILE *fp3;

    char line[1024];
    BF *bloom;

    if(argc<2) {
        fprintf(stderr, "ERROR: No word file specified\n");
        return EXIT_FAILURE;
    }
    /* argv[2] is opened below, so it has to be present as well */
    if(argc<3) {
        fprintf(stderr, "ERROR: No check file specified\n");
        return EXIT_FAILURE;
    }
    if(!(bloom=bloom_create(200000000, 11, RSHash, JSHash, PJWHash, ELFHash, BKDRHash, SDBMHash, DJBHash, DEKHash, BPHash, FNVHash, APHash))) {
        fprintf(stderr, "ERROR: Could not create bloom filter\n");
        return EXIT_FAILURE;
    }

    if(!(fp1=fopen(argv[1], "r"))) {
        fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
        bloom_destroy(bloom);
        return EXIT_FAILURE;
    }

    while(fgets(line, sizeof line, fp1)) {
        /* cut the line at its first '\r' or '\n' */
        line[strcspn(line, "\r\n")] = '\0';
        bloom_add(bloom, line);
    }

    fclose(fp1);

    if(!(fp2=fopen(argv[2], "r"))) {
        fprintf(stderr, "ERROR: Could not open file %s\n", argv[2]);
        bloom_destroy(bloom);
        return EXIT_FAILURE;
    }
    if(!(fp3=fopen("checkedemailresult.dat","w"))){
        fprintf(stderr, "ERROR:Could not open file");
        fclose(fp2);
        bloom_destroy(bloom);
        return EXIT_FAILURE;
    }

    while(fgets(line, sizeof line, fp2)) {
        line[strcspn(line, "\r\n")] = '\0';
        /* empty lines produce no output, as before */
        if(line[0] == '\0') {
            continue;
        }
        if(bloom_check(bloom, line)==1) {
            fputs("yes\n",fp3);
        }
        else {
            fputs("no\n",fp3);
        }
    }

    fclose(fp3);
    fclose(fp2);

    bloom_destroy(bloom);

    return EXIT_SUCCESS;
}
Example #30
0
/* Split line in place on '|' into at most max_fields tokens. Unused slots
 * are set to NULL. Returns the number of tokens stored. */
static int split_pipe_fields(char *line, char **fields, int max_fields)
{
	int n = 0;
	int i;
	char *tok = strtok(line, "|");

	while (tok != NULL && n < max_fields) {
		fields[n++] = tok;
		tok = strtok(NULL, "|");
	}
	for (i = n; i < max_fields; i++)
		fields[i] = NULL;
	return n;
}

/*
 * Filter the list of updated files (f1) against the hash catalog (f2) and
 * the cache (f3).
 *
 * Lines are '|'-separated; field 1 is used as the hash and field 2 as the
 * size. Every catalog hash is first loaded into a bloom filter. Each update
 * line is then skipped when its size is below 8000 or its hash appears in
 * the cache; otherwise, if bloom_add() returns 0 for its hash, the line is
 * passed to folderCache() and written to both f2 and the output file named
 * by buffer. Lines with too few fields are skipped.
 *
 * NOTE(review): the bloom filter is never released here; no free function
 * is referenced by this block, so that is left to the owner of the API.
 */
void masterServer(FILE *f1, FILE *f2, FILE *f3, char *buffer) {

	char	line[300]="",
			fullline[300]="",
			cacheline[300]="";
	char *splithash[10];
	long num, entries = 0, hashcatalogentries = 0;
	int nfields;
	int cached;
	struct bloom bloom;
	FILE *f4;

	f4=fopen(buffer,"w+");
	if (f4 == NULL) {
		/* every accepted line is written to f4, so it must be open */
		perror(buffer);
		return;
	}

	//check length of new input
	while (fgets (line, sizeof line, f1) != NULL) {
		entries++;
	}
	
	//check length of hashcatalog
	while (fgets (line, sizeof line, f2) != NULL) {
		hashcatalogentries++;
	}
	
	rewind(f1);
	rewind(f2);
	bloom_init(&bloom, hashcatalogentries + entries, 0.01);
	
	//add hashcatalog entries to bloom filter
	while (fgets (line, sizeof line, f2) != NULL) {
		nfields = split_pipe_fields(line, splithash, 10);
		if (nfields < 2)
			continue;	//no hash field on this line
		
		bloom_add(&bloom, splithash[1], strlen(splithash[1]));
	}
	
	//read list of updated files
	while (fgets (line, sizeof line, f1) != NULL) {
		
		strcpy(fullline,line);
		nfields = split_pipe_fields(line, splithash, 10);
		if (nfields < 3)
			continue;	//hash or size field missing
		
		//Size Filter
		num = strtol(splithash[2], NULL, 10);
		if (num < 8000) {
			continue;
		}
			
		//Cache Check; NOTE: only the hash is stored in the cache.
		//The cache is rescanned from the start for every line; without the
		//rewind only the first line was ever compared against it.
		cached = 0;
		rewind(f3);
		while (fgets (cacheline, sizeof cacheline, f3) != NULL) {
			//drop the line ending without touching a last line that has none
			cacheline[strcspn(cacheline, "\r\n")] = '\0';
			
			if (strcmp(splithash[1], cacheline) == 0) {
				cached = 1;
			}
		}
		
		//already in the cache: no need to continue to the bloom filter
		if (cached) {
			continue;
		}
			
		//Bloom Filter
		if (bloom_add(&bloom, splithash[1], strlen(splithash[1])) == 0) {
			folderCache (splithash[0], splithash[1], f2, f3);
			fprintf(f2, "%s", fullline);
			fprintf(f4, "%s", fullline);
		}
	}
	fclose(f4);
	
}