Ejemplo n.º 1
0
/*
 * Entry point of the "build" sub-command.
 *
 * Parses the command line and then either
 *   - builds a single bloom filter from the reference given with -r, or
 *   - when a list is given with -l, builds and saves one bloom filter for
 *     every entry of the file set returned by make_list ().
 *
 * Options:
 *   -e <float>  error rate passed to the builder (default 0.0005)
 *   -k <int>    k-mer size passed to the builder (default 0)
 *   -o <path>   target path for the saved filter(s)
 *   -r <path>   reference (source) file
 *   -l <path>   list file
 *   -h          print usage
 *
 * argv[0] is forwarded to build () / save_bloom ().
 *
 * Returns 0 after a build, or the value of build_usage () when usage is
 * shown.  Calls exit (-1) when neither -r nor -l was supplied.
 */
int build_main (int argc, char **argv)
{
  /* Nothing to do without arguments: show usage and stop here, the same
     way -h does, instead of falling through into option parsing.  */
  if (argc < 2)
    return build_usage ();

  /*-------defaults for bloom filter building-------*/
  int opt;
  int k_mer = 0;
  float error_rate = 0.0005;

  char *list = NULL;
  char *target_path = NULL;
  char *source = NULL;

  //XXX make -l and -r mutually exclusive
  while ((opt = getopt (argc, argv, "e:k:o:r:l:h")) != -1)
    {
      switch (opt)
        {
        case 'e':
          if (optarg)
            error_rate = atof (optarg);
          break;
        case 'k':
          if (optarg)
            k_mer = atoi (optarg);
          break;
        case 'o':
          if (optarg)
            target_path = optarg;
          break;
        case 'r':
          if (optarg)
            source = optarg;
          break;
        case 'l':
          if (optarg)
            list = optarg;
          break;
        case 'h':
          return build_usage ();
        default:
          printf ("Unknown option: -%c\n", (char) optopt);
          return build_usage ();
        }
    }

  if (!list && !source)
    {
      fprintf (stderr, "\nPlease, at least specify a reference file (-r) and an output bloom filter (-o)\n");
      exit (-1);
    }

  if (!list)
    {
      /* Single reference: build () does all the work.  */
#ifdef DEBUG
      printf ("[bloom build]: source is %s\n", source);
      printf ("[bloom build]: target is %s\n", target_path);
#endif
      build (source, target_path, k_mer, error_rate, argv[0]);
    }
  else
    {
      /* One filter object is reused for every file of the set.  */
      bloom *bl_2 = NEW (bloom);

      /* make_list () supplies the whole list, so no placeholder node is
         allocated beforehand (it would only be leaked).  */
      F_set *File_head = make_list (source, list);

      while (File_head)
        {
          /* map the file contents into memory */
          /* NOTE(review): the result of mmaping () is used unchecked;
             confirm how it reports a failed mapping.  */
          char *position = mmaping (File_head->filename);

          /* Measure the text once.  The same value is reused for
             munmap () so the unmapped size cannot change if ref_add ()
             alters the buffer.  */
          size_t length = strlen (position);

          /* Text starting with '>' is sized by its full length,
             anything else by half of it.  */
          BIGNUM capacity = (*position == '>') ? length : length / 2;

          init_bloom (bl_2, capacity, error_rate, k_mer, File_head->filename);
          ref_add (bl_2, position);
          save_bloom (File_head->filename, bl_2, argv[0], target_path);
          bloom_destroy (bl_2);
          munmap (position, length);

          File_head = File_head->next;
        }
    }

  return 0;
}
Ejemplo n.º 2
0
/*
 * Bloom filter benchmark / self-test driver.
 *
 * Runs ROUNDS rounds.  Each round creates a random-sized list (size below
 * MAXSZE) of random values in [0, 1000), encodes it, and then, unless
 * only creation was requested, queries the values 1 .. TESTMAX-1.
 *
 * Behaviour is selected through environment variables:
 *   ONLY_CREATE  only encode the lists, skip the query phase
 *   FALSE_HITS   (ignored with ONLY_CREATE) mirror every list in a Judy1
 *                set and compare each bloom answer against it: false
 *                positives are counted, a false negative aborts through
 *                dub_die ()
 *   JUDY         (checked only when FALSE_HITS mode is not active) use
 *                the Judy1 set instead of the bloom filter
 *
 * seed and scaler are filled in by PPARM_INT / PPARM_FLOAT
 * (presumably from parameters named SEED / SCALER -- confirm against the
 * macro definitions).
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
        uint  r          = ROUNDS;
        uint  only_crea  = 0;
        uint  do_judy    = 0;
        uint  seed       = 0;
        float scaler     = 0;
        uint  sum        = 0;
        uint  false_hits = 0;
        uint  nof_false  = 0;

        Pvoid_t judy     = NULL;

        dub_init();

        PPARM_INT(seed, SEED);
        PPARM_FLOAT(scaler, SCALER);

        /* allow at most one false hit in 10^7 queries */
        init_bloom(7, scaler);

        if (getenv("ONLY_CREATE")){
                dub_msg("Only encoding %u lists", r);
                only_crea = 1;
        }else{
                dub_msg("Encoding %u lists and testing %u items per list",
                                r, TESTMAX);
        }

        if (!only_crea && getenv("FALSE_HITS")){
                dub_msg("Checking for false hits");
                false_hits = 1;
        }else if (getenv("JUDY")){
                dub_msg("Using Judy");
                do_judy = 1;
        }

        srand(seed);

        while (r--){

                uint  sze  = MAXSZE * (rand() / (RAND_MAX + 1.0));
                u32   *p   = xmalloc(sze * sizeof *p);
                bloom_s *b = NULL;
                uint  j, k;

                /* fill the list with random values in [0, 1000) */
                for (j = 0; j < sze; j++)
                        p[j] = 1000 * (rand() / (RAND_MAX + 1.0));

                /* the Judy1 set is needed both as the alternative
                 * implementation and as the reference for FALSE_HITS */
                if (do_judy || false_hits){
                        uint tmp;
                        for (j = 0; j < sze; j++)
                                J1S(tmp, judy, p[j]);
                }

                if (!do_judy)
                        b = new_bloom(p, sze);

                sum += sze;

                if (!only_crea){
                        for (j = 1; j < TESTMAX; j++){

                                if (false_hits){
                                        /* k: is j really in the list? */
                                        J1T(k, judy, j);
                                        if (bloom_test(b, j)){
                                                if (!k)
                                                        ++nof_false;
                                        }else if (k){
                                                /* bloom missed a value
                                                 * that was inserted */
                                                print(p, sze);
                                                dub_die("False negative! "
                                                  "Value %u not in bloom. "
                                                   "Bloom broken!", j);
                                        }
                                }else if (do_judy){
                                        J1T(k, judy, j);
                                }else{
                                        bloom_test(b, j);
                                }
                        }
                }

                /* drop the per-round Judy1 set */
                if (do_judy || false_hits){
                        J1FA(k, judy);
                }

                /* The list buffer is allocated in every mode, so it must
                 * be released in every mode as well (it used to leak when
                 * JUDY was set).  b is NULL in JUDY mode and free(NULL)
                 * is a no-op. */
                free(b);
                free(p);
        }

        dub_msg("In total %u items in lists", sum);
        if (false_hits)
                dub_msg("In total %u false hits found", nof_false);

        return 0;
}