int build_main (int argc, char **argv) { if (argc < 2) build_usage (); char *position; BIGNUM capacity; /*-------defaults for bloom filter building-------*/ int opt; int k_mer = 0; float error_rate = 0.0005; char *list = NULL; char *target_path = NULL; char *source = NULL; //XXX make -l and -r mutually exclusive while ((opt = getopt (argc, argv, "e:k:o:r:l:h")) != -1) { switch (opt) { case 'e': (optarg) && ((error_rate = atof (optarg)), 1); break; case 'k': (optarg) && ((k_mer = atoi (optarg)), 1); break; case 'o': (optarg) && ((target_path = optarg), 1); break; case 'r': (optarg) && (source = optarg, 1); break; case 'l': (optarg) && (list = optarg, 1); break; case 'h': return build_usage (); default: printf ("Unknown option: -%c\n", (char) optopt); return build_usage (); } } if (!list && !source) { fprintf (stderr, "\nPlease, at least specify a reference file (-r) and an output bloom filter (-o)\n"); exit (-1); } if (!list) { #ifdef DEBUG printf ("[bloom build]: source is %s\n", source); printf ("[bloom build]: target is %s\n", target_path); #endif build (source, target_path, k_mer, error_rate, argv[0]); } else { bloom *bl_2 = NEW (bloom); Queue *head = NEW (Queue); Queue *tail = NEW (Queue); head->next = tail; F_set *File_head = NEW (F_set); File_head = make_list (source, list); while (File_head) { //map query- into memory-------------- position = mmaping (File_head->filename); if (*position == '>') capacity = strlen (position); else capacity = strlen (position) / 2; init_bloom (bl_2, capacity, error_rate, k_mer, File_head->filename); ref_add (bl_2, position); save_bloom (File_head->filename, bl_2, argv[0], target_path); bloom_destroy (bl_2); munmap (position, strlen (position)); File_head = File_head->next; } } return 0; }
int main(int argc, char **argv) { uint r = ROUNDS; uint only_crea = 0; uint do_judy = 0; uint seed = 0; float scaler = 0; uint sum = 0; uint false_hits = 0; uint nof_false = 0; Pvoid_t judy = NULL; dub_init(); PPARM_INT(seed, SEED); PPARM_FLOAT(scaler, SCALER); /* allow at most one false hit in 10^7 queries */ init_bloom(7, scaler); if (getenv("ONLY_CREATE")){ dub_msg("Only encoding %u lists", r); only_crea = 1; }else dub_msg("Encoding %u lists and testing %u items per list", r, TESTMAX); if (!only_crea && getenv("FALSE_HITS")){ dub_msg("Checking for false hits"); false_hits = 1; }else if (getenv("JUDY")){ dub_msg("Using Judy"); do_judy = 1; } srand(seed); while (r--){ uint sze = MAXSZE * (rand() / (RAND_MAX + 1.0)); u32 *p = xmalloc(sze * 4); bloom_s *b = NULL; uint j, k; for (j = 0; j < sze; j++) p[j] = 1000 * (rand() / (RAND_MAX + 1.0)); if (do_judy || false_hits){ uint tmp; for (j = 0; j < sze; j++) J1S(tmp, judy, p[j]); } if (!do_judy) b = new_bloom(p, sze); sum += sze; if (only_crea) goto next; for (j = 1; j < TESTMAX; j++){ if (false_hits){ J1T(k, judy, j); if (bloom_test(b, j)){ if (!k) ++nof_false; }else if (k){ print(p, sze); dub_die("False negative! " "Value %u not in bloom. " "Bloom broken!", j); } }else if (do_judy){ J1T(k, judy, j); }else bloom_test(b, j); } next: if (do_judy || false_hits){ J1FA(k, judy); } if (!do_judy){ free(b); free(p); } } dub_msg("In total %u items in lists", sum); if (false_hits) dub_msg("In total %u false hits found", nof_false); return 0; }